石家庄规划建设局网站,做网站点击挣钱不?,网站怎么做充值系统下载,哪个网站做免费小程序1、前言
想要了解EXT文件系统的工作原理，那了解文件系统在磁盘上的分布就是必不可少的。这一节主要介绍EXT文件系统硬盘存储的物理结构。 由于当前主流的CPU架构均采用小端模式，因此下文介绍均以小端模式为准。
2、超级块
2.1 属性
下表列举出超级块…1、前言
想要了解EXT文件系统的工作原理，那了解文件系统在磁盘上的分布就是必不可少的。这一节主要介绍EXT文件系统硬盘存储的物理结构。 由于当前主流的CPU架构均采用小端模式，因此下文介绍均以小端模式为准。
2、超级块
2.1 属性
下表列举出超级块中相对重要的属性。
| 属性名 | 含义 |
| --- | --- |
| s_log_block_size | 块大小，计算公式：2 ^ (10 + s_log_block_size) |
| s_blocks_per_group | 每个块组中块的个数 |
| s_inodes_per_group | 每个块组中索引的个数 |
| s_magic | 魔数(0xEF53) |
| s_inode_size | 索引大小，单位byte |
| s_feature_compat | 兼容特性 |
| s_feature_incompat | 不兼容特性 |
| s_feature_ro_compat | 只读兼容特性 |
| s_backup_bgs | 包含超级块备份的块组号。 |
| s_desc_size | 块组描述符大小 |
2.2 特性
一些默认开启或者常用的文件系统特性。
| 属性名 | 含义 |
| --- | --- |
| COMPAT_HAS_JOURNAL | 开启日志。 |
| COMPAT_EXT_ATTR | 支持扩展属性。 |
| COMPAT_RESIZE_INODE | 保留块组描述符。需要开启RO_COMPAT_SPARSE_SUPER特性。 |
| COMPAT_SPARSE_SUPER2 | 稀疏超级块V2。开启本特性后，仅s_backup_bgs 属性指向的2个块组备份超级块。 |
| INCOMPAT_FILETYPE | app_ext4_dir_entry结构中包含文件类型。 |
| INCOMPAT_META_BG | 开启元块组属性。与COMPAT_RESIZE_INODE特性互斥。 |
| INCOMPAT_64BIT | 支持超过2^32个块。 |
| INCOMPAT_FLEX_BG | 开启弹性块组。 |
| INCOMPAT_INLINE_DATA | 支持内联文件和目录。 |
| RO_COMPAT_SPARSE_SUPER | 稀疏超级块。 |
2.3 参考代码
/*
 * Reference layout of the EXT4 superblock as this article describes it on
 * disk. Fields are listed in storage order; do not reorder or resize them.
 * NOTE(review): ub8/ub16/ub32/ub64/b8 are presumably fixed-width integer
 * typedefs declared elsewhere - confirm their widths before relying on
 * sizeof(app_ext4_super_block).
 */
typedef struct {
  ub32 s_inodes_count;       /* Inodes count */
  ub32 s_blocks_count;       /* Blocks count */
  ub32 s_r_blocks_count;     /* Reserved blocks count */
  ub32 s_free_blocks_count;  /* Free blocks count */
  ub32 s_free_inodes_count;  /* Free inodes count */
  ub32 s_first_data_block;   /* First Data Block */
  ub32 s_log_block_size;     /* Block size */
  ub32 s_log_cluster_size;   /* Allocation cluster size */
  ub32 s_blocks_per_group;   /* # Blocks per group */
  ub32 s_clusters_per_group; /* # Fragments per group */
  ub32 s_inodes_per_group;   /* # Inodes per group */
  ub32 s_mtime;              /* Mount time */
  ub32 s_wtime;              /* Write time */
  ub16 s_mnt_count;          /* Mount count */
  ub16 s_max_mnt_count;      /* Maximal mount count */
  ub16 s_magic;              /* Magic signature */
  ub16 s_state;              /* File system state */
  ub16 s_errors;             /* Behaviour when detecting errors */
  ub16 s_minor_rev_level;    /* minor revision level */
  ub32 s_lastcheck;          /* time of last check */
  ub32 s_checkinterval;      /* max. time between checks */
  ub32 s_creator_os;         /* OS */
  ub32 s_rev_level;          /* Revision level */
  ub16 s_def_resuid;         /* Default uid for reserved blocks */
  ub16 s_def_resgid;         /* Default gid for reserved blocks */
  /*
   * These fields are for EXT2_DYNAMIC_REV superblocks only.
   *
   * Note: the difference between the compatible feature set and
   * the incompatible feature set is that if there is a bit set
   * in the incompatible feature set that the kernel doesn't
   * know about, it should refuse to mount the filesystem.
   *
   * e2fsck's requirements are more strict; if it doesn't know
   * about a feature in either the compatible or incompatible
   * feature set, it must abort and not try to meddle with
   * things it doesn't understand...
   */
  ub32 s_first_ino;              /* First non-reserved inode */
  ub16 s_inode_size;             /* size of inode structure */
  ub16 s_block_group_nr;         /* block group # of this superblock */
  ub32 s_feature_compat;         /* compatible feature set */
  ub32 s_feature_incompat;       /* incompatible feature set */
  ub32 s_feature_ro_compat;      /* readonly-compatible feature set */
  ub8 s_uuid[16];                /* 128-bit uuid for volume */
  b8 s_volume_name[16];          /* volume name */
  b8 s_last_mounted[64];         /* directory where last mounted */
  ub32 s_algorithm_usage_bitmap; /* For compression */
  /*
   * Performance hints. Directory preallocation should only
   * happen if the EXT2_FEATURE_COMPAT_DIR_PREALLOC flag is on.
   */
  ub8 s_prealloc_blocks;      /* Nr of blocks to try to preallocate */
  ub8 s_prealloc_dir_blocks;  /* Nr to preallocate for dirs */
  ub16 s_reserved_gdt_blocks; /* Per group table for online growth */
  /*
   * Journaling support valid if EXT2_FEATURE_COMPAT_HAS_JOURNAL set.
   */
  ub8 s_journal_uuid[16];  /* uuid of journal superblock */
  ub32 s_journal_inum;     /* inode number of journal file */
  ub32 s_journal_dev;      /* device number of journal file */
  ub32 s_last_orphan;      /* start of list of inodes to delete */
  ub32 s_hash_seed[4];     /* HTREE hash seed */
  ub8 s_def_hash_version;  /* Default hash version to use */
  ub8 s_jnl_backup_type;   /* Default type of journal backup */
  ub16 s_desc_size;        /* Group desc. size: INCOMPAT_64BIT */
  ub32 s_default_mount_opts;
  ub32 s_first_meta_bg;        /* First metablock group */
  ub32 s_mkfs_time;            /* When the filesystem was created */
  ub32 s_jnl_blocks[17];       /* Backup of the journal inode */
  ub32 s_blocks_count_hi;      /* Blocks count high 32bits */
  ub32 s_r_blocks_count_hi;    /* Reserved blocks count high 32 bits */
  ub32 s_free_blocks_hi;       /* Free blocks count */
  ub16 s_min_extra_isize;      /* All inodes have at least # bytes */
  ub16 s_want_extra_isize;     /* New inodes should reserve # bytes */
  ub32 s_flags;                /* Miscellaneous flags */
  ub16 s_raid_stride;          /* RAID stride */
  ub16 s_mmp_update_interval;  /* # seconds to wait in MMP checking */
  ub64 s_mmp_block;            /* Block for multi-mount protection */
  ub32 s_raid_stripe_width;    /* blocks on all data disks (N*stride) */
  ub8 s_log_groups_per_flex;   /* FLEX_BG group size */
  ub8 s_reserved_char_pad;
  ub16 s_reserved_pad;             /* Padding to next 32bits */
  ub64 s_kbytes_written;           /* nr of lifetime kilobytes written */
  ub32 s_snapshot_inum;            /* Inode number of active snapshot */
  ub32 s_snapshot_id;              /* sequential ID of active snapshot */
  ub64 s_snapshot_r_blocks_count;  /* reserved blocks for active snapshot's future use */
  ub32 s_snapshot_list;            /* inode number of the head of the on-disk snapshot list */
  ub32 s_error_count;              /* number of fs errors */
  ub32 s_first_error_time;         /* first time an error happened */
  ub32 s_first_error_ino;          /* inode involved in first error */
  ub64 s_first_error_block;        /* block involved of first error */
  ub8 s_first_error_func[32];      /* function where the error happened */
  ub32 s_first_error_line;         /* line number where error happened */
  ub32 s_last_error_time;          /* most recent time of an error */
  ub32 s_last_error_ino;           /* inode involved in last error */
  ub32 s_last_error_line;          /* line number where error happened */
  ub64 s_last_error_block;         /* block involved of last error */
  ub8 s_last_error_func[32];       /* function where the error happened */
  ub8 s_mount_opts[64];
  ub32 s_usr_quota_inum;   /* inode number of user quota file */
  ub32 s_grp_quota_inum;   /* inode number of group quota file */
  ub32 s_overhead_blocks;  /* overhead blocks/clusters in fs */
  ub32 s_backup_bgs[2];    /* If sparse_super2 enabled */
  ub32 s_reserved[106];    /* Padding to the end of the block */
  ub32 s_checksum;         /* crc32c(superblock) */
} app_ext4_super_block;
3、组描述符
3.1 属性
下表列举出组描述符的关键属性。
| 属性名 | 含义 |
| --- | --- |
| bg_inode_table | 索引表的物理偏移。 |
| bg_inode_table_hi | 索引表的物理偏移的高32位。 |
3.2 索引表计算
已知目标文件的Inode = 357，每个块组的Inode数 inode_count_ = 8192，组描述大小 gdt_size_ = 32，索引Inode大小 inode_size_ = 256，该如何找到文件对应的组描述符呢？ 首先，计算出文件所在的块组：bg_no = (inode_no - 1) / inode_count_ = 356 / 8192 = 0，即文件属于第一个块组。 接着，计算文件所在的组描述符的位置：gdt_block_no = bg_no / gdt_count_ = 0 / (4096 / 32) = 0，即文件所在的组描述符在块组文件描述符的第一个块中。 然后，计算文件所在的组描述符在块中的位置：gdt_index = bg_no % gdt_count_ = 0，块中的第一个组描述符即文件所在的组描述符。 其次，计算文件在所在块组中的索引：inode_partition = (inode_no - 1) % inode_count_ = 356 % 8192 = 356，即文件是块组的第356个inode节点。 再次，计算文件在索引表中的位置：inode_block_no = inode_partition / it_inode_count = 356 / (4096 / 256) = 22，即文件所在的索引在索引表的第22个块中。 最后，从组描述的bg_inode_table和bg_inode_table_hi获取inode_table_no，计算出索引表的偏移位置：file_offset = (inode_table_no + inode_block_no) * 4096。 默认情况下，所有的组描述符在第一个块组中都存在备份，因此从第一个块组中读取对应的组描述符即可。
// inode 0 is defined but not exist, so actual inode no begin with 1.
// the bg number of the inode_no
b32 bg_no (inode_no - 1) / volume_-inode_count_;
// the gdt number in bg
b32 gdt_block_no bg_no / volume_-gdt_count_;
// the index of gdt in the bg which this inode in
b32 gdt_index bg_no % volume_-gdt_count_;
// the index of inode in the bg which this inode in
b32 inode_partition (inode_no - 1) % volume_-inode_count_;
// the inode count in one IT block
b32 it_inode_count volume_-block_size_ / volume_-inode_size_;
// the index of IT block in the bg which this inode in
b32 inode_block_no inode_partition / it_inode_count;
// move file pointer to gdt blockb64 file_offset 0;
if (volume_-meta_group_)file_offset GetGDTOffset(gdt_block_no * (b64)volume_-gdt_count_);
else// use gdt in first bgfile_offset GetGDTOffset(0) gdt_block_no * (b64)volume_-block_size_;
if (lseek64(volume_-fd_, file_offset, SEEK_SET) ! file_offset) goto IOErr;gdt_record_ (app_ext4_group_desc *)new char[volume_-block_size_];
if (volume_-block_size_ !read(volume_-fd_, gdt_record_, volume_-block_size_))goto IOErr;// get offset of block which inode in
if (!volume_-extend64_) {file_offset (gdt_record_[gdt_index].bg_inode_table inode_block_no) *(b64)volume_-block_size_;
} else {app_ext4_group_desc64 *gdt_record (app_ext4_group_desc64 *)((char *)gdt_record_.get() volume_-gdt_size_ * gdt_index);b64 inode_table_no gdt_record-bg_inode_table | ((b64)gdt_record-bg_inode_table_hi 32);file_offset (inode_table_no inode_block_no) * volume_-block_size_;
}
if (lseek64(volume_-fd_, file_offset, SEEK_SET) ! file_offset) goto IOErr;inode_record_ (app_ext4_inode *)new char[volume_-block_size_];
if (volume_-block_size_ !read(volume_-fd_, inode_record_, volume_-block_size_))goto IOErr;3.3 参考代码
/*
 * Block group descriptor, short form: the record that ends at bg_checksum.
 * Fields are listed in storage order; do not reorder or resize them.
 */
typedef struct {
  ub32 bg_block_bitmap;         /* Blocks bitmap block */
  ub32 bg_inode_bitmap;         /* Inodes bitmap block */
  ub32 bg_inode_table;          /* Inodes table block */
  ub16 bg_free_blocks_count;    /* Free blocks count */
  ub16 bg_free_inodes_count;    /* Free inodes count */
  ub16 bg_used_dirs_count;      /* Directories count */
  ub16 bg_flags;                /* EXT4_BG_flags (INODE_UNINIT, etc) */
  ub32 bg_exclude_bitmap_lo;    /* Exclude bitmap for snapshots */
  ub16 bg_block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bitmap) LSB */
  ub16 bg_inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bitmap) LSB */
  ub16 bg_itable_unused;        /* Unused inodes count */
  ub16 bg_checksum;             /* crc16(sb_uuid+group+desc) */
} app_ext4_group_desc;

/*
 * Block group descriptor, long form: the same leading fields as
 * app_ext4_group_desc followed by the *_hi (MSB) halves.
 */
typedef struct {
  ub32 bg_block_bitmap;         /* Blocks bitmap block */
  ub32 bg_inode_bitmap;         /* Inodes bitmap block */
  ub32 bg_inode_table;          /* Inodes table block */
  ub16 bg_free_blocks_count;    /* Free blocks count */
  ub16 bg_free_inodes_count;    /* Free inodes count */
  ub16 bg_used_dirs_count;      /* Directories count */
  ub16 bg_flags;                /* EXT4_BG_flags (INODE_UNINIT, etc) */
  ub32 bg_exclude_bitmap_lo;    /* Exclude bitmap for snapshots */
  ub16 bg_block_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bitmap) LSB */
  ub16 bg_inode_bitmap_csum_lo; /* crc32c(s_uuid+grp_num+bitmap) LSB */
  ub16 bg_itable_unused;        /* Unused inodes count */
  ub16 bg_checksum;             /* crc16(sb_uuid+group+desc) */
  ub32 bg_block_bitmap_hi;      /* Blocks bitmap block MSB */
  ub32 bg_inode_bitmap_hi;      /* Inodes bitmap block MSB */
  ub32 bg_inode_table_hi;       /* Inodes table block MSB */
  ub16 bg_free_blocks_count_hi; /* Free blocks count MSB */
  ub16 bg_free_inodes_count_hi; /* Free inodes count MSB */
  ub16 bg_used_dirs_count_hi;   /* Directories count MSB */
  ub16 bg_itable_unused_hi;     /* Unused inodes count MSB */
  ub32 bg_exclude_bitmap_hi;    /* Exclude bitmap block MSB */
  ub16 bg_block_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bitmap) MSB */
  ub16 bg_inode_bitmap_csum_hi; /* crc32c(s_uuid+grp_num+bitmap) MSB */
  ub32 bg_reserved;
} app_ext4_group_desc64;
4、索引节点
4.1 属性
下表列举出Inode中相对重要的属性。
| 属性名 | 含义 |
| --- | --- |
| i_mode | 文件属性和文件类型。 |
| i_size_lo | 文件大小低32位。 |
| i_links_count | 硬链接数量。 |
| i_flags | 标志位。 |
| i_block | 块图或者扩展树，存储文件内容或者目录索引。 |
| i_size_high | 文件大小高32位。 |
| i_extra_isize | 扩展属性大小。 |
4.2 文件标识
| 值 | 含义 |
| --- | --- |
| 0x1000 | S_IFIFO (FIFO) |
| 0x2000 | S_IFCHR (Character device) |
| 0x4000 | S_IFDIR (Directory) |
| 0x6000 | S_IFBLK (Block device) |
| 0x8000 | S_IFREG (Regular file) |
| 0xA000 | S_IFLNK (Symbolic link) |
| 0xC000 | S_IFSOCK (Socket) |
4.3 文件内容
通常情况下，i_block中用于存储文件所有块的索引信息。某些特殊场景下，会用于其它情况。
- 软链接(Symbolic Links)：当链接的目标路径长度小于60时，会将目标路径存储在i_block中。
- 内联数据(Inline Data)：当文件系统开启Inline Data特性，且数据长度小于156(目前)时，用于存储内容的前60个字节。
- 直接/间接块索引(Direct/Indirect Block Addressing)：i_block[0:11]存储数据内容的块号。i_block[12]指向间接数据块(存储数据块号的数据块)。i_block[13]指向双重间接数据块(存储间接数据块的数据块)。i_block[14]指向三重间接数据块(存储双重间接数据块的数据块)。
- 扩展树索引(Extent Tree)：通过树的形式管理文件或者文件夹的数据块。扩展树的详细介绍请参考最后一节。
4.4 参考代码
/* Number of ub32 slots in app_ext4_inode.i_block. */
#define EXT4_N_BLOCKS 15

/*
 * Inode record. Fields are listed in storage order; do not reorder or
 * resize them. osd1/osd2 are OS-dependent unions (Linux and Hurd variants).
 */
typedef struct {
  ub16 i_mode;        /* File mode */
  ub16 i_uid;         /* Low 16 bits of Owner Uid */
  ub32 i_size;        /* Size in bytes */
  ub32 i_atime;       /* Access time */
  ub32 i_ctime;       /* Inode Change time */
  ub32 i_mtime;       /* Modification time */
  ub32 i_dtime;       /* Deletion Time */
  ub16 i_gid;         /* Low 16 bits of Group Id */
  ub16 i_links_count; /* Links count */
  ub32 i_blocks;      /* Blocks count */
  ub32 i_flags;       /* File flags */
  union {
    struct {
      ub32 l_i_version; /* was l_i_reserved1 */
    } linux1;
    struct {
      ub32 h_i_translator;
    } hurd1;
  } osd1;                      /* OS dependent 1 */
  ub32 i_block[EXT4_N_BLOCKS]; /* Pointers to blocks */
  ub32 i_generation;           /* File version (for NFS) */
  ub32 i_file_acl;             /* File ACL */
  ub32 i_size_high;            /* Formerly i_dir_acl, directory ACL */
  ub32 i_faddr;                /* Fragment address */
  union {
    struct {
      ub16 l_i_blocks_hi;
      ub16 l_i_file_acl_high;
      ub16 l_i_uid_high;    /* these 2 fields */
      ub16 l_i_gid_high;    /* were reserved2[0] */
      ub16 l_i_checksum_lo; /* crc32c(uuid+inum+inode) */
      ub16 l_i_reserved;
    } linux2;
    struct {
      ub8 h_i_frag;  /* Fragment number */
      ub8 h_i_fsize; /* Fragment size */
      ub16 h_i_mode_high;
      ub16 h_i_uid_high;
      ub16 h_i_gid_high;
      ub32 h_i_author;
    } hurd2;
  } osd2; /* OS dependent 2 */
  ub16 i_extra_isize;
  ub16 i_checksum_hi;  /* crc32c(uuid+inum+inode) */
  ub32 i_ctime_extra;  /* extra Change time (nsec << 2 | epoch) */
  ub32 i_mtime_extra;  /* extra Modification time (nsec << 2 | epoch) */
  ub32 i_atime_extra;  /* extra Access time (nsec << 2 | epoch) */
  ub32 i_crtime;       /* File creation time */
  ub32 i_crtime_extra; /* extra File creation time (nsec << 2 | epoch) */
  ub32 i_version_hi;   /* high 32 bits for 64-bit version */
} app_ext4_inode;
5、扩展属性
扩展属性通常用于存储文件的ACLs访问权限和一些其他的安全属性（例如selinux等）。因此，通常情况下，使用文件系统时并不需要关注文件的扩展属性。 但有一种例外情况，那就是开启了内联数据特性后，文件的一部分数据内容会存储到扩展属性中。 我们可以在2个地方找到文件的扩展属性：其一，2个索引信息的中间；其二，i_file_acl指向的块。而内联数据则存在于第一个地方。 扩展属性块以app_ext4_attr_header结构开始，但在索引信息后时，只存在第一个字段h_magic = 0xEA020000。 实际的扩展属性用app_ext4_attr_entry管理，当e_name_index = 7且e_name = "data"时，则代表内联数据。
/*
 * Header found at the start of an extended-attribute block. Fields are
 * listed in storage order; do not reorder or resize them.
 */
typedef struct {
  ub32 h_magic;       /* magic number for identification */
  ub32 h_refcount;    /* reference count */
  ub32 h_blocks;      /* number of disk blocks used */
  ub32 h_hash;        /* hash value of all attributes */
  ub32 h_reserved[4]; /* zero right now */
} app_ext4_attr_header;

/*
 * Fixed-size part of one extended-attribute entry; the attribute name
 * (e_name_len bytes) is read from directly after this struct.
 */
typedef struct {
  ub8 e_name_len;     /* length of name */
  ub8 e_name_index;   /* attribute name index */
  ub16 e_value_offs;  /* offset in disk block of value */
  ub32 e_value_block; /* disk block attribute is stored on (n/i) */
  ub32 e_value_size;  /* size of attribute value */
  ub32 e_hash;        /* hash value of name and value */
} app_ext4_attr_entry;
// Fetch the inline data stored in the extended attributes
app_ext4_attr_header *attr_header (app_ext4_attr_header *)((b8 *)inode_info_-i_extra_isize inode_info_-i_extra_isize);if (attr_header-h_magic ! kExtAttrMagic) return false;// Extended attributes, when stored after the inode,// have a header ext4_xattr_ibody_header that is 4 bytes longapp_ext4_attr_entry *attr_data (app_ext4_attr_entry *)((b8 *)attr_header sizeof(attr_header-h_magic));while (attr_data-e_name_index ! kExtAttrDataIdx ||attr_data-e_name_len ! sizeof(kExtAttrDataName)) {attr_data (app_ext4_attr_entry *)((b8 *)attr_data sizeof(app_ext4_attr_entry) (attr_data-e_name_len 3) / 4 * 4);}// For an inode attribute e_value_offs is relative to the first entryif (*(b32 *)((b8 *)attr_data sizeof(app_ext4_attr_entry)) kExtAttrDataName) {memcpy(inline_data_,(b8 *)attr_header sizeof(attr_header-h_magic) attr_data-e_value_offs,attr_data-e_value_size);}6、扩展树
由于直接/间接块索引的种种缺陷，在EXT4中推出了扩展树取而代之。扩展树，顾名思义，通过树的形式管理数据块。 其中每个节点以app_ext4_extent_header开始，非叶子节点时后接app_ext4_extent_idx结构，叶子节点则紧跟app_ext4_extent结构。 app_ext4_extent_header用于存储当前节点的信息。
| 变量 | 含义 |
| --- | --- |
| eh_magic | 魔数0xF30A。 |
| eh_entries | 当前节点存储的数据个数。 |
| eh_depth | 当前节点的深度，0则代表当前是叶子节点。 |
app_ext4_extent存储实际的数据块信息。
| 变量 | 含义 |
| --- | --- |
| ee_block | 起始的逻辑块地址。 |
| ee_len | 当前extent管理的实际物理块个数。ee_len = ee_len > 32768 ? ee_len - 32768 : ee_len |
| ee_start_hi / ee_start | ee_start_hi左移32位后与ee_start按位或，即可得出起始的物理块地址。 |
app_ext4_extent_idx存储下一层节点的信息。
| 变量 | 含义 |
| --- | --- |
| ei_block | 起始的逻辑块地址。 |
| ei_leaf / ei_leaf_hi | ei_leaf_hi左移32位后与ei_leaf按位或，即可得出下一层节点的物理块地址。 |
/*
 * Header that starts every extent-tree node. Fields are listed in storage
 * order; do not reorder or resize them.
 */
typedef struct {
  ub16 eh_magic;      /* probably will support different formats */
  ub16 eh_entries;    /* number of valid entries */
  ub16 eh_max;        /* capacity of store in entries */
  ub16 eh_depth;      /* has tree real underlaying blocks? */
  ub32 eh_generation; /* generation of the tree */
} app_ext4_extent_header;

/* Leaf entry: maps a run of logical blocks to physical blocks. */
typedef struct {
  ub32 ee_block;    /* first logical block extent covers */
  ub16 ee_len;      /* number of blocks covered by extent */
  ub16 ee_start_hi; /* high 16 bits of physical block */
  ub32 ee_start;    /* low 32 bits of physical block */
} app_ext4_extent;

/* Index entry: points at the node one level down in the tree. */
typedef struct {
  ub32 ei_block;   /* index covers logical blocks from 'block' */
  ub32 ei_leaf;    /* pointer to the physical block of the next
                    * level. leaf or next index could be here */
  ub16 ei_leaf_hi; /* high 16 bits of physical block */
  ub16 ei_unused;
} app_ext4_extent_idx;