inode缓存与dentry缓存
2014-01-30 10:45
639 查看
1. inode缓存
/*
 * In-memory inode object, as quoted in this article.
 *
 * Members wrapped in #ifdef exist only when the corresponding macro is
 * defined. Members of each anonymous union share the same storage.
 */
struct inode{
/* RCU path lookup touches following: */
umode_t i_mode;
uid_t i_uid;
gid_t i_gid;
const struct inode_operations*i_op;
/* Owning super block; iput_final() reads ->s_op and ->s_flags through it. */
struct super_block*i_sb;
spinlock_t i_lock;/* i_blocks, i_bytes, maybe i_size */
unsigned int i_flags;
/*
 * State bits; iput_final() and prune_icache() test or set I_NEW,
 * I_REFERENCED, I_DIRTY, I_SYNC, I_WILL_FREE and I_FREEING here while
 * holding i_lock.
 */
unsigned long i_state;
#ifdef CONFIG_SECURITY
void*i_security;
#endif
struct mutex i_mutex;
unsigned long dirtied_when;/* jiffies of first dirtying */
struct hlist_node i_hash;
struct list_head i_wb_list;/* backing dev IO list */
/* Linked onto the global inode_lru list by inode_lru_list_add(). */
struct list_head i_lru;/*inode LRU list */
/* NOTE(review): presumably the link into super_block->s_inodes -- confirm. */
struct list_head i_sb_list;
union{
struct list_head i_dentry;
struct rcu_head i_rcu;
};
unsigned long i_ino;
/* Reference count; prune_icache() skips inodes where this is non-zero. */
atomic_t i_count;
/* Link count; generic_drop_inode() asks for a drop when this is zero. */
unsigned int i_nlink;
dev_t i_rdev;
unsigned int i_blkbits;
u64 i_version;
loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount;
#endif
struct timespec i_atime;
struct timespec i_mtime;
struct timespec i_ctime;
blkcnt_t i_blocks;
unsigned short i_bytes;
struct rw_semaphore i_alloc_sem;
const struct file_operations*i_fop;/* former ->i_op->default_file_ops */
struct file_lock*i_flock;
struct address_space*i_mapping;
/*
 * Embedded address_space; prune_icache() checks i_data.nrpages and passes
 * &i_data to invalidate_mapping_pages().
 */
struct address_space i_data;
#ifdef CONFIG_QUOTA
struct dquot*i_dquot[MAXQUOTAS];
#endif
struct list_head i_devices;
union{
struct pipe_inode_info*i_pipe;
struct block_device*i_bdev;
struct cdev*i_cdev;
};
__u32 i_generation;
#ifdef CONFIG_FSNOTIFY
__u32 i_fsnotify_mask;/* all events this inode cares about */
struct hlist_head i_fsnotify_marks;
#endif
#ifdef CONFIG_IMA
atomic_t i_readcount;/* struct files open RO */
#endif
atomic_t i_writecount;
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl*i_acl;
struct posix_acl*i_default_acl;
#endif
void*i_private;/* fs or device private pointer */
};
inode可能处于三种状态:
1)unused,里面没有保存有效的内容,可以被复用为新的用途;2)in use,正在被使用,其成员i_count以及i_nlink一定大于0,此时inode与文件系统或者说设备上的文件相关联,但是自从上次与设备同步后,内容没有发生改变,即不是dirty的;
3)dirty,inode里面的内容已经与文件系统中的文件内容不一致了,即脏了,需要进行文件同步操作。
前两种状态的inode都各自位于一个全局的链表中,而第三种的inode位于super_block结构体中的一个链表中。
先看inode结构体中的一个成员:
struct list_head i_lru; /* inode LRU list */
对应着一个全局的链表:
static LIST_HEAD(inode_lru);
static DEFINE_SPINLOCK(inode_lru_lock);
/*
 * iput_final - handle an inode whose last reference is being dropped.
 * @inode: the inode being released
 *
 * The filesystem's ->drop_inode() hook (or generic_drop_inode() when the
 * hook is absent) decides whether the inode should be evicted.
 *
 *  - keep + super block still MS_ACTIVE: mark the inode I_REFERENCED, park
 *    it on the LRU unless it is dirty or being synced, and return.
 *  - keep + super block no longer active: write the inode back first, then
 *    evict it.
 *  - drop: evict it straight away.
 *
 * Every path in this function releases inode->i_lock before returning.
 */
static void iput_final(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	const struct super_operations *op = sb->s_op;
	int drop;

	WARN_ON(inode->i_state & I_NEW);

	drop = (op && op->drop_inode) ? op->drop_inode(inode)
				      : generic_drop_inode(inode);

	if (!drop) {
		if (sb->s_flags & MS_ACTIVE) {
			/* Filesystem is alive: retain the inode in the cache. */
			inode->i_state |= I_REFERENCED;
			if (!(inode->i_state & (I_DIRTY|I_SYNC)))
				inode_lru_list_add(inode);
			spin_unlock(&inode->i_lock);
			return;
		}

		/*
		 * Filesystem is going away: write the inode back before
		 * evicting it. i_lock is dropped around the writeback call
		 * and I_WILL_FREE is set for its duration.
		 */
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode->i_lock);
		write_inode_now(inode, 1);
		spin_lock(&inode->i_lock);
		WARN_ON(inode->i_state & I_NEW);
		inode->i_state &= ~I_WILL_FREE;
	}

	/* Common eviction tail. */
	inode->i_state |= I_FREEING;
	inode_lru_list_del(inode);
	spin_unlock(&inode->i_lock);

	evict(inode);
}
函数iput_final是在当inode没有被任何地方引用后,即变成了unused状态后,回收inode的机制。
if (op && op->drop_inode)
drop = op->drop_inode(inode);
else
drop = generic_drop_inode(inode);
drop非0时,表示i_nlink为0,或者inode已经不在inode_hashtable的哈希链(拉链表)中,即这个inode可以被释放掉;drop为0时,则表示inode应当继续保留在缓存中。
/*
 * generic_drop_inode - default "should this inode be dropped?" policy.
 * @inode: inode whose usage count has reached zero
 *
 * Classic UNIX filesystem behaviour: once nobody uses the inode any more,
 * delete it if it has no links left or if it is no longer hashed.
 *
 * Returns 1 when the inode should be dropped, 0 when it should be kept.
 */
int generic_drop_inode(struct inode *inode)
{
	if (inode->i_nlink == 0)
		return 1;

	return inode_unhashed(inode) ? 1 : 0;
}
EXPORT_SYMBOL_GPL(generic_drop_inode);
if (!drop && (sb->s_flags & MS_ACTIVE)){
inode->i_state |= I_REFERENCED;
if (!(inode->i_state & (I_DIRTY|I_SYNC)))
inode_lru_list_add(inode);
spin_unlock(&inode->i_lock);
return;
}
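如果drop为0并且superblock仍处于活动状态(设置了MS_ACTIVE),就给inode打上I_REFERENCED标记;若inode既不脏也没有正在同步,再调用inode_lru_list_add将inode添加到unused列表中,即将inode缓存起来。
否则,如果drop为0(文件系统正在关闭),就先调用write_inode_now写回到磁盘上;然后无论drop是否为0,都调用inode_lru_list_del将已经缓存下来的inode从LRU中删除掉,最后调用evict函数将inode彻底删除。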
/*
 * inode_lru_list_add - put an inode on the global unused-inode LRU.
 * @inode: inode to cache
 *
 * Does nothing if the inode's i_lru link is already on a list. Otherwise the
 * inode is added to inode_lru and the unused counter is bumped. The list and
 * the counter are both updated under inode_lru_lock.
 */
static void inode_lru_list_add(struct inode *inode)
{
	spin_lock(&inode_lru_lock);

	if (!list_empty(&inode->i_lru))
		goto out_unlock;

	list_add(&inode->i_lru, &inode_lru);
	inodes_stat.nr_unused++;

out_unlock:
	spin_unlock(&inode_lru_lock);
}
因此inode_lru就是全局的unused inode列表,通过“Least Recently Used”的顺序保存。
此外,操作inode_lru的函数还有prune_icache
/*
 * prune_icache - scan the unused-inode list (inode_lru) for freeable inodes.
 * @nr_to_scan: upper bound on the number of list entries examined
 *
 * Freeable inodes are marked I_FREEING, moved to a temporary list and then
 * freed outside inode_lru_lock by dispose_list(). The whole scan runs with
 * iprune_sem held for reading.
 *
 * Any inodes which are pinned purely because of attached pagecache have their
 * pagecache removed. If the inode has metadata buffers attached to
 * mapping->private_list then try to remove them.
 *
 * If the inode has the I_REFERENCED flag set, then it means that it has been
 * used recently - the flag is set in iput_final(). When we encounter such an
 * inode, clear the flag and move it to the back of the LRU so it gets another
 * pass through the LRU before it gets reclaimed. This is necessary because of
 * the fact we are doing lazy LRU updates to minimise lock contention so the
 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
 * with this flag set because they are the inodes that are out of order.
 */
static void prune_icache(int nr_to_scan)
{
LIST_HEAD(freeable);
int nr_scanned;
/* Pages dropped by invalidate_mapping_pages(); reported via vm events below. */
unsigned long reap = 0;
down_read(&iprune_sem);
spin_lock(&inode_lru_lock);
for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++){
struct inode *inode;
if (list_empty(&inode_lru))
break;
/* Candidates are always taken from the inode_lru.prev end of the list. */
inode = list_entry(inode_lru.prev, struct inode, i_lru);
/*
 * we are inverting the inode_lru_lock/inode->i_lock here,
 * so use a trylock. If we fail to get the lock, just move the
 * inode to the back of the list so we don't spin on it.
 */
if (!spin_trylock(&inode->i_lock)){
list_move(&inode->i_lru, &inode_lru);
continue;
}
/*
 * Referenced inodes (i_count != 0) and inodes with any state bit
 * other than I_REFERENCED set (e.g. dirty) are still in use and
 * cannot be reclaimed now. They are unlinked from the LRU here and
 * no longer counted as unused.
 */
if (atomic_read(&inode->i_count) ||
(inode->i_state & ~I_REFERENCED)){
list_del_init(&inode->i_lru);
spin_unlock(&inode->i_lock);
inodes_stat.nr_unused--;
continue;
}
/* recently referenced inodes get one more pass */
if (inode->i_state & I_REFERENCED){
inode->i_state &= ~I_REFERENCED;
list_move(&inode->i_lru, &inode_lru);
spin_unlock(&inode->i_lock);
continue;
}
if (inode_has_buffers(inode) || inode->i_data.nrpages){
/*
 * Take a reference so that both locks can be dropped while the
 * attached buffers / page cache are stripped.
 */
__iget(inode);
spin_unlock(&inode->i_lock);
spin_unlock(&inode_lru_lock);
if (remove_inode_buffers(inode))
reap += invalidate_mapping_pages(&inode->i_data,
0, -1);
iput(inode);
spin_lock(&inode_lru_lock);
/*
 * The list may have changed while inode_lru_lock was dropped; only
 * carry on if this inode is now the first entry of inode_lru.
 * NOTE(review): presumably where the iput() above re-queued it via
 * inode_lru_list_add() -> list_add() -- confirm.
 */
if (inode != list_entry(inode_lru.next,
struct inode, i_lru))
continue;/* wrong inode or list_empty */
/* avoid lock inversions with trylock */
if (!spin_trylock(&inode->i_lock))
continue;
if (!can_unuse(inode)){
spin_unlock(&inode->i_lock);
continue;
}
}
/* Reclaimable: mark it as being freed and queue it for dispose_list(). */
WARN_ON(inode->i_state & I_NEW);
inode->i_state |= I_FREEING;
spin_unlock(&inode->i_lock);
list_move(&inode->i_lru, &freeable);
inodes_stat.nr_unused--;
}
/* Account the reaped pages to kswapd or to the calling context. */
if (current_is_kswapd())
__count_vm_events(KSWAPD_INODESTEAL, reap);
else
__count_vm_events(PGINODESTEAL, reap);
spin_unlock(&inode_lru_lock);
dispose_list(&freeable);
up_read(&iprune_sem);
}
该函数的作用是在内存压力较大时,通过缩减缓存的inode列表inode_lru以释放出更多的内存。
该函数从inode_lru的尾部(inode_lru.prev)开始逐个取出inode,做一些简单检查:如果暂时拿不到i_lock,或者inode刚刚被引用过(I_REFERENCED),就用list_move把它移回链表的另一端,留待下一轮处理;如果inode的引用计数非0或者还带有其他状态位(例如脏),就直接把它从inode_lru中摘除;然后继续检查下一个inode。
使得inode继续保留的原因包括:无法获取到操作inode中数据的锁i_lock;inode中的数据是脏的;inode的使用计数非0;inode刚刚被引用过等等。
还有一个比较实用的问题:iput_final只在inode的最后一个引用被释放时才会被调用(见函数前的注释)。此时如果i_nlink不为0并且inode仍在hash表中(drop为0),inode会被放入缓存inode_lru;如果i_nlink为0(drop非0),则直接走evict流程。而prune_icache在回收时,还会再检查i_count引用计数是否为0。
这也就是说,如果一个inode对应的磁盘文件已经被删除了(i_nlink为0),但是还有进程对其进行操作(i_count不为0)的话,那么它不会被直接删除,而是继续保留在内存中,也就是说对其操作的进程仍然可以对已经缓存下来的数据页面page进行操作。
直到没有进程再对其进行操作了,最后一次iput才会经由iput_final把它真正释放掉。
inode中有两个链表头元素,分别是i_sb_list和i_wb_list,其中i_sb_list是super_block->s_inodes列表的元素,而i_wb_list是用于维护设备的后备inode列表。
2. dentry缓存
dentry缓存的目的是减少对慢速磁盘的访问:每当VFS对底层的数据进行访问时,都会将访问的结果缓存下来,保存成一个dentry对象。dentry对象的组织与管理和inode缓存极其相似,也有一个hash表和一个lru队列。
而且当内存压力较大时,也会调用prune_dcache来企图释放lru中优先级较低的dentry项目。
区别在于,inode是不需要维护目录的关系的,但是dentry需要,因此dentry的组织比inode要复杂。
static struct hlist_bl_head *dentry_hashtable __read_mostly;
在super_block中
/* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
struct list_head s_dentry_lru;/* unused dentry lru */
因此,保存dentry全局hash表的数据结构是全局的,而保存dentry缓存的数据结构是存在于super_block数据结构中。
/*
* dentry_lru_(add|del|move_tail) must be called with d_lock held.
*/
static void dentry_lru_add(struct dentry *dentry)
{
if (list_empty(&dentry->d_lru)){
spin_lock(&dcache_lru_lock);
list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
dentry->d_sb->s_nr_dentry_unused++;
dentry_stat.nr_unused++;
spin_unlock(&dcache_lru_lock);
}
}
dentry_lru_add函数用于向dentry缓存中添加一个释放的dentry,它被函数dput调用。
/*
 * dput - release a dentry
 * @dentry: dentry to release (may be NULL, in which case nothing happens)
 *
 * Drops one reference. If other references remain, only the count is
 * decremented. When the last reference goes away the dentry is either
 *
 *  - killed, if its ->d_delete() method (called only when DCACHE_OP_DELETE
 *    is set) returns non-zero, or if it is no longer hashed; or
 *  - left cached: marked DCACHE_REFERENCED and put on the LRU.
 *
 * Dentries that are off every hash chain are not worth keeping on the unused
 * list, so they are disposed of immediately. dentry_kill() may hand back
 * another dentry that needs its reference dropped (the same one to retry, or
 * an ancestor that was only waiting for its last child), so the walk is
 * written as a loop rather than as recursion to keep stack usage flat.
 */
void dput(struct dentry *dentry)
{
	while (dentry) {
		int kill = 0;

		if (dentry->d_count == 1)
			might_sleep();
		spin_lock(&dentry->d_lock);
		BUG_ON(!dentry->d_count);

		/* Not the last user: just drop our reference. */
		if (dentry->d_count > 1) {
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}

		if ((dentry->d_flags & DCACHE_OP_DELETE) &&
		    dentry->d_op->d_delete(dentry))
			kill = 1;	/* filesystem asked for deletion */
		else if (d_unhashed(dentry))
			kill = 1;	/* unreachable, get rid of it */

		if (!kill) {
			/* Keep it cached and make sure it sits on the LRU. */
			dentry->d_flags |= DCACHE_REFERENCED;
			dentry_lru_add(dentry);
			dentry->d_count--;
			spin_unlock(&dentry->d_lock);
			return;
		}

		/* A non-NULL result is the next dentry to release. */
		dentry = dentry_kill(dentry, 1);
	}
}
EXPORT_SYMBOL(dput);
所有的dentry实例会形成一个网络,用于反映文件系统的结构。
d_subdirs成员,里面保存着所有的子目录以及该目录下的文件组成的列表。
d_child成员,是该dentry链接到其父目录的dentry节点的锚点。
这两个成员,是构成文件系统的层次结构的基本设施。
if (dentry->d_count == 1)
might_sleep();
参考:http://yuxu9710108.blog.163.com/blog/static/23751534201011715413404/
用于调试时,提示atomic context的可能睡眠情况。
分析dput函数的逻辑:
如果dentry的引用计数大于1,那么代表还有其他的地方在使用这个dentry,因此只减少引用计数,直接返回;
如果dentry->d_flags里面设置了DCACHE_OP_DELETE标志,那么调用d_op->d_delete函数指针,由文件系统判断该dentry是否应当被删除;若返回非0,则调用dentry_kill来处理;
【d_op->d_delete与dentry_kill在功能上有什么不同?】
如果在全局的hash表中也已经找不到该dentry了,那么直接调用dentry_kill来处理;
如果dentry的引用计数为1,而且也不属于上面两种需要调用dentry_kill的情况,那么就将其缓存在super_block的LRU队列中。
我们看一种可能的d_delete的实现
/*
 * nfs_dentry_delete - NFS ->d_delete() method.
 * @dentry: dentry whose d_count is about to reach 0
 *
 * Invoked from dput(). Returns 1 if the dentry should be killed instead of
 * being kept in the cache, 0 otherwise.
 */
static int nfs_dentry_delete(const struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;

	dfprintk(VFS, "NFS: dentry_delete(%s/%s, %x)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		dentry->d_flags);

	/* A dentry backed by a stale inode gets unhashed. */
	if (inode && NFS_STALE(inode))
		return 1;

	/* Renamed (DCACHE_NFSFS_RENAMED): unhash so ->d_iput() gets called. */
	if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
		return 1;

	/*
	 * Super block no longer active: unhash so that ancestors of killed
	 * async unlink files are cleaned up during umount.
	 */
	if (!(dentry->d_sb->s_flags & MS_ACTIVE))
		return 1;

	return 0;
}
可见,该函数是进行一些内部的判断,决定是否需要将该dentry从全局的hash表中删除掉。
if (dentry->d_flags & DCACHE_OP_DELETE){
if (dentry->d_op->d_delete(dentry))
goto kill_it;
}
/*
 * dentry_kill - finish off a dentry we've decided to kill.
 * @dentry: dentry to kill; dentry->d_lock must be held on entry
 * @ref: if non-zero, decrement the dentry's refcount too
 *
 * Returns with dentry->d_lock released.
 *
 * Besides d_lock, the inode's i_lock (if the dentry has an inode) and the
 * parent's d_lock (if the dentry is not a root) are needed. Both are only
 * tried with spin_trylock(); if either attempt fails, everything taken here
 * is released and @dentry itself is returned so that the caller can retry.
 *
 * Returns dentry requiring refcount drop, or NULL if we're done.
 */
static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
__releases(dentry->d_lock)
{
struct inode *inode;
struct dentry *parent;
inode = dentry->d_inode;
if (inode && !spin_trylock(&inode->i_lock)){
/* Back-off path; also entered via goto when the parent lock is busy. */
relock:
spin_unlock(&dentry->d_lock);
cpu_relax();
return dentry;/* try again with same dentry */
}
/* A root dentry has no parent whose lock must be taken. */
if (IS_ROOT(dentry))
parent = NULL;
else
parent = dentry->d_parent;
if (parent && !spin_trylock(&parent->d_lock)){
/* Undo the inode lock taken above before backing off. */
if (inode)
spin_unlock(&inode->i_lock);
goto relock;
}
if (ref)
dentry->d_count--;
/* if dentry was on the d_lru list delete it from there */
dentry_lru_del(dentry);
/* if it was on the hash then remove it */
__d_drop(dentry);
/* d_kill() does the final teardown; its result is passed straight back. */
return d_kill(dentry, parent);
}
相关文章推荐
- Leetcode Populating Next Right Pointers in Each Node II
- Google 的V8 JS引擎和Node.JS
- [LeetCode]24.Swap Nodes in Pairs
- Node.js学习笔记(4、events模块)
- Node.js学习笔记(2、File System模块)
- LeetCode之Remove Nth Node From End of List
- Node.js学习笔记(1、简介)
- pidera安装node.js(树莓派)
- [node.js]RPC(远程过程调用)的实现原理
- Populating Next Right Pointers in Each Node
- nodeJs+express+ejs+mongoose实现翻页原理1
- NameNode内存优化---基于缓存相同文件名的方法
- pomelo 学习 (1)
- Populating Next Right Pointers in Each Node--为每一个节点填充next right指针
- Populating Next Right Pointers in Each Node II
- Populating Next Right Pointers in Each Node
- node.js 使用 imagemagick
- node tail 日志服务
- node.js-session问题
- WebStorm绝对是开发golang/nodejs的神器啊