您的位置:首页 > 运维架构

open系统调用在内核中的流程分析

2012-01-18 16:30 501 查看
真是蛮复杂的,我分三步走,力求讲得比较清楚。

以字符设备为例,相对于块设备要简单些。

基于2.6.26的内核

一)驱动注册open函数都干了些什么?

register_chrdev -> cdev_add -> kobj_map

file: fs/char_dev.c

/*
 * register_chrdev() - excerpt of the legacy char-device registration helper.
 *
 * Reserves a device-number region with
 * __register_chrdev_region(major, 0, 256, name), allocates a struct cdev and
 * stores the driver's owner and file_operations in it.
 *
 * Returns PTR_ERR(cd) when the region reservation fails. The allocation
 * failure path jumps to the label out2, which, like the rest of the function,
 * is elided ("...") in this excerpt.
 */
int register_chrdev(unsigned int major, const char *name,

const struct file_operations *fops)

{

struct char_device_struct *cd;

struct cdev *cdev;

char *s;

/* Default error code; the visible code reaches "goto out2" with this value. */
int err = -ENOMEM;

cd = __register_chrdev_region(major, 0, 256, name);

if (IS_ERR(cd))

return PTR_ERR(cd);

cdev = cdev_alloc();

if (!cdev)

goto out2;

cdev->owner = fops->owner;

cdev->ops = fops; // note: chrdev_open() later reads the fops back from cdev->ops

...

}

file: fs/char_dev.c

/*
 * cdev_add() - record a cdev's device-number range and publish it in cdev_map.
 * @p:     the cdev being added
 * @dev:   first device number covered by @p
 * @count: number of consecutive device numbers covered
 *
 * Stores @dev and @count on @p, then registers @p in cdev_map through
 * kobj_map() with exact_match as the probe callback, exact_lock as the lock
 * callback, and @p itself as the callback data.
 *
 * Returns whatever kobj_map() returns.
 */
int cdev_add(struct cdev *p, dev_t dev, unsigned count)
{
	int rc;

	p->count = count;
	p->dev = dev;

	rc = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p);
	return rc;
}

file: fs/char_dev.c

/*
 * exact_match() - probe callback handed to kobj_map() by cdev_add().
 * @dev:  device number being looked up (unused here)
 * @part: index out-parameter (unused here)
 * @data: the struct cdev that was passed to kobj_map() as callback data
 *
 * Returns the address of the kobject embedded in that cdev.
 */
static struct kobject *exact_match(dev_t dev, int *part, void *data)
{
	return &((struct cdev *)data)->kobj;
}

file: drivers/base/map.c

/*
 * kobj_map() - register a device-number range in a kobj_map domain.
 * @domain: map to insert into
 * @dev:    first device number of the range
 * @range:  number of device numbers in the range
 * @module: owner stored in each probe entry
 * @probe:  callback stored as ->get in each probe entry
 * @lock:   callback stored as ->lock in each probe entry
 * @data:   opaque pointer stored in each probe entry
 *
 * Allocates one struct probe per major number spanned by the range (capped at
 * 255), fills every entry with the same values, and links each entry into the
 * list at domain->probes[major % 255] while holding domain->lock.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
int kobj_map(struct kobj_map *domain, dev_t dev, unsigned long range,

struct module *module, kobj_probe_t *probe,

int (*lock)(dev_t, void *), void *data)

{

/* Number of distinct majors touched by [dev, dev + range - 1]. */
unsigned n = MAJOR(dev + range - 1) - MAJOR(dev) + 1;

unsigned index = MAJOR(dev);

unsigned i;

struct probe *p;

if (n > 255)

n = 255;

p = kmalloc(sizeof(struct probe) * n, GFP_KERNEL);

if (p == NULL)

return -ENOMEM;

/* Fill all n entries identically; p ends up one past the last entry. */
for (i = 0; i < n; i++, p++) {

p->owner = module;

p->get = probe; // when called from cdev_add() this is exact_match

p->lock = lock;

p->dev = dev;

p->range = range;

p->data = data;

}

mutex_lock(domain->lock);

/* "p -= n" rewinds p to the first entry before the insertion pass. */
for (i = 0, p -= n; i < n; i++, p++, index++) {

struct probe **s = &domain->probes[index % 255];

/* Skip entries with a smaller range so the list stays ordered by ascending range. */
while (*s && (*s)->range < range)

s = &(*s)->next;

p->next = *s;

*s = p;

}

mutex_unlock(domain->lock);

return 0;

}

二)从系统调用往内核走,看当初驱动里注册的file_operations里的open函数怎么被调用的

sys_open -> do_sys_open -> do_filp_open -> nameidata_to_filp -> __dentry_open

问题是 1)__dentry_open如何找到 chrdev_open?

2)最终又是如何调用驱动里注册的file_operations中的open函数的呢?

/*
 * __dentry_open() - excerpt: initialise a struct file for an opened dentry.
 * @dentry: dentry being opened
 * @mnt:    mount the dentry lives on
 * @flags:  open flags, stored in f->f_flags
 * @f:      file structure to fill in
 * @open:   optional open callback; when NULL, f->f_op->open is used instead
 *
 * Fills f from the dentry, mount and inode, obtains write access when the
 * file mode includes FMODE_WRITE, runs security_dentry_open(), and selects
 * the open method. The remainder of the function, including the
 * cleanup_file / cleanup_all labels, is elided ("...") in this excerpt.
 */
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,

int flags, struct file *f,

int (*open)(struct inode *, struct file *))

{

struct inode *inode;

int error;

f->f_flags = flags;

f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK |

FMODE_PREAD | FMODE_PWRITE;

inode = dentry->d_inode;

if (f->f_mode & FMODE_WRITE) {

error = __get_file_write_access(inode, mnt);

if (error)

goto cleanup_file;

if (!special_file(inode->i_mode))

file_take_write(f);

}

f->f_mapping = inode->i_mapping;

f->f_path.dentry = dentry;

f->f_path.mnt = mnt;

f->f_pos = 0;

f->f_op = fops_get(inode->i_fop); // for a char-device inode, i_fop is def_chr_fops (set in init_special_inode)

file_move(f, &inode->i_sb->s_files);

error = security_dentry_open(f);

if (error)

goto cleanup_all;

if (!open && f->f_op)

open = f->f_op->open; // only selects def_chr_fops.open, i.e. chrdev_open; the call itself is not shown in this excerpt

...

}

file: fs/char_dev.c

/*
 * chrdev_open() - excerpt of the default open method for char-device inodes
 * (installed as def_chr_fops.open).
 *
 * NOTE: the article abbreviates the signature; the body uses `inode` and
 * `filp`, which are presumably the function's parameters.
 *
 * Finds the cdev for the inode (cached in inode->i_cdev, otherwise looked up
 * in cdev_map by inode->i_rdev), replaces filp->f_op with the driver's
 * file_operations taken from the cdev, and calls the driver's open method if
 * it has one. Returns -ENXIO when no cdev is found, when cdev_get() fails, or
 * when fops_get() yields NULL. The tail of the function is elided ("...").
 */
chrdev_open() {

struct cdev *p;

struct cdev *new = NULL;

int ret = 0;

spin_lock(&cdev_lock);

p = inode->i_cdev;

if (!p) {

struct kobject *kobj;

int idx;

/* The lookup below is done without cdev_lock held. */
spin_unlock(&cdev_lock);

kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); // find the kobject of the matching cdev; the inverse of kobj_map()

if (!kobj)

return -ENXIO;

new = container_of(kobj, struct cdev, kobj); // recover the enclosing cdev

spin_lock(&cdev_lock);

/* Re-read i_cdev: it may have been set while the lock was dropped. */
p = inode->i_cdev;

if (!p) {

inode->i_cdev = p = new;

inode->i_cindex = idx;

list_add(&inode->i_devices, &p->list);

/* The looked-up cdev is now in use; clear new so cdev_put(new) below receives NULL. */
new = NULL;

} else if (!cdev_get(p))

ret = -ENXIO;

} else if (!cdev_get(p))

ret = -ENXIO;

spin_unlock(&cdev_lock);

/* new is non-NULL here only when the lookup result was not installed in the inode. */
cdev_put(new);

if (ret)

return ret;

filp->f_op = fops_get(p->ops); // this recovers the file_operations pointer the driver registered

if (!filp->f_op) {

cdev_put(p);

return -ENXIO;

}

if (filp->f_op->open) {

lock_kernel();

ret = filp->f_op->open(inode,filp); // this is the actual call into the driver's open method

unlock_kernel();

}

...

}

file: drivers/base/map.c

/*
 * kobj_lookup() - find the kobject registered for a device number.
 * @domain: map to search
 * @dev:    device number to look up
 * @index:  out-parameter; set to dev - p->dev for the candidate entry and
 *          also passed on to the entry's probe callback
 *
 * Walks the list at domain->probes[MAJOR(dev) % 255] under domain->lock.
 * For the first entry that covers @dev, whose module reference can be taken
 * and whose lock callback (if any) does not fail, the mutex is dropped and
 * the entry's probe callback is invoked. A non-NULL result is returned;
 * otherwise the search restarts from the top with `best` narrowed to that
 * entry's range - 1.
 *
 * Returns the kobject, or NULL when no entry yields one.
 */
struct kobject *kobj_lookup(struct kobj_map *domain, dev_t dev, int *index)

{

struct kobject *kobj;

struct probe *p;

/* Upper bound on (range - 1) of entries still worth trying; starts at maximum. */
unsigned long best = ~0UL;

retry:

mutex_lock(domain->lock);

for (p = domain->probes[MAJOR(dev) % 255]; p; p = p->next) {

struct kobject *(*probe)(dev_t, int *, void *);

struct module *owner;

void *data;

/* Entry does not cover dev. */
if (p->dev > dev || p->dev + p->range - 1 < dev)

continue;

/* Entry is not narrower than one already tried; stop scanning. */
if (p->range - 1 >= best)

break;

if (!try_module_get(p->owner))

continue;

/* Copy what is needed from the entry before the mutex is released. */
owner = p->owner;

data = p->data;

probe = p->get; // for char devices added via cdev_add() this is exact_match

best = p->range - 1;

*index = dev - p->dev;

if (p->lock && p->lock(dev, data) < 0) {

module_put(owner);

continue;

}

mutex_unlock(domain->lock);

kobj = probe(dev, index, data); // invokes the probe callback (exact_match for cdevs)

/* Currently ->owner protects _only_ ->probe() itself. */

module_put(owner);

if (kobj)

return kobj;

/* Probe returned NULL: rescan from the top with the narrowed `best`. */
goto retry;

}

mutex_unlock(domain->lock);

return NULL;

}

三)什么时候为字符设备设置的def_chr_fops ?

这个跟具体的文件系统有关系的。

现在/dev/下的设备节点都是通过udev动态创建的,udev会去调用mknod(假定是ext2,内核会调用ext2_mknod)。

ext2_mknod会调用init_special_inode();如果是char设备,init_special_inode()会把&def_chr_fops赋给inode->i_fop。

两个函数的部分实现如下:

file: fs/ext2/namei.c

static int ext2_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev)

{

struct inode * inode;

int err;

if (!new_valid_dev(rdev))

return -EINVAL;

inode = ext2_new_inode (dir, mode);

err = PTR_ERR(inode);

if (!IS_ERR(inode)) {

init_special_inode(inode, inode->i_mode, rdev); // 调用 init_special_inode

file: fs/inode.c

/*
 * init_special_inode() - excerpt: set up an inode for a special file.
 * @inode: inode to initialise
 * @mode:  file mode, stored in inode->i_mode
 * @rdev:  device number, stored in inode->i_rdev for char devices
 *
 * Only the character-device branch is shown; the rest is elided ("...").
 */
void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)

{

inode->i_mode = mode;

if (S_ISCHR(mode)) {

inode->i_fop = &def_chr_fops; // default file operations for char devices are installed here

inode->i_rdev = rdev;

}

...

}

file: fs/char_dev.c

/*
 * Default file operations that init_special_inode() installs on char-device
 * inodes. The only method set here is open, which points at chrdev_open.
 */
const struct file_operations def_chr_fops = {

.open = chrdev_open,

};
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: