您的位置:首页 > 编程语言

sys_open()代码分析

2011-12-30 17:02 288 查看
sys_open()源代码分析
应用程序在操作任何一个文件之前,必须先调用open()来打开该文件,即通知内核新建一个代表该文件的结构,并且返回该文件的描述符(一个整数),该描述符在进程内唯一。open()的格式如下:
int open(const char *pathname, int oflag, mode_t mode)
pathname:代表需要打开的文件的文件名;
oflag:表示打开的标识,具体的内核支持如下标记位(格式为 宏定义名称<实际常数值>: 描述):
O_ACCMODE<0003>: 读写文件操作时,用于取出flag的低2位。
O_RDONLY<00>: 只读打开
O_WRONLY<01>: 只写打开
O_RDWR<02>: 读写打开
O_CREAT<0100>: 文件不存在则创建,此时需要提供mode参数
O_EXCL<0200>: 如果同时指定了O_CREAT,而文件已经存在,则出错
O_NOCTTY<0400>: 如果pathname代表终端设备,则不将此设备分配作为此进程的控制终端
O_TRUNC<01000>: 如果此文件存在,而且以只写或读写方式成功打开,则将其长度截短为0
O_APPEND<02000>: 每次写时都加到文件的尾端
O_NONBLOCK<04000>: 如果pathname指的是一个FIFO、一个块特殊文件或一个字符特殊文件,则此选择项为此文件的本次打开操作和后续的I/O操作设置非阻塞方式。
O_NDELAY<O_NONBLOCK>: 与O_NONBLOCK相同
O_SYNC<010000>: 使每次write都等到物理I/O操作完成。
FASYNC<020000>: 兼容BSD的fcntl异步通知标志(信号驱动I/O)
O_DIRECT<040000>: 直接磁盘操作标识,每次读写都不使用内核提供的缓存,直接读写磁盘设备
O_LARGEFILE<0100000>: 大文件标识
O_DIRECTORY<0200000>: 必须是目录
O_NOFOLLOW<0400000>: 如果pathname的最后一个分量是符号链接,则不跟随该链接
O_NOATIME<01000000>: 读取文件时不更新文件的访问时间(atime)
当新创建一个文件时,需要指定mode参数,以下说明的格式如 宏定义名称<实际常数值>: 描述,如下:
S_IRWXU<00700>: 文件拥有者有读写执行权限
S_IRUSR (S_IREAD)<00400>: 文件拥有者仅有读权限
S_IWUSR (S_IWRITE)<00200>: 文件拥有者仅有写权限
S_IXUSR (S_IEXEC)<00100>: 文件拥有者仅有执行权限
S_IRWXG<00070>: 组用户有读写执行权限
S_IRGRP<00040>: 组用户仅有读权限
S_IWGRP<00020>: 组用户仅有写权限
S_IXGRP<00010>: 组用户仅有执行权限
S_IRWXO<00007>: 其他用户有读写执行权限
S_IROTH<00004>: 其他用户仅有读权限
S_IWOTH<00002>: 其他用户仅有写权限
S_IXOTH<00001>: 其他用户仅有执行权限
当open()系统调用进入内核时,最终调用的函数为SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode),该函数位于fs/open.c中,下面将会分析其具体的实现过程。
-------------------------------------------------------------------------------------------------------------------
*******************************************************************************************************************
-------------------------------------------------------------------------------------------------------------------
/*
 * open(2) system call entry point.
 *
 * Optionally forces O_LARGEFILE into the flags, then hands the real work
 * to do_sys_open() with AT_FDCWD as the directory descriptor and returns
 * whatever do_sys_open() returned.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
long ret;
/*
 * Per the original annotation, force_o_largefile() reports whether the
 * system supports large files by default (i.e. long is 64 bits wide).
 * NOTE(review): the helper is not visible here - confirm.
 */
if (force_o_largefile())
flags |= O_LARGEFILE;

/*
 * Do the bulk of the open work. Per the original annotation, AT_FDCWD
 * makes the lookup start from the current working directory.
 */
ret = do_sys_open(AT_FDCWD, filename, flags, mode);
/* avoid REGPARM breakage on x86: */
asmlinkage_protect(3, ret, filename, flags, mode);
return ret;
}
该函数主要调用do_sys_open()来完成打开工作,do_sys_open()的代码分析如下。
long do_sys_open(int dfd, const char__user *filename, int flags, int mode)
{
//将欲打开的文件名拷贝到内核中,该函数的分析见下文;
char *tmp = getname(filename);
int fd = PTR_ERR(tmp);

if (!IS_ERR(tmp)) {
//从进程的文件表中找到一个空闲的文件表指针,如果出错,则返回,见下文说明;
fd = get_unused_fd_flags(flags);
if (fd >= 0) {
//执行打开操作,见下文说明,dfd=AT_FDCWD;
struct file *f = do_filp_open(dfd, tmp, flags, mode, 0);
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
} else {
fsnotify_open(f);
//添加打开的文件表f到当前进程的文件表数组中,见下文说明;
fd_install(fd, f);
}
}
putname(tmp);
}
return fd;
}
该函数主要分为如下几个步骤来完成打开文件的操作:1.将文件名参数从用户态拷贝至内核,调用函数getname();2.从进程的文件表中找到一个空闲的文件表指针,调用了函数get_unused_fd_flags();3.完成真正的打开操作,调用函数do_filp_open();4.将打开的文件添加到进程的文件表数组中,调用函数fd_install();
getname(filename)
/*
 * getname - copy a path name from user space into a kernel buffer.
 * @filename: user-space pointer to the path name
 *
 * Returns the kernel buffer holding the name on success. Returns
 * ERR_PTR(-ENOMEM) when __getname() yields no buffer, or ERR_PTR() of
 * the negative do_getname() result when the copy fails; in the latter
 * case the buffer is released with __putname() first. The result,
 * error pointer or not, is always passed to audit_getname().
 */
char * getname(const char __user * filename)
{
	char *kname;
	char *ret;
	int err;

	/* Obtain a kernel-side buffer for the name. */
	kname = __getname();
	if (!kname) {
		ret = ERR_PTR(-ENOMEM);
	} else {
		/* Copy the name from user space into the buffer. */
		err = do_getname(filename, kname);
		if (err < 0) {
			/* Copy failed: release the buffer and report why. */
			__putname(kname);
			ret = ERR_PTR(err);
		} else {
			ret = kname;
		}
	}

	audit_getname(ret);
	return ret;
}
该函数主要的任务是将文件名filename从用户态拷贝至内核态:
1.首先调用__getname()来从内核缓存中分配空间;
2.调用do_getname()将文件名filename从用户态拷贝至内核态tmp中,如果成功,则返回保存文件名的内核缓冲区的起始地址;否则,调用__putname()释放1中申请的内核缓冲区;
get_unused_fd_flags(flags)
/* Allocate an unused descriptor, searching from descriptor 0 upwards; flags is forwarded to alloc_fd(). */
#define get_unused_fd_flags(flags) alloc_fd(0, (flags))
int alloc_fd(unsigned start, unsigned flags){//获得当前进程的files_struct 结构;struct files_struct *files = current->files;unsigned int fd;int error;struct fdtable *fdt;spin_lock(&files->file_lock);repeat:fdt = files_fdtable(files);fd = start;//从上一次打开的fd的下一个fd开始搜索空闲的fdif (fd < files->next_fd)fd = files->next_fd;//寻找空闲的fd,返回值为空闲的fdif (fd < fdt->max_fds)fd = find_next_zero_bit(fdt->open_fds->fds_bits,fdt->max_fds, fd);//如果有必要,即打开的fd超过max_fds,则需要expand当前进程的fd表;//返回值error<0表示出错,error=0表示无需expand,error=1表示进行了expand;error = expand_files(files, fd);if (error < 0)goto out;/** If we needed to expand the fs array we* might have blocked - try again.*///error=1表示进行了expand,那么此时需要重新去查找空闲的fd;if (error)goto repeat;//设置下一次查找的起始fd,即本次找到的空闲的fd的下一个fd,记录在files->next_fd中;if (start <= files->next_fd)files->next_fd = fd + 1;FD_SET(fd, fdt->open_fds);if (flags & O_CLOEXEC)FD_SET(fd, fdt->close_on_exec);elseFD_CLR(fd, fdt->close_on_exec);error = fd;#if 1/* Sanity check */if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);rcu_assign_pointer(fdt->fd[fd], NULL);}#endifout:spin_unlock(&files->file_lock);return error;}
该函数为需要打开的文件在当前进程内分配一个空闲的文件描述符fd,该fd就是open()系统调用的返回值。
该函数涉及到一个很重要的数据结构struct files_struct,可参考“数据结构”章节的描述。
do_filp_open(dfd, tmp, flags, mode, 0)
struct file *do_filp_open(int dfd, const char *pathname,int open_flag, int mode, int acc_mode){struct file *filp;struct nameidata nd;int error;struct path path;int count = 0;//改变参数open_flag的值,具体做法即将open_flag++;int flag = open_to_namei_flags(open_flag);int force_reval = 0;if (!(open_flag & O_CREAT))mode = 0;/** O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only* check for O_DSYNC if the need any syncing at all we enforce it's* always set instead of having to deal with possibly weird behaviour* for malicious applications setting only __O_SYNC.*///根据__O_SYNC标志来设置O_DSYNC标志,用以防止恶意破坏程序;if (open_flag & __O_SYNC)open_flag |= O_DSYNC;//设置访问权限;if (!acc_mode)acc_mode = MAY_OPEN | ACC_MODE(open_flag);/* O_TRUNC implies we need access checks for write permissions */if (open_flag & O_TRUNC)acc_mode |= MAY_WRITE;/* Allow the LSM permission hook to distinguish appendaccess from general write access. */if (open_flag & O_APPEND)acc_mode |= MAY_APPEND;/* find the parent */reval://找到需要打开文件的父目录的dentry结构,这个过程我们已经在sys_mount过程中分析;//为什么要查找父目录的struct dentry,我想是因为open的文件不一定存在,所以先找到父目录的dentry;error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);if (error)return ERR_PTR(error);if (force_reval)nd.flags |= LOOKUP_REVAL;current->total_link_count = 0;error = link_path_walk(pathname, &nd);if (error) {filp = ERR_PTR(error);goto out;}if (unlikely(!audit_dummy_context()) && (open_flag & O_CREAT))audit_inode(pathname, nd.path.dentry);/** We have the parent and last component.*/error = -ENFILE;//分配一个空闲的struct file结构,并作一些最简单的初始化;//struct file结构体可参考“数据结构部分”;filp = get_empty_filp();if (filp == NULL)goto exit_parent;//接下来要处理打开文件路径名最后一个分量了,将上面创建的struct file保存在nd中//并且根据传入的参数设置查找的标志位,保存在nd->flags中;nd.intent.open.file = filp;filp->f_flags = open_flag;nd.intent.open.flags = flag;nd.intent.open.create_mode = mode;nd.flags &= ~LOOKUP_PARENT;nd.flags |= LOOKUP_OPEN;if (open_flag & O_CREAT) {nd.flags |= LOOKUP_CREATE;if (open_flag & O_EXCL)nd.flags |= LOOKUP_EXCL;}if (open_flag & O_DIRECTORY)nd.flags |= 
LOOKUP_DIRECTORY;if (!(open_flag & O_NOFOLLOW))nd.flags |= LOOKUP_FOLLOW;filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);while (unlikely(!filp)) { /* trailing symlink */struct path holder;struct inode *inode = path.dentry->d_inode;void *cookie;error = -ELOOP;/* S_ISDIR part is a temporary automount kludge */if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))goto exit_dput;if (count++ == 32)goto exit_dput;/** This is subtle. Instead of calling do_follow_link() we do* the thing by hands. The reason is that this way we have zero* link_count and path_walk() (called from ->follow_link)* honoring LOOKUP_PARENT.  After that we have the parent and* last component, i.e. we are in the same situation as after* the first path_walk().  Well, almost - if the last component* is normal we get its copy stored in nd->last.name and we will* have to putname() it when we are done. Procfs-like symlinks* just set LAST_BIND.*/nd.flags |= LOOKUP_PARENT;error = security_inode_follow_link(path.dentry, &nd);if (error)goto exit_dput;error = __do_follow_link(&path, &nd, &cookie);if (unlikely(error)) {/* nd.path had been dropped */if (!IS_ERR(cookie) && inode->i_op->put_link)inode->i_op->put_link(path.dentry, &nd, cookie);path_put(&path);release_open_intent(&nd);filp = ERR_PTR(error);goto out;}holder = path;nd.flags &= ~LOOKUP_PARENT;filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);if (inode->i_op->put_link)inode->i_op->put_link(holder.dentry, &nd, cookie);path_put(&holder);}out:if (nd.root.mnt)path_put(&nd.root);if (filp == ERR_PTR(-ESTALE) && !force_reval) {force_reval = 1;goto reval;}return filp;exit_dput:path_put_conditional(&path, &nd);if (!IS_ERR(nd.intent.open.file))release_open_intent(&nd);exit_parent:path_put(&nd.path);filp = ERR_PTR(error);goto out;}
do_filp_open()主要完成如下工作:1.根据传入参数open_flag设置查找标志位,设置访问模式acc_mode;2.找到需要open文件的父目录的struct dentry结构,具体的查找过程已经在sys_mount()中有过详细的描述;3.分配一个struct file结构给需要打开的文件,内核用一个struct file结构来代表一个打开的文件,调用函数get_empty_filp();4.在do_last()中完成最后的查找过程,该函数的仔细分析见下文;
do_last(&nd, &path, open_flag, acc_mode, mode, pathname)
static struct file *do_last(struct nameidata *nd, struct path *path,int open_flag, int acc_mode,int mode, const char *pathname){struct dentry *dir = nd->path.dentry;struct file *filp;int error = -EISDIR;//nd->last_type:记录路径名的最后一个分量的类型;switch (nd->last_type) {case LAST_DOTDOT:follow_dotdot(nd);dir = nd->path.dentry;case LAST_DOT:if (nd->path.mnt->mnt_sb->s_type->fs_flags & FS_REVAL_DOT) {if (!dir->d_op->d_revalidate(dir, nd)) {error = -ESTALE;goto exit;}}/* fallthrough */case LAST_ROOT:if (open_flag & O_CREAT)goto exit;/* fallthrough */case LAST_BIND:audit_inode(pathname, dir);goto ok;}/* trailing slashes? */if (nd->last.name[nd->last.len]) {if (open_flag & O_CREAT)goto exit;nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;}/* just plain open? *///以下处理的是没有设置O_CREAT标志位的情况,即简单的查找,调用函数do_lookup();//如果没有找到struct dentry,那么返回说目录项不存在;//如果找到nd->last对应的dentry,那么跳转到ok;if (!(open_flag & O_CREAT)) {error = do_lookup(nd, &nd->last, path);if (error)goto exit;error = -ENOENT;if (!path->dentry->d_inode)goto exit_dput;if (path->dentry->d_inode->i_op->follow_link)return NULL;error = -ENOTDIR;if (nd->flags & LOOKUP_DIRECTORY) {if (!path->dentry->d_inode->i_op->lookup)goto exit_dput;}path_to_nameidata(path, nd);audit_inode(pathname, nd->path.dentry);goto ok;}//以下进入O_CREAT标志位被置位的处理情况;/* OK, it's O_CREAT */mutex_lock(&dir->d_inode->i_mutex);//首先找到nd->last对应的struct dentry结构,调用函数lookup_hash();path->dentry = lookup_hash(nd);path->mnt = nd->path.mnt;error = PTR_ERR(path->dentry);if (IS_ERR(path->dentry)) {mutex_unlock(&dir->d_inode->i_mutex);goto exit;}if (IS_ERR(nd->intent.open.file)) {error = PTR_ERR(nd->intent.open.file);goto exit_mutex_unlock;}/* Negative dentry, just create the file *///如果上面找到的struct dentry为空的,即还没有和inode关联,那么此时需要创建一个inode与其关联,调用函数__open	//_namei_create();if (!path->dentry->d_inode) {/** This write is needed to ensure that a* ro->rw transition does not occur between* the time when the file is created and when* a permanent write count is taken through* the 'struct file' in 
nameidata_to_filp().*/error = mnt_want_write(nd->path.mnt);if (error)goto exit_mutex_unlock;error = __open_namei_create(nd, path, open_flag, mode);if (error) {mnt_drop_write(nd->path.mnt);goto exit;}//将新创建的dentry和struct file连接起来,即设置struct file中成员变量的值;filp = nameidata_to_filp(nd);mnt_drop_write(nd->path.mnt);if (!IS_ERR(filp)) {error = ima_file_check(filp, acc_mode);if (error) {fput(filp);filp = ERR_PTR(error);}}return filp;}/** It already exists.*///欲创建的文件已经存在,如果设置了O_EXCL,那么必须返回出错;mutex_unlock(&dir->d_inode->i_mutex);audit_inode(pathname, path->dentry);error = -EEXIST;if (open_flag & O_EXCL)goto exit_dput;if (__follow_mount(path)) {error = -ELOOP;if (open_flag & O_NOFOLLOW)goto exit_dput;}error = -ENOENT;if (!path->dentry->d_inode)goto exit_dput;if (path->dentry->d_inode->i_op->follow_link)return NULL;path_to_nameidata(path, nd);error = -EISDIR;if (S_ISDIR(path->dentry->d_inode->i_mode))goto exit;ok:filp = finish_open(nd, open_flag, acc_mode);return filp;exit_mutex_unlock:mutex_unlock(&dir->d_inode->i_mutex);exit_dput:path_put_conditional(path, nd);exit:if (!IS_ERR(nd->intent.open.file))release_open_intent(nd);path_put(&nd->path);return ERR_PTR(error);}
do_last()中查找文件路径名的最后分量,它根据open_flag来决定接下来的处理流程:1.如果open_flag没有设置O_CREAT标志,那么只是简单的open,找到路径名中最后分量对应的文件的struct dentry结构,对该结构作一些简单的判断以后就会跳转到ok分支中;2.如果open_flag设置了O_CREAT标志,那么首先需要查找或者创建该文件相关的struct dentry结构,接下来再判断dentry->d_inode是否为空:a):如果dentry->d_inode为空,说明欲创建的文件不存在,那么此时调用__open_namei_create()来创建该文件,创建完成以后,通过函数nameidata_to_filp()将struct file结构和代表新创建文件的dentry结构联系起来,然后返回struct file结构即可;b):如果dentry->d_inode不为空,说明欲创建的文件已经存在,此时如果打开的标志位设置了O_EXCL,那么必须返回出错;3.ok分支处理流程:进入该分支意味着前面的一切准备工作都已经完成,查找或创建文件的struct dentry结构都已经准备妥当,此时只需调用finish_open()(主要调用nameidata_to_filp())建立struct file和struct dentry两者之间的联系即可。
上述所有的过程完成以后,最后进入fd_install()将分配的fd和代表该文件的struct file结构建立关联。比较简单,参考下代码就好,无需过多分析。
/*
 * fd_install - bind an open file to a reserved descriptor.
 * @fd:   descriptor slot in the current process's table
 * @file: struct file to store in that slot
 *
 * Takes files->file_lock, asserts via BUG_ON() that the slot is still
 * empty, stores @file in it and releases the lock.
 */
void fd_install(unsigned int fd, struct file *file)
{
	struct files_struct *files = current->files;
	struct fdtable *fdt;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);
	/* The slot must not already hold a file. */
	BUG_ON(fdt->fd[fd] != NULL);
	rcu_assign_pointer(fdt->fd[fd], file);
	spin_unlock(&files->file_lock);
}

                                            
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息