您的位置：首页 > 理论基础 > 计算机网络

libevent2.0源码学习三：对网络模型（EPOLL）的封装

2014-02-19 15:34 435 查看

一：前言

Libevent的核心是事件驱动、同步非阻塞，为了达到这一目标，必须采用系统提供的I/O多路复用技术，而这些在Windows、Linux、Unix等不同平台上却各有不同，所以，libevent对系统底层的网络模型作了一层封装，下面我们就来看一下。

二：多种I/O多路复用技术的统一

Libevent支持多种I/O多路复用技术的关键就在于结构体eventop，它的成员是一系列的函数指针, 定义在event-internal.h文件中：

/* Table of callbacks and properties describing one I/O multiplexing backend. */
struct eventop {
/* Name of this backend (the epoll backend in this file uses "epoll"). */
const char *name;
/* Callback that initializes the backend for an event_base and returns a
 * backend-private pointer. */
void *(*init)(struct event_base *);
/* Callback that adds events on fd.  The epoll backend treats `old` as the
 * events previously set on the fd and `events` as the ones being added
 * (tested against EV_READ / EV_WRITE / EV_ET). */
int (*add)(struct event_base *, evutil_socket_t fd, short old, short events, void *fdinfo);
/* Callback that removes events on fd; arguments mirror those of `add`. */
int (*del)(struct event_base *, evutil_socket_t fd, short old, short events, void *fdinfo);
/* Callback that waits for events, with an optional timeout, and reports
 * the ones that became ready. */
int (*dispatch)(struct event_base *, struct timeval *);
/* Callback that tears the backend down and releases its resources. */
void (*dealloc)(struct event_base *);
/* Whether the backend needs to be re-initialized.
 * NOTE(review): the original annotation claimed this value is currently
 * unused; presumably it is consulted when a base is re-initialized (e.g.
 * after fork) -- confirm against the caller. */
int need_reinit;
/* I/O multiplexing features supported by this backend: a combination of
 * EV_FEATURE_ET, EV_FEATURE_O1 and EV_FEATURE_FDS. */
enum event_method_feature features;
/* Length of the extra information recorded for an event fd.
 * NOTE(review): the epoll backend sets this to 0; presumably it sizes the
 * `fdinfo` blob handed to add/del -- confirm. */
size_t fdinfo_len;
};

在libevent中，每种I/O demultiplex机制的实现都必须提供init, add, del, dispatch, dealloc这五个函数接口，来完成自身的初始化、销毁释放；对事件的注册、注销和分发。下面我们就以epoll为例，来看一下libevent对系统底层的网络模型的封装。

三：对epoll的封装

libevent对epoll所作的封装结构体是epollops，它定义在epoll.c中，内容如下：

const struct eventop epollops = {
"epoll",
epoll_init,
epoll_nochangelist_add,
epoll_nochangelist_del,
epoll_dispatch,
epoll_dealloc,
1, /* need reinit */
EV_FEATURE_ET|EV_FEATURE_O1,
0
};

四：epollops中的各个回调函数

以下我们就来详细查看每个回调函数。

1：初始化

/*
 * Initialize the epoll backend for an event_base.
 *
 * Creates the epoll descriptor, allocates the backend state together with
 * its initial array of INITIAL_NEVENT result slots, optionally switches the
 * base over to the changelist variant of the backend, and calls
 * evsig_init() for the base.
 *
 * Returns the newly allocated struct epollop, or NULL if epoll_create() or
 * either allocation fails (everything acquired so far is released first).
 */
static void *epoll_init(struct event_base *base)
{
	struct epollop *op;
	/* Initialize the kernel queue.  (The size field is ignored since
	 * 2.6.8.) */
	int fd = epoll_create(32000);

	if (fd == -1) {
		/* ENOSYS is not worth a warning; anything else is reported. */
		if (errno != ENOSYS)
			event_warn("epoll_create");
		return (NULL);
	}

	/* Mark the epoll descriptor close-on-exec (this does not make it
	 * non-blocking). */
	evutil_make_socket_closeonexec(fd);

	op = mm_calloc(1, sizeof(struct epollop));
	if (op == NULL) {
		close(fd);
		return (NULL);
	}
	op->epfd = fd;

	/* Buffer that epoll_wait() fills with ready events. */
	op->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
	if (op->events == NULL) {
		mm_free(op);
		close(fd);
		return (NULL);
	}
	op->nevents = INITIAL_NEVENT;

	/* The changelist backend is selected either by an explicit base flag
	 * or, unless the base is told to ignore the environment, by the
	 * EVENT_EPOLL_USE_CHANGELIST environment variable being set. */
	if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0) {
		base->evsel = &epollops_changelist;
	} else if ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
	    evutil_getenv("EVENT_EPOLL_USE_CHANGELIST") != NULL) {
		base->evsel = &epollops_changelist;
	}

	/* Signal-handling setup for this base (evsig_init is defined
	 * elsewhere; its result is not inspected here). */
	evsig_init(base);

	return (op);
}

2：对事件的添加和删除

/*
 * Add read and/or write interest on fd (non-changelist mode).
 *
 * Builds a one-off event_change describing the request and hands it to
 * epoll_apply_one_change().  `old` is recorded as the events previously set
 * on the fd; `events` selects EV_READ / EV_WRITE and may carry EV_ET, which
 * is forwarded on each added direction.  `p` is unused.
 *
 * Returns whatever epoll_apply_one_change() returns.
 */
static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p)
{
	const short edge = events & EV_ET;
	struct event_change change;

	change.fd = fd;
	change.old_events = old;
	change.write_change = (events & EV_WRITE) ? (EV_CHANGE_ADD | edge) : 0;
	change.read_change = (events & EV_READ) ? (EV_CHANGE_ADD | edge) : 0;

	return epoll_apply_one_change(base, base->evbase, &change);
}

/*
 * Remove read and/or write interest on fd (non-changelist mode).
 *
 * Builds a one-off event_change marking each requested direction
 * (EV_READ / EV_WRITE in `events`) as EV_CHANGE_DEL and hands it to
 * epoll_apply_one_change().  `old` is recorded as the events previously set
 * on the fd.  `p` is unused.
 *
 * Returns whatever epoll_apply_one_change() returns.
 */
static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    short old, short events, void *p)
{
	struct event_change change;

	change.fd = fd;
	change.old_events = old;
	change.write_change = (events & EV_WRITE) ? EV_CHANGE_DEL : 0;
	change.read_change = (events & EV_READ) ? EV_CHANGE_DEL : 0;

	return epoll_apply_one_change(base, base->evbase, &change);
}

//可以看出，添加和删除事件的主要工作，都是在epoll_apply_one_change这个函数里面实现的

/*
 * Apply one pending change for a single fd to the epoll set.
 *
 * The epoll_ctl operation (ADD / MOD / DEL) and the event mask are derived
 * from ch->read_change, ch->write_change and ch->old_events, then issued on
 * epollop->epfd.  Recoverable failures are retried or tolerated:
 *   - MOD failing with ENOENT is retried as ADD;
 *   - ADD failing with EEXIST is retried as MOD;
 *   - DEL failing with ENOENT, EBADF or EPERM is logged and treated as
 *     success.
 *
 * `base` is not referenced in the body.
 *
 * Returns 0 on success or when the change requests nothing, -1 when
 * epoll_ctl fails in a way that is not recovered.
 */
static int epoll_apply_one_change(struct event_base *base,
struct epollop *epollop,
const struct event_change *ch)
{
struct epoll_event epev;
int op, events = 0;

/* NOTE(review): constant condition -- presumably a leftover from an earlier
 * conditional; it only adds one nesting level. */
if (1) {
/* The logic here is a little tricky.  If we had no events set
on the fd before, we need to set op="ADD" and set
events=the events we want to add.  If we had any events set
on the fd before, and we want any events to remain on the
fd, we need to say op="MOD" and set events=the events we
want to remain.  But if we want to delete the last event,
we say op="DEL" and set events=the remaining events.  What
fun!
*/

/* TODO: Turn this into a switch or a table lookup. */

if ((ch->read_change & EV_CHANGE_ADD) ||
(ch->write_change & EV_CHANGE_ADD)) {
/* If we are adding anything at all, we'll want to do
* either an ADD or a MOD. */
events = 0;
op = EPOLL_CTL_ADD;
if (ch->read_change & EV_CHANGE_ADD) {
events |= EPOLLIN;
} else if (ch->read_change & EV_CHANGE_DEL) {
;
} else if (ch->old_events & EV_READ) {
events |= EPOLLIN;
}
if (ch->write_change & EV_CHANGE_ADD) {
events |= EPOLLOUT;
} else if (ch->write_change & EV_CHANGE_DEL) {
;
} else if (ch->old_events & EV_WRITE) {
events |= EPOLLOUT;
}
if ((ch->read_change|ch->write_change) & EV_ET)
events |= EPOLLET;

if (ch->old_events) {
/* If MOD fails, we retry as an ADD, and if
* ADD fails we will retry as a MOD.  So the
* only hard part here is to guess which one
* will work.  As a heuristic, we'll try
* MOD first if we think there were old
* events and ADD if we think there were none.
*
* We can be wrong about the MOD if the file
* has in fact been closed and re-opened.
*
* We can be wrong about the ADD if the
* the fd has been re-created with a dup()
* of the same file that it was before.
*/
op = EPOLL_CTL_MOD;
}
} else if ((ch->read_change & EV_CHANGE_DEL) ||
(ch->write_change & EV_CHANGE_DEL)) {
/* If we're deleting anything, we'll want to do a MOD
* or a DEL. */
op = EPOLL_CTL_DEL;

if (ch->read_change & EV_CHANGE_DEL) {
if (ch->write_change & EV_CHANGE_DEL) {
events = EPOLLIN|EPOLLOUT;
} else if (ch->old_events & EV_WRITE) {
events = EPOLLOUT;
op = EPOLL_CTL_MOD;
} else {
events = EPOLLIN;
}
} else if (ch->write_change & EV_CHANGE_DEL) {
if (ch->old_events & EV_READ) {
events = EPOLLIN;
op = EPOLL_CTL_MOD;
} else {
events = EPOLLOUT;
}
}
}

/* Both branches above always leave `events` non-zero, so a zero mask means
 * the change asked for neither an ADD nor a DEL.  `op` is unset on that
 * path, so bail out before it is read. */
if (!events)
return 0;

memset(&epev, 0, sizeof(epev));
epev.data.fd = ch->fd;
epev.events = events;
if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == -1) {
if (op == EPOLL_CTL_MOD && errno == ENOENT) {
/* If a MOD operation fails with ENOENT, the
* fd was probably closed and re-opened.  We
* should retry the operation as an ADD.
*/
if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
(int)epev.events, ch->fd);
return -1;
} else {
event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
(int)epev.events,
ch->fd));
}
} else if (op == EPOLL_CTL_ADD && errno == EEXIST) {
/* If an ADD operation fails with EEXIST,
* either the operation was redundant (as with a
* precautionary add), or we ran into a fun
* kernel bug where using dup*() to duplicate the
* same file into the same fd gives you the same epitem
* rather than a fresh one.  For the second case,
* we must retry with MOD. */
if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
(int)epev.events, ch->fd);
return -1;
} else {
event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
(int)epev.events,
ch->fd));
}
} else if (op == EPOLL_CTL_DEL &&
(errno == ENOENT || errno == EBADF ||
errno == EPERM)) {
/* If a delete fails with one of these errors,
* that's fine too: we closed the fd before we
* got around to calling epoll_dispatch. */
event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
(int)epev.events,
ch->fd,
strerror(errno)));
} else {
event_warn("Epoll %s(%d) on fd %d failed.  Old events were %d; read change was %d (%s); write change was %d (%s)",
epoll_op_to_string(op),
(int)epev.events,
ch->fd,
ch->old_events,
ch->read_change,
change_to_string(ch->read_change),
ch->write_change,
change_to_string(ch->write_change));
return -1;
}
} else {
event_debug(("Epoll %s(%d) on fd %d okay. [old events were %d; read change was %d; write change was %d]",
epoll_op_to_string(op),
(int)epev.events,
(int)ch->fd,
ch->old_events,
ch->read_change,
ch->write_change));
}
}
return 0;
}

3：事件循环

/*
 * Run one wait-and-dispatch pass of the epoll backend.
 *
 * Flushes pending changes, waits in epoll_wait() for up to `tv` (forever
 * when tv is NULL) with the base lock released, converts each ready epoll
 * event into EV_READ / EV_WRITE and reports it through evmap_io_active(),
 * and finally doubles the result buffer if this pass filled it completely.
 *
 * Returns 0 on success or when epoll_wait() was interrupted (EINTR), and
 * -1 on any other epoll_wait() failure.
 */
static int epoll_dispatch(struct event_base *base, struct timeval *tv)
{
	struct epollop *op = base->evbase;
	struct epoll_event *ready = op->events;
	long wait_ms = -1;
	int nready;
	int idx;

	/* Convert the timeout to milliseconds, clamping out-of-range values. */
	if (tv != NULL) {
		wait_ms = evutil_tv_to_msec(tv);
		if (wait_ms < 0 || wait_ms > MAX_EPOLL_TIMEOUT_MSEC) {
			/* Linux kernels can wait forever if the timeout is
			 * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
			wait_ms = MAX_EPOLL_TIMEOUT_MSEC;
		}
	}

	epoll_apply_changes(base);
	event_changelist_remove_all(&base->changelist, base);

	/* The base lock is dropped only for the duration of the wait. */
	EVBASE_RELEASE_LOCK(base, th_base_lock);
	nready = epoll_wait(op->epfd, ready, op->nevents, wait_ms);
	EVBASE_ACQUIRE_LOCK(base, th_base_lock);

	if (nready == -1) {
		/* An interrupted wait is not an error. */
		if (errno == EINTR)
			return (0);
		event_warn("epoll_wait");
		return (-1);
	}

	event_debug(("%s: epoll_wait reports %d", __func__, nready));
	EVUTIL_ASSERT(nready <= op->nevents);

	for (idx = 0; idx < nready; ++idx) {
		const int what = ready[idx].events;
		short ev;

		/* Hang-up or error is reported as both readable and
		 * writable; otherwise the epoll bits map one-to-one. */
		if (what & (EPOLLHUP|EPOLLERR)) {
			ev = EV_READ | EV_WRITE;
		} else {
			ev = ((what & EPOLLIN) ? EV_READ : 0) |
			    ((what & EPOLLOUT) ? EV_WRITE : 0);
		}

		/* Hand the ready fd and its events over to the base. */
		if (ev)
			evmap_io_active(base, ready[idx].data.fd, ev | EV_ET);
	}

	if (nready == op->nevents && op->nevents < MAX_NEVENT) {
		/* We used all of the event space this time.  We should
		   be ready for more events next time. */
		int doubled = op->nevents * 2;
		struct epoll_event *grown = mm_realloc(op->events,
		    doubled * sizeof(struct epoll_event));

		/* On allocation failure the old buffer simply stays in use. */
		if (grown != NULL) {
			op->events = grown;
			op->nevents = doubled;
		}
	}

	return (0);
}

4：删除

/*
 * Tear down the epoll backend state attached to an event_base.
 *
 * Calls evsig_dealloc() for the base, frees the result buffer, closes the
 * epoll descriptor, then zeroes and frees the struct epollop itself.
 */
static void
epoll_dealloc(struct event_base *base)
{
	struct epollop *op = base->evbase;

	/* Signal-handling teardown (evsig_dealloc is defined elsewhere). */
	evsig_dealloc(base);

	/* Release the result buffer and the epoll descriptor. */
	if (op->events != NULL)
		mm_free(op->events);
	if (op->epfd >= 0)
		close(op->epfd);

	/* Zero the struct before handing the memory back. */
	memset(op, 0, sizeof(*op));
	mm_free(op);
}

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航