您的位置:首页 > 移动开发 > Android开发

android—init进程如何重启service

2016-09-23 11:27 447 查看
《android—init.rc的读取》中介绍过,init进程会启动很多native的service,这些service如果不是oneshot的,当service出现异常挂掉后,init需要将其重新启动起来,那么具体是如何操作的?其实主要是借助了信号和socket来实现。

在init的main()函数中,首先进行了signal相关的初始化,设置了init对SIGCHLD(native的service都是在init中通过fork新建的子进程,子进程挂掉后会给init发送SIGCHLD信号)的信号处理函数,

/* Call site in init's main(): hands signal_init_action to
 * queue_builtin_action() under the label "signal_init".
 * NOTE(review): presumably the action is run later from init's action
 * queue rather than immediately - confirm against queue_builtin_action(). */
queue_builtin_action(signal_init_action, "signal_init");

/*
 * Built-in action wrapper around signal_init().
 *
 * nargs/args exist only to match the built-in action callback signature;
 * neither is consulted. Always returns 0.
 */
static int signal_init_action(int nargs, char **args)
{
    (void)nargs;
    (void)args;

    signal_init();
    return 0;
}

void signal_init(void)
{
int s[2];

struct sigaction act;
memset(&act, 0, sizeof(act));
act.sa_handler = sigchld_handler;
act.sa_flags = SA_NOCLDSTOP;
sigaction(SIGCHLD, &act, 0);

//创建一个socketpair,一个读fd一个写fd
/* create a signalling mechanism for the sigchld handler */
if (socketpair(AF_UNIX, SOCK_STREAM, 0, s) == 0) {
signal_fd = s[0];
signal_recv_fd = s[1];
fcntl(s[0], F_SETFD, FD_CLOEXEC);
fcntl(s[0], F_SETFL, O_NONBLOCK);
fcntl(s[1], F_SETFD, FD_CLOEXEC);
fcntl(s[1], F_SETFL, O_NONBLOCK);
}

handle_signal();
}


SIGCHLD的信号处理函数sigchld_handler(),就是向signal_fd中写数据,这时候signal_recv_fd将会收到数据,那么,init肯定在哪里对这个signal_recv_fd进行了poll。

/*
 * SIGCHLD handler: writes one byte to signal_fd so that the other end of
 * the socketpair (signal_recv_fd) becomes readable. The value of the byte
 * is irrelevant; only its arrival matters.
 *
 * errno is saved and restored around write(): the handler can interrupt
 * code that is about to inspect errno, and a failed write() here would
 * otherwise overwrite that value.
 */
static void sigchld_handler(int s)
{
    int saved_errno = errno;

    write(signal_fd, &s, 1);
    errno = saved_errno;
}


在init的for循环中,发现确实对signal_recv_fd进行了poll,

/*
 * One-time registration of the SIGCHLD notification socket in the poll set.
 * The SIGCHLD handler writes to signal_fd when a child exits; the peer
 * descriptor returned by get_signal_fd() then becomes readable, so it is
 * watched for POLLIN here.
 */
if (!signal_fd_init && get_signal_fd() > 0) {
    ufds[fd_count].revents = 0;
    ufds[fd_count].events = POLLIN;
    ufds[fd_count].fd = get_signal_fd();
    signal_fd_init = 1;
    fd_count += 1;
}

/*
 * Returns signal_recv_fd, the read end of the SIGCHLD notification
 * socketpair. Declared with an explicit (void) parameter list so the
 * definition is a real prototype, matching signal_init(void) and
 * handle_signal(void).
 */
int get_signal_fd(void)
{
    return signal_recv_fd;
}


当signal_recv_fd描述符poll触发返回时,执行handle_signal()函数,

/*
 * Dispatch every polled descriptor that has data to read to its handler.
 *
 * revents is a bit mask: poll() may report POLLHUP or POLLERR together
 * with POLLIN, so readability is tested with '&'. An equality test would
 * skip the handler in that case and leave the descriptor undrained.
 */
for (i = 0; i < fd_count; i++) {
    if (ufds[i].revents & POLLIN) {
        if (ufds[i].fd == get_property_set_fd()) {
            handle_property_set_fd();
        } else if (ufds[i].fd == get_keychord_fd()) {
            handle_keychord();
        } else if (ufds[i].fd == get_signal_fd()) {
            /* SIGCHLD notification: run handle_signal() */
            handle_signal();
        }
    }
}


而,

/*
 * Consume pending SIGCHLD notification bytes from signal_recv_fd, then
 * reap children without blocking until wait_for_one_process() returns
 * non-zero.
 */
void handle_signal(void)
{
    char drain[32];

    /* we got a SIGCHLD - reap and restart as needed */
    read(signal_recv_fd, drain, sizeof(drain));

    for (;;) {
        if (wait_for_one_process(0) != 0) {
            break;
        }
    }
}


wait_for_one_process的主要功能是回收已经退出的子进程,然后把需要重启的service的状态设置为SVC_RESTARTING。同时需要注意,它还会执行该service的onrestart选项中配置的命令,这些命令通常就是去重启与之相关的其他service,

/*
 * Reap at most one exited child and update the state of the service that
 * owned it.
 *
 * block: non-zero makes waitpid() block until a child exits; zero makes it
 *        return immediately (WNOHANG).
 *
 * Returns -1 when waitpid() produced no child (pid <= 0); this is the value
 * that ends the reaping loop in handle_signal(). Returns 0 in every other
 * case, including when the pid does not belong to a known service.
 *
 * The service is NOT restarted here. It is only flagged SVC_RESTARTING and
 * its onrestart commands are executed.
 */
static int wait_for_one_process(int block)
{
pid_t pid;
int status;
struct service *svc;
struct socketinfo *si;
time_t now;
struct listnode *node;
struct command *cmd;

/* Wait for any child (-1), retrying when waitpid() is interrupted (EINTR). */
/* The service is then looked up by pid; only its onrestart commands run here. */

while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR );
if (pid <= 0) return -1;
INFO("waitpid returned pid %d, status = %08x\n", pid, status);

svc = service_find_by_pid(pid);
if (!svc) {
ERROR("untracked pid %d exited\n", pid);
return 0;
}

NOTICE("process '%s', pid %d exited\n", svc->name, pid);

/* Unless this is a oneshot service that was not manually restarted, SIGKILL
 * the whole process group (negative pid) to clean up remaining children. */
if (!(svc->flags & SVC_ONESHOT) || (svc->flags & SVC_RESTART)) {
kill(-pid, SIGKILL);
NOTICE("process '%s' killing any children in process group\n", svc->name);
}

/* remove any sockets we may have created */
for (si = svc->sockets; si; si = si->next) {
char tmp[128];
snprintf(tmp, sizeof(tmp), ANDROID_SOCKET_DIR"/%s", si->name);
unlink(tmp);
}

/* The process is gone: forget its pid and clear the running flag. */
svc->pid = 0;
svc->flags &= (~SVC_RUNNING);

/* oneshot processes go into the disabled state on exit,
* except when manually restarted. */
if ((svc->flags & SVC_ONESHOT) && !(svc->flags & SVC_RESTART)) {
svc->flags |= SVC_DISABLED;
}

/* disabled and reset processes do not get restarted automatically */
if (svc->flags & (SVC_DISABLED | SVC_RESET) )  {
notify_service_state(svc->name, "stopped");
return 0;
}

now = gettime();
/* Critical services: count exits that fall inside CRITICAL_CRASH_WINDOW of
 * the first recorded crash. Once the count exceeds CRITICAL_CRASH_THRESHOLD
 * the device is rebooted into recovery. An exit outside the window starts a
 * new window with a count of 1. Manual restarts (SVC_RESTART) are not counted. */
if ((svc->flags & SVC_CRITICAL) && !(svc->flags & SVC_RESTART)) {
if (svc->time_crashed + CRITICAL_CRASH_WINDOW >= now) {
if (++svc->nr_crashed > CRITICAL_CRASH_THRESHOLD) {
ERROR("critical process '%s' exited %d times in %d minutes; "
"rebooting into recovery mode\n", svc->name,
CRITICAL_CRASH_THRESHOLD, CRITICAL_CRASH_WINDOW / 60);
android_reboot(ANDROID_RB_RESTART2, 0, "recovery");
return 0;
}
} else {
svc->time_crashed = now;
svc->nr_crashed = 1;
}
}

/* Mark the service as pending restart; the restart itself happens elsewhere. */
svc->flags &= (~SVC_RESTART);
svc->flags |= SVC_RESTARTING;

/* Execute all onrestart commands for this service. */
/* Each command attached to the service's onrestart action is invoked in list order. */
list_for_each(node, &svc->onrestart.commands) {
cmd = node_to_item(node, struct command, clist);
cmd->func(cmd->nargs, cmd->args);
}
notify_service_state(svc->name, "restarting");
return 0;
}


上面只是设置了service的状态为SVC_RESTARTING,真正重启的地方在main函数中的for循环的restart_processes(),

/* Call site inside the for-loop of init's main() (per the surrounding
 * text): start every service currently flagged SVC_RESTARTING. */
restart_processes();

/*
 * Reset process_needs_restart to 0, then run restart_service_if_needed()
 * on every service that has SVC_RESTARTING set. That callback starts the
 * services whose delay has elapsed and records in process_needs_restart
 * the earliest start time among those still waiting.
 *
 * Declared with an explicit (void) parameter list so the definition is a
 * real prototype, matching the other zero-argument functions in this file.
 */
static void restart_processes(void)
{
    process_needs_restart = 0;
    /* only services flagged SVC_RESTARTING are considered */
    service_for_each_flags(SVC_RESTARTING,
                           restart_service_if_needed);
}

/*
 * Invoke func on every entry of service_list whose flags share at least one
 * bit with matchflags.
 *
 * matchflags: bit mask of SVC_* flags to match (any-bit match, not all).
 * func:       callback receiving each matching service.
 */
void service_for_each_flags(unsigned matchflags,
                            void (*func)(struct service *svc))
{
    struct listnode *entry;

    list_for_each(entry, &service_list) {
        struct service *candidate = node_to_item(entry, struct service, slist);

        if ((candidate->flags & matchflags) != 0) {
            func(candidate);
        }
    }
}
/*
 * Start svc if at least 5 time units (as measured by gettime()) have
 * passed since svc->time_started; otherwise remember when it becomes
 * eligible.
 *
 * When the service is started, SVC_RESTARTING is cleared first. When it is
 * still too early, process_needs_restart is lowered to this service's
 * eligible time if that is earlier than the current value, or if no value
 * has been recorded yet (0).
 */
static void restart_service_if_needed(struct service *svc)
{
    time_t eligible_at = svc->time_started + 5;

    if (gettime() >= eligible_at) {
        svc->flags &= (~SVC_RESTARTING);
        /* actually restart the service */
        service_start(svc, NULL);
        return;
    }

    if (process_needs_restart == 0 ||
        eligible_at < process_needs_restart) {
        process_needs_restart = eligible_at;
    }
}


至此,init就完成了对service的重启,保证了某些关键service一直运行。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: