您的位置:首页 > 运维架构 > Nginx

nginx 1.6.2源码分析1 499问题

2015-07-07 15:12 501 查看
网上的解释:
http://www.lc365.net/blog/b/23997/
首先尝试从代码上解释问题:

首先要找到打印499日志的地方,看调用的逻辑

log时的栈

#0  ngx_http_log_request_time (r=0x15590f0, buf=0x15d8b63 "", op=0x15d40e0) at src/http/modules/ngx_http_log_module.c:773

#1  0x00000000004330e6 in ngx_http_log_handler (r=0x15590f0) at src/http/modules/ngx_http_log_module.c:308

#2  0x000000000042a5b4 in ngx_http_log_request (r=r@entry=0x15590f0) at src/http/ngx_http_request.c:3509

#3  0x000000000042b44b in ngx_http_free_request (r=r@entry=0x15590f0, rc=rc@entry=0) at src/http/ngx_http_request.c:3456

#4  0x000000000042b584 in ngx_http_close_request (r=0x15590f0, rc=rc@entry=0) at src/http/ngx_http_request.c:3403

#5  0x000000000042b931 in ngx_http_terminate_handler (r=<optimized out>) at src/http/ngx_http_request.c:2498

#6  0x000000000042b21c in ngx_http_run_posted_requests (c=c@entry=0x2acb98b14420) at src/http/ngx_http_request.c:2225

#7  0x000000000042b2bb in ngx_http_request_handler (ev=<optimized out>) at src/http/ngx_http_request.c:2191

#8  0x0000000000420338 in ngx_epoll_process_events (cycle=<optimized out>, timer=<optimized out>, flags=<optimized out>) at src/event/modules/ngx_epoll_module.c:691

#9  0x0000000000418f94 in ngx_process_events_and_timers (cycle=cycle@entry=0x15b4c30) at src/event/ngx_event.c:248

#10 0x000000000041ee0f in ngx_worker_process_cycle (cycle=0x15b4c30, data=<optimized out>) at src/os/unix/ngx_process_cycle.c:816

#11 0x000000000041d686 in ngx_spawn_process (cycle=cycle@entry=0x15b4c30, proc=proc@entry=0x41ed43 <ngx_worker_process_cycle>, data=data@entry=0x17, 

    name=name@entry=0x467df4 "worker process", respawn=respawn@entry=-4) at src/os/unix/ngx_process.c:198

#12 0x000000000041e145 in ngx_start_worker_processes (cycle=cycle@entry=0x15b4c30, n=32, type=type@entry=-4) at src/os/unix/ngx_process_cycle.c:364

#13 0x000000000041fb12 in ngx_master_process_cycle (cycle=0x15b4c30, cycle@entry=0x15590f0) at src/os/unix/ngx_process_cycle.c:249

#14 0x0000000000404160 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:407

log的输出格式

static ngx_str_t  ngx_http_combined_fmt =

    ngx_string("$remote_addr - $remote_user [$time_local] "

               "\"$request\" $status $body_bytes_sent "

               "\"$http_referer\" \"$http_user_agent\"");

然后通过适当更改源码,捕获出现499栈的时候的栈

(gdb) p status

$1 = 499

(gdb) bt

#0  ngx_http_log_status (r=0x84be80, 

    buf=0x85f465 "qxz1:200 62 \"-\" 0.001 qxz2:0.001\n- 10.1.242.133 [2015-07-07T16:21:55+08:00] \"GET /qiso3?if=video&key=kldby HTTP/1.1\" qxz1:200 62 \"-\" 0.001 qxz2:0.001\n- 10.1.242.133 [2015-07-07T16:21:55+08:00] \"GET /q"..., op=0x858368) at src/http/modules/ngx_http_log_module.c:806

#1  0x0000000000433157 in ngx_http_log_handler (r=0x84be80) at src/http/modules/ngx_http_log_module.c:308

#2  0x000000000042a604 in ngx_http_log_request (r=r@entry=0x84be80) at src/http/ngx_http_request.c:3509

#3  0x000000000042b49b in ngx_http_free_request (r=r@entry=0x84be80, rc=rc@entry=0) at src/http/ngx_http_request.c:3456

#4  0x000000000042b5d4 in ngx_http_close_request (r=0x84be80, rc=rc@entry=0) at src/http/ngx_http_request.c:3403

#5  0x000000000042b981 in ngx_http_terminate_handler (r=<optimized out>) at src/http/ngx_http_request.c:2498

#6  0x000000000042b26c in ngx_http_run_posted_requests (c=c@entry=0x2b716e351420) at src/http/ngx_http_request.c:2225

#7  0x000000000042b30b in ngx_http_request_handler (ev=<optimized out>) at src/http/ngx_http_request.c:2191

#8  0x0000000000419416 in ngx_event_process_posted (cycle=cycle@entry=0x8400f0, posted=0x691b98 <ngx_posted_events>) at src/event/ngx_event_posted.c:40

#9  0x000000000041904b in ngx_process_events_and_timers (cycle=cycle@entry=0x8400f0) at src/event/ngx_event.c:275

#10 0x000000000041ee5f in ngx_worker_process_cycle (cycle=0x8400f0, data=<optimized out>) at src/os/unix/ngx_process_cycle.c:816

#11 0x000000000041d6d6 in ngx_spawn_process (cycle=cycle@entry=0x8400f0, proc=proc@entry=0x41ed93 <ngx_worker_process_cycle>, data=data@entry=0x7, 

    name=name@entry=0x467e54 "worker process", respawn=respawn@entry=-3) at src/os/unix/ngx_process.c:198

#12 0x000000000041e195 in ngx_start_worker_processes (cycle=cycle@entry=0x8400f0, n=32, type=type@entry=-3) at src/os/unix/ngx_process_cycle.c:364

#13 0x000000000041f54b in ngx_master_process_cycle (cycle=cycle@entry=0x8400f0) at src/os/unix/ngx_process_cycle.c:136

#14 0x00000000004041b0 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:407

Breakpoint 6, ngx_http_finalize_request (r=r@entry=0x84be80, rc=rc@entry=499) at src/http/ngx_http_request.c:2299

2299        ngx_http_terminate_request(r, rc);

(gdb) bt

#0  ngx_http_finalize_request (r=r@entry=0x84be80, rc=rc@entry=499) at src/http/ngx_http_request.c:2299

#1  0x000000000043a5ac in ngx_http_upstream_finalize_request (r=r@entry=0x84be80, u=u@entry=0x89dc28, rc=rc@entry=499) at src/http/ngx_http_upstream.c:3534

#2  0x000000000043a7bb in ngx_http_upstream_check_broken_connection (r=0x84be80, ev=0x2b716f051280) at src/http/ngx_http_upstream.c:1110

#3  0x000000000043a974 in ngx_http_upstream_rd_check_broken_connection (r=<optimized out>) at src/http/ngx_http_upstream.c:987

#4  0x000000000042b303 in ngx_http_request_handler (ev=<optimized out>) at src/http/ngx_http_request.c:2188

#5  0x0000000000420388 in ngx_epoll_process_events (cycle=<optimized out>, timer=<optimized out>, flags=<optimized out>) at src/event/modules/ngx_epoll_module.c:691

#6  0x0000000000418fe4 in ngx_process_events_and_timers (cycle=cycle@entry=0x8400f0) at src/event/ngx_event.c:248

#7  0x000000000041ee5f in ngx_worker_process_cycle (cycle=0x8400f0, data=<optimized out>) at src/os/unix/ngx_process_cycle.c:816

#8  0x000000000041d6d6 in ngx_spawn_process (cycle=cycle@entry=0x8400f0, proc=proc@entry=0x41ed93 <ngx_worker_process_cycle>, data=data@entry=0x1c, 

    name=name@entry=0x467e54 "worker process", respawn=respawn@entry=-3) at src/os/unix/ngx_process.c:198

#9  0x000000000041e195 in ngx_start_worker_processes (cycle=cycle@entry=0x8400f0, n=32, type=type@entry=-3) at src/os/unix/ngx_process_cycle.c:364

#10 0x000000000041f54b in ngx_master_process_cycle (cycle=cycle@entry=0x8400f0) at src/os/unix/ngx_process_cycle.c:136

#11 0x00000000004041b0 in main (argc=<optimized out>, argv=<optimized out>) at src/core/nginx.c:407

最终问题集中在

src/http/ngx_http_upstream.c:1110

这点,要触发499,必须两个条件ev->pending_eof ==1和cacheable==0,但从源码分析,cacheable==0的地方很多,而ev->pending_eof ==1不确定精确含义是什么, end of file?

在复现的情况下,抓包发现,请看文件2.pcap,其中一个异常点,存在这样的报文,比如tcp.port == 7139 的报文,在发送完get请求后,就直接fin了,这样nginx会怎么想?

会不会是这个fin导致nignx出现了499?这个似乎和源码中的ngx_http_upstream_rd_check_broken_connection 和ev->pending_eof ==1相互呼应。

因此两个方法:

对应报文和日志,查看这个非正常fin的地方是否和出现499的日志的时间对应,但是发现并不完全对应。

使用一个简单程序,先发送一个get请求,然后发送一个reset,这样看499的情况:

// test.cpp : Defines the entry point for the console application.

//

#include "stdafx.h"

#include <stdio.h>

#include <Winsock2.h>

#pragma comment(lib, "ws2_32.lib")

void main();

int _tmain(int argc, _TCHAR* argv[])

{
main();
return 0;

}

/***************************************************************************

     Following variables and functions are used for simulating

****************************************************************************/

char * requestMsg[] ={ 

              "GET /qiso3?if=video&key=kldby HTTP/1.1\r\n"

              "Host: 10.77.42.29\r\n"

              "User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0\r\n"

              "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"

              "Accept-Language: zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3\r\n"

              "Accept-Encoding: gzip, deflate\r\n"

              "Connection: keep-alive\r\n"

              "\r\n"

 };

int requestSock;/*simulate a client on device, this is the socket*/

//客户端

#include< stdio.h >

#include< stdlib.h >

#include< windows.h >

#include< winsock.h >

#include< string.h >

#pragma comment( lib, "ws2_32.lib" )

#define PORT 80//554

#define MAXDATASIZE 100

#define MAX_LEN 1500

void main( void )

{

int iClientSock;
struct sockaddr_in ServerAddr;

int numbytes;
char buf[ MAXDATASIZE ] = { 0 };
char ip[20];
char ipdefault[]="10.77.42.29";
char recvBuf[MAX_LEN];  
int j=0;
char c;

WSADATA WSAData;
if( WSAStartup( MAKEWORD( 1, 1 ), &WSAData ) )
{
printf( "initialization error!\n" );
WSACleanup( );
exit( 0 );
}

printf("set server ip address:\n\r1 ip set by user;\n\r2 default %s\n\r",ipdefault);
c=getchar();
switch(c)
{
case '1':
scanf("%s",ip);
break;
default:
strcpy(ip,ipdefault);
}

do
{
ServerAddr.sin_family = AF_INET;
ServerAddr.sin_port = htons( PORT );
ServerAddr.sin_addr.s_addr = inet_addr(ip);//记得换IP
memset( &( ServerAddr.sin_zero ), 0, sizeof( ServerAddr.sin_zero ) );
if( ( iClientSock = socket( AF_INET, SOCK_STREAM, 0 ) ) == -1 )
{
printf( "创建套接字失败!\n" );
WSACleanup( );
exit( 0 );
}
if( connect( iClientSock, ( struct sockaddr * ) & ServerAddr, sizeof( struct sockaddr ) ) == -1 )
{
printf("connect失败!");
WSACleanup( );
exit( 0 );
}
char c;

        int  i =0;
numbytes = send( iClientSock, requestMsg[i], strlen( requestMsg[i] ), 0 );
if( numbytes == -1 )
{
printf( "send调用失败!\n" );
WSACleanup();
exit( 0 );
}

printf( "sent %d bytes to %s\n", numbytes, inet_ntoa( ServerAddr.sin_addr ) );
printf("msg:%s",requestMsg[i]);

j=recv(iClientSock,recvBuf,MAX_LEN,0);// 接受客户端消息 

if(j>0)

printf("recv(len:%d): \n\r",i); 
recvBuf[j]='\0';
printf("msg:%s",recvBuf);
}

c=getchar();
closesocket( iClientSock );
continue;
c=getchar();
}while(1);
c=getchar();
closesocket( iClientSock );
WSACleanup( );



使用这个程序,复现了问题。说明是客户端的fin reset导致499

而一般客户端强制断开链接可能是浏览器关闭 强制刷新或者超时等操作。

如果客户端的请求已经被回复的情况下,是不会复现499的问题的,因为这时候的断开链接是可以接受的。

问题是,nginx中,保存了怎样的数据结构,对于一个事件?为什么已经回复请求的就不会复现这个问题?

请听下回分解。

参考
http://chenzhenianqing.cn/articles/742.html nginx upstream的解释
http://www.cnblogs.com/jianxie/p/3990377.html   nginx的命令
http://www.2cto.com/net/201310/251896.html tcp的三次和四次握手
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: