A quick disclaimer up front: the analysis below may contain mistakes, so please bear with me.
Let's look at how nginx talks to memcached from the strace point of view.
With nginx in single-process mode:
[root@localhost sbin]# strace -p 13303
Process 13303 attached - interrupt to quit
gettimeofday({1281607164, 794683}, NULL) = 0
epoll_wait(9, {{EPOLLIN, {u32=158186800, u64=13834971005043195184}}}, 512, -1) = 1
gettimeofday({1281607175, 954973}, NULL) = 0
accept(6, {sa_family=AF_INET, sin_port=htons(4847), sin_addr=inet_addr("61.135.255.88")}, [16]) = 7
ioctl(7, FIONBIO, [1]) = 0
epoll_ctl(9, EPOLL_CTL_ADD, 7, {EPOLLIN|EPOLLET, {u32=158186984, u64=3458764513978727912}}) = 0
epoll_wait(9, {{EPOLLIN, {u32=158186984, u64=3458764513978727912}}}, 512, 60000) = 1
gettimeofday({1281607175, 956459}, NULL) = 0
recv(7, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(7, "Invalid license key Abo"..., 3515, 0) = 288
epoll_ctl(9, EPOLL_CTL_MOD, 7, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=158186984, u64=52946776435310056}}) = 0
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 10
ioctl(10, FIONBIO, [1]) = 0
epoll_ctl(9, EPOLL_CTL_ADD, 10, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=158187076, u64=13834962827425463876}}) = 0
connect(10, {sa_family=AF_INET, sin_port=htons(22422), sin_addr=inet_addr("61.135.250.208")}, 16) = -1 EINPROGRESS (Operation now in progress)
epoll_wait(9, {{EPOLLOUT, {u32=158186984, u64=52946776435310056}}}, 512, 60000) = 1
gettimeofday({1281607175, 957018}, NULL) = 0
recv(7, 0xbfffacd3, 1, MSG_PEEK) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 59999) = 1
gettimeofday({1281607175, 957136}, NULL) = 0
getsockopt(10, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(10, [{"set dog 1234 2000 6\r\n", 21}, {"animal", 6}, {"\r\n", 2}], 3) = 29
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 60000) = 1
gettimeofday({1281607175, 957479}, NULL) = 0
recv(10, "STORED\r\n", 4096, 0) = 8
setsockopt(7, SOL_TCP, TCP_NODELAY, [1], 4) = 0
writev(7, [{"HTTP/1.1 201 Created\r\nServer: ng"..., 151}, {"STORED\r\n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(7, 0x96d04e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158186984, u64=52946776435310056}}}, 512, 65000) = 1
gettimeofday({1281607176, 820436}, NULL) = 0
recv(7, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(7, "Invalid license key Abo"..., 3515, 0) = 288
getsockopt(10, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(10, [{"set dog 1234 2000 6\r\n", 21}, {"animal", 6}, {"\r\n", 2}], 3) = 29
recv(10, 0x96d2a00, 4096, 0) = -1 EAGAIN (Resource temporarily unavailable)
recv(7, 0xbfffacd3, 1, MSG_PEEK) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9, {{EPOLLIN|EPOLLOUT, {u32=158187076, u64=13834962827425463876}}}, 512, 60000) = 1
gettimeofday({1281607176, 820964}, NULL) = 0
recv(10, "STORED\r\n", 4096, 0) = 8
writev(7, [{"HTTP/1.1 201 Created\r\nServer: ng"..., 151}, {"STORED\r\n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(7, 0x96d04e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(9,
Everything looks normal: the connection to the backend (fd 10) is never closed.
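As a side note, the writev to fd 10 in the trace is just the plain memcached ASCII protocol: "set <key> <flags> <exptime> <bytes>\r\n<data>\r\n", answered by "STORED\r\n". The following is a minimal standalone C sketch that speaks the same protocol; it uses blocking I/O for brevity, and the 127.0.0.1:11211 address is a placeholder, not the backend from the trace.

/* Minimal memcached "set" over the ASCII protocol, mirroring the
 * writev/recv pair seen in the strace above (illustrative only). */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/socket.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);

    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(11211);                 /* assumed local memcached */
    addr.sin_addr.s_addr = inet_addr("127.0.0.1");

    if (fd < 0 || connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
        perror("connect");
        return 1;
    }

    /* "set <key> <flags> <exptime> <bytes>\r\n<data>\r\n" */
    const char *req = "set dog 1234 2000 6\r\nanimal\r\n";
    if (write(fd, req, strlen(req)) < 0) {
        perror("write");
        return 1;
    }

    char buf[4096];
    ssize_t n = read(fd, buf, sizeof(buf) - 1);   /* expect "STORED\r\n" */
    if (n > 0) {
        buf[n] = '\0';
        printf("memcached replied: %s", buf);
    }

    close(fd);
    return 0;
}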
In multi-process mode:
[root@localhost sbin]# strace -p 12216
Process 12216 attached - interrupt to quit
gettimeofday({1281606539, 572443}, NULL) = 0
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532648, u64=13826798420996627944}}}, 512, 59245) = 1
gettimeofday({1281606555, 539391}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 89) = 89
recv(8, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(8, "Invalid license key Abo"..., 3515, 0) = 262
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 9
ioctl(9, FIONBIO, [1]) = 0
epoll_ctl(11, EPOLL_CTL_ADD, 9, {EPOLLIN|EPOLLOUT|EPOLLET, {u32=141532740, u64=13826797235585654340}}) = 0
connect(9, {sa_family=AF_INET, sin_port=htons(22422), sin_addr=inet_addr("61.135.250.208")}, 16) = -1 EINPROGRESS (Operation now in progress)
epoll_wait(11, {{EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606555, 540126}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 93) = 93
getsockopt(9, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(9, [{"set dog 1234 2000 6\r\n", 21}, {"animal", 6}, {"\r\n", 2}], 3) = 29
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606555, 540696}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:15 [emerg] 1221"..., 93) = 93
recv(9, "STORED\r\n", 4096, 0) = 8
writev(8, [{"HTTP/1.1 201 Created\r\nServer: ng"..., 151}, {"STORED\r\n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(8, 0x86ee4e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532648, u64=13826798420996627944}}}, 512, 65000) = 1
gettimeofday({1281606569, 521328}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:29 [emerg] 1221"..., 99) = 99
recv(8, "GET /bar?cmd=set&key=dog&val=ani"..., 1024, 0) = 1024
recv(8, "Invalid license key Abo"..., 3515, 0) = 288
getsockopt(9, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
writev(9, [{"set dog 1234 2000 6\r\n", 21}, {"animal", 6}, {"\r\n", 2}], 3) = 29
recv(9, 0x86f0a00, 4096, 0) = -1 EAGAIN (Resource temporarily unavailable)
epoll_wait(11, {{EPOLLIN|EPOLLOUT, {u32=141532740, u64=13826797235585654340}}}, 512, 60000) = 1
gettimeofday({1281606569, 522019}, NULL) = 0
gettid() = 12216
write(5, "2010/08/12 17:49:29 [emerg] 1221"..., 99) = 99
recv(9, "STORED\r\n", 4096, 0) = 8
writev(8, [{"HTTP/1.1 201 Created\r\nServer: ng"..., 151}, {"STORED\r\n", 8}], 2) = 159
write(4, "61.135.255.88 - - [12/Aug/2010:1"..., 242) = 242
recv(8, 0x86ee4e0, 1024, 0) = -1 EAGAIN (Resource temporarily unavailable)
close(9) = 0
epoll_wait(11,
We can see a close() on fd 9, and fd 9 is nginx's connection to the backend. Why is there an extra close operation here?
Let's keep digging.
Comparing the single-process and multi-process runs, the multi-process case additionally executes the ev->handler(ev) call inside ngx_event_process_posted. Here is what happens in multi-process mode:
Breakpoint 2, ngx_event_process_posted (cycle=0x838bcf8, posted=0x80d09ec) at src/event/ngx_event_posted.c:28
28          ev = (ngx_event_t *) *posted;
(gdb) n
33          if (ev == NULL) {
(gdb) p ev
$1 = (ngx_event_t *) 0x83b3da0
(gdb) n
37          ngx_delete_posted_event(ev);
(gdb) n
39          ev->handler(ev);
(gdb) s
ngx_http_keepalive_handler (rev=0x83b3da0) at src/http/ngx_http_request.c:2489
2489        c = rev->data;
(gdb) n
Breakpoint 2, ngx_event_process_posted (cycle=0x838bcf8, posted=0x80d09ec) at src/event/ngx_event_posted.c:28
28          ev = (ngx_event_t *) *posted;
(gdb)
33          if (ev == NULL) {
(gdb) p ev
$2 = (ngx_event_t *) 0x83b3dd4
(gdb)
37          ngx_delete_posted_event(ev);
(gdb) n
39          ev->handler(ev);
(gdb) s
ngx_http_upstream_keepalive_close_handler (ev=0x83b3dd4) at /home/wangbin/work/memcached/keepalive/ngx_http_upstream_keepalive_module.c:394
394         c = ev->data;
This shows that in multi-process mode ngx_http_upstream_keepalive_close_handler is invoked; I won't walk through the close logic that follows it.
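Before hunting for the cause, it helps to be clear on what ngx_event_process_posted does: it simply drains a queue of events that were "posted" earlier instead of being handled immediately. The following is a tiny self-contained model of that pattern; it is illustrative C, not nginx source, and the names only loosely mirror nginx.

/* A toy model of nginx's posted-events queue: instead of running an event
 * handler the moment epoll reports it, the event is appended to a list and
 * all handlers run later in one pass (illustrative only). */
#include <stdio.h>
#include <stddef.h>

typedef struct toy_event_s toy_event_t;

struct toy_event_s {
    const char   *name;
    void        (*handler)(toy_event_t *ev);
    toy_event_t  *next;                        /* link in the posted queue */
};

static toy_event_t *posted = NULL;

static void post_event(toy_event_t *ev)        /* roughly ~ ngx_post_event */
{
    ev->next = posted;
    posted = ev;
}

static void process_posted(void)               /* roughly ~ ngx_event_process_posted */
{
    while (posted != NULL) {
        toy_event_t *ev = posted;
        posted = ev->next;                     /* ~ ngx_delete_posted_event */
        ev->handler(ev);                       /* the deferred ev->handler(ev) */
    }
}

static void print_handler(toy_event_t *ev)
{
    printf("handler for %s runs (deferred)\n", ev->name);
}

int main(void)
{
    toy_event_t upstream  = { "cached upstream read event",  print_handler, NULL };
    toy_event_t keepalive = { "client keepalive read event", print_handler, NULL };

    /* With NGX_POST_EVENTS set, the epoll module posts instead of calling: */
    post_event(&upstream);
    post_event(&keepalive);

    /* ... later, the queue is drained in one pass: */
    process_posted();
    return 0;
}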
Why the extra close? It seems related to the posted-events mechanism, so we collected the following candidate locations:
 1 src/core/ngx_connection.c|740| <<<unknown>>> ngx_mutex_lock(ngx_posted_events_mutex);
 2 src/core/ngx_connection.c|759| <<<unknown>>> ngx_mutex_unlock(ngx_posted_events_mutex);
 7 src/event/modules/ngx_epoll_module.c|442| <<<unknown>>> ngx_mutex_lock(ngx_posted_events_mutex);
 8 src/event/modules/ngx_epoll_module.c|512| <<<unknown>>> &ngx_posted_accept_events : &ngx_posted_events);
 9 src/event/modules/ngx_epoll_module.c|533| <<<unknown>>> ngx_locked_post_event(wev, &ngx_posted_events);
37 src/event/ngx_event.c|272| <<<unknown>>> ngx_event_process_posted(cycle, &ngx_posted_events);
59 src/event/ngx_event_timer.c|139| <<<unknown>>> ngx_post_event(ev, &ngx_posted_events);
60 src/http/ngx_http_request.c|208| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
61 src/http/ngx_http_request.c|2349| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
62 src/http/ngx_http_request.c|2472| <<<unknown>>> ngx_post_event(rev, &ngx_posted_events);
We ruled the candidates out one by one through a lot of experiments (apologies: not being very familiar with nginx, brute force was the only approach available).
Eventually we found a breakthrough:
531     /* why is ngx_locked_post_event called here, yet no log is printed? */
532     if (flags & NGX_POST_EVENTS) {
533         ngx_locked_post_event(wev, &ngx_posted_events);
534         ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
535                       "process id:%l:", (long int) syscall(224));   /* added log to compare single-process and multi-process runs */
536
where:
492 #define NGX_UPDATE_TIME         1
493 #define NGX_POST_EVENTS         2
494 #define NGX_POST_THREAD_EVENTS  4
We found that in single-process mode, when an existing upstream connection is reused, the log line above is never printed, whereas in multi-process mode it is. The log output suggests that flags may be what changes the behavior, so we followed that clue.
Indeed, flags is 1 in single-process mode and 3 in multi-process mode; that is exactly where the difference lies.
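To spell out the arithmetic with the same numeric values as the defines above, here is a trivial standalone snippet (not nginx code):

/* flags = 1 is NGX_UPDATE_TIME alone; flags = 3 means NGX_POST_EVENTS was
 * OR'ed in as well (1 | 2 == 3). */
#include <stdio.h>

#define NGX_UPDATE_TIME         1
#define NGX_POST_EVENTS         2
#define NGX_POST_THREAD_EVENTS  4

int main(void)
{
    unsigned flags = NGX_UPDATE_TIME;        /* single-process case: 1 */
    printf("single-process flags = %u\n", flags);

    flags |= NGX_POST_EVENTS;                /* multi-process case: 1 | 2 = 3 */
    printf("multi-process  flags = %u\n", flags);
    return 0;
}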
So where does that get set? Tracing into ngx_process_events_and_timers, we found that the variable ngx_accept_mutex_held determines whether flags ends up as 1 or 3. The relevant part of ngx_process_events_and_timers:
221     if (ngx_use_accept_mutex) {
222         if (ngx_accept_disabled > 0) {
223             ngx_accept_disabled--;
224
225         } else {
226             if (ngx_trylock_accept_mutex(cycle) == NGX_ERROR) {
227                 return;
228             }
229
230             if (ngx_accept_mutex_held) {
231                 flags |= NGX_POST_EVENTS;
232
233             } else {
234                 if (timer == NGX_TIMER_INFINITE
235                     || timer > ngx_accept_mutex_delay)
236                 {
237                     timer = ngx_accept_mutex_delay;
238                 }
239             }
240         }
241     }
242
And what controls ngx_accept_mutex_held? Tracing further into ngx_trylock_accept_mutex, the relevant part of that function is:
261     if (ngx_shmtx_trylock(&ngx_accept_mutex)) {
262
263         ngx_log_debug0(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
264                        "accept mutex locked");
265
266         if (ngx_accept_mutex_held
267             && ngx_accept_events == 0
268             && !(ngx_event_flags & NGX_USE_RTSIG_EVENT))
269         {
270             return NGX_OK;
271         }
272
273         if (ngx_enable_accept_events(cycle) == NGX_ERROR) {
274             ngx_shmtx_unlock(&ngx_accept_mutex);
275             return NGX_ERROR;
276         }
277
278         ngx_accept_events = 0;
279         ngx_accept_mutex_held = 1;
280
281         return NGX_OK;
282     }
So ngx_accept_mutex controls ngx_accept_mutex_held, ngx_accept_mutex_held controls flags, and flags controls whether ngx_locked_post_event is executed (and whether our log line is printed). That makes ngx_accept_mutex the most likely root cause of the backend connection being closed. Working from this hypothesis, we set accept_mutex off and ran many more tests: the backend connection was no longer closed. Further reading of the code confirmed that accept_mutex is indeed what triggers the close of the backend connection.
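For reference, these are the knobs involved. The sketch below uses a modern stock nginx (built-in memcached module, which only does retrieval, and built-in upstream keepalive); the original setup used a third-party memc/keepalive module, so treat the directives as illustrative rather than as our exact configuration. The backend address is the one from the trace.

events {
    worker_connections  1024;
    accept_mutex        off;          # the workaround discussed above
}

http {
    upstream memcached_backend {
        server 61.135.250.208:22422;  # backend address from the strace output
        keepalive 32;                 # keep up to 32 idle connections per worker
    }

    server {
        listen 80;

        location /cache {
            set $memcached_key $arg_key;
            memcached_pass memcached_backend;
        }
    }
}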
Does turning accept_mutex off affect correctness? Quoting the nginx author:
OS may wake all processes waiting on accept() and select(), this is called http://en.wikipedia.org/wiki/Thundering_herd_problem This is a problem if you have a lot of workers as in Apache (hundreds and more), but this insensible if you have just several workers as nginx usually has. Therefore turning accept_mutex off is as scheduling incoming connection by OS via select/kqueue/epoll/etc (but not accept().
After we set this parameter for our ad-serving systems, they have been running for more than eight months with no functional problems and noticeably better performance, and it also resolved several other issues along the way (nginx reporting that it could not establish connections, and problems caused by too many sockets in TIME_WAIT).
In general, keeping a bit more than 100 backend connections alive is enough to handle 1500-2000 reqs/s. (Without keepalive, since TIME_WAIT lasts roughly 60 seconds, 2000 reqs/s works out to about 2000 x 60 = 120,000 sockets in TIME_WAIT, and around 200,000 at peak traffic; without tuning some kernel parameters that would cause problems.)
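The kind of kernel tuning alluded to above usually revolves around TIME_WAIT and ephemeral ports. The sysctl names below are real Linux knobs, but the values are illustrative assumptions, not the settings we actually used:

# /etc/sysctl.conf fragment (illustrative values; adjust to your own load)
net.ipv4.ip_local_port_range = 1024 65000   # widen the ephemeral port range
net.ipv4.tcp_max_tw_buckets = 262144        # cap the number of TIME_WAIT sockets
net.ipv4.tcp_fin_timeout = 30               # shorten the FIN_WAIT2 timeout
net.ipv4.tcp_tw_reuse = 1                   # allow reusing TIME_WAIT sockets for new outbound connections

# apply without reboot
sysctl -p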
Here is a concrete example from production.
The number of connections kept open to one particular backend server:
[wangbin@bgp198-134 ~]$ netstat -n |grep '11311'|grep '10.120.12.' |wc -l
79
And the distribution of all TCP connection states on that machine:
[wangbin@bgp198-134 ~]$ netstat -n | awk '/^tcp/ {++state[$NF]} END {for(key in state) print key,"\t",state[key]}'
TIME_WAIT    2393
CLOSE_WAIT   3
FIN_WAIT1    1738
FIN_WAIT2    460
ESTABLISHED  10586
SYN_RECV     288
CLOSING      10
LAST_ACK     104
As you can see, enabling keepalive to the backend brings plenty of benefits.