問題場景:
測試人員報告,業務 modb 做爲 RabbitMQ 的消費者,消費消息的速度很是慢,慢到大約每秒 2 條左右,從而致使 RabbitMQ 的隊列中積壓了 4000+ 條消息。
排查過程:
先經過 top 命令定位一下
[root@upucore_1 ~]# top -Hp 8177
top - 09:50:33 up 34 min, 8 users, load average: 4.76, 4.32, 3.65
Tasks: 2 total, 0 running, 2 sleeping, 0 stopped, 0 zombie
Cpu0 : 11.8%us, 11.8%sy, 0.0%ni, 76.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu1 : 11.1%us, 11.1%sy, 0.0%ni, 77.8%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu2 : 5.9%us, 11.8%sy, 0.0%ni, 82.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu3 : 23.5%us, 29.4%sy, 0.0%ni, 47.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu4 : 5.6%us, 5.6%sy, 0.0%ni, 88.9%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu5 : 11.1%us, 16.7%sy, 0.0%ni, 72.2%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu6 : 5.3%us, 5.3%sy, 0.0%ni, 84.2%id, 0.0%wa, 0.0%hi, 5.3%si, 0.0%st
Cpu7 : 16.7%us, 33.3%sy, 0.0%ni, 50.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu8 : 5.6%us, 0.0%sy, 0.0%ni, 94.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu9 : 5.6%us, 94.4%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu10 : 10.5%us, 31.6%sy, 0.0%ni, 57.9%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu11 : 12.5%us, 25.0%sy, 0.0%ni, 62.5%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu12 : 10.5%us, 15.8%sy, 0.0%ni, 73.7%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu13 : 0.0%us, 70.6%sy, 0.0%ni, 29.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu14 : 0.0%us,100.0%sy, 0.0%ni, 0.0%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Cpu15 : 5.6%us, 0.0%sy, 0.0%ni, 94.4%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
Mem: 32823540k total, 9229512k used, 23594028k free, 214588k buffers
Swap: 0k total, 0k used, 0k free, 809616k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
8177 root 20 0 110m 3268 1544 S 0.0 0.0 0:00.66 modb
13784 root 20 0 110m 3268 1544 S 0.0 0.0 0:00.00 modb
看起來 CPU 和內存使用沒有什麼異常。經過 pstack 進行觀察
[root@upucore_1 ~]# pstack 8177
Thread 2 (Thread 0x7f26f132e700 (LWP 13784)):
#0 0x00000032698e8ef3 in epoll_wait () from /lib64/libc.so.6
#1 0x00007f26f1f62a96 in epoll_dispatch () from /opt/mcu/modbcore/libevent_core.so
#2 0x00007f26f1f529f6 in event_base_loop () from /opt/mcu/modbcore/libevent_core.so
#3 0x0000000000408fcc in sql_event_thread_loop(event_thread_t*) ()
#4 0x0000003269c079d1 in start_thread () from /lib64/libpthread.so.0
#5 0x00000032698e88fd in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7f26f1330720 (LWP 8177)):
#0 0x00000032698db53d in write () from /lib64/libc.so.6
#1 0x0000003269871ad3 in _IO_new_file_write () from /lib64/libc.so.6
#2 0x000000326987199a in _IO_new_file_xsputn () from /lib64/libc.so.6
#3 0x0000003269849681 in buffered_vfprintf () from /lib64/libc.so.6
#4 0x000000326984421e in vfprintf () from /lib64/libc.so.6
#5 0x000000326984f0e8 in fprintf () from /lib64/libc.so.6
#6 0x00007f26f1d37f26 in fsm(amqp_connection_state_t_*) () from /opt/mcu/modbcore/librabbitmq_r.so
#7 0x00007f26f1f52dbc in event_base_loop () from /opt/mcu/modbcore/libevent_core.so
#8 0x0000000000406835 in main ()
發現 pid 爲 8177 的 modb 調用棧確實會卡在 write() 系統調用上。對應到業務代碼,則是 modb(內部連接的共享庫 librabbitmq.so)中經過 fprintf 輸出日誌到 stderr 上的狀況。
繼續經過 strace 確認一下
[root@upucore_1 ~]# strace -t -p 8177
Process 8177 attached - interrupt to quit
09:53:30 write(2, "110000199\",null,null,null,null,n"..., 1667) = 1667
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7607275, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7607275, ...}) = 0
09:53:30 lseek(18, 7607275, SEEK_SET) = 7607275
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 \n[Modb] l"..., 4096) = 4096
09:53:30 write(18, "values(?,?)\"],\"src...\n", 22) = 22
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 write(2, "[Modb] ####### curBMC(1755) > l"..., 54) = 54
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611393, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611393, ...}) = 0
09:53:30 lseek(18, 7611393, SEEK_SET) = 7611393
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] ##"..., 77) = 77
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 write(2, "[Modb] ####### modbKey = 26277d"..., 90) = 90
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611470, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611470, ...}) = 0
09:53:30 lseek(18, 7611470, SEEK_SET) = 7611470
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] ##"..., 113) = 113
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 sendto(8, "\f\0\0\0\3use modb_db", 16, 0, NULL, 0) = 16
09:53:30 recvfrom(8, "\7\0\0\1\0\0\0\2\0\0\0", 16384, 0, NULL, NULL) = 11
09:53:30 write(2, "[Modb] 'use modb_db' Success!\n", 30) = 30
09:53:30 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611583, ...}) = 0
09:53:30 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:30 fstat(18, {st_mode=S_IFREG|0644, st_size=7611583, ...}) = 0
09:53:30 lseek(18, 7611583, SEEK_SET) = 7611583
09:53:30 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:30 write(18, "[8177] 17 Sep 09:53:30 [Modb] 'u"..., 53) = 53
09:53:30 close(18) = 0
09:53:30 munmap(0x7f26f1553000, 4096) = 0
09:53:30 sendto(8, "\305\0\0\0\3insert into modbVer(bmc_ver"..., 201, 0, NULL, 0) = 201
09:53:30 recvfrom(8, "\7\0\0\1\0\1\0\2\0\0\0", 16384, 0, NULL, NULL) = 11
09:53:30 write(2, "[Modb] 'insert into modbVer(bmc_"..., 215
(此處會卡住,人爲回車進行分隔)
) = 215
09:53:34 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611636, ...}) = 0
09:53:34 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611636, ...}) = 0
09:53:34 lseek(18, 7611636, SEEK_SET) = 7611636
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(18, "[8177] 17 Sep 09:53:34 [Modb] 'i"..., 238) = 238
09:53:34 close(18) = 0
09:53:34 munmap(0x7f26f1553000, 4096) = 0
09:53:34 write(2, "\n[Modb] local_comsume_cb: not DE"..., 83) = 83
09:53:34 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611874, ...}) = 0
09:53:34 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:34 fstat(18, {st_mode=S_IFREG|0644, st_size=7611874, ...}) = 0
09:53:34 lseek(18, 7611874, SEEK_SET) = 7611874
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(18, "[8177] 17 Sep 09:53:34 \n[Modb] l"..., 106) = 106
09:53:34 close(18) = 0
...
09:53:34 munmap(0x7f26f1553000, 4096) = 0
09:53:34 sendto(16, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\1\251\0\316", 21, MSG_NOSIGNAL, NULL, 0) = 21
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] --> send"..., 43) = 43
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16][fsm:conn_"..., 56) = 56
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {}, 32, 0) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] mq_pop => timestamp ="..., 53) = 53
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 79) = 79
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 96) = 96
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17][fsm:conn_"..., 60) = 60
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 recvfrom(16, "\1\0\1\0\0\0.\0<\0<\vCON_f_LOCAL\0\0\0\0\0\0\1\252\1"..., 131072, 0, NULL, NULL) = 4168
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 47) = 47
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {{EPOLLOUT, {u32=17, u64=17}}}, 32, 0) = 1
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\1\0\1\0\0\0B\0<\0(\0\0\25modb.transfer.ex:c"..., 74, MSG_NOSIGNAL, NULL, 0) = 74
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] send Basic.Publish =>"..., 65) = 65
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 47) = 47
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\2\0\1\0\0\0 \0<\0\0\0\0\0\0\0\0\2\371\220\0\20applicatio"..., 40, MSG_NOSIGNAL, NULL, 0) = 40
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 55) = 55
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 sendto(17, "\3\0\1\0\0\2\371{\"keys\": [], \"app\": \"movi"..., 769, MSG_NOSIGNAL, NULL, 0) = 769
09:53:34 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:34 write(2, "[MQ][s:17] send Content-Body => "..., 64) = 64
09:53:34 write(2, "[MQ][PRO_t_TRANS s:17] --> send"..., 53) = 53
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLOUT, {u32=17, u64=17}}) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 17, {EPOLLIN, {u32=17, u64=17}}) = 0
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 55) = 55
09:53:34 epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 epoll_wait(3, {}, 32, 0) = 0
09:53:34 epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLIN, {u32=16, u64=16}}) = 0
09:53:34 write(2, "[MQ][CON_f_LOCAL s:16] <-- recv"..., 53) = 53
09:53:34 write(2, "\n[Modb] local_comsume_cb: Recv C"..., 4115
(此處會卡住,人爲回車進行分隔)
) = 4115
09:53:42 open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
09:53:42 fstat(18, {st_mode=S_IFREG|0644, st_size=7613621, ...}) = 0
09:53:42 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
09:53:42 fstat(18, {st_mode=S_IFREG|0644, st_size=7613621, ...}) = 0
09:53:42 lseek(18, 7613621, SEEK_SET) = 7613621
09:53:42 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
09:53:42 write(18, "[8177] 17 Sep 09:53:42 \n[Modb] l"..., 4096) = 4096
09:53:42 write(18, "values(?,?)\"],\"src...\n", 22) = 22
09:53:42 close(18) = 0
...
09:53:51 write(2, "[MQ][s:17] send Content-Body => "..., 64
^C <unfinished ...>
Process 8177 detached
[root@upucore_1 ~]#
經過 strace 發現,每次卡住的狀況都發生在進行 write 系統調用時,且爲向 fd 2 上輸出日誌信息。能夠看到,輸出的日誌內容包括 modb 自身的日誌和其所連接的 librabbitmq.so 中的日誌。
懷疑是否由於硬盤被寫滿致使
,查看系統硬盤和內存狀況,發現均無異常。
[root@upucore_1 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda2 39G 9.8G 27G 27% /
tmpfs 16G 0 16G 0% /dev/shm
/dev/sda1 58G 7.9G 47G 15% /opt/mcu
/dev/sda5 4.9G 138M 4.5G 3% /opt/sync
/dev/sda3 9.7G 150M 9.0G 2% /usr/mcu/log
gfsd1:/web_app 39G 9.8G 27G 27% /mnt/files
[root@upucore_1 ~]#
[root@upucore_1 ~]#
[root@upucore_1 ~]# free -m
total used free shared buffers cached
Mem: 32054 9081 22972 0 221 818
-/+ buffers/cache: 8042 24012
Swap: 0 0 0
[root@upucore_1 ~]#
懷疑是否因爲有其餘業務程序寫磁盤,致使 I/O 能力不夠。查看發現只有 mysql 線程的磁盤寫比較高,但也在合理範圍內。
[root@upucore_1 data]# iotop -o
Total DISK READ: 3.06 K/s | Total DISK WRITE: 1344.33 K/s
TID PRIO USER DISK READ DISK WRITE SWAPIN IO> COMMAND
540 be/3 root 0.00 B/s 0.00 B/s 0.00 % 25.83 % [jbd2/sda2-8]
7917 be/4 root 0.00 B/s 45.93 K/s 0.00 % 6.60 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
4806 be/4 mysql 0.00 B/s 58.18 K/s 0.00 % 4.74 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
7934 be/4 root 0.00 B/s 30.62 K/s 0.00 % 4.69 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
7916 be/4 root 0.00 B/s 27.56 K/s 0.00 % 2.47 % beam.smp -W w -K true -A30 -P 1048576 -- -root /usr/local/~l/rabbitmq/sbin/../var/lib/rabbitmq/mnesia/rabbitmq@rmq_1"
1229 be/3 root 0.00 B/s 79.62 K/s 0.00 % 1.08 % [jbd2/sda1-8]
6923 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.80 % run_erl -daemon /tmp//opt/mcu/upucore/ /opt/mcu/upucore/log exec /opt/mcu/upucore/bin/upu console ''
9449 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.61 % run_erl -daemon /tmp//opt/mcu/upu/ /opt/mcu/upu/log exec /opt/mcu/upu/bin/upu console ''
4933 be/4 mysql 0.00 B/s 857.43 K/s 0.00 % 0.48 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4935 be/4 mysql 0.00 B/s 6.12 K/s 0.00 % 0.31 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
21292 be/4 mysql 3.06 K/s 9.19 K/s 0.00 % 0.27 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4921 be/4 mysql 0.00 B/s 3.06 K/s 0.00 % 0.22 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4803 be/4 mysql 0.00 B/s 0.00 B/s 0.00 % 0.02 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
4804 be/4 mysql 0.00 B/s 0.00 B/s 0.00 % 0.00 % mysqld --basedir=/usr/local/mysql --datadir=/usr/local/mys~ql/data/upucore_1.pid --socket=/tmp/mysql.sock --port=3306
1602 be/4 root 0.00 B/s 21.44 K/s 0.00 % 0.00 % ./sa
1612 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.00 % ./sa
1638 be/4 root 0.00 B/s 9.19 K/s 0.00 % 0.00 % ./sa
13119 be/4 root 0.00 B/s 3.06 K/s 0.00 % 0.00 % nms_adapter
1 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % init
2 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [kthreadd]
3 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/0]
4 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/0]
5 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/0]
6 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/0]
7 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/1]
8 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/1]
9 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/1]
10 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/1]
11 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/2]
12 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/2]
13 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/2]
14 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/2]
15 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/3]
16 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/3]
17 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/3]
18 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [watchdog/3]
19 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/4]
20 rt/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [migration/4]
21 be/4 root 0.00 B/s 0.00 B/s 0.00 % 0.00 % [ksoftirqd/4]
[root@upucore_1 data]#
通過一番分析,發現以前的排查方向存在問題。日誌輸出到 stderr 上,但 stderr 卻沒有重定向到文件上。因此應該和磁盤 I/O 沒直接關係。
查看下 stderr 的指向
[root@upucore_1 ~]# ps aux|grep modb
root 4179 0.0 0.0 111744 2080 ? Sl 16:57 0:00 /opt/mcu/modb/modb /etc/modb.conf
root 4189 0.0 0.0 110952 2040 ? Sl 16:57 0:00 /opt/mcu/modbcore/modb /etc/modbcore.conf
root 4413 0.0 0.0 108200 1300 ? S 16:57 0:00 /bin/sh /usr/local/keepalived/etc/keepalived/script_master.sh mysql nds modbcore zookeeper
root 4750 0.0 0.0 103256 872 pts/0 S+ 16:57 0:00 grep modb
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 4179 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4179 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 4189 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4189 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof /dev/console
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
modb 4179 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4179 root 2u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 0u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 1u CHR 5,1 0t0 5559 /dev/console
modb 4189 root 2u CHR 5,1 0t0 5559 /dev/console
[root@upucore_1 ~]#
結果發現,確實兩個 modb 業務的 fd 0,1,2 均指向了 /dev/console 。而且系統中只有 modb 業務的 fd 0,1,2 是指向 /dev/console 的。
那麼 /dev/console 是什麼呢?
=== 我是假裝者真好看的分隔線 ===
- tty 是一類 char 設備的通稱;
- /dev/console 是一個虛擬的 tty ,它將映射到真正的 tty 上;
- console 有多種含義,簡單理解,能夠認爲特指 printk 輸出的設備;
- console 和 tty 有很大區別:console 是個只輸出的設備,功能很簡單,只能在內核中訪問;tty 是 char 設備,能夠被用戶程序訪問。
- 實際的驅動,好比串口驅動會對物理設備(串口)註冊兩次,一個是 tty ,一個是 console ,並經過在 console 的結構中記錄 tty 的主次設備號創建了聯繫。
- 在內核中,tty 和 console 均可以註冊多個。
- 當內核命令行上指定 console=ttyS0 之類的參數時,首先肯定了 printk 實際使用哪一個 console 做爲輸出,其次因爲 console 和 tty 之間的對應關係,打開 /dev/console 時,就會映射到相應的 tty 上。用一句話說:/dev/console 將映射到默認 console 對應的 tty 上。
=== 我是假裝者真好看的分隔線 ===
The /dev/console device is not necessarily available to you. The reason for that is because some system messages are written to the system console. For Linux that device would only show any result if you happen to look at the current virtual terminal.
From the documentation:
- /dev/tty Current TTY device
- /dev/console System console
- /dev/tty0 Current virtual console
In the good old days /dev/console was System Administrator console.
And TTYs were users' serial devices attached to a server.
Now /dev/console and /dev/tty0 represent current display and usually are the same.
You can override it for example by adding console=ttyS0 to grub.conf. After that your /dev/tty0 is a monitor and /dev/console is /dev/ttyS0.
/dev/console is a virtual set of devices which can be set as a parameter at boot time.
It might be redirected to a serial device or a virtual console and by default points to /dev/tty0.
/dev/tty0 is the current virtual console
/dev/tty[1-x] is one of the virtual consoles you switch to with control-alt-F1 and so on.
/dev/tty is the console used by the process querying it. Unlike the other devices, you do not need root privileges to write to it. Note also that processes like the ones launched by cron and similar batch processes have no usable /dev/tty, as they aren't associated with any. These processes have a ? in the TTY column of ps -ef output.When multiple console= options are passed to the kernel, the console output will go to more than one device.
=== 我是假裝者真好看的分隔線 ===
tty devices are among the most numerous and the most confusing of all the files in /dev. Every time you log in or launch a new xterm, a tty device is assigned to the shell. This device is responsible for displaying the shell's output and providing its input. It is known as the controlling tty for that shell and its child processes. The device /dev/tty is a magic device that refers to whichever tty device is the controlling tty for that process.
Besides /dev/tty, there are three types of tty devices: consoles, serial ports and pseudo devices.
Console ttys are used when the keyboard and monitor are directly connected to the system without running the X Window System. Since you can have several virtual consoles, the devices are tty0 through tty63. In theory you can have 64 virtual consoles, but most people use only a few. The device /dev/console is identical to tty0 and is needed for historical reasons. If your system lets you log in on consoles 1 through 6, then when you run X Windows System, X uses console 7, so you'll need /dev/tty1 through /dev/tty7 on your system. I recommend having files up through /dev/tty12. For more information on using virtual consoles, see the article Keyboards, Consoles and VT Cruising by John Fisk in the November 1996 issue of Linux Journal.
=== 我是假裝者真好看的分隔線 ===
從上面的信息能夠知道,/dev/console 的值和內核命令行參數設置有關。查看當前內核命令行設置,以下
[root@upucore_1 ~]# cat /proc/cmdline
ro root=UUID=ef97984a-07ef-4cdb-9825-cb845c23e7ec rd_NO_LUKS LANG=en_US.UTF-8 KEYTABLE=us rd_NO_MD SYSFONT=latarcyrheb-sun16 crashkernel=131M@0M console=ttyS0,9600n8 rd_NO_LVM rd_NO_DM
[root@upucore_1 ~]#
能夠看到 console=ttyS0,9600n8 這個設置,意思是輸出到 /dev/console 的內容,實際是發送給了串口,而串口波特率默認值爲 9600 b/s 。雖而後續會調整到 115200 ,那也不過才 115 kB/s 的輸出速度。
因此,在日誌輸出頻繁的狀況下,將致使串口輸出速度達到飽和,進而卡住業務進程中的 write 系統調用,最終致使業務行爲上「慢半拍」。
雖然已經知道問題的緣由了,但爲何 modb 這個業務的 fd 0,1,2 會指向 /dev/console 呢?
這和兩方面有關:
- 業務 modb 已經被添加爲系統服務,因此在系統啓動時會被 init 進程所啓動;
- 系統工程師爲了排查問題的方便,在 /proc/cmdline 中配置了 console=ttyS0,9600n8
那麼是否其餘業務做爲系統服務被 init 進程啓動時也有這個問題呢?查看信息以下
[root@upucore_1 ~]# chkconfig --list
...
haproxy 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
keepalived 0:off 1:off 2:off 3:off 4:off 5:off 6:off
...
memcached 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
modb 0:off 1:off 2:on 3:on 4:on 5:on 6:off
modbcore 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
nginx 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
rabbitmq 0:off 1:off 2:off 3:off 4:off 5:off 6:off
...
redis 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
zookeeper 0:off 1:off 2:on 3:on 4:on 5:on 6:off
...
[root@upucore_1 ~]#
選擇其中幾個進行查看
[root@upucore_1 ~]# lsof -c redis -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
redis-ser 4159 root 0u CHR 1,3 0t0 3968 /dev/null
redis-ser 4159 root 1u CHR 1,3 0t0 3968 /dev/null
redis-ser 4159 root 2u CHR 1,3 0t0 3968 /dev/null
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -c memcached -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
memcached 1976 root 0u CHR 1,3 0t0 3968 /dev/null
memcached 1976 root 1u CHR 1,3 0t0 3968 /dev/null
memcached 1976 root 2u CHR 1,3 0t0 3968 /dev/null
[root@upucore_1 ~]#
[root@upucore_1 ~]# ps ajxf|grep nginx
1 1994 1994 1994 ? -1 Ss 0 0:00 nginx: master process /usr/local/nginx/sbin/nginx -c /usr/local/nginx/conf/nginx.conf
1994 1995 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1996 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1997 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 1998 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2000 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2001 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2002 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2003 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2004 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2005 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2007 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2008 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2009 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2010 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2011 1994 1994 ? -1 S 0 0:04 \_ nginx: worker process
1994 2012 1994 1994 ? -1 S 0 0:03 \_ nginx: worker process
1994 2013 1994 1994 ? -1 S 0 0:02 \_ nginx: cache manager process
8264 17252 17251 8264 pts/0 17251 S+ 0 0:00 | \_ grep nginx
[root@upucore_1 ~]#
[root@upucore_1 ~]# lsof -p 1994 -a -d 0-2
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
nginx 1994 root 0u CHR 1,3 0t0 3968 /dev/null
nginx 1994 root 1u CHR 1,3 0t0 3968 /dev/null
nginx 1994 root 2w REG 8,2 9623092 1051452 /usr/local/nginx/logs/error.log
[root@upucore_1 ~]#
[root@upucore_1 ~]#
能夠看到,fd 0,1,2 要麼被重定向到 /dev/null ,要麼被重定向到文件中了。
好吧,這說明要想做爲系統服務運行的話,仍是應該「潔身自好」才行~~
解決辦法:
再也不將業務 modb 添加爲系統服務,只經過自定義腳本控制其啓動和中止。而且在啓動腳本中作重定向。
由於自定義腳本對應的進程爲 -bash 的子進程,而業務程序以常規命令行形式啓動,因此 fd 0,1,2 的重定向取決於和當前業務相關的進程樹中是否有重定向動做。而一般的實現方式爲,在啓動業務程序的時候直接經過 >/xxx/xxx.log 2>&1 的方式進行處理。
最後肯定一下,輸出的日誌數據是否過大,以及是否存在因爲代碼編寫錯誤致使的無用輸出。結果發現,消息內容是合法的,消息體確實比較大。
[root@upucore_1 modbcore]# strace -s 4096 -p 8177
Process 8177 attached - interrupt to quit
...
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19540496, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19540496, ...}) = 0
lseek(18, 19540496, SEEK_SET) = 19540496
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 \n[Modb] local_comsume_cb: Recv Content-Body ==>\n {\"app\":\"movision\",\"data\":{\"account\":\"0512110000195\",\"accountType\":1,\"binded\":false,\"brief\":\"\",\"cmsApproval\":false,\"createdAt\":null,\"dateOfBirth\":null,\"dcsAdmin\":false,\"defaultServiceDomainAdmin\":false,\"defaultUserDomainAdmin\":false,\"deviceGuid\":\"10252200000000000000512110000195\",\"deviceType\":\"522\",\"e164\":\"0512110000195\",\"email\":\"\",\"enable\":true,\"enableBMC\":false,\"enableCall\":false,\"enableDCS\":true,\"enableHD\":true,\"enableHDMeeting\":true,\"enableIncoming\":false,\"enableLive\":false,\"enableMeeting\":true,\"enableMeetingSMS\":false,\"enableNM\":false,\"enableOut\":false,\"enablePlay\":false,\"enableRoam\":false,\"enableSatellite\":false,\"enableSatelliteP2P\":false,\"enableUMC\":false,\"enableVRS\":true,\"enableVenueMonitor\":false,\"enableVideo\":false,\"enableWeibo\":true,\"extNum\":\"\",\"fax\":\"\",\"fullPy\":\"\",\"id\":643,\"ipRegion\":\"\",\"jid\":\"b9da57fa-c384-4247-a797-05fa19537730@hello\",\"jobNum\":\"\",\"limited\":false,\"meetingAdmin\":false,\"mobile\":\"\",\"moid\":\"b9da57fa-c384-4247-a797-05fa19537730\",\"name\":\"0512110000195\",\"nmAdmin\":false,\"nuServerId\":\"3b7cfbb3-b5c5-42b2-a759-209557a1ecf3\",\"officeLocation\":\"\",\"password\":\"21218cca77804d2ba1922c33e0151105\",\"portrait128\":\"\",\"portrait256\":\"\",\"portrait32\":\"\",\"portrait40\":\"\",\"portrait64\":\"\",\"restrictStrategy\":0,\"restrictUsedOn\":\"HD100;HD100S;HD1000;WD1000;TS6610;TS5610;TrueLink;iPad;iPhone;Android_Phone;SKY X500 1080P60;SKY X500 1080P30;SKY X500 720P60;TrueSens X500 1080P;TrueSens X500 720P;SKY X700;SKY X900;SKY for Windows;SKY for Mac;SKY for Android TV;SKY for Android Pad;HD100-TP\",\"seat\":\"\",\"serviceDomainAdmin\":false,\"serviceDomainMoid\":\"26277da4-b84d-4cad-8c69-fc977c9397d0\",\"serviceDomainName\":\"keda\",\"sex\":\"\",\"userDomainAdmin\":false,\"userDomainMoid\":\"zzeq2smvxtq687vgzno1vbco\",\"userDomainName\":\"admin\",\"validityPeriod\":{\"date\":29,\"day\":5,\"hours\":0,\"minutes\":0,\"month\":11,\"seconds\":0,\"time\":1514476800000,\"timezoneOffset\":-480,\"year\":117},\"vrsAdmin\":false,\"weiboAdmin\":false},\"db\":\"use ap\",\"event\":\"createAccount\",\"init\":true,\"keys\":[\"26277da4-b84d-4cad-8c69-fc977c9397d0\"],\"sqlargs\":[[\"b9da57fa-c384-4247-a797-05fa19537730\",\"0512110000195\",null,\"0512110000195\",null,null,\"b9da57fa-c384-4247-a797-05fa19537730@hello\",\"21218cca77804d2ba1922c33e0151105\",\"zzeq2smvxtq687vgzno1vbco\",\"26277da4-b84d-4cad-8c69-fc977c9397d0\",\"10252200000000000000512110000195\",\"3b7cfbb3-b5c5-42b2-a759-209557a1ecf3\",\"522\",1,false,true,false,null],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"0512110000195\",\"\",\"\",\"\",\"\",\"\",\"\",\"\",null,\"\",\"2017-12-29\",\"HD100;HD100S;HD1000;WD1000;TS6610;TS5610;TrueLink;iPad;iPhone;Android_Phone;SKY X500 1080P60;SKY X500 1080P30;SKY X500 720P60;TrueSens X500 1080P;TrueSens X500 720P;SKY X700;SKY X900;SKY for Windows;SKY for Mac;SKY for Android TV;SKY for Android Pad;HD100-TP\",null,null,null,null,null,null],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableMeeting\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableHD\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableVRS\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableHDMeeting\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableDCS\"],[\"b9da57fa-c384-4247-a797-05fa19537730\",\"enableWeibo\"]],\"sqls\":[\"insert into user_info(moid,account,email,e164,mobile,ip_region,jid,password,user_domain_moid,service_domain_moid,device_guid,nu_server_id,device_type,account_type,binded,enable,created_at,limited,manage_user_domain_moid) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,now(),?,?)\",\"insert into user_profile(moid,name,seat,ext_num,fax,job_num,brief,full_py,sex,date_of_birth,office_location,validity_period,restrict_used_on,restrict_strategy,portrait_32,portrait_40,portrait_64,portrait_128,portrait_256) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\",\"insert into user_privilege_data(moid,privilege_key) values(?,?)\"],\"sr", 4096) = 4096
write(18, "c\":\"\",\"version\":\"\"}\n\n", 21) = 21
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
write(2, "\n[Modb] local_comsume_cb: not DEST Platform, do TRANSFER and BROADCAST if config!\n\n", 83) = 83
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19544613, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19544613, ...}) = 0
lseek(18, 19544613, SEEK_SET) = 19544613
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 \n[Modb] local_comsume_cb: not DEST Platform, do TRANSFER and BROADCAST if config!\n\n", 106) = 106
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(2, "[MQ] mq_push => timestamp = 17 Sep 15:06:45.307\n", 48) = 48
epoll_ctl(3, EPOLL_CTL_ADD, 16, {EPOLLOUT, {u32=16, u64=16}}) = 0
epoll_wait(3, {{EPOLLIN, {u32=17, u64=17}}, {EPOLLOUT, {u32=16, u64=16}}}, 32, 10) = 2
epoll_ctl(3, EPOLL_CTL_DEL, 17, {EPOLLIN, {u32=17, u64=17}}) = 0
epoll_ctl(3, EPOLL_CTL_DEL, 16, {EPOLLOUT, {u32=16, u64=16}}) = 0
recvfrom(17, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\16h\0\316", 131072, 0, NULL, NULL) = 21
write(2, "[MQ][PRO_t_TRANS s:17] <-- recv Basic.Ack\n", 43) = 43
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(2, "[MQ][s:17] recv Basic.Ack => timestamp = 17 Sep 15:06:45.308\n", 61) = 61
write(2, "[MoDB] ### CB: Publisher Confirm -- [Basic.Ack] Tag:[PRO_t_TRANS] Sock[17] Delivery_Tag:[3688] multiple:[0]\n", 109) = 109
open("/var/log/modbcore.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = 18
fstat(18, {st_mode=S_IFREG|0644, st_size=19544719, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f26f1553000
fstat(18, {st_mode=S_IFREG|0644, st_size=19544719, ...}) = 0
lseek(18, 19544719, SEEK_SET) = 19544719
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
write(18, "[8177] 17 Sep 15:06:45 [MoDB] ### CB: Publisher Confirm -- [Basic.Ack] Tag:[PRO_t_TRANS] Sock[17] Delivery_Tag:[3688] multiple:[0]\n", 132) = 132
close(18) = 0
munmap(0x7f26f1553000, 4096) = 0
sendto(16, "\1\0\1\0\0\0\r\0<\0P\0\0\0\0\0\0\22\26\0\316", 21, MSG_NOSIGNAL, NULL, 0) = 21
write(2, "[MQ][CON_f_LOCAL s:16] --> send Basic.Ack\n", 43) = 43
write(2, "[MQ][CON_f_LOCAL s:16][fsm:conn_idle][C] Start to recv!\n", 56
(卡住)
) = 56
...
^C <unfinished ...>
Process 8177 detached
[root@upucore_1 modbcore]#
參考: