서버에 리소스 요청 시 Response를 정상적으로 받을 수 없는 이슈가 있어서 확인을 해봤다.
증상
1. WebServer(Nginx) 에 리소스를 요청 시 일단 Response Headers 를 정상적으로 받아온다.
2. 본문은 계속 받아오는 중.. (CAUTION: request is not finished yet!)
3. 약 1분 정도 시간이 지나면 아래와 같이 오류가 발생하며 이미지의 일부분만 다운받은 채 Response 가 종료된다.
(net::ERR_CONTENT_LENGTH_MISMATCH 메시지 출력됨)
원인분석
1. WebServer 요청량(Request) 대비 동시처리 가능한 임계치 설정 확인
서버에 너무 많은 Request가 몰렸을 경우에 WebServer 에서 동시에 처리 가능한 임계치 설정 확인 및 필요하다면 늘려 주어야 한다.
Nginx 프로세스에서 처리하고 있는 커넥션 개수가 많지 않음을 알 수 있다.
(따로 처리가 필요하지 않음)
[root@WebServer1 ~]# ps -ef | grep nginx
root 4634 4542 0 14:03 pts/2 00:00:00 grep nginx
root 16272 1 0 Feb13 ? 00:00:00 nginx: master process ./nginx
nobody 16273 16272 0 Feb13 ? 00:34:15 nginx: worker process
nobody 16274 16272 0 Feb13 ? 00:34:34 nginx: worker process
nobody 16275 16272 0 Feb13 ? 00:34:37 nginx: worker process
nobody 16276 16272 0 Feb13 ? 00:34:17 nginx: worker process
nobody 16277 16272 0 Feb13 ? 00:34:18 nginx: worker process
nobody 16278 16272 0 Feb13 ? 00:34:18 nginx: worker process
nobody 16279 16272 0 Feb13 ? 00:34:55 nginx: worker process
nobody 16280 16272 0 Feb13 ? 00:34:31 nginx: worker process
[root@WebServer1 ~]#
[root@WebServer1 ~]# cd /proc/16272
[root@WebServer1 16272]# ls -l
합계 0
dr-xr-xr-x 2 root sys 0 6 26 14:04 attr
-r-------- 1 root root 0 6 26 14:04 auxv
-r--r--r-- 1 root root 0 5 26 07:50 cmdline
-rw-r--r-- 1 root root 0 6 26 14:04 coredump_filter
-r--r--r-- 1 root root 0 6 26 14:04 cpuset
lrwxrwxrwx 1 root root 0 6 26 14:04 cwd -> /app/nginx-1.2.8/sbin
-r-------- 1 root root 0 6 26 14:04 environ
lrwxrwxrwx 1 root root 0 5 26 07:53 exe -> /app/nginx-1.2.8/sbin/nginx
dr-x------ 2 root root 0 6 26 14:04 fd
dr-x------ 2 root root 0 6 26 14:04 fdinfo
-r-------- 1 root root 0 6 26 14:04 io
-r--r--r-- 1 root root 0 6 26 14:04 limits
-rw-r--r-- 1 root root 0 6 26 14:04 loginuid
-r--r--r-- 1 root root 0 6 26 14:04 maps
-rw------- 1 root root 0 6 26 14:04 mem
-r--r--r-- 1 root root 0 6 26 14:04 mounts
-r-------- 1 root root 0 6 26 14:04 mountstats
-r--r--r-- 1 root root 0 6 26 14:04 numa_maps
-rw-r--r-- 1 root root 0 6 26 14:04 oom_adj
-r--r--r-- 1 root root 0 6 26 14:04 oom_score
lrwxrwxrwx 1 root root 0 6 26 14:04 root -> /
-r--r--r-- 1 root root 0 6 26 14:04 schedstat
-r--r--r-- 1 root root 0 6 26 14:04 smaps
-r--r--r-- 1 root root 0 5 25 19:49 stat
-r--r--r-- 1 root root 0 5 25 19:49 statm
-r--r--r-- 1 root root 0 5 31 08:50 status
dr-xr-xr-x 3 root sys 0 6 26 14:04 task
-r--r--r-- 1 root root 0 5 26 07:53 wchan
[root@WebServer1 16272]#
[root@WebServer1 16272]# cd fd
[root@WebServer1 fd]# ls -l
합계 0
lrwx------ 1 root root 64 6 26 14:04 0 -> /dev/null
lrwx------ 1 root root 64 6 26 14:04 1 -> /dev/null
lrwx------ 1 root root 64 6 26 14:04 10 -> socket:[58331]
lrwx------ 1 root root 64 6 26 14:04 11 -> socket:[58333]
lrwx------ 1 root root 64 6 26 14:04 12 -> socket:[58334]
lrwx------ 1 root root 64 6 26 14:04 13 -> socket:[58336]
lrwx------ 1 root root 64 6 26 14:04 14 -> socket:[58337]
lrwx------ 1 root root 64 6 26 14:04 15 -> socket:[58338]
lrwx------ 1 root root 64 6 26 14:04 16 -> socket:[58339]
lrwx------ 1 root root 64 6 26 14:04 17 -> socket:[58342]
lrwx------ 1 root root 64 6 26 14:04 18 -> socket:[58343]
lrwx------ 1 root root 64 6 26 14:04 19 -> socket:[58345]
l-wx------ 1 root root 64 6 26 14:04 2 -> /logs/nginx/error.log
lrwx------ 1 root root 64 6 26 14:04 20 -> socket:[58346]
lrwx------ 1 root root 64 6 26 14:04 21 -> socket:[58348]
lrwx------ 1 root root 64 6 26 14:04 22 -> socket:[58349]
lrwx------ 1 root root 64 6 26 14:04 3 -> socket:[58327]
l-wx------ 1 root root 64 6 26 14:04 4 -> /logs/nginx/error.log
lr-x------ 1 root root 64 6 26 14:04 5 -> /usr/src/pam/shadow/root (deleted)
l-wx------ 1 root root 64 6 26 14:04 6 -> /logs/nginx/access.log
lrwx------ 1 root root 64 6 26 14:04 7 -> socket:[58324]
lrwx------ 1 root root 64 6 26 14:04 8 -> socket:[58328]
lrwx------ 1 root root 64 6 26 14:04 9 -> socket:[58330]
[root@WebServer1 fd]#
[root@WebServer1 fd]# ls -l | wc -l
24
[root@WebServer1 fd]#
root 4634 4542 0 14:03 pts/2 00:00:00 grep nginx
root 16272 1 0 Feb13 ? 00:00:00 nginx: master process ./nginx
nobody 16273 16272 0 Feb13 ? 00:34:15 nginx: worker process
nobody 16274 16272 0 Feb13 ? 00:34:34 nginx: worker process
nobody 16275 16272 0 Feb13 ? 00:34:37 nginx: worker process
nobody 16276 16272 0 Feb13 ? 00:34:17 nginx: worker process
nobody 16277 16272 0 Feb13 ? 00:34:18 nginx: worker process
nobody 16278 16272 0 Feb13 ? 00:34:18 nginx: worker process
nobody 16279 16272 0 Feb13 ? 00:34:55 nginx: worker process
nobody 16280 16272 0 Feb13 ? 00:34:31 nginx: worker process
[root@WebServer1 ~]#
[root@WebServer1 ~]# cd /proc/16272
[root@WebServer1 16272]# ls -l
합계 0
dr-xr-xr-x 2 root sys 0 6 26 14:04 attr
-r-------- 1 root root 0 6 26 14:04 auxv
-r--r--r-- 1 root root 0 5 26 07:50 cmdline
-rw-r--r-- 1 root root 0 6 26 14:04 coredump_filter
-r--r--r-- 1 root root 0 6 26 14:04 cpuset
lrwxrwxrwx 1 root root 0 6 26 14:04 cwd -> /app/nginx-1.2.8/sbin
-r-------- 1 root root 0 6 26 14:04 environ
lrwxrwxrwx 1 root root 0 5 26 07:53 exe -> /app/nginx-1.2.8/sbin/nginx
dr-x------ 2 root root 0 6 26 14:04 fd
dr-x------ 2 root root 0 6 26 14:04 fdinfo
-r-------- 1 root root 0 6 26 14:04 io
-r--r--r-- 1 root root 0 6 26 14:04 limits
-rw-r--r-- 1 root root 0 6 26 14:04 loginuid
-r--r--r-- 1 root root 0 6 26 14:04 maps
-rw------- 1 root root 0 6 26 14:04 mem
-r--r--r-- 1 root root 0 6 26 14:04 mounts
-r-------- 1 root root 0 6 26 14:04 mountstats
-r--r--r-- 1 root root 0 6 26 14:04 numa_maps
-rw-r--r-- 1 root root 0 6 26 14:04 oom_adj
-r--r--r-- 1 root root 0 6 26 14:04 oom_score
lrwxrwxrwx 1 root root 0 6 26 14:04 root -> /
-r--r--r-- 1 root root 0 6 26 14:04 schedstat
-r--r--r-- 1 root root 0 6 26 14:04 smaps
-r--r--r-- 1 root root 0 5 25 19:49 stat
-r--r--r-- 1 root root 0 5 25 19:49 statm
-r--r--r-- 1 root root 0 5 31 08:50 status
dr-xr-xr-x 3 root sys 0 6 26 14:04 task
-r--r--r-- 1 root root 0 5 26 07:53 wchan
[root@WebServer1 16272]#
[root@WebServer1 16272]# cd fd
[root@WebServer1 fd]# ls -l
합계 0
lrwx------ 1 root root 64 6 26 14:04 0 -> /dev/null
lrwx------ 1 root root 64 6 26 14:04 1 -> /dev/null
lrwx------ 1 root root 64 6 26 14:04 10 -> socket:[58331]
lrwx------ 1 root root 64 6 26 14:04 11 -> socket:[58333]
lrwx------ 1 root root 64 6 26 14:04 12 -> socket:[58334]
lrwx------ 1 root root 64 6 26 14:04 13 -> socket:[58336]
lrwx------ 1 root root 64 6 26 14:04 14 -> socket:[58337]
lrwx------ 1 root root 64 6 26 14:04 15 -> socket:[58338]
lrwx------ 1 root root 64 6 26 14:04 16 -> socket:[58339]
lrwx------ 1 root root 64 6 26 14:04 17 -> socket:[58342]
lrwx------ 1 root root 64 6 26 14:04 18 -> socket:[58343]
lrwx------ 1 root root 64 6 26 14:04 19 -> socket:[58345]
l-wx------ 1 root root 64 6 26 14:04 2 -> /logs/nginx/error.log
lrwx------ 1 root root 64 6 26 14:04 20 -> socket:[58346]
lrwx------ 1 root root 64 6 26 14:04 21 -> socket:[58348]
lrwx------ 1 root root 64 6 26 14:04 22 -> socket:[58349]
lrwx------ 1 root root 64 6 26 14:04 3 -> socket:[58327]
l-wx------ 1 root root 64 6 26 14:04 4 -> /logs/nginx/error.log
lr-x------ 1 root root 64 6 26 14:04 5 -> /usr/src/pam/shadow/root (deleted)
l-wx------ 1 root root 64 6 26 14:04 6 -> /logs/nginx/access.log
lrwx------ 1 root root 64 6 26 14:04 7 -> socket:[58324]
lrwx------ 1 root root 64 6 26 14:04 8 -> socket:[58328]
lrwx------ 1 root root 64 6 26 14:04 9 -> socket:[58330]
[root@WebServer1 fd]#
[root@WebServer1 fd]# ls -l | wc -l
24
[root@WebServer1 fd]#
2. Nginx 의 Error 로그 확인
Error Log 확인 시 아래와 같이 출력이 되어 디스크 용량을 확인했다.
루트(/) 영역의 사용 가능 용량이 없는 것을 확인했다.
[root@WebServer1 fd]# tail -f /logs/nginx/error.log
2015/06/26 14:04:44 [crit] 16279#0: *74928418 writev() "/app/nginx-1.2.8/proxy_temp/3/04/0011857043" failed (28: No space left on device) while reading upstream, client: ???.???.???.???, server: localhost, request: "GET /web/app/images/1415324940039_5228.png HTTP/1.1", upstream: "http://127.0.0.1:8080/web/app/images/1415324940039_5228.png", host: "example.co.kr"
2015/06/26 14:04:48 [crit] 16279#0: *74928441 writev() "/app/nginx-1.2.8/proxy_temp/8/04/0011857048" failed (28: No space left on device) while reading upstream, client: ???.???.???.???, server: localhost, request: "GET /web/????.jpg HTTP/1.1", upstream: "http://127.0.0.1:8080/web/????.jpg", host: "example.co.kr", referrer: "http://example.co.kr/web/????/index.html"
[root@WebServer1 fd]#
[root@WebServer1 fd]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda3 10154020 9757312 0 100% /
/dev/sda8 10153988 685052 8944820 8% /home
/dev/sda7 10153988 538692 9091180 6% /var
/dev/sda6 10153988 154240 9475632 2% /var/crash
/dev/sda5 10153988 4997928 4631944 52% /usr
/dev/sda2 20315844 176244 19090960 1% /osbackup
/dev/sda1 1019208 44724 921876 5% /boot
tmpfs 6084024 0 6084024 0% /dev/shm
/dev/mapper/vg00-log 20642428 176200 19417652 1% /log
[root@WebServer1 fd]#
2015/06/26 14:04:44 [crit] 16279#0: *74928418 writev() "/app/nginx-1.2.8/proxy_temp/3/04/0011857043" failed (28: No space left on device) while reading upstream, client: ???.???.???.???, server: localhost, request: "GET /web/app/images/1415324940039_5228.png HTTP/1.1", upstream: "http://127.0.0.1:8080/web/app/images/1415324940039_5228.png", host: "example.co.kr"
2015/06/26 14:04:48 [crit] 16279#0: *74928441 writev() "/app/nginx-1.2.8/proxy_temp/8/04/0011857048" failed (28: No space left on device) while reading upstream, client: ???.???.???.???, server: localhost, request: "GET /web/????.jpg HTTP/1.1", upstream: "http://127.0.0.1:8080/web/????.jpg", host: "example.co.kr", referrer: "http://example.co.kr/web/????/index.html"
[root@WebServer1 fd]#
[root@WebServer1 fd]# df -k
Filesystem 1K-blocks Used Available Use% Mounted on
/dev/sda3 10154020 9757312 0 100% /
/dev/sda8 10153988 685052 8944820 8% /home
/dev/sda7 10153988 538692 9091180 6% /var
/dev/sda6 10153988 154240 9475632 2% /var/crash
/dev/sda5 10153988 4997928 4631944 52% /usr
/dev/sda2 20315844 176244 19090960 1% /osbackup
/dev/sda1 1019208 44724 921876 5% /boot
tmpfs 6084024 0 6084024 0% /dev/shm
/dev/mapper/vg00-log 20642428 176200 19417652 1% /log
[root@WebServer1 fd]#
해결
루트(/) 영역의 중요하지 않은 파일 삭제
(Nginx 재기동 없이 서비스가 정상화 되었다.)
PS.
내가 겪은 사례는 Web_Server가 동작하지 않는 원인이 디스크 용량 이슈였던 것뿐이고, 중요한 것은 (프로그램) 개발 이슈가 없는 경우에는 보통 원인이 무엇이든 간에 결국 Web_Server 가 정상적으로 동작하지 않는다는 것이다.
특히나 개발 중인 프로그램에서 위 에러가 발생하는 거라면 개발 소스를 수정해야겠지만, 문제없이 동작하던 Web_Server 에서 위 문제가 발생했다면 Web_Server 문제로 생각하는 게 합리적이라고 생각한다.
일반 사용자 입장에서 Apache 또는 NginX와 같은 Web_Server 에서 어떤 문제가 있는지 직접 확인 및 조치를 할 수 없다면 해당 이슈는 해결할 수 없다.
참조
http://lfender6445.github.io/application-net-err-content-length-mismatch/
http://stackoverflow.com/questions/22183859/javascript-err-content-length-mismatch
참조
http://lfender6445.github.io/application-net-err-content-length-mismatch/
http://stackoverflow.com/questions/22183859/javascript-err-content-length-mismatch
댓글
댓글 쓰기