cloudwu/skynet

阿里云服务器 Debian 12 上出现一个必现的崩溃 bug,不链接 jemalloc 则没有问题

ckytam opened this issue · 1 comments

使用 httpc.get 会导致 core dump,后来发现是 getaddrinfo 函数导致的。
简化复现场景后,直接调用 socket.resolve 也会导致 core dump。本地测试机器和腾讯云主机上没有这个问题。

#!/bin/bash
# Reproduction script: builds skynet, replaces examples/main.lua with a
# minimal service that calls socket.resolve, enables core dumps and runs it.

# Build and debugging prerequisites. apt-get is the scripting front end of
# apt; -y answers the confirmation prompt. Left best-effort on purpose: the
# packages may already be installed when this is not run as root.
apt-get install -y make gcc g++ gdb cmake autoconf libtool libssl-dev git

# Work next to this script so the checkout location does not depend on the
# caller's current directory. Quoted so paths containing spaces still work.
cur_dir=$(cd "$(dirname "$0")" && pwd) || exit 1
cd "$cur_dir" || exit 1

if [ ! -d skynet ]; then
    git clone https://github.com/cloudwu/skynet.git skynet || exit 1
fi

# Stop here if the checkout is missing or the build fails; otherwise the
# steps below would write main.lua into the wrong directory and try to run
# a binary that does not exist.
cd skynet || exit 1
make linux || exit 1

# Overwrite the example entry point in a single step. The quoted 'EOF'
# delimiter keeps the shell from expanding anything inside the Lua source.
tee examples/main.lua <<'EOF'
local skynet = require "skynet"
skynet.start(function()
    -- Do the lookup from a forked task, as in the original report.
    skynet.fork(function()
        local socket = require("skynet.socket")
        print("resolve >>>>>")
        local xx = socket.resolve("baidu.com")
        if xx then
            for key, value in pairs(xx) do
                print(">> ", key, value)
            end
        end
    end)
end)
EOF

# Allow core files of any size for the process started below.
ulimit -c unlimited

# Best-effort: write core files into the working directory. Needs root;
# output and errors are discarded, so a failure here is silent.
/sbin/sysctl -w kernel.core_pattern=./core_%e_%t >> /dev/null 2>&1

./skynet examples/config

试过编译时不链接 jemalloc,是没有问题的:在 CFLAGS 中加上 -DNOUSE_JEMALLOC 即可。

# Excerpt of the Makefile as modified for the test described in this report.
include platform.mk

# "?=" only assigns when the variable is not already set, so these defaults
# can be overridden from the environment or the make command line.
LUA_CLIB_PATH ?= luaclib
CSERVICE_PATH ?= cservice

SKYNET_BUILD_PATH ?= .

# -DNOUSE_JEMALLOC is the flag added for this test: per the report, the
# crash does not occur when the build does not link jemalloc.
CFLAGS = -g -O2 -Wall -I$(LUA_INC) $(MYCFLAGS) -DNOUSE_JEMALLOC
# CFLAGS += -DUSE_PTHREAD_LOCK
(gdb) bt
#0  musable (mem=0x7fb357d36f70) at ./malloc/malloc.c:5126
#1  __malloc_usable_size (m=0x7fb357d36f70) at ./malloc/malloc.c:5138
#2  0x00007fb3580d5a9a in ?? () from /lib/x86_64-linux-gnu/libnss_resolve.so.2
#3  0x00007fb3580da231 in ?? () from /lib/x86_64-linux-gnu/libnss_resolve.so.2
#4  0x00007fb3580dc739 in ?? () from /lib/x86_64-linux-gnu/libnss_resolve.so.2
#5  0x00007fb3580e01e4 in _nss_resolve_gethostbyname4_r () from /lib/x86_64-linux-gnu/libnss_resolve.so.2
#6  0x00007fb35821a796 in get_nss_addresses (res=0x7fb3511f9640, tmpbuf=0x7fb3511f9790, req=<optimized out>, name=<optimized out>)
    at ../sysdeps/posix/getaddrinfo.c:646
#7  gaih_inet (tmpbuf=0x7fb3511f9790, naddrs=<synthetic pointer>, pai=0x7fb3511f9610, req=<optimized out>, service=<optimized out>,
    name=<optimized out>) at ../sysdeps/posix/getaddrinfo.c:1179
#8  __GI_getaddrinfo (name=<optimized out>, service=<optimized out>, service@entry=0x0, hints=<optimized out>, hints@entry=0x7fb3511f9d00,
    pai=pai@entry=0x7fb3511f9cf8) at ../sysdeps/posix/getaddrinfo.c:2398
#9  0x00007fb35810884e in lresolve (L=0x7fb357c2f2e8) at lualib-src/lua-socket.c:800
#10 0x000055e26bc4c99e in precallC (f=0x7fb358108800 <lresolve>, nresults=1, func=0x7fb357d32850, L=0x7fb357c2f2e8) at ldo.c:529
#11 luaD_precall (L=L@entry=0x7fb357c2f2e8, func=<optimized out>, nresults=1) at ldo.c:595
#12 0x000055e26bc5b2fd in luaV_execute (L=<optimized out>, ci=<optimized out>) at lvm.c:1686
#13 0x000055e26bc4bd0a in luaD_rawrunprotected (L=L@entry=0x7fb357c2f2e8, f=f@entry=0x55e26bc4cb70 <resume>, ud=ud@entry=0x7fb3511f9fec) at ldo.c:144
#14 0x000055e26bc4cf09 in lua_resume (L=L@entry=0x7fb357c2f2e8, from=from@entry=0x7fb357c2f128, nargs=<optimized out>, nargs@entry=0,
    nresults=nresults@entry=0x7fb3511fa024) at ldo.c:849
#15 0x00007fb35812102a in lua_resumeX (nresults=0x7fb3511fa024, nargs=0, from=0x7fb357c2f128, L=0x7fb357c2f2e8) at service-src/service_snlua.c:90
#16 auxresume (narg=0, co=0x7fb357c2f2e8, L=0x7fb357c2f128) at service-src/service_snlua.c:146
#17 timing_resume (L=L@entry=0x7fb357c2f128, co_index=co_index@entry=1, n=0) at service-src/service_snlua.c:199
#18 0x00007fb358121370 in luaB_coresume (L=0x7fb357c2f128) at service-src/service_snlua.c:218
#19 0x000055e26bc4c6e9 in precallC (f=0x7fb358121340 <luaB_coresume>, nresults=-1, func=0x7fb357d31d50, L=0x7fb357c2f128) at ldo.c:529
#20 luaD_pretailcall (L=L@entry=0x7fb357c2f128, ci=ci@entry=0x7fb357dacb80, func=<optimized out>, func@entry=0x7fb357d31d50, narg1=<optimized out>,
    delta=delta@entry=2) at ldo.c:550
#21 0x000055e26bc5afff in luaV_execute (L=L@entry=0x7fb357c2f128, ci=<optimized out>) at lvm.c:1711
#22 0x000055e26bc4cd79 in ccall (inc=65537, nResults=0, func=<optimized out>, L=0x7fb357c2f128) at ldo.c:637
#23 luaD_callnoyield (L=0x7fb357c2f128, func=<optimized out>, nResults=0) at ldo.c:655
#24 0x000055e26bc4bd0a in luaD_rawrunprotected (L=L@entry=0x7fb357c2f128, f=f@entry=0x55e26bc47550 <f_call>, ud=ud@entry=0x7fb3511fa350) at ldo.c:144
#25 0x000055e26bc4d110 in luaD_pcall (L=L@entry=0x7fb357c2f128, func=func@entry=0x55e26bc47550 <f_call>, u=u@entry=0x7fb3511fa350, old_top=48,
    ef=<optimized out>) at ldo.c:953
#26 0x000055e26bc4989f in lua_pcallk (L=L@entry=0x7fb357c2f128, nargs=nargs@entry=5, nresults=nresults@entry=0, errfunc=errfunc@entry=1,
    ctx=ctx@entry=0, k=k@entry=0x0) at lapi.c:1066
#27 0x00007fb358104d4f in _cb (context=0x7fb357ceb590, ud=<optimized out>, type=1, session=1, source=0, msg=0x0, sz=0) at lualib-src/lua-skynet.c:67
#28 0x000055e26bc3f4e3 in dispatch_message (ctx=ctx@entry=0x7fb357ceb590, msg=msg@entry=0x7fb3511fa410) at skynet-src/skynet_server.c:275
#29 0x000055e26bc40083 in skynet_context_message_dispatch (sm=sm@entry=0x7fb357c081d0, q=0x7fb34f805060, weight=weight@entry=0)
    at skynet-src/skynet_server.c:335
#30 0x000055e26bc4090b in thread_worker (p=<optimized out>) at skynet-src/skynet_start.c:163
#31 0x00007fb3581b2134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#32 0x00007fb3582327dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

所有内存问题应自己先检测 c 库调用是否正确,有没有 double free 等问题。