performancecopilot/pcp

pcp-atopsar/libpcp failure possibly related to v3 archive indom handling

natoscott opened this issue · 0 comments

@kmcdonell keen to get your thoughts on this when you're around - it's reproducible for me over the last few days (reading the latest archive each time; note that pcp-zeroconf is installed, providing the proc metric being accessed here). The metric being accessed in getmaxpid() is proc.psinfo.pid ...

$ gdb --quiet --args /usr/libexec/pcp/bin/pcp-atopsar
Reading symbols from /usr/libexec/pcp/bin/pcp-atopsar...
(gdb) r
Starting program: /usr/libexec/pcp/bin/pcp-atopsar
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".

Program received signal SIGSEGV, Segmentation fault.
0x0000fffff7bbed10 in __strlen_asimd () from /lib64/libc.so.6
Missing separate debuginfos, use: dnf debuginfo-install avahi-libs-0.8-24.fc39.aarch64 cyrus-sasl-lib-2.1.28-11.fc39.aarch64 dbus-libs-1.14.10-1.fc39.aarch64 glibc-2.38-16.fc39.aarch64 libcap-2.48-9.fc39.aarch64 libgcc-13.2.1-6.fc39.aarch64 libxcrypt-4.4.36-2.fc39.aarch64 libzstd-1.5.5-4.fc39.aarch64 lz4-libs-1.9.4-4.fc39.aarch64 ncurses-libs-6.4-7.20230520.fc39.aarch64 openssl-libs-3.1.1-4.fc39.aarch64 systemd-libs-254.9-1.fc39.aarch64 xz-libs-5.4.4-1.fc39.aarch64 zlib-1.2.13-4.fc39.aarch64
(gdb) bt
#0 0x0000fffff7bbed10 in __strlen_asimd () from /lib64/libc.so.6
#1 0x0000fffff7eb7b90 in pmGetInDomArchive_ctx (ctxp=0x488ad0, ctxp@entry=0x0,
indom=<optimized out>, instlist=instlist@entry=0xffffffffda70,
namelist=namelist@entry=0xffffffffda78) at logmeta.c:1709
#2 0x0000fffff7eb7d08 in pmGetInDomArchive (indom=<optimized out>,
instlist=instlist@entry=0xffffffffda70, namelist=namelist@entry=0xffffffffda78)
at logmeta.c:1739
#3 0x000000000043b204 in instances (purpose=purpose@entry=0x443c20 "maxpid", all=all@entry=1,
id=id@entry=21, descs=descs@entry=0x476720 , ids=ids@entry=0xffffffffda70,
insts=insts@entry=0xffffffffda78) at various.c:1427
#4 0x000000000043b300 in get_instances (purpose=purpose@entry=0x443c20 "maxpid",
mid=mid@entry=21, descs=descs@entry=0x476720 , ids=ids@entry=0xffffffffda70,
insts=insts@entry=0xffffffffda78) at various.c:1456
#5 0x0000000000421bd4 in getmaxpid () at photoproc.c:273
#6 0x000000000043bf48 in setup_globals (opts=opts@entry=0xffffffffdc60) at various.c:1014
#7 0x0000000000415c68 in atopsar (argc=argc@entry=1, argv=argv@entry=0xfffffffff018)
at atopsar.c:335
#8 0x000000000041163c in main (argc=1, argv=0xfffffffff018) at atop.c:293
(gdb) up 1
#1 0x0000fffff7eb7b90 in pmGetInDomArchive_ctx (ctxp=0x488ad0, ctxp@entry=0x0,
indom=<optimized out>, instlist=instlist@entry=0xffffffffda70,
namelist=namelist@entry=0xffffffffda78) at logmeta.c:1709
1709 strsize += strlen(idp->namelist[j])+1;
(gdb) l
1704 if ((nlist = (char **)realloc(nlist, bytes)) == NULL) {
1705 pmNoMem("pmGetInDomArchive: nlist", bytes, PM_FATAL_ERR);
1706 }
1707 ilist[numinst] = idp->instlist[j];
1708 nlist[numinst] = idp->namelist[j];
1709 strsize += strlen(idp->namelist[j])+1;
1710 numinst++;
1711 }
1712 }
1713 bytes = numinst * sizeof(olist[0]) + strsize;
(gdb) p idp
$1 = (__pmLogInDom *) 0x11adb00
(gdb) p *idp
$2 = {next = 0x11ada00, prior = 0x10d3990, indom = 0, stamp = {sec = 1709471352,
nsec = 275820197}, isdelta = 1, numinst = 4, alloc = 4, instlist = 0x11ada74,
namelist = 0x11adad0, buf = 0x11ada60}
(gdb) p j
$3 = 0
(gdb) p idp->namelist[0]
$4 = 0x0
(gdb)