Windows handle leak
kankri opened this issue · 0 comments
We have noticed that our application which uses aws-crt-cpp is leaking Windows thread handles in aws-c-io.
The issue can easily be caught with Application Verifier using the following steps. It would be nice if you could integrate Application Verifier, available in Windows Software Development Kit, into your test sets.
I checked out tag v1.12.6 of aws-iot-device-sdk-cpp-v2 and made the following changes to convert raw_pub_sub
into a DLL and add a runner for it:
diff --git a/samples/mqtt/raw_pub_sub/CMakeLists.txt b/samples/mqtt/raw_pub_sub/CMakeLists.txt
index e13dd0a..1d001d7 100644
--- a/samples/mqtt/raw_pub_sub/CMakeLists.txt
+++ b/samples/mqtt/raw_pub_sub/CMakeLists.txt
@@ -6,7 +6,7 @@ file(GLOB SRC_FILES
"*.cpp"
)
-add_executable(${PROJECT_NAME} ${SRC_FILES})
+add_library(${PROJECT_NAME} SHARED ${SRC_FILES})
set_target_properties(${PROJECT_NAME} PROPERTIES
CXX_STANDARD 14)
diff --git a/samples/mqtt/raw_pub_sub/main.cpp b/samples/mqtt/raw_pub_sub/main.cpp
index 0a39efa..2800dab 100644
--- a/samples/mqtt/raw_pub_sub/main.cpp
+++ b/samples/mqtt/raw_pub_sub/main.cpp
@@ -63,6 +63,7 @@ char *s_getCmdOption(char **begin, char **end, const String &option)
return 0;
}
+extern "C" __declspec(dllexport)
int main(int argc, char *argv[])
{
diff --git a/samples/mqtt/runner/CMakeLists.txt b/samples/mqtt/runner/CMakeLists.txt
new file mode 100644
index 0000000..00e2f6f
--- /dev/null
+++ b/samples/mqtt/runner/CMakeLists.txt
@@ -0,0 +1,20 @@
+cmake_minimum_required(VERSION 3.1)
+# note: cxx-17 requires cmake 3.8, cxx-20 requires cmake 3.12
+project(runner CXX)
+
+file(GLOB SRC_FILES
+ "*.cpp"
+)
+
+add_executable(${PROJECT_NAME} ${SRC_FILES})
+
+set_target_properties(${PROJECT_NAME} PROPERTIES
+ CXX_STANDARD 14)
+
+#set warnings
+if (MSVC)
+ target_compile_options(${PROJECT_NAME} PRIVATE /W4 /wd4068)
+else ()
+ target_compile_options(${PROJECT_NAME} PRIVATE -Wall -Wno-long-long -pedantic -Werror)
+endif ()
+
diff --git a/samples/mqtt/runner/main.cpp b/samples/mqtt/runner/main.cpp
new file mode 100644
index 0000000..53cd4b4
--- /dev/null
+++ b/samples/mqtt/runner/main.cpp
@@ -0,0 +1,13 @@
+#include <stdio.h>
+#include <windows.h>
+typedef int (WINAPI *fn_main_t)(int argc, char *argv[]);
+
+int main(int argc, char *argv[])
+{
+ HMODULE dll = LoadLibraryW(L"raw-pub-sub.dll");
+ fn_main_t fn_main = (fn_main_t)GetProcAddress(dll, "main");
+ int ret = fn_main(argc, argv);
+ BOOL status = FreeLibrary(dll);
+ Sleep(60*1000);
+ return ret;
+}
I then built both the sample program as a DLL and the runner and copied the DLL to a directory the runner finds it from:
cmake -DCMAKE_PREFIX_PATH=%CD%\_sdk -A x64 -S aws-iot-device-sdk-cpp-v2\samples\mqtt\raw_pub_sub -B _build_raw
cmake --build _build_raw --config "Debug"
cmake -A x64 -S aws-iot-device-sdk-cpp-v2\samples\mqtt\runner -B _runner
cmake --build _runner --config "Debug"
copy _build_raw\Debug\raw-pub-sub.dll _runner\Debug
I then enabled handle leak detection using an elevated user (this is easier to explore with the GUI):
appverif -enable leak -for runner.exe
I then ran the program:
_runner\Debug\runner.exe --endpoint ENDPOINT --cert CERT --key KEY --ca_file CA_FILE --topic TOPIC --client_id CLIENT_ID
<type "exit" immediately when program starts>
This caused a minidump to be generated (depending on the host configuration, see e.g. Collecting User-Mode Dumps) and a log file in %USERPROFILE%\AppVerifierLogs\runner.exe.*.dat
. I exported the binary log file as XML:
appverif -export log -for runner.exe -with to=runner.exe.xml
The log says:
<avrf:message>A HANDLE was leaked.</avrf:message>
<avrf:parameter1>1a0 - Value of the leaked handle. Run !htrace <handle> to get additional information about the handle if handle tracing is enabled.</avrf:parameter1>
<avrf:parameter2>2878b6a4f30 - Address to the allocation stack trace. Run dps <address> to view the allocation stack.</avrf:parameter2>
<avrf:parameter3>287921c0fd6 - Address of the owner dll name. Run du <address> to read the dll name.</avrf:parameter3>
<avrf:parameter4>7ffb42670000 - Base of the owner dll. Run .reload <dll_name> = <address> to reload the owner dll. Use 'lm' to get more information about the loaded and unloaded modules.</avrf:parameter4>
Opening the minidump with Windbg (or cdb.exe) and running the suggested command shows:
0:000> dps 2878b6a4f30
00000287`8b6a4f30 00000000`00000000
00000287`8b6a4f38 00100000`00009801
00000287`8b6a4f40 00007ffb`4277d10c raw_pub_sub!aws_thread_launch+0x19c [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-common\source\windows\thread.c @ 205]
00000287`8b6a4f48 00007ffb`4273318c raw_pub_sub!create_and_init_host_entry+0x43c [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-io\source\host_resolver.c @ 1364]
00000287`8b6a4f50 00007ffb`42733464 raw_pub_sub!default_resolve_host+0x264 [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-io\source\host_resolver.c @ 1411]
00000287`8b6a4f58 00007ffb`4272ee7c raw_pub_sub!aws_host_resolver_resolve_host+0x8c [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-io\source\host_resolver.c @ 76]
00000287`8b6a4f60 00007ffb`42734c7b raw_pub_sub!aws_client_bootstrap_new_socket_channel+0x46b [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-io\source\channel_bootstrap.c @ 780]
00000287`8b6a4f68 00007ffb`426b59f3 raw_pub_sub!s_mqtt_client_connect+0x183 [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-mqtt\source\client.c @ 1579]
00000287`8b6a4f70 00007ffb`426b31e7 raw_pub_sub!aws_mqtt_client_connection_connect+0xb07 [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-mqtt\source\client.c @ 1529]
00000287`8b6a4f78 00007ffb`42697686 raw_pub_sub!Aws::Crt::Mqtt::MqttConnection::Connect+0x316 [aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\source\mqtt\mqttclient.cpp @ 405]
00000287`8b6a4f80 00007ffb`426728e1 raw_pub_sub!main+0x1431 [aws-iot-device-sdk-cpp-v2\samples\mqtt\raw_pub_sub\main.cpp @ 350]
0:000> lsa raw_pub_sub!aws_thread_launch+0x19c
201: if (is_managed_thread) {
202: aws_thread_increment_unjoined_count();
203: }
204:
> 205: thread->thread_handle =
206: CreateThread(0, stack_size, thread_wrapper_fn, (LPVOID)thread_wrapper, 0, &thread->thread_id);
207:
208: if (!thread->thread_handle) {
209: aws_thread_decrement_unjoined_count();
210: return aws_raise_error(AWS_ERROR_THREAD_INSUFFICIENT_RESOURCE)
0:000> lsc
Current: aws-iot-device-sdk-cpp-v2\crt\aws-crt-cpp\crt\aws-c-common\source\windows\thread.c(211)
Alternatively in an interactive setting it might be easier to run the program directly under debugger:
windbg -g _runner\Debug\runner.exe --endpoint ENDPOINT --cert CERT --key KEY --ca_file CA_FILE --topic TOPIC --client_id CLIENT_ID
<type "exit" immediately when program starts>
=======================================
VERIFIER STOP 0000000000000901: pid 0x7FE4: A HANDLE was leaked.
00000000000002F8 : Value of the leaked handle. Run !htrace <handle> to get additional information about the handle if handle tracing is enabled.
000002170E5A62E0 : Address to the allocation stack trace. Run dps <address> to view the allocation stack.
00000217150F0FD6 : Address of the owner dll name. Run du <address> to read the dll name.
00007FFB1C9B0000 : Base of the owner dll. Run .reload <dll_name> = <address> to reload the owner dll. Use 'lm' to get more information about the loaded and unloaded modules.
Looking at the caller, create_and_init_host_entry()
in aws-c-io/source/host_resolver.c:1364, it seems it receives struct aws_thread
which contains a thread handle from aws_thread_launch()
but never calls aws_thread_clean_up()
for it. It seems that struct is not needed for anything, thread_wrapper
contains a copy of it with a unique thread handle which is used to synchronize thread exit. So it should be safe to clean up the struct right away.