kubo/plthook

Can I hook the function called by a referenced library?

fengyuanyu1 opened this issue · 6 comments

@kubo Hello, kubo
Thanks for your great tools, I have a question about usage.
Suppose I write a program A. Program A uses a function b in LIBRARY B, and function b calls function c in LIBRARY C. I can't edit LIBRARY B or LIBRARY C; LIBRARY B is a further wrapper around LIBRARY C. I use the API provided by LIBRARY B. Can I use the methods in the README to hook function c from program A?

If not, can you tell me how to implement this feature?

kubo commented

I cant edit the LIBRARY B and LIBRARY C.

Plthook modifies programs and libraries in process memory, not on disk. You don't need permission to edit LIBRARY B and LIBRARY C on disk.

How about hooking the calls to function c made from LIBRARY B?

How about hooking the calls to function c made from LIBRARY B?

As I understand it, each ELF object has its own GOT/PLT, which records the addresses of the functions called from within that ELF object.
I tried the following code in Program A:

/*
 * Opens the PLT of "libC.so.1" and replaces its entry for "c" with my_c.
 *
 * Returns 0 when the entry was replaced, -1 when opening or replacing
 * failed (the plthook error text is printed in that case).
 *
 * NOTE(review): this opens the library that defines c. If the calls to c
 * are made from LIBRARY B, the entry to patch presumably lives in
 * LIBRARY B's PLT instead -- confirm which object is meant to be opened.
 */
int install_hook_function()
{
    plthook_t *hook;
    int status = 0;

    if (plthook_open(&hook, "libC.so.1") != 0) {
        printf("plthook_open error: %s\n", plthook_error());
        return -1;
    }

    /* The previous address is not needed here, hence the NULL last argument. */
    if (plthook_replace(hook, "c", (void*)my_c, NULL) != 0) {
        printf("plthook_replace error: %s\n", plthook_error());
        status = -1;
    }

    /* The handle is closed on both the success and the failure path. */
    plthook_close(hook);
    return status;
}

But it doesn't work.

Specifically, I am trying to hook the CUDA driver API function cuLaunchKernel. Lib B is libcublas, and lib C is libcuda. Program A is matrixMulCUBLAS.cpp. I put the following code in Program A, but it tells me that there is no symbol for cuLaunchKernel. Then I found that neither Program A nor lib B calls cuLaunchKernel, according to this. This is confusing, because I can see cuLaunchKernel in function b's call stack using GDB. I also found that libcuda.so is explicitly loaded at runtime after main, but I am not sure whether that affects the hook. Can you give me some suggestions?

extern "C" {
#include "plthook.h"
// First Try.
// When hook functions are outside of modified files
/*
 * First-try replacement for cuLaunchKernel: passes every argument through
 * to the real cuLaunchKernel() unchanged and returns its result.
 */
static CUresult my_cuLaunchKernel(CUfunction f,
                                  unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
                                  unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
                                  unsigned int sharedMemBytes, CUstream hStream,
                                  void** kernelParams, void** extra)
{
    /* call real LaunchKernel(). */
    const CUresult status = cuLaunchKernel(f,
                                           gridDimX, gridDimY, gridDimZ,
                                           blockDimX, blockDimY, blockDimZ,
                                           sharedMemBytes, hStream, kernelParams, extra);

    // ... do your task: logging, check received data, etc. ...
    return status;
}
    
int install_hook_function()
{
    plthook_t *plthook;
    
    if (plthook_open(&plthook, "libcublas.so.11") != 0) {
        printf("plthook_open error: %s\n", plthook_error());
        return -1;
    }
    if (plthook_replace(plthook, "cuLaunchKernel", (void*)my_cuLaunchKernel, NULL) != 0) {
        printf("plthook_replace error: %s\n", plthook_error());
        plthook_close(plthook);
        return -1;
    }
    plthook_close(plthook);
    return 0;
}

// Second Try.
// When hook functions are inside of modified files
#include <dlfcn.h>
/*
 * Pointer through which the second-try hook reaches the real
 * cuLaunchKernel. It is written by install_hook_function() and read by
 * my_cuLaunchKernel().
 */
static CUresult (*cuLaunchKernel_func)(CUfunction f,
                                       unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
                                       unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
                                       unsigned int sharedMemBytes, CUstream hStream,
                                       void** kernelParams, void** extra);

/*
 * Second-try replacement for cuLaunchKernel: forwards every argument
 * unchanged through the cuLaunchKernel_func pointer and returns the result.
 *
 * NOTE(review): cuLaunchKernel_func is called without a NULL check, so it
 * must have been set before the first hooked call arrives.
 */
static CUresult my_cuLaunchKernel(CUfunction f,
                                  unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
                                  unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
                                  unsigned int sharedMemBytes, CUstream hStream,
                                  void** kernelParams, void** extra)
{
    /* call real LaunchKernel(). */
    const CUresult status = cuLaunchKernel_func(f,
                                                gridDimX, gridDimY, gridDimZ,
                                                blockDimX, blockDimY, blockDimZ,
                                                sharedMemBytes, hStream, kernelParams, extra);

    // ... do your task: logging, check received data, etc. ...
    return status;
}

/*
 * Second try: opens the PLT of the object that contains
 * cuLaunchKernel_func, replaces its "cuLaunchKernel" entry with
 * my_cuLaunchKernel, and stores the real function in cuLaunchKernel_func
 * so the hook can forward to it.
 *
 * Returns 0 on success, -1 on failure (the reason is printed).
 */
int install_hook_function()
{
    plthook_t *plthook;
#ifndef WIN32
    void *real_addr;
#endif

    if (plthook_open_by_address(&plthook, &cuLaunchKernel_func) != 0) {
        printf("plthook_open error: %s\n", plthook_error());
        return -1;
    }
#ifndef WIN32
    // Resolve the real address BEFORE patching the PLT. If the lookup fails
    // we return with nothing hooked, instead of leaving a hook installed
    // that would forward through a NULL cuLaunchKernel_func.
    real_addr = dlsym(RTLD_DEFAULT, "cuLaunchKernel");
    if (real_addr == NULL) {
        // dlerror() may itself return NULL, so never pass it to %s unchecked.
        const char *reason = dlerror();
        printf("dlsym error: %s\n", reason != NULL ? reason : "cuLaunchKernel resolved to NULL");
        plthook_close(plthook);
        return -1;
    }
#endif
    if (plthook_replace(plthook, "cuLaunchKernel", (void*)my_cuLaunchKernel, (void**)&cuLaunchKernel_func) != 0) {
        printf("plthook_replace error: %s\n", plthook_error());
        plthook_close(plthook);
        return -1;
    }
#ifndef WIN32
    // The address passed to the fourth argument of plthook_replace() is
    // available on Windows. But not on Unixes. Use the real address that
    // dlsym() returned above.
    cuLaunchKernel_func = (CUresult (*)(CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, CUstream, void**, void**))real_addr;
#endif
    plthook_close(plthook);
    return 0;
}
}
kubo commented

Probably libcublas.so.11 loads libcuda.so by dlopen and gets the address of cuLaunchKernel by dlsym or dlvsym.

How about the following code?

extern "C" {
#include "plthook.h"
// First Try.
// When hook functions are outside of modified files
/*
 * Replacement that my_dlsym()/my_dlvsym() hand out in place of the real
 * cuLaunchKernel: forwards every argument unchanged to cuLaunchKernel()
 * and returns its result.
 */
static CUresult my_cuLaunchKernel(CUfunction f,
                                  unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
                                  unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
                                  unsigned int sharedMemBytes, CUstream hStream,
                                  void** kernelParams, void** extra)
{
    /* call real LaunchKernel(). */
    const CUresult status = cuLaunchKernel(f,
                                           gridDimX, gridDimY, gridDimZ,
                                           blockDimX, blockDimY, blockDimZ,
                                           sharedMemBytes, hStream, kernelParams, extra);

    // ... do your task: logging, check received data, etc. ...
    return status;
}

/*
 * dlsym() replacement: performs the real lookup, but when the requested
 * symbol is "cuLaunchKernel" and the lookup succeeded, returns
 * my_cuLaunchKernel instead of the real address. Every other lookup
 * (including failed ones) is returned untouched.
 */
static void *my_dlsym(void *handle, const char *symbol)
{
    void *addr = dlsym(handle, symbol);
    if (addr != NULL && strcmp(symbol, "cuLaunchKernel") == 0) {
        /* Explicit cast: this code lives in an extern "C" block of a C++
         * file, and C++ does not convert a function pointer to void *
         * implicitly. */
        return (void *)my_cuLaunchKernel;
    }
    return addr;
}

/*
 * dlvsym() replacement: performs the real versioned lookup, but when the
 * requested symbol is "cuLaunchKernel" and the lookup succeeded, returns
 * my_cuLaunchKernel instead of the real address. Every other lookup
 * (including failed ones) is returned untouched.
 */
static void *my_dlvsym(void *handle, const char *symbol, const char *version)
{
    void *addr = dlvsym(handle, symbol, version);
    if (addr != NULL && strcmp(symbol, "cuLaunchKernel") == 0) {
        /* Explicit cast: this code lives in an extern "C" block of a C++
         * file, and C++ does not convert a function pointer to void *
         * implicitly. */
        return (void *)my_cuLaunchKernel;
    }
    return addr;
}
    
int install_hook_function()
{
    plthook_t *plthook;
    int found = 0;

    if (plthook_open(&plthook, "libcublas.so.11") != 0) {
        printf("plthook_open error: %s\n", plthook_error());
        return -1;
    }
    if (plthook_replace(plthook, "dlsym", (void*)my_dlsym, NULL) == 0) {
        found = 1;
    } else {
        printf("plthook_replace error: %s\n", plthook_error());
    }
    if (plthook_replace(plthook, "dlvsym", (void*)my_dlvsym, NULL) == 0) {
        found = 1;
    } else {
        printf("plthook_replace error: %s\n", plthook_error());
    }
    if (!found) {
        printf("neither dlsym nor dlvsym is hooked.\n");
        plthook_close(plthook);
        return -1;
    }
    plthook_close(plthook);
    return 0;
}
}

Sorry, it doesn't work. After adding some printf calls to my_cuLaunchKernel, it produces none of the expected output.

kubo commented

@fengyuanyu1

Could you check the symbols passed to dlsym and dlvsym as follows?
According to https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__EXEC.html, CUDA provides functions similar to cuLaunchKernel. libcublas.so.11 may use another one.

/*
 * Diagnostic dlsym() replacement: performs the real lookup and returns
 * my_cuLaunchKernel when "cuLaunchKernel" was requested and found. Every
 * other lookup is printed (handle, symbol name, resulting address) and
 * returned untouched, so the symbols actually being requested can be seen.
 */
static void *my_dlsym(void *handle, const char *symbol)
{
    void *addr = dlsym(handle, symbol);
    if (addr != NULL && strcmp(symbol, "cuLaunchKernel") == 0) {
        /* Explicit cast: C++ does not convert a function pointer to
         * void * implicitly. */
        return (void *)my_cuLaunchKernel;
    }
    printf("dlsym(%p, \"%s\") => %p\n", handle, symbol, addr);
    return addr;
}

/*
 * Diagnostic dlvsym() replacement: performs the real versioned lookup and
 * returns my_cuLaunchKernel when "cuLaunchKernel" was requested and found.
 * Every other lookup is printed (handle, symbol name, version, resulting
 * address) and returned untouched.
 */
static void *my_dlvsym(void *handle, const char *symbol, const char *version)
{
    void *addr = dlvsym(handle, symbol, version);
    if (addr != NULL && strcmp(symbol, "cuLaunchKernel") == 0) {
        /* Explicit cast: C++ does not convert a function pointer to
         * void * implicitly. */
        return (void *)my_cuLaunchKernel;
    }
    printf("dlvsym(%p, \"%s\", \"%s\") => %p\n", handle, symbol, version, addr);
    return addr;
}

Thanks for your suggestion. Actually, I used the LD_PRELOAD hook method in this repo and hooked cuLaunchKernel successfully. But I don't know why the PLT/GOT hook fails; I need to explore the details of how these CUDA libraries are loaded.