Iqiyi开源hook框架XHook分析

Got Hook 可以使用dlsym得到目标函数的地址, 然后遍历got表进行Hook.

但是有时候我们不能通过dlsym得到函数地址, 这时该怎么办呢? iqiyi的xhook很好地解决了这个问题 (可以参考linker的实现; 有疑问时直接看源码).

思路是找到目标函数地址在got表中的存放位置: 只要找到了目标symbol在got表中的offset, 想要修改或者得到目标函数的实际地址就轻松多了.

symbol -> symid -> 在got表中的offset -> 目标函数实际地址

阅读Readme #

https://github.com/iqiyi/xHook/blob/master/README.zh-CN.md https://github.com/iqiyi/xHook/blob/master/docs/overview/android_plt_hook_overview.zh-CN.md

//detect memory leaks
xhook_register(".*\\.so$", "malloc",  my_malloc,  NULL);
xhook_register(".*\\.so$", "calloc",  my_calloc,  NULL);
xhook_register(".*\\.so$", "realloc", my_realloc, NULL);
xhook_register(".*\\.so$", "free",    my_free,    NULL);

//monitor the sockets lifecycle
xhook_register(".*\\.so$", "getaddrinfo", my_getaddrinfo, NULL);
xhook_register(".*\\.so$", "socket",      my_socket,      NULL);
xhook_register(".*\\.so$", "setsockopt",  my_setsockopt,  NULL);
xhook_register(".*\\.so$", "bind",        my_bind,        NULL);
xhook_register(".*\\.so$", "listen",      my_listen,      NULL);
xhook_register(".*\\.so$", "connect",     my_connect,     NULL);
xhook_register(".*\\.so$", "shutdown",    my_shutdown,    NULL);
xhook_register(".*\\.so$", "close",       my_close,       NULL);

//filter out and save some Android logs to a local file
xhook_register(".*\\.so$", "__android_log_write",  my_log_write,  NULL);
xhook_register(".*\\.so$", "__android_log_print",  my_log_print,  NULL);
xhook_register(".*\\.so$", "__android_log_vprint", my_log_vprint, NULL);
xhook_register(".*\\.so$", "__android_log_assert", my_log_assert, NULL);

//trace some calls (ignoring linker and linker64)
xhook_register("^/system/.*$", "mmap",   my_mmap,   NULL);
xhook_register("^/vendor/.*$", "munmap", my_munmap, NULL);
xhook_ignore  (".*/linker$",   "mmap");
xhook_ignore  (".*/linker$",   "munmap");
xhook_ignore  (".*/linker64$", "mmap");
xhook_ignore  (".*/linker64$", "munmap");

//defend against some injection attacks
xhook_register(".*com\\.hacker.*\\.so$", "malloc",  my_malloc_always_return_NULL, NULL);
xhook_register(".*/libhacker\\.so$",     "connect", my_connect_with_recorder,     NULL);

//fix some system bugs
xhook_register(".*some_vendor.*/libvictim\\.so$", "bad_func", my_nice_func, NULL);

//ignore all hook info for libwebviewchromium.so
xhook_ignore(".*/libwebviewchromium.so$", NULL);

//now perform the hooks!
xhook_refresh(1);

发现重点为下面几个函数:

  1. xhook_register
  2. xhook_ignore
  3. xhook_refresh

接下来依次分析

分析 #

xhook_register Hook函数注册 #

函数声明 #

/*
 * Public entry point for registering a hook.
 *
 * pathname_regex_str: regular expression selecting the ELF pathnames to hook
 *                     (e.g. ".*\\.so$").
 * symbol:             name of the function to be replaced.
 * new_func:           replacement function.
 * old_func:           may be NULL, as in the usage examples.
 *                     NOTE(review): presumably receives the original function
 *                     address when the hook is applied - confirm against
 *                     xh_elf_replace_function.
 *
 * Returns an int status code; 0 means the request was accepted.
 * NOTE(review): XHOOK_EXPORT is presumably a symbol-visibility attribute -
 * confirm in the header that defines it.
 */
int xhook_register(const char *pathname_regex_str, const char *symbol,
                   void *new_func, void **old_func) XHOOK_EXPORT;

函数实现分析 #

/*
 * Thin public wrapper: forwards all four arguments unchanged to
 * xh_core_register() and hands its status code back to the caller.
 */
int xhook_register(const char *pathname_regex_str, const char *symbol,
                   void *new_func, void **old_func)
{
    int status = xh_core_register(pathname_regex_str, symbol, new_func, old_func);

    return status;
}

xh_core_register #

/*
 * Queue one hook request on the global xh_core_hook_info list.
 *
 * pathname_regex_str: POSIX regular expression, compiled here with REG_NOSUB.
 * symbol:             symbol name; a private copy is made with strdup().
 * new_func:           replacement function pointer (must not be NULL).
 * old_func:           stored as-is; may be NULL.
 *
 * Returns 0 on success, XH_ERRNO_INVAL for a NULL mandatory argument, an
 * invalid regex, or a registration attempted after xh_core_inited was set,
 * and XH_ERRNO_NOMEM when an allocation fails.
 *
 * Ownership: on success the compiled regex and the duplicated strings belong
 * to the queued node. On failure everything acquired so far is released,
 * including the compiled regex.
 */
int xh_core_register(const char *pathname_regex_str, const char *symbol,
                     void *new_func, void **old_func)
{
    xh_core_hook_info_t *hi;
    regex_t              regex;

    if(NULL == pathname_regex_str || NULL == symbol || NULL == new_func) return XH_ERRNO_INVAL;

    if(xh_core_inited)
    {
        XH_LOG_ERROR("do not register hook after refresh(): %s, %s", pathname_regex_str, symbol);
        return XH_ERRNO_INVAL;
    }

    if(0 != regcomp(&regex, pathname_regex_str, REG_NOSUB)) return XH_ERRNO_INVAL;

    //from here on the compiled regex owns memory: every failure path must regfree() it
    if(NULL == (hi = malloc(sizeof(xh_core_hook_info_t))))
    {
        regfree(&regex);
        return XH_ERRNO_NOMEM;
    }
    if(NULL == (hi->symbol = strdup(symbol)))
    {
        free(hi);
        regfree(&regex);
        return XH_ERRNO_NOMEM;
    }
#if XH_CORE_DEBUG
    if(NULL == (hi->pathname_regex_str = strdup(pathname_regex_str)))
    {
        free(hi->symbol);
        free(hi);
        regfree(&regex);
        return XH_ERRNO_NOMEM;
    }
#endif
    //struct copy: the node now owns the compiled regex
    hi->pathname_regex = regex;
    hi->new_func = new_func;
    hi->old_func = old_func;
    
    //the key step: append the request to the list under xh_core_mutex
    pthread_mutex_lock(&xh_core_mutex);
    TAILQ_INSERT_TAIL(&xh_core_hook_info, hi, link);
    pthread_mutex_unlock(&xh_core_mutex);

    return 0;
}

xh_elf_init: #

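  1. 设置 base_addr, elf_header (ehdr) 和 program header (phdr) 的地址.
  2. 找到第一个 offset 为 0 的 PT_LOAD 段, 并据此计算 load_bias (bias_addr = base_addr - p_vaddr).
  3. 找到 PT_DYNAMIC 段, 之后解析 dynamic segment, 找到 strtab, symtab, .rel(a).plt, .rel(a).dyn, Android 压缩重定位表以及 hash 表等.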
/*
 * Fill in an xh_elf_t from an ELF image that is already mapped in memory.
 *
 * self:      structure to initialise; it is always zeroed first.
 * base_addr: address at which the ELF header of the image is mapped.
 * pathname:  pathname of the image; stored by pointer, not copied.
 *
 * Steps:
 *   1. record base_addr, the ELF header and the program header table;
 *   2. locate the first PT_LOAD segment with file offset 0 and derive the
 *      load bias (base_addr - p_vaddr);
 *   3. locate PT_DYNAMIC and walk its entries to collect strtab, symtab,
 *      the .rel(a).plt / .rel(a).dyn / Android relocation tables and the
 *      hash table (ELF hash or GNU hash);
 *   4. validate the Android relocation magic ("APS2") and run xh_elf_check().
 *
 * Returns 0 on success, XH_ERRNO_INVAL for invalid arguments and
 * XH_ERRNO_FORMAT when the image does not look like a usable ELF.
 */
int xh_elf_init(xh_elf_t *self, uintptr_t base_addr, const char *pathname)
{
    if(NULL == self || 0 == base_addr || NULL == pathname) return XH_ERRNO_INVAL;

    //always reset
    memset(self, 0, sizeof(xh_elf_t));
    
    self->pathname = pathname;
    self->base_addr = (ElfW(Addr))base_addr;
    self->ehdr = (ElfW(Ehdr) *)base_addr;
    self->phdr = (ElfW(Phdr) *)(base_addr + self->ehdr->e_phoff); //segmentation fault sometimes

    //find the first load-segment with offset 0
    ElfW(Phdr) *phdr0 = xh_elf_get_first_segment_by_type_offset(self, PT_LOAD, 0);
    if(NULL == phdr0)
    {
        XH_LOG_ERROR("Can NOT found the first load segment. %s", pathname);
        return XH_ERRNO_FORMAT;
    }

#if XH_ELF_DEBUG
    if(0 != phdr0->p_vaddr)
        XH_LOG_DEBUG("first load-segment vaddr NOT 0 (vaddr: %p). %s",
                     (void *)(phdr0->p_vaddr), pathname);
#endif

    //save load bias addr
    if(self->base_addr < phdr0->p_vaddr) return XH_ERRNO_FORMAT;
    self->bias_addr = self->base_addr - phdr0->p_vaddr;
    
    //find dynamic-segment
    ElfW(Phdr) *dhdr = xh_elf_get_first_segment_by_type(self, PT_DYNAMIC);
    if(NULL == dhdr)
    {
        XH_LOG_ERROR("Can NOT found dynamic segment. %s", pathname);
        return XH_ERRNO_FORMAT;
    }

    //parse dynamic-segment
    self->dyn          = (ElfW(Dyn) *)(self->bias_addr + dhdr->p_vaddr);
    self->dyn_sz       = dhdr->p_memsz;
    ElfW(Dyn) *dyn     = self->dyn;
    ElfW(Dyn) *dyn_end = self->dyn + (self->dyn_sz / sizeof(ElfW(Dyn)));
    uint32_t  *raw;
    for(; dyn < dyn_end; dyn++)
    {
        switch(dyn->d_tag) //segmentation fault sometimes
        {
        case DT_NULL:
            //the end of the dynamic-section
            dyn = dyn_end;
            break;
        case DT_STRTAB:
            {
                self->strtab = (const char *)(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))(self->strtab) < self->base_addr) return XH_ERRNO_FORMAT;
                break;
            }
        case DT_SYMTAB:
            {
                self->symtab = (ElfW(Sym) *)(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))(self->symtab) < self->base_addr) return XH_ERRNO_FORMAT;
                break;
            }
        case DT_PLTREL:
            //use rel or rela?
            self->is_use_rela = (dyn->d_un.d_val == DT_RELA ? 1 : 0);
            break;
        case DT_JMPREL:
            {
                self->relplt = (ElfW(Addr))(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))(self->relplt) < self->base_addr) return XH_ERRNO_FORMAT;
                break;
            }
        case DT_PLTRELSZ:
            self->relplt_sz = dyn->d_un.d_val;
            break;
        case DT_REL:
        case DT_RELA:
            {
                self->reldyn = (ElfW(Addr))(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))(self->reldyn) < self->base_addr) return XH_ERRNO_FORMAT;
                break;
            }
        case DT_RELSZ:
        case DT_RELASZ:
            self->reldyn_sz = dyn->d_un.d_val;
            break;
        case DT_ANDROID_REL:
        case DT_ANDROID_RELA:
            {
                self->relandroid = (ElfW(Addr))(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))(self->relandroid) < self->base_addr) return XH_ERRNO_FORMAT;
                break;
            }
        case DT_ANDROID_RELSZ:
        case DT_ANDROID_RELASZ:
            self->relandroid_sz = dyn->d_un.d_val;
            break;
        case DT_HASH:
            {
                //ignore DT_HASH once DT_GNU_HASH has been parsed: both tags share
                //bucket_cnt/bucket/chain, and overwriting them here would leave
                //is_use_gnu_hash set while pointing at ELF-hash data
                if(self->is_use_gnu_hash) break;

                raw = (uint32_t *)(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))raw < self->base_addr) return XH_ERRNO_FORMAT;
                self->bucket_cnt  = raw[0];
                self->chain_cnt   = raw[1];
                self->bucket      = &raw[2];
                self->chain       = &(self->bucket[self->bucket_cnt]);
                break;
            }
        case DT_GNU_HASH:
            {
                //header words: bucket count, symbol offset, bloom size, bloom shift;
                //followed by the bloom filter, the buckets and the chains
                raw = (uint32_t *)(self->bias_addr + dyn->d_un.d_ptr);
                if((ElfW(Addr))raw < self->base_addr) return XH_ERRNO_FORMAT;
                self->bucket_cnt  = raw[0];
                self->symoffset   = raw[1];
                self->bloom_sz    = raw[2];
                self->bloom_shift = raw[3];
                self->bloom       = (ElfW(Addr) *)(&raw[4]);
                self->bucket      = (uint32_t *)(&(self->bloom[self->bloom_sz]));
                self->chain       = (uint32_t *)(&(self->bucket[self->bucket_cnt]));
                self->is_use_gnu_hash = 1;
                break;
            }
        default:
            break;
        }
    }

    //check android rel/rela
    if(0 != self->relandroid)
    {
        const char *rel = (const char *)self->relandroid;
        if(self->relandroid_sz < 4 ||
           rel[0] != 'A' ||
           rel[1] != 'P' ||
           rel[2] != 'S' ||
           rel[3] != '2')
        {
            XH_LOG_ERROR("android rel/rela format error\n");
            return XH_ERRNO_FORMAT;
        }
        
        //skip the 4-byte "APS2" magic
        self->relandroid += 4;
        self->relandroid_sz -= 4;
    }

    //check elf info
    if(0 != xh_elf_check(self))
    {
        XH_LOG_ERROR("elf init check failed. %s", pathname);
        return XH_ERRNO_FORMAT;
    }
    
#if XH_ELF_DEBUG
    xh_elf_dump(self);
#endif

    XH_LOG_INFO("init OK: %s (%s %s PLT:%u DYN:%u ANDROID:%u)\n", self->pathname,
                self->is_use_rela ? "RELA" : "REL",
                self->is_use_gnu_hash ? "GNU_HASH" : "ELF_HASH",
                self->relplt_sz, self->reldyn_sz, self->relandroid_sz);

    return 0;
}

xhook_refresh #

xhook_refresh -> xh_core_refresh -> xh_core_init_once

                              -> xh_core_refresh_impl 
   

xh_core_refresh_impl -> xh_core_check_elf_header
                     -> xh_core_hook             -> xh_core_hook_impl
                     
                     
xh_core_hook_impl    -> xh_elf_init
                     -> xh_elf_hook

xh_elf_hook #

  1. xh_elf_find_symidx_by_name //find symbol index by symbol name
  2. replace.
    //find symbol index by symbol name; when the lookup returns non-zero the
    //hook returns 0 right away instead of propagating the lookup result
    if(0 != (r = xh_elf_find_symidx_by_name(self, symbol, &symidx))) return 0;
    
    //replace for .rel(a).plt
    if(0 != self->relplt)
    {
        //walk the PLT relocation table entry by entry (REL or RELA layout,
        //selected by is_use_rela)
        xh_elf_plain_reloc_iterator_init(&plain_iter, self->relplt, self->relplt_sz, self->is_use_rela);
        while(NULL != (rel_common = xh_elf_plain_reloc_iterator_next(&plain_iter)))
        {
            //is_plt is passed as 1 here; a non-zero result aborts the hook
            //and is returned to the caller
            if(0 != (r = xh_elf_find_and_replace_func(self,
                                                      (self->is_use_rela ? ".rela.plt" : ".rel.plt"), 1,
                                                      symbol, new_func, old_func,
                                                      symidx, rel_common, &found))) return r;
            //stop scanning this table after the first matching entry
            if(found) break;
        }
    }

xh_elf_find_symidx_by_name #

其中寻找symidx这一步: 根据ELF使用的hash表类型 (GNU hash 或 ELF hash) 调用对应的查找函数, 得到symbol在symtab中的索引.

/*
 * Resolve a symbol name to its symbol-table index, dispatching on the hash
 * table flavour recorded in self->is_use_gnu_hash. The index is written
 * through symidx by the selected lookup routine, whose return value is
 * passed back unchanged.
 */
static int xh_elf_find_symidx_by_name(xh_elf_t *self, const char *symbol, uint32_t *symidx)
{
    //classic ELF hash table
    if(!self->is_use_gnu_hash)
    {
        return xh_elf_hash_lookup(self, symbol, symidx);
    }

    //GNU hash table
    return xh_elf_gnu_hash_lookup(self, symbol, symidx);
}

xh_elf_find_and_replace_func #

rel, rela表的每一项中存放着 r_offset 和 r_info, 其中 r_info 里包含了 symidx 和重定位类型.

真正替换的流程在xh_elf_find_and_replace_func.

/*
 * Inspect one relocation entry and, if it refers to the wanted symbol with
 * an acceptable relocation type, patch the slot it describes.
 *
 * section:    section name, used only in log messages.
 * is_plt:     non-zero to accept only JUMP_SLOT relocations; zero to accept
 *             only GLOB_DAT or ABS relocations.
 * symidx:     symbol index the entry must reference.
 * rel_common: pointer to an ElfW(Rela) or ElfW(Rel) entry, interpreted
 *             according to self->is_use_rela.
 * found:      optional; set to 0 on entry and to 1 once the entry matched.
 *
 * Returns 0 when the entry does not match or was patched successfully,
 * XH_ERRNO_FORMAT when the slot address lies below base_addr, or the
 * non-zero result of xh_elf_replace_function().
 */
static int xh_elf_find_and_replace_func(xh_elf_t *self, const char *section,
                                        int is_plt, const char *symbol,
                                        void *new_func, void **old_func,
                                        uint32_t symidx, void *rel_common,
                                        int *found)
{
    ElfW(Addr) reloc_offset;
    size_t     reloc_info;

    if(NULL != found) *found = 0;

    //pull offset/info out of whichever entry layout this ELF uses
    if(!self->is_use_rela)
    {
        ElfW(Rel) *entry = (ElfW(Rel) *)rel_common;
        reloc_offset = entry->r_offset;
        reloc_info   = entry->r_info;
    }
    else
    {
        ElfW(Rela) *entry = (ElfW(Rela) *)rel_common;
        reloc_offset = entry->r_offset;
        reloc_info   = entry->r_info;
    }

    //the entry must reference the requested symbol
    size_t reloc_sym = XH_ELF_R_SYM(reloc_info);
    if(reloc_sym != symidx) return 0;

    //the relocation type must fit the kind of table being scanned
    size_t reloc_type = XH_ELF_R_TYPE(reloc_info);
    if(is_plt)
    {
        if(reloc_type != XH_ELF_R_GENERIC_JUMP_SLOT) return 0;
    }
    else
    {
        if(reloc_type != XH_ELF_R_GENERIC_GLOB_DAT && reloc_type != XH_ELF_R_GENERIC_ABS) return 0;
    }

    //matched: report it before attempting the patch
    XH_LOG_INFO("found %s at %s offset: %p\n", symbol, section, (void *)reloc_offset);
    if(NULL != found) *found = 1;

    //turn the relocation offset into the in-memory slot address
    ElfW(Addr) slot_addr = self->bias_addr + reloc_offset;
    if(slot_addr < self->base_addr) return XH_ERRNO_FORMAT;

    int rc = xh_elf_replace_function(self, symbol, slot_addr, new_func, old_func);
    if(0 != rc)
    {
        XH_LOG_ERROR("replace function failed: %s at %s\n", symbol, section);
    }

    return rc;
}