iOS優秀第三方源碼解析(Hook利器之fishhook)

本篇是筆者解讀源碼項目 iOS-Framework-Analysis 的開篇,今年計劃完成 10 個優秀第三方源碼解讀,歡迎 star,和筆者一起解讀這些優秀框架背後的思想,從而提高自己的內功。該篇詳細的源碼註釋已上傳至「fishhook 源碼註釋」,如有需要請自取🎉。

在 iOS 平臺下,說起 Hook 首先會想起 Method Swizzling 這個蘋果提供的工具:它利用 Objective-C 的 Runtime 特性,在消息轉發時交換方法實現(IMP),從而達到 Hook 的目的。但 Method Swizzling 只能對 Objective-C 方法進行 Hook,如果要對 C/C++ 函數進行 Hook 操作,可以使用 facebook 提供的 fishhook 框架,本文是對該框架的解讀。

初識fishhook

首先,我們需要瞭解幾個常見的概念,有助於後面源碼的閱讀。

Mach-O: 在 iOS 和 OS X 系統下,所有可執行文件、dylib 以及 Bundle 都是 Mach-O 格式,主要由 Header、Load Commands、Data 三部分組成。

Mach-O 的具體結構(看看就好,後面有不清楚的再回頭找):

// Mach-O header, 32-bit layout. It sits at the very start of an image; the
// load commands begin immediately after it.
struct mach_header {
    uint32_t    magic;      /* mach magic number identifier */
    cpu_type_t  cputype;    /* cpu specifier */
    cpu_subtype_t   cpusubtype; /* machine specifier */
    uint32_t    filetype;   /* type of file */
    uint32_t    ncmds;      /* number of load commands */
    uint32_t    sizeofcmds; /* the size of all the load commands */
    uint32_t    flags;      /* flags */
};

// Mach-O header, 64-bit layout: the same fields as struct mach_header plus a
// trailing reserved word.
struct mach_header_64 {
    uint32_t    magic;      /* mach magic number identifier */
    cpu_type_t  cputype;    /* cpu specifier */
    cpu_subtype_t   cpusubtype; /* machine specifier */
    uint32_t    filetype;   /* type of file */
    uint32_t    ncmds;      /* number of load commands */
    uint32_t    sizeofcmds; /* the size of all the load commands */
    uint32_t    flags;      /* flags */
    uint32_t    reserved;   /* reserved */
};

// Common prefix of every load command. `cmd` tells which concrete command
// structure this is; `cmdsize` is the distance to the next load command.
struct load_command {
    uint32_t cmd;       /* type of load command */
    uint32_t cmdsize;   /* total size of command in bytes */
};


// The segment load command, 64-bit layout.
// The 32-bit struct segment_command uses the same field names; per
// <mach-o/loader.h> its vmaddr/vmsize/fileoff/filesize are uint32_t, so only
// the names (not the sizes) are shared between the two layouts.
struct segment_command_64 { /* for 64-bit architectures */
    uint32_t    cmd;        /* LC_SEGMENT_64 */
    uint32_t    cmdsize;    /* includes sizeof section_64 structs*/
    char        segname[16];    /* segment name */
    uint64_t    vmaddr;        /* memory address of this segment*/
    uint64_t    vmsize;        /* memory size of this segment */
    uint64_t    fileoff;    /* file offset of this segment */
    uint64_t    filesize;    /* amount to map from the file */
    vm_prot_t    maxprot;    /* maximum VM protection */
    vm_prot_t    initprot;    /* initial VM protection */
    uint32_t    nsects;        /* number of sections in segment*/
    uint32_t    flags;        /* flags */
};


// A section header as stored after a segment load command (for example the
// sections of the __DATA segment), 64-bit layout.
// Field descriptions follow <mach-o/loader.h>. The 32-bit struct section uses
// the same field names, but its addr/size are uint32_t and it has no
// reserved3 member.
struct section_64 {
    char     sectname[16];  /* name of this section */
    char     segname[16];   /* segment this section goes in */
    uint64_t addr;          /* memory address of this section */
    uint64_t size;          /* size in bytes of this section */
    uint32_t offset;        /* file offset of this section */
    uint32_t align;         /* section alignment (power of 2) */
    uint32_t reloff;        /* file offset of relocation entries */
    uint32_t nreloc;        /* number of relocation entries */
    uint32_t flags;         /* section type and attributes */
    uint32_t reserved1;     /* for symbol pointer sections: first index into the indirect symbol table */
    uint32_t reserved2;     /* reserved */
    uint32_t reserved3;     /* reserved; present only in the 64-bit layout */
};

複製代碼

dyld ( the dynamic link editor ):負責將各種各樣程序需要的鏡像加載到程序運行的內存空間中,這個過程發生的時間非常早 — 在 objc 運行時初始化之前。

鏡像(image):dyld 會將 Mach-O 文件作爲鏡像,即鏡像就是 Mach-O。

_dyld_register_func_for_add_image: 每個鏡像被 dyld 加載時,都會執行系統註冊過的回調函數。可以通過該方法註冊自定義的回調函數;調用該函數註冊時,會對所有鏡像都執行一次回調函數,無論鏡像是否已經加載過。

dyld_stub_binder: 在目標符號(例如 printf)首次被調用時,將其鏈接到指定的動態鏈接庫,找到對應符號的真實地址進行綁定(printf 符號位於 __DATA 段中的 lazy 符號表中)。

Mach-O 的動態綁定機制:編譯 App 時,系統共享庫不會編譯到 Mach-O 文件中,而是第一次調用時才通過 dyld 動態綁定,將 Mach-O 的 __DATA 段符號表中對應的指針指向外部系統共享庫中的真正實現。

fishhook 正是利用動態綁定機制,先確定某一個符號在 __DATA 段中的位置,然後保存原符號對應的函數指針,並使用新的函數指針覆蓋原有符號的函數指針,實現替換。

fishhook源碼解讀

申明

#ifndef fishhook_h
#define fishhook_h

#include <stddef.h>
#include <stdint.h>

/*
 * FISHHOOK_VISIBILITY sets the symbol visibility of the public functions:
 * hidden unless the including project defines FISHHOOK_EXPORT.
 */
#if !defined(FISHHOOK_EXPORT)
#define FISHHOOK_VISIBILITY __attribute__((visibility("hidden")))
#else
#define FISHHOOK_VISIBILITY __attribute__((visibility("default")))
#endif

#ifdef __cplusplus
extern "C" {
#endif //__cplusplus

/*
 * A structure representing a particular intended rebinding from a symbol
 * name to its replacement
 */
struct rebinding {
  const char *name;   // name of the function (symbol) to hook
  void *replacement;  // function pointer that takes the symbol's place
  void **replaced;    // where the original function pointer is stored after a
                      // successful rebinding; may be NULL if it is not needed
};

/*
 * For each rebinding in rebindings, rebinds references to external, indirect
 * symbols with the specified name to instead point at replacement for each
 * image in the calling process as well as for all future images that are loaded
 * by the process. If rebind_functions is called more than once, the symbols to
 * rebind are added to the existing list of rebindings, and if a given symbol
 * is rebound more than once, the later rebinding will take precedence.
 */
FISHHOOK_VISIBILITY
// Parameters: the array of struct rebinding and its number of elements.
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);

/*
 * Rebinds as above, but only in the specified image. The header should point
 * to the mach-o header, the slide should be the slide offset. Others as above.
 */
FISHHOOK_VISIBILITY
// Rebinds only inside the given image: `header` is that image's Mach-O header
// and `slide` its slide offset; the remaining parameters are as above.
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel);

#ifdef __cplusplus
}
#endif //__cplusplus

#endif //fishhook_h

複製代碼

實現

#include "fishhook.h"

#include <dlfcn.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <mach/vm_region.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>

/*
 * Pick the Mach-O structure layout that matches the pointer width of the
 * build. __LP64__ is the compiler-predefined macro for 64-bit long/pointer
 * targets; without it the 32-bit layouts are used.
 */
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
// LC_SEGMENT_64: load command type that maps a 64-bit segment of the file
// into the process address space
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif

// Fallback definition, used only when no SEG_DATA_CONST macro is already
// defined. Mach-O segment names start with two underscores.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST "__DATA_CONST"
#endif

// One node of the singly linked list of registered rebinding batches.
struct rebindings_entry {
  struct rebinding *rebindings;   // array of rebindings belonging to this batch
  size_t rebindings_nel;          // number of elements in `rebindings`
  struct rebindings_entry *next;  // next node in the list
};

// File-private head of the list of rebinding batches.
static struct rebindings_entry *_rebindings_head;
複製代碼

首先是引入頭文件,按照不同架構定義一些系統結構體的別名,並定義 rebindings_entry 鏈表節點,創建一個私有的鏈表頭節點 _rebindings_head,每次調用都會將最新的 rebindings_entry 插入頭部。

rebind_symbols

我們從 rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) 調用入手,看下整個代碼邏輯是怎麼樣的。

/*
 * Registers a batch of rebindings and applies it to the images of the
 * process.
 *
 * rebindings      array of rebindings to register
 * rebindings_nel  number of elements in the array
 *
 * Returns the result of prepend_rebindings: negative on failure (nothing is
 * applied in that case), otherwise that same value after applying.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  // A head with no successor means this is the first registered batch:
  // install the add-image callback, which is also invoked for the images
  // that already exist.
  if (_rebindings_head->next == NULL) {
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // The callback is already installed, so apply the rebindings to the
  // currently known images by hand.
  const uint32_t image_count = _dyld_image_count();
  uint32_t image_index = 0;
  while (image_index < image_count) {
    _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                              _dyld_get_image_vmaddr_slide(image_index));
    image_index++;
  }
  return status;
}
複製代碼

rebind_symbols 主要做了兩件事,首先是調用 prepend_rebindings 將傳入的 rebindings 封裝成 rebindings_entry,並插入到私有鏈表的表頭。

/*
 * Copies `rebindings` into a newly allocated list node and pushes that node
 * onto the front of the list headed by *rebindings_head.
 *
 * rebindings_head  address of the list head to update
 * rebindings       array to copy
 * nel              number of elements in `rebindings`
 *
 * Returns 0 on success. Returns -1, leaving the list untouched, when the byte
 * size of the array cannot be represented in size_t or an allocation fails.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  // Guard the multiplication below: a wrapped size would allocate a buffer
  // that is too small for the nel elements walked later.
  if (nel > SIZE_MAX / sizeof(struct rebinding)) {
    return -1;
  }
  const size_t rebindings_size = sizeof(struct rebinding) * nel;

  // Allocate the list node.
  struct rebindings_entry *new_entry = malloc(sizeof *new_entry);
  if (!new_entry) {
    return -1;
  }
  // The node owns a private copy of the array.
  new_entry->rebindings = malloc(rebindings_size);
  if (!new_entry->rebindings) {
    free(new_entry);
    return -1;
  }
  memcpy(new_entry->rebindings, rebindings, rebindings_size);
  new_entry->rebindings_nel = nel;
  // The newest batch always goes to the front of the list.
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}
複製代碼

接着通過鏈表內容判斷是否是第一次執行,如果是首次則調用函數 _dyld_register_func_for_add_image 註冊自定義回調,系統會在 dyld 加載、鏈接鏡像時自動執行回調。非首次調用則遍歷所有鏡像(image),手動執行自定義回調方法。

通過自定義回調方法 _rebind_symbols_for_image 查找符號在鏡像中的位置,並在鏡像的 __DATA 段中綁定我們自己的實現。

rebind_symbols_for_image

rebind_symbols_for_image 做的事可以分成兩部分,第一部分是在 Load Commands 中找到與符號表相關的 command,並得到各個表的準確地址,包括動態符號表、符號表和字符串表。

// Thin wrapper around the real implementation: forwards the image header and
// slide to rebind_symbols_for_image together with the global rebinding list.
// This is the function rebind_symbols registers as the add-image callback.
static void _rebind_symbols_for_image(const struct mach_header *header,
                                      intptr_t slide) {
    rebind_symbols_for_image(_rebindings_head, header, slide);
}

// Applies the rebinding list `rebindings` to one image.
//   header - Mach-O header of the image
//   slide  - slide offset of the image
// This first half locates the symbol table, string table and indirect symbol
// table of the image.
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  Dl_info info;
  // Give up on this image when dladdr reports failure (0) for the header.
  if (dladdr(header, &info) == 0) {
    return;
  }
  // Load commands needed to reach the symbol data: the __LINKEDIT segment
  // command, the symtab command and the dysymtab command.
  segment_command_t *cur_seg_cmd;
  segment_command_t *linkedit_segment = NULL; // __LINKEDIT segment command
  struct symtab_command* symtab_cmd = NULL; // symbol table command
  struct dysymtab_command* dysymtab_cmd = NULL; // dynamic symbol table command (locates the indirect symbol table)
  // Pass 1: walk the load commands and remember the three commands above.
  // A Mach-O image is laid out as header, load commands, data, so the first
  // load command starts sizeof(mach_header_t) bytes after the header.
  uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
  // The __LINKEDIT segment holds the raw data used by the dynamic linker,
  // such as symbols, strings and relocation entries.
  /*
   LC_SYMTAB gives two pieces of information:
      the offset of the symbol table and its number of entries
      the offset of the string table and its length
   LC_DYSYMTAB gives the offset and entry count of the dynamic (indirect)
      symbol table, plus some other table indices
   */
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
        linkedit_segment = cur_seg_cmd;
      }
    } else if (cur_seg_cmd->cmd == LC_SYMTAB) {
      symtab_cmd = (struct symtab_command*)cur_seg_cmd;
    } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
      dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
    }
  }

  // Nothing to rebind when any of the three commands is missing or the image
  // has no indirect symbols.
  if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
      !dysymtab_cmd->nindirectsyms) {
    return;
  }

  // Find the tables in memory.
  // Without ASLR:  base = __LINKEDIT vmaddr - __LINKEDIT fileoff
  // With ASLR:     base = __LINKEDIT vmaddr + slide - __LINKEDIT fileoff
  uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  // symbol table address = base + symbol table offset
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  // string table address = base + string table offset
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
  // indirect symbol table address = base + indirect symbol table offset
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  ......
複製代碼

第二部分是遍歷鏡像 __DATA 中的 section,找到 LAZY_SYMBOL_POINTERS 和 NON_LAZY_SYMBOL_POINTERS,由開頭可知,這兩個 section 是 Mach-O 的 __DATA 段中用來綁定 lazy 和 non-lazy 符號的指針表,最後調用替換方法 perform_rebinding_with_section。

爲什麼要找 LAZY_SYMBOL_POINTERS/NON_LAZY_SYMBOL_POINTERS?NON_LAZY_SYMBOL_POINTERS 是非懶加載指針表;LAZY_SYMBOL_POINTERS 是懶加載指針表,符號第一次調用時通過 dyld 中的 dyld_stub_binder 加載到表中。這兩個表是 __DATA 中跟動態符號鏈接相關的部分,所以需要找到原方法在這兩個部分中的指針去替換。對於動態鏈接庫裏面的 C 函數,第一次調用的時候,我們會得到函數和實現地址的對應關係,函數的實現地址存放在一個叫 LAZY_SYMBOL_POINTERS 的地方;第二次調用的時候,直接通過 LAZY_SYMBOL_POINTERS 找到函數地址就可以,不再需要繁瑣的獲取函數地址的過程。

......

  // Pass 2: walk the load commands again, visit the sections of the segments
  // named SEG_DATA / SEG_DATA_CONST and rebind inside the lazy and non-lazy
  // symbol pointer sections (__la_symbol_ptr / __nl_symbol_ptr, a.k.a. got).
  cur = (uintptr_t)header + sizeof(mach_header_t);
  for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
    cur_seg_cmd = (segment_command_t *)cur;
    if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
      // Skip every segment that is neither SEG_DATA nor SEG_DATA_CONST.
      if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
          strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
        continue;
      }
      // Visit each section header of this segment.
      for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
        // The section headers are read as an array that starts right after
        // the segment command structure, so entry j is at
        // cur + sizeof(segment_command_t) + j * sizeof(section_t).
        section_t *sect =
          (section_t *)(cur + sizeof(segment_command_t)) + j;
        // Lazy symbol pointer section (__la_symbol_ptr).
        if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
        // Non-lazy symbol pointer section (__nl_symbol_ptr).
        if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
          perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
        }
      }
    }
  }
}
複製代碼

perform_rebinding_with_section

該方法根據傳入的 NON-Lazy 或 Lazy 數據段,遍歷該數據段的符號,找到其對應的符號名並與傳入的符號名進行匹配,命中則進行替換。

/*
 * Rebinds the matching entries of one lazy or non-lazy symbol pointer
 * section.
 *
 * For every pointer slot of `section`, the symbol name is looked up through
 * the indirect symbol table, the symbol table and the string table. When the
 * name equals the name of a registered rebinding, the current pointer is
 * optionally saved through `replaced` and the slot is overwritten with
 * `replacement`. The first matching rebinding wins; the list is searched from
 * its head.
 *
 * rebindings       head of the list of rebinding batches
 * section          section header of the symbol pointer section
 * slide            slide offset of the image
 * symtab           symbol table
 * strtab           string table
 * indirect_symtab  indirect (dynamic) symbol table
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  const bool isDataConst = strcmp(section->segname, "__DATA_CONST") == 0;
  // For symbol pointer sections, reserved1 is the index of the section's
  // first entry in the indirect symbol table.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // The section contents in memory: one pointer slot per symbol.
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  vm_prot_t oldProtection = VM_PROT_READ;
  if (isDataConst) {
    // Remember the protection of the pointer table itself, because that is
    // the region restored at the end of this function. The original passed
    // `rebindings` (the rebinding list) here, which describes unrelated
    // memory.
    // NOTE(review): get_protection is defined outside this listing; this
    // assumes it takes the address whose protection should be queried.
    oldProtection = get_protection((void *)indirect_symbol_bindings);
    // mprotect changes the protection of a memory range; temporarily make
    // the constant data readable and writable.
    mprotect(indirect_symbol_bindings, section->size, PROT_READ | PROT_WRITE);
  }
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    // Index of this slot's symbol in the symbol table.
    uint32_t symtab_index = indirect_symbol_indices[i];
    // Entries flagged absolute and/or local are not rebound.
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL   | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // Offset of the symbol's name inside the string table.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    // The symbol's name.
    char *symbol_name = strtab + strtab_offset;
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
    // Search all registered rebindings for this name.
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        // The comparison skips the first character of the stored name
        // (presumably the leading underscore of C symbols - TODO confirm).
        if (symbol_name_longer_than_1 &&
            strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Save the current pointer only when the caller asked for it and
          // the slot does not already hold the replacement; otherwise a
          // repeated pass would store the replacement as the "original".
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          // Point the slot at the new implementation.
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }
  // Restore the protection of the constant data, translating the VM_PROT_*
  // bits into the PROT_* bits mprotect expects.
  if (isDataConst) {
    int protection = 0;
    if (oldProtection & VM_PROT_READ) {
      protection |= PROT_READ;
    }
    if (oldProtection & VM_PROT_WRITE) {
      protection |= PROT_WRITE;
    }
    if (oldProtection & VM_PROT_EXECUTE) {
      protection |= PROT_EXEC;
    }
    mprotect(indirect_symbol_bindings, section->size, protection);
  }
}
複製代碼

總結

最後,用github上的lazy說明圖總結下流程。
fishhook 首先通過遍歷鏡像的 load commands 獲取符號表、動態符號表和字符串表,接着遍歷 __DATA 段,得到 LAZY_SYMBOL_POINTERS 和 NON_LAZY_SYMBOL_POINTERS,裏面記錄着該 section 的符號在動態符號表中的起始位置( indirect_symtab + section->reserved1 )和所有符號對應的實現指針地址( section->addr ),再通過遍歷動態符號表取得每個符號的符號名進行比對,與傳入的符號名相同時則進行實現的替換。
fishhook 也有其侷限性,由於是依賴 Mach-O 的動態綁定機制實現的,所以只能 Hook 外部共享庫中的函數,對於編譯時就已經確定地址的內部/自定義 C 函數,fishhook 就無能爲力了。

找到符號名稱

參考連接:
fishhook
fishhook源碼分析
趣探 Mach-O:FishHook 解析

Find Me 👇

wechat: yhbxcq

相關文章
相關標籤/搜索