本篇是筆者解讀源碼項目 iOS-Framework-Analysis 的開篇,今年計劃完成10個優秀第三方源碼解讀,歡迎star和筆者一起解讀這些優秀框架的背後思想,從而提高自己的內功。該篇詳細的源碼註釋已上傳 fishhook源碼註釋,如有需要請自取🎉。
在iOS平臺下,提及Hook首先會想起MethodSwizzling這個蘋果提供的工具,它利用Objective-C的Runtime特性,通過在消息轉發時交換方法實現(IMP)來完成Hook。但MethodSwizzling只能對Objective-C方法進行Hook,如果要對C/C++函數進行Hook操作,可以使用facebook提供的fishhook框架,本文是對該框架的解讀。
首先,我們需要瞭解幾個常見的概念,有助於後面源碼的閱讀。
Mach-O: 在iOS和OS X系統下,所有可執行文件、dylib 以及 Bundle都是Mach-O格式,主要由Header、Load Commands和Data組成。
Mach-O的具體結構(看看就好,後面有不清楚的再回頭找):
// header
/* Mach-O image header (non-64-bit variant); the load commands are read from directly after it. */
struct mach_header {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
};
/* 64-bit Mach-O image header: the same fields as struct mach_header plus a trailing reserved word. */
struct mach_header_64 {
uint32_t magic; /* mach magic number identifier */
cpu_type_t cputype; /* cpu specifier */
cpu_subtype_t cpusubtype; /* machine specifier */
uint32_t filetype; /* type of file */
uint32_t ncmds; /* number of load commands */
uint32_t sizeofcmds; /* the size of all the load commands */
uint32_t flags; /* flags */
uint32_t reserved; /* reserved */
};
// Load command: the two leading fields (type and total size) that are read
// from each command while walking the load-command list.
struct load_command {
uint32_t cmd; /* type of load command */
uint32_t cmdsize; /* total size of command in bytes */
};
// The segment_command kind of load command.
// The 32-bit and 64-bit variants have the same set of fields.
struct segment_command_64 { /* for 64-bit architectures */
uint32_t cmd; /* LC_SEGMENT_64 */
uint32_t cmdsize; /* includes sizeof section_64 structs*/
char segname[16]; /* segment name */
uint64_t vmaddr; /* memory address of this segment*/
uint64_t vmsize; /* memory size of this segment */
uint64_t fileoff; /* file offset of this segment */
uint64_t filesize; /* amount to map from the file */
vm_prot_t maxprot; /* maximum VM protection */
vm_prot_t initprot; /* initial VM protection */
uint32_t nsects; /* number of sections in segment*/
uint32_t flags; /* flags */
};
// Section header for one section inside a segment such as __DATA.
// It has the same fields as the 32-bit `struct section`, except that addr and
// size are 64 bits wide and the trailing reserved3 word exists only in the
// 64-bit variant.
struct section_64
{
  char sectname[16];   /* name of this section */
  char segname[16];    /* segment this section goes in */
  uint64_t addr;       /* memory address of this section */
  uint64_t size;       /* size in bytes of this section */
  uint32_t offset;     /* file offset of this section */
  uint32_t align;      /* section alignment (power of 2) */
  uint32_t reloff;     /* file offset of relocation entries */
  uint32_t nreloc;     /* number of relocation entries */
  uint32_t flags;      /* flags (section type and attributes) */
  uint32_t reserved1;  /* reserved (for symbol pointer sections: start index in the indirect symbol table) */
  uint32_t reserved2;  /* reserved */
  uint32_t reserved3;  /* reserved */
};
複製代碼
dyld ( the dynamic link editor ):負責將各種各樣程序需要的鏡像加載到程序運行的內存空間中,這個過程發生的時間非常早 — 在 objc 運行時初始化之前。
鏡像(image):dyld會將Mach-O文件作爲鏡像,即鏡像就是Mach-O。
_dyld_register_func_for_add_image: 每個鏡像被dyld加載時,都會執行系統註冊過的回調函數,可以通過該方法註冊自定義的回調函數。當調用該函數註冊時,會讓所有鏡像都執行回調函數,無論是否已經加載過。
dyld_stub_binder: 在目標符號(例如 printf)首次被調用時,將其鏈接到指定的動態鏈接庫,找到對應符號的真實地址進行綁定(printf 符號位於 __DATA 段中的 lazy 符號表中)。
Mach-O的動態綁定機制:編譯App時,系統共享庫不會編譯到Mach-O文件中,而是第一次調用時才通過dyld動態綁定,將Mach-O的 __DATA 段符號表中對應的指針指向外部系統共享庫中的真正實現。
fishhook正是利用動態綁定機制,先確定某一個符號在 __DATA 段中的位置,然後保存原符號對應的函數指針,並使用新的函數指針覆蓋原有符號的函數指針,實現替換。
#ifndef fishhook_h
#define fishhook_h
#include <stddef.h>
#include <stdint.h>
// The API is hidden from other linkage units unless FISHHOOK_EXPORT is defined.
#if !defined(FISHHOOK_EXPORT)
#define FISHHOOK_VISIBILITY __attribute__((visibility("hidden")))
#else
#define FISHHOOK_VISIBILITY __attribute__((visibility("default")))
#endif
#ifdef __cplusplus
extern "C" {
#endif //__cplusplus
/*
 * A structure representing a particular intended rebinding from a symbol
 * name to its replacement
 */
struct rebinding {
  const char *name;   // name of the function to hook
  void *replacement;  // pointer to the replacement function
  void **replaced;    // where the original function pointer is stored after a
                      // successful rebinding; may be NULL if it is not needed
};
/*
 * For each rebinding in rebindings, rebinds references to external, indirect
 * symbols with the specified name to instead point at replacement for each
 * image in the calling process as well as for all future images that are loaded
 * by the process. If rebind_functions is called more than once, the symbols to
 * rebind are added to the existing list of rebindings, and if a given symbol
 * is rebound more than once, the later rebinding will take precedence.
 *
 * rebindings is an array of struct rebinding and rebindings_nel is the number
 * of elements in that array.
 */
FISHHOOK_VISIBILITY
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel);
/*
 * Rebinds as above, but only in the specified image. The header should point
 * to the mach-o header, the slide should be the slide offset. Others as above.
 */
FISHHOOK_VISIBILITY
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel);
#ifdef __cplusplus
}
#endif //__cplusplus
#endif //fishhook_h
複製代碼
#include "fishhook.h"
#include <dlfcn.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <mach/mach.h>
#include <mach/vm_map.h>
#include <mach/vm_region.h>
#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>
// Pick the Mach-O structure flavour that matches the pointer width of the
// build. __LP64__ is the macro the compiler predefines on 64-bit targets.
#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
// LC_SEGMENT_64: load command type for a 64-bit segment that is mapped into
// the process address space
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif
// Fallback for SDK headers that do not define the constant; the segment name
// is compared with strcmp, so it must be spelled with two leading underscores.
#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST "__DATA_CONST"
#endif
// One node of the singly linked list of rebinding requests.
struct rebindings_entry {
struct rebinding *rebindings; // array of rebindings held by this node
size_t rebindings_nel; // number of elements in `rebindings`
struct rebindings_entry *next; // next node in the list
};
// File-private head of the list of rebinding requests.
static struct rebindings_entry *_rebindings_head;
複製代碼
首先是引入頭文件,按照不同架構定義一些系統結構體,並定義 rebindings_entry 鏈表節點,創建一個私有的鏈表頭節點 _rebindings_head,每次調用都會將最新的rebindings_entry插入頭部。
我們從 rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) 調用入手,看下整個代碼邏輯是怎麼樣的。
/*
 * Queues `rebindings` (an array of `rebindings_nel` entries) on the global
 * rebinding list and applies the list to the loaded images.
 *
 * Returns the negative value reported by prepend_rebindings on failure,
 * otherwise its non-negative result.
 */
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next) {
    // Not the first call: the add-image callback was registered earlier, so
    // run the rebinding over every image dyld currently reports.
    const uint32_t image_count = _dyld_image_count();
    uint32_t image_index = 0;
    while (image_index < image_count) {
      _rebind_symbols_for_image(_dyld_get_image_header(image_index),
                                _dyld_get_image_vmaddr_slide(image_index));
      image_index++;
    }
    return status;
  }

  // First call (the list holds a single entry): register the add-image
  // callback, which is also invoked for the images that already exist.
  _dyld_register_func_for_add_image(_rebind_symbols_for_image);
  return status;
}
複製代碼
rebind_symbols 主要做了兩件事,首先是調用 prepend_rebindings 將傳入的rebindings封裝成rebindings_entry,並插入到私有鏈表的表頭。
/*
 * Copies `nel` rebindings into a newly allocated list node and makes that node
 * the new head of the list at `*rebindings_head`.
 *
 * The array is copied, so the caller keeps ownership of `rebindings`.
 * Returns 0 on success and -1 when the request is too large or an allocation
 * fails; in that case the list is left untouched.
 */
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  // Refuse element counts whose byte size would wrap around size_t: the
  // allocation would be too small for the memcpy below.
  if (nel > SIZE_MAX / sizeof(struct rebinding)) {
    return -1;
  }
  // Allocate the list node itself.
  struct rebindings_entry *new_entry = (struct rebindings_entry *) malloc(sizeof(struct rebindings_entry));
  if (!new_entry) {
    return -1;
  }
  // Allocate the node's private copy of the rebinding array.
  new_entry->rebindings = (struct rebinding *) malloc(sizeof(struct rebinding) * nel);
  if (!new_entry->rebindings) {
    free(new_entry);
    return -1;
  }
  memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel);
  new_entry->rebindings_nel = nel;
  // The newest entry always goes to the front of the list.
  new_entry->next = *rebindings_head;
  *rebindings_head = new_entry;
  return 0;
}
複製代碼
接着通過鏈表內容判斷是否是第一次執行,如果是首次則調用函數 _dyld_register_func_for_add_image 註冊自定義回調,系統會在dyld加載鏡像時自動執行回調。非首次調用則遍歷所有鏡像(image),手動執行自定義回調方法。
通過自定義回調方法 _rebind_symbols_for_image 實現查找符號在鏡像中的位置,並在鏡像的 __DATA 段中綁定我們自己的實現。
rebind_symbols_for_image 做的事可以分成兩部分,第一部分是在Load Commands中找到與符號表相關的command,並得到各表的準確地址,包括動態符號表、符號表和字符串表。
// Thin wrapper around rebind_symbols_for_image that supplies the global
// rebinding list (_rebindings_head); it is the function passed to
// _dyld_register_func_for_add_image.
static void _rebind_symbols_for_image(const struct mach_header *header,
intptr_t slide) {
rebind_symbols_for_image(_rebindings_head, header, slide);
}
/*
 * Applies the rebinding list to one image (first half of the function).
 * This part walks the image's load commands to find the SEG_LINKEDIT segment
 * command, LC_SYMTAB and LC_DYSYMTAB, then computes the in-memory addresses
 * of the symbol table, the string table and the indirect symbol table.
 */
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide) {
Dl_info info;
// Give up when dladdr cannot resolve the header address.
if (dladdr(header, &info) == 0) {
return;
}
// Commands related to the symbol tables: the linkedit segment command, the
// symtab command and the dysymtab command.
segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL; // SEG_LINKEDIT segment command
struct symtab_command* symtab_cmd = NULL; // symbol table command
struct dysymtab_command* dysymtab_cmd = NULL; // dynamic symbol table command (locates the indirect symbol table)
// 1. Walk the load commands and collect the three commands above.
// A Mach-O image consists of three regions (header, load commands, data).
// The load commands are what we want, so start sizeof(mach_header_t) bytes in.
uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
// The __LINKEDIT segment holds the raw data used by the dynamic linker, such
// as symbols, strings and relocation entries.
/*
LC_SEGMENT_64: a segment command; only the one named SEG_LINKEDIT is kept here.
LC_SYMTAB: this load command provides two pieces of information:
the offset of the symbol table and its number of entries, and
the offset of the string table and its length.
LC_DYSYMTAB: provides the offset and entry count of the dynamic symbol
table, along with indexes into some other tables.
*/
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}
// Nothing to do unless all three commands were found and the image has
// indirect symbols.
if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}
// Locate the tables.
// Without a slide: base = linkedit vmaddr - linkedit fileoff.
// With ASLR: real base = linkedit vmaddr + slide - fileoff.
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
// symbol table address = base + symbol table offset
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// string table address = base + string table offset
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
// indirect symbol table address = base + indirect symbol table offset
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
......
複製代碼
第二部分是遍歷鏡像 __DATA 段中的section,找到 LAZY_SYMBOL_POINTERS 和 NON_LAZY_SYMBOL_POINTERS。由開頭可知,這兩個section是Mach-O的 __DATA 段中用來綁定lazy和non-lazy符號的指針表,最後調用替換方法 perform_rebinding_with_section。
爲什麼要找 LAZY_SYMBOL_POINTERS/NON_LAZY_SYMBOL_POINTERS?NON_LAZY_SYMBOL_POINTERS 是非懶加載指針表;LAZY_SYMBOL_POINTERS 是懶加載指針表,符號第一次調用時通過 dyld 中的 dyld_stub_binder 加載到表中。這兩個表是 __DATA 中跟動態符號鏈接相關的部分,所以需要找到原方法在這兩個部分的指針去替換。對於動態鏈接庫裏面的C函數,第一次調用的時候,我們會得到函數和實現地址的對應關係,函數的實現地址存放在一個叫 LAZY_SYMBOL_POINTERS 的地方;第二次調用的時候,直接通過 LAZY_SYMBOL_POINTERS 找到函數地址就可以,不再需要繁瑣的獲取函數地址的過程。
......
// 2. Walk the load commands again, this time looking only at the SEG_DATA and
// SEG_DATA_CONST segments and, inside them, at the lazy and non-lazy symbol
// pointer sections.
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// Skip every segment that is neither SEG_DATA nor SEG_DATA_CONST.
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// Visit each section header of this segment.
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
// The section headers are read from directly after the segment command
// structure, so step over sizeof(segment_command_t) and index by j.
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
// Lazy symbol pointer section.
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
// Non-lazy symbol pointer section.
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}
複製代碼
該方法根據傳入的 Non-Lazy 或 Lazy 數據段,遍歷該數據段的符號,找到其對應的符號名並與傳入的符號名進行匹配,命中則進行替換。
/*
 * Rebinds every matching pointer inside one lazy or non-lazy symbol pointer
 * section.
 *
 * rebindings      - head of the list of requested rebindings
 * section         - the symbol pointer section to patch
 * slide           - slide of the image that contains the section
 * symtab          - the image's symbol table
 * strtab          - the image's string table
 * indirect_symtab - the image's indirect symbol table
 *
 * For each pointer slot the symbol name is looked up through the indirect
 * symbol table, the symbol table and the string table. On the first match in
 * the rebinding list the current pointer is saved through `replaced` (when
 * that is non-NULL and the slot does not already hold the replacement) and
 * the slot is overwritten with the replacement.
 */
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  const bool isDataConst = strcmp(section->segname, "__DATA_CONST") == 0;
  // For symbol pointer sections, reserved1 is the index in the indirect
  // symbol table at which this section's entries start.
  uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;
  // In-memory array of pointers held by this section.
  void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);
  vm_prot_t oldProtection = VM_PROT_READ;
  if (isDataConst) {
    // Remember the protection of the section memory itself (not of the
    // rebinding list) so that the same protection can be restored below.
    // NOTE(review): assumes get_protection takes the address to query.
    oldProtection = get_protection(indirect_symbol_bindings);
    // Temporarily make the constant data readable and writable.
    mprotect(indirect_symbol_bindings, section->size, PROT_READ | PROT_WRITE);
  }
  for (uint i = 0; i < section->size / sizeof(void *); i++) {
    // Index of this slot's symbol in the symbol table.
    uint32_t symtab_index = indirect_symbol_indices[i];
    if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }
    // Offset of the symbol's name in the string table.
    uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;
    char *symbol_name = strtab + strtab_offset;
    bool symbol_name_longer_than_1 = symbol_name[0] && symbol_name[1];
    // Search the rebinding list, newest entry first; the first match wins.
    struct rebindings_entry *cur = rebindings;
    while (cur) {
      for (uint j = 0; j < cur->rebindings_nel; j++) {
        // The comparison starts at the second character of the symbol name
        // (presumably to skip the leading '_' of C symbols).
        if (symbol_name_longer_than_1 &&
            strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
          // Save the current pointer only if the caller asked for it and the
          // slot has not already been rebound to this replacement.
          if (cur->rebindings[j].replaced != NULL &&
              indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
            *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
          }
          // Point the slot at the new implementation.
          indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
          goto symbol_loop;
        }
      }
      cur = cur->next;
    }
  symbol_loop:;
  }
  // Restore the protection that the constant data had before patching.
  if (isDataConst) {
    int protection = 0;
    if (oldProtection & VM_PROT_READ) {
      protection |= PROT_READ;
    }
    if (oldProtection & VM_PROT_WRITE) {
      protection |= PROT_WRITE;
    }
    if (oldProtection & VM_PROT_EXECUTE) {
      protection |= PROT_EXEC;
    }
    mprotect(indirect_symbol_bindings, section->size, protection);
  }
}
複製代碼
最後,用github上的lazy說明圖總結下流程。
fishhook 首先通過遍歷鏡像的 load commands 獲取符號表、動態符號表和字符串表,接着遍歷 __DATA 段,得到 LAZY_SYMBOL_POINTERS 和 NON_LAZY_SYMBOL_POINTERS,裏面記錄着該section在動態符號表中的起始位置( indirect_symtab + section->reserved1 )和所有符號對應的實現指針地址( section->addr ),再通過遍歷動態符號表取得每個符號的符號名進行比對,與傳入的符號名相同時則進行實現的替換。
fishhook也有其侷限性,由於是依賴 Mach-O 的動態綁定機制實現的,所以只能Hook在外部共享庫中的函數,對於編譯時就已經確定的內部/自定義的 C 函數,fishhook 就無能爲力了。
參考連接:
fishhook
fishhook源碼分析
趣探 Mach-O:FishHook 解析
wechat: yhbxcq