dyld通過更新Mach-O二進制文件中特定__DATA段的指針來綁定惰性和非惰性符號。fishhook通過傳遞給rebind_symbols的符號名來確定需要更新的位置,然後用相應的替換項重新綁定這些符號。
對於給定的鏡像,__DATA段可以包含與動態符號綁定相關的兩個部分:__nl_symbol_ptr和__la_symbol_ptr。
__nl_symbol_ptr是指向非延遲綁定數據的指針數組(這些指針在加載庫時綁定)。
__la_symbol_ptr是指向導入函數的指針數組,一般在第一次調用該符號時由名爲dyld_stub_binder的例程填充(也可以在啓動時告訴dyld綁定這些指針)。
爲了找到對應於這些部分中某個特定位置的符號的名稱,我們需要通過幾個間接層來進行查看。
對於這兩個相關部分,section header(<mach-o/loader.h>中聲明的struct section)提供了一個指向所謂間接符號表的偏移量(保存在reserved1字段中)。
間接符號表位於二進制文件的__LINKEDIT段中,它只是符號表(也在__LINKEDIT中)中的索引數組,其順序與非惰性和惰性符號部分中的指針順序相同。所以,對於給定的struct section nl_symbol_ptr,該部分中第一個地址在符號表中的對應索引是indirect_symbol_table[nl_symbol_ptr->reserved1]。
符號表本身是一個struct nlist數組(請參見<mach-o/nlist.h>),每個nlist都包含一個指向__LINKEDIT中字符串表的索引,其中存儲了實際的符號名。所以,對於__nl_symbol_ptr和__la_symbol_ptr中的每個指針,我們都可以找到相應的符號,然後找到相應的字符串與請求的符號名進行比較,如果有匹配項,我們就用替換項替換節中的指針。
//--------------------------------- Hooking NSLog -----------
// Holds the original NSLog implementation. fishhook writes the previous
// binding here through the `replaced` field of the rebinding request.
static void (*sys_nslog)(NSString *format, ...);

// Replacement for NSLog: expands the caller's format string with its variadic
// arguments, appends a marker suffix, and forwards the finished message to the
// original implementation.
//
// The message is forwarded as an argument to a fixed @"%@" format rather than
// as the format string itself, so '%' specifiers in the caller's format are
// consumed together with the arguments they belong to.
void my_nslog(NSString *format, ...) {
    if (format == nil) {
        // Nothing to expand; hand the nil format through unchanged.
        sys_nslog(format);
        return;
    }

    va_list arguments;
    va_start(arguments, format);
    NSString *message = [[NSString alloc] initWithFormat:format arguments:arguments];
    va_end(arguments);

    // Call the original implementation.
    sys_nslog(@"%@", [message stringByAppendingString:@"你咋又來了 \n"]);
}
@implementation ViewController

/// Logs once, rebinds the NSLog symbol to my_nslog via fishhook, then logs
/// again so the output before and after the hook can be compared.
- (void)viewDidLoad {
    [super viewDidLoad];

    // Emitted through the original binding.
    NSLog(@"log來了,老弟");

    // One request: route "NSLog" to my_nslog and store the previous
    // implementation in sys_nslog.
    struct rebinding rebs[1] = {
        {
            .name = "NSLog",
            .replacement = my_nslog,
            .replaced = (void *)&sys_nslog,
        },
    };
    rebind_symbols(rebs, 1);

    // Emitted through the replacement.
    NSLog(@"log來了,老弟");
}

@end
複製代碼
運行結果:
2020-03-16 09:47:38.526862+0800 Demo[28657:5210895] log來了,老弟
2020-03-16 09:47:38.536892+0800 Demo[28657:5210895] log來了,老弟你咋又來了
複製代碼
MachOView會彈出輸入框讓你輸入PID。
這個PID在Xcode的Show the Debug navigator菜單下,可以用⌘ + 7快速切過來。這裏我們可以看到進程的PID,輸入到上面的框中。
// One node of a singly linked list; each node holds one batch of rebinding
// requests.
struct rebindings_entry {
// Heap-allocated copy of the caller's rebinding array (made by prepend_rebindings)
struct rebinding *rebindings;
// Number of elements in `rebindings`
size_t rebindings_nel;
// Next (older) batch in the list; NULL for the first batch ever added
struct rebindings_entry *next;
};
// Head of the file-wide list that rebind_symbols() prepends to
static struct rebindings_entry *_rebindings_head;
// Copies `nel` rebinding requests into a freshly allocated list node and
// pushes that node onto the front of the list at *rebindings_head.
//
// Returns 0 on success. Returns -1 if either allocation fails, in which case
// the list is left untouched.
static int prepend_rebindings(struct rebindings_entry **rebindings_head,
                              struct rebinding rebindings[],
                              size_t nel) {
  struct rebindings_entry *node =
      (struct rebindings_entry *)malloc(sizeof(struct rebindings_entry));
  if (node == NULL) {
    return -1;
  }

  // Private copy of the caller's array, so the caller's storage is not
  // referenced after this call.
  const size_t table_bytes = sizeof(struct rebinding) * nel;
  struct rebinding *table = (struct rebinding *)malloc(table_bytes);
  if (table == NULL) {
    free(node);
    return -1;
  }
  memcpy(table, rebindings, table_bytes);

  node->rebindings = table;
  node->rebindings_nel = nel;

  // Push onto the front: the new node points at the old head, then becomes
  // the head itself.
  node->next = *rebindings_head;
  *rebindings_head = node;
  return 0;
}
複製代碼
這裏定義了rebindings_entry鏈表。每次進行綁定的時候,會傳入struct rebinding rebindings[]數組,創建一個新的rebindings_entry結構,然後把這個結構插入鏈表頭部。
// Per-image callback: applies every rebinding in the global _rebindings_head
// list to the image described by `header` and `slide`. rebind_symbols()
// registers it with dyld and also calls it directly for already-loaded images.
static void _rebind_symbols_for_image(const struct mach_header *header, intptr_t slide) {
// Locate the matching symbols in this image and rebind them
rebind_symbols_for_image(_rebindings_head, header, slide);
}
// Rebinds symbols in one specific, already known Mach-O image.
//
// The requests are placed in a temporary single-node list that is released
// before returning; the global list is not modified. Returns the result of
// prepend_rebindings (0 on success, -1 if allocation failed).
int rebind_symbols_image(void *header,
                         intptr_t slide,
                         struct rebinding rebindings[],
                         size_t rebindings_nel) {
  struct rebindings_entry *local_list = NULL;
  const int status = prepend_rebindings(&local_list, rebindings, rebindings_nel);

  rebind_symbols_for_image(local_list, (const struct mach_header *)header, slide);

  // local_list is still NULL when prepend_rebindings failed.
  if (local_list != NULL) {
    free(local_list->rebindings);
    free(local_list);
  }
  return status;
}
// Adds the given rebinding requests to the global list and applies them to
// the process's images.
//
// Returns 0 on success, or the negative result of prepend_rebindings if the
// requests could not be stored.
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
  const int status = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
  if (status < 0) {
    return status;
  }

  if (_rebindings_head->next == NULL) {
    // First call: register the add-image callback. Per the original author's
    // note, dyld also invokes it right away for images that are already
    // loaded.
    _dyld_register_func_for_add_image(_rebind_symbols_for_image);
    return status;
  }

  // Later calls: the callback is already registered, so walk the images that
  // are currently loaded.
  const uint32_t image_count = _dyld_image_count();
  for (uint32_t image = 0; image < image_count; image++) {
    // Mach header of the image, and its virtual-memory slide.
    const struct mach_header *image_header = _dyld_get_image_header(image);
    const intptr_t image_slide = _dyld_get_image_vmaddr_slide(image);
    _rebind_symbols_for_image(image_header, image_slide);
  }
  return status;
}
複製代碼
rebind_symbols_image和rebind_symbols是兩個公開的方法,用於重新綁定符號。rebind_symbols_image用於指定鏡像的符號綁定,rebind_symbols對所有鏡像進行處理。
不管是哪個方法,最後都是調用rebind_symbols_for_image去獲取相關部分的地址。
// Applies the rebinding list to a single image.
//
// Pass 1 walks the load commands to find the __LINKEDIT segment, LC_SYMTAB
// and LC_DYSYMTAB. From those it derives the in-memory addresses of the
// symbol table, string table and indirect symbol table. Pass 2 walks the load
// commands again and hands every lazy / non-lazy symbol pointer section of
// the __DATA and __DATA_CONST segments to perform_rebinding_with_section.
static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
                                     const struct mach_header *header,
                                     intptr_t slide) {
  // Give up if dladdr cannot resolve the header address.
  Dl_info image_info;
  if (dladdr(header, &image_info) == 0) {
    return;
  }

  segment_command_t *load_cmd;
  segment_command_t *linkedit_segment = NULL;
  struct symtab_command *symtab_cmd = NULL;
  struct dysymtab_command *dysymtab_cmd = NULL;

  // The load commands start immediately after the Mach header.
  uintptr_t cmd_addr = (uintptr_t)header + sizeof(mach_header_t);
  for (uint n = 0; n < header->ncmds; n++, cmd_addr += load_cmd->cmdsize) {
    load_cmd = (segment_command_t *)cmd_addr;
    switch (load_cmd->cmd) {
      case LC_SEGMENT_ARCH_DEPENDENT:
        if (strcmp(load_cmd->segname, SEG_LINKEDIT) == 0) {
          linkedit_segment = load_cmd;
        }
        break;
      case LC_SYMTAB:
        symtab_cmd = (struct symtab_command *)load_cmd;
        break;
      case LC_DYSYMTAB:
        dysymtab_cmd = (struct dysymtab_command *)load_cmd;
        break;
      default:
        break;
    }
  }

  // Without all three commands, or with an empty indirect symbol table,
  // there is nothing to rebind in this image.
  if (symtab_cmd == NULL || dysymtab_cmd == NULL || linkedit_segment == NULL) {
    return;
  }
  if (dysymtab_cmd->nindirectsyms == 0) {
    return;
  }

  // Base used to turn __LINKEDIT file offsets into in-memory addresses.
  const uintptr_t linkedit_base =
      (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
  nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
  char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
  // Indirect symbol table: array of uint32_t indices into the symbol table.
  uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

  // Second pass over the load commands, this time looking for pointer sections.
  cmd_addr = (uintptr_t)header + sizeof(mach_header_t);
  for (uint n = 0; n < header->ncmds; n++, cmd_addr += load_cmd->cmdsize) {
    load_cmd = (segment_command_t *)cmd_addr;
    if (load_cmd->cmd != LC_SEGMENT_ARCH_DEPENDENT) {
      continue;
    }
    // Only the __DATA and __DATA_CONST segments are examined.
    if (!(strcmp(load_cmd->segname, SEG_DATA) == 0 ||
          strcmp(load_cmd->segname, SEG_DATA_CONST) == 0)) {
      continue;
    }

    // The section headers follow the segment command directly.
    section_t *sections = (section_t *)(cmd_addr + sizeof(segment_command_t));
    for (uint s = 0; s < load_cmd->nsects; s++) {
      section_t *sect = &sections[s];
      // Lazy and non-lazy symbol pointer sections are handled the same way.
      if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS ||
          (sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
        perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
      }
    }
  }
}
複製代碼
最上面,通過header指針和header大小獲取到加載指令的基址。然後遍歷獲取3個數據結構:
// Segment load command of the __LINKEDIT segment
segment_command_t *linkedit_segment = NULL;
// The LC_SYMTAB load command
struct symtab_command* symtab_cmd = NULL;
// The LC_DYSYMTAB load command
struct dysymtab_command* dysymtab_cmd = NULL;
複製代碼
下面是比較核心的代碼:
// Base used to turn __LINKEDIT file offsets into in-memory addresses:
// slide + vmaddr - fileoff
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;
複製代碼
我們來看看:linkedit_segment->vmaddr對應4294995968,linkedit_segment->fileoff對應28672。這樣可能看不太出來這是基地址,我們格式化一下:
(lldb) p/x 4294995968
(long) $0 = 0x0000000100007000
(lldb) p/x 28672
(int) $1 = 0x00007000
(lldb) p/x 4294995968 - 28672
(long) $2 = 0x0000000100000000
複製代碼
我們可以看出這個部分就是拿到了image對應的內存基址(實際運行時還要再加上slide)。
// In-memory address of the symbol table: linkedit_base + symoff
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// In-memory address of the string table: linkedit_base + stroff
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);
複製代碼
從struct symtab_command結構中獲取到符號表和字符串表的偏移量,然後加上基址就是內存中兩個表的地址了。
(lldb) p/x 0x0000000100000000 + 30200
(long) $3 = 0x00000001000075f8
(lldb) p/x 0x0000000100000000 + 33408
(long) $4 = 0x0000000100008280
複製代碼
通過MachOView我們也驗證了這兩個地址是正確的。
// In-memory address of the indirect symbol table: linkedit_base + indirectsymoff
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);
複製代碼
通過struct dysymtab_command獲取間接符號表。
(lldb) p/x 0x0000000100000000 + 33224
(long) $5 = 0x00000001000081c8
複製代碼
間接符號表的地址我們也得到了。
// As before, skip past the Mach header to reach the first load command
cur = (uintptr_t)header + sizeof(mach_header_t);
// Walk the load commands and rebind the symbols found in pointer sections
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
// Skip segments that are neither __DATA nor __DATA_CONST
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
// Visit every section of the segment; the section headers directly follow
// the segment command
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j;
// Lazy symbol pointer section (S_LAZY_SYMBOL_POINTERS)
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
// The function that actually performs the rebinding
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
// Non-lazy symbol pointer section (S_NON_LAZY_SYMBOL_POINTERS)
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
// The function that actually performs the rebinding
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
複製代碼
對於給定的image,__DATA段包含與動態符號綁定相關的兩個部分:__nl_symbol_ptr和__la_symbol_ptr。遍歷找到這兩個部分,然後進行符號重新綁定。
// Rebinds the pointers of one symbol pointer section.
//
// For each pointer slot in `section`, the symbol name is looked up through
// the indirect symbol table, the symbol table and the string table. If that
// name matches a request in the `rebindings` list, the slot is overwritten
// with the request's replacement; the first matching request wins. The
// slot's previous value is first stored through `replaced`, when `replaced`
// is non-NULL and the slot does not already hold the replacement.
//
// NOTE(review): the slot is written directly. If the section's pages are
// mapped read-only (possibly the case for __DATA_CONST on some systems) the
// write would fault — confirm whether the protection must be changed first.
static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
                                           section_t *section,
                                           intptr_t slide,
                                           nlist_t *symtab,
                                           char *strtab,
                                           uint32_t *indirect_symtab) {
  // reserved1 is this section's starting index in the indirect symbol table.
  uint32_t *slot_symbol_indices = indirect_symtab + section->reserved1;
  // The section's pointer slots, at their slid address.
  void **slots = (void **)((uintptr_t)slide + section->addr);

  for (uint slot = 0; slot < section->size / sizeof(void *); slot++) {
    const uint32_t symtab_index = slot_symbol_indices[slot];
    // Absolute and local entries are not looked up in the symbol table.
    if (symtab_index == INDIRECT_SYMBOL_ABS ||
        symtab_index == INDIRECT_SYMBOL_LOCAL ||
        symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
      continue;
    }

    // Indirect entry -> symbol table entry -> string table offset -> name.
    const char *stored_name = strtab + symtab[symtab_index].n_un.n_strx;

    // The stored name carries a one-character prefix (the leading '_' of C
    // symbols), so a usable name needs at least two characters.
    if (stored_name[0] == '\0' || stored_name[1] == '\0') {
      continue;
    }
    const char *bare_name = stored_name + 1;

    bool rebound = false;
    for (struct rebindings_entry *entry = rebindings;
         entry != NULL && !rebound;
         entry = entry->next) {
      for (uint k = 0; k < entry->rebindings_nel; k++) {
        struct rebinding *request = &entry->rebindings[k];
        if (strcmp(bare_name, request->name) != 0) {
          continue;
        }
        // Save the previous value, unless the slot already holds the
        // replacement.
        if (request->replaced != NULL && slots[slot] != request->replacement) {
          *(request->replaced) = slots[slot];
        }
        slots[slot] = request->replacement;
        rebound = true;
        break;
      }
    }
  }
}
複製代碼
這個部分就像fishhook原理裏面提到的:
通過indirect_symtab加上section->reserved1(即原理中的indirect_symbol_table[nl_symbol_ptr->reserved1])拿到該section在間接符號表中的起始位置。indirect_symbol_bindings是該section(例如__nl_symbol_ptr)中對應的函數指針數組。
符號表中的符號名都是以_開頭的,所以至少有2個字符。&symbol_name[1]是去掉開頭_的字符串。
找到匹配的符號後,先把原來函數的地址保存到replaced指向的函數指針中,再將原來函數的地址替換爲我們要綁定的replacement函數地址。
如果覺得本文對你有所幫助,給我點個贊吧~