作者:王智通(阿里雲安全工程師)
這兩天在class文件解析器的基礎上,加上了java反彙編的功能。反彙編器是指令解釋器的基礎,通過編寫反彙編器可以熟悉jvm的指令系統,不過jvm的指令一共有201個,反彙編過程基本就是個體力活。在《java虛擬機規範》中對每一條指令都有詳細的描述,下面說說我是如何解析bytecode的:
一個java文件通過javac編譯後會生成class格式文件, 在class格式中method字段裏會有Code屬性,Code屬性包含了java的指令碼和長度。 首先用class解析器將指令碼提取出來, 舉個例子:
test.java
06 |
public static void main(String args[]) { |
09 |
for (i = 0 ; i < 5 ; i++) |
10 |
System.out.println( "hehe" ); |
咱們用class文件解析器把test對應的bytecode打印出來:
len: 5
0x2a0xb70x00x10xb1
這一串bytecode爲:0x2a 0xb7 0x0 0x1 0xb1,長度是5個字節。
對照《java虛擬機規範》咱們來一步步手工解析:
0x2a代表aload_0指令,它將本地局部變量中的第一個變量壓入到堆棧裏。這個指令本身長度就是一個字節,沒有參數,所以0x2a的解析就非常簡單,直接在屏幕打印出aload_0即可:
printf("%s\n", symbol);
0xb7代表invokespecial,它用來調用超類構造方法、實例初始化方法和私有方法。它的用法如下:
invokespecial indexbyte1 indexbyte2,indexbyte1和indexbyte2各佔一個字節,用(indexbyte1 << 8) | indexbyte2來構建一個常量池中的索引。每一個jvm指令的操作碼本身都佔用一個字節,加上它的兩個參數,invokespecial語句將佔用3個字節空間。因此它的解析算法如下:
3 |
index = ((*(u1 *)(base + 1)) << 8) | (*(u1 *)(base + 2)); |
4 |
printf ( "%s #%x\n" , symbol, index); |
注意0xb7解析完後,我們要跳過3個字節的地址,那麼下一個就是0xb1了,它是return指令,沒有參數,因此它的解析方法跟aload_0一樣:
printf("%s\n", symbol);
以上是我們手工解析的過程,但是jvm有201條指令,我們需要建立一個合適的數據結構:
1 |
typedef int (*interp_func)(u2 opcode_len, char *symbol, void *base); |
3 |
typedef struct bytecode_st { |
5 |
u2 opcode_len; // 指令總的長度,包括參數 |
6 |
char symbol[OPCODE_SYMBOL_LEN]; // 指令對應的助記符 |
7 |
interp_func func; // 解析指令的回調函數 |
我們可以直接建立一個大的BYTECODE數組:
001 |
BYTECODE jvm_byte_code[OPCODE_LEN] = { |
002 |
{0x00, 1, "nop" , jvm_interp_nop}, |
003 |
{0x01, 1, "aconst_null" , jvm_interp_aconst_null}, |
004 |
{0x02, 1, "iconst_m1" , jvm_interp_iconst_m1}, |
005 |
{0x03, 1, "iconst_0" , jvm_interp_iconst_0}, |
006 |
{0x04, 1, "iconst_1" , jvm_interp_iconst_1}, |
007 |
{0x05, 1, "iconst_2" , jvm_interp_iconst_2}, |
008 |
{0x06, 1, "iconst_3" , jvm_interp_iconst_3}, |
009 |
{0x07, 1, "iconst_4" , jvm_interp_iconst_4}, |
010 |
{0x08, 1, "iconst_5" , jvm_interp_iconst_5}, |
011 |
{0x09, 1, "lconst_0" , jvm_interp_lconst_0}, |
012 |
{0x0a, 1, "lconst_1" , jvm_interp_lconst_1}, |
013 |
{0x0b, 1, "fconst_0" , jvm_interp_fconst_0}, |
014 |
{0x0c, 1, "fconst_1" , jvm_interp_fconst_1}, |
015 |
{0x0d, 1, "fconst_2" , jvm_interp_fconst_2}, |
016 |
{0x0e, 1, "dconst_0" , jvm_interp_dconst_0}, |
017 |
{0x0f, 1, "dconst_1" , jvm_interp_dconst_1}, |
018 |
{0x10, 1, "bipush" , jvm_interp_bipush}, |
019 |
{0x11, 1, "sipush" , jvm_interp_sipush}, |
020 |
{0x12, 2, "ldc" , jvm_interp_ldc}, |
021 |
{0x13, 1, "ldc_w" , jvm_interp_ldc_w}, |
022 |
{0x14, 1, "ldc2_w" , jvm_interp_ldc2_w}, |
023 |
{0x15, 1, "iload" , jvm_interp_iload}, |
024 |
{0x16, 1, "lload" , jvm_interp_lload}, |
025 |
{0x17, 1, "fload" , jvm_interp_fload}, |
026 |
{0x18, 1, "dload" , jvm_interp_dload}, |
027 |
{0x19, 1, "aload" , jvm_interp_aload}, |
028 |
{0x1a, 1, "iload_0" , jvm_interp_iload_0}, |
029 |
{0x1b, 1, "iload_1" , jvm_interp_iload_1}, |
030 |
{0x1c, 1, "iload_2" , jvm_interp_iload_2}, |
031 |
{0x1d, 1, "iload_3" , jvm_interp_iload_3}, |
032 |
{0x1e, 1, "lload_0" , jvm_interp_lload_0}, |
033 |
{0x1f, 1, "lload_1" , jvm_interp_lload_1}, |
034 |
{0x20, 1, "lload_2" , jvm_interp_lload_2}, |
035 |
{0x21, 1, "lload_3" , jvm_interp_lload_3}, |
036 |
{0x22, 1, "fload_0" , jvm_interp_fload_0}, |
037 |
{0x23, 1, "fload_1" , jvm_interp_fload_1}, |
038 |
{0x24, 1, "fload_2" , jvm_interp_fload_2}, |
039 |
{0x25, 1, "fload_3" , jvm_interp_fload_3}, |
040 |
{0x26, 1, "dload_0" , jvm_interp_dload_0}, |
041 |
{0x27, 1, "dload_1" , jvm_interp_dload_1}, |
042 |
{0x28, 1, "dload_2" , jvm_interp_dload_2}, |
043 |
{0x29, 1, "dload_3" , jvm_interp_dload_3}, |
044 |
{0x2a, 1, "aload_0" , jvm_interp_aload_0}, |
045 |
{0x2b, 1, "aload_1" , jvm_interp_aload_1}, |
046 |
{0x2c, 1, "aload_2" , jvm_interp_aload_2}, |
047 |
{0x2d, 1, "aload_3" , jvm_interp_aload_3}, |
048 |
{0x2e, 1, "iaload" , jvm_interp_iaload}, |
049 |
{0x2f, 1, "laload" , jvm_interp_laload}, |
050 |
{0x30, 1, "faload" , jvm_interp_faload}, |
051 |
{0x31, 1, "daload" , jvm_interp_daload}, |
052 |
{0x32, 1, "aaload" , jvm_interp_aaload}, |
053 |
{0x33, 1, "baload" , jvm_interp_baload}, |
054 |
{0x34, 1, "caload" , jvm_interp_caload}, |
055 |
{0x35, 1, "saload" , jvm_interp_saload}, |
056 |
{0x36, 1, "istore" , jvm_interp_istore}, |
057 |
{0x37, 1, "lstore" , jvm_interp_lstore}, |
058 |
{0x38, 1, "fstore" , jvm_interp_fstore}, |
059 |
{0x39, 1, "dstore" , jvm_interp_dstore}, |
060 |
{0x3a, 1, "astore" , jvm_interp_astore}, |
061 |
{0x3b, 1, "istore_0" , jvm_interp_istore_0}, |
062 |
{0x3c, 1, "istore_1" , jvm_interp_istore_1}, |
063 |
{0x3d, 1, "istore_2" , jvm_interp_istore_2}, |
064 |
{0x3e, 1, "istore_3" , jvm_interp_istore_3}, |
065 |
{0x3f, 1, "lstore_0" , jvm_interp_lstore_0}, |
066 |
{0x40, 1, "lstore_1" , jvm_interp_lstore_1}, |
067 |
{0x41, 1, "lstore_2" , jvm_interp_lstore_2}, |
068 |
{0x42, 1, "lstore_3" , jvm_interp_lstore_3}, |
069 |
{0x43, 1, "fstore_0" , jvm_interp_fstore_0}, |
070 |
{0x44, 1, "fstore_1" , jvm_interp_fstore_1}, |
071 |
{0x45, 1, "fstore_2" , jvm_interp_fstore_2}, |
072 |
{0x46, 1, "fstore_3" , jvm_interp_fstore_3}, |
073 |
{0x47, 1, "dstore_0" , jvm_interp_dstore_0}, |
074 |
{0x48, 1, "dstore_1" , jvm_interp_dstore_1}, |
075 |
{0x49, 1, "dstore_2" , jvm_interp_dstore_2}, |
076 |
{0x4a, 1, "dstore_3" , jvm_interp_dstore_3}, |
077 |
{0x4b, 1, "astore_0" , jvm_interp_astore_0}, |
078 |
{0x4c, 1, "astore_1" , jvm_interp_astore_1}, |
079 |
{0x4d, 1, "astore_2" , jvm_interp_astore_2}, |
080 |
{0x4e, 1, "astore_3" , jvm_interp_astore_3}, |
081 |
{0x4f, 1, "iastore" , jvm_interp_iastore}, |
082 |
{0x50, 1, "lastore" , jvm_interp_lastore}, |
083 |
{0x51, 1, "fastore" , jvm_interp_fastore}, |
084 |
{0x52, 1, "dastore" , jvm_interp_dastore}, |
085 |
{0x53, 1, "aastore" , jvm_interp_aastore}, |
086 |
{0x54, 1, "bastore" , jvm_interp_bastore}, |
087 |
{0x55, 1, "castore" , jvm_interp_castore}, |
088 |
{0x56, 1, "sastore" , jvm_interp_sastore}, |
089 |
{0x57, 1, "pop" , jvm_interp_pop}, |
090 |
{0x58, 1, "pop2" , jvm_interp_pop2}, |
091 |
{0x59, 1, "dup" , jvm_interp_dup}, |
092 |
{0x5a, 1, "dup_x1" , jvm_interp_dup_x1}, |
093 |
{0x5b, 1, "dup_x2" , jvm_interp_dup_x2}, |
094 |
{0x5c, 1, "dup2" , jvm_interp_dup2}, |
095 |
{0x5d, 1, "dup2_x1" , jvm_interp_dup2_x1}, |
096 |
{0x5e, 1, "dup2_x2" , jvm_interp_dup2_x2}, |
097 |
{0x5f, 1, "swap" , jvm_interp_swap}, |
098 |
{0x60, 1, "iadd" , jvm_interp_iadd}, |
099 |
{0x61, 1, "ladd" , jvm_interp_ladd}, |
100 |
{0x62, 1, "fadd" , jvm_interp_fadd}, |
101 |
{0x63, 1, "dadd" , jvm_interp_dadd}, |
102 |
{0x64, 1, "isub" , jvm_interp_isub}, |
103 |
{0x65, 1, "lsub" , jvm_interp_lsub}, |
104 |
{0x66, 1, "fsub" , jvm_interp_fsub}, |
105 |
{0x67, 1, "dsub" , jvm_interp_dsub}, |
106 |
{0x68, 1, "imul" , jvm_interp_imul}, |
107 |
{0x69, 1, "lmul" , jvm_interp_lmul}, |
108 |
{0x6a, 1, "fmul" , jvm_interp_fmul}, |
109 |
{0x6b, 1, "dmul" , jvm_interp_dmul}, |
110 |
{0x6c, 1, "idiv" , jvm_interp_idiv}, |
111 |
{0x6d, 1, "ldiv" , jvm_interp_ldiv}, |
112 |
{0x6e, 1, "fdiv" , jvm_interp_fdiv}, |
113 |
{0x6f, 1, "ddiv" , jvm_interp_ddiv}, |
114 |
{0x70, 1, "irem" , jvm_interp_irem}, |
115 |
{0x71, 1, "lrem" , jvm_interp_lrem}, |
116 |
{0x72, 1, "frem" , jvm_interp_frem}, |
117 |
{0x73, 1, "drem" , jvm_interp_drem}, |
118 |
{0x74, 1, "ineg" , jvm_interp_ineg}, |
119 |
{0x75, 1, "lneg" , jvm_interp_lneg}, |
120 |
{0x76, 1, "fneg" , jvm_interp_fneg}, |
121 |
{0x77, 1, "dneg" , jvm_interp_dneg}, |
122 |
{0x78, 1, "ishl" , jvm_interp_ishl}, |
123 |
{0x79, 1, "lshl" , jvm_interp_lshl}, |
124 |
{0x7a, 1, "ishr" , jvm_interp_ishr}, |
125 |
{0x7b, 1, "lshr" , jvm_interp_lshr}, |
126 |
{0x7c, 1, "iushr" , jvm_interp_iushr}, |
127 |
{0x7d, 1, "lushr" , jvm_interp_lushr}, |
128 |
{0x7e, 1, "iand" , jvm_interp_iand}, |
129 |
{0x7f, 1, "land" , jvm_interp_land}, |
130 |
{0x80, 1, "ior" , jvm_interp_ior}, |
131 |
{0x81, 1, "lor" , jvm_interp_lor}, |
132 |
{0x82, 1, "ixor" , jvm_interp_ixor}, |
133 |
{0x83, 1, "lxor" , jvm_interp_lxor}, |
134 |
{0x84, 3, "iinc" , jvm_interp_iinc}, |
135 |
{0x85, 1, "i2l" , jvm_interp_i2l}, |
136 |
{0x86, 1, "i2f" , jvm_interp_i2f}, |
137 |
{0x87, 1, "i2d" , jvm_interp_i2d}, |
138 |
{0x88, 1, "l2i" , jvm_interp_l2i}, |
139 |
{0x89, 1, "l2f" , jvm_interp_l2f}, |
140 |
{0x8a, 1, "l2d" , jvm_interp_l2d}, |
141 |
{0x8b, 1, "f2i" , jvm_interp_f2i}, |
142 |
{0x8c, 1, "f2l" , jvm_interp_f2l}, |
143 |
{0x8d, 1, "f2d" , jvm_interp_f2d}, |
144 |
{0x8e, 1, "d2i" , jvm_interp_d2i}, |
145 |
{0x8f, 1, "d2l" , jvm_interp_d2l}, |
146 |
{0x90, 1, "d2f" , jvm_interp_d2f}, |
147 |
{0x91, 1, "i2b" , jvm_interp_i2b}, |
148 |
{0x92, 1, "i2c" , jvm_interp_i2c}, |
149 |
{0x93, 1, "i2s" , jvm_interp_i2s}, |
150 |
{0x94, 1, "lcmp" , jvm_interp_lcmp}, |
151 |
{0x95, 1, "fcmpl" , jvm_interp_fcmpl}, |
152 |
{0x96, 1, "fcmpg" , jvm_interp_fcmpg}, |
153 |
{0x97, 1, "dcmpl" , jvm_interp_dcmpl}, |
154 |
{0x98, 1, "dcmpg" , jvm_interp_dcmpg}, |
155 |
{0x99, 1, "ifeq" , jvm_interp_ifeq}, |
156 |
{0x9a, 1, "ifne" , jvm_interp_ifne}, |
157 |
{0x9b, 1, "iflt" , jvm_interp_iflt}, |
158 |
{0x9c, 1, "ifge" , jvm_interp_ifge}, |
159 |
{0x9d, 1, "ifgt" , jvm_interp_ifgt}, |
160 |
{0x9e, 1, "ifle" , jvm_interp_ifle}, |
161 |
{0x9f, 1, "if_icmpeq" , jvm_interp_if_icmpeq}, |
162 |
{0xa0, 1, "if_icmpne" , jvm_interp_if_icmpne}, |