項目的完整代碼在 C2j-Compiler
在上一篇解釋完了一些基礎的Java字節碼指令後,就可以正式進入真正的代碼生成部分了。但是這部分先說的是代碼生成依靠的幾個類,也就是用來生成指令的操作。
這一篇用到的文件都在codegen下:
這個是枚舉類,用來生成一些比較特殊的指令
都是生成像聲明一個類或者一個方法的範圍之類的指令,比較簡單。
/**
 * Assembler directives written to the generated {@code .j} file, such as the
 * markers that open and close a class or a method.
 *
 * <p>Each constant carries the literal directive text; {@link #toString()} returns it unchanged.
 * The constant {@code METHOD_PUBBLIC_STATIC} keeps its historical spelling so existing
 * references continue to compile.
 */
public enum Directive {
    CLASS_PUBLIC(".class public"),
    END_CLASS(".end class"),
    SUPER(".super"),
    FIELD_PRIVATE_STATIC(".field private static"),
    METHOD_STATIC(".method static"),
    METHOD_PUBLIC(".method public"),
    FIELD_PUBLIC(".field public"),
    METHOD_PUBBLIC_STATIC(".method public static"),
    END_METHOD(".end method"),
    LIMIT_LOCALS(".limit locals"),
    LIMIT_STACK(".limit stack"),
    VAR(".var"),
    LINE(".line");

    /** Literal directive text; fixed at construction and never reassigned. */
    private final String text;

    Directive(String text) {
        this.text = text;
    }

    /**
     * Returns the directive text exactly as it is written to the output.
     *
     * @return the literal directive text of this constant
     */
    @Override
    public String toString() {
        return text;
    }
}
這也是一個枚舉類,用來生成一些基本的指令
/**
 * Bytecode instruction mnemonics written to the generated {@code .j} file.
 *
 * <p>Each constant carries the literal mnemonic; {@link #toString()} returns it unchanged.
 * The constant {@code IF_ICMPEG} keeps its historical spelling (its text is {@code if_icmpeq})
 * so existing references continue to compile.
 */
public enum Instruction {
    LDC("ldc"),
    GETSTATIC("getstatic"),
    SIPUSH("sipush"),
    IADD("iadd"),
    IMUL("imul"),
    ISUB("isub"),
    IDIV("idiv"),
    INVOKEVIRTUAL("invokevirtual"),
    INVOKESTATIC("invokestatic"),
    INVOKESPECIAL("invokespecial"),
    RETURN("return"),
    IRETURN("ireturn"),
    ILOAD("iload"),
    ISTORE("istore"),
    NEWARRAY("newarray"),
    NEW("new"),
    DUP("dup"),
    ASTORE("astore"),
    IASTORE("iastore"),
    ALOAD("aload"),
    PUTFIELD("putfield"),
    GETFIELD("getfield"),
    ANEWARRAY("anewarray"),
    AASTORE("aastore"),
    AALOAD("aaload"),
    IF_ICMPEG("if_icmpeq"),
    IF_ICMPNE("if_icmpne"),
    IF_ICMPLT("if_icmplt"),
    IF_ICMPGE("if_icmpge"),
    IF_ICMPGT("if_icmpgt"),
    IF_ICMPLE("if_icmple"),
    GOTO("goto"),
    IALOAD("iaload");

    /** Literal mnemonic; fixed at construction and never reassigned. */
    private final String text;

    Instruction(String s) {
        this.text = s;
    }

    /**
     * Returns the mnemonic exactly as it is written to the output.
     *
     * @return the literal mnemonic of this constant
     */
    @Override
    public String toString() {
        return text;
    }
}
重點來了,生成的邏輯主要都在CodeGenerator和ProgramGenerator裏,CodeGenerator是ProgramGenerator的父類
CodeGenerator的構造函數new了一個輸出流,用來輸出字節碼到xxx.j裏
/**
 * Opens the assembly output file, named {@code programName + ".j"}, for writing.
 *
 * @throws IllegalStateException if the output file cannot be created or opened
 */
public CodeGenerator() {
    String assemblyFileName = programName + ".j";
    try {
        bytecodeFile = new PrintWriter(new PrintStream(new File(assemblyFileName)));
    } catch (FileNotFoundException e) {
        // Fail fast with the cause attached: every unbuffered emit call writes through
        // bytecodeFile, so continuing without it would only defer the failure.
        throw new IllegalStateException("Cannot open assembly output file: " + assemblyFileName, e);
    }
}
emit、emitString、emitDirective、emitBlankLine都屬於輸出基本指令的方法,都有多個重載方法來應對不同的操作和操作數。需要注意的是,有的指令可能需要先緩存起來,在最後的時候一起提交,比如buffered、classDefine就是用來判斷是不是應該先緩存的布爾值
/**
 * Emits a raw string.
 *
 * <p>While {@code buffered} is set the string plus a newline is appended to
 * {@code bufferedContent}; otherwise, while {@code classDefine} is set, it is appended
 * (plus a newline) to {@code classDefinition}. In the direct case the string is printed
 * to the output file <em>without</em> a trailing newline and the writer is flushed.
 *
 * @param s the text to emit
 */
public void emitString(String s) {
    if (buffered) {
        bufferedContent += s + "\n";
        return;
    }

    if (classDefine) {
        classDefinition += s + "\n";
        return;
    }

    bytecodeFile.print(s);
    bytecodeFile.flush();
}

/**
 * Emits an operand-less instruction as a tab-indented line.
 *
 * <p>The line goes to {@code bufferedContent} or {@code classDefinition} when the
 * corresponding flag is set; otherwise it is written to the output file, the writer is
 * flushed and {@code instructionCount} is incremented. The counter is only advanced on
 * the direct-write path.
 *
 * @param opcode the instruction to emit
 */
public void emit(Instruction opcode) {
    String line = "\t" + opcode.toString();

    if (buffered) {
        bufferedContent += line + "\n";
        return;
    }

    if (classDefine) {
        classDefinition += line + "\n";
        return;
    }

    bytecodeFile.println(line);
    bytecodeFile.flush();
    ++instructionCount;
}

/**
 * Emits a directive followed by three space-separated operands.
 *
 * <p>The line goes to {@code bufferedContent} or {@code classDefinition} when the
 * corresponding flag is set; otherwise it is written to the output file, the writer is
 * flushed and {@code instructionCount} is incremented.
 *
 * @param directive the directive to emit
 * @param operand1 first operand
 * @param operand2 second operand
 * @param operand3 third operand
 */
public void emitDirective(Directive directive, String operand1, String operand2, String operand3) {
    String line = directive.toString() + " " + operand1 + " " + operand2 + " " + operand3;

    if (buffered) {
        bufferedContent += line + "\n";
        return;
    }

    if (classDefine) {
        classDefinition += line + "\n";
        return;
    }

    bytecodeFile.println(line);
    // Flush like the other direct-write emitters so a directive never lingers in the
    // writer's buffer behind instructions that were already flushed.
    bytecodeFile.flush();
    ++instructionCount;
}

/**
 * Emits an empty line, honouring the same buffering flags as the other emitters.
 */
public void emitBlankLine() {
    if (buffered) {
        bufferedContent += "\n";
        return;
    }

    if (classDefine) {
        classDefinition += "\n";
        return;
    }

    bytecodeFile.println();
    bytecodeFile.flush();
}
ProgramGenerator繼承了CodeGenerator,也就是繼承了一些基本的操作,上一篇提到的結構體、數組等的指令輸出都在這個類裏
先看四個屬性,這四個屬性主要是用來處理嵌套的分支和循環。
// Not referenced by the code shown here; per the accompanying text it marks branch
// jumps within the same scope -- TODO confirm against its users.
private int branch_count = 0;
// Numeric suffix of the next "branch_out" label; emitBranchOut() increments it after
// emitting each label.
private int branch_out = 0;
// Label prefix for nested if/else: incraseIfElseEmbed() appends an 'i' and
// decraseIfElseEmbed() drops one character.
private String embedded = "";
// Numeric suffix of the current "loop" label; advanced by increaseLoopCount().
private int loopCount = 0;
每當嵌套一個ifelse語句的時候,embedded屬性就會加上一個字符‘i’,而當退出一個分支的時候,就把這個‘i’切割掉
branch_count和branch_out都用來標誌相同作用域的分支跳轉
也就是說如果有嵌套就用embedded來處理,如果是同一個作用域的分支就用branch_count和branch_out來做標誌
/** Enters one more nested if/else level by appending an {@code 'i'} to the label prefix. */
public void incraseIfElseEmbed() {
    embedded = embedded + "i";
}

/** Leaves a nested if/else level by dropping the first character of the label prefix. */
public void decraseIfElseEmbed() {
    String shortened = embedded.substring(1);
    embedded = shortened;
}

/**
 * Emits the next {@code branch_out} label, prefixed with the current nesting prefix and
 * surrounded by newlines, then advances the label counter.
 */
public void emitBranchOut() {
    StringBuilder label = new StringBuilder("\n");
    label.append(embedded)
         .append("branch_out")
         .append(branch_out)
         .append(":\n");
    this.emitString(label.toString());
    branch_out += 1;
}
loopCount則是對嵌套循環的處理
/** Emits the label of the current loop on its own line, preceded by a blank line. */
public void emitLoopBranch() {
    StringBuilder label = new StringBuilder();
    label.append("\n").append("loop").append(loopCount).append(":").append("\n");
    emitString(label.toString());
}

/**
 * Returns the name of the current loop label (without the trailing colon).
 *
 * @return {@code "loop"} followed by the current loop counter
 */
public String getLoopBranch() {
    String name = "loop" + loopCount;
    return name;
}

/** Advances the loop counter so the next loop gets a fresh label. */
public void increaseLoopCount() {
    loopCount += 1;
}
putStructToClassDeclaration是定義結構體的,也就是new一個類。declareStructAsClass則是處理結構體裏的變量,也就是相當於處理類的屬性
/**
 * Emits the code that backs a struct variable with a generated class.
 *
 * <p>Does nothing when {@code symbol} carries no {@code STRUCTURE} specifier, or when the
 * struct's tag is already recorded in {@code structNameList}. Otherwise the tag is
 * recorded, an instance is created and stored in the symbol's local-variable slot (only
 * when the symbol has no value setter), and the class definition is emitted.
 *
 * <p>NOTE(review): because of the early return, a second variable of an already-declared
 * struct type gets no {@code new}/{@code astore} sequence here -- confirm that this is
 * handled elsewhere.
 *
 * @param symbol the variable whose struct type should be declared
 */
public void putStructToClassDeclaration(Symbol symbol) {
    Specifier sp = symbol.getSpecifierByType(Specifier.STRUCTURE);
    if (sp == null) {
        return;
    }

    StructDefine struct = sp.getStruct();
    if (structNameList.contains(struct.getTag())) {
        return;
    }
    structNameList.add(struct.getTag());

    if (symbol.getValueSetter() == null) {
        this.emit(Instruction.NEW, struct.getTag());
        this.emit(Instruction.DUP);
        this.emit(Instruction.INVOKESPECIAL, struct.getTag() + "/" + "<init>()V");
        int idx = this.getLocalVariableIndex(symbol);
        this.emit(Instruction.ASTORE, "" + idx);
    }

    declareStructAsClass(struct);
}

/**
 * Emits a public class named after the struct tag: one public field per struct member
 * and a no-argument constructor that calls {@code Object}'s constructor and stores a
 * default value into every field. Output is routed to the class-definition buffer for
 * the duration of the call.
 *
 * <p>A struct without members produces a class with no fields instead of failing on a
 * null field list.
 *
 * <p>NOTE(review): array members are declared with a {@code "["} prefix, but the
 * constructor still initialises them using the element descriptor -- confirm that array
 * members are set up elsewhere.
 *
 * @param struct the struct definition to turn into a class
 */
private void declareStructAsClass(StructDefine struct) {
    this.setClassDefinition(true);

    this.emitDirective(Directive.CLASS_PUBLIC, struct.getTag());
    this.emitDirective(Directive.SUPER, "java/lang/Object");

    // Field declarations.
    for (Symbol field = struct.getFields(); field != null; field = field.getNextSymbol()) {
        String fieldName = field.getName() + " ";
        if (field.getDeclarator(Declarator.ARRAY) != null) {
            fieldName += "[";
        }
        fieldName += structFieldDescriptor(field);
        this.emitDirective(Directive.FIELD_PUBLIC, fieldName);
    }

    // Constructor: call the superclass constructor, then default-initialise every field.
    this.emitDirective(Directive.METHOD_PUBLIC, "<init>()V");
    this.emit(Instruction.ALOAD, "0");
    String superInit = "java/lang/Object/<init>()V";
    this.emit(Instruction.INVOKESPECIAL, superInit);

    for (Symbol field = struct.getFields(); field != null; field = field.getNextSymbol()) {
        this.emit(Instruction.ALOAD, "0");
        String fieldName = struct.getTag() + "/" + field.getName();
        String fieldType = structFieldDescriptor(field);

        if (fieldType.equals("Ljava/lang/String;")) {
            // The operand is a quoted one-space string literal; this assumes emit() writes
            // the operand verbatim and that the assembler requires quoted string constants.
            this.emit(Instruction.LDC, "\" \"");
        } else if (!fieldType.isEmpty()) {
            this.emit(Instruction.SIPUSH, "0");
        }

        String classField = fieldName + " " + fieldType;
        this.emit(Instruction.PUTFIELD, classField);
    }

    this.emit(Instruction.RETURN);
    this.emitDirective(Directive.END_METHOD);
    this.emitDirective(Directive.END_CLASS);

    this.setClassDefinition(false);
}

/**
 * Maps a struct member to its type descriptor: {@code "I"} for int,
 * {@code "Ljava/lang/String;"} for a char pointer, {@code "C"} for a plain char, and the
 * empty string for anything else.
 *
 * <p>The char-pointer test must precede the plain char test; otherwise every char
 * pointer is classified as a plain char and the String case can never be reached.
 */
private String structFieldDescriptor(Symbol field) {
    if (field.hasType(Specifier.INT)) {
        return "I";
    }
    if (field.hasType(Specifier.CHAR)) {
        if (field.getDeclarator(Declarator.POINTER) != null) {
            return "Ljava/lang/String;";
        }
        return "C";
    }
    return "";
}
其它有關Java字節碼的部分其實都是根據上一篇來完成的,邏輯不復雜,現在來看一個方法:getLocalVariableIndex,這個方法是獲取變量當前在隊列裏的位置的
/**
 * Computes the position of {@code symbol} in the local-variable ordering of the function
 * whose name is on top of {@code nameStack}.
 *
 * <p>The ordering is built from the function's argument list in reverse link order,
 * followed by every symbol of {@code symbol}'s scope that is not already present.
 *
 * @param symbol the variable to locate
 * @return the zero-based position of {@code symbol} (matched by identity), or {@code -1}
 *         when it is not part of the ordering
 */
public int getLocalVariableIndex(Symbol symbol) {
    TypeSystem typeSys = TypeSystem.getInstance();
    Symbol funcSym = typeSys.getSymbolByText(nameStack.peek(), 0, "main");

    // Arguments come first, in the reverse of their linked-list order.
    ArrayList<Symbol> slots = new ArrayList<>();
    for (Symbol arg = funcSym.getArgList(); arg != null; arg = arg.getNextSymbol()) {
        slots.add(arg);
    }
    Collections.reverse(slots);

    // Then the symbols of the variable's scope, skipping those already listed.
    ArrayList<Symbol> scoped = typeSys.getSymbolsByScope(symbol.getScope());
    for (Symbol candidate : scoped) {
        if (slots.contains(candidate)) {
            continue;
        }
        slots.add(candidate);
    }

    // Identity comparison on purpose: the exact symbol instance is being looked up.
    int position = 0;
    for (Symbol slot : slots) {
        if (slot == symbol) {
            return position;
        }
        position++;
    }
    return -1;
}
這一篇主要是根據上一篇的JVM字節碼來對不同的操作提供不同的方法去輸出這些指令
歡迎Star!