前有芋艿大佬已經發過相關分析的文章,但我自己覺得源碼總歸要看一下,而且看了就要記錄下來(記性不好...),因此就有了這篇文章(之後還要繼續更新😄),希望我們都能在看過文章後有不同的收穫。
聲明:本文基於 1.5.M1 版本。
首先,我們對解析 SQL 的過程中用到的類做一個解釋:
@Test public void assertNextTokenForOrderBy() { Lexer lexer = new Lexer("SELECT * FROM ORDER ORDER \t BY XX DESC", dictionary); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, DefaultKeyword.SELECT, "SELECT"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, Symbol.STAR, "*"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, DefaultKeyword.FROM, "FROM"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, Literals.IDENTIFIER, "ORDER"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, DefaultKeyword.ORDER, "ORDER"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, DefaultKeyword.BY, "BY"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, Literals.IDENTIFIER, "XX"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, DefaultKeyword.DESC, "DESC"); //lexer.nextToken(); LexerAssert.assertNextToken(lexer, Assist.END, ""); }
上面是項目中的一段測試用例,我們以這個用例來分析。
/**
 * Advances to the next lexical token.
 *
 * <p>Calls {@code skipIgnoredToken()} first, then stores the token that
 * starts at the current offset in {@code currentToken} and moves
 * {@code offset} to that token's end position.
 */
public final void nextToken() {
    skipIgnoredToken();
    currentToken = scanCurrentToken();
    offset = currentToken.getEndPosition();
}

/**
 * Scans the token that begins at the current offset.
 *
 * <p>The begin-checks run in a fixed order and the first one that matches
 * decides which scan is used. When none matches, the result is an
 * {@code Assist.END} token if {@code isEnd()} holds and an
 * {@code Assist.ERROR} token otherwise, both with empty text.
 *
 * @return the scanned token
 */
private Token scanCurrentToken() {
    if (isVariableBegin()) {
        return new Tokenizer(input, dictionary, offset).scanVariable();
    }
    if (isNCharBegin()) {
        // The pre-increment advances offset by one before scanning, so the
        // scan starts one character later (after the 'N' that
        // isNCharBegin() checks for).
        return new Tokenizer(input, dictionary, ++offset).scanChars();
    }
    if (isIdentifierBegin()) {
        return new Tokenizer(input, dictionary, offset).scanIdentifier();
    }
    if (isHexDecimalBegin()) {
        return new Tokenizer(input, dictionary, offset).scanHexDecimal();
    }
    if (isNumberBegin()) {
        return new Tokenizer(input, dictionary, offset).scanNumber();
    }
    if (isSymbolBegin()) {
        return new Tokenizer(input, dictionary, offset).scanSymbol();
    }
    if (isCharsBegin()) {
        return new Tokenizer(input, dictionary, offset).scanChars();
    }
    if (isEnd()) {
        return new Token(Assist.END, "", offset);
    }
    return new Token(Assist.ERROR, "", offset);
}
/**
 * Moves {@code offset} past input that is not turned into a token.
 *
 * <p>Whitespace is skipped first. Then, while a hint begins at the offset,
 * the hint and the whitespace after it are skipped; after that the same is
 * done for comments. The hint check is not repeated once the comment loop
 * has started.
 */
private void skipIgnoredToken() {
    offset = whitespaceEndFrom(offset);
    while (isHintBegin()) {
        offset = new Tokenizer(input, dictionary, offset).skipHint();
        offset = whitespaceEndFrom(offset);
    }
    while (isCommentBegin()) {
        offset = new Tokenizer(input, dictionary, offset).skipComment();
        offset = whitespaceEndFrom(offset);
    }
}

/**
 * Returns the offset reached by skipping whitespace starting at {@code start}.
 *
 * @param start offset to start skipping from
 * @return the value returned by {@code Tokenizer.skipWhitespace()}
 */
private int whitespaceEndFrom(final int start) {
    return new Tokenizer(input, dictionary, start).skipWhitespace();
}
這裏我們以跳過空格爲例來展開說明:
從傳入的 offset 標誌位開始,循環判斷 SQL 語句中對應位置的字符是否是空格,直到遇到不是空格的字符就退出,並返回最新位置的 offset。
/**
 * Skips whitespace.
 *
 * @return the offset of the first non-whitespace character at or after the
 *         current offset
 */
public int skipWhitespace() {
    int position = offset;
    while (CharType.isWhitespace(charAt(position))) {
        position++;
    }
    return position;
}

/**
 * Reads the character at {@code index}, substituting {@code CharType.EOI}
 * for any index at or beyond the end of the input.
 */
private char charAt(final int index) {
    if (index >= input.length()) {
        return (char) CharType.EOI;
    }
    return input.charAt(index);
}

/**
 * Tells whether a character is treated as whitespace.
 *
 * @param ch the character to test
 * @return {@code true} for any character up to 32 other than {@code EOI},
 *         and for any character in the range 0x7F..0xA0
 */
public static boolean isWhitespace(final char ch) {
    if (ch <= ' ') {
        // Space and the control characters count, except the EOI marker.
        return EOI != ch;
    }
    // 0xA0 (decimal 160) is the inclusive upper bound of this range.
    return ch >= 0x7F && ch <= 0xA0;
}
/**
 * MySQL implementation: a variable begins when the character returned by
 * {@code getCurrentChar(0)} is {@code '@'}.
 */
@Override
protected boolean isVariableBegin() {
    final char current = getCurrentChar(0);
    return current == '@';
}
/**
 * Tells whether an N-prefixed character literal begins here: N-chars must
 * be supported, {@code getCurrentChar(0)} must be {@code 'N'} and
 * {@code getCurrentChar(1)} must be a single quote.
 */
private boolean isNCharBegin() {
    if (!isSupportNChars()) {
        return false;
    }
    if ('N' != getCurrentChar(0)) {
        return false;
    }
    return '\'' == getCurrentChar(1);
}
/**
 * Tells whether the character returned by {@code getCurrentChar(0)} can
 * start an identifier.
 */
private boolean isIdentifierBegin() {
    final char current = getCurrentChar(0);
    return isIdentifierBegin(current);
}

/**
 * Tells whether {@code ch} can start an identifier: an ASCII letter, a
 * backtick, an underscore or a dollar sign.
 */
private boolean isIdentifierBegin(final char ch) {
    if (CharType.isAlphabet(ch)) {
        return true;
    }
    switch (ch) {
        case '`':
        case '_':
        case '$':
            return true;
        default:
            return false;
    }
}

/**
 * Tells whether a character is a letter.
 *
 * @param ch the character to test
 * @return {@code true} when {@code ch} is in {@code A-Z} or {@code a-z}
 */
public static boolean isAlphabet(final char ch) {
    final boolean upperCase = 'A' <= ch && ch <= 'Z';
    final boolean lowerCase = 'a' <= ch && ch <= 'z';
    return upperCase || lowerCase;
}
/**
 * Scans an identifier starting at the current offset.
 *
 * <p>When the identifier starts with a backtick, its length comes from
 * {@code getLengthUntilTerminatedChar('`')} and the token type is always
 * {@code Literals.IDENTIFIER}. Otherwise the text runs for as long as
 * {@code isIdentifierChar} accepts the characters; its type is resolved by
 * {@code processAmbiguousIdentifier} when the text is ambiguous, and by a
 * dictionary lookup that falls back to {@code Literals.IDENTIFIER} when not.
 *
 * @return the identifier token
 */
public Token scanIdentifier() {
    if ('`' == charAt(offset)) {
        final int quotedEnd = offset + getLengthUntilTerminatedChar('`');
        return new Token(Literals.IDENTIFIER, input.substring(offset, quotedEnd), quotedEnd);
    }
    int end = offset;
    while (isIdentifierChar(charAt(end))) {
        end++;
    }
    final String text = input.substring(offset, end);
    if (!isAmbiguousIdentifier(text)) {
        return new Token(dictionary.findTokenType(text, Literals.IDENTIFIER), text, end);
    }
    return new Token(processAmbiguousIdentifier(end, text), text, end);
}
小尾巴走一波,歡迎關注我的公衆號,不定期分享編程方面的小技巧:)