Python隨筆(四)抽象語法樹AST

什麼是抽象語法樹嘞?

在計算機科學中,抽象語法樹(Abstract Syntax Tree,AST)實際上是源代碼的抽象語法結構的樹狀表現形式。咱們能夠用一個在線的AST編輯器來觀察AST的構建。
Python語言的執行過程就是通過將Python源代碼轉化爲抽象語法樹,再進行下一步的分析、編譯爲字節碼等其餘操作,因此將Python代碼轉化爲抽象語法樹更利於程序的分析。
通常來講,咱們早期的學習當中當然會用到一種叫作表達式樹的東西,咱們用Python來實現一下表達式樹:

class StackEmptyException(Exception):
    """Signals that an element was requested from a stack holding none."""


class StackFullException(Exception):
    """Signals that an element was pushed onto a stack already at capacity."""


class Node:
    """One cell of a singly linked list: a payload and a link to the next cell."""

    def __init__(self, val=None, nxt=None):
        # Both fields default to None, so Node() is a detached, empty cell.
        self.next = nxt
        self.value = val

    def __str__(self):
        # A cell prints as its payload.
        return f'{self.value!s}'


class Stack:
    """LIFO stack stored as a singly linked list of ``Node`` cells.

    The capacity is exposed through the ``max`` property; a capacity of
    ``0`` means the stack is unbounded.
    """

    def __init__(self, max=0):
        self._top = None
        self._max = 0
        # Route the initial size through the property so it is validated.
        self.max = max

    @property
    def max(self):
        """Capacity limit of the stack; ``0`` means no limit."""
        return self._max

    @max.setter
    def max(self, m):
        """Change the capacity.

        Negative sizes are treated as ``0`` (unbounded).

        Raises:
            Exception: if a non-zero size smaller than the current length
                is requested.
        """
        m = int(m)
        # Clamp before validating: the previous order made every negative
        # size raise, so the clamp could never run.
        if m < 0:
            m = 0
        # 0 lifts the limit, so it is always acceptable regardless of length.
        if m and m < self.length:
            raise Exception('Resize stack failed, please pop some elements first.')
        self._max = m

    def init(self, iterable=()):
        """Replace the contents with the items of ``iterable``.

        Items are pushed in iteration order, so the last item ends up on
        top.  An empty iterable leaves the stack untouched.  Any iterable
        is accepted, including generators.  The capacity is not checked.
        """
        # Materialise once so one-shot iterables can be tested for emptiness.
        items = list(iterable)
        if not items:
            return
        self._top = None
        for item in items:
            self._top = Node(item, self._top)

    def show(self):
        """Print the stack from top to bottom, one framed cell per line.

        An empty stack is drawn as a single cell containing ``None``.
        """
        def _cells():
            node = self._top
            while node and node.next:
                yield node
                node = node.next
            # Last cell, or None when the stack holds nothing.
            yield node

        body = '\n'.join('|{:^7}|'.format(str(cell)) for cell in _cells())
        print(body + '\n ' + 7 * '-')

    @property
    def length(self):
        """Number of stored elements, counted by walking the list."""
        count = 0
        node = self._top
        while node is not None:
            count += 1
            node = node.next
        return count

    @property
    def is_empty(self):
        """``True`` when the stack holds no element."""
        return self._top is None

    @property
    def is_full(self):
        """``True`` when a limit is set and has been reached or exceeded."""
        # >= rather than ==: a stack over-filled through init() must still
        # report itself as full.
        return bool(self._max and self.length >= self._max)

    def push(self, item):
        """Put ``item`` on top of the stack.

        Raises:
            StackFullException: if the stack is at capacity.
        """
        if self.is_full:
            raise StackFullException('Error: trying to push element into a full stack!')
        self._top = Node(item, self._top)

    def pop(self):
        """Remove the top element and return its value.

        Raises:
            StackEmptyException: if the stack is empty.
        """
        if self.is_empty:
            raise StackEmptyException('Error: trying to pop element from an empty stack!')
        node = self._top
        self._top = node.next
        return node.value

    def top(self):
        """Return the top value without removing it, or ``None`` if empty."""
        return self._top.value if self._top else self._top

    def clear(self):
        """Remove every element."""
        while self._top:
            self.pop()


def test(stack):
    """Walk a stack object through its whole API, printing every step.

    The script seeds five items, pushes three more, pops one and then caps
    the size at 7, so the final push is expected to hit the limit.
    """
    print('\nShow stack:')
    stack.show()

    print('\nInit linked list:')
    stack.init([1, 2, 3, 4, 5])
    stack.show()

    print('\nPush element to stack:')
    for value in (6, 7, 'like'):
        stack.push(value)
    stack.show()

    print('\nCheck top element:')
    print(stack.top())

    print('\nPop element from stack:')
    popped = stack.pop()
    print('Element %s popped,' % popped)
    stack.show()

    # Shrinking below the current length should be refused.
    print('\nSet stack max size:')
    try:
        stack.max = 1
    except Exception as err:
        print(err)

    print('\nSet stack max size:')
    stack.max = 7
    print(stack.max)

    # The stack now sits exactly at its limit.
    print('\nPush full stack:')
    try:
        stack.push(7)
    except StackFullException as err:
        print(err)

    print('\nClear stack:')
    stack.clear()
    stack.show()

    print('\nStack is empty:')
    print(stack.is_empty)

    print('\nPop empty stack:')
    try:
        stack.pop()
    except StackEmptyException as err:
        print(err)


class TreeNode:
    """Binary-tree node: a payload plus optional left and right children."""

    def __init__(self, val=None, lef=None, rgt=None):
        self.value = val
        # A child of None means that side of the node is empty.
        self.left, self.right = lef, rgt

    def __str__(self):
        # A node prints as its payload.
        return f'{self.value!s}'


class BinaryTree:
    """Binary tree with depth-annotated traversals and balanced insertion.

    Every traversal returns a list of ``(node, depth)`` pairs, where the
    starting node has depth 0.  Traversing an empty tree yields the single
    pair ``(None, 0)``.
    """

    def __init__(self, root=None):
        self._root = root

    def __str__(self):
        # Pre-order listing, indented four spaces per level.
        rows = [depth * 4 * ' ' + str(node) for node, depth in self.pre_traversal()]
        return '\n'.join(rows)

    def _walk(self, root, order):
        """Collect ``(node, depth)`` pairs in ``'pre'``, ``'in'`` or ``'post'`` order."""
        start = root if root else self._root
        visited = []

        def visit(node, depth):
            entry = (node, depth)
            if order == 'pre':
                visited.append(entry)
            if node and node.left is not None:
                visit(node.left, depth + 1)
            if order == 'in':
                visited.append(entry)
            if node and node.right is not None:
                visit(node.right, depth + 1)
            if order == 'post':
                visited.append(entry)

        visit(start, 0)
        return visited

    def pre_traversal(self, root=None):
        """Node, then left subtree, then right subtree (from ``root`` or the tree root)."""
        return self._walk(root, 'pre')

    def in_traversal(self, root=None):
        """Left subtree, then node, then right subtree (from ``root`` or the tree root)."""
        return self._walk(root, 'in')

    def post_traversal(self, root=None):
        """Left subtree, then right subtree, then node (from ``root`` or the tree root)."""
        return self._walk(root, 'post')

    @property
    def max_depth(self):
        """Depth of the deepest node; 0 for a single-node or empty tree."""
        return max(depth for _, depth in self.pre_traversal())

    def show(self, tl=None):
        """Print a traversal list, defaulting to pre-order, one indented node per line."""
        entries = tl if tl else self.pre_traversal()
        print('\n'.join(depth * 4 * ' ' + str(node) for node, depth in entries))

    def make_empty(self):
        """Reset the tree by re-running the initialiser."""
        self.__init__()

    def insert(self, item):
        """Add ``item`` in the first free child slot, descending into the smaller subtree.

        When both children exist, the left subtree is chosen if it has no
        more nodes than the right one.
        """
        if self._root is None:
            self._root = TreeNode(item)
            return

        node = self._root
        while True:
            if node.left is None:
                node.left = TreeNode(item)
                return
            if node.right is None:
                node.right = TreeNode(item)
                return
            left_size = len(self.pre_traversal(node.left))
            right_size = len(self.pre_traversal(node.right))
            node = node.left if left_size <= right_size else node.right


class ExpressionTree(BinaryTree):
    """Binary tree built from a postfix (reverse Polish) token sequence.

    NOTE: a second, identical ``ExpressionTree`` is declared further down in
    this file; being defined later, that one is what the name refers to at
    run time.
    """

    # Operator tokens mapped to a precedence level.
    SIGN = {'+': 1, '-': 1, '*': 2, '/': 2, '(': 3}

    def gene_tree_by_postfix(self, expr):
        """Build the tree from ``expr``, an iterable of postfix tokens.

        Operands become leaves; each operator pops its right and then its
        left operand and becomes their parent.  The last subtree left on
        the stack becomes the root.
        """
        operands = Stack()
        for token in expr:
            if token not in self.SIGN:
                operands.push(TreeNode(token))
                continue
            rhs = operands.pop()
            lhs = operands.pop()
            operands.push(TreeNode(token, lhs, rhs))
        self._root = operands.pop()


class ExpressionTree(BinaryTree):
    """Binary tree built from a postfix (reverse Polish) token sequence."""

    # Operator tokens mapped to a precedence level.
    # NOTE(review): '(' is listed here, so gene_tree_by_postfix treats it as
    # a binary operator; presumably the table is shared with an infix
    # converter - confirm.
    SIGN = {'+': 1, '-': 1, '*': 2, '/': 2, '(': 3}

    def gene_tree_by_postfix(self, expr):
        """Build the tree from ``expr``, an iterable of postfix tokens.

        Operands become leaves; each operator pops its right and then its
        left operand and becomes their parent.

        Raises:
            StackEmptyException: if an operator lacks operands or ``expr``
                is empty.
            ValueError: if operands are left over once every token has been
                consumed, i.e. the expression is not a single well-formed
                postfix expression.  The current tree is left unchanged in
                that case.
        """
        s = Stack()
        for token in expr:
            if token in self.SIGN:
                right = s.pop()
                left = s.pop()
                s.push(TreeNode(token, left, right))
            else:
                s.push(TreeNode(token))
        root = s.pop()
        # Anything still stacked would otherwise be dropped silently and the
        # tree would represent only the tail of the expression.
        if not s.is_empty:
            raise ValueError('Malformed postfix expression: operands left over.')
        self._root = root


def test_expression_tree(ep):
    """Build an expression tree from postfix tokens ``ep`` and print it in three orders."""
    tree = ExpressionTree()
    tree.gene_tree_by_postfix(ep)

    # str(tree) renders the pre-order listing.
    print('\n------先序遍歷-------')
    print(tree)

    remaining = (
        ('\n------後序遍歷------', tree.post_traversal),
        ('\n-------中序遍歷-------', tree.in_traversal),
    )
    for banner, walk in remaining:
        print(banner)
        tree.show(walk())

if __name__ == '__main__':
    # Postfix form of (a + b) * (c * (d + e)); tokens are separated by single spaces.
    ep = 'a b + c d e + * *'
    tokens = ep.split(' ')
    test_expression_tree(tokens)

輸出:
回到AST
AST主要作用有三步:

1. 解析(PARSE):將代碼字符串解析成抽象語法樹。
2. 轉換(TRANSFORM):對抽象語法樹進行轉換操做。
3. 生成(GENERATE): 根據變換後的抽象語法樹再生成代碼字符串。

Python官方對於CPython解釋器對python源碼的處理過程如下:

1. Parse source code into a parse tree (Parser/pgen.c)
2. Transform parse tree into an Abstract Syntax Tree (Python/ast.c)
3. Transform AST into a Control Flow Graph (Python/compile.c)
4. Emit bytecode based on the Control Flow Graph (Python/compile.c)

可是隻知道上面這些還不夠咱們去理解,由於在Python中,以控制檯爲例,咱們的輸入都是些字符串,例如 a=2、b=[1,2,3,4,5] 之類,咱們要如何讓計算機去理解而且執行這些東西呢?
這就是解釋器的解釋過程,負責把關鍵字、變量、空格、特殊字符進行處理。處理的過程大概有下面兩個步驟:

1. 將整個代碼字符串分割成 語法單元數組。
2. 在分詞結果的基礎之上分析 語法單元之間的關係。

一個抽象語法樹的基本構成:

type:描述該語句的類型 --變量聲明語句
kind:變量聲明的關鍵字 -- var
declaration: 聲明的內容數組,裏面的每一項也是一個對象
    type: 描述該語句的類型 
    id: 描述變量名稱的對象
        type:定義
        name: 是變量的名字
    init: 初始化變量值的對象
        type: 類型
        value: 值 "is tree" 不帶引號
        raw: "\"is tree\"" 帶引號

通常來講,咱們能夠在Python的pythonrun裏面找到:

PyObject *type;

定義了語法樹的類型
通常來講,研究抽象語法樹有哪些用途呢?

在一種語言的IDE中,語法的檢查、風格的檢查、格式化、高亮、錯誤提示,代碼自動補全等等
通過搭建一個Python的語法樹去理解表達式是如何被解析的,咱們來看一個(3+2-5*0)/3的例子:

# Node kinds.  Each one is a callable taking the environment followed by the
# node's own arguments: a literal, a variable lookup and the four arithmetic
# operations.
Num = lambda env, n: n
Var = lambda env, x: env[x]
Add = lambda env, a, b: _eval(env, a) + _eval(env, b)
Mul = lambda env, a, b: _eval(env, a) * _eval(env, b)
Sub = lambda env, a, b: _eval(env, a) - _eval(env, b)
Div = lambda env, a, b: _eval(env, a) / _eval(env, b)
# Evaluate an expression: a node is a tuple whose head is the node kind and
# whose tail holds that kind's arguments.
_eval = lambda env, expr: expr[0](env, *expr[1:])
# Variables available to the expression.
env = {'i': 5, 'j': 2, 'k': 3}
# The syntax tree for ((k + j) - (i * 0)) / k  (it looks a lot like Clojure...).
tree = (Div, (Sub, (Add, (Var, 'k'), (Var, 'j')), (Mul, (Var, 'i'), (Num, 0))), (Var, 'k'))
print(_eval(env, tree))

輸出:
1.6666666666666667

承接前一篇虛擬機的運行機制,咱們來看看Python的AST解析過程
首先來看Python虛擬機的循環執行框架
位於ceval.c文件中:

/* Abridged excerpt of the interpreter's frame evaluation loop.
 * Lines consisting of `......` mark code omitted from the excerpt. */
PyObject *
PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
{
  ......
  // Fetch the thread state object (PyThreadState) of the currently active thread
  PyThreadState *tstate = PyThreadState_GET();
  // Record this frame in the thread state object
  tstate->frame = f;
  // Keep the frame's code object and its name / constant tables at hand
  co = f->f_code;
  names = co->co_names;
  consts = co->co_consts;
 
  why = WHY_NOT;
  ......
  for (;;) {
    fast_next_opcode:
        // Remember the offset of the instruction about to be fetched
        f->f_lasti = INSTR_OFFSET();
        // Fetch the bytecode instruction
        opcode = NEXTOP();
        oparg = 0; 
        // If the instruction takes an argument, fetch it as well
        if (HAS_ARG(opcode))
            oparg = NEXTARG();
    dispatch_opcode:
      ......
  }
}

如今來調試一下PYVM


在咱們對PYVM進行調試的過程當中,能夠看到Py把stdin的字符串一個個「吃掉」了。吃的過程是爲了把字符串切分成語法單元、構建抽象語法樹,再編譯爲字節碼;字節碼的遍歷是通過幾個宏來實現:

/* Helpers for stepping through the instruction stream.  `first_instr` and
 * `next_instr` are pointers into it; both are declared outside this excerpt. */
/* Distance of the next instruction from the first one, as an int. */
#define INSTR_OFFSET()  ((int)(next_instr - first_instr))
/* Read the element at next_instr, then advance by one. */
#define NEXTOP()        (*next_instr++)
/* Advance by two and combine the two elements just skipped, low one first. */
#define NEXTARG()       (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
/* Same low-first combination of next_instr[1] and next_instr[2], without advancing. */
#define PEEKARG()       ((next_instr[2]<<8) + next_instr[1])
/* Continue at offset x counted from the first instruction. */
#define JUMPTO(x)       (next_instr = first_instr + (x))
/* Continue x elements further on from the current position. */
#define JUMPBY(x)       (next_instr += (x))

在程序內部通過PyTokenizer_Get來逐個獲取輸入字符串中的語法單元(包括關鍵字),構建好語法樹之後通過PyRun_InteractiveOneObjectEx執行。
Python中AST的節點定義
pythoncore/Parser/node.c

/* Allocate a node of the given type with every other field cleared.
 * Returns NULL when the allocation fails.
 * The return type is spelled out: the function returns a pointer, so it
 * must not be left to default to int. */
node *
PyNode_New(int type)
{
    node *n = (node *) PyObject_MALLOC(1 * sizeof(node));
    if (n == NULL)
        return NULL;
    n->n_type = type;
    /* No text, line number or children yet. */
    n->n_str = NULL;
    n->n_lineno = 0;
    n->n_nchildren = 0;
    n->n_child = NULL;
    return n;
}

下面給出Python自帶的AST例子,去觀察構建出來的樹

import ast

# Source text of a small program, kept as a string so it can be handed to
# the parser instead of being executed directly.
Monster = """
class Monster:
    def __init__(self):
        self.level=0
        self.hp=1000
        self.boom=[x for x in range(10)]
    def eat(self,frut):
        self.hp+=1
    def howl(self):
        print("Ao uuuuuuuuuuuuuu")
monster=Monster()
monster.howl()
"""

if __name__ == "__main__":
    # Alternative: compile the text and run it rather than inspecting it.
    # cm = compile(Monster, '<string>', 'exec')
    # exec (cm)
    # Parse the text into a syntax tree and print its dump.
    print(ast.dump(ast.parse(Monster)))

通過compile咱們能夠編譯Python字符串,並執行字串的內容

同時,咱們也能夠用Python自帶的AST庫解析咱們的字符串爲語法樹

參考文檔:
[Abstract Syntax Trees](https://docs.python.org/3/library/ast.html)
[輪子哥博客](http://www.cppblog.com/vczh/archive/2008/06/15/53373.html)
[表達式樹](http://www.cnblogs.com/stacklike/p/8284691.html)
[AST庫的使用](https://www.cnblogs.com/yssjun/p/10069199.html)

相關文章
相關標籤/搜索