C語言第三方庫Melon開箱即用之詞法分析器使用

之前的文章中,筆者介紹了Linux/UNIX C語言庫Melon的基本功能及框架使用。

本文將介紹Melon中的詞法分析器組件。

Melon的Github倉庫爲:https://github.com/Water-Melon/Melon
在這裏插入圖片描述
詞法分析器在Melon中並不依賴於自身框架,所以在不初始化框架的情況下即可使用。

基礎使用

我們先來看一個基本例子:

//lexer.c

#include <stdio.h>
#include "mln_lex.h"

/*
 * Instantiate the lexer's token definitions for this example, using the
 * prefixes `mln_test` / `TEST` and no extra token kinds.
 *
 * The code below relies on the names mln_test_struct_t, mln_test_token()
 * and TEST_TK_EOF; these are presumably generated by the two macro
 * invocations here (confirm against mln_lex.h).
 */
MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST);
MLN_DEFINE_TOKEN(mln_test, TEST);

/*
 * Basic example: tokenize the file named by argv[1] and print every token
 * followed by its line number and numeric token type.
 *
 * Returns 0 when the input was tokenized up to TEST_TK_EOF, and -1 on a
 * usage error, an initialization failure, or when the lexer returns no
 * token before end of input.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s file_path\n", argv[0]);
        return -1;
    }

    int rc = 0;
    mln_string_t path;
    mln_lex_t *lex = NULL;
    struct mln_lex_attr lattr;
    mln_test_struct_t *ts;

    mln_string_nSet(&path, argv[1], strlen(argv[1]));
    lattr.pool = mln_alloc_init();
    if (lattr.pool == NULL) {
        fprintf(stderr, "init memory pool failed\n");
        return -1;
    }
    lattr.keywords = NULL;          /* no custom keywords */
    lattr.hooks = NULL;             /* no custom token hooks */
    lattr.preprocess = 0;           /* preprocessing disabled */
    lattr.padding = 0;
    lattr.type = M_INPUT_T_FILE;    /* input comes from a file ... */
    lattr.data = &path;             /* ... whose path is given here */

    mln_lex_initWithHooks(mln_test, lex, &lattr);
    if (lex == NULL) {
        fprintf(stderr, "lexer init failed\n");
        /* Do not leak the pool on the failure path. */
        mln_alloc_destroy(lattr.pool);
        return -1;
    }

    while (1) {
        ts = mln_test_token(lex);
        if (ts == NULL) {
            /* A NULL token is a lexer failure, not a normal end of input. */
            fprintf(stderr, "lexer failed before reaching end of input\n");
            rc = -1;
            break;
        }
        if (ts->type == TEST_TK_EOF)
            break;
        /*
         * Emit the token text through stdio as well. Mixing an unbuffered
         * write() on the stdout descriptor with a buffered printf() reorders
         * the output whenever stdout is redirected to a file or a pipe.
         */
        fwrite(ts->text->data, 1, ts->text->len, stdout);
        printf(" line:%u type:%d\n", ts->line, ts->type);
    }

    mln_lex_destroy(lex);
    mln_alloc_destroy(lattr.pool);

    return rc;
}

如此,即可完成一個詞法解析器程序,它讀取程序的參數所指定的文件的內容,然後解析成詞素,並將其打印出來。

我們執行:

$ ./lexer lexer.c

/ line:1 type:21
/ line:1 type:21
lexer line:1 type:5
. line:1 type:20
c line:1 type:5
# line:3 type:9
include line:3 type:5
< line:3 type:24
stdio line:3 type:5
. line:3 type:20
h line:3 type:5
> line:3 type:26
...

可以看到,這個程序將我們的示例C程序拆解成各種詞素,如:/,#,<等等。

進階使用

從上面的例子可以看到,基礎的詞法解析器解析出的詞素過於細碎,有時我們還希望解析器支持我們自定義的關鍵字、自定義格式的數據,甚至是一些預處理功能,例如引入其他文件的內容來解析詞素。

那麼,我們就將上面的例子進行一番修改:

//lexer.c

#include <stdio.h>
#include "mln_lex.h"

/*
 * Keyword table handed to the lexer through lattr.keywords in main().
 * The list ends with a mln_string(NULL) entry.
 * NOTE(review): the order of the entries presumably has to match the order
 * of TEST_TK_ON / TEST_TK_OFF in the macro invocations below — confirm
 * against mln_lex.h.
 */
mln_string_t keywords[] = {
    mln_string("on"),
    mln_string("off"),
    mln_string(NULL)
};

/*
 * Instantiate the lexer's token definitions with prefixes `mln_test` /
 * `TEST`, adding three extra token kinds: TEST_TK_ON, TEST_TK_OFF and
 * TEST_TK_STRING. The second macro pairs each extra kind with its printable
 * name. The code below relies on mln_test_struct_t, mln_test_token(),
 * mln_test_new() and TEST_TK_EOF, presumably generated here (confirm
 * against mln_lex.h).
 */
MLN_DEFINE_TOKEN_TYPE_AND_STRUCT(static, mln_test, TEST, TEST_TK_ON, TEST_TK_OFF, TEST_TK_STRING);
MLN_DEFINE_TOKEN(mln_test, TEST, {TEST_TK_ON, "TEST_TK_ON"}, {TEST_TK_OFF, "TEST_TK_OFF"}, {TEST_TK_STRING, "TEST_TK_STRING"});

/*
 * Append one character of a string literal to the lexer's current result.
 *
 * When `c` is a backslash, the next input character is read and the pair is
 * translated as an escape sequence (\" \' \\ \n \t \b \a \f \r \v); any
 * other character after the backslash sets MLN_LEX_EINVCHAR on the lexer.
 * Any other `c` is appended unchanged.
 *
 * Returns 0 on success and -1 on a read error, a put error, or an invalid
 * escape sequence.
 */
static inline int
mln_get_char(mln_lex_t *lex, char c)
{
    char out = c;

    if (c == '\\') {
        char esc = mln_lex_getAChar(lex);
        if (esc == MLN_ERR) return -1;

        /* Map the escape letter to the character it stands for. */
        switch (esc) {
            case '\"':
            case '\'':
            case '\\':
                out = esc;
                break;
            case 'n': out = '\n'; break;
            case 't': out = '\t'; break;
            case 'b': out = '\b'; break;
            case 'a': out = '\a'; break;
            case 'f': out = '\f'; break;
            case 'r': out = '\r'; break;
            case 'v': out = '\v'; break;
            default:
                mln_lex_setError(lex, MLN_LEX_EINVCHAR);
                return -1;
        }
    }

    return (mln_lex_putAChar(lex, out) == MLN_ERR) ? -1 : 0;
}

/*
 * Lexer hook installed as hooks.dblq_handler in main(): collects input up to
 * the next double quote and returns it as a TEST_TK_STRING token.
 *
 * The current lexer result is cleared first (presumably to drop the opening
 * quote — confirm against mln_lex.h). Each following character goes through
 * mln_get_char(), so escape sequences are translated. `data` is unused.
 *
 * Returns NULL on a read error, on end of input before the closing quote
 * (MLN_LEX_EINVEOF is set on the lexer), or when mln_get_char() fails.
 */
static mln_test_struct_t *
mln_test_dblq_handler(mln_lex_t *lex, void *data)
{
    char ch;

    mln_lex_cleanResult(lex);

    /* The closing quote ends the loop and is not appended to the result. */
    for (ch = mln_lex_getAChar(lex); ch != '\"'; ch = mln_lex_getAChar(lex)) {
        if (ch == MLN_ERR)
            return NULL;
        if (ch == MLN_EOF) {
            mln_lex_setError(lex, MLN_LEX_EINVEOF);
            return NULL;
        }
        if (mln_get_char(lex, ch) < 0)
            return NULL;
    }

    return mln_test_new(lex, TEST_TK_STRING);
}

/*
 * Advanced example: tokenize the file named by argv[1] with custom keywords
 * (the `keywords` table), a double-quote hook that produces TEST_TK_STRING
 * tokens, and preprocessing enabled. Every token is printed followed by its
 * line number and numeric token type.
 *
 * Returns 0 when the input was tokenized up to TEST_TK_EOF, and -1 on a
 * usage error, an initialization failure, or when the lexer returns no
 * token before end of input.
 */
int main(int argc, char *argv[])
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s file_path\n", argv[0]);
        return -1;
    }

    int rc = 0;
    mln_string_t path;
    mln_lex_t *lex = NULL;
    struct mln_lex_attr lattr;
    mln_test_struct_t *ts;
    mln_lex_hooks_t hooks;

    /* Only the double-quote hook is installed; all other hooks stay zeroed. */
    memset(&hooks, 0, sizeof(hooks));
    hooks.dblq_handler = (lex_hook)mln_test_dblq_handler;

    mln_string_nSet(&path, argv[1], strlen(argv[1]));

    lattr.pool = mln_alloc_init();
    if (lattr.pool == NULL) {
        fprintf(stderr, "init pool failed\n");
        return -1;
    }
    lattr.keywords = keywords;
    lattr.hooks = &hooks;
    lattr.preprocess = 1;           /* enable preprocessing */
    lattr.padding = 0;
    lattr.type = M_INPUT_T_FILE;    /* input comes from a file ... */
    lattr.data = &path;             /* ... whose path is given here */

    mln_lex_initWithHooks(mln_test, lex, &lattr);
    if (lex == NULL) {
        fprintf(stderr, "lexer init failed\n");
        /* Do not leak the pool on the failure path. */
        mln_alloc_destroy(lattr.pool);
        return -1;
    }

    while (1) {
        ts = mln_test_token(lex);
        if (ts == NULL) {
            /* A NULL token is a lexer failure, not a normal end of input. */
            fprintf(stderr, "lexer failed before reaching end of input\n");
            rc = -1;
            break;
        }
        if (ts->type == TEST_TK_EOF)
            break;
        /*
         * Emit the token text through stdio as well. Mixing an unbuffered
         * write() on the stdout descriptor with a buffered printf() reorders
         * the output whenever stdout is redirected to a file or a pipe.
         */
        fwrite(ts->text->data, 1, ts->text->len, stdout);
        printf(" line:%u type:%d\n", ts->line, ts->type);
    }

    mln_lex_destroy(lex);
    mln_alloc_destroy(lattr.pool);

    return rc;
}

這一次,我們增加如下功能:

  • 支持關鍵字 on 與 off
  • 支持識別雙引號括住的內容爲字符串類型
  • 增長了預處理功能,例如引入其餘文件內容

生成可執行程序:

$ cc -o lexer lexer.c -I /usr/local/melon/include/ -L /usr/local/melon/lib/ -lmelon -lpthread

建立兩個測試文件:

a.ini

#include "b.ini"
test_mode = on
log_level = 'debug'
proc_num = 10

b.ini

conf_name = "b.ini"

運行咱們的程序來看看效果:

$ ./lexer a.ini

conf_name line:1 type:5
= line:1 type:25
b.ini line:1 type:42
test_mode line:2 type:5
= line:2 type:25
on line:2 type:40
log_level line:3 type:5
= line:3 type:25
' line:3 type:13
debug line:3 type:5
' line:3 type:13
proc_num line:4 type:5
= line:4 type:25
10 line:4 type:2

可以看到,a.ini中寫入include的位置,輸出的是b.ini文件內容解析後的詞素。並且關鍵字(on、off)被正常解析出來了(如on的type爲40),雙引號括住的字符串也被正常處理出來了(type爲42)。


Melon的Github倉庫爲:https://github.com/Water-Melon/Melon

感謝閱讀

相關文章
相關標籤/搜索