-- Smoke test for the DataGenerator module: generate candidate strings for
-- each pattern and report every candidate that the pattern really matches.
local f = require('DataGenerator')

-- Prints one TAP-style "ok N - ..." line per generated string that matches
-- the pattern it was generated from. Non-matching strings are skipped
-- silently.
-- data: array of Lua pattern strings.
function test_data (data)
  local passed = 0
  for _, patt in ipairs(data) do
    local candidates = f(patt)
    for _, candidate in ipairs(candidates) do
      if string.match(candidate, patt) then
        passed = passed + 1
        print('ok ' .. passed .. ' - "' .. candidate .. '" match /' .. patt .. '/')
      end
    end
  end
end

local data = { '^bde^de$xx$' }

test_data(data)
-- Maximum number of records kept in a single array.
local array_limit = 100

-- Upper bound for the open-ended quantifiers: `+`, `*` and `-` repeat at
-- most this many times, and `{n,}` is expanded to `{n, n + max_rep_times}`.
local max_rep_times = 10

-- When an input array is already longer than the record limit, the limit
-- becomes that length plus this step.
local step_length = 1

-- `unpack` lives in the `table` library in newer Lua versions and is a
-- global in older ones; accept either.
local unpack = table.unpack or unpack
-- Fatal-error helper used throughout this module. It deliberately shadows
-- the built-in `error`: instead of raising, it prints its arguments and
-- terminates the process.
-- The process now exits with status 1; a bare `os.exit()` reports success,
-- which hid failures from any calling shell or test harness.
local function error (...)
  print(...)
  os.exit(1)
end
-- Parses a quantifier written as `{num}`, `{num,}` or `{num,num}` into a
-- two-element array `{ from, to }`.
--   `{5}`   => { 5, 5 }
--   `{2,}`  => { 2, 2 + max_rep_times }
--   `{2,7}` => { 2, 7 }
-- Characters before the opening `{` are ignored and spaces inside the braces
-- are skipped. Any other unexpected character is reported through `error`.
--
-- The state machine uses these modes:
--   0  waiting for `{`
--   1  waiting for the first digit of the lower bound
--   2  reading the lower bound
--   3  just after the comma
--   4  reading the upper bound
local function parse_quantifier_str (str)
  local array = {}
  local mode = 0
  local num_str = ''
  for char in string.gmatch(str, '.') do
    local is_digit = string.match(char, '%d') ~= nil
    if mode == 0 then
      if char == '{' then
        mode = 1
      end
    elseif mode == 1 then
      if is_digit then
        num_str = char
        mode = 2
      -- `char ~= ' '` is intentional: the previous `not char == ' '` parsed
      -- as `(not char) == ' '`, which is never true, so no error was raised.
      elseif char ~= ' ' then
        error('error:01:class difine with no digit numer')
      end
    elseif mode == 2 then
      if is_digit then
        num_str = num_str .. char
      elseif char == ',' then
        table.insert(array, tonumber(num_str))
        num_str = ''
        mode = 3
      elseif char == '}' then
        -- {5} => {5, 5}
        local exact = tonumber(num_str)
        table.insert(array, exact)
        table.insert(array, exact)
        break
      elseif char ~= ' ' then
        error('error:02:quantifier with non digit char')
      end
    elseif mode == 3 then
      if char == '}' then
        -- The previous significant character was the comma: `{num,}` form.
        table.insert(array, array[1] + max_rep_times)
        break
      elseif is_digit then
        num_str = char
        mode = 4
      elseif char ~= ' ' then
        error('error:03: wrong char in class str')
      end
    elseif mode == 4 then
      if is_digit then
        num_str = num_str .. char
      elseif char == '}' then
        table.insert(array, tonumber(num_str))
        break
      elseif char ~= ' ' then
        error('error:04: wrong char in class str')
      end
    end
  end
  return array
end
-- Builds every concatenation `v1 .. v2` with v1 taken from a1 and v2 from a2,
-- in a1-major order.
-- a1, a2: arrays of strings.
-- limit:  optional record limit, defaulting to `array_limit`. If either
--         input is longer than the limit, the limit is raised to that length
--         plus `step_length`.
-- Returns a new array. The limit is checked only after each complete pass
-- over a2, so the result may exceed it by up to one pass.
--
-- Changes from the earlier version: the running counter no longer leaks into
-- a global named `count`, and `limit` is a real (optional) parameter instead
-- of a read of an undefined global.
local function concat_array (a1, a2, limit)
  limit = limit or array_limit
  local max_length = math.max(1, #a1, #a2)
  if max_length > limit then
    limit = max_length + step_length
  end

  local result = {}
  for _, v1 in ipairs(a1) do
    for _, v2 in ipairs(a2) do
      result[#result + 1] = v1 .. v2
    end
    if #result > limit then
      break
    end
  end
  return result
end
-- Combines a list of string arrays into one array via `concat_array`,
-- folding from the right: the last two arrays are combined first, then the
-- result is combined with the one before it, and so on.
-- array: list of string arrays.
-- Returns the single array unchanged when the list has one element, and an
-- empty array when the list is empty.
-- The input list is left untouched; the earlier version popped elements off
-- the caller's table and returned nil for an empty list.
local function concat_multi_array (array)
  local n = #array
  if n == 0 then
    return {}
  end
  local result = array[n]
  for i = n - 1, 1, -1 do
    result = concat_array(array[i], result)
  end
  return result
end
-- Expands "array repeated rep_times times" into the strings it can produce.
-- array:     array of strings (the alternatives for one position).
-- rep_times: repetition count.
-- Zero repetitions yields the single empty string. One repetition returns
-- the input array itself, not a copy. Otherwise the array is listed
-- rep_times times and handed to `concat_multi_array`.
local function concat_rep_array (array, rep_times)
  if rep_times == 0 then
    return { '' }
  elseif rep_times == 1 then
    return array
  end

  local copies = {}
  for _ = 1, rep_times do
    copies[#copies + 1] = array
  end
  return concat_multi_array(copies)
end
-- Class-and-quantifier list to string arrays.
-- a: array of `{ class, { from, to } }` pairs, where `class` is an array of
--    strings and `from`/`to` bound the repetition count.
-- Returns an array with one entry per pair. Each entry holds the strings
-- produced by `concat_rep_array(class, n)` for every n from `from` to `to`,
-- appended in increasing order of n.
local function cnq_to_array (a)
  local combined = {}
  for _, pair in ipairs(a) do
    local class, quant = pair[1], pair[2]
    local from, to = quant[1], quant[2]
    local expanded = {}
    for times = from, to do
      -- Append the strings generated for this repetition count.
      for _, str in ipairs(concat_rep_array(class, times)) do
        expanded[#expanded + 1] = str
      end
    end
    combined[#combined + 1] = expanded
  end
  return combined
end
-- Last byte value handed out as a placeholder character. The counter is
-- incremented before use, so the first placeholder is string.char(129).
local count = 128
-- Maps a placeholder character to what it stands for: either a plain id
-- string, or a table `{ id = ... }` that can also collect extra records.
local char_id = {}

-- Allocates the next placeholder character for `id` and records it in
-- `char_id`.
-- id:   the text the placeholder stands for.
-- mode: optional; when 1 the record is a table `{ id = id }`, otherwise the
--       record is `id` itself.
-- Returns the one-byte placeholder string.
local function apply_id_char (id, mode)
  count = count + 1
  local char = string.char(count)
  if (mode or 0) == 1 then
    char_id[char] = { id = id }
  else
    char_id[char] = id
  end
  return char
end
-- Looks up the original text a placeholder character stands for.
-- char: placeholder character previously registered in `char_id`.
-- Returns the `id` field for table records and the record itself for string
-- records. Anything else is reported through `error`, after which nil is
-- returned.
local function get_char_id (char)
  local record = char_id[char]
  local kind = type(record)
  if kind == 'table' then
    return record.id
  end
  if kind == 'string' then
    return record
  end
  error('error:05:not exists id records: ' .. char)
  return nil
end
-- Lists the characters a class placeholder stands for.
-- char: either '.' (any character) or a placeholder whose id is a Lua class
--       letter such as `d` or `A`; the id is turned into the pattern `%<id>`.
-- Returns an array of the one-byte strings with codes 0..126 that match the
-- pattern, in increasing code order.
local function get_char_class (char)
  -- The pattern does not depend on the candidate byte, so build it once.
  local patt
  if char == '.' then
    patt = char
  else
    patt = '%' .. get_char_id(char)
  end

  local members = {}
  for code = 0, 126 do
    local candidate = string.char(code)
    if string.find(candidate, patt) then
      members[#members + 1] = candidate
    end
  end
  return members
end
-- Fetches the pos-th text recorded for a user-defined class or quantifier
-- placeholder.
-- char: placeholder character registered in `char_id`.
-- pos:  1-based index into the texts recorded for that placeholder.
-- The texts sit in the array part of the record, next to its `id` field.
-- Returns the recorded text, or nil when that position is empty. A missing
-- record is reported through `error`.
local function get_char_str (char, pos)
  local records = char_id[char]
  if not records then
    error('error:06:not exists char record')
    return
  end
  return records[pos]
end
-- Repetition bounds `{ from, to }` for the single-character quantifiers.
local quantifier_char_table = {
  ['+'] = { 1, max_rep_times },
  ['*'] = { 0, max_rep_times },
  ['-'] = { 0, max_rep_times },
  ['?'] = { 0, 1 },
}

-- Maps the character that follows `%` in a pattern to its placeholder, so
-- escaped sequences can be swapped for a single character.
local escape_id_table = {}

-- Allocates one placeholder per non-space character of `id_list`, records
-- placeholder => id in `char_table`, and id => placeholder in
-- `escape_id_table`.
local function register_ids (id_list, char_table)
  for id in string.gmatch(id_list, '%S') do
    local id_char = apply_id_char(id)
    char_table[id_char] = id
    escape_id_table[id] = id_char
  end
end

-- class_char_table: characters that denote a character class. '.' denotes
-- itself; the others are placeholders for `%a`, `%d`, ...
local class_str = 'a c d l p s u w x z A C D L P S U W X Z'
local class_char_table = { ['.'] = '.' }
register_ids(class_str, class_char_table)

-- Escaped magic characters are hidden behind placeholders so that later
-- passes only ever see the unescaped ones.
local conceal_str = '+ - * ? [ ] ( ) { } % .'
local conceal_char_table = {}
register_ids(conceal_str, conceal_char_table)

-- [...] => placeholder for a user-defined character set.
local user_class_char = apply_id_char('[...]', 1)

-- {num,num} {num} {num,} => placeholder for a user-defined quantifier.
local user_quantifier_char = apply_id_char('{...}', 1)

-- [0-8 a-z] => placeholder for a character range inside a set.
local user_range_char = apply_id_char('[n-m]', 1)
-- Normalises a Lua pattern into a string in which every construct is one
-- character:
--   * a leading `^` and a trailing `$` are dropped;
--   * `%x` becomes the placeholder registered for `x` in `escape_id_table`,
--     or plain `x` when none is registered;
--   * each `[...]` becomes `user_class_char`, and its text is recorded in
--     `char_id[user_class_char]`;
--   * each `{n}`, `{n,}`, `{n,m}` becomes `user_quantifier_char`, and its
--     text is recorded in `char_id[user_quantifier_char]`.
-- patt: the pattern string.
-- Returns the normalised string.
--
-- The recorded texts are cleared at the start of every call. They used to
-- pile up across calls, so a second pattern was decoded with the sets and
-- quantifiers captured for the first one.
function process_patt_str (patt)
  for _, placeholder in ipairs({ user_class_char, user_quantifier_char }) do
    local records = char_id[placeholder]
    -- Only the array part is cleared; the `id` field stays.
    for i = #records, 1, -1 do
      records[i] = nil
    end
  end

  -- The anchors at either end do not affect generation.
  patt = string.gsub(patt, '^%^', '')
  patt = string.gsub(patt, '%$$', '')

  -- Replace every escaped character with its placeholder.
  patt = string.gsub(patt, '%%(.)', function (id)
    return escape_id_table[id] or id
  end)

  -- User-defined sets are recorded, not merged into the class characters.
  patt = string.gsub(patt, '%[.-%]', function (str)
    table.insert(char_id[user_class_char], str)
    return user_class_char
  end)

  -- Likewise for user-defined quantifiers.
  patt = string.gsub(patt, '{%d+,?%d-}', function (str)
    table.insert(char_id[user_quantifier_char], str)
    return user_quantifier_char
  end)

  return patt
end
-- Parses the text of a `[...]` set into an array of its member characters.
-- str: the set text, including the brackets; escaped characters are expected
--      to have been replaced with placeholders already.
-- Returns an array of one-byte strings. For a normal set it is sorted in
-- ascending order. For a negated set (`[^...]`) it lists every character
-- with code 0..126 that is NOT a member.
--
-- An unescaped `.` inside a set is a literal dot, as in Lua patterns; it used
-- to be expanded to "any character".
-- NOTE(review): the range preprocessing below matches any `x-y` triple, so a
-- `-` placed right after `[` or `^`, or right before `]`, is still consumed
-- as a range endpoint together with that delimiter.
local function parse_class_str (str)
  -- Pull out every `x-y` range first; each is swapped for user_range_char
  -- and its expansion is queued in range_char_table.
  local range_char_table = {}
  str = string.gsub(str, '(.)%-(.)', function (from, to)
    local class = {}
    for code = string.byte(from), string.byte(to) do
      class[#class + 1] = string.char(code)
    end
    range_char_table[#range_char_table + 1] = class
    return user_range_char
  end)

  -- Members are kept as keys so that duplicates collapse.
  local char_table = {}
  local range_pos = 1

  -- Adds whatever one set element stands for to char_table.
  local function add_member (char)
    if char ~= '.' and class_char_table[char] then
      for _, k in ipairs(get_char_class(char)) do
        char_table[k] = 1
      end
    elseif char == user_range_char then
      local class = range_char_table[range_pos]
      range_pos = range_pos + 1
      for _, k in ipairs(class) do
        char_table[k] = 1
      end
    elseif conceal_char_table[char] then
      char_table[get_char_id(char)] = 1
    else
      char_table[char] = 1
    end
  end

  -- mode 0: looking for '['; mode 1: first element (may be '^');
  -- mode 2: remaining elements up to ']'.
  local mode = 0
  local reverse_mode = 0
  for char in string.gmatch(str, '.') do
    if mode == 0 then
      if char == '[' then
        mode = 1
      end
    elseif mode == 1 then
      mode = 2
      if char == '^' then
        reverse_mode = 1
      elseif char == ']' then
        error('error:07:without class define')
        break
      else
        add_member(char)
      end
    else
      if char == ']' then
        break
      end
      add_member(char)
    end
  end

  local array = {}
  if reverse_mode == 1 then
    for code = 0, 126 do
      local char = string.char(code)
      if not char_table[char] then
        array[#array + 1] = char
      end
    end
  else
    for k in pairs(char_table) do
      array[#array + 1] = k
    end
    -- pairs() gives no ordering guarantee, so sort for a stable result.
    table.sort(array)
  end
  return array
end
-- Parses a normalised pattern (see process_patt_str) into an array of
-- `{ class, quantifier }` pairs, where `class` is an array of candidate
-- characters and `quantifier` is `{ from, to }`.
-- patt: the normalised pattern string.
--
-- mode 0 expects the start of an element; mode 1 holds a pending class and
-- looks at the next character to decide its quantifier. A pending class
-- followed by a non-quantifier gets the quantifier {1, 1}.
--
-- `mode` is no longer reset to 0 on every iteration. With that reset the
-- mode-1 branch was unreachable and the result was always empty.
local function patt_to_array (patt)
  local array = {}
  local class = {}
  local mode = 0
  -- Index of the next recorded `[...]` text.
  local class_pos = 1
  -- Index of the next recorded `{...}` text.
  local quantifier_pos = 1

  -- Resolves one pattern character to the list of characters it stands for.
  local function read_class (char)
    if class_char_table[char] then
      return get_char_class(char)
    elseif char == user_class_char then
      local str = get_char_str(char, class_pos)
      class_pos = class_pos + 1
      return parse_class_str(str)
    elseif conceal_char_table[char] then
      return { get_char_id(char) }
    end
    return { char }
  end

  -- A trailing `-` is rewritten to `?`.
  patt = string.gsub(patt, '-$', '?')
  -- Sentinel: guarantees the last real element is followed by a character,
  -- so it gets pushed. The sentinel itself is never pushed.
  patt = patt .. '$'

  for char in string.gmatch(patt, '.') do
    if mode == 0 then
      if quantifier_char_table[char] then
        error('error:08:quantifier char could not at begin')
      elseif char == user_quantifier_char then
        error('error:09:user quantifier could not at begin')
      else
        class = read_class(char)
        mode = 1
      end
    elseif mode == 1 then
      local quantifier = { 1, 1 }
      local is_quantifier = false
      if quantifier_char_table[char] then
        quantifier = quantifier_char_table[char]
        is_quantifier = true
      elseif char == user_quantifier_char then
        local str = get_char_str(char, quantifier_pos)
        quantifier = parse_quantifier_str(str)
        quantifier_pos = quantifier_pos + 1
        is_quantifier = true
      end

      -- The pending class is complete: push it with its quantifier.
      table.insert(array, { class, quantifier })

      if is_quantifier then
        -- The next character starts a fresh element.
        mode = 0
      else
        -- This character is itself the start of the next element.
        class = read_class(char)
      end
    end
  end
  return array
end
-- Module entry point: generates strings for a Lua pattern.
-- str:  the pattern.
-- mode: accepted but not used by this function.
-- The pattern goes through process_patt_str, patt_to_array and
-- cnq_to_array; the result is whatever concat_multi_array returns for the
-- per-element string arrays.
function DataGenerator (str, mode)
  local normalised = process_patt_str(str)
  local elements = patt_to_array(normalised)
  return concat_multi_array(cnq_to_array(elements))
end
return DataGenerator