【譯】Redis Hyperloglog 的轉換

原文連接:github.com/cf020031308…git

redis 中一個或多個 hyperloglog 數據進行合併時,結果會以稠密的結構進行存儲,佔用內存 12k,與之相對的是,以稀疏結構存儲的數據(在個人數據庫中)平均僅佔 200B。github

所以我寫了個 Lua 腳本,用於將稠密的數據轉回稀疏結構redis

在個人 MacBook Pro (2.4 GHz Intel Core i5) 上,這個腳本每稱可轉換 182 個 hyperloglog數據庫

local function hll_dense2sparse(key)
    local exec = redis.call
    local sub = string.sub
    local byte = string.byte
    local char = string.char
    local insert = table.insert
    local concat = table.concat
    local floor = math.floor
    local magic = "HYLL"

    local dense = exec("GET", key)
    if sub(dense, 1, 4) ~= magic then
        -- not a hll
        return -1
    end
    if byte(dense, 5) == 1 then
        -- already sparse
        return 0
    end
    if #dense ~= 12304 then
        -- 12304 = 16 + 16384 * 6 / 8 is the length of a dense hll
        return -1
    end

    local sparse = {magic, char(1), sub(dense, 6, 16)}
    local c, v, _v = 1, nil, nil
    for i = 0, 16384 do
        local offset = i * 6 % 8
        local j = (i * 6 - offset) / 8 + 17
        local x, y = byte(dense, j, j + 1)
        if x then
            _v = (floor(x / 2 ^ offset) + (y or 0) * 2 ^ (8 - offset)) % 64
        else
            _v = nil
        end

        if _v and _v > 32 then
            -- cannot translate to sparse representation
            return -2
        end
        if _v == v then
            c = c + 1
        else
            if v == 0 then
                while c >= 16384 do
                    insert(sparse, char(127) .. char(255))
                    c = c - 16384
                end
                if c > 64 then
                    c = c - 1
                    insert(sparse, char(64 + floor(c / 256)) .. char(c % 256))
                elseif c > 0 then
                    insert(sparse, char(c - 1))
                end
            elseif v then
                v = v - 1
                while c >= 4 do
                    insert(sparse, char(128 + v * 4 + 3))
                    c = c - 4
                end
                if c > 0 then
                    insert(sparse, char(128 + v * 4 + c - 1))
                end
            end
            c, v = 1, _v
        end
    end

    exec("SET", key, concat(sparse))
    return 1
end
複製代碼
相關文章
相關標籤/搜索