# sonic配置team與實現機制

sonic配置team與實現機制

sonic實現team代碼框架圖:

clipboard.png

sonic修改lag模式配置步驟

1.修改文件teamd.j2

docker exec -it teamd bash
 cd /usr/share/sonic/templates/
 vim teamd.j2

例如將動態模式(lacp)改爲靜態模式(roundrobin):

源文件:

{
    "device": "{{ pc }}",
    "hwaddr": "{{ hwaddr }}",
    "runner": {
        "name": "lacp",
        "active": true,
{% if PORTCHANNEL[pc]['fallback'] and ((PORTCHANNEL[pc]['members'] | length) == 1) %}
        "fallback": {{ PORTCHANNEL[pc]['fallback'] }},
{% else %}
{# Use 75% links upperbound as min-links #}
        "min_ports": {{ (PORTCHANNEL[pc]['members'] | length * 0.75) | round(0, 'ceil') | int }},
{% endif %}
        "tx_hash": ["eth", "ipv4", "ipv6"]
    },
    "link_watch": {
        "name": "ethtool"
    },
    "ports": {
{% for member in PORTCHANNEL[pc]['members'] %}
        "{{ member }}": {}{% if not loop.last %},{% endif %}

{% endfor %}
    }
}

修改後的文件:

{
    "device": "{{ pc }}",
    "hwaddr": "{{ hwaddr }}",
    "runner": {
        "name": "roundrobin",
        "active": true,
{% if PORTCHANNEL[pc]['fallback'] and ((PORTCHANNEL[pc]['members'] | length) == 1) %}
        "fallback": {{ PORTCHANNEL[pc]['fallback'] }},
{% else %}
{# Use 75% links upperbound as min-links #}
        "min_ports": {{ (PORTCHANNEL[pc]['members'] | length * 0.75) | round(0, 'ceil') | int }},
{% endif %}
        "tx_hash": ["eth", "ipv4", "ipv6"]
    },
    "link_watch": {
        "name": "ethtool"
    },
    "ports": {
{% for member in PORTCHANNEL[pc]['members'] %}
        "{{ member }}": {}{% if not loop.last %},{% endif %}

{% endfor %}
    }
}

2.重新加載配置,該步驟會重啓docker-teamd

admin@switch2:~$ sudo config reload -y
Running command: sonic-cfggen -j /etc/sonic/config_db.json --write-to-db
Running command: service hostname-config restart
Running command: service interfaces-config restart
Running command: service ntp-config restart
Running command: service rsyslog-config restart
Running command: service swss restart
Running command: service pmon restart
Running command: service teamd restart

LAG內核信息同步到APP_DB實現機制分析

sonic的team採用的是開源team項目(libteam),詳細信息參考:https://github.com/jpirko/libteam

該部分有一個teamsyncd進程用於監聽內核的team netlink信息,以及teamd的lag成員端口變化信息,將其同步到app_db。

該部分涉及文件:

teamsyncd.cpp

teamsync.cpp

teamsync.h

TeamSync

/*
 * Netlink message handler for team (LAG) devices. It holds the producers for
 * the LAG and LAG-member tables, the LAG state table, and one TeamPortSync
 * watcher per tracked LAG.
 */
class TeamSync : public NetMsg
{
public:
    TeamSync(DBConnector *db, DBConnector *stateDb, Select *select);

    /*
     * Listens to RTM_NEWLINK and RTM_DELLINK to understand if there is a new
     * team device. This is the callback that handles LAG change messages.
     */
    virtual void onMsg(int nlmsg_type, struct nl_object *obj);

    /* Selectable that watches the member ports of a single LAG */
    class TeamPortSync : public Selectable
    {
    public:
        enum { MAX_IFNAME = 64 };
        TeamPortSync(const std::string &lagName, int ifindex,
                     ProducerStateTable *lagMemberTable);
        ~TeamPortSync();

        /*
         * This object owns m_team and releases it in the destructor, so a
         * copy would release the same handle twice. Copying is disabled.
         */
        TeamPortSync(const TeamPortSync &) = delete;
        TeamPortSync &operator=(const TeamPortSync &) = delete;

        int getFd() override;
        void readData() override;

    protected:
        int onChange();
        static int teamdHandler(struct team_handle *th, void *arg,
                                team_change_type_mask_t type_mask);
        static const struct team_change_handler gPortChangeHandler;
    private:
        ProducerStateTable *m_lagMemberTable;
        /* libteam handle of this LAG; used to query its member ports */
        struct team_handle *m_team;
        std::string m_lagName;
        int m_ifindex;
        /* Members seen on the previous pass, kept to diff against the next one */
        std::map<std::string, bool> m_lagMembers; /* map[ifname] = status (enabled|disabled) */
    };

protected:
    /* Publishes a LAG and starts tracking it */
    void addLag(const std::string &lagName, int ifindex, bool admin_state,
                bool oper_state, unsigned int mtu);
    /* Removes a LAG and stops tracking it */
    void removeLag(const std::string &lagName);

private:
    Select *m_select;
    ProducerStateTable m_lagTable;          /* producer for the LAG table */
    ProducerStateTable m_lagMemberTable;    /* producer for the LAG member table */
    Table m_stateLagTable;                  /* LAG state table */
    /* One member-port watcher per tracked LAG, keyed by LAG name */
    std::map<std::string, std::shared_ptr<TeamPortSync> > m_teamPorts;
};

具體函數

/* Taken from drivers/net/team/team.c */
#define TEAM_DRV_NAME "team"

/*
 * Stores the Select instance and constructs the three tables this class
 * writes to: the LAG and LAG-member producers on db, and the LAG state table
 * on stateDb.
 */
TeamSync::TeamSync(DBConnector *db, DBConnector *stateDb, Select *select) :
    m_select(select),
    m_lagTable(db, APP_LAG_TABLE_NAME),// producer for the LAG table on db
    m_lagMemberTable(db, APP_LAG_MEMBER_TABLE_NAME),// producer for the LAG member table on db
    m_stateLagTable(stateDb, STATE_LAG_TABLE_NAME)// LAG state table on stateDb
{
}

void TeamSync::onMsg(int nlmsg_type, struct nl_object *obj)
{
    struct rtnl_link *link = (struct rtnl_link *)obj;
    if ((nlmsg_type != RTM_NEWLINK) && (nlmsg_type != RTM_DELLINK))
        return;

    string lagName = rtnl_link_get_name(link);
    /* Listens to LAG messages */
    char *type = rtnl_link_get_type(link);
    if (!type || (strcmp(type, TEAM_DRV_NAME) != 0))
        return;

    if (nlmsg_type == RTM_DELLINK)
    {
        /* Remove LAG ports and delete LAG */
        removeLag(lagName);
        return;
    }
    //lag狀態變化都會走這裏,都是使用RTM_NEWLINK事件通知的
    addLag(lagName, rtnl_link_get_ifindex(link),
           rtnl_link_get_flags(link) & IFF_UP,
           rtnl_link_get_flags(link) & IFF_LOWER_UP,
           rtnl_link_get_mtu(link));
}

/*
 * Publishes the LAG's admin status, oper status and MTU to the LAG table.
 * The first time a LAG is seen it also starts a TeamPortSync watcher for it
 * and writes state=ok to the LAG state table.
 *
 * lagName      name of the team device
 * ifindex      interface index of the team device
 * admin_state  true when the link is administratively up
 * oper_state   true when the link is operationally up
 * mtu          MTU of the team device
 *
 * An exception thrown by the TeamPortSync constructor propagates to the caller.
 */
void TeamSync::addLag(const string &lagName, int ifindex, bool admin_state,
                      bool oper_state, unsigned int mtu)
{
    const char *admin_str = admin_state ? "up" : "down";
    const char *oper_str = oper_state ? "up" : "down";

    /* Set the LAG */
    std::vector<FieldValueTuple> fvVector;
    FieldValueTuple a("admin_status", admin_str);
    FieldValueTuple o("oper_status", oper_str);
    FieldValueTuple m("mtu", to_string(mtu));
    fvVector.push_back(a);
    fvVector.push_back(o);
    fvVector.push_back(m);
    m_lagTable.set(lagName, fvVector);

    /* mtu is unsigned, so it is printed with %u */
    SWSS_LOG_INFO("Add %s admin_status:%s oper_status:%s mtu:%u",
                   lagName.c_str(), admin_str, oper_str, mtu);

    /* Return when the team instance has already been tracked */
    if (m_teamPorts.find(lagName) != m_teamPorts.end())
        return;

    /* Start tracking the team instance: new LAG, so watch its member changes */
    auto sync = make_shared<TeamPortSync>(lagName, ifindex, &m_lagMemberTable);
    m_select->addSelectable(sync.get());
    m_teamPorts[lagName] = sync;

    /* Mark the LAG as successfully created in the state table */
    fvVector.clear();
    FieldValueTuple s("state", "ok");
    fvVector.push_back(s);
    m_stateLagTable.set(lagName, fvVector);
}

/*
 * Deletes the LAG from the LAG table. If the LAG was being tracked, it also
 * detaches its watcher from the select loop, forgets it, and clears its entry
 * in the LAG state table.
 */
void TeamSync::removeLag(const string &lagName)
{
    /* Delete the LAG */
    m_lagTable.del(lagName);

    SWSS_LOG_INFO("Remove %s", lagName.c_str());

    /* Only a tracked team instance has anything further to tear down */
    auto tracked = m_teamPorts.find(lagName);
    if (tracked != m_teamPorts.end())
    {
        /* No longer track the current team instance */
        m_select->removeSelectable(tracked->second.get());
        m_teamPorts.erase(tracked);
        /* Remove the "created" marker from the state table */
        m_stateLagTable.del(lagName);
    }
}
/*
 * Change-handler descriptor registered with libteam: routes port-change and
 * option-change events to teamdHandler.
 */
const struct team_change_handler TeamSync::TeamPortSync::gPortChangeHandler = {
    .func       = TeamSync::TeamPortSync::teamdHandler,
    .type_mask  = TEAM_PORT_CHANGE | TEAM_OPTION_CHANGE
};

/*
 * Allocates and initialises a libteam handle for the team device `ifindex`,
 * registers gPortChangeHandler on it, and performs an initial member sync.
 *
 * Throws system_error (errc::address_not_available) if allocation,
 * initialisation or handler registration fails. In the latter two cases the
 * handle is freed first, because the destructor does not run for an object
 * whose constructor throws.
 */
TeamSync::TeamPortSync::TeamPortSync(const string &lagName, int ifindex,
                                     ProducerStateTable *lagMemberTable) :
    m_lagMemberTable(lagMemberTable),
    m_lagName(lagName),
    m_ifindex(ifindex)
{
    m_team = team_alloc();
    if (m_team == nullptr)
    {
        SWSS_LOG_ERROR("Unable to allocated team socket");
        throw system_error(make_error_code(errc::address_not_available),
                           "Unable to allocated team socket");
    }

    /*
     * Bind the handle to the team device. Per the original author's note,
     * team_init also registers libteam's internal callbacks and loads the
     * LAG's ports into the handle's port list.
     */
    const int initRc = team_init(m_team, ifindex);
    if (initRc != 0)
    {
        team_free(m_team);
        m_team = nullptr;
        SWSS_LOG_ERROR("Unable to init team socket");
        throw system_error(make_error_code(errc::address_not_available),
                           "Unable to init team socket");
    }

    /* From now on port changes are delivered through gPortChangeHandler */
    const int registerRc = team_change_handler_register(m_team, &gPortChangeHandler, this);
    if (registerRc != 0)
    {
        team_free(m_team);
        m_team = nullptr;
        SWSS_LOG_ERROR("Unable to register port change event");
        throw system_error(make_error_code(errc::address_not_available),
                           "Unable to register port change event");
    }

    /* Sync LAG at first */
    onChange();
}

/* Unregisters the change handler and frees the libteam handle, if one is held. */
TeamSync::TeamPortSync::~TeamPortSync()
{
    if (!m_team)
        return;

    team_change_handler_unregister(m_team, &gPortChangeHandler, this);
    team_free(m_team);
}
/*
 * Handles a member-port change of this LAG: rebuilds the current member map
 * from libteam, writes new or changed members to the LAG member table,
 * deletes members that disappeared, and stores the new map for the next
 * comparison.
 *
 * Always returns 0.
 */
int TeamSync::TeamPortSync::onChange()
{
    struct team_port *port;
    map<string, bool> tmp_lag_members;

    /* Check each port of this team */
    team_for_each_port(port, m_team)
    {
        uint32_t ifindex;
        char ifname[MAX_IFNAME + 1] = {0};
        /* Start from "disabled" so a failed query below never leaves this indeterminate */
        bool enabled = false;

        ifindex = team_get_port_ifindex(port);

        /*
         * Skip if interface is not found. As the original author notes, this
         * implies a port should leave the team before its interface is deleted.
         */
        if (!team_ifindex2ifname(m_team, ifindex, ifname, MAX_IFNAME))
        {
            SWSS_LOG_INFO("Interface ifindex(%u) is not found", ifindex);
            continue;
        }

        /* Skip the member that is removed from the LAG */
        if (team_is_port_removed(port))
        {
            continue;
        }

        /* Query whether the port is enabled; a failure is reported and treated as disabled */
        if (team_get_port_enabled(m_team, ifindex, &enabled) != 0)
        {
            SWSS_LOG_WARN("Unable to get enabled state of %s, treating it as disabled", ifname);
            enabled = false;
        }

        tmp_lag_members[string(ifname)] = enabled;
    }

    /* Compare old and new LAG members and set/del accordingly */
    for (const auto &member : tmp_lag_members)
    {
        /* A new member, or an existing member whose status changed */
        auto known = m_lagMembers.find(member.first);
        if (known == m_lagMembers.end() || known->second != member.second)
        {
            /* Refresh the table entry */
            string key = m_lagName + ":" + member.first;
            vector<FieldValueTuple> v;
            FieldValueTuple l("status", member.second ? "enabled" : "disabled");
            v.push_back(l);
            m_lagMemberTable->set(key, v);
        }
    }

    /* Members that were present before but are gone now get deleted */
    for (const auto &member : m_lagMembers)
    {
        if (tmp_lag_members.find(member.first) == tmp_lag_members.end())
        {
            string key = m_lagName + ":" + member.first;
            m_lagMemberTable->del(key);
        }
    }

    /* Replace the old LAG members with the new ones */
    m_lagMembers = tmp_lag_members;
    return 0;
}

int TeamSync::TeamPortSync::teamdHandler(struct team_handle *team, void *arg,
                                         team_change_type_mask_t type_mask)
{
    return ((TeamSync::TeamPortSync *)arg)->onChange();
}

/* Returns the event file descriptor of this LAG's libteam handle. */
int TeamSync::TeamPortSync::getFd()
{
    return team_get_event_fd(m_team);
}

/*
 * Lets libteam process the pending events on this handle.
 * NOTE(review): the return value of team_handle_events() is discarded here.
 */
void TeamSync::TeamPortSync::readData()
{
    team_handle_events(m_team);
}

teamsyncd

/*
 * teamsyncd entry point: connects to APPL_DB and STATE_DB, registers a
 * TeamSync as the handler for link netlink messages, then blocks in the
 * select loop.
 */
int main(int argc, char **argv)
{
    swss::Logger::linkToDbNative("teamsyncd");
    DBConnector db(APPL_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);// connection to APPL_DB
    DBConnector stateDb(STATE_DB, DBConnector::DEFAULT_UNIXSOCKET, 0);
    Select s;
    TeamSync sync(&db, &stateDb, &s);
    // Register sync as the handler for RTM_NEWLINK and RTM_DELLINK; per the
    // original author, LAG up/down changes are also delivered as RTM_NEWLINK
    NetDispatcher::getInstance().registerMessageHandler(RTM_NEWLINK, &sync);
    NetDispatcher::getInstance().registerMessageHandler(RTM_DELLINK, &sync);

    // NOTE(review): the inner loop only exits by exception and the handler
    // below returns, so this outer loop never runs a second iteration.
    while (1)
    {
        try
        {
            NetLink netlink;

            // Subscribe to the link group, then request a dump of existing links
            netlink.registerGroup(RTNLGRP_LINK);
            cout << "Listens to teamd events..." << endl;
            netlink.dumpRequest(RTM_GETLINK);

            s.addSelectable(&netlink);
            while (true)
            {
                Selectable *temps;
                s.select(&temps);
            }
        }
        catch (const std::exception& e)
        {
            cout << "Exception \"" << e.what() << "\" had been thrown in deamon" << endl;
            // NOTE(review): exits with status 0 after a failure — confirm this
            // is what the process supervisor expects.
            return 0;
        }
    }

    // NOTE(review): unreachable, see the note on the outer loop.
    return 1;
}

app_db數據示例

127.0.0.1:6379> SELECT 0
127.0.0.1:6379> KEYS *LAG*
1) "LAG_MEMBER_TABLE:PortChannel1:Ethernet4"
2) "LAG_MEMBER_TABLE:PortChannel1:Ethernet0"
3) "LAG_TABLE:PortChannel1"
127.0.0.1:6379> HGETALL "LAG_TABLE:PortChannel1"
1) "admin_status"
2) "up"
3) "oper_status"
4) "up"
5) "mtu"
6) "9100"
127.0.0.1:6379> HGETALL "LAG_MEMBER_TABLE:PortChannel1:Ethernet0"
1) "status"
2) "enabled"
127.0.0.1:6379>

LAG APP_DB信息同步到ASIC_DB實現機制分析

lag與lag-member相關部分處理是在portsorch中進行處理。

該部分涉及的文件有:

portsorch.cpp

portsorch.h

LAG

/*
 * Walks the pending LAG entries of `consumer`.
 *   SET: creates the LAG through addLag() unless the alias is already in m_portList.
 *   DEL: removes the LAG through removeLag() if it can be located.
 * An entry is erased once it has been handled, was a duplicate, could not be
 * located, or carries an unknown operation. It stays queued only when
 * addLag()/removeLag() returns false.
 */
void PortsOrch::doLagTask(Consumer &consumer)
{
    SWSS_LOG_ENTER();

    auto it = consumer.m_toSync.begin();
    while (it != consumer.m_toSync.end())
    {
        auto &entry = it->second;

        string lag_alias = kfvKey(entry);
        string op = kfvOp(entry);

        /* true: drop the entry from the queue; false: keep it queued */
        bool consumed = true;

        if (op == SET_COMMAND)
        {
            /* A duplicate entry is dropped; otherwise the LAG is pushed down to hardware */
            if (m_portList.find(lag_alias) == m_portList.end())
                consumed = addLag(lag_alias);
        }
        else if (op == DEL_COMMAND)
        {
            Port lag;
            /* An entry whose LAG cannot be located is dropped */
            if (getPort(lag_alias, lag))
                consumed = removeLag(lag);
        }
        else
        {
            SWSS_LOG_ERROR("Unknown operation type %s", op.c_str());
        }

        if (consumed)
            it = consumer.m_toSync.erase(it);
        else
            it++;
    }
}

從上面可以看出,orch沒有響應lag的狀態變化(admin_status/oper_status),因此使用ifconfig lagname down後,lag仍然可以轉發報文,不過這種配置不應該出現。

lagmember

/*
 * Walks the pending LAG member entries of `consumer`. Keys have the form
 * "<lag alias>:<port alias>".
 *   SET with status "enabled": adds the port to the LAG via addLagMember().
 *   SET with any other status: removes the port via removeLagMember() if it
 *                              currently has LAG ids.
 *   DEL: removes the port via removeLagMember().
 * An entry stays queued when its LAG is not known yet or when
 * addLagMember()/removeLagMember() returns false; otherwise it is erased.
 */
void PortsOrch::doLagMemberTask(Consumer &consumer)
{
    SWSS_LOG_ENTER();

    auto it = consumer.m_toSync.begin();
    while (it != consumer.m_toSync.end()) /* visit every pending entry of this consumer */
    {
        auto &t = it->second;

        /* Retrieve LAG alias and LAG member alias from key */
        string key = kfvKey(t);
        size_t found = key.find(':');
        /*
         * Drop an entry whose key is malformed. Returning here instead would
         * leave it queued forever and keep every later entry from being handled.
         */
        if (found == string::npos)
        {
            SWSS_LOG_ERROR("Failed to parse %s", key.c_str());
            it = consumer.m_toSync.erase(it);
            continue;
        }
        string lag_alias = key.substr(0, found);   /* LAG alias */
        string port_alias = key.substr(found+1);   /* member interface name */

        string op = kfvOp(t);

        Port lag, port;
        /* LAG not known yet: keep the entry queued and move on */
        if (!getPort(lag_alias, lag))
        {
            SWSS_LOG_INFO("Failed to locate LAG %s", lag_alias.c_str());
            ++it;
            continue;
        }

        if (!getPort(port_alias, port))
        {
            SWSS_LOG_ERROR("Failed to locate port %s", port_alias.c_str());
            it = consumer.m_toSync.erase(it);
            continue;
        }

        /* Update a LAG member */
        if (op == SET_COMMAND)
        {
            string status;
            for (const auto &i : kfvFieldsValues(t))
            {
                if (fvField(i) == "status")
                    status = fvValue(i);
            }

            /* Sync an enabled member */
            if (status == "enabled")
            {
                /* Duplicate entry: the member already exists, nothing to do */
                if (lag.m_members.find(port_alias) != lag.m_members.end())
                {
                    it = consumer.m_toSync.erase(it);
                    continue;
                }

                /* Assert the port doesn't belong to any LAG */
                assert(!port.m_lag_id && !port.m_lag_member_id);

                /* Add the member */
                if (addLagMember(lag, port))
                    it = consumer.m_toSync.erase(it);
                else
                    ++it;
            }
            /* Sync an disabled member */
            else /* status == "disabled" */
            {
                /* "status" is "disabled" at start when m_lag_id and
                 * m_lag_member_id are absent */
                if (!port.m_lag_id || !port.m_lag_member_id)
                {
                    it = consumer.m_toSync.erase(it);
                    continue;
                }

                /* A disabled member is removed via removeLagMember() */
                if (removeLagMember(lag, port))
                    it = consumer.m_toSync.erase(it);
                else
                    ++it;
            }
        }
        /* Remove a LAG member */
        else if (op == DEL_COMMAND)
        {
            /* Assert the LAG member exists */
            assert(lag.m_members.find(port_alias) != lag.m_members.end());

            if (!port.m_lag_id || !port.m_lag_member_id)
            {
                SWSS_LOG_WARN("Member %s not found in LAG %s lid:%lx lmid:%lx,",
                        port.m_alias.c_str(), lag.m_alias.c_str(), lag.m_lag_id, port.m_lag_member_id);
                it = consumer.m_toSync.erase(it);
                continue;
            }

            if (removeLagMember(lag, port))
                it = consumer.m_toSync.erase(it);
            else
                ++it;
        }
        else
        {
            SWSS_LOG_ERROR("Unknown operation type %s", op.c_str());
            it = consumer.m_toSync.erase(it);
        }
    }
}

asic_db數據示例

127.0.0.1:6379[1]> KEYS *LAG*
1) "ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER:oid:0x1b0000000005e3"
2) "ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER:oid:0x1b0000000005e4"
3) "ASIC_STATE:SAI_OBJECT_TYPE_LAG:oid:0x20000000005d2"
127.0.0.1:6379[1]> HGETALL ASIC_STATE:SAI_OBJECT_TYPE_LAG_MEMBER:oid:0x1b0000000005e4
1) "SAI_LAG_MEMBER_ATTR_LAG_ID"
2) "oid:0x20000000005d2"
3) "SAI_LAG_MEMBER_ATTR_PORT_ID"
4) "oid:0x1000000000003"
127.0.0.1:6379[1]>
相關文章
相關標籤/搜索