OpenvSwitch sw_flow 流表項添加過程詳解之一

在 skb 進入 ovs_dp_process_packet 後(位於 ovs/datapath/datapath.c),若未查找到匹配的流表項,則進行 upcall:

/*
 * Flow-miss path of ovs_dp_process_packet(): fills in an
 * OVS_PACKET_CMD_MISS upcall for @skb and passes it to ovs_dp_upcall().
 *
 * @skb: packet being processed; its OVS_CB() supplies the input vport
 *       and the mru value.
 * @key: flow key handed through to ovs_dp_upcall().
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
    /* The input vport must be resolved first: both the upcall portid
     * lookup and the datapath pointer are derived from it. */
    const struct vport *p = OVS_CB(skb)->input_vport;
    struct datapath *dp = p->dp;
    struct dp_upcall_info upcall;

    /* Miss flow. */
    memset(&upcall, 0, sizeof(upcall));
    upcall.cmd = OVS_PACKET_CMD_MISS;
    upcall.portid = ovs_vport_find_upcall_portid(p, skb);
    upcall.mru = OVS_CB(skb)->mru;

    /* Upcall */
    /* NOTE(review): the result of ovs_dp_upcall() is discarded and the
     * skb is not released in this excerpt -- confirm against the full
     * function. */
    ovs_dp_upcall(dp, skb, key, &upcall, 0);
}

在 ovs_dp_upcall 中:

/*
 * Send @skb to userspace as an upcall, choosing the queueing helper by
 * whether the packet is GSO.
 *
 * @dp:          datapath the packet belongs to.
 * @skb:         packet to send up.
 * @key:         flow key passed through to the helper.
 * @upcall_info: upcall parameters passed through to the helper.
 * @cutlen:      passed through to the helper unchanged.
 *
 * Returns the result of the helper that was invoked.
 * NOTE(review): queue_gso_packets() is not visible here; it is assumed to
 * return int like queue_userspace_packet() -- confirm.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
          const struct sw_flow_key *key,
          const struct dp_upcall_info *upcall_info,
          uint32_t cutlen)
{
    if (!skb_is_gso(skb))
        return queue_userspace_packet(dp, skb, key, upcall_info, cutlen);

    return queue_gso_packets(dp, skb, key, upcall_info, cutlen);
}

這裏只是簡單地判斷了 skb 是否爲 GSO 報文(即是否需要分片),咱們這裏只看無需分片的一支。
在 queue_userspace_packet 中:

/*
 * Build a Generic Netlink message for @skb and unicast it to the
 * userspace port named in @upcall_info.
 *
 * @dp:          datapath; used to look up the network namespace.
 * @skb:         packet whose contents are copied into the message.
 * @key:         flow key written as the OVS_PACKET_ATTR_KEY attribute.
 * @upcall_info: supplies the destination portid and the sizing inputs.
 * @cutlen:      unused in this excerpt.
 *
 * Returns -ENOMEM if the message buffer cannot be allocated, the error
 * from the key/payload fill steps if one fails, otherwise the result of
 * genlmsg_unicast().
 */
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
    struct sk_buff *user_skb;
    size_t len;
    int err;

    len = upcall_msg_size(upcall_info, skb->len, OVS_CB(skb)->acts_origlen);

    /* The allocation can fail; user_skb is dereferenced below. */
    user_skb = genlmsg_new(len, GFP_ATOMIC);
    if (!user_skb)
        return -ENOMEM;

    /* Write the information held in key (the per-layer protocol header
     * fields) into user_skb.
     * NOTE(review): assumes ovs_nla_put_key() returns 0 on success and a
     * negative errno on failure -- confirm. */
    err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
    if (err)
        goto out_free;

    err = skb_zerocopy(user_skb, skb, skb->len, skb->len);
    if (err)
        goto out_free;

    /* Patch the netlink header length now that the payload is attached. */
    ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

    /* NOTE(review): assumes genlmsg_unicast() takes ownership of user_skb
     * on both success and failure, so it is not freed here -- confirm. */
    return genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);

out_free:
    kfree_skb(user_skb);
    return err;
}

此時,通過 genlmsg_unicast 發送到用戶空間的信息 user_skb 中只有該 skb 對應的 key 中的信息(即各層網絡協議的收發地址信息)。user_skb 管理的數據部分是一個 generic message,從 upcall_msg_size 中咱們不難看出該 generic message 的內容是什麼。

/*
 * Compute the number of bytes to reserve for an upcall Generic Netlink
 * message: the aligned ovs_header plus one netlink attribute each for the
 * packet data, the flow key, the packet length and the MRU.
 *
 * @upcall_info:     only the size of its mru field is used here.
 * @hdrlen:          payload length reserved for OVS_PACKET_ATTR_PACKET.
 * @actions_attrlen: unused in this excerpt.
 */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                  unsigned int hdrlen, int actions_attrlen)
{
    /* All terms must belong to one expression: a ';' before the MRU term
     * would turn it into a no-effect statement and drop it from the sum. */
    size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
        + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
        + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
        + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
        + nla_total_size(sizeof(upcall_info->mru)); /* OVS_PACKET_ATTR_MRU */

    return size;
}

ovs_header 是 ovs 使用的自定義 generic message 協議頭。

/**
 * struct ovs_header - header for OVS Generic Netlink messages.
 * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
 * specific to a datapath).
 *
 * Attributes following the header are specific to a particular OVS Generic
 * Netlink family, but all of the OVS families use this header.
 */
struct ovs_header {
    /* 0 means the request is not specific to any datapath. */
    int dp_ifindex;
};

skb_zerocopy 將 skb 的負載部分拷貝到 user_skb 的 tailroom 部分中(這裏有一個問題:ovs 源碼的註釋中說的卻是拷貝到 user_skb 的 headroom 中)

#define skb_zerocopy rpl_skb_zerocopy
#ifndef HAVE_SKB_ZEROCOPY
/**
 *    skb_zerocopy - Zero copy skb to skb
 *    @to: destination buffer
 *    @from: source buffer
 *    @len: number of bytes to copy from source buffer
 *    @hlen: size of linear headroom in destination buffer
 *
 *    Copies up to `len` bytes from `from` to `to` by creating references
 *    to the frags in the source buffer.
 *
 *    The `hlen` as calculated by skb_zerocopy_headlen() specifies the
 *    headroom in the `to` buffer.
 *
 *    Return value:
 *    0: everything is OK
 *    non-zero: error reported by skb_copy_bits() (-EFAULT: it found some
 *    problem with skb geometry)
 *
 *    NOTE(review): an earlier version of this comment also listed -ENOMEM
 *    for failing to orphan the frags of @from; no such step is present in
 *    this excerpt, so that value is never returned here -- confirm against
 *    the full source.
 */
int rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
{
    int i, j = 0;
    int plen = 0; /* length of skb->head fragment */
    int ret;
    struct page *page;
    unsigned int offset;

    /* If `to` has at least `len` bytes of tailroom, copy everything
     * linearly and finish. */
    if (len <= skb_tailroom(to))
        return skb_copy_bits(from, 0, skb_put(to, len), len);

    /* Otherwise only part of the data is copied linearly. */
    if (hlen) {
        ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
        /* Do not go on to attach frags after a failed linear copy. */
        if (ret)
            return ret;
        len -= hlen;
    } else {
        plen = min_t(int, skb_headlen(from), len);
        if (plen) {
            /* Reference the page backing from's linear data as frag 0
             * of `to` instead of copying it. */
            page = virt_to_head_page(from->head);
            offset = from->data - (unsigned char *)page_address(page);
            __skb_fill_page_desc(to, 0, page, offset, plen);
            get_page(page);
            j = 1;
            len -= plen;
        }
    }

    /* Account for the bytes that are attached as frags. */
    to->truesize += len + plen;
    to->len += len + plen;
    to->data_len += len + plen;

    /* Paged part: share from's frags with `to`, trimming the last one so
     * that no more than `len` bytes are referenced in total. */
    for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
        if (!len)
            break;
        skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
        skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
        len -= skb_shinfo(to)->frags[j].size;
        skb_frag_ref(to, j);
        j++;
    }
    skb_shinfo(to)->nr_frags = j;
    return 0;
}
EXPORT_SYMBOL_GPL(rpl_skb_zerocopy);
#endif
相關文章
相關標籤/搜索