在 skb 進入 ovs_dp_process_packet(位於 ovs/datapath/datapath.c)後,若未查詢到匹配的流表項,則進行 upcall:
/*
 * ovs_dp_process_packet - handle a packet for which no flow matched.
 * @skb: packet being processed; OVS_CB(skb) supplies the input vport and mru.
 * @key: flow key for @skb, forwarded unchanged to the upcall.
 *
 * Builds an OVS_PACKET_CMD_MISS upcall descriptor and passes the packet to
 * ovs_dp_upcall().  The return value of ovs_dp_upcall() is not inspected in
 * this excerpt.
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	/*
	 * Resolve the input vport first: both the upcall port id lookup and
	 * the datapath pointer are derived from it, so it must be declared
	 * before either use.
	 */
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct dp_upcall_info upcall;

	/* Miss flow. */
	memset(&upcall, 0, sizeof(upcall));
	upcall.cmd = OVS_PACKET_CMD_MISS;
	upcall.portid = ovs_vport_find_upcall_portid(p, skb);
	upcall.mru = OVS_CB(skb)->mru;

	/* Upcall */
	ovs_dp_upcall(dp, skb, key, &upcall, 0);
}
在 ovs_dp_upcall 中:
/*
 * ovs_dp_upcall - queue a packet for delivery to userspace.
 * @dp: datapath the packet belongs to.
 * @skb: packet to send up.
 * @key: flow key describing @skb.
 * @upcall_info: upcall parameters (command, destination port id, mru, ...).
 * @cutlen: forwarded unchanged to the queueing helper.
 *
 * Dispatches on skb_is_gso(): non-GSO packets go to queue_userspace_packet(),
 * GSO packets go to queue_gso_packets().
 *
 * Returns whatever the selected queueing helper returns.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info,
		  uint32_t cutlen)
{
	if (!skb_is_gso(skb))
		return queue_userspace_packet(dp, skb, key, upcall_info, cutlen);

	return queue_gso_packets(dp, skb, key, upcall_info, cutlen);
}
這裏只是簡單地判斷 skb 是否爲 GSO 包(即是否須要分段),咱們這裏只看無需分段的那一支。
在 queue_userspace_packet 中:
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_key *key, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { size_t len = upcall_msg_size(upcall_info, skb->len, OVS_CB(skb)->acts_origlen); struct sk_buff *user_skb = genlmsg_new(len, GFP_ATOMIC); //將 key 中的信息(即各層網絡協議頭信息)填入 user_skb 中 ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); skb_zerocopy(user_skb, skb, skb->len, skb->len); ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len; genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid); }
此時,經過 genlmsg_unicast 發送到用戶空間的信息 user_skb 中只有該 skb 對應的 key 中的信息(即各層網絡協議的收發地址信息)。user_skb 管理的數據部分是一個 generic message,從 upcall_msg_size 中咱們不難看出該 generic message 的內容是什麼。
/*
 * upcall_msg_size - compute the buffer size needed for an upcall message.
 * @upcall_info: upcall parameters; only the size of ->mru is used here.
 * @hdrlen: number of packet bytes carried in OVS_PACKET_ATTR_PACKET.
 * @actions_attrlen: not used by this excerpt.
 *
 * Returns the sum of the aligned struct ovs_header and the total netlink
 * attribute sizes for the packet, key, length and MRU attributes.
 */
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
			      unsigned int hdrlen, int actions_attrlen)
{
	size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
		+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
		+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
		+ nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */

	/*
	 * OVS_PACKET_ATTR_MRU.  This must be a real "+=": as a bare
	 * "+ nla_total_size(...);" statement after the semicolon above it
	 * would be evaluated and discarded, leaving the MRU attribute out of
	 * the total.
	 */
	size += nla_total_size(sizeof(upcall_info->mru));

	return size;
}
ovs_header 是 ovs 使用的自定義 generic message 協議頭。
/**
 * struct ovs_header - common header of every OVS Generic Netlink message.
 * @dp_ifindex: ifindex of the datapath's local port; 0 means the request is
 *              not tied to a particular datapath.
 *
 * All OVS Generic Netlink families place this header first.  The attributes
 * that follow it are specific to the individual family.
 */
struct ovs_header {
	int dp_ifindex;
};
skb_zerocopy 將 skb 的負載部分拷貝到 user_skb 的 tailroom 部分中(這裏有一個問題:ovs 源碼的註釋中說的倒是拷貝到 user_skb 的 headroom 中)
#define skb_zerocopy rpl_skb_zerocopy #ifndef HAVE_SKB_ZEROCOPY /** * skb_zerocopy - Zero copy skb to skb * @to: destination buffer * @source: source buffer * @len: number of bytes to copy from source buffer * @hlen: size of linear headroom in destination buffer * * Copies up to `len` bytes from `from` to `to` by creating references * to the frags in the source buffer. * * The `hlen` as calculated by skb_zerocopy_headlen() specifies the * headroom in the `to` buffer. * * Return value: * 0: everything is OK * -ENOMEM: couldn't orphan frags of @from due to lack of memory * -EFAULT: skb_copy_bits() found some problem with skb geometry */ int rpl_skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) { int i, j = 0; int plen = 0; /* length of skb->head fragment */ int ret; struct page *page; unsigned int offset; //若 to 的 tailroom 中有長度爲 len 的空間,這直接所有複製過來 if (len <= skb_tailroom(to)) return skb_copy_bits(from, 0, skb_put(to, len), len); //不然只複製一部分 if (hlen) { ret = skb_copy_bits(from, 0, skb_put(to, hlen), hlen); len -= hlen; } else { plen = min_t(int, skb_headlen(from), len); if (plen) { page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); __skb_fill_page_desc(to, 0, page, offset, plen); get_page(page); j = 1; len -= plen; } } to->truesize += len + plen; to->len += len + plen; to->data_len += len + plen; //分頁部分 for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); len -= skb_shinfo(to)->frags[j].size; skb_frag_ref(to, j); j++; } skb_shinfo(to)->nr_frags = j; return 0; } EXPORT_SYMBOL_GPL(rpl_skb_zerocopy); #endif