VPP接口層分析

VPP接口層分析

接口層是硬件驅動和上層軟件之間的一層抽象代碼,屏蔽硬件的差別,爲上層軟件提供統一的操作接口。上層軟件調用接口層的操作進行報文的讀入與發出,同時可以進行硬件設備的設置以及相關信息(比如統計數據)的讀取。

vpp支持多種驅動類型的網絡設備,比如dpdk,netmap,af_packet等等。同一種驅動的物理設備可以有多個,比如一臺設備可以有多個dpdk接口,所以抽象了鏈路層接口hw_interface。在網絡中有很多虛擬設備,它們依附於物理設備,例如vlan設備對於同一個hw_interface可以有4096個虛擬的子接口。爲了描述這些虛擬接口,vpp在hw_interface的基礎上又封裝了一層sw_interface來表示接口。sw_interface是整個接口層對上層軟件的抽象,上層軟件使用sw_interface索引來表示具體操作的設備。

VPP支持的物理網絡設備類型

一類設備表示使用相同硬件驅動的設備,比如dpdk類型的以太網設備,af_packet類型的虛擬以太網設備,netmap以太網設備等,屬於物理層的描述。

設備類描述結構

/* A class of hardware interface devices. */
/* The set of operation functions for one class of hardware interface; these
   functions are a layer wrapped on top of the hardware device driver. */
typedef struct _vnet_device_class
{
    /* Index into main vector (the class index). */
    u32 index;

    /* Device name (e.g. "FOOBAR 1234a"); the name of the device class. */
    char *name;

    /* Function to call when hardware interface is added/deleted. */
    /* Adds/deletes one instance of a device of this class. */
    vnet_interface_function_t *interface_add_del_function;

    /* Function to bring device administratively up/down. */
    vnet_interface_function_t *admin_up_down_function;

    /* Function to call when sub-interface is added/deleted */
    /* Adds/deletes a sub-interface whose parent is a device of this class. */
    vnet_subif_add_del_function_t *subif_add_del_function;

    /* Function to call interface rx mode is changed */
    vnet_interface_set_rx_mode_function_t *rx_mode_change_function;

    /* Function to call interface l2 mode is changed */
    vnet_interface_set_l2_mode_function_t *set_l2_mode_function;

    /* Redistribute flag changes/existence of this interface class. */
    u32 redistribute;

    /* Transmit function. */
    vlib_node_function_t *tx_function;

    /* Transmit function candidate registration with priority */
    /* Several candidate transmit functions may be registered; the one with
       the highest priority ends up assigned to tx_function. */
    vlib_node_fn_registration_t *tx_fn_registrations;

    /* Error strings indexed by error code for this node. */
    /* Array of transmit-function error reason strings. */
    char **tx_function_error_strings;

    /* Number of error codes used by this node. */
    /* Size of the tx_function_error_strings array. */
    u32 tx_function_n_errors;

    /* Renumber device name [only!] support, a control-plane kludge */
    int (*name_renumber) (struct vnet_hw_interface_t * hi,
                          u32 new_dev_instance);

    /* Interface flow offload operations */
    vnet_flow_dev_ops_function_t *flow_ops_function;

    /* Format device instance as name. */
    format_function_t *format_device_name;

    /* Parse function for device name. */
    unformat_function_t *unformat_device_name;

    /* Format device verbosely for this class. */
    format_function_t *format_device;

    /* Trace buffer format for TX function. */
    format_function_t *format_tx_trace;

    /* Format flow offload entry */
    format_function_t *format_flow;

    /* Function to clear hardware counters for device. */
    void (*clear_counters) (u32 dev_class_instance);

    uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
                                           u32 hw_if_index,
                                           u32 hw_class_index);

    /* Called when hardware class of an interface changes. */
    void (*hw_class_change) (struct vnet_main_t * vnm,
                             u32 hw_if_index, u32 new_hw_class_index);

    /* Called to redirect traffic from a specific interface instance */
    /* Forces the traffic of one interface to be redirected to the given node. */
    void (*rx_redirect_to_node) (struct vnet_main_t * vnm,
                                 u32 hw_if_index, u32 node_index);

    /* Link-list of all device classes set up by constructors created below */
    struct _vnet_device_class *next_class_registration;

    /* Function to set mac address. */
    /* Called when the link-layer address changes. */
    vnet_interface_set_mac_address_function_t *mac_addr_change_function;
} vnet_device_class_t;

設備類輸出函數註冊結構

該結構與node的功能函數註冊結構是一樣的。

/* One candidate implementation of a node function, kept on a singly linked
   list of candidates. */
typedef struct _vlib_node_fn_registration
{
    /* The candidate function itself. */
    vlib_node_function_t *function;
    /* Priority of this candidate; for device class tx functions the
       candidate with the highest priority is the one selected. */
    int priority;
    /* Next candidate on the list. */
    struct _vlib_node_fn_registration *next_registration;
    /* Name associated with this registration. */
    char *name;
} vlib_node_fn_registration_t;

註冊宏

/*
 * VNET_DEVICE_CLASS (x) declares the device class object x and emits two
 * static functions for it:
 *  - a constructor that pushes x onto the head of
 *    vnm->device_class_registrations;
 *  - a destructor that removes x from that list.
 * The macro ends with a declaration of x so the use site can continue with
 * "= { ... };" to initialise the object.
 */
#ifndef CLIB_MARCH_VARIANT /* Macro that registers/unregisters a device class (its set of operation functions) */
#define VNET_DEVICE_CLASS(x,...)                                        \
  __VA_ARGS__ vnet_device_class_t x;                                    \
static void __vnet_add_device_class_registration_##x (void)             \
    __attribute__((__constructor__)) ;                                  \
static void __vnet_add_device_class_registration_##x (void)             \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    x.next_class_registration = vnm->device_class_registrations;        \
    vnm->device_class_registrations = &x;                               \
}                                                                       \
static void __vnet_rm_device_class_registration_##x (void)              \
    __attribute__((__destructor__)) ;                                   \
static void __vnet_rm_device_class_registration_##x (void)              \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    VLIB_REMOVE_FROM_LINKED_LIST (vnm->device_class_registrations,      \
                                  &x, next_class_registration);         \
}                                                                       \
__VA_ARGS__ vnet_device_class_t x
#else
/* create unused pointer to silence compiler warnings and get whole
   function optimized out */
#define VNET_DEVICE_CLASS(x,...)                                        \
static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
/*
 * Macro that registers a transmit (output) function for a device class.
 * It declares the CLIB_MARCH_SFX-suffixed tx function, defines a
 * vlib_node_fn_registration_t pointing at it, and emits a constructor that
 * stamps the registration with CLIB_MARCH_FN_PRIORITY() and pushes it onto
 * the head of devclass.tx_fn_registrations.  The macro ends with the
 * function header, so the use site supplies the parameter list and body.
 */
#define VNET_DEVICE_CLASS_TX_FN(devclass)                \
uword CLIB_MARCH_SFX (devclass##_tx_fn)();                \
static vlib_node_fn_registration_t                    \
  CLIB_MARCH_SFX(devclass##_tx_fn_registration) =            \
  { .function = &CLIB_MARCH_SFX (devclass##_tx_fn), };            \
                                    \
static void __clib_constructor                        \
CLIB_MARCH_SFX (devclass##_tx_fn_multiarch_register) (void)        \
{                                    \
  extern vnet_device_class_t devclass;                    \
  vlib_node_fn_registration_t *r;                    \
  r = &CLIB_MARCH_SFX (devclass##_tx_fn_registration);            \
  r->priority = CLIB_MARCH_FN_PRIORITY();                \
  r->next_registration = devclass.tx_fn_registrations;            \
  devclass.tx_fn_registrations = r;                    \
}                                    \
uword CLIB_CPU_OPTIMIZED CLIB_MARCH_SFX (devclass##_tx_fn)

實例

/* *INDENT-OFF* */
/* Device class registration for DPDK-driven interfaces. */
VNET_DEVICE_CLASS (dpdk_device_class) =
{
    .name = "dpdk",

    /* control-plane callbacks */
    .admin_up_down_function = dpdk_interface_admin_up_down,
    .subif_add_del_function = dpdk_subif_add_del_function,
    .mac_addr_change_function = dpdk_set_mac_address,
    .rx_redirect_to_node = dpdk_set_interface_next_node,
    .clear_counters = dpdk_clear_hw_interface_counters,
    .flow_ops_function = dpdk_flow_ops_fn,

    /* transmit error reporting */
    .tx_function_error_strings = dpdk_tx_func_error_strings,
    .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,

    /* formatters */
    .format_device_name = format_dpdk_device_name,
    .format_device = format_dpdk_device,
    .format_tx_trace = format_dpdk_tx_trace,
    .format_flow = format_dpdk_flow,
};

鏈路層interface類型

在物理設備之上,根據接口的功能進一步抽象出一層硬件設備類型,在七層模型中屬於鏈路層。比如ethernet設備,bond設備,vlan設備等。

interface類型描述結構

/* Layer-2 (e.g. Ethernet) interface class. */
typedef struct _vnet_hw_interface_class
{
    /* Index into main vector. */
    u32 index;

    /* Class name (e.g. "Ethernet"). */
    char *name;

    /* Flags.  Per the original note there is currently a single flag:
       whether this is a point-to-point device (such devices need no
       neighbour information). */
    vnet_hw_interface_class_flags_t flags;

    /* Function to call when hardware interface is added/deleted. */
    vnet_interface_function_t *interface_add_del_function;

    /* Function to bring interface administratively up/down. */
    vnet_interface_function_t *admin_up_down_function;

    /* Function to call when link state changes. */
    vnet_interface_function_t *link_up_down_function;

    /* Function to call when link MAC changes. */
    vnet_interface_set_mac_address_function_t *mac_addr_change_function;

    /* Format function to display interface name. */
    format_function_t *format_interface_name;

    /* Format function to display interface address. */
    format_function_t *format_address;

    /* Format packet header for this interface class. */
    format_function_t *format_header;

    /* Format device verbosely for this class. */
    format_function_t *format_device;

    /* Parser for hardware (e.g. ethernet) address. */
    unformat_function_t *unformat_hw_address;

    /* Parser for packet header for e.g. rewrite string. */
    unformat_function_t *unformat_header;

    /* Builds a rewrite string for the interface to the destination
     * for the payload/link type. */
    u8 *(*build_rewrite) (struct vnet_main_t * vnm,
                          u32 sw_if_index,
                          vnet_link_t link_type, const void *dst_hw_address);

    /* Update an adjacency added by FIB (as opposed to via the
     * neighbour resolution protocol). */
    void (*update_adjacency) (struct vnet_main_t * vnm,
                              u32 sw_if_index, u32 adj_index);

    uword (*is_valid_class_for_interface) (struct vnet_main_t * vnm,
                                           u32 hw_if_index,
                                           u32 hw_class_index);

    /* Called when hw interface class is changed and old hardware instance
       may want to be deleted. */
    void (*hw_class_change) (struct vnet_main_t * vnm, u32 hw_if_index,
                             u32 old_class_index, u32 new_class_index);

    /* List of hw interface classes, built by constructors */
    struct _vnet_hw_interface_class *next_class_registration;

} vnet_hw_interface_class_t;

註冊宏

/**
 * @brief Default adjacency update function
 *
 * vnet_interface_init installs this as update_adjacency for every
 * registered hw interface class that leaves that member NULL.
 */
extern void default_update_adjacency (struct vnet_main_t *vnm,
                                      u32 sw_if_index, u32 adj_index);

/*
 * VNET_HW_INTERFACE_CLASS (x) declares the hw interface class object x and
 * emits a constructor that pushes x onto the head of
 * vnm->hw_interface_class_registrations plus a destructor that removes it
 * from that list.  The macro ends with a declaration of x so the use site
 * can continue with "= { ... };" to initialise the object.
 */
#define VNET_HW_INTERFACE_CLASS(x,...)                                  \
  __VA_ARGS__ vnet_hw_interface_class_t x;                              \
static void __vnet_add_hw_interface_class_registration_##x (void)       \
    __attribute__((__constructor__)) ;                                  \
static void __vnet_add_hw_interface_class_registration_##x (void)       \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    x.next_class_registration = vnm->hw_interface_class_registrations;  \
    vnm->hw_interface_class_registrations = &x;                         \
}                                                                       \
static void __vnet_rm_hw_interface_class_registration_##x (void)        \
    __attribute__((__destructor__)) ;                                   \
static void __vnet_rm_hw_interface_class_registration_##x (void)        \
{                                                                       \
    vnet_main_t * vnm = vnet_get_main();                                \
    VLIB_REMOVE_FROM_LINKED_LIST (vnm->hw_interface_class_registrations,\
                                  &x, next_class_registration);         \
}                                                                       \
__VA_ARGS__ vnet_hw_interface_class_t x

實例

/* *INDENT-OFF* */
/* Link-layer interface class registration for Ethernet. */
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) =
{
    .name = "Ethernet",

    /* callbacks */
    .mac_addr_change_function = ethernet_mac_change,
    .build_rewrite = ethernet_build_rewrite,
    .update_adjacency = ethernet_update_adjacency,

    /* formatters and parsers */
    .format_address = format_ethernet_address,
    .format_header = format_ethernet_header_with_length,
    .unformat_hw_address = unformat_ethernet_address,
    .unformat_header = unformat_ethernet_header,
};
/* *INDENT-ON* */

軟件層interface

/* Flag bits of a software interface; each enumerator occupies one bit so
   the values can be OR-ed together.  The enum is declared packed. */
typedef enum vnet_sw_interface_flags_t_
{
    VNET_SW_INTERFACE_FLAG_NONE = 0,
    /* Interface is "up" meaning administratively up.
       Up in the sense of link state being up is maintained by hardware interface. */
    VNET_SW_INTERFACE_FLAG_ADMIN_UP = (1 << 0),

    /* Interface is disabled for forwarding: punt all traffic to slow-path. */
    VNET_SW_INTERFACE_FLAG_PUNT = (1 << 1),

    VNET_SW_INTERFACE_FLAG_PROXY_ARP = (1 << 2),

    VNET_SW_INTERFACE_FLAG_UNNUMBERED = (1 << 3),

    VNET_SW_INTERFACE_FLAG_BOND_SLAVE = (1 << 4),

    /* Interface does not appear in CLI/API */
    VNET_SW_INTERFACE_FLAG_HIDDEN = (1 << 5),

    /* Interface in ERROR state */
    VNET_SW_INTERFACE_FLAG_ERROR = (1 << 6),

    /* Interface has IP configured directed broadcast */
    VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST = (1 << 7),

} __attribute__ ((packed)) vnet_sw_interface_flags_t;

/* Software-interface.  This corresponds to a Ethernet VLAN, ATM vc, a
   tunnel, etc.  Configuration (e.g. IP address) gets attached to
   software interface.
 */
typedef struct
{
    /* Interface type. */
    vnet_sw_interface_type_t type:16;
    /* Interface flags. */
    vnet_sw_interface_flags_t flags;

    /* Index for this interface (the software interface index). */
    u32 sw_if_index;

    /* Software interface index of super-interface;
       equal to sw_if_index if this interface is not a
       sub-interface.
     */
    u32 sup_sw_if_index;

    /* this swif is unnumbered, use addresses on unnumbered_sw_if_index... */
    u32 unnumbered_sw_if_index;

    /* VNET_SW_INTERFACE_TYPE_HARDWARE. */
    u32 hw_if_index;

    /* MTU for network layer (not including L2 headers) */
    u32 mtu[VNET_N_MTU];

    /* VNET_SW_INTERFACE_TYPE_SUB. */
    /* When this interface is a sub-interface, the sub-interface
       information is kept in this member. */
    vnet_sub_interface_t sub;

    /* VNET_SW_INTERFACE_TYPE_P2P. */
    vnet_p2p_sub_interface_t p2p;

    vnet_flood_class_t flood_class;
} vnet_sw_interface_t;

子接口

/* Kind of software interface, stored in vnet_sw_interface_t.type. */
typedef enum
{
    /* A hw interface. */
    VNET_SW_INTERFACE_TYPE_HARDWARE,

    /* A sub-interface. */
    VNET_SW_INTERFACE_TYPE_SUB,
    VNET_SW_INTERFACE_TYPE_P2P,
    VNET_SW_INTERFACE_TYPE_PIPE,
} vnet_sw_interface_type_t;

/* Description of a sub-interface: its id under the parent interface and
   the Ethernet VLAN classification data used to match packets to it. */
typedef struct
{
    /*
     * Subinterface ID. A number 0-N to uniquely identify
     * this subinterface under the main (parent?) interface
     */
    u32 id;

    /* Classification data. Used to associate packet header with subinterface. */
    struct
    {
        u16 outer_vlan_id;
        u16 inner_vlan_id;
        /* The same 16 bits viewed either as a raw value or as
           individual one-bit flags. */
        union
        {
            u16 raw_flags;
            struct
            {
                u16 no_tags:1;
                u16 one_tag:1;
                u16 two_tags:1;
                u16 dot1ad:1;        /* 0 = dot1q, 1=dot1ad */
                u16 exact_match:1;
                u16 default_sub:1;
                u16 outer_vlan_id_any:1;
                u16 inner_vlan_id_any:1;
            } flags;
        };
    } eth;
} vnet_sub_interface_t;

接口層初始化過程

前面所述的設備和接口註冊信息,是在main函數之前進行註冊的,main函數需要對這些信息進行整理加工。在vpp中,vnet_main_t的interface_main成員維護了接口相關的信息。

接口管理接口結構

/* Global state of the interface layer (vnet_main_t.interface_main). */
typedef struct
{
    /* Hardware interfaces: every link-layer interface lives in this pool. */
    vnet_hw_interface_t *hw_interfaces;

    /* Hash table mapping HW interface name to index. */
    uword *hw_interface_by_name;

    /* Vectors if hardware interface classes and device classes. */
    /* Copies of the statically registered link-layer interface classes. */
    vnet_hw_interface_class_t *hw_interface_classes;
    /* Copies of the statically registered device classes. */
    vnet_device_class_t *device_classes;

    /* Hash table mapping name to hw interface/device class. */
    /* Link-layer interface class name -> class index. */
    uword *hw_interface_class_by_name;
    /* Device class name -> class index. */
    uword *device_class_by_name;

    /* Software interfaces. */
    vnet_sw_interface_t *sw_interfaces;

    /* Hash table mapping sub intfc sw_if_index by sup sw_if_index and sub id */
    uword *sw_if_index_by_sup_and_sub;

    /* Software interface counters both simple and combined
     * packet and byte counters, together with their lock.
     */
    volatile u32 *sw_if_counter_lock;
    vlib_simple_counter_main_t *sw_if_counters;
    vlib_combined_counter_main_t *combined_sw_if_counters;

    vnet_hw_interface_nodes_t *deleted_hw_interface_nodes;

    /* pcap drop tracing */
    int drop_pcap_enable;
    pcap_main_t pcap_main;
    u8 *pcap_filename;
    u32 pcap_sw_if_index;
    u32 pcap_pkts_to_capture;
    uword *pcap_drop_filter_hash;

    /* per-thread data */
    vnet_interface_per_thread_data_t *per_thread_data;

    /* enable GSO processing in packet path if this count is > 0 */
    u32 gso_interface_count;

    /* feature_arc_index */
    u8 output_feature_arc_index;
} vnet_interface_main_t;

接口管理初始化函數

/*
 * Interface layer initialisation.
 *
 * Checks that the vnet buffer opaque fits the vlib buffer opaque area, sets
 * up the software interface counters, copies every statically registered
 * device class and hw interface class into the interface main vectors
 * (assigning each an index and recording it by name), then runs the
 * dependent init functions.
 *
 * Returns 0 on success, otherwise the clib error that stopped the init.
 */
clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
    vnet_main_t *vnm = vnet_get_main ();
    vnet_interface_main_t *im = &vnm->interface_main;
    vlib_buffer_t *b = 0;
    vnet_buffer_opaque_t *o = 0;
    clib_error_t *error;
    vnet_device_class_t *dev_class;
    vnet_hw_interface_class_t *hw_class;

    /*
     * Keep people from shooting themselves in the foot.
     */
    if (sizeof (b->opaque) != sizeof (vnet_buffer_opaque_t))
    {
#define _(a) if (sizeof(o->a) > sizeof (o->unused))                     \
      clib_warning                                                      \
        ("FATAL: size of opaque union subtype %s is %d (max %d)",       \
         #a, sizeof(o->a), sizeof (o->unused));
        foreach_buffer_opaque_union_subtype;
#undef _

        return clib_error_return
        (0, "FATAL: size of vlib buffer opaque %d, size of vnet opaque %d",
         sizeof (b->opaque), sizeof (vnet_buffer_opaque_t));
    }

    /* Lock word guarding sw_if_counters; held while the counters are set up. */
    im->sw_if_counter_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
            CLIB_CACHE_LINE_BYTES);
    im->sw_if_counter_lock[0] = 1;    /* should be no need */

    /* Name every simple counter. */
    vec_validate (im->sw_if_counters, VNET_N_SIMPLE_INTERFACE_COUNTER - 1);
#define _(E,n,p)                                                        \
  im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n;        \
  im->sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
    foreach_simple_interface_counter_name
#undef _

    /* Name every combined counter. */
    vec_validate (im->combined_sw_if_counters,
                  VNET_N_COMBINED_INTERFACE_COUNTER - 1);
#define _(E,n,p)                            \
  im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].name = #n;    \
  im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_##E].stat_segment_name = "/" #p "/" #n;
    foreach_combined_interface_counter_name
#undef _
    im->sw_if_counter_lock[0] = 0;

    /* Device classes: name -> index table plus the class vector. */
    im->device_class_by_name = hash_create_string ( /* size */ 0,
            sizeof (uword));

    for (dev_class = vnm->device_class_registrations; dev_class;
         dev_class = dev_class->next_class_registration)
    {
        /* The index is the slot the class is about to occupy. */
        dev_class->index = vec_len (im->device_classes);
        hash_set_mem (im->device_class_by_name, dev_class->name,
                      dev_class->index);

        if (dev_class->tx_fn_registrations)
        {
            vlib_node_fn_registration_t *cand;
            int best_priority = -1;

            /* to avoid confusion, please remove ".tx_function" statement
               from VNET_DEVICE_CLASS() if using function candidates */
            ASSERT (dev_class->tx_function == 0);

            /* Keep the candidate with the highest priority. */
            for (cand = dev_class->tx_fn_registrations; cand;
                 cand = cand->next_registration)
            {
                if (cand->priority > best_priority)
                {
                    best_priority = cand->priority;
                    dev_class->tx_function = cand->function;
                }
            }
        }

        vec_add1 (im->device_classes, dev_class[0]);
    }

    /* Hw interface classes: name -> index table plus the class vector. */
    im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
            sizeof (uword));

    im->sw_if_index_by_sup_and_sub = hash_create_mem (0, sizeof (u64),
            sizeof (uword));

    for (hw_class = vnm->hw_interface_class_registrations; hw_class;
         hw_class = hw_class->next_class_registration)
    {
        hw_class->index = vec_len (im->hw_interface_classes);
        hash_set_mem (im->hw_interface_class_by_name, hw_class->name,
                      hw_class->index);

        /* Fall back to the defaults for callbacks the class left unset. */
        if (!hw_class->build_rewrite)
            hw_class->build_rewrite = default_build_rewrite;
        if (!hw_class->update_adjacency)
            hw_class->update_adjacency = default_update_adjacency;

        vec_add1 (im->hw_interface_classes, hw_class[0]);
    }

    im->gso_interface_count = 0;

    /* init per-thread data */
    vec_validate_aligned (im->per_thread_data, vlib_num_workers (),
                          CLIB_CACHE_LINE_BYTES);

    error = vlib_call_init_function (vm, vnet_interface_cli_init);
    if (error)
        return error;

    vnm->interface_tag_by_sw_if_index = hash_create (0, sizeof (uword));

#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
    error = vlib_call_init_function (vm, trajectory_trace_init);
    if (error)
        return error;
#endif

    return 0;
}

VLIB_INIT_FUNCTION (vnet_interface_init);

添加一個接口

以af_packet類型的以太網接口爲例進行說明

/*
 * Create an af_packet interface on top of the host interface host_if_name.
 *
 * vm            vlib main; its clock seeds the random MAC address.
 * host_if_name  vector holding the host interface name.
 * hw_addr_set   optional 6-byte MAC address; when NULL an address of the
 *               form 02:fe:xx:xx:xx:xx with random low bytes is generated.
 * sw_if_index   optional out parameter receiving the software interface
 *               index (also filled in when the interface already exists).
 *
 * Returns 0 on success, VNET_API_ERROR_IF_ALREADY_EXISTS when an interface
 * is already bound to this host interface, VNET_API_ERROR_INVALID_INTERFACE
 * when the name is too long or unknown to the host, or another non-zero
 * code when a system call or the interface registration fails.
 */
int
af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
                     u32 * sw_if_index)
{
    af_packet_main_t *apm = &af_packet_main;
    int ret, fd = -1, fd2 = -1;
    struct tpacket_req *rx_req = 0;
    struct tpacket_req *tx_req = 0;
    struct ifreq ifr;
    u8 *ring = 0;
    af_packet_if_t *apif = 0;
    u8 hw_addr[6];
    clib_error_t *error;
    vnet_sw_interface_t *sw;
    vnet_hw_interface_t *hw;
    vlib_thread_main_t *tm = vlib_get_thread_main ();
    vnet_main_t *vnm = vnet_get_main ();
    uword *p;
    uword if_index;
    u8 *host_if_name_dup = 0;
    int host_if_index = -1;

    p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
    if (p)
    {
        apif = vec_elt_at_index (apm->interfaces, p[0]);
        /* sw_if_index is optional, exactly as on the success path. */
        if (sw_if_index)
            *sw_if_index = apif->sw_if_index;
        return VNET_API_ERROR_IF_ALREADY_EXISTS;
    }

    /* The name is copied into the fixed-size ifr.ifr_name below; refuse
       anything that would not fit instead of overrunning the buffer. */
    if (vec_len (host_if_name) > sizeof (ifr.ifr_name))
        return VNET_API_ERROR_INVALID_INTERFACE;

    /* Duplicate the name only once the early returns are behind us, so
       those paths have nothing to free. */
    host_if_name_dup = vec_dup (host_if_name);

    vec_validate (rx_req, 0);
    rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
    rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
    rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
    rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;

    vec_validate (tx_req, 0);
    tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
    tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
    tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
    tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;

    /*
     * make sure host side of interface is 'UP' before binding AF_PACKET
     * socket on it.
     */
    if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
    {
        vlib_log_debug (apm->log_class, "Failed to create socket");
        ret = VNET_API_ERROR_SYSCALL_ERROR_1;
        goto error;
    }

    /* Start from a zeroed request so bytes after the name are defined. */
    clib_memset (&ifr, 0, sizeof (ifr));
    clib_memcpy (ifr.ifr_name, (const char *) host_if_name,
                 vec_len (host_if_name));

    /* Look up the host interface index by name. */
    if ((ret = ioctl (fd2, SIOCGIFINDEX, &ifr)) < 0)
    {
        vlib_log_debug (apm->log_class, "af_packet_create error: %d", ret);
        /* Leave through the common cleanup so fd2 and the vectors
           allocated above are released. */
        ret = VNET_API_ERROR_INVALID_INTERFACE;
        goto error;
    }

    host_if_index = ifr.ifr_ifindex;

    /* Read the host interface flags. */
    if ((ret = ioctl (fd2, SIOCGIFFLAGS, &ifr)) < 0)
    {
        vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
        goto error;
    }

    /* Bring the host interface up if it is not up already. */
    if (!(ifr.ifr_flags & IFF_UP))
    {
        ifr.ifr_flags |= IFF_UP;
        if ((ret = ioctl (fd2, SIOCSIFFLAGS, &ifr)) < 0)
        {
            vlib_log_warn (apm->log_class, "af_packet_create error: %d", ret);
            goto error;
        }
    }

    if (fd2 > -1)
    {
        close (fd2);
        /* Mark it closed so the error path below cannot close it again. */
        fd2 = -1;
    }

    /* Create the af_packet socket and its ring. */
    ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);

    if (ret != 0)
        goto error;

    ret = is_bridge (host_if_name);

    if (ret == 0)            /* is a bridge, ignore state */
        host_if_index = -1;

    /* So far everything looks good, let's create interface */
    pool_get (apm->interfaces, apif);
    if_index = apif - apm->interfaces;

    apif->host_if_index = host_if_index;
    apif->fd = fd;
    apif->rx_ring = ring;
    /* The tx ring starts right after the rx blocks in the same mapping. */
    apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
    apif->rx_req = rx_req;
    apif->tx_req = tx_req;
    apif->host_if_name = host_if_name_dup;
    apif->per_interface_next_index = ~0;
    apif->next_tx_frame = 0;
    apif->next_rx_frame = 0;

    /* The per-interface lock is only set up when there is more than one
       vlib main. */
    if (tm->n_vlib_mains > 1)
        clib_spinlock_init (&apif->lockp);

    /* Register the socket with the file poller (edge triggered) so that
       readiness on the fd acts as the interface's interrupt. */
    {
        clib_file_t template = { 0 };
        template.read_function = af_packet_fd_read_ready;
        template.file_descriptor = fd;
        template.private_data = if_index;
        template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
        template.description = format (0, "%U", format_af_packet_device_name,
                                       if_index);
        apif->clib_file_index = clib_file_add (&file_main, &template);
    }

    /*use configured or generate random MAC address */
    if (hw_addr_set)
        clib_memcpy (hw_addr, hw_addr_set, 6);
    else
    {
        f64 now = vlib_time_now (vm);
        u32 rnd;
        rnd = (u32) (now * 1e6);
        rnd = random_u32 (&rnd);

        clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
        hw_addr[0] = 2;
        hw_addr[1] = 0xfe;
    }

    /* Register the ethernet interface. */
    error = ethernet_register_interface (vnm, af_packet_device_class.index,
                                         if_index, hw_addr, &apif->hw_if_index,
                                         af_packet_eth_flag_change);

    if (error)
    {
        /* NOTE(review): the af_packet socket (fd), its ring and the clib
           file entry added above are not released on this path - confirm
           and add the matching teardown. */
        clib_memset (apif, 0, sizeof (*apif));
        pool_put (apm->interfaces, apif);
        vlib_log_err (apm->log_class, "Unable to register interface: %U",
                      format_clib_error, error);
        clib_error_free (error);
        ret = VNET_API_ERROR_SYSCALL_ERROR_1;
        goto error;
    }

    sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
    hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
    apif->sw_if_index = sw->sw_if_index;

    /* Set the input node that serves this interface. */
    vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
                                      af_packet_input_node.index);

    /* Assign rx queue 0 of the interface to a receive thread. */
    vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0,    /* queue */
                                        ~0 /* any cpu */ );

    /* The interface supports interrupt mode. */
    hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
    vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
                                 VNET_HW_INTERFACE_FLAG_LINK_UP);

    vnet_hw_interface_set_rx_mode (vnm, apif->hw_if_index, 0,
                                   VNET_HW_INTERFACE_RX_MODE_INTERRUPT);

    mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
                   0);
    if (sw_if_index)
        *sw_if_index = apif->sw_if_index;

    return 0;

error:
    if (fd2 > -1)
        close (fd2);
    vec_free (host_if_name_dup);
    vec_free (rx_req);
    vec_free (tx_req);
    return ret;
}

註冊ethernet接口

/*
 * Register an ethernet interface.
 *
 * Allocates an ethernet_interface_t, registers a hardware interface of the
 * ethernet hw class for the given device class/instance, and initialises
 * its packet size limits, MTU and hardware address.
 *
 * Returns 0; nothing in this function produces an error.
 */
clib_error_t *
ethernet_register_interface (vnet_main_t * vnm,
                             u32 dev_class_index,/* device class index */
                             u32 dev_instance,/* device instance index */
                             const u8 * address,/* link-layer address */
                             u32 * hw_if_index_return,/* out: hardware interface index */
                             ethernet_flag_change_function_t flag_change)
{
    ethernet_main_t *em = &ethernet_main;
    ethernet_interface_t *ei;
    vnet_hw_interface_t *hi;
    u32 hw_if_index;

    /* Take an ethernet interface instance from the pool. */
    pool_get (em->interfaces, ei);

    /* Callback stored on the instance; per the original note it is used to
       configure hardware features such as promiscuous mode. */
    ei->flag_change = flag_change;

    /* Register the interface, which assigns its hardware interface index;
       the pool index of ei is passed as the hw instance. */
    hw_if_index = vnet_register_interface
                  (vnm,
                   dev_class_index, dev_instance,
                   ethernet_hw_interface_class.index, ei - em->interfaces);
    *hw_if_index_return = hw_if_index;

    hi = vnet_get_hw_interface (vnm, hw_if_index);

    /* Set up the interface's output node for ethernet. */
    ethernet_setup_node (vnm->vlib_main, hi->output_node_index);

    hi->min_packet_bytes = hi->min_supported_packet_bytes =
                               ETHERNET_MIN_PACKET_BYTES;
    hi->max_packet_bytes = hi->max_supported_packet_bytes =
                               ETHERNET_MAX_PACKET_BYTES;

    /* Default MTU applied to ethernet interfaces here: 9000. */
    vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);

    /* Keep the address both on the ethernet instance and on the hw
       interface. */
    clib_memcpy (ei->address, address, sizeof (ei->address));
    vec_add (hi->hw_address, address, sizeof (ei->address));

    return 0;
}

向接口管理中註冊一個接口

/* Register an interface instance. */
u32
vnet_register_interface (vnet_main_t * vnm,
                         u32 dev_class_index,
                         u32 dev_instance,/* 使用同一種驅動的設備實例編號 */
                         u32 hw_class_index, 
                         u32 hw_instance)/* 同一類硬件設備的編號,好比以太網設備 */
{
    vnet_interface_main_t *im = &vnm->interface_main;
    vnet_hw_interface_t *hw;
    /* 根據設備類型索引獲取對應的類型描述控制塊,至關於linux內核的驅動ops */
    vnet_device_class_t *dev_class =
    vnet_get_device_class (vnm, dev_class_index);
    vnet_hw_interface_class_t *hw_class =
    vnet_get_hw_interface_class (vnm, hw_class_index);
    vlib_main_t *vm = vnm->vlib_main;
    vnet_feature_config_main_t *fcm;
    vnet_config_main_t *cm;
    u32 hw_index, i;
    char *tx_node_name = NULL, *output_node_name = NULL;
    /* 分配一個硬件接口描述控制塊 */
    pool_get (im->hw_interfaces, hw);
    clib_memset (hw, 0, sizeof (*hw));

    hw_index = hw - im->hw_interfaces;
    hw->hw_if_index = hw_index;
    /* 默認模式就是POLLING模式 */
    hw->default_rx_mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
    /* 設置設備名字 */
    if (dev_class->format_device_name)
        hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
    else if (hw_class->format_interface_name)
        hw->name = format (0, "%U", hw_class->format_interface_name,
                           dev_instance);
    else
        hw->name = format (0, "%s%x", hw_class->name, dev_instance);
    /* 構建接口名字到接口索引的hash表 */
    if (!im->hw_interface_by_name)
        im->hw_interface_by_name = hash_create_vec ( /* size */ 0,
                sizeof (hw->name[0]),
                sizeof (uword));
    
    /* 設置本接口名字與索引的映射關係 */
    hash_set_mem (im->hw_interface_by_name, hw->name, hw_index);

    /* Make hardware interface point to software interface. */
    /* 在硬件接口的基礎上構建軟件接口 */
    {
        vnet_sw_interface_t sw =
        {
            .type = VNET_SW_INTERFACE_TYPE_HARDWARE,
            .flood_class = VNET_FLOOD_CLASS_NORMAL,
            .hw_if_index = hw_index
        };
        /* 分配軟件接口索引 */
        hw->sw_if_index = vnet_create_sw_interface_no_callbacks (vnm, &sw);
    }
    /* 設置設備類索引 */
    hw->dev_class_index = dev_class_index;
    /* 同類設備的索引 */
    hw->dev_instance = dev_instance;
    /* 硬件類索引 */
    hw->hw_class_index = hw_class_index;
    /* 同硬件類索引 */
    hw->hw_instance = hw_instance;

    hw->max_rate_bits_per_sec = 0;
    hw->min_packet_bytes = 0;
    /* 設置mtu默認爲0 */
    vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
    /* 這類設備沒有發送函數,建立一個無發送類型節點 */
    if (dev_class->tx_function == 0)
        goto no_output_nodes;    /* No output/tx nodes to create */
    /* interface不只須要承當報文輸入功能,同時也要承擔報文輸出功能,因此須要兩個node
     * 輸入功能採用的是依附於input-node,而輸出功能須要單獨添加一個node,在vpp實現中
     * 採用了兩個node來實現輸出功能,一個是hw-interface層,用於構建鏈路層頭,另一個
     * 物理層,調用驅動發包函數發送報文。
     */
    tx_node_name = (char *) format (0, "%v-tx", hw->name);
    output_node_name = (char *) format (0, "%v-output", hw->name);

    /* If we have previously deleted interface nodes, re-use them. */
    /* 若是之前有些接口node刪除了,複用它們 */
    if (vec_len (im->deleted_hw_interface_nodes) > 0)
    {
        vnet_hw_interface_nodes_t *hn;
        vlib_node_t *node;
        vlib_node_runtime_t *nrt;

        hn = vec_end (im->deleted_hw_interface_nodes) - 1;

        hw->tx_node_index = hn->tx_node_index;
        hw->output_node_index = hn->output_node_index;

        vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
        vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);

        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            vnet_interface_output_runtime_t *rt;

            rt = vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);
            ASSERT (rt->is_deleted == 1);
            rt->is_deleted = 0;
            rt->hw_if_index = hw_index;
            rt->sw_if_index = hw->sw_if_index;
            rt->dev_instance = hw->dev_instance;

            rt = vlib_node_get_runtime_data (this_vlib_main, hw->tx_node_index);
            rt->hw_if_index = hw_index;
            rt->sw_if_index = hw->sw_if_index;
            rt->dev_instance = hw->dev_instance;
        });
        /* *INDENT-ON* */

        /* The new class may differ from the old one.
         * Functions have to be updated. */
        node = vlib_get_node (vm, hw->output_node_index);
        node->function = vnet_interface_output_node;
        node->format_trace = format_vnet_interface_output_trace;
        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            nrt = vlib_node_get_runtime (this_vlib_main, hw->output_node_index);
            nrt->function = node->function;
        });
        /* *INDENT-ON* */

        node = vlib_get_node (vm, hw->tx_node_index);
        node->function = dev_class->tx_function;
        node->format_trace = dev_class->format_tx_trace;
        /* *INDENT-OFF* */
        foreach_vlib_main (
        {
            nrt = vlib_node_get_runtime (this_vlib_main, hw->tx_node_index);
            nrt->function = node->function;
        });
        /* *INDENT-ON* */

        _vec_len (im->deleted_hw_interface_nodes) -= 1;
    }
    else
    {
        vlib_node_registration_t r;
        vnet_interface_output_runtime_t rt =
        {
            .hw_if_index = hw_index,
            .sw_if_index = hw->sw_if_index,
            .dev_instance = hw->dev_instance,
            .is_deleted = 0,
        };
        /* 註冊物理層輸出節點,該節點直接將報文輸出,不須要下一個節點,除非報文出錯 */
        clib_memset (&r, 0, sizeof (r));
        r.type = VLIB_NODE_TYPE_INTERNAL;
        r.runtime_data = &rt;
        r.runtime_data_bytes = sizeof (rt);
        r.scalar_size = 0;
        r.vector_size = sizeof (u32);

        r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
        r.name = tx_node_name;
        r.function = dev_class->tx_function;

        hw->tx_node_index = vlib_register_node (vm, &r);

        vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
                                            "error-drop",
                                            VNET_INTERFACE_TX_NEXT_DROP);
        /* 註冊鏈路層輸出節點,用於構建鏈路層信息,指向物理層節點 */
        r.flags = 0;
        r.name = output_node_name;
        r.function = vnet_interface_output_node;
        r.format_trace = format_vnet_interface_output_trace;

        {
            static char *e[] =
            {
                "interface is down",
                "interface is deleted",
                "no buffers to segment GSO",
            };

            r.n_errors = ARRAY_LEN (e);
            r.error_strings = e;
        }
        hw->output_node_index = vlib_register_node (vm, &r);

        vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
                                            "error-drop",
                                            VNET_INTERFACE_OUTPUT_NEXT_DROP);
        /* 指向物理層輸出節點 */
        vlib_node_add_next_with_slot (vm, hw->output_node_index,
                                      hw->tx_node_index,
                                      VNET_INTERFACE_OUTPUT_NEXT_TX);

        /* add interface to the list of "output-interface" feature arc start nodes
         * and clone nexts from 1st interface if it exists 
         * 構建鏈路層輸出節點的output-featrue-arc。
         * 將本output節點做爲output-featrue-arc的一個起始幾點。
         */
        fcm = vnet_feature_get_config_main (im->output_feature_arc_index);
        cm = &fcm->config_main;
        i = vec_len (cm->start_node_indices);
        vec_validate (cm->start_node_indices, i);
        cm->start_node_indices[i] = hw->output_node_index;
        if (hw_index)
        {
            /* copy nexts from 1st interface */
            vnet_hw_interface_t *first_hw;
            vlib_node_t *first_node;

            first_hw = vnet_get_hw_interface (vnm, /* hw_if_index */ 0);
            first_node = vlib_get_node (vm, first_hw->output_node_index);

            /* 1st 2 nexts are already added above */
            for (i = 2; i < vec_len (first_node->next_nodes); i++)
                        vlib_node_add_next_with_slot (vm, hw->output_node_index,
                                                      first_node->next_nodes[i], i);
        }
    }
    /* 構建鏈路層輸出節點 */
    setup_output_node (vm, hw->output_node_index, hw_class);
    /* 構建物理層輸出節點 */
    setup_tx_node (vm, hw->tx_node_index, dev_class);

no_output_nodes:
    /* Call all up/down callbacks with zero flags when interface is created. */
    vnet_sw_interface_set_flags_helper (vnm, hw->sw_if_index, /* flags */ 0,
                                        VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
    vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
                                        VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
    vec_free (tx_node_name);
    vec_free (output_node_name);

    return hw_index;
}

給接口分配指定的收包線程

在函數af_packet_create_if中調用了以下代碼,用於設置該接口的收包線程,對於af_packet採用的是中斷模式收包。

/* Set the index of the input node that corresponds to this interface. */
    vnet_hw_interface_set_input_node (vnm, apif->hw_if_index,
                                      af_packet_input_node.index);
    /* Assign queue 0 of this interface to a receive thread; ~0 leaves the
     * choice of thread to vnet_hw_interface_assign_rx_thread. */
    vnet_hw_interface_assign_rx_thread (vnm, apif->hw_if_index, 0,    /* queue */
                                        ~0 /* any cpu */ );
vnet_hw_interface_assign_rx_thread
/*
 * Attach one rx queue of a hardware interface to a receive thread.
 *
 *   vnm          - vnet main structure.
 *   hw_if_index  - hardware interface that owns the queue.
 *   queue_id     - rx queue being assigned.
 *   thread_index - requested thread. 0 means the main thread. A non-zero
 *                  value outside [first_worker, last_worker] (for example
 *                  ~0) is replaced by the next worker, chosen round-robin.
 *
 * The queue is registered in polling mode and the input node on the chosen
 * thread is switched to the polling state.
 */
void
vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
                                    u16 queue_id, uword thread_index)
{
    vnet_device_main_t *vdm = &vnet_device_main;
    vlib_main_t *rx_vm, *caller_vm;
    vnet_device_input_runtime_t *input_rt;
    vnet_device_and_queue_t *slot;
    vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);

    ASSERT (hw->input_node_index > 0);

    /* Without worker threads the main thread does all the receiving. */
    if (vdm->first_worker_thread_index == 0)
    {
        thread_index = 0;
    }

    if (thread_index != 0)
    {
        int in_worker_range =
            thread_index >= vdm->first_worker_thread_index &&
            thread_index <= vdm->last_worker_thread_index;

        if (!in_worker_range)
        {
            /* Round-robin over the workers, wrapping after the last one. */
            thread_index = vdm->next_worker_thread_index++;
            if (vdm->next_worker_thread_index > vdm->last_worker_thread_index)
            {
                vdm->next_worker_thread_index = vdm->first_worker_thread_index;
            }
        }
    }

    /* Thread that will receive for this queue. */
    rx_vm = vlib_mains[thread_index];
    /* Thread executing this call (presumably the main thread). */
    caller_vm = vlib_get_main ();

    vlib_worker_thread_barrier_sync (caller_vm);

    /* Per-thread runtime data of the interface's input node. */
    input_rt = vlib_node_get_runtime_data (rx_vm, hw->input_node_index);

    /* Append a (device, queue) entry; polling is the initial mode. */
    vec_add2 (input_rt->devices_and_queues, slot, 1);
    slot->hw_if_index = hw_if_index;
    slot->dev_instance = hw->dev_instance;
    slot->queue_id = queue_id;
    slot->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
    input_rt->enabled_node_state = VLIB_NODE_STATE_POLLING;

    vnet_device_queue_update (vnm, input_rt);

    /* Remember, per queue, the owning thread and the rx mode. */
    vec_validate (hw->input_node_thread_index_by_queue, queue_id);
    vec_validate (hw->rx_mode_by_queue, queue_id);
    hw->input_node_thread_index_by_queue[queue_id] = thread_index;
    hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_POLLING;

    vlib_worker_thread_barrier_release (caller_vm);

    /* Put the input node on the receiving thread into its enabled state. */
    vlib_node_set_state (rx_vm, hw->input_node_index,
                         input_rt->enabled_node_state);
}

刪除一個interface

咱們還是以af_packet接口爲例:

/*
 * Delete the af_packet interface registered under a host interface name.
 *
 *   vm           - vlib main (not used by this function).
 *   host_if_name - key into apm->if_index_by_host_if_name.
 *
 * Returns 0 on success, or VNET_API_ERROR_SYSCALL_ERROR_1 when no interface
 * is registered under host_if_name.
 */
int
af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
{
    af_packet_main_t *apm = &af_packet_main;
    vnet_main_t *vnm = vnet_get_main ();
    af_packet_if_t *apif;
    uword *entry;
    uword if_index;
    u32 rx_bytes, tx_bytes, ring_sz;

    entry = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
    if (!entry)
    {
        vlib_log_warn (apm->log_class, "Host interface %s does not exist",
                       host_if_name);
        return VNET_API_ERROR_SYSCALL_ERROR_1;
    }

    apif = pool_elt_at_index (apm->interfaces, entry[0]);
    if_index = apif - apm->interfaces;

    /* Bring the interface down. */
    vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
    /* Detach queue 0 from its receive thread. */
    vnet_hw_interface_unassign_rx_thread (vnm, apif->hw_if_index, 0);

    /* Drop the registered file entry if there is one; otherwise just close
     * the descriptor directly. */
    if (apif->clib_file_index == ~0)
    {
        close (apif->fd);
    }
    else
    {
        clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index);
        apif->clib_file_index = ~0;
    }

    /* The rx and tx rings are unmapped as one region starting at rx_ring. */
    rx_bytes = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr;
    tx_bytes = apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
    ring_sz = rx_bytes + tx_bytes;
    if (munmap (apif->rx_ring, ring_sz) != 0)
    {
        vlib_log_warn (apm->log_class,
                       "Host interface %s could not free rx/tx ring",
                       host_if_name);
    }
    apif->rx_ring = NULL;
    apif->tx_ring = NULL;
    apif->fd = -1;

    /* Release the ring request descriptors. */
    vec_free (apif->rx_req);
    apif->rx_req = NULL;
    vec_free (apif->tx_req);
    apif->tx_req = NULL;

    /* Release the stored host interface name. */
    vec_free (apif->host_if_name);
    apif->host_if_name = NULL;
    apif->host_if_index = -1;

    mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);

    /* Remove the ethernet interface built on top of this device. */
    ethernet_delete_interface (vnm, apif->hw_if_index);

    pool_put (apm->interfaces, apif);

    return 0;
}

解除收包線程

/*
 * Detach one rx queue of a hardware interface from its receive thread.
 *
 *   vnm         - vnet main structure.
 *   hw_if_index - hardware interface that owns the queue.
 *   queue_id    - rx queue to detach.
 *
 * Returns 0 on success, or VNET_API_ERROR_INVALID_INTERFACE when the
 * interface has no queue-to-thread table, queue_id is beyond that table, or
 * the (interface, queue) pair is not present in the input node's runtime.
 */
int
vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
                                      u16 queue_id)
{
    vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
    vlib_main_t *rx_vm, *caller_vm;
    vnet_device_input_runtime_t *input_rt;
    vnet_device_and_queue_t *it, *match = 0;
    uword owner_thread;
    vnet_hw_interface_rx_mode removed_mode;

    if (hw->input_node_thread_index_by_queue == 0)
        return VNET_API_ERROR_INVALID_INTERFACE;

    if (vec_len (hw->input_node_thread_index_by_queue) < queue_id + 1)
        return VNET_API_ERROR_INVALID_INTERFACE;

    owner_thread = hw->input_node_thread_index_by_queue[queue_id];
    rx_vm = vlib_mains[owner_thread];

    /* Runtime data of the input node on the thread that owns the queue. */
    input_rt = vlib_node_get_runtime_data (rx_vm, hw->input_node_index);

    /* Locate the entry for this (interface, queue) pair. */
    vec_foreach (it, input_rt->devices_and_queues)
    {
        if (it->hw_if_index == hw_if_index && it->queue_id == queue_id)
        {
            match = it;
            break;
        }
    }

    if (match == 0)
        return VNET_API_ERROR_INVALID_INTERFACE;

    removed_mode = match->mode;

    /* Remove the entry while the worker threads are held at the barrier. */
    caller_vm = vlib_get_main ();
    vlib_worker_thread_barrier_sync (caller_vm);
    vec_del1 (input_rt->devices_and_queues,
              match - input_rt->devices_and_queues);
    vnet_device_queue_update (vnm, input_rt);
    hw->rx_mode_by_queue[queue_id] = VNET_HW_INTERFACE_RX_MODE_UNKNOWN;
    vlib_worker_thread_barrier_release (caller_vm);

    /* Nothing left for this input node on that thread: disable it. */
    if (vec_len (input_rt->devices_and_queues) == 0)
    {
        vlib_node_set_state (rx_vm, hw->input_node_index,
                             VLIB_NODE_STATE_DISABLED);
        return 0;
    }

    if (removed_mode != VNET_HW_INTERFACE_RX_MODE_POLLING)
        return 0;

    /*
     * The removed queue was polling. Mixed polling/interrupt queues on one
     * thread keep the node in polling state, so if no polling queue is left
     * the node has to drop back to interrupt state.
     */
    vec_foreach (it, input_rt->devices_and_queues)
    {
        if (it->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
            return 0;
    }

    input_rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
    vlib_node_set_state (rx_vm, hw->input_node_index,
                         input_rt->enabled_node_state);

    return 0;
}

ethernet_delete_interface

void
ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index)
{
    ethernet_main_t *em = &ethernet_main;
    ethernet_interface_t *ei;
    vnet_hw_interface_t *hi;
    main_intf_t *main_intf;
    vlan_table_t *vlan_table;
    u32 idx;

    hi = vnet_get_hw_interface (vnm, hw_if_index);
    ei = pool_elt_at_index (em->interfaces, hi->hw_instance);

    /* Delete vlan mapping table for dot1q and dot1ad. */
    main_intf = vec_elt_at_index (em->main_intfs, hi->hw_if_index);
    if (main_intf->dot1q_vlans)
    {
        vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1q_vlans);
        for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
            if (vlan_table->vlans[idx].qinqs)
            {
                pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
                vlan_table->vlans[idx].qinqs = 0;
            }
        }
        pool_put_index (em->vlan_pool, main_intf->dot1q_vlans);
        main_intf->dot1q_vlans = 0;
    }
    if (main_intf->dot1ad_vlans)
    {
        vlan_table = vec_elt_at_index (em->vlan_pool, main_intf->dot1ad_vlans);
        for (idx = 0; idx < ETHERNET_N_VLAN; idx++)
        {
            if (vlan_table->vlans[idx].qinqs)
            {
                pool_put_index (em->qinq_pool, vlan_table->vlans[idx].qinqs);
                vlan_table->vlans[idx].qinqs = 0;
            }
        }
        pool_put_index (em->vlan_pool, main_intf->dot1ad_vlans);
        main_intf->dot1ad_vlans = 0;
    }

    vnet_delete_hw_interface (vnm, hw_if_index);
    pool_put (em->interfaces, ei);
}

刪除硬件接口信息

/*
 * Delete a hardware interface and everything hanging off it.
 *
 *   vnm         - vnet main structure.
 *   hw_if_index - index of the hardware interface to delete.
 *
 * Steps, in order: force the interface down, run the delete callbacks,
 * delete all sub-interfaces and the main software interface, park the
 * output/tx nodes on the recycle list (only for device classes that have a
 * tx function), drop the name mapping, free the per-interface vectors and
 * return the slot to im->hw_interfaces.
 */
void
vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
{
    vnet_interface_main_t *im = &vnm->interface_main;
    vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
    vlib_main_t *vm = vnm->vlib_main;
    vnet_device_class_t *dev_class = vnet_get_device_class (vnm,
            hw->dev_class_index);
    /* If it is up, mark it down. */
    if (hw->flags != 0)
        vnet_hw_interface_set_flags (vnm, hw_if_index, /* flags */ 0);

    /* Call delete callbacks. */
    call_hw_interface_add_del_callbacks (vnm, hw_if_index, /* is_create */ 0);

    /* Delete any sub-interfaces. */
    {
        u32 id, sw_if_index;
        /* *INDENT-OFF* */
        hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
                      (
        {
            vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
            /* Key is (super sw_if_index << 32) | sub-interface id. */
            u64 sup_and_sub_key =
            ((u64) (si->sup_sw_if_index) << 32) | (u64) si->sub.id;
            hash_unset_mem_free (&im->sw_if_index_by_sup_and_sub, &sup_and_sub_key);
            vnet_delete_sw_interface (vnm, sw_if_index);
        }));
        hash_free (hw->sub_interface_sw_if_index_by_id);
        /* *INDENT-ON* */
    }

    /* Delete software interface corresponding to hardware interface. */
    vnet_delete_sw_interface (vnm, hw->sw_if_index);

    if (dev_class->tx_function)
    {
        /* Put output/tx nodes into recycle pool */
        vnet_hw_interface_nodes_t *dn;

        /* In every vlib main, flag the output node's runtime data as deleted
         * instead of removing the node. */
        /* *INDENT-OFF* */
        foreach_vlib_main
        (
        {
            vnet_interface_output_runtime_t *rt =
            vlib_node_get_runtime_data (this_vlib_main, hw->output_node_index);

            /* Mark node runtime as deleted so output node (if called)
             * will drop packets. */
            rt->is_deleted = 1;
        });
        /* *INDENT-ON* */
        /* Rename both nodes so their names no longer carry the interface
         * name. */
        vlib_node_rename (vm, hw->output_node_index,
                          "interface-%d-output-deleted", hw_if_index);
        vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
                          hw_if_index);
        /* Remember the node pair on the deleted-nodes list for later reuse. */
        vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
        dn->tx_node_index = hw->tx_node_index;
        dn->output_node_index = hw->output_node_index;
    }

    hash_unset_mem (im->hw_interface_by_name, hw->name);
    vec_free (hw->name);
    vec_free (hw->hw_address);
    vec_free (hw->input_node_thread_index_by_queue);
    /* rx_mode_by_queue is allocated alongside
     * input_node_thread_index_by_queue when a queue is assigned to a thread;
     * free it too so it is not leaked when the pool slot is reused. */
    vec_free (hw->rx_mode_by_queue);
    vec_free (hw->dq_runtime_index_by_queue);

    pool_put (im->hw_interfaces, hw);
}
相關文章
相關標籤/搜索