ARP 實現

ARP 實現

現在我們介紹一下 ARP 的實現,內核版本爲 2.6.24。
[數據結構]
協議棧通過 ARP 協議獲取到的網絡上鄰居主機的 IP 地址與 MAC 地址的對應關係都會保存在鄰居表(struct neigh_table)中,以備下次與鄰居通信時使用;
同時,ARP 模塊自身也會提供一套相應的機制來更新和維護這個鄰居表。
/*
 * Neighbour table. Holds the address mappings learned for neighbours of one
 * protocol family (arp_tbl is the AF_INET instance), together with the
 * parameters, timers, hash tables and statistics used to maintain them.
 */
struct neigh_table
{
    struct neigh_table      *next; // next neighbour table in the global list
    int                     family; // protocol family
    int                     entry_size;// size of one neighbour entry; arp_tbl uses sizeof(struct neighbour) + 4 (4 = length of an IPv4 address)
    int                     key_len;// length of the lookup key; arp_tbl uses 4 (IPv4 address length)
    // hash function; the value is computed from the lookup key and the net_device
    __u32                   (*hash)(const void *pkey, const struct net_device *);
    // per-entry constructor, called by neigh_create() for every new neighbour
    int                     (*constructor)(struct neighbour *);
    int                     (*pconstructor)(struct pneigh_entry *);
    void                    (*pdestructor)(struct pneigh_entry *);
    void                    (*proxy_redo)(struct sk_buff *skb);
    char                    *id;// name of the table (also used as slab cache / proc entry name)
    // Table-wide default parameters. According to the original author's note, each
    // network interface has its own node on a list starting here that carries the
    // neighbour parameters of that interface (list handling is not shown in this excerpt).
    struct neigh_parms      parms;
    /* HACK. gc_* should follow parms without a gap! */
    // used by the periodic garbage collector
    int                     gc_interval;
    int                     gc_thresh1;
    // second threshold: once the entry count reaches it and the last flush is more
    // than 5 seconds old, allocating a new neighbour forces a garbage collection
    int                     gc_thresh2;
    int                     gc_thresh3;// hard upper limit; at or above it a new neighbour cannot be allocated unless forced GC frees something
    unsigned long           last_flush;// time of the most recent flush
    // periodic garbage-collection timer
    struct timer_list       gc_timer;
    struct timer_list       proxy_timer;// proxy-ARP timer
    struct sk_buff_head     proxy_queue;// queue of proxy-ARP packets waiting to be processed
    atomic_t                entries;// number of neighbour entries in the whole table
    rwlock_t                lock;
    // timestamp related to the periodic regeneration of reachable_time for the parms
    // nodes (per the original note; the code that consumes it is not shown here)
    unsigned long           last_rand;
    struct kmem_cache               *kmem_cachep;
    struct neigh_statistics *stats;
    struct neighbour        **hash_buckets;// hash bucket array holding the neighbour entries
    unsigned int            hash_mask;
    __u32                   hash_rnd;// random value mixed into the hash of hash_buckets
    unsigned int            hash_chain_gc;
    struct pneigh_entry     **phash_buckets;
#ifdef CONFIG_PROC_FS
    struct proc_dir_entry   *pde;
#endif
};
鄰居項結構
/*
 * One neighbour entry: the cached mapping between a protocol address
 * (stored in the trailing primary_key) and a link-layer address (ha)
 * on a given device, plus its state, timer and pending packets.
 */
struct neighbour
{
    struct neighbour        *next; // next entry in the same hash bucket
    struct neigh_table      *tbl; // owning neighbour table
    struct neigh_parms      *parms; // parameters in effect for this entry
    struct net_device        *dev; // network device the neighbour is reached through
    unsigned long           used; // time of last use
    unsigned long           confirmed;
    unsigned long           updated; // time of last update
    __u8                    flags; // flags
    __u8                    nud_state; // NUD_* state
    __u8                    type; // address type
    __u8                    dead; // set to 1 while the entry is not linked into the table
    atomic_t                probes; // number of ARP requests (re)sent
    rwlock_t                lock;
    unsigned char           ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; // link-layer (hardware) address of the neighbour
    struct hh_cache         *hh; // cached hardware headers, one node per protocol
    atomic_t                refcnt; // reference count
    int                     (*output)(struct sk_buff *skb); // transmit function currently in effect
    struct sk_buff_head     arp_queue; // packets queued while the address is being resolved
    struct timer_list        timer; // per-entry timer
    struct neigh_ops        *ops; // operations table
    u8                      primary_key[0]; // protocol address (lookup key), key_len bytes
};
硬件頭緩存:每種協議對應一個節點,協議類型記錄在 hh_type 中。我們現在只處理 IP 協議,所以這個鏈表中總是只有一項。
/*
 * Cached link-layer header. Nodes are chained through hh_next and the
 * protocol each node belongs to is recorded in hh_type.
 */
struct hh_cache
{
    struct hh_cache *hh_next;  // next node
    atomic_t        hh_refcnt;  // reference count
    __be16          hh_type ____cacheline_aligned_in_smp; // protocol type

    u16             hh_len;         /* length of header */
    // Output function. With a cached header the link-layer header does not have to be
    // rebuilt for every packet; the cache is refreshed when address resolution completes.
    int             (*hh_output)(struct sk_buff *skb);
    seqlock_t       hh_lock;
#define HH_DATA_MOD     16
#define HH_DATA_OFF(__len)  (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1))
#define HH_DATA_ALIGN(__len) (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1))
    // the cached hardware header itself (for Ethernet: the Ethernet header)
    unsigned long   hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)];
};

/* States for which neigh_update() arms the per-neighbour timer. */
#define NUD_IN_TIMER    (NUD_INCOMPLETE|NUD_REACHABLE|NUD_DELAY|NUD_PROBE)
/* States in which the cached link-layer address is considered usable. */
#define NUD_VALID       (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
/* States in which neigh_update() switches the entry to connected_output via neigh_connect(). */
#define NUD_CONNECTED   (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE)
[/數據結構]
[初始化]
    inet_init (net/ipv4/af_inet.c) -> arp_init arp初始化函數。
/*
 * ARP subsystem initialisation: sets up the ARP neighbour table, registers
 * the ARP packet handler, the proc interface, the sysctl entries (when
 * CONFIG_SYSCTL is set) and the netdevice notifier.
 */
void __init arp_init(void)
{
    neigh_table_init(&arp_tbl);// initialise the ARP neighbour table

    dev_add_pack(&arp_packet_type);// register the receive handler for ARP packets
    arp_proc_init();
#ifdef CONFIG_SYSCTL
    neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL);
#endif
    register_netdevice_notifier(&arp_netdev_notifier); // register on the netdevice notifier chain
}
/*
 * Initialise a neighbour table and link it at the head of the global
 * neigh_tables list. If a table with the same family is already
 * registered the new table is still linked in, but an error is logged
 * together with a stack dump.
 */
void neigh_table_init(struct neigh_table *tbl)
{
    struct neigh_table *dup;

    neigh_table_init_no_netlink(tbl); /* the actual table setup */

    write_lock(&neigh_tbl_lock);

    /* Walk the list until a table of the same family (or the end) is hit. */
    dup = neigh_tables;
    while (dup) {
        if (dup->family == tbl->family)
            break;
        dup = dup->next;
    }

    /* Push the new table onto the head of the list. */
    tbl->next    = neigh_tables;
    neigh_tables = tbl;

    write_unlock(&neigh_tbl_lock);

    if (unlikely(dup)) {
        /* A table for this family was already registered: report it. */
        printk(KERN_ERR "NEIGH: Registering multiple tables for family %d\n", tbl->family);
        dump_stack();
    }
}
    實際的初始化函數
/*
 * Perform the actual initialisation of a neighbour table: default parms,
 * slab cache, per-cpu statistics, proc entry, the two hash tables, the
 * hash seed, the lock and the GC / proxy timers. Any allocation failure
 * here ends in panic().
 */
void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
    unsigned long now = jiffies;
    unsigned long phsize;

    atomic_set(&tbl->parms.refcnt, 1); // reference count starts at 1
    INIT_RCU_HEAD(&tbl->parms.rcu_head);
    tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);

    if (!tbl->kmem_cachep) // create the slab cache neighbour entries are allocated from
        tbl->kmem_cachep = kmem_cache_create(tbl->id, tbl->entry_size, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

    tbl->stats = alloc_percpu(struct neigh_statistics); // per-cpu statistics
    if (!tbl->stats)
        panic("cannot create neighbour cache statistics");
#ifdef CONFIG_PROC_FS // statistics entry named tbl->id under proc_net_stat (for arp_tbl: /proc/net/stat/arp_cache)
    tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
    if (!tbl->pde)
        panic("cannot create neighbour proc dir entry");
    tbl->pde->proc_fops = &neigh_stat_seq_fops;
    tbl->pde->data = tbl;
#endif
    tbl->hash_mask = 1;
    tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); // start with two hash buckets

    //#define PNEIGH_HASHMASK         0xF
    // allocate PNEIGH_HASHMASK + 1 (i.e. 16) buckets for the proxy-neighbour hash
    phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
    tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

    if (!tbl->hash_buckets || !tbl->phash_buckets)
        panic("cannot allocate neighbour cache hashes");

    get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); // random seed for the hash function

    rwlock_init(&tbl->lock);
    init_timer(&tbl->gc_timer);// garbage-collection timer
    tbl->gc_timer.data     = (unsigned long)tbl;
    tbl->gc_timer.function = neigh_periodic_timer; // garbage-collection handler
    tbl->gc_timer.expires  = now + 1;
    add_timer(&tbl->gc_timer);

    init_timer(&tbl->proxy_timer); // proxy-ARP timer (initialised only, not started here)
    tbl->proxy_timer.data     = (unsigned long)tbl;
    tbl->proxy_timer.function = neigh_proxy_process; // handler for the proxy-ARP queue
    // NOTE(review): the original author was unsure about this call; it initialises
    // proxy_queue and passes a dedicated class object — presumably a lockdep class, confirm.
    skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class);

    tbl->last_flush = now;
    tbl->last_rand  = now + tbl->parms.reachable_time * 20;
}
arp協議處理結構
/* Packet handler registered by arp_init() via dev_add_pack(). */
static struct packet_type arp_packet_type = {
    .type = __constant_htons(ETH_P_ARP), // ARP protocol
    .func = arp_rcv, // called for frames whose protocol is ARP
};
/*
 * The IPv4 (ARP) neighbour table with its default parameters.
 * entry_size reserves 4 extra bytes behind struct neighbour for the
 * IPv4 address stored in primary_key; key_len is that address length.
 */
struct neigh_table arp_tbl = {
    .family =       AF_INET,
    .entry_size =   sizeof(struct neighbour) + 4,
    .key_len =      4,
    .hash =         arp_hash,
    .constructor =  arp_constructor,
    .proxy_redo =   parp_redo,
    .id =           "arp_cache",
    .parms = {
        .tbl =                  &arp_tbl,
        .base_reachable_time =  30 * HZ,
        .retrans_time = 1 * HZ,
        .gc_staletime = 60 * HZ,
        .reachable_time =               30 * HZ,
        .delay_probe_time =     5 * HZ,
        .queue_len =            3,
        .ucast_probes = 3,
        .mcast_probes = 3,
        .anycast_delay =        1 * HZ,
        .proxy_delay =          (8 * HZ) / 10,
        .proxy_qlen =           64,
        .locktime =             1 * HZ,
    },
    .gc_interval =  30 * HZ,
    .gc_thresh1 =   128,
    .gc_thresh2 =   512,
    .gc_thresh3 =   1024,
};
[/初始化]
[arp協議處理]
    arp接收處理函數
/*
 * Receive handler for ARP frames (installed through arp_packet_type).
 * Performs basic sanity checks on the packet and the receiving device,
 * un-shares the skb if necessary and passes it through the NF_ARP_IN
 * netfilter hook on to arp_process(). Always returns 0 when the packet
 * is dropped here; otherwise returns whatever NF_HOOK returns.
 */
static int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
    struct arphdr *hdr;

    /* Only the initial network namespace is handled. */
    if (dev->nd_net != &init_net)
        goto freeskb;

    /* The fixed header plus two hardware and two IPv4 addresses must be present. */
    if (!pskb_may_pull(skb, (sizeof(struct arphdr) + (2 * dev->addr_len) + (2 * sizeof(u32)))))
        goto freeskb;

    hdr = arp_hdr(skb);

    /* Hardware address length must match the device. */
    if (hdr->ar_hln != dev->addr_len)
        goto freeskb;
    /* The device must do ARP at all. */
    if (dev->flags & IFF_NOARP)
        goto freeskb;
    /* Ignore frames addressed to other hosts and looped-back frames. */
    if (skb->pkt_type == PACKET_OTHERHOST)
        goto freeskb;
    if (skb->pkt_type == PACKET_LOOPBACK)
        goto freeskb;
    /* Protocol address length must be that of an IPv4 address. */
    if (hdr->ar_pln != 4)
        goto freeskb;

    /* Get a private skb if this one is shared. */
    skb = skb_share_check(skb, GFP_ATOMIC);
    if (skb == NULL)
        goto out_of_mem;

    memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));

    /* Run the ARP input hook; arp_process() continues the work afterwards. */
    return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process);

freeskb:
    kfree_skb(skb);
out_of_mem:
    return 0;
}
    正式的處理arp包
/*
 * Process one received ARP packet: validate the header against the device
 * type, answer requests for local or proxied addresses, and update the
 * neighbour table from the sender's address. The skb is consumed on every
 * path except when it is handed to pneigh_enqueue(). Always returns 0.
 */
static int arp_process(struct sk_buff *skb)
{
    struct net_device *dev = skb->dev;
    struct in_device *in_dev = in_dev_get(dev);
    struct arphdr *arp;
    unsigned char *arp_ptr;
    struct rtable *rt;
    unsigned char *sha;
    __be32 sip, tip;
    u16 dev_type = dev->type; // device (hardware) type
    int addr_type;
    struct neighbour *n;

    if (in_dev == NULL)
        goto out;

    arp = arp_hdr(skb); // ARP header

    switch (dev_type) {
        default:
            // drop if the protocol is not IP or the hardware type in the packet does not match the device
            if (arp->ar_pro != htons(ETH_P_IP) || htons(dev_type) != arp->ar_hrd)
                goto out;
            break;
        case ARPHRD_ETHER: // Ethernet
        case ARPHRD_IEEE802_TR:
        case ARPHRD_FDDI:
        case ARPHRD_IEEE802:
            // drop if the hardware type is neither ETHER nor IEEE802, or the protocol is not IP
            if ((arp->ar_hrd != htons(ARPHRD_ETHER) && arp->ar_hrd != htons(ARPHRD_IEEE802)) || arp->ar_pro != htons(ETH_P_IP))
                goto out;
            break;
        case ARPHRD_AX25:
            if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_AX25))
                goto out;
            break;
        case ARPHRD_NETROM:
            if (arp->ar_pro != htons(AX25_P_IP) || arp->ar_hrd != htons(ARPHRD_NETROM))
                goto out;
            break;
    }
    // only requests and replies are understood
    if (arp->ar_op != htons(ARPOP_REPLY) && arp->ar_op != htons(ARPOP_REQUEST))
        goto out;
    arp_ptr= (unsigned char *)(arp+1); // payload after the fixed header: hardware and IP addresses
    sha     = arp_ptr; // sender hardware address
    arp_ptr += dev->addr_len;// advance past it
    memcpy(&sip, arp_ptr, 4); // sender IP
    arp_ptr += 4;
    arp_ptr += dev->addr_len;
    memcpy(&tip, arp_ptr, 4); // target IP

    /* Check for bad requests for 127.x.x.x and requests for multicast addresses.  If this is one such, delete it. */
    if (LOOPBACK(tip) || MULTICAST(tip))// target IP is loopback or multicast
        goto out;
    /* Special case: We must set Frame Relay source Q.922 address */
    if (dev_type == ARPHRD_DLCI)
        sha = dev->broadcast;
    /* Special case: IPv4 duplicate address detection packet (RFC2131) */
    // Duplicate-address detection (RFC2131, DHCP): the probe is an ARP request whose sender IP is 0
    if (sip == 0) {
        if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(tip) == RTN_LOCAL && !arp_ignore(in_dev,dev,sip,tip))
            arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, dev->dev_addr, sha);
        goto out;
    }
    // an ARP request for which the input route lookup succeeded
    if (arp->ar_op == htons(ARPOP_REQUEST) && ip_route_input(skb, tip, sip, 0, dev) == 0) {
        rt = (struct rtable*)skb->dst; // route cache entry
        addr_type = rt->rt_type;
        if (addr_type == RTN_LOCAL) { // request for one of our own addresses
            n = neigh_event_ns(&arp_tbl, sha, &sip, dev);// look up / create and update the sender's entry in arp_tbl
            if (n) {
                int dont_send = 0;
                if (!dont_send)
                    dont_send |= arp_ignore(in_dev,dev,sip,tip);

                if (!dont_send && IN_DEV_ARPFILTER(in_dev))
                    dont_send |= arp_filter(sip,tip,dev);

                if (!dont_send) // neither ignored nor filtered: reply with this device's MAC address
                    arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);

                neigh_release(n);
            }
            goto out;
        } else if (IN_DEV_FORWARD(in_dev)) { // not a local address, but forwarding is enabled on the device
            if ((rt->rt_flags&RTCF_DNAT) || (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
                        (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
                n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
                if (n)
                    neigh_release(n);

                // reply at once if the packet was re-queued locally, was sent to us directly,
                // or no proxy delay is configured; otherwise defer it on the proxy queue
                if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST ||
                        in_dev->arp_parms->proxy_delay == 0) {
                    arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha);
                } else {
                    // the skb is handed over to the proxy queue, so it is not freed here
                    pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb);
                    in_dev_put(in_dev);
                    return 0;
                }
                goto out;
            }
        }
    }
    /* Update our ARP tables */
    n = __neigh_lookup(&arp_tbl, &sip, dev, 0);// look up (without creating) the entry for the sender IP

    if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
        /* Unsolicited ARP is not accepted by default.
           It is possible, that this option should be enabled for some devices (strip is candidate)  */
        if (n == NULL && arp->ar_op == htons(ARPOP_REPLY) && inet_addr_type(sip) == RTN_UNICAST)
            n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
    }
    if (n) { // an entry exists for the sender
        int state = NUD_REACHABLE; // assume a unicast reply; downgraded to NUD_STALE below otherwise
        int override;
        /* If several different ARP replies follows back-to-back, use the FIRST one. It is possible, if several proxy
           agents are active. Taking the first reply prevents arp trashing and chooses the fastest router. */
        // allow overriding the cached address only if more than locktime jiffies passed since the last update
        override = time_after(jiffies, n->updated + n->parms->locktime);

        /* Broadcast replies and request packets do not assert neighbour reachability. */
        // a request, or a packet that was not addressed to this host
        if (arp->ar_op != htons(ARPOP_REPLY) || skb->pkt_type != PACKET_HOST)
            state = NUD_STALE;
        neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0);// update the neighbour entry
        neigh_release(n);
    }
out:
    if (in_dev)
        in_dev_put(in_dev);
    kfree_skb(skb);
    return 0;
}
    查找鄰居項(必要時建立),如果找到則更新該項
/*
 * Look up the neighbour entry for saddr on dev and mark it NUD_STALE with
 * the given link-layer address (overriding any cached one). The entry is
 * created if missing, provided a link-layer address was supplied or the
 * device uses no hardware addresses. Returns the entry with a reference
 * held (from __neigh_lookup), or NULL.
 */
struct neighbour *neigh_event_ns(struct neigh_table *tbl, u8 *lladdr, void *saddr, struct net_device *dev)
{
    struct neighbour *entry;

    entry = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
    if (!entry)
        return entry;

    neigh_update(entry, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE);
    return entry;
}
    查詢表,如果沒有找到而且指定了建立標誌,則建立一個新的項
/*
 * Look up a neighbour; when it does not exist and creat is non-zero,
 * create it. Returns the entry, or NULL when it was not found (and not
 * requested to be created) or when creation failed.
 */
static inline struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
    struct neighbour *entry = neigh_lookup(tbl, pkey, dev);

    if (entry)
        return entry;
    if (!creat)
        return entry;

    /* Not cached yet and the caller wants it created. */
    entry = neigh_create(tbl, pkey, dev);
    if (IS_ERR(entry))
        return NULL;
    return entry;
}
/*
 * Find the neighbour entry matching (pkey, dev) in tbl. On a hit a
 * reference is taken on the entry before it is returned; NULL is
 * returned when nothing matches. Lookup/hit statistics are updated.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    struct neighbour *cur;
    int key_len = tbl->key_len;
    u32 hash_val = tbl->hash(pkey, dev); /* hash of the key and the device */

    NEIGH_CACHE_STAT_INC(tbl, lookups);

    read_lock_bh(&tbl->lock);

    /* Scan the chain of the selected bucket. */
    cur = tbl->hash_buckets[hash_val & tbl->hash_mask];
    while (cur) {
        /* Same device and identical key: this is the entry. */
        if (dev == cur->dev && !memcmp(cur->primary_key, pkey, key_len)) {
            neigh_hold(cur);
            NEIGH_CACHE_STAT_INC(tbl, hits);
            break;
        }
        cur = cur->next;
    }

    read_unlock_bh(&tbl->lock);
    return cur;
}
/*
 * Create a neighbour entry for (pkey, dev) and insert it into tbl.
 * Returns the inserted entry, or an already existing entry for the same
 * key if one was added concurrently (in both cases with an extra
 * reference taken), or an ERR_PTR() on failure.
 */
struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    u32 hash_val;
    int key_len = tbl->key_len;
    int error;
    struct neighbour *n1, *rc, *n = neigh_alloc(tbl); // allocate a neighbour entry

    if (!n) {
        rc = ERR_PTR(-ENOBUFS);
        goto out;
    }
    memcpy(n->primary_key, pkey, key_len); // store the peer's address at the tail of the entry (this is why entry_size has key_len extra bytes)
    n->dev = dev; // remember the device
    dev_hold(dev);

    /* Protocol specific setup. */
    if (tbl->constructor && (error = tbl->constructor(n)) < 0) { // run the table's constructor if there is one (arp_constructor for arp_tbl)
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }
    /* Device specific setup. */
    if (n->parms->neigh_setup && (error = n->parms->neigh_setup(n)) < 0) {// optional per-parms setup hook
        rc = ERR_PTR(error);
        goto out_neigh_release;
    }

    n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

    write_lock_bh(&tbl->lock);
    if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) // more entries than buckets
        neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); // double the hash table
    hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; // bucket index

    if (n->parms->dead) {
        rc = ERR_PTR(-EINVAL);
        goto out_tbl_unlock;
    }
    // search again under the write lock: the entry may have been added in the meantime
    for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
        if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
            neigh_hold(n1);
            rc = n1; // found: return the existing entry and drop the new one
            goto out_tbl_unlock;
        }
    }
    // link the new entry at the head of the bucket
    n->next = tbl->hash_buckets[hash_val];
    tbl->hash_buckets[hash_val] = n;
    n->dead = 0; // was 1 since allocation; cleared now that the entry is in the table
    neigh_hold(n);
    write_unlock_bh(&tbl->lock);
    NEIGH_PRINTK2("neigh %p is created.\n", n);
    rc = n;
out:
    return rc;
out_tbl_unlock:
    write_unlock_bh(&tbl->lock);
out_neigh_release:
    neigh_release(n);
    goto out;
}
    分配一個鄰居項
/*
 * Allocate and pre-initialise a neighbour entry for tbl.
 *
 * The table's entry counter is bumped first. If the previous count had
 * reached gc_thresh3, or had reached gc_thresh2 while the last flush is
 * more than 5 seconds old, a forced garbage collection is run; when that
 * frees nothing and the count had reached gc_thresh3 the allocation is
 * refused. On any failure the counter is restored and NULL is returned.
 * The new entry starts in NUD_NONE with refcnt 1 and dead set to 1.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
    struct neighbour *entry;
    unsigned long now = jiffies;
    int entries = atomic_inc_return(&tbl->entries) - 1;

    if (entries >= tbl->gc_thresh3 ||
        (entries >= tbl->gc_thresh2 && time_after(now, tbl->last_flush + 5 * HZ))) {
        /* Try to reclaim entries; give up if still at the hard limit. */
        if (!neigh_forced_gc(tbl) && entries >= tbl->gc_thresh3) {
            atomic_dec(&tbl->entries);
            return NULL;
        }
    }

    entry = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
    if (!entry) {
        atomic_dec(&tbl->entries);
        return NULL;
    }

    skb_queue_head_init(&entry->arp_queue);
    rwlock_init(&entry->lock);
    entry->updated        = entry->used = now;
    entry->nud_state      = NUD_NONE;        /* initial state */
    entry->output         = neigh_blackhole; /* initial transmit function */
    entry->parms          = neigh_parms_clone(&tbl->parms); /* table default parms */
    init_timer(&entry->timer);
    entry->timer.function = neigh_timer_handler;
    entry->timer.data     = (unsigned long)entry;

    NEIGH_CACHE_STAT_INC(tbl, allocs);
    entry->tbl            = tbl;
    atomic_set(&entry->refcnt, 1);
    entry->dead           = 1; /* not linked into the table yet */

    return entry;
}
    增加hash表
/*
 * Replace the neighbour hash table of tbl by one with new_entries buckets
 * (must be a power of two). A fresh hash seed is drawn and every entry is
 * re-hashed into the new bucket array; the old array is freed afterwards.
 * If the new array cannot be allocated the table is left unchanged.
 */
static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
{
    struct neighbour **fresh, **stale;
    unsigned int bucket, fresh_mask, stale_count;

    NEIGH_CACHE_STAT_INC(tbl, hash_grows);
    BUG_ON(!is_power_of_2(new_entries));

    fresh = neigh_hash_alloc(new_entries);
    if (!fresh)
        return;

    stale_count = tbl->hash_mask + 1; /* number of buckets in the old table */
    fresh_mask = new_entries - 1;     /* index mask of the new table */
    stale = tbl->hash_buckets;

    get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

    /* Move every entry of every old bucket to the head of its new bucket. */
    for (bucket = 0; bucket < stale_count; bucket++) {
        struct neighbour *cur = stale[bucket];

        while (cur) {
            unsigned int slot = tbl->hash(cur->primary_key, cur->dev);
            struct neighbour *following = cur->next;

            slot &= fresh_mask;
            cur->next = fresh[slot];
            fresh[slot] = cur;
            cur = following;
        }
    }

    /* Publish the new table, then release the old bucket array. */
    tbl->hash_buckets = fresh;
    tbl->hash_mask = fresh_mask;
    neigh_hash_free(stale, stale_count);
}
    更新鄰居項 neigh_event_ns ->
/*
 * Update a neighbour entry with a new state and, optionally, a new
 * link-layer address.
 *
 * neigh:  entry to update
 * lladdr: new link-layer address, or NULL to keep the cached one
 * new:    new NUD_* state
 * flags:  NEIGH_UPDATE_F_* flags (ADMIN, OVERRIDE, WEAK_OVERRIDE,
 *         OVERRIDE_ISROUTER, ISROUTER)
 *
 * Returns 0 on success, -EPERM when a non-admin update targets a
 * NOARP/PERMANENT entry, -EINVAL when no address is supplied and none is
 * cached. Packets queued on arp_queue are transmitted once the entry
 * becomes valid.
 */
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags)
{
    u8 old;
    int err;
    int notify = 0;
    struct net_device *dev;
    int update_isrouter = 0;

    write_lock_bh(&neigh->lock);

    dev    = neigh->dev;
    old    = neigh->nud_state; // previous state
    err    = -EPERM;
    // only administrative updates may touch NOARP or PERMANENT entries
    if (!(flags & NEIGH_UPDATE_F_ADMIN) && (old & (NUD_NOARP | NUD_PERMANENT)))
        goto out;

    if (!(new & NUD_VALID)) { // the new state is not one of the valid states
        neigh_del_timer(neigh); // stop the entry's timer
        if (old & NUD_CONNECTED) // entry was connected
            neigh_suspect(neigh); // switch back to the non-connected output path

        neigh->nud_state = new; // store the new state
        err = 0;
        notify = old & NUD_VALID; // notify only if the entry used to be valid
        goto out;
    }
    /* Compare new lladdr with cached one */
    if (!dev->addr_len) { // device has zero-length hardware addresses
        /* First case: device needs no address. */
        lladdr = neigh->ha; // use the cached address
    } else if (lladdr) { // caller supplied an address
        /* The second case: if something is already cached and a new address is proposed:
           - compare new & old
           - if they are different, check override flag
           */
        // old state valid and supplied address equals the cached one: treat as unchanged
        if ((old & NUD_VALID) && !memcmp(lladdr, neigh->ha, dev->addr_len))
            lladdr = neigh->ha;
    } else {
        /* No address is supplied; if we know something, use it, otherwise discard the request. */
        err = -EINVAL;
        if (!(old & NUD_VALID)) // nothing valid is cached
            goto out;
        lladdr = neigh->ha;
    }
    if (new & NUD_CONNECTED) // new state is a connected state
        neigh->confirmed = jiffies; // record the confirmation time
    neigh->updated = jiffies; // record the update time

    /* If entry was valid and address is not changed, do not change entry state, if new one is STALE. */
    err = 0;
    update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER; // caller wants the NTF_ROUTER flag rewritten (done at out:)

    if (old & NUD_VALID) { // entry was valid before
        // the address differs from the cached one and overriding is not allowed
        if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
            update_isrouter = 0;
            if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) && (old & NUD_CONNECTED)) {
                lladdr = neigh->ha;
                new = NUD_STALE;
            } else
                goto out;
        } else {
            // address unchanged
            if (lladdr == neigh->ha && new == NUD_STALE && ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) || (old & NUD_CONNECTED)))
                new = old;
        }
    }
    if (new != old) { // state changes
        neigh_del_timer(neigh); // stop the old timer
        if (new & NUD_IN_TIMER) { // the new state needs a running timer
            neigh_hold(neigh); // reference held on behalf of the timer
            // arm the timer: after reachable_time for NUD_REACHABLE, immediately otherwise
            neigh_add_timer(neigh, (jiffies + ((new & NUD_REACHABLE) ? neigh->parms->reachable_time : 0)));
        }
        neigh->nud_state = new; // store the new state
    }
    if (lladdr != neigh->ha) { // hardware address changed
        memcpy(&neigh->ha, lladdr, dev->addr_len); // store it
        neigh_update_hhs(neigh);// refresh the cached hardware headers
        if (!(new & NUD_CONNECTED)) // new state is not connected
            neigh->confirmed = jiffies - (neigh->parms->base_reachable_time << 1);
        notify = 1;
    }
    if (new == old)
        goto out;
    if (new & NUD_CONNECTED) // connected: switch the output functions accordingly
        neigh_connect(neigh);
    else
        neigh_suspect(neigh);
    if (!(old & NUD_VALID)) { // entry just became valid
        struct sk_buff *skb;
        /* Again: avoid dead loop if something went wrong */
        while (neigh->nud_state & NUD_VALID && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
            struct neighbour *n1 = neigh;
            // the lock is dropped around the output call and re-taken afterwards
            write_unlock_bh(&neigh->lock);
            /* On shaper/eql skb->dst->neighbour != neigh :( */
            // prefer the neighbour bound to the packet's route, if any
            if (skb->dst && skb->dst->neighbour)
                n1 = skb->dst->neighbour;
            n1->output(skb); // transmit the queued packet
            write_lock_bh(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue);
    }
out:
    if (update_isrouter) {
        neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ? (neigh->flags | NTF_ROUTER) : (neigh->flags & ~NTF_ROUTER);
    }
    write_unlock_bh(&neigh->lock);
    if (notify)
        neigh_update_notify(neigh);

    return err;
}
    改變輸出函數,在設置爲鏈接狀態時
/*
 * Switch a neighbour to its "connected" output path: the entry uses
 * ops->connected_output and every cached hardware header uses
 * ops->hh_output.
 */
static void neigh_connect(struct neighbour *neigh)
{
    struct hh_cache *cached;

    NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

    neigh->output = neigh->ops->connected_output;

    cached = neigh->hh;
    while (cached) {
        cached->hh_output = neigh->ops->hh_output;
        cached = cached->hh_next;
    }
}
/*
 * Switch a neighbour to its non-connected output path: both the entry
 * and every cached hardware header fall back to ops->output.
 */
static void neigh_suspect(struct neighbour *neigh)
{
    struct hh_cache *cached;

    NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

    neigh->output = neigh->ops->output;

    cached = neigh->hh;
    while (cached) {
        cached->hh_output = neigh->ops->output;
        cached = cached->hh_next;
    }
}
    更新hh_cache中記錄的硬件地址
/*
 * Propagate the neighbour's current hardware address (neigh->ha) into all
 * of its cached hardware headers, using the device's header_ops
 * cache_update callback. Does nothing when the device has no such callback.
 */
static void neigh_update_hhs(struct neighbour *neigh)
{
    struct hh_cache *cached;
    /* presumably installed by the device setup code (e.g. ether_setup) — confirm */
    void (*refresh)(struct hh_cache*, const struct net_device*, const unsigned char *)
        = neigh->dev->header_ops->cache_update;

    if (!refresh)
        return;

    cached = neigh->hh;
    while (cached) {
        /* Each header is rewritten under its own sequence lock. */
        write_seqlock_bh(&cached->hh_lock);
        refresh(cached, neigh->dev, neigh->ha);
        write_sequnlock_bh(&cached->hh_lock);
        cached = cached->hh_next;
    }
}
判斷是否可以使用代理 ARP 來處理這個 ARP 包
    arp_process->
/*
 * Decide whether an ARP request received on in_dev may be answered by
 * proxy for the route rt. Returns 1 when proxying is allowed, 0 otherwise.
 *
 * Proxy ARP must be enabled on in_dev. A medium id of 0 on in_dev allows
 * proxying unconditionally, -1 forbids it; otherwise the outgoing
 * device's medium id must differ from in_dev's and must not be -1.
 */
static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
{
    struct in_device *out_dev;
    int in_medium;
    int out_medium = -1;

    if (!IN_DEV_PROXY_ARP(in_dev))
        return 0;

    in_medium = IN_DEV_MEDIUM_ID(in_dev);
    if (in_medium == 0)
        return 1;
    if (in_medium == -1)
        return 0;

    /* place to check for proxy_arp for routes */
    out_dev = in_dev_get(rt->u.dst.dev);
    if (out_dev != NULL) {
        out_medium = IN_DEV_MEDIUM_ID(out_dev);
        in_dev_put(out_dev);
    }

    if (out_medium == -1)
        return 0;
    return out_medium != in_medium;
}
在代理arp緩存中進行查找
    arp_process->
/*
 * Look up a proxy-neighbour entry for pkey in tbl. An entry matches when
 * its key equals pkey and its device is either dev or unset. When nothing
 * is found and creat is non-zero, a new entry is allocated, constructed
 * via tbl->pconstructor (if set) and inserted. Returns the entry or NULL.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
    struct pneigh_entry *n;
    int key_len = tbl->key_len;
    u32 hash_val = *(u32 *)(pkey + key_len - 4); // last 4 bytes of the key
    // fold the value down to a bucket index
    hash_val ^= (hash_val >> 16);
    hash_val ^= hash_val >> 8;
    hash_val ^= hash_val >> 4;
    hash_val &= PNEIGH_HASHMASK;

    read_lock_bh(&tbl->lock);
    for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { // scan the bucket
        // key matches (arp_process passes the target IP) and the device matches or is a wildcard
        if (!memcmp(n->key, pkey, key_len) && (n->dev == dev || !n->dev)) {
            read_unlock_bh(&tbl->lock);
            goto out; // found
        }
    }
    read_unlock_bh(&tbl->lock);
    n = NULL;

    if (!creat) // creation not requested
        goto out;

    // NOTE(review): the lookup above and the insert below are not under one lock hold;
    // creation presumably relies on RTNL (asserted here) to serialise creators — confirm.
    ASSERT_RTNL();
    // allocate a proxy entry with room for the key
    n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
    if (!n)
        goto out;

    memcpy(n->key, pkey, key_len); // copy the key
    n->dev = dev; // remember the device (may be NULL)
    if (dev)
        dev_hold(dev);

    if (tbl->pconstructor && tbl->pconstructor(n)) { // constructor present and it failed: undo
        if (dev)
            dev_put(dev);

        kfree(n);
        n = NULL;
        goto out;
    }
    // insert at the head of the bucket
    write_lock_bh(&tbl->lock);
    n->next = tbl->phash_buckets[hash_val];
    tbl->phash_buckets[hash_val] = n;
    write_unlock_bh(&tbl->lock);
out:
    return n;
}
    建立發送一個arp包
/*
 * Build an ARP packet with arp_create() and transmit it with arp_xmit().
 * Nothing is sent when the device has IFF_NOARP set or when the packet
 * cannot be created.
 *
 * Per the original author's notes: arp_create() fills in the link-layer
 * header through dev_hard_header(), and arp_xmit() passes the packet
 * through the NF_ARP_OUT hook before dev_queue_xmit() (neither helper is
 * shown in this excerpt).
 */
void arp_send(int type, int ptype, __be32 dest_ip, struct net_device *dev, __be32 src_ip,
        unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw)
{
    struct sk_buff *pkt;

    /* ARP is disabled on this interface. */
    if (dev->flags&IFF_NOARP)
        return;

    pkt = arp_create(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw, target_hw);
    if (pkt != NULL)
        arp_xmit(pkt);
}
[/arp協議處理]
[初始化指定函數實現]
我們看到在初始化部分有一些函數指針被初始化了,現在我們來逐一分析。
    hash計算函數
static u32 arp_hash(const void *pkey, const struct net_device *dev)
{
    //還記得neigh_table_init_no_netlink函數中爲hash_rnd讀了一些隨機數嗎
    return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd);
}
    當建立一個鄰居項時neigh_create會調用到這個構造函數
/*
 * Constructor of arp_tbl, invoked from neigh_create() for each new entry.
 * Classifies the address, switches the entry to the device's ARP
 * parameters and selects the operations table and initial output
 * function according to the device. Returns 0 on success, -EINVAL when
 * the device has no in_device.
 */
static int arp_constructor(struct neighbour *neigh)
{
    __be32 addr = *(__be32*)neigh->primary_key; // the IPv4 address (lookup key)
    struct net_device *dev = neigh->dev;
    struct in_device *in_dev;
    struct neigh_parms *parms;

    neigh->type = inet_addr_type(addr); // classify the address

    rcu_read_lock();
    in_dev = __in_dev_get_rcu(dev);
    if (in_dev == NULL) {
        rcu_read_unlock();
        return -EINVAL;
    }
    parms = in_dev->arp_parms; // the device's ARP parameters
    __neigh_parms_put(neigh->parms); // drop the reference on the parms assigned at allocation (the table's embedded defaults)
    neigh->parms = neigh_parms_clone(parms); // take a reference on the device's parameters
    rcu_read_unlock();

    if (!dev->header_ops) { // device has no link-layer header operations
        neigh->nud_state = NUD_NOARP;
        neigh->ops = &arp_direct_ops;
        neigh->output = neigh->ops->queue_xmit;
    } else {
        // Devices of the listed types (those compiled in) use arp_broken_ops and return early.
        // NOTE(review): the case labels and the shared body are interleaved with #if blocks;
        // without CONFIG_AX25 the ARPHRD_ROSE label falls through to the empty statement.
        switch (dev->type) { // device type
            default:
                break;
            case ARPHRD_ROSE:
#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
            case ARPHRD_AX25:
#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
            case ARPHRD_NETROM:
#endif
                neigh->ops = &arp_broken_ops;
                neigh->output = neigh->ops->output;
                return 0;
#endif
                ;
        }
        if (neigh->type == RTN_MULTICAST) { // multicast address: map it, no ARP needed
            neigh->nud_state = NUD_NOARP;
            arp_mc_map(addr, neigh->ha, dev, 1);
        } else if (dev->flags & (IFF_NOARP|IFF_LOOPBACK)) { // loopback device or ARP disabled: use the device's own address
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
        } else if (neigh->type == RTN_BROADCAST || dev->flags & IFF_POINTOPOINT) {// broadcast address or point-to-point device: use the broadcast address
            neigh->nud_state = NUD_NOARP;
            memcpy(neigh->ha, dev->broadcast, dev->addr_len);
        }
        if (dev->header_ops->cache) // device can cache hardware headers
            neigh->ops = &arp_hh_ops;
        else
            neigh->ops = &arp_generic_ops;

        if (neigh->nud_state & NUD_VALID) // entry is already in a valid state
            neigh->output = neigh->ops->connected_output;
        else
            neigh->output = neigh->ops->output;
    }
    return 0;
}
    arp代理redo函數
/*
 * proxy_redo callback of arp_tbl: re-runs arp_process() on a packet.
 * Presumably invoked for packets that arp_process() deferred with
 * pneigh_enqueue() — the caller is not shown in this excerpt.
 */
static void parp_redo(struct sk_buff *skb)
{
    arp_process(skb);
}
[/初始化指定函數實現]
[鄰居項操做實現]
    網卡驅動調用 alloc_etherdev() 來構建網卡的 net_device 結構;其中 ether_setup() 是標準以太網卡的初始化函數,會進行相關字段的初始化。
/*
 * Standard Ethernet device setup (abridged by the article: only the
 * header_ops assignment is shown, the rest is elided with "......").
 */
void ether_setup(struct net_device *dev)
{
    dev->header_ops         = &eth_header_ops;
    ......
}
所以上面 arp_constructor 函數中,只要設備的 header_ops 提供了 cache 函數,neigh->ops 就會指向 arp_hh_ops 結構。
/*
 * Neighbour operations selected by arp_constructor() for devices whose
 * header_ops provide a cache function.
 */
static struct neigh_ops arp_hh_ops = {
    .family =               AF_INET,
    .solicit =              arp_solicit,
    .error_report =         arp_error_report,
    // both output paths resolve through the same function
    .output =               neigh_resolve_output,
    .connected_output =     neigh_resolve_output,
    // these two transmit directly
    .hh_output =            dev_queue_xmit,
    .queue_xmit =           dev_queue_xmit,
};
ARP 協議的發送函數是 neigh_resolve_output。看代碼之前,先介紹一下是誰、在哪裏調用了這個函數。
dst->output = ip_output->ip_finish_output->ip_finish_output2
......
//參考上面函數neigh_connect,鄰居項狀態變爲NUD_REACHABLE時,會改變hh_cache->hh_output的指針指向neigh_ops->hh_output.
if (dst->hh) //有hh_cache結構,直接拷貝hh_cache中的硬件地址到以太網頭的目的地址字段,而後調用hh->hh_output(skb);直接發送數據.
    return neigh_hh_output(dst->hh, skb);
else if (dst->neighbour)
    return dst->neighbour->output(skb);
    ......
    在linux 路由實現文章中看ip_route_output_slow的函數流程會告訴你ip_output函數的由來。
/*
 * Output path for a neighbour whose link-layer address may still need to be
 * resolved.
 *
 * Returns 0 when the skb was taken over by neigh_event_send() (non-zero
 * result there), the result of neigh->ops->queue_xmit() when the packet was
 * handed to the device, or -EINVAL after freeing the skb when there is no
 * dst/neighbour or the hardware header could not be built.
 */
int neigh_resolve_output(struct sk_buff *skb)
{
    struct dst_entry *dst = skb->dst;
    struct neighbour *neigh;
    struct net_device *dev;
    int hdr_rc;

    /* No route cache entry, or no neighbour bound to it: drop. */
    if (!dst || !(neigh = dst->neighbour)) {
        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
        goto drop;
    }

    /* Make skb->data point at the network header. */
    __skb_pull(skb, skb_network_offset(skb));

    /* Non-zero means the neighbour is not usable yet; nothing to send now. */
    if (neigh_event_send(neigh, skb))
        return 0;

    dev = neigh->dev;
    if (dev->header_ops->cache && !dst->hh) {
        /*
         * The device can cache hardware headers and this dst has none:
         * take the write lock, re-check, and set up the hh_cache before
         * building the header.
         */
        write_lock_bh(&neigh->lock);
        if (!dst->hh)
            neigh_hh_init(neigh, dst, dst->ops->protocol);
        hdr_rc = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
        write_unlock_bh(&neigh->lock);
    } else {
        /* Only neigh->ha is read here, so the read lock is enough. */
        read_lock_bh(&neigh->lock);
        hdr_rc = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
        read_unlock_bh(&neigh->lock);
    }

    /* Header built: transmit. */
    if (hdr_rc >= 0)
        return neigh->ops->queue_xmit(skb);

drop:
    kfree_skb(skb);
    return -EINVAL;
}
    觸發狀態轉換,並判斷鄰居項是否可用;返回0表示可用,調用者能夠直接發送數據包
/*
 * Record that the neighbour was just used and decide whether the caller may
 * transmit right away.
 *
 * Returns 0 when the state is CONNECTED, DELAY or PROBE; for every other
 * state (NUD_STALE included) the decision is delegated to
 * __neigh_event_send() and its result is returned.
 */
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
    /* Remember the time of last use. */
    neigh->used = jiffies;

    if (neigh->nud_state & (NUD_CONNECTED|NUD_DELAY|NUD_PROBE))
        return 0;

    return __neigh_event_send(neigh, skb);
}
/*
 * Slow path of neigh_event_send(): under neigh->lock, advance the entry's
 * NUD state and, while resolution is pending, park the skb on arp_queue.
 *
 * Returns 0 when the caller may go ahead and transmit the skb, 1 when it
 * must not (the skb, if any, has been queued on arp_queue or freed here).
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
    int rc;
    unsigned long now;

    write_lock_bh(&neigh->lock);
    rc = 0;
    /* Repeat the caller's state test, this time under the lock: already sendable, return 0. */
    if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
        goto out_unlock_bh;

    now = jiffies;
    /* State is neither STALE nor INCOMPLETE. */
    if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
        /* mcast_probes + app_probes is non-zero: start resolving. */
        if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
            atomic_set(&neigh->probes, neigh->parms->ucast_probes);
            neigh->nud_state     = NUD_INCOMPLETE; /* move to INCOMPLETE */
            neigh->updated = jiffies;
            neigh_hold(neigh);
            neigh_add_timer(neigh, now + 1); /* arm the timer to expire at now + 1 */
        } else { /* no probes configured */
            neigh->nud_state = NUD_FAILED; /* mark the entry as failed */
            neigh->updated = jiffies;
            write_unlock_bh(&neigh->lock);
            if (skb)
                kfree_skb(skb); /* drop this skb */
            return 1;
        }
    } else if (neigh->nud_state & NUD_STALE) { /* STALE: rc stays 0, so the packet is sent */
        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
        neigh_hold(neigh);
        neigh->nud_state = NUD_DELAY; /* move to DELAY */
        neigh->updated = jiffies;
        neigh_add_timer(neigh, jiffies + neigh->parms->delay_probe_time); /* arm the timer */
    }
    if (neigh->nud_state == NUD_INCOMPLETE) { /* resolution still pending */
        if (skb) {
            if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { /* queue is at its limit */
                struct sk_buff *buff;
                buff = neigh->arp_queue.next;
                __skb_unlink(buff, &neigh->arp_queue);
                kfree_skb(buff); /* free the skb at the head of the queue */
            }
            __skb_queue_tail(&neigh->arp_queue, skb); /* append the new packet at the tail */
        }
        rc = 1;
    }
out_unlock_bh:
    write_unlock_bh(&neigh->lock);
    return rc;
}
    分配並初始化一個hh_cache結構
/*
 * Attach a hardware-header cache entry for @protocol to @dst.
 *
 * An existing entry on the neighbour's hh list with a matching hh_type is
 * reused; otherwise a new one is allocated, filled in through the device's
 * header_ops->cache() callback and pushed onto the head of n->hh. If the
 * allocation or the callback fails, dst->hh is left untouched.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, __be16 protocol)
{
    struct net_device *dev = dst->dev;
    struct hh_cache *entry = n->hh;

    /* Look for an entry already cached for this protocol. */
    while (entry && entry->hh_type != protocol)
        entry = entry->hh_next;

    if (!entry) {
        /* Nothing cached yet: allocate a fresh entry. */
        entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
        if (entry == NULL)
            return;

        seqlock_init(&entry->hh_lock);
        entry->hh_type = protocol;
        atomic_set(&entry->hh_refcnt, 0);
        entry->hh_next = NULL;

        /* Let the device fill in the cached header; non-zero means failure. */
        if (dev->header_ops->cache(n, entry)) {
            kfree(entry);
            return;
        }

        /* One reference for the neighbour's list, which it now heads. */
        atomic_inc(&entry->hh_refcnt);
        entry->hh_next = n->hh;
        n->hh = entry;

        /* Connected neighbours use the direct output hook, others the resolving one. */
        if (n->nud_state & NUD_CONNECTED)
            entry->hh_output = n->ops->hh_output;
        else
            entry->hh_output = n->ops->output;
    }

    /* A further reference for the dst that now points at the entry. */
    atomic_inc(&entry->hh_refcnt);
    dst->hh = entry;
}
    發送arp請求
/*
 * Send an ARP request for the address stored in neigh->primary_key.
 *
 * @neigh: neighbour entry being resolved.
 * @skb:   optional packet; when present its IP source address may be used
 *         as the sender address of the request.
 */
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
    __be32 saddr = 0;
    u8  *dst_ha = NULL;
    struct net_device *dev = neigh->dev;
    __be32 target = *(__be32*)neigh->primary_key; /* target IP; may be a next-hop gateway address */
    int probes = atomic_read(&neigh->probes); /* probes counted so far */
    struct in_device *in_dev = in_dev_get(dev);

    if (!in_dev)
        return;
    switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { /* policy for choosing the source IP */
        default:
        case 0:  /* use the skb's source address if it is a local address */
            if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
                saddr = ip_hdr(skb)->saddr;
            break;
        case 1: /* use the skb's source only if it is local and on-link for the target; otherwise behave like case 2 */
            if (!skb)
                break;

            saddr = ip_hdr(skb)->saddr;
            if (inet_addr_type(saddr) == RTN_LOCAL) {
                /* saddr should be known to target */
                /* this call only tests the pair; it does not select an address (presumably a same-subnet check — confirm) */
                if (inet_addr_onlink(in_dev, target, saddr))
                    break;
            }
            saddr = 0;
            break;
        case 2:         /* leave saddr 0 so that an address is selected below */
            break;
    }
    /* in_dev cannot be NULL here (NULL returned early above); drop the reference taken by in_dev_get() */
    if (in_dev)
        in_dev_put(in_dev);
    if (!saddr) /* no source address chosen yet: let inet_select_addr() pick one */
        saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
    /* fewer probes than ucast_probes so far: address the request to the known hardware address */
    if ((probes -= neigh->parms->ucast_probes) < 0) {
        if (!(neigh->nud_state & NUD_VALID))
            printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n");
        dst_ha = neigh->ha;
        read_lock_bh(&neigh->lock); /* held across arp_send(), released below */
    } else if ((probes -= neigh->parms->app_probes) < 0) {
        /* within the app_probes budget: hand over to neigh_app_ns() if CONFIG_ARPD is set and send nothing here */
#ifdef CONFIG_ARPD
        neigh_app_ns(neigh);
#endif
        return;
    }
    /* dst_ha is NULL on the remaining path; presumably arp_send() then uses the broadcast address — confirm in arp_send() */
    arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, dst_ha, dev->dev_addr, NULL);
    if (dst_ha)
        read_unlock_bh(&neigh->lock);
}
[/鄰居項操做實現]
    [定時器操做實現]
/*
 * Per-neighbour timer callback.
 *
 * @arg: the struct neighbour pointer, cast to unsigned long.
 *
 * Under neigh->lock it moves the entry between NUD states based on the
 * confirmed/used timestamps, fails the entry once the probe limit is
 * reached, re-arms the timer while the state is still a timed one, and
 * sends a solicit for INCOMPLETE/PROBE entries. One reference on the
 * neighbour is released on exit.
 */
static void neigh_timer_handler(unsigned long arg)
{
    unsigned long now, next;
    struct neighbour *neigh = (struct neighbour *)arg;
    unsigned state;
    int notify = 0;

    write_lock(&neigh->lock);

    state = neigh->nud_state;
    now = jiffies;
    next = now + HZ; /* default expiry: one second from now */
    if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
        printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
        goto out;
    }
    /* Work out the state transition and the next expiry time. */
    if (state & NUD_REACHABLE) {
        if (time_before_eq(now, neigh->confirmed + neigh->parms->reachable_time)) {
            /* Confirmed recently enough: stay REACHABLE until the confirmation ages out. */
            NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
            next = neigh->confirmed + neigh->parms->reachable_time;
        } else if (time_before_eq(now, neigh->used + neigh->parms->delay_probe_time)) {
            /* Not confirmed, but used within delay_probe_time: go to DELAY. */
            NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
            neigh->nud_state = NUD_DELAY;
            neigh->updated = jiffies;
            neigh_suspect(neigh);
            next = now + neigh->parms->delay_probe_time;
        } else {
            /* Neither confirmed nor recently used: go to STALE (not a timed state, so no re-arm below). */
            NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
            neigh->nud_state = NUD_STALE;
            neigh->updated = jiffies;
            neigh_suspect(neigh);
            notify = 1;
        }
    } else if (state & NUD_DELAY) {
        if (time_before_eq(now, neigh->confirmed + neigh->parms->delay_probe_time)) {
            /* A confirmation arrived within delay_probe_time: back to REACHABLE. */
            NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
            neigh->nud_state = NUD_REACHABLE;
            neigh->updated = jiffies;
            neigh_connect(neigh);
            notify = 1;
            next = neigh->confirmed + neigh->parms->reachable_time;
        } else {
            /* No confirmation: start probing with the probe counter reset to 0. */
            NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
            neigh->nud_state = NUD_PROBE;
            neigh->updated = jiffies;
            atomic_set(&neigh->probes, 0);
            next = now + neigh->parms->retrans_time;
        }
    } else {
        /* NUD_PROBE|NUD_INCOMPLETE */
        next = now + neigh->parms->retrans_time;
    }
    /* Still unresolved and the probe count has reached neigh_max_probes(): give up. */
    if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) && atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
        struct sk_buff *skb;
        neigh->nud_state = NUD_FAILED; /* mark the entry as failed */
        neigh->updated = jiffies;
        notify = 1;
        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
        /*
         * Report an error for each queued skb. The lock is dropped around
         * error_report(), and the loop stops if the state is no longer
         * NUD_FAILED when the lock is retaken.
         */
        while (neigh->nud_state == NUD_FAILED && (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
            write_unlock(&neigh->lock);
            neigh->ops->error_report(neigh, skb); /* send an error report for this queued skb */
            write_lock(&neigh->lock);
        }
        skb_queue_purge(&neigh->arp_queue); /* discard whatever is still queued */
    }
    /* Still in a timed state: re-arm, never sooner than HZ/2 from now. */
    if (neigh->nud_state & NUD_IN_TIMER) {
        if (time_before(next, jiffies + HZ/2))
            next = jiffies + HZ/2;
        if (!mod_timer(&neigh->timer, next)) /* update the expiry; take a reference when mod_timer() returns 0 */
            neigh_hold(neigh);
    }
    if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
        struct sk_buff *skb = skb_peek(&neigh->arp_queue); /* peek at the first queued skb without unlinking it */
        /* keep skb alive even if arp_queue overflows */
        if (skb)
            skb_get(skb);

        write_unlock(&neigh->lock);
        neigh->ops->solicit(neigh, skb); /* send the ARP request */
        atomic_inc(&neigh->probes);
        if (skb)
            kfree_skb(skb);
    } else {
out:
        write_unlock(&neigh->lock);
    }
    if (notify)
        neigh_update_notify(neigh);
    neigh_release(neigh);
}
[/定時器操做實現]
[綁定鄰居項]
    在路由緩存中綁定一個鄰居項
/*
 * Bind a neighbour entry to a route cache entry.
 *
 * If @dst has no neighbour yet, look one up (creating it when missing) in
 * the ARP table, keyed by the route's gateway address, and store it in
 * dst->neighbour. For loopback and point-to-point devices the key is 0.
 *
 * Returns 0 on success (including when a neighbour was already bound),
 * -EINVAL when the dst has no device, or the error encoded in the pointer
 * returned by __neigh_lookup_errno().
 */
int arp_bind_neighbour(struct dst_entry *dst)
{
    struct net_device *dev = dst->dev;
    struct neighbour *n = dst->neighbour;

    if (dev == NULL)
        return -EINVAL;
    if (n == NULL) { /* nothing bound yet */
        __be32 nexthop = ((struct rtable*)dst)->rt_gateway; /* next-hop address */
        if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
            nexthop = 0; /* loopback and point-to-point links do not need one */

        /* Look the address up, creating the entry if it does not exist. */
        n = __neigh_lookup_errno(
#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
                dev->type == ARPHRD_ATM ? clip_tbl_hook :
#endif
                &arp_tbl, &nexthop, dev);
        if (IS_ERR(n))
            return PTR_ERR(n);
        dst->neighbour = n;
    }
    /* Success path: the original fell off the end of a non-void function. */
    return 0;
}
/*
 * Find the neighbour for (@pkey, @dev) in @tbl, creating it on a miss.
 *
 * Returns the entry found by neigh_lookup(), or otherwise whatever
 * neigh_create() returns (callers check that result with IS_ERR()).
 */
static inline struct neighbour * __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, struct net_device *dev)
{
    struct neighbour *found = neigh_lookup(tbl, pkey, dev);

    /* Miss: fall back to creating a new entry. */
    if (!found)
        found = neigh_create(tbl, pkey, dev);

    return found;
}
那麼調用流程就是,當發送ip包時查找路由,找到後查找鄰居項,都找到或建立後調用路由緩存項中的dst->output函數。
[/綁定鄰居項]
[arp通知鏈實現]
    通知回調函數
/*
 * Netdevice notifier callback for ARP.
 *
 * Only NETDEV_CHANGEADDR on a device belonging to init_net is acted upon:
 * neigh_changeaddr() is called on the ARP table for that device, followed
 * by rt_cache_flush(0). Always returns NOTIFY_DONE.
 */
static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
    struct net_device *netdev = ptr;

    if (netdev->nd_net == &init_net && event == NETDEV_CHANGEADDR) {
        /* The hardware address changed: existing entries for the device are stale. */
        neigh_changeaddr(&arp_tbl, netdev);
        rt_cache_flush(0);
    }

    return NOTIFY_DONE;
}
[/arp通知鏈實現]
相關文章
相關標籤/搜索