Linux網絡地址轉換分析

Linux網絡地址轉換分析

地址轉換用來改變數據包的源/目的地址和端口,是netfilter的一部分,也是通過在hook點上註冊相應的結構來工作.
Nat註冊的hook點和conntrack相同,只是優先級不同,數據包進入netfilter之後先經過conntrack,再經過nat.
而在數據包離開netfilter之前先經過nat,再經過conntrack.

在ip_conntrack結構中有爲nat定義的一個nat結構,爲什麼把這個結構放在ip_conntrack裏呢?
簡單的說,對於非初始化連接的數據包,即後續的數據包,一旦確定它屬於某個連接,則可以直接利用連接狀態裏的nat信息來進行地址轉換;
而對於初始數據包,必須在nat表裏查找相應的規則,確定了地址轉換的內容後,將這些信息放到連接跟蹤結構的nat參量裏面,供後續的數據包使用.

/*
 * Excerpt of the connection-tracking entry. Members unrelated to NAT are
 * elided ("......"); only the NAT sub-structure is shown.
 */
struct ip_conntrack {
    ......
#ifdef CONFIG_IP_NF_NAT_NEEDED
    /* Per-connection NAT state; only compiled in when NAT support is needed. */
    struct {
        /* bysource hash linkage and per-direction seq data (struct ip_nat_info). */
        struct ip_nat_info info;
        /* Helper-specific NAT data — contents not visible here; TODO confirm. */
        union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
        /* Only present with the MASQUERADE target; presumably an interface index — TODO confirm. */
        int masq_index;
#endif
    } nat;
#endif /* CONFIG_IP_NF_NAT_NEEDED */
    ......
};
/* NAT bookkeeping embedded in every struct ip_conntrack as nat.info. */
struct ip_nat_info
{
    /* List node; ip_nat_setup_info() links it into one of the bysource[] hash chains. */
    struct list_head bysource;
    /* One entry per connection direction; presumably sequence-number adjustment state — TODO confirm. */
    struct ip_nat_seq seq[IP_CT_DIR_MAX];
};

    下面我們來看初始化函數.
/*
 * ip_nat_standalone_init - module init for the standalone NAT module.
 *
 * Initializes the NAT rule table (ip_nat_rule_init) and then registers the
 * hooks listed in ip_nat_ops. Returns the result of nf_register_hooks() on
 * success. On failure it jumps to cleanup labels that are elided ("......")
 * from this excerpt.
 */
static int __init ip_nat_standalone_init(void) //net/ipv4/netfilter/ip_nat_standalone.c
{
    int ret = 0;

    need_conntrack(); // described by the article as an empty function; presumably only forces a module dependency — TODO confirm
#ifdef CONFIG_XFRM  // IPsec related; not covered here
    BUG_ON(ip_nat_decode_session != NULL);
    ip_nat_decode_session = nat_decode_session;
#endif
    // Initialize the NAT rules (table and SNAT/DNAT targets).
    ret = ip_nat_rule_init();
    if (ret < 0) {
        printk("ip_nat_init: can't setup rules.\n");
        goto cleanup_decode_session;
    }
    // Register the NAT hook functions.
    ret = nf_register_hooks(ip_nat_ops, ARRAY_SIZE(ip_nat_ops));
    if (ret < 0) {
        printk("ip_nat_init: can't register hooks.\n");
        goto cleanup_rule_init;
    }
    return ret;
    ......
}
規則初始化
/*
 * iptables target descriptor for "-j SNAT". Registered by ip_nat_rule_init().
 * Valid only in the "nat" table and only on the POST_ROUTING hook.
 */
static struct ipt_target ipt_snat_reg = {
    .name           = "SNAT",
    .target         = ipt_snat_target,      /* called when a rule with this target matches */
    .targetsize     = sizeof(struct ip_nat_multi_range_compat), /* size of the rule's target data */
    .table          = "nat",
    .hooks          = 1 << NF_IP_POST_ROUTING,
    .checkentry     = ipt_snat_checkentry,  /* presumably validates the rule at insertion time — TODO confirm */
};

/*
 * iptables target descriptor for "-j DNAT". Registered by ip_nat_rule_init().
 * Valid only in the "nat" table, on the PRE_ROUTING and LOCAL_OUT hooks.
 */
static struct ipt_target ipt_dnat_reg = {
    .name           = "DNAT",
    .target         = ipt_dnat_target,      /* called when a rule with this target matches */
    .targetsize     = sizeof(struct ip_nat_multi_range_compat), /* size of the rule's target data */
    .table          = "nat",
    .hooks          = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT),
    .checkentry     = ipt_dnat_checkentry,  /* presumably validates the rule at insertion time — TODO confirm */
};
/*
 * ip_nat_rule_init - register the "nat" iptables table and its two targets.
 *
 * Returns 0 on success. On failure of a target registration it jumps to
 * unwind labels that are elided ("......") from this excerpt.
 */
int __init ip_nat_rule_init(void)
{
    int ret;
    // Register the nat table, using nat_initial_table.repl (second argument) as its initial contents.
    ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
    if (ret != 0)
        return ret;

    // Register the SNAT target; per the article this links it into a list of known targets.
    ret = ipt_register_target(&ipt_snat_reg);
    if (ret != 0)
        goto unregister_table;

    // Register the DNAT target.
    ret = ipt_register_target(&ipt_dnat_reg);
    if (ret != 0)
        goto unregister_snat;

    return ret;
    ......
}
    再看另一個文件(ip_nat_core.c)的初始化
/*
 * ip_nat_init - module init for the NAT core.
 *
 * Allocates the bysource hash table with as many buckets as the conntrack
 * hash table, fills the per-protocol handler array, and hooks NAT into
 * conntrack teardown.
 *
 * Returns 0 on success, -ENOMEM if the hash table cannot be allocated.
 */
static int __init ip_nat_init(void) //net/ipv4/netfilter/ip_nat_core.c
{
    size_t proto_idx;
    size_t bucket;

    /* Same number of buckets as the conntrack hash table. */
    ip_nat_htable_size = ip_conntrack_htable_size;

    /* bysource is a global array of list heads, one per hash bucket. */
    bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
    if (bysource == NULL) {
        return -ENOMEM;
    }

    write_lock_bh(&ip_nat_lock);
    /* Default every protocol slot to the "unknown protocol" handler ... */
    for (proto_idx = 0; proto_idx < MAX_IP_NAT_PROTO; proto_idx++) {
        ip_nat_protos[proto_idx] = &ip_nat_unknown_protocol;
    }
    /* ... then install the built-in handlers, indexed by IP protocol number. */
    ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
    ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
    ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
    write_unlock_bh(&ip_nat_lock);

    /* Start every hash chain empty. */
    for (bucket = 0; bucket < ip_nat_htable_size; bucket++) {
        INIT_LIST_HEAD(&bysource[bucket]);
    }

    /*
     * Install the conntrack-destroy callback. Per the article,
     * ip_nat_cleanup_conntrack() unlinks the connection from bysource.
     */
    ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;

    /* Mark the dummy "untracked" conntrack as already NATed so NAT skips it. */
    ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;

    return 0;
}
我們還是假定今後遇到的包全部是tcp協議的. 協議相關的處理請看後面的[協議實現部分].
下面我們一個一個來看這些hook函數.
/*
 * Netfilter hooks registered by ip_nat_standalone_init(). All are IPv4
 * (PF_INET). Destination rewriting runs on PRE_ROUTING and LOCAL_OUT, source
 * rewriting on POST_ROUTING and LOCAL_IN, and ip_nat_adjust runs on
 * POST_ROUTING and LOCAL_IN at priority NF_IP_PRI_NAT_SEQ_ADJUST.
 */
static struct nf_hook_ops ip_nat_ops[] = {
    /* Before packet filtering, change destination */
    {
        .hook           = ip_nat_in,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_PRE_ROUTING,
        .priority       = NF_IP_PRI_NAT_DST,
    },
    /* After packet filtering, change source */
    {
        .hook           = ip_nat_out,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_POST_ROUTING,
        .priority       = NF_IP_PRI_NAT_SRC,
    },
    /* After conntrack, adjust sequence number */
    {
        .hook           = ip_nat_adjust,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_POST_ROUTING,
        .priority       = NF_IP_PRI_NAT_SEQ_ADJUST,
    },
    /* Before packet filtering, change destination (locally generated packets) */
    {
        .hook           = ip_nat_local_fn,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_LOCAL_OUT,
        .priority       = NF_IP_PRI_NAT_DST,
    },
    /* After packet filtering, change source (packets delivered locally) */
    {
        .hook           = ip_nat_fn,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_LOCAL_IN,
        .priority       = NF_IP_PRI_NAT_SRC,
    },
    /* After conntrack, adjust sequence number */
    {
        .hook           = ip_nat_adjust,
        .owner          = THIS_MODULE,
        .pf             = PF_INET,
        .hooknum        = NF_IP_LOCAL_IN,
        .priority       = NF_IP_PRI_NAT_SEQ_ADJUST,
    },
};
NF_IP_PRE_ROUTING,在報文做路由之前執行;
NF_IP_FORWARD,在報文轉向另外一個NIC之前執行;
NF_IP_POST_ROUTING,在報文流出之前執行;
NF_IP_LOCAL_IN,在流入本地的報文做路由之後執行;
NF_IP_LOCAL_OUT,在本地報文作流出路由前執行;

NF_ACCEPT  :繼續正常的報文處理;
NF_DROP    :將報文丟棄;
NF_STOLEN  :由鉤子函數處理了該報文,不要再繼續傳送;
NF_QUEUE   :將報文入隊,一般交由用戶程序處理;
NF_REPEAT  :再次調用該鉤子函數。
NF_STOP    :停止檢測,不再調用後面的Hook函數,報文繼續正常處理。

/*
 * ip_nat_in - NF_IP_PRE_ROUTING hook.
 *
 * Runs the generic NAT path (ip_nat_fn). If the packet survives and its
 * destination address was rewritten, the cached route on the skb is released
 * and cleared.
 *
 * Returns the verdict produced by ip_nat_fn().
 */
static unsigned int ip_nat_in(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in,
        const struct net_device *out,  int (*okfn)(struct sk_buff *))
{
    /* Remember the destination before NAT so a rewrite can be detected. */
    const u_int32_t daddr_before = (*pskb)->nh.iph->daddr;
    const unsigned int verdict = ip_nat_fn(hooknum, pskb, in, out, okfn);

    /* Do not look at the skb again once it has been dropped or stolen. */
    if (verdict == NF_DROP || verdict == NF_STOLEN)
        return verdict;

    if ((*pskb)->nh.iph->daddr != daddr_before) {
        /* Destination changed: discard the old routing decision. */
        dst_release((*pskb)->dst);
        (*pskb)->dst = NULL;
    }
    return verdict;
}
//主要的通用函數
/*
 * ip_nat_fn - generic NAT hook body shared by all NAT hook points.
 *
 * Looks up the packet's conntrack entry. For NEW/RELATED packets whose NAT
 * binding for this manip type is not yet initialized, it establishes one
 * (via a null binding or a rule lookup). It then rewrites the packet with
 * ip_nat_packet().
 *
 * Returns a netfilter verdict: NF_ACCEPT, NF_DROP, or whatever the rule
 * lookup / ip_nat_packet() returns.
 */
static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in,
        const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    struct ip_conntrack *ct;
    enum ip_conntrack_info ctinfo;
    struct ip_nat_info *info;

    //#define HOOK2MANIP(hooknum) ((hooknum) != NF_IP_POST_ROUTING && (hooknum) != NF_IP_LOCAL_IN)
    // The hook point decides the manip type: 0 (IP_NAT_MANIP_SRC) on POST_ROUTING/LOCAL_IN, 1 (IP_NAT_MANIP_DST) elsewhere.
    enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
    // Fetch the conntrack entry and state attached to this packet.
    ct = ip_conntrack_get(*pskb, &ctinfo);
    // Packet is explicitly untracked: nothing for NAT to do.
    if (ct == &ip_conntrack_untracked)
        return NF_ACCEPT;

    // NAT will change header fields, so a hardware checksum state cannot be kept.
    // Per the article, skb_checksum_help() invalidates it for inbound packets and
    // computes the checksum for outbound ones; the flag passed is (out == NULL).
    if ((*pskb)->ip_summed == CHECKSUM_HW)
        if (skb_checksum_help(*pskb, (out == NULL)))
            return NF_DROP;
    // No conntrack entry: let the packet through untranslated, except that
    // ICMP redirects are dropped.
    if (!ct) {
        if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
            struct icmphdr _hdr, *hp;
            hp = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, sizeof(_hdr), &_hdr);
            if (hp != NULL && hp->type == ICMP_REDIRECT)
                return NF_DROP;
        }
        return NF_ACCEPT;
    }
    switch (ctinfo) { // dispatch on the connection state
        case IP_CT_RELATED:
        case IP_CT_RELATED+IP_CT_IS_REPLY:
            // RELATED ICMP packets get their own translation and return here.
            if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
                if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, CTINFO2DIR(ctinfo)))
                    return NF_DROP;
                else
                    return NF_ACCEPT;
            }
            // Non-ICMP RELATED packets deliberately fall through to the NEW handling.
        case IP_CT_NEW: // first packet of a connection
            info = &ct->nat.info;
            // Test the status bits in ct->status to see whether NAT for this manip type is already set up.
            if (!ip_nat_initialized(ct, maniptype)) {
                unsigned int ret;
                if (unlikely(is_confirmed(ct)))
                    ret = alloc_null_binding_confirmed(ct, info, hooknum);
                else if (hooknum == NF_IP_LOCAL_IN)
                    // LOCAL_IN never consults the rule table; it always gets a null
                    // (identity) binding. Per the article, a null binding keeps the flow
                    // uniform (ip_nat_setup_info() is always called) and lets that call
                    // do the NAT bookkeeping even when no address is changed.
                    ret = alloc_null_binding(ct, info, hooknum);
                else  // otherwise look up the nat table to build the binding for this new connection
                    ret = ip_nat_rule_find(pskb, hooknum, in, out, ct, info);
                if (ret != NF_ACCEPT) {
                    return ret;
                }
            } else
                DEBUGP("Already setup manip %s for ct %p\n", maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", ct);
            break;
        default:
            // Only ESTABLISHED states remain. Per the article: after the SYN+ACK
            // ctinfo is IP_CT_ESTABLISHED+IP_CT_IS_REPLY, after the final ACK it is
            // IP_CT_ESTABLISHED.
            IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
            info = &ct->nat.info;
    }
    IP_NF_ASSERT(info);
    // Rewrite the packet according to the connection's NAT state.
    return ip_nat_packet(ct, ctinfo, hooknum, pskb);
}
/*
 * ip_nat_rule_find - consult the "nat" iptables table for a new connection.
 *
 * ipt_do_table() walks the chain attached to this hook; when a rule matches,
 * its target runs (the SNAT/DNAT targets are ipt_snat_target and
 * ipt_dnat_target, registered during NAT rule initialization). Example rule:
 *   iptables -t nat -A PREROUTING -p TCP -i eth0 -d 10.0.0.1 --dport 80 \
 *            -j DNAT --to-destination 192.168.0.1
 *
 * If the table accepts the packet but no target set up NAT for this manip
 * type, a null binding is allocated instead.
 *
 * Returns the table verdict, or the result of alloc_null_binding().
 */
int ip_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out,
        struct ip_conntrack *ct, struct ip_nat_info *info)
{
    int verdict = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);

    /* Anything other than ACCEPT is passed straight back to the caller. */
    if (verdict != NF_ACCEPT)
        return verdict;

    /* A matching target already initialized NAT for this direction. */
    if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
        return verdict;

    /* No rule applied: record an identity mapping. */
    verdict = alloc_null_binding(ct, info, hooknum);
    return verdict;
}
//咱們看一下這個註冊的snat_target
/*
 * ipt_snat_target - iptables target handler for "-j SNAT".
 *
 * targinfo points at a struct ip_nat_multi_range_compat whose first range
 * describes the addresses/ports to translate to. The binding is created by
 * ip_nat_setup_info(), whose verdict is returned.
 */
static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct ipt_target *target, const void *targinfo, void *userinfo)
{
    const struct ip_nat_multi_range_compat *ranges = targinfo;
    enum ip_conntrack_info ctinfo;
    struct ip_conntrack *ct;

    /* Source NAT is only registered for POST_ROUTING. */
    IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);

    ct = ip_conntrack_get(*pskb, &ctinfo);

    /* NAT info is only built for new (or related) connections. */
    IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
    IP_NF_ASSERT(out);

    return ip_nat_setup_info(ct, &ranges->range[0], hooknum);
}
/*
 * ip_nat_setup_info - establish the NAT binding for a connection.
 *
 * Picks a translated tuple within 'range' for the manip type implied by
 * 'hooknum'. If that differs from the current tuple, it rewrites the
 * connection's reply tuple and sets IPS_SRC_NAT or IPS_DST_NAT. It links the
 * connection into the bysource hash the first time it is called for this
 * conntrack, and marks the manip type as done.
 *
 * Always returns NF_ACCEPT.
 */
unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, const struct ip_nat_range *range, unsigned int hooknum)
{
    struct ip_conntrack_tuple curr_tuple, new_tuple;
    struct ip_nat_info *info = &conntrack->nat.info;
    // True when no bit of IPS_NAT_DONE_MASK is set yet, i.e. (presumably) no NAT setup has run for this conntrack.
    int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
    enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);

    // Invert the current reply-direction tuple to get curr_tuple. If no
    // translation has been applied yet, this normally equals the original-direction tuple.
    invert_tuplepr(&curr_tuple, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);

    // Find an unused translated tuple; 'range' gives the allowed addresses/ports.
    // new_tuple receives the translated original-direction tuple.
    get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);

    // Only touch the connection if the translation actually changes the tuple.
    if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
        struct ip_conntrack_tuple reply;

        // Build the reply-direction tuple that corresponds to the translated tuple ...
        invert_tuplepr(&reply, &new_tuple);
        // ... and install it as the connection's reply tuple.
        ip_conntrack_alter_reply(conntrack, &reply);
        // Record which kind of NAT this connection uses.
        if (maniptype == IP_NAT_MANIP_SRC)
            conntrack->status |= IPS_SRC_NAT;
        else
            conntrack->status |= IPS_DST_NAT;
    }
    if (have_to_hash) {
        // Link into the hash chain keyed by the original-direction tuple (hash_by_src).
        unsigned int srchash = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
        write_lock_bh(&ip_nat_lock);
        list_add(&info->bysource, &bysource[srchash]);
        write_unlock_bh(&ip_nat_lock);
    }
    // Mark source or destination NAT setup as done for this connection.
    if (maniptype == IP_NAT_MANIP_DST)
        set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
    else
        set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);

    return NF_ACCEPT;
}
/*
 * get_unique_tuple - choose a translated tuple that no other connection uses.
 *
 * @tuple:      output, the translated tuple
 * @orig_tuple: tuple before translation
 * @range:      allowed addresses / protocol values
 * @conntrack:  connection being set up
 * @maniptype:  source or destination manipulation
 *
 * Three attempts, in order:
 *  1. for SNAT, reuse a mapping found by find_appropriate_src() if it is unused;
 *  2. keep the original protocol part with the IP chosen by
 *     find_best_ips_proto(), if it is acceptable and unused;
 *  3. let the protocol's unique_tuple() handler pick a new protocol value.
 */
static void get_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack_tuple *orig_tuple,
        const struct ip_nat_range *range, struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype)
{
    struct ip_nat_protocol *proto;
    int proto_part_ok;

    /* Attempt 1: an existing source mapping that still yields a free tuple. */
    if (maniptype == IP_NAT_MANIP_SRC
            && find_appropriate_src(orig_tuple, tuple, range)
            && !ip_nat_used_tuple(tuple, conntrack))
        return;

    /* Attempt 2: start from the original tuple and substitute the IP. */
    *tuple = *orig_tuple;
    find_best_ips_proto(tuple, range, conntrack, maniptype);

    /* Protocol handler, looked up by protocol number; released below. */
    proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);

    /* The protocol part is fine if no range was specified or it already lies in range. */
    proto_part_ok = !(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
            || proto->in_range(tuple, maniptype, &range->min, &range->max);

    /* Attempt 3: otherwise, or if the tuple is taken, have the protocol pick one. */
    if (!proto_part_ok || ip_nat_used_tuple(tuple, conntrack))
        proto->unique_tuple(tuple, range, maniptype, conntrack);

    ip_nat_proto_put(proto);
}
/*
 * find_best_ips_proto - pick the translated IP address for a tuple.
 *
 * @tuple:     tuple to modify in place (src.ip or dst.ip is overwritten)
 * @range:     NAT range; only used when IP_NAT_RANGE_MAP_IPS is set
 * @conntrack: connection being set up (not used by this function)
 * @maniptype: selects whether the source or destination IP is rewritten
 *
 * With a single-address range that address is used. Otherwise the address is
 * chosen by hashing the tuple's source and destination IPs into
 * [min_ip, max_ip], so the same address pair always maps to the same
 * translated address.
 */
static void find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range,
        const struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype)
{
    u_int32_t *var_ipp;
    u_int32_t minip, maxip, span, j;

    /* No IP mapping requested: leave the addresses alone. */
    if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
        return;

    /* Point at the address this manip type rewrites. */
    if (maniptype == IP_NAT_MANIP_SRC)
        var_ipp = &tuple->src.ip;
    else
        var_ipp = &tuple->dst.ip;

    /* Only one candidate address. */
    if (range->min_ip == range->max_ip) {
        *var_ipp = range->min_ip;
        return;
    }

    /* Range bounds are stored in network byte order; do the arithmetic in host order. */
    minip = ntohl(range->min_ip);
    maxip = ntohl(range->max_ip);
    j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);

    /*
     * The number of addresses wraps to 0 when the range spans the whole
     * 32-bit space (0.0.0.0 - 255.255.255.255). The original code then
     * computed j % 0. In that case every value is in range, so use the hash
     * directly.
     */
    span = maxip - minip + 1;
    if (span == 0)
        *var_ipp = htonl(minip + j);
    else
        *var_ipp = htonl(minip + j % span);
}
/*
 * ipt_dnat_target - iptables target handler for "-j DNAT".
 *
 * targinfo points at a struct ip_nat_multi_range_compat; its first range is
 * handed to ip_nat_setup_info(), whose verdict is returned. On LOCAL_OUT with
 * an IP mapping, warn_if_extra_mangle() is called with the packet's current
 * destination and the range's min_ip.
 */
static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out,
        unsigned int hooknum, const struct ipt_target *target, const void *targinfo, void *userinfo)
{
    const struct ip_nat_multi_range_compat *ranges = targinfo;
    enum ip_conntrack_info ctinfo;
    struct ip_conntrack *ct;

    ct = ip_conntrack_get(*pskb, &ctinfo);

    /* The connection must exist and be new (or related). */
    IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));

    if (hooknum == NF_IP_LOCAL_OUT) {
        if (ranges->range[0].flags & IP_NAT_RANGE_MAP_IPS)
            warn_if_extra_mangle((*pskb)->nh.iph->daddr, ranges->range[0].min_ip);
    }

    /* Same binding routine as SNAT; the hook number selects the manip type. */
    return ip_nat_setup_info(ct, &ranges->range[0], hooknum);
}
    下面我們繼續看ip_nat_fn函數,它在最後一步調用ip_nat_packet
unsigned int ip_nat_packet(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff **pskb)
{
    enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); //方向
    unsigned long statusbit;
    enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);

    if (mtype == IP_NAT_MANIP_SRC) //是源仍是目的nat
        statusbit = IPS_SRC_NAT;
    else
        statusbit = IPS_DST_NAT;

    //翻轉映射位若是是應答方向, 源改成目的,目的改成源
    if (dir == IP_CT_DIR_REPLY)
        statusbit ^= IPS_NAT_MASK; //異或 相同爲0 不一樣爲1
    //ct->status中NAT類型是在創建NAT信息的ip_nat_setup_info()函數中設置的
    if (ct->status & statusbit) {
        struct ip_conntrack_tuple target;
        //根據當前數據的反方向tuple,獲取轉換後的地址端口的tuple信息到target中
        //若是dir是原始方向那麼獲得的target是修改後的原始方向
        invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);

        //根據target中信息修改當前包中的信息
        if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
            return NF_DROP;
    }
}
/*
 * manip_pkt - rewrite the IP address (and protocol part) of one packet.
 *
 * @proto:     IP protocol number, used to find the protocol NAT handler
 * @pskb:      packet; may be replaced while being made writable
 * @iphdroff:  offset of the IP header inside the skb data
 * @target:    tuple holding the new address/port values
 * @maniptype: rewrite the source or the destination side
 *
 * Returns 1 on success, 0 if the packet could not be made writable or the
 * protocol handler failed.
 */
static int manip_pkt(u_int16_t proto, struct sk_buff **pskb, unsigned int iphdroff, const struct ip_conntrack_tuple *target,
        enum ip_nat_manip_type maniptype)
{
    struct iphdr *iph;
    struct ip_nat_protocol *p;

    /*
     * Per the article, 2.6.1x netfilter no longer linearizes packets, so
     * headers may not be modified in place until the skb has been made
     * writable up to the end of the IP header.
     */
    if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
        return 0;

    iph = (void *)(*pskb)->data + iphdroff;

    /* Let the protocol handler rewrite the transport part first. */
    p = ip_nat_proto_find_get(proto);
    if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
        ip_nat_proto_put(p);
        return 0;
    }
    ip_nat_proto_put(p);

    /*
     * The protocol handler may itself call skb_make_writable() (the TCP one
     * does), which can replace *pskb. Re-read the header pointer so the
     * writes below do not go through a stale pointer.
     */
    iph = (void *)(*pskb)->data + iphdroff;

    /* Incrementally fix the IP header checksum, then store the new address. */
    if (maniptype == IP_NAT_MANIP_SRC) {
        iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, iph->check);
        iph->saddr = target->src.ip;
    } else {
        iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, iph->check);
        iph->daddr = target->dst.ip;
    }
    return 1;
}
    咱們繼續看NF_IP_POST_ROUTING的hook
/*
 * ip_nat_out - NF_IP_POST_ROUTING hook (source NAT).
 *
 * Packets too short to hold a full IP header are accepted untouched;
 * everything else goes through ip_nat_fn(). The IPsec-related parts of the
 * function are elided ("......") from this excerpt.
 */
static unsigned int ip_nat_out(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in,
        const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    ...... // IPsec handling omitted
    unsigned int ret;
    // Skip packets whose length or IP header length is smaller than struct iphdr
    // (the article calls these raw packets).
    if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
        return NF_ACCEPT;

    ret = ip_nat_fn(hooknum, pskb, in, out, okfn); // the generic NAT path
    ......
    return ret;
}
    我們繼續看註冊在NF_IP_POST_ROUTING和NF_IP_LOCAL_IN上的序號調整hook
static unsigned int ip_nat_adjust(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in,
        const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    struct ip_conntrack *ct;
    enum ip_conntrack_info ctinfo;
    //獲取conntrack
    ct = ip_conntrack_get(*pskb, &ctinfo);
    if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
        //調整tcp序號,從新計算效驗和等(忽略)
        if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
            return NF_DROP;
    }
    return NF_ACCEPT;
}
    NF_IP_LOCAL_OUT的hook
/*
 * ip_nat_local_fn - NF_IP_LOCAL_OUT hook (destination NAT for local packets).
 *
 * Runs the generic NAT path. If the connection's destination was translated,
 * it re-routes the packet so it leaves through the route for the new
 * destination.
 *
 * Returns the verdict from ip_nat_fn(), or NF_DROP if re-routing fails.
 */
static unsigned int ip_nat_local_fn(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in,
        const struct net_device *out, int (*okfn)(struct sk_buff *))
{
    struct ip_conntrack *ct;
    enum ip_conntrack_info ctinfo;
    unsigned int ret;

    /* Packets without a complete IP header (the article calls them raw packets) are accepted untouched. */
    if ((*pskb)->len < sizeof(struct iphdr) || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
        return NF_ACCEPT;

    ret = ip_nat_fn(hooknum, pskb, in, out, okfn); /* the generic NAT path */

    if (ret != NF_DROP && ret != NF_STOLEN && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
        enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);

        /*
         * Without NAT the destination of one direction equals the source of
         * the other. A mismatch means the destination was translated.
         */
        if (ct->tuplehash[dir].tuple.dst.ip != ct->tuplehash[!dir].tuple.src.ip)
            if (__ip_route_me_harder(pskb, RTN_UNSPEC)) /* rebind the output route */
                ret = NF_DROP;
    }
    /* The original excerpt was missing this return. */
    return ret;
}
現在我們知道其最主要的核心函數就是ip_nat_fn,主要是要把這個函數看懂.
=============================================================
[協議實現部分]
/*
 * NAT protocol handler for TCP. Installed in ip_nat_protos[IPPROTO_TCP] by
 * ip_nat_init() and looked up through ip_nat_proto_find_get().
 */
struct ip_nat_protocol ip_nat_protocol_tcp = {
    .name                   = "TCP",            // protocol name (string constant)
    .protonum               = IPPROTO_TCP,    // IP protocol number
    .me                     = THIS_MODULE,
    .manip_pkt              = tcp_manip_pkt,    // rewrites the TCP port (source or destination, per manip type) and fixes the checksum
    .in_range               = tcp_in_range,      // tests whether the tuple's port already lies inside the requested port range
    .unique_tuple           = tcp_unique_tuple,  // picks a port so that the translated tuple is unused by any other connection.
    // For TCP source NAT the source port usually has to change along with the
    // address. The chosen port is then bound to this connection, and all later
    // packets of the connection use it. If no free tuple can be found, NAT for
    // the new connection fails. Per the article, many-to-one NAT can therefore
    // handle at most about 65535 TCP connections (NOTE(review): the limit
    // applies per remote endpoint — confirm). TCP and UDP search for an unused
    // port; ICMP searches for an unused ID.
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
    .range_to_nfattr        = ip_nat_port_range_to_nfattr,
    .nfattr_to_range        = ip_nat_port_nfattr_to_range,
#endif
};

/*
 * tcp_manip_pkt - rewrite the TCP port of a packet and fix the TCP checksum.
 *
 * @pskb:      packet; may be replaced while being made writable
 * @iphdroff:  offset of the IP header inside the skb data
 * @tuple:     tuple holding the new address and port
 * @maniptype: rewrite the source or the destination side
 *
 * Returns 1 on success, 0 if the packet could not be made writable. The IP
 * address itself is not modified here; it only enters the checksum update.
 */
static int tcp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff,
        const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype)
{
    struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
    struct tcphdr *hdr;
    unsigned int hdroff = iphdroff + iph->ihl*4; /* offset of the TCP header */
    u32 oldip, newip;
    u16 *portptr, newport, oldport;
    /* Minimum needed to reach the ports when the TCP header is truncated. */
    int hdrsize = 8;

    /* The skb holds a complete TCP header: the checksum can be updated too. */
    if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
        hdrsize = sizeof(struct tcphdr);

    if (!skb_make_writable(pskb, hdroff + hdrsize))
        return 0;

    /* *pskb may have changed; recompute both header pointers. */
    iph = (struct iphdr *)((*pskb)->data + iphdroff);
    hdr = (struct tcphdr *)((*pskb)->data + hdroff);

    if (maniptype == IP_NAT_MANIP_SRC) {
        oldip = iph->saddr;
        newip = tuple->src.ip;
        newport = tuple->src.u.tcp.port;
        portptr = &hdr->source;
    } else {
        oldip = iph->daddr;
        newip = tuple->dst.ip;
        newport = tuple->dst.u.tcp.port;
        portptr = &hdr->dest;
    }

    /* Rewrite the port. */
    oldport = *portptr;
    *portptr = newport;

    /* Truncated header: the checksum field is not available. */
    if (hdrsize < sizeof(*hdr))
        return 1;

    /* Incrementally update the checksum for the changed port and IP address. */
    hdr->check = ip_nat_cheat_check(~oldip, newip, ip_nat_cheat_check(oldport ^ 0xFFFF, newport, hdr->check));
    /* The original excerpt fell off the end here without returning success. */
    return 1;
}
/*
 * tcp_in_range - test whether the tuple's TCP port lies within [min, max].
 *
 * The source port is checked for IP_NAT_MANIP_SRC, the destination port
 * otherwise. Ports are compared in host byte order. Returns 1 if the port is
 * in range (inclusive), 0 if not.
 */
static int tcp_in_range(const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype,
        const union ip_conntrack_manip_proto *min, const union ip_conntrack_manip_proto *max)
{
    const u_int16_t host_port = ntohs(maniptype == IP_NAT_MANIP_SRC
            ? tuple->src.u.tcp.port
            : tuple->dst.u.tcp.port);

    if (host_port < ntohs(min->tcp.port))
        return 0;
    if (host_port > ntohs(max->tcp.port))
        return 0;
    return 1;
}
/*
 * tcp_unique_tuple - choose a TCP port that makes the tuple unique.
 *
 * @tuple:     tuple to modify in place (source or destination port)
 * @range:     NAT range; its ports are used when IP_NAT_RANGE_PROTO_SPECIFIED is set
 * @maniptype: selects whether the source or destination port is rewritten
 * @conntrack: connection being set up
 *
 * Returns 1 if an unused tuple was found, 0 otherwise. With no explicit port
 * range, destination NAT never changes the port (returns 0). Source NAT keeps
 * the port in the same class as the original: 1-511, 600-1023 or 1024-65535.
 */
static int tcp_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range,
        enum ip_nat_manip_type maniptype, const struct ip_conntrack *conntrack)
{
    /* Rolling start offset shared by all calls, so successive searches do not restart at the same port. */
    static u_int16_t port;
    u_int16_t *portptr;
    unsigned int range_size, min, i;

    /* Point at the port this manip type rewrites. */
    if (maniptype == IP_NAT_MANIP_SRC)
        portptr = &tuple->src.u.tcp.port;
    else
        portptr = &tuple->dst.u.tcp.port;

    if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
        /* No port range given: destination NAT leaves the port alone. */
        if (maniptype == IP_NAT_MANIP_DST)
            return 0;

        if (ntohs(*portptr) < 1024) {
            if (ntohs(*portptr) < 512) {
                /* Original port below 512: stay within 1-511. */
                min = 1;
                range_size = 511 - min + 1;
            } else {
                /* Original port in 512-1023: stay within 600-1023. */
                min = 600;
                range_size = 1023 - min + 1;
            }
        } else {
            /* Original port 1024 or above: use 1024-65535. */
            min = 1024;
            range_size = 65535 - 1024 + 1;
        }

    } else {
        /* Explicit port range from the rule. */
        min = ntohs(range->min.tcp.port);
        range_size = ntohs(range->max.tcp.port) - min + 1;
    }

    /* Try each port in the range once, starting at the rolling offset. */
    for (i = 0; i < range_size; i++, port++) {
        *portptr = htons(min + port % range_size);
        /* Per the article, this looks the tuple up in the global conntrack hash. */
        if (!ip_nat_used_tuple(tuple, conntrack)) {
            return 1;
        }
    }
    return 0;
}
[/協議實現部分]
相關文章
相關標籤/搜索