Network Address Translation(轉載)

Network Address Translation

 來源:http://alexanderlaw.blog.hexun.com/9791596_d.html

      地址轉換用來改變源/目的地址/端口,是netfilter的一部分,也是通過在hook點上註冊相應的結構來工作

 

      Nat註冊的hook點和conntrack相同,只是優先級不同,數據包進入netfilter之後先經過conntrack,再經過nat。而在數據包離開netfilter之前先經過nat,再經過conntrack

 

 

1  nat模塊的初始化

1.1       數據結構    ip_nat_standalone.c

在ip_conntrack結構中有爲nat定義的一個nat結構,爲什麼把這個結構放在ip_conntrack裏呢。簡單的說,對於非初始化連接的數據包,即後續的數據包,一旦確定它屬於某個連接,則可以直接利用連接狀態裏的nat信息來進行地址轉換;而對於初始數據包,必須在nat表裏查找相應的規則,確定了地址轉換的內容後,將這些信息放到連接跟蹤結構的nat參量裏面,供後續的數據包使用。

#ifdef CONFIG_IP_NF_NAT_NEEDED
      /* Per-connection NAT state (excerpt from the members of
         struct ip_conntrack; the enclosing struct is not shown here). */
      struct {
           /* NAT bookkeeping for this connection. */
           struct ip_nat_info info;
           /* Helper-specific data; not discussed further in this article. */
           union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
      defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
           int masq_index;
#endif
#if defined(CONFIG_IP_NF_RTSP) || defined(CONFIG_IP_NF_RTSP_MODULE)
           struct ip_nat_rtsp_info rtsp_info;
#endif
      } nat;
#endif /* CONFIG_IP_NF_NAT_NEEDED */

#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
      unsigned long mark;
#endif

 

 

它包括兩個參數,struct ip_nat_info和union ip_conntrack_nat_help,後一個暫時沒什麼用,只看前一個

/* Per-connection NAT information, embedded in the conntrack as nat.info. */
struct ip_nat_info
{
      /* Bitmask with one bit per manip type (1 << IP_NAT_MANIP_SRC,
         1 << IP_NAT_MANIP_DST) recording which kinds of NAT setup have
         already been done for this connection.  According to the original
         article, newer kernels dropped this field and track the same
         state by other means. */
      int initialized;

      /* Number of entries currently used in manips[]. */
      unsigned int num_manips;

      /* The address manipulations to apply to packets of this connection.
         IP_NAT_MAX_MANIPS is the maximum number of translations a packet
         of one connection can undergo on a single pass through netfilter;
         the article gives it as (2*3) = 6, i.e. one rule on each nat chain:

         NF_IP_PRE_ROUTING ==> NF_IP_POST_ROUTING
           a destination manip at PRE_ROUTING needs the reverse-direction
           source manip at POST_ROUTING
         NF_IP_POST_ROUTING ==> NF_IP_PRE_ROUTING
           a source manip at POST_ROUTING needs the reverse-direction
           destination manip at PRE_ROUTING
         NF_IP_LOCAL_OUT ==> NF_IP_LOCAL_IN
           a manip at LOCAL_OUT needs the reverse-direction manip at
           LOCAL_IN

         which adds up to at most 6 translations. */
      struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];

      /* Nodes that link this connection into the two global hash tables
         (bysource and byipsproto) holding every connection that needs
         address translation. */
      struct ip_nat_hash bysource, byipsproto;

      /* Special-purpose NAT helper; usually NULL. */
      struct ip_nat_helper *helper;

      struct ip_nat_seq seq[IP_CT_DIR_MAX];
};

 

ip_nat_info_manip結構定義以下:

/* One recorded address manipulation for a connection. */
struct ip_nat_info_manip
{
      /* Direction the manip applies to: original or reply. */
      u_int8_t direction;

      /* Hook at which the translation takes place. */
      u_int8_t hooknum;

      /* Kind of translation: source or destination. */
      u_int8_t maniptype;

      /* Manipulations to occur at each conntrack in this dirn. */
      struct ip_conntrack_manip manip;
};

 

/* The manipulable part of a tuple: an IP address plus the
   protocol-specific part (the article describes it as the port). */
struct ip_conntrack_manip
{
      u_int32_t ip;
      union ip_conntrack_manip_proto u;
};

 

 

ip_nat_hash結構   ip_nat.h

/* Hash-table node: a list link plus a pointer to the connection it
   stands for. */
struct ip_nat_hash
{
      struct list_head list;
      struct ip_conntrack *conntrack;
};

 

 

1.2       init()函數    ip_nat_standalone.c

/* Module entry point: delegates to init_or_cleanup() in "init" mode (1)
   and hands its result back unchanged. */
static int __init init(void)
{
      const int status = init_or_cleanup(1);

      return status;
}

init()函數直接調用init_or_cleanup()

 

static int init_or_cleanup(int init)

{

      int ret = 0;

/* nat依賴於conntrack,這個函數是空的 */

      need_ip_conntrack();

 

      if (!init) goto cleanup;

/* 初始化nat規則 */

      ret = ip_nat_rule_init();

      if (ret < 0) {

           printk("ip_nat_init: can't setup rules.\n");

           goto cleanup_nothing;

      }

/* 初始化nat */

      ret = ip_nat_init();

      if (ret < 0) {

           printk("ip_nat_init: can't setup rules.\n");

           goto cleanup_rule_init;

      }

/* 註冊hook,共在四個hook點上註冊了函數,分別是:

NF_IP_PRE_ROUTING   ip_nat_fn

NF_IP_POST_ROUTING  ip_nat_out

NF_IP_LOCAL_OUT   ip_nat_local_fn

NF_IP_LOCAL_IN     ip_nat_fn

NF_IP_LOCAL_OUT和NF_IP_LOCAL_IN須要定義CONFIG_IP_NF_NAT_LOCAL

其中在ip_nat_out和ip_nat_local_fn中都會調用ip_nat_fn

*/

      ret = nf_register_hook(&ip_nat_in_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register in hook.\n");

           goto cleanup_nat;

      }

      ret = nf_register_hook(&ip_nat_out_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register out hook.\n");

           goto cleanup_inops;

      }

#ifdef CONFIG_IP_NF_NAT_LOCAL

      ret = nf_register_hook(&ip_nat_local_out_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register local out hook.\n");

           goto cleanup_outops;

      }

      ret = nf_register_hook(&ip_nat_local_in_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register local in hook.\n");

           goto cleanup_localoutops;

      }

#endif

      return ret;

 

 cleanup:

#ifdef CONFIG_IP_NF_NAT_LOCAL

      nf_unregister_hook(&ip_nat_local_in_ops);

 cleanup_localoutops:

      nf_unregister_hook(&ip_nat_local_out_ops);

 cleanup_outops:

#endif

      nf_unregister_hook(&ip_nat_out_ops);

 cleanup_inops:

      nf_unregister_hook(&ip_nat_in_ops);

 cleanup_nat:

      ip_nat_cleanup();

 cleanup_rule_init:

      ip_nat_rule_cleanup();

 cleanup_nothing:

      MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);

      return ret;

}

 

 

1.3  ip_nat_rule_init()函數  ip_nat_rule.c

/*
 * Register the "nat" table and its SNAT and DNAT targets.
 *
 * Returns 0 on success.  If any registration fails, everything that was
 * registered before it is unregistered again and the error code of the
 * failing call is returned.
 */
int __init ip_nat_rule_init(void)
{
      int err;

      /* The table comes first; nothing to undo if this fails. */
      err = ipt_register_table(&nat_table);
      if (err != 0)
            return err;

      /* Then the two targets: SNAT, followed by DNAT. */
      err = ipt_register_target(&ipt_snat_reg);
      if (err == 0) {
            err = ipt_register_target(&ipt_dnat_reg);
            if (err == 0)
                  return err;

            /* DNAT failed: take SNAT back out. */
            ipt_unregister_target(&ipt_snat_reg);
      }

      /* A target failed: take the table back out. */
      ipt_unregister_table(&nat_table);
      return err;
}

 

看一下nat表的初始化:

/* The "nat" iptables table.  It is seeded from nat_initial_table.repl;
   per the original article it starts out with no rules, like the filter
   table. */
static struct ipt_table nat_table = {
      .name        = "nat",
      .table         = &nat_initial_table.repl,
      .valid_hooks     = NAT_VALID_HOOKS,
      .lock          = RW_LOCK_UNLOCKED,
      .me      = THIS_MODULE,
};

和filter表的初始化相似,一開始規則都是空的

 

兩個target的初始化:

/* Registration record for the "SNAT" target: ipt_snat_target is the
   target function, ipt_snat_checkentry the checkentry callback. */
static struct ipt_target ipt_snat_reg = {
      .name        = "SNAT",
      .target       = ipt_snat_target,
      .checkentry = ipt_snat_checkentry,
};

 

/* Registration record for the "DNAT" target: ipt_dnat_target is the
   target function, ipt_dnat_checkentry the checkentry callback. */
static struct ipt_target ipt_dnat_reg = {
      .name        = "DNAT",
      .target       = ipt_dnat_target,
      .checkentry = ipt_dnat_checkentry,
};

兩個target函數分別是ipt_snat_target和ipt_dnat_target

 

 

1.4  ip_nat_init()函數  ip_nat_core.c

/*
 * Set up the NAT core: allocate the two hash tables, register the
 * built-in protocols, install the conntrack destroy callback and mark
 * the untracked conntrack so that NAT skips it.
 *
 * Returns 0 on success, -ENOMEM if the hash tables cannot be allocated.
 */
int __init ip_nat_init(void)
{
      size_t bucket;

      /* The NAT hash tables use the same size as the conntrack table. */
      ip_nat_htable_size = ip_conntrack_htable_size;

      /* One allocation holds both tables back to back: bysource takes the
         first ip_nat_htable_size list heads, byipsproto the second. */
      bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
      if (bysource == NULL)
            return -ENOMEM;
      byipsproto = &bysource[ip_nat_htable_size];

      /* Register the built-in protocols on the global protos list. */
      WRITE_LOCK(&ip_nat_lock);
      list_append(&protos, &ip_nat_protocol_tcp);
      list_append(&protos, &ip_nat_protocol_udp);
      list_append(&protos, &ip_nat_protocol_icmp);
      WRITE_UNLOCK(&ip_nat_lock);

      /* Every bucket of both tables starts as an empty list. */
      bucket = 0;
      while (bucket < ip_nat_htable_size) {
            INIT_LIST_HEAD(&bysource[bucket]);
            INIT_LIST_HEAD(&byipsproto[bucket]);
            bucket++;
      }

      /* Install the callback conntrack invokes when a connection is
         destroyed; per the original article it removes the connection's
         nodes from bysource and byipsproto. */
      IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
      ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;

      /* Initialize fake conntrack so that NAT will skip it */
      ip_conntrack_untracked.nat.info.initialized |= (1 << IP_NAT_MANIP_SRC);
      ip_conntrack_untracked.nat.info.initialized |= (1 << IP_NAT_MANIP_DST);

      return 0;
}

 

 

2  地址轉換的過程

2.1  ip_nat_fn函數  ip_nat_standalone.c

ip_nat_fn()是nat中的主要函數,nat在netfilter中註冊了四個hook,最終都會調用該函數

/*
 * Main NAT hook function; per the original article all four NAT hooks
 * end up here.
 *
 * For a packet of a new (or related) connection it makes sure the NAT
 * bindings for this hook's manip type exist, creating them through the
 * master's expect callback, a null binding, or a nat-table rule lookup,
 * and then applies the bindings to the packet via do_bindings().
 *
 * Returns a netfilter verdict: NF_DROP / NF_ACCEPT on the early-exit
 * paths, the non-ACCEPT verdict of the binding setup, or whatever
 * do_bindings() returns.
 */
static unsigned int
ip_nat_fn(unsigned int hooknum,
        struct sk_buff **pskb,
        const struct net_device *in,
        const struct net_device *out,
        int (*okfn)(struct sk_buff *))
{
      struct ip_conntrack *ct;
      enum ip_conntrack_info ctinfo;
      struct ip_nat_info *info;
        /* The hook determines whether this is source or destination NAT;
           per the original annotation 0 (IP_NAT_MANIP_SRC) means source
           and 1 (IP_NAT_MANIP_DST) means destination translation. */
      enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);

      /* Fragments were dealt with by earlier code, so none should show
         up here. */
      IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
                  & htons(IP_MF|IP_OFFSET)));

      /* NAT may rewrite the packet, so set NFC_UNKNOWN in nfcache up
         front; per the original annotation the flag is revisited when
         the packet is actually modified. */
      (*pskb)->nfcache |= NFC_UNKNOWN;

      /* Checksum: for CHECKSUM_HW packets call skb_checksum_help() and
         drop the packet if it reports failure. */
      if ((*pskb)->ip_summed == CHECKSUM_HW)
           if (skb_checksum_help(pskb, (out == NULL)))
                 return NF_DROP;

      /* Look up the connection this packet belongs to. */
      ct = ip_conntrack_get(*pskb, &ctinfo);

      /* No connection: let the packet through untranslated, except for
         ICMP redirects, which are dropped. */
      if (!ct) {
           /* Exception: ICMP redirect to new connection (not in
                   hash table yet).  We must not let this through, in
                   case we're doing NAT to the same network. */
           if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
                 struct icmphdr hdr;

                 if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
                              &hdr, sizeof(hdr)) == 0
                     && hdr.type == ICMP_REDIRECT)
                      return NF_DROP;
           }
           return NF_ACCEPT;
      }

      /* Dispatch on the connection state. */
      switch (ctinfo) {
      case IP_CT_RELATED:
      case IP_CT_RELATED+IP_CT_IS_REPLY:
           if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
                 if (!icmp_reply_translation(pskb, ct, hooknum,
                                      CTINFO2DIR(ctinfo)))
                      return NF_DROP;
                 else
                       return NF_ACCEPT;
           }
           /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */

      /* Packet of a new connection. */
      case IP_CT_NEW:
           info = &ct->nat.info;

           WRITE_LOCK(&ip_nat_lock);
      /* If this manip type is already set up for the connection, skip
         the setup and go straight to translating.  Without
         CONFIG_IP_NF_NAT_LOCAL the setup is also skipped for connections
         that are already confirmed. */
      if (!(info->initialized & (1 << maniptype))
#ifndef CONFIG_IP_NF_NAT_LOCAL
               && !(ct->status & IPS_CONFIRMED)
#endif
               ) {
                 unsigned int ret;

                 /* Connection created from an expectation whose master
                    has a helper with an expect callback: call it. */
                 if (ct->master
                     && master_ct(ct)->nat.info.helper
                     && master_ct(ct)->nat.info.helper->expect) {
                      ret = call_expect(master_ct(ct), pskb,
                                    hooknum, ct, info);
                 } else {
#ifdef CONFIG_IP_NF_NAT_LOCAL
                      /* LOCAL_IN hook doesn't have a chain!  */
                      if (hooknum == NF_IP_LOCAL_IN)
                            ret = alloc_null_binding(ct, info,
                                              hooknum);
                      else
#endif
                 /* Not yet set up and not created by an expectation:
                    this is an initial packet, so look for a matching
                    rule in the nat table. */
                 ret = ip_nat_rule_find(pskb, hooknum, in, out, ct, info);
                 }

                 /* Any verdict other than ACCEPT ends processing here;
                    release the lock before returning. */
                 if (ret != NF_ACCEPT) {
                      WRITE_UNLOCK(&ip_nat_lock);
                      return ret;
                 }
           } else
                 /* NAT for this manip type is already set up; only emit
                    a debug message. */
                 DEBUGP("Already setup manip %s for ct %p\n",
                        maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
                        ct);
           WRITE_UNLOCK(&ip_nat_lock);
           break;

      default:
           /* ESTABLISHED */
           IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
                      || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
           info = &ct->nat.info;
      }

      IP_NF_ASSERT(info);

      /* The conntrack-side bindings are in place; now rewrite the
         addresses in the packet itself. */
      return do_bindings(ct, ctinfo, info, hooknum, pskb);
}

 

 

2.2           ip_nat_rule_find函數  ip_nat_rule.c

int ip_nat_rule_find(struct sk_buff **pskb,

                unsigned int hooknum,

                const struct net_device *in,

                const struct net_device *out,

                struct ip_conntrack *ct,

                struct ip_nat_info *info)

{

      int ret;

/* 調用ipt_do_tables函數,第五個參數是&nat_table  */

      ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);

 

      if (ret == NF_ACCEPT) {

           if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))

                 /* NUL mapping */

                 ret = alloc_null_binding(ct, info, hooknum);

      }

      return ret;

}

nat表和filter表一樣,都是通過調用ipt_do_table函數來工作的

ipt_do_table查找表中的所有entry,如果match全都匹配,則調用target函數

此時的target函數就是在nat初始化時註冊的ipt_snat_target和ipt_dnat_target

 

 

2.3           ipt_s(d)nat_target函數  ip_nat_rule.c

static unsigned int ipt_snat_target(struct sk_buff **pskb,

                          const struct net_device *in,

                          const struct net_device *out,

                          unsigned int hooknum,

                          const void *targinfo,

                          void *userinfo)

{

      struct ip_conntrack *ct;

      enum ip_conntrack_info ctinfo;

 

      IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);

 

/* 取得數據包的鏈接狀態 */

      ct = ip_conntrack_get(*pskb, &ctinfo);

 

      /* Connection must be valid and new. */

      IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));

      IP_NF_ASSERT(out);

 

      return ip_nat_setup_info(ct, targinfo, hooknum);

}

 

ipt_dnat_target和ipt_snat_target差不多,都是調用ip_nat_setup_info完成地址轉換,這裏的targinfo參數來自ipt_entry_target結構的unsigned char data[0]參數,一個長度爲0的數組,指向target的末尾

static unsigned int ipt_dnat_target(struct sk_buff **pskb,

                          const struct net_device *in,

                          const struct net_device *out,

                          unsigned int hooknum,

                          const void *targinfo,

                          void *userinfo)

{

      struct ip_conntrack *ct;

      enum ip_conntrack_info ctinfo;

 

#ifdef CONFIG_IP_NF_NAT_LOCAL

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING

                || hooknum == NF_IP_LOCAL_OUT);

#else

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING);

#endif

 

      ct = ip_conntrack_get(*pskb, &ctinfo);

 

      /* Connection must be valid and new. */

      IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));

 

      return ip_nat_setup_info(ct, targinfo, hooknum);

}

 

2.4           ip_nat_setup_info()函數  ip_nat_core.c

 

unsigned int

ip_nat_setup_info(struct ip_conntrack *conntrack,   /* 數據包的鏈接狀態 */

             const struct ip_nat_multi_range *mr,       /* 轉換後的地址池 */

             unsigned int hooknum)                    /* hook點 */

{

      struct ip_conntrack_tuple new_tuple, inv_tuple, reply;

      struct ip_conntrack_tuple orig_tp;

      struct ip_nat_info *info = &conntrack->nat.info;

      int in_hashes = info->initialized;

 

      MUST_BE_WRITE_LOCKED(&ip_nat_lock);

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING

                || hooknum == NF_IP_POST_ROUTING

                || hooknum == NF_IP_LOCAL_IN

                || hooknum == NF_IP_LOCAL_OUT);

      IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

      IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));

 

      /* 對當前狀態的應答方向的tuple調用invert_tuplepr取反,獲得一個orig_tp,若是以前沒有進行過地址或端口轉換,一般這裏獲得的orig_tp就等於初始方向的tuple */

      invert_tuplepr(&orig_tp, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);

 

      do {

      /* 進行地址轉換,new_tuple爲轉換後的地址的tuple */

        if (!get_unique_tuple(&new_tuple,&orig_tp,mr,conntrack,hooknum))

        {

                 DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",

                        conntrack);

                 return NF_DROP;

           }

 

      /* 對new_tuple取反,獲得通過轉換後的應答方向的tuple  */

           invert_tuplepr(&reply, &new_tuple);

 

      /* 修改conntrack中的應答方向的reply tuple,在這以前還要檢查若是該reply tuple已經在hash表裏存在了,即被其它鏈接佔用(存在初始方向tuple不一樣,應答方向tuple相同的鏈接),則還要回頭繼續修改 */

      } while (!ip_conntrack_alter_reply(conntrack, &reply));

 

      /* 對orig_tp取反,實際上又獲得了原conntrack的reply_tuple…… */

      invert_tuplepr(&inv_tuple, &orig_tp);

 

      /* 將所做轉換的相關信息保存到鏈接狀態conntrack裏,這樣該鏈接的後續數據包就能夠直接利用這些信息進行地址轉換,不用從新查找nat表了 */

      /* 若是是源地址改變(SNAT) */

      if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {

           /* In this direction, a source manip. */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_ORIGINAL, hooknum,

                    IP_NAT_MANIP_SRC, new_tuple.src });

 

           IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

 

           /* 在相對的hook點上必然有對應的目的地址改變(DNAT) */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                      /* opposite_hook便是求當前hook點的對應hook點 */

                  { IP_CT_DIR_REPLY, opposite_hook[hooknum],

                    IP_NAT_MANIP_DST, orig_tp.src });

           IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);

      }

 

      /* 若是是目的地址改變(DNAT) */

      if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {

           /* In this direction, a destination manip */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_ORIGINAL, hooknum,

                    IP_NAT_MANIP_DST, reply.src });

 

           IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

 

           /* In the reverse direction, a source manip. */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_REPLY, opposite_hook[hooknum],

                    IP_NAT_MANIP_SRC, inv_tuple.src });

           IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);

      }

 

      /* 若是這個鏈接不是某個鏈接的預期的鏈接(子鏈接),則在全局鏈表helpers查找對應的ip_nat_helper結構 */

      if (!conntrack->master)

           info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, &reply);

 

      /* 轉換完了,標記一下 */

      info->initialized |= (1 << HOOK2MANIP(hooknum));

 

      /* 將所作的地址轉換的數據結構加入到全局hash表bysource和byipsproto中,若是該地址轉換是某地址轉換基礎上的再次轉換,則用replace_in_hashes替換,反之則用place_in_hashes */

      if (in_hashes) {

           IP_NF_ASSERT(info->bysource.conntrack);

           replace_in_hashes(conntrack, info);

      } else {

           place_in_hashes(conntrack, info);

      }

 

      return NF_ACCEPT;

}

 

 

2.5           get_unique_tuple ()函數  ip_nat_core.c

get_unique_tuple,獲得一個唯一的tuple,就是說除了要做地址/端口的轉換,還要保證轉換得到的tuple是唯一的。

很複雜的一個函數。。。

第三個參數是用來替換的地址或端口的範圍

/*
 * Produce in *tuple a translated version of *orig_tuple that lies within
 * the ranges in mrr and is not already used by another connection.
 *
 * tuple:      out, the translated tuple
 * orig_tuple: the tuple before translation
 * mrr:        address/port ranges to translate into
 * conntrack:  the connection being set up
 * hooknum:    hook at which the translation happens
 *
 * Returns 1 if a unique tuple was found, 0 otherwise.
 */
static int
get_unique_tuple(struct ip_conntrack_tuple *tuple,
            const struct ip_conntrack_tuple *orig_tuple,
            const struct ip_nat_multi_range *mrr,
            struct ip_conntrack *conntrack,
            unsigned int hooknum)
{
      struct ip_nat_protocol *proto
           = find_nat_proto(orig_tuple->dst.protonum);
      struct ip_nat_range *rptr;
      unsigned int i;
      int ret;

      /* The const is cast away because IP_NAT_RANGE_FULL is temporarily
         set on ranges below and cleared again at clear_fulls. */
      struct ip_nat_multi_range *mr = (void *)mrr;

      /* The original article notes that this section is hard to follow
         and relates to P2P / UDP hole punching. */
      if (hooknum == NF_IP_POST_ROUTING) {
           /* ip_conntrack_manip holds an IP address and a protocol part
              (the port). */
           struct ip_conntrack_manip *manip;

           /* Per the original annotation: find_appropriate_src() hashes
              orig_tuple with hash_by_src() and searches bysource; if a
              connection with the same source address and port exists
              and its mapping already lies within the requested range,
              that mapping is returned. */
           manip = find_appropriate_src(orig_tuple, mr);
           if (manip) {
                 /* Apply same source manipulation. */
                 *tuple = ((struct ip_conntrack_tuple)
                        { *manip, orig_tuple->dst });
                 DEBUGP("get_unique_tuple: Found current src map\n");
                 /* Only usable if no other connection already uses this
                    tuple. */
                 if (!ip_nat_used_tuple(tuple, conntrack))
                      return 1;
           }
      }

      /* orig_tuple is the tuple before translation, tuple the one after. */
      *tuple = *orig_tuple;

      /* Keep trying the address/port ranges in mr until one yields a
         unique tuple. */
      while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))
             != NULL) {
           DEBUGP("Found best for "); DUMP_TUPLE(tuple);

           /* Per the original annotation:
                IP_NAT_MANIP_SRC               do SNAT
                IP_NAT_MANIP_DST               do DNAT
                IP_NAT_RANGE_MAP_IPS           the range specifies IP addresses
                IP_NAT_RANGE_PROTO_SPECIFIED   the range specifies ports

              Accept the tuple as it stands if no protocol range was
              specified, or the tuple already lies within it, and the
              tuple is not in use. */
           if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
                || proto->in_range(tuple, HOOK2MANIP(hooknum),
                            &rptr->min, &rptr->max))
               && !ip_nat_used_tuple(tuple, conntrack)) {
                 ret = 1;
                 goto clear_fulls;
           } else {
                 /* Otherwise let the protocol try to make it unique. */
                 if (proto->unique_tuple(tuple, rptr,
                                  HOOK2MANIP(hooknum),
                                  conntrack)) {
                      /* Must be unique. */
                      IP_NF_ASSERT(!ip_nat_used_tuple(tuple,
                                             conntrack));
                      ret = 1;
                      goto clear_fulls;
                 } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
                      /* Try implicit source NAT; protocol
                                   may be able to play with ports to
                                   make it unique. */
                      struct ip_nat_range r
                            = { IP_NAT_RANGE_MAP_IPS,
                                tuple->src.ip, tuple->src.ip,
                                { 0 }, { 0 } };
                      DEBUGP("Trying implicit mapping\n");
                      if (proto->unique_tuple(tuple, &r,
                                       IP_NAT_MANIP_SRC,
                                       conntrack)) {
                            /* Must be unique. */
                            IP_NF_ASSERT(!ip_nat_used_tuple
                                       (tuple, conntrack));
                            ret = 1;
                            goto clear_fulls;
                      }
                 }
                 DEBUGP("Protocol can't get unique tuple %u.\n",
                        hooknum);
           }

           /* Eliminate that from range, and try again. */
           rptr->flags |= IP_NAT_RANGE_FULL;
           *tuple = *orig_tuple;
      }

      /* Every range was exhausted without finding a unique tuple. */
      ret = 0;

 clear_fulls:
      /* Clear full flags. */
      IP_NF_ASSERT(mr->rangesize >= 1);
      for (i = 0; i < mr->rangesize; i++)
           mr->range[i].flags &= ~IP_NAT_RANGE_FULL;

      return ret;
}

 

Network Address Translation

 

      地址轉換用來改變源/目的地址/端口,是netfilter的一部分,也是通過在hook點上註冊相應的結構來工作

 

 

      Nat註冊的hook點和conntrack相同,只是優先級不同,數據包進入netfilter之後先經過conntrack,再經過nat。而在數據包離開netfilter之前先經過nat,再經過conntrack

 

 

1  nat模塊的初始化

1.1       數據結構    ip_nat_standalone.c

在ip_conntrack結構中有爲nat定義的一個nat結構,爲什麼把這個結構放在ip_conntrack裏呢。簡單的說,對於非初始化連接的數據包,即後續的數據包,一旦確定它屬於某個連接,則可以直接利用連接狀態裏的nat信息來進行地址轉換;而對於初始數據包,必須在nat表裏查找相應的規則,確定了地址轉換的內容後,將這些信息放到連接跟蹤結構的nat參量裏面,供後續的數據包使用。

#ifdef CONFIG_IP_NF_NAT_NEEDED
      /* Per-connection NAT state (excerpt from the members of
         struct ip_conntrack; the enclosing struct is not shown here). */
      struct {
           /* NAT bookkeeping for this connection. */
           struct ip_nat_info info;
           /* Helper-specific data; not discussed further in this article. */
           union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
      defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
           int masq_index;
#endif
#if defined(CONFIG_IP_NF_RTSP) || defined(CONFIG_IP_NF_RTSP_MODULE)
           struct ip_nat_rtsp_info rtsp_info;
#endif
      } nat;
#endif /* CONFIG_IP_NF_NAT_NEEDED */

#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
      unsigned long mark;
#endif

 

 

它包括兩個參數,struct ip_nat_info和union ip_conntrack_nat_help,後一個暫時沒什麼用,只看前一個

/* Per-connection NAT information, embedded in the conntrack as nat.info. */
struct ip_nat_info
{
      /* Bitmask with one bit per manip type (1 << IP_NAT_MANIP_SRC,
         1 << IP_NAT_MANIP_DST) recording which kinds of NAT setup have
         already been done for this connection.  According to the original
         article, newer kernels dropped this field and track the same
         state by other means. */
      int initialized;

      /* Number of entries currently used in manips[]. */
      unsigned int num_manips;

      /* The address manipulations to apply to packets of this connection.
         IP_NAT_MAX_MANIPS is the maximum number of translations a packet
         of one connection can undergo on a single pass through netfilter;
         the article gives it as (2*3) = 6, i.e. one rule on each nat chain:

         NF_IP_PRE_ROUTING ==> NF_IP_POST_ROUTING
           a destination manip at PRE_ROUTING needs the reverse-direction
           source manip at POST_ROUTING
         NF_IP_POST_ROUTING ==> NF_IP_PRE_ROUTING
           a source manip at POST_ROUTING needs the reverse-direction
           destination manip at PRE_ROUTING
         NF_IP_LOCAL_OUT ==> NF_IP_LOCAL_IN
           a manip at LOCAL_OUT needs the reverse-direction manip at
           LOCAL_IN

         which adds up to at most 6 translations. */
      struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];

      /* Nodes that link this connection into the two global hash tables
         (bysource and byipsproto) holding every connection that needs
         address translation. */
      struct ip_nat_hash bysource, byipsproto;

      /* Special-purpose NAT helper; usually NULL. */
      struct ip_nat_helper *helper;

      struct ip_nat_seq seq[IP_CT_DIR_MAX];
};

 

ip_nat_info_manip結構定義以下:

/* One recorded address manipulation for a connection. */
struct ip_nat_info_manip
{
      /* Direction the manip applies to: original or reply. */
      u_int8_t direction;

      /* Hook at which the translation takes place. */
      u_int8_t hooknum;

      /* Kind of translation: source or destination. */
      u_int8_t maniptype;

      /* Manipulations to occur at each conntrack in this dirn. */
      struct ip_conntrack_manip manip;
};

 

/* The manipulable part of a tuple: an IP address plus the
   protocol-specific part (the article describes it as the port). */
struct ip_conntrack_manip
{
      u_int32_t ip;
      union ip_conntrack_manip_proto u;
};

 

 

ip_nat_hash結構   ip_nat.h

/* Hash-table node: a list link plus a pointer to the connection it
   stands for. */
struct ip_nat_hash
{
      struct list_head list;
      struct ip_conntrack *conntrack;
};

 

 

1.2       init()函數    ip_nat_standalone.c

/* Module entry point: delegates to init_or_cleanup() in "init" mode (1)
   and hands its result back unchanged. */
static int __init init(void)
{
      const int status = init_or_cleanup(1);

      return status;
}

init()函數直接調用init_or_cleanup()

 

static int init_or_cleanup(int init)

{

      int ret = 0;

/* nat依賴於conntrack,這個函數是空的 */

      need_ip_conntrack();

 

      if (!init) goto cleanup;

/* 初始化nat規則 */

      ret = ip_nat_rule_init();

      if (ret < 0) {

           printk("ip_nat_init: can't setup rules.\n");

           goto cleanup_nothing;

      }

/* 初始化nat */

      ret = ip_nat_init();

      if (ret < 0) {

           printk("ip_nat_init: can't setup rules.\n");

           goto cleanup_rule_init;

      }

/* 註冊hook,共在四個hook點上註冊了函數,分別是:

NF_IP_PRE_ROUTING   ip_nat_fn

NF_IP_POST_ROUTING  ip_nat_out

NF_IP_LOCAL_OUT   ip_nat_local_fn

NF_IP_LOCAL_IN     ip_nat_fn

NF_IP_LOCAL_OUT和NF_IP_LOCAL_IN須要定義CONFIG_IP_NF_NAT_LOCAL

其中在ip_nat_out和ip_nat_local_fn中都會調用ip_nat_fn

*/

      ret = nf_register_hook(&ip_nat_in_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register in hook.\n");

           goto cleanup_nat;

      }

      ret = nf_register_hook(&ip_nat_out_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register out hook.\n");

           goto cleanup_inops;

      }

#ifdef CONFIG_IP_NF_NAT_LOCAL

      ret = nf_register_hook(&ip_nat_local_out_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register local out hook.\n");

           goto cleanup_outops;

      }

      ret = nf_register_hook(&ip_nat_local_in_ops);

      if (ret < 0) {

           printk("ip_nat_init: can't register local in hook.\n");

           goto cleanup_localoutops;

      }

#endif

      return ret;

 

 cleanup:

#ifdef CONFIG_IP_NF_NAT_LOCAL

      nf_unregister_hook(&ip_nat_local_in_ops);

 cleanup_localoutops:

      nf_unregister_hook(&ip_nat_local_out_ops);

 cleanup_outops:

#endif

      nf_unregister_hook(&ip_nat_out_ops);

 cleanup_inops:

      nf_unregister_hook(&ip_nat_in_ops);

 cleanup_nat:

      ip_nat_cleanup();

 cleanup_rule_init:

      ip_nat_rule_cleanup();

 cleanup_nothing:

      MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);

      return ret;

}

 

 

1.3  ip_nat_rule_init()函數  ip_nat_rule.c

/*
 * Register the "nat" table and its SNAT and DNAT targets.
 *
 * Returns 0 on success.  If any registration fails, everything that was
 * registered before it is unregistered again and the error code of the
 * failing call is returned.
 */
int __init ip_nat_rule_init(void)
{
      int err;

      /* The table comes first; nothing to undo if this fails. */
      err = ipt_register_table(&nat_table);
      if (err != 0)
            return err;

      /* Then the two targets: SNAT, followed by DNAT. */
      err = ipt_register_target(&ipt_snat_reg);
      if (err == 0) {
            err = ipt_register_target(&ipt_dnat_reg);
            if (err == 0)
                  return err;

            /* DNAT failed: take SNAT back out. */
            ipt_unregister_target(&ipt_snat_reg);
      }

      /* A target failed: take the table back out. */
      ipt_unregister_table(&nat_table);
      return err;
}

 

看一下nat表的初始化:

/* The "nat" iptables table.  It is seeded from nat_initial_table.repl;
   per the original article it starts out with no rules, like the filter
   table. */
static struct ipt_table nat_table = {
      .name        = "nat",
      .table         = &nat_initial_table.repl,
      .valid_hooks     = NAT_VALID_HOOKS,
      .lock          = RW_LOCK_UNLOCKED,
      .me      = THIS_MODULE,
};

和filter表的初始化相似,一開始規則都是空的

 

兩個target的初始化:

/* Registration record for the "SNAT" target: ipt_snat_target is the
   target function, ipt_snat_checkentry the checkentry callback. */
static struct ipt_target ipt_snat_reg = {
      .name        = "SNAT",
      .target       = ipt_snat_target,
      .checkentry = ipt_snat_checkentry,
};

 

/* Registration record for the "DNAT" target: ipt_dnat_target is the
   target function, ipt_dnat_checkentry the checkentry callback. */
static struct ipt_target ipt_dnat_reg = {
      .name        = "DNAT",
      .target       = ipt_dnat_target,
      .checkentry = ipt_dnat_checkentry,
};

兩個target函數分別是ipt_snat_target和ipt_dnat_target

 

 

1.4  ip_nat_init()函數  ip_nat_core.c

/*
 * Set up the NAT core: allocate the two hash tables, register the
 * built-in protocols, install the conntrack destroy callback and mark
 * the untracked conntrack so that NAT skips it.
 *
 * Returns 0 on success, -ENOMEM if the hash tables cannot be allocated.
 */
int __init ip_nat_init(void)
{
      size_t bucket;

      /* The NAT hash tables use the same size as the conntrack table. */
      ip_nat_htable_size = ip_conntrack_htable_size;

      /* One allocation holds both tables back to back: bysource takes the
         first ip_nat_htable_size list heads, byipsproto the second. */
      bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
      if (bysource == NULL)
            return -ENOMEM;
      byipsproto = &bysource[ip_nat_htable_size];

      /* Register the built-in protocols on the global protos list. */
      WRITE_LOCK(&ip_nat_lock);
      list_append(&protos, &ip_nat_protocol_tcp);
      list_append(&protos, &ip_nat_protocol_udp);
      list_append(&protos, &ip_nat_protocol_icmp);
      WRITE_UNLOCK(&ip_nat_lock);

      /* Every bucket of both tables starts as an empty list. */
      bucket = 0;
      while (bucket < ip_nat_htable_size) {
            INIT_LIST_HEAD(&bysource[bucket]);
            INIT_LIST_HEAD(&byipsproto[bucket]);
            bucket++;
      }

      /* Install the callback conntrack invokes when a connection is
         destroyed; per the original article it removes the connection's
         nodes from bysource and byipsproto. */
      IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
      ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;

      /* Initialize fake conntrack so that NAT will skip it */
      ip_conntrack_untracked.nat.info.initialized |= (1 << IP_NAT_MANIP_SRC);
      ip_conntrack_untracked.nat.info.initialized |= (1 << IP_NAT_MANIP_DST);

      return 0;
}

 

 

2  地址轉換的過程

2.1  ip_nat_fn函數  ip_nat_standalone.c

ip_nat_fn()是nat中的主要函數,nat在netfilter中註冊了四個hook,最終都會調用該函數

/*
 * Main NAT hook function; per the original article all four NAT hooks
 * end up here.
 *
 * For a packet of a new (or related) connection it makes sure the NAT
 * bindings for this hook's manip type exist, creating them through the
 * master's expect callback, a null binding, or a nat-table rule lookup,
 * and then applies the bindings to the packet via do_bindings().
 *
 * Returns a netfilter verdict: NF_DROP / NF_ACCEPT on the early-exit
 * paths, the non-ACCEPT verdict of the binding setup, or whatever
 * do_bindings() returns.
 */
static unsigned int
ip_nat_fn(unsigned int hooknum,
        struct sk_buff **pskb,
        const struct net_device *in,
        const struct net_device *out,
        int (*okfn)(struct sk_buff *))
{
      struct ip_conntrack *ct;
      enum ip_conntrack_info ctinfo;
      struct ip_nat_info *info;
        /* The hook determines whether this is source or destination NAT;
           per the original annotation 0 (IP_NAT_MANIP_SRC) means source
           and 1 (IP_NAT_MANIP_DST) means destination translation. */
      enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);

      /* Fragments were dealt with by earlier code, so none should show
         up here. */
      IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
                  & htons(IP_MF|IP_OFFSET)));

      /* NAT may rewrite the packet, so set NFC_UNKNOWN in nfcache up
         front; per the original annotation the flag is revisited when
         the packet is actually modified. */
      (*pskb)->nfcache |= NFC_UNKNOWN;

      /* Checksum: for CHECKSUM_HW packets call skb_checksum_help() and
         drop the packet if it reports failure. */
      if ((*pskb)->ip_summed == CHECKSUM_HW)
           if (skb_checksum_help(pskb, (out == NULL)))
                 return NF_DROP;

      /* Look up the connection this packet belongs to. */
      ct = ip_conntrack_get(*pskb, &ctinfo);

      /* No connection: let the packet through untranslated, except for
         ICMP redirects, which are dropped. */
      if (!ct) {
           /* Exception: ICMP redirect to new connection (not in
                   hash table yet).  We must not let this through, in
                   case we're doing NAT to the same network. */
           if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
                 struct icmphdr hdr;

                 if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
                              &hdr, sizeof(hdr)) == 0
                     && hdr.type == ICMP_REDIRECT)
                      return NF_DROP;
           }
           return NF_ACCEPT;
      }

      /* Dispatch on the connection state. */
      switch (ctinfo) {
      case IP_CT_RELATED:
      case IP_CT_RELATED+IP_CT_IS_REPLY:
           if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
                 if (!icmp_reply_translation(pskb, ct, hooknum,
                                      CTINFO2DIR(ctinfo)))
                      return NF_DROP;
                 else
                       return NF_ACCEPT;
           }
           /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */

      /* Packet of a new connection. */
      case IP_CT_NEW:
           info = &ct->nat.info;

           WRITE_LOCK(&ip_nat_lock);
      /* If this manip type is already set up for the connection, skip
         the setup and go straight to translating.  Without
         CONFIG_IP_NF_NAT_LOCAL the setup is also skipped for connections
         that are already confirmed. */
      if (!(info->initialized & (1 << maniptype))
#ifndef CONFIG_IP_NF_NAT_LOCAL
               && !(ct->status & IPS_CONFIRMED)
#endif
               ) {
                 unsigned int ret;

                 /* Connection created from an expectation whose master
                    has a helper with an expect callback: call it. */
                 if (ct->master
                     && master_ct(ct)->nat.info.helper
                     && master_ct(ct)->nat.info.helper->expect) {
                      ret = call_expect(master_ct(ct), pskb,
                                    hooknum, ct, info);
                 } else {
#ifdef CONFIG_IP_NF_NAT_LOCAL
                      /* LOCAL_IN hook doesn't have a chain!  */
                      if (hooknum == NF_IP_LOCAL_IN)
                            ret = alloc_null_binding(ct, info,
                                              hooknum);
                      else
#endif
                 /* Not yet set up and not created by an expectation:
                    this is an initial packet, so look for a matching
                    rule in the nat table. */
                 ret = ip_nat_rule_find(pskb, hooknum, in, out, ct, info);
                 }

                 /* Any verdict other than ACCEPT ends processing here;
                    release the lock before returning. */
                 if (ret != NF_ACCEPT) {
                      WRITE_UNLOCK(&ip_nat_lock);
                      return ret;
                 }
           } else
                 /* NAT for this manip type is already set up; only emit
                    a debug message. */
                 DEBUGP("Already setup manip %s for ct %p\n",
                        maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
                        ct);
           WRITE_UNLOCK(&ip_nat_lock);
           break;

      default:
           /* ESTABLISHED */
           IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
                      || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
           info = &ct->nat.info;
      }

      IP_NF_ASSERT(info);

      /* The conntrack-side bindings are in place; now rewrite the
         addresses in the packet itself. */
      return do_bindings(ct, ctinfo, info, hooknum, pskb);
}

 

 

2.2           ip_nat_rule_find函數  ip_nat_rule.c

int ip_nat_rule_find(struct sk_buff **pskb,

                unsigned int hooknum,

                const struct net_device *in,

                const struct net_device *out,

                struct ip_conntrack *ct,

                struct ip_nat_info *info)

{

      int ret;

/* 調用ipt_do_tables函數,第五個參數是&nat_table  */

      ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);

 

      if (ret == NF_ACCEPT) {

           if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))

                 /* NUL mapping */

                 ret = alloc_null_binding(ct, info, hooknum);

      }

      return ret;

}

nat表和filter表一樣,都是通過調用ipt_do_table函數來工作的

ipt_do_table查找表中的所有entry,如果某個entry的match全都匹配,則調用該entry的target函數

此時的target函數就是在nat初始化時註冊的ipt_snat_target和ipt_dnat_target

 

 

2.3           ipt_s(d)nat_target函數  ip_nat_rule.c

static unsigned int ipt_snat_target(struct sk_buff **pskb,

                          const struct net_device *in,

                          const struct net_device *out,

                          unsigned int hooknum,

                          const void *targinfo,

                          void *userinfo)

{

      struct ip_conntrack *ct;

      enum ip_conntrack_info ctinfo;

 

      IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);

 

/* 取得數據包的鏈接狀態 */

      ct = ip_conntrack_get(*pskb, &ctinfo);

 

      /* Connection must be valid and new. */

      IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));

      IP_NF_ASSERT(out);

 

      return ip_nat_setup_info(ct, targinfo, hooknum);

}

 

ipt_dnat_target和ipt_snat_target差不多,都是調用ip_nat_setup_info完成地址轉換。這裏的targinfo參數來自ipt_entry_target結構末尾的unsigned char data[0]成員,它是一個長度爲0的數組,指向緊跟在target頭部之後的數據區

static unsigned int ipt_dnat_target(struct sk_buff **pskb,

                          const struct net_device *in,

                          const struct net_device *out,

                          unsigned int hooknum,

                          const void *targinfo,

                          void *userinfo)

{

      struct ip_conntrack *ct;

      enum ip_conntrack_info ctinfo;

 

#ifdef CONFIG_IP_NF_NAT_LOCAL

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING

                || hooknum == NF_IP_LOCAL_OUT);

#else

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING);

#endif

 

      ct = ip_conntrack_get(*pskb, &ctinfo);

 

      /* Connection must be valid and new. */

      IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));

 

      return ip_nat_setup_info(ct, targinfo, hooknum);

}

 

2.4           ip_nat_setup_info()函數  ip_nat_rule.c

 

unsigned int

ip_nat_setup_info(struct ip_conntrack *conntrack,   /* 數據包的鏈接狀態 */

             const struct ip_nat_multi_range *mr,       /* 轉換後的地址池 */

             unsigned int hooknum)                    /* hook點 */

{

      struct ip_conntrack_tuple new_tuple, inv_tuple, reply;

      struct ip_conntrack_tuple orig_tp;

      struct ip_nat_info *info = &conntrack->nat.info;

      int in_hashes = info->initialized;

 

      MUST_BE_WRITE_LOCKED(&ip_nat_lock);

      IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING

                || hooknum == NF_IP_POST_ROUTING

                || hooknum == NF_IP_LOCAL_IN

                || hooknum == NF_IP_LOCAL_OUT);

      IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

      IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));

 

      /* 對當前狀態的應答方向的tuple調用invert_tuplepr取反,獲得一個orig_tp,若是以前沒有進行過地址或端口轉換,一般這裏獲得的orig_tp就等於初始方向的tuple */

      invert_tuplepr(&orig_tp, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);

 

      do {

      /* 進行地址轉換,new_tuple爲轉換後的地址的tuple */

        if (!get_unique_tuple(&new_tuple,&orig_tp,mr,conntrack,hooknum))

        {

                 DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",

                        conntrack);

                 return NF_DROP;

           }

 

      /* 對new_tuple取反,獲得通過轉換後的應答方向的tuple  */

           invert_tuplepr(&reply, &new_tuple);

 

      /* 修改conntrack中的應答方向的reply tuple,在這以前還要檢查若是該reply tuple已經在hash表裏存在了,即被其它鏈接佔用(存在初始方向tuple不一樣,應答方向tuple相同的鏈接),則還要回頭繼續修改 */

      } while (!ip_conntrack_alter_reply(conntrack, &reply));

 

      /* 對orig_tp取反,實際上又獲得了原conntrack的reply_tuple…… */

      invert_tuplepr(&inv_tuple, &orig_tp);

 

      /* 將所做轉換的相關信息保存到鏈接狀態conntrack裏,這樣該鏈接的後續數據包就能夠直接利用這些信息進行地址轉換,不用從新查找nat表了 */

      /* 若是是源地址改變(SNAT) */

      if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {

           /* In this direction, a source manip. */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_ORIGINAL, hooknum,

                    IP_NAT_MANIP_SRC, new_tuple.src });

 

           IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

 

           /* 在相對的hook點上必然有對應的目的地址改變(DNAT) */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                      /* opposite_hook便是求當前hook點的對應hook點 */

                  { IP_CT_DIR_REPLY, opposite_hook[hooknum],

                    IP_NAT_MANIP_DST, orig_tp.src });

           IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);

      }

 

      /* 若是是目的地址改變(DNAT) */

      if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {

           /* In this direction, a destination manip */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_ORIGINAL, hooknum,

                    IP_NAT_MANIP_DST, reply.src });

 

           IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);

 

           /* In the reverse direction, a source manip. */

           info->manips[info->num_manips++] =

                 ((struct ip_nat_info_manip)

                  { IP_CT_DIR_REPLY, opposite_hook[hooknum],

                    IP_NAT_MANIP_SRC, inv_tuple.src });

           IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);

      }

 

      /* 若是這個鏈接不是某個鏈接的預期的鏈接(子鏈接),則在全局鏈表helpers查找對應的ip_nat_helper結構 */

      if (!conntrack->master)

           info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, &reply);

 

      /* 轉換完了,標記一下 */

      info->initialized |= (1 << HOOK2MANIP(hooknum));

 

      /* 將所作的地址轉換的數據結構加入到全局hash表bysource和byipsproto中,若是該地址轉換是某地址轉換基礎上的再次轉換,則用replace_in_hashes替換,反之則用place_in_hashes */

      if (in_hashes) {

           IP_NF_ASSERT(info->bysource.conntrack);

           replace_in_hashes(conntrack, info);

      } else {

           place_in_hashes(conntrack, info);

      }

 

      return NF_ACCEPT;

}

 

 

2.5           get_unique_tuple ()函數  ip_nat_core.c

get_unique_tuple,得到一個唯一的tuple,就是說除了要做地址/端口的轉換,還要保證轉換得到的tuple是唯一的。

很複雜的一個函數。。。

第三個參數是用來替換的地址或端口的範圍

/*
 * get_unique_tuple - derive a translated tuple that is not already in use.
 *
 * On success the translated tuple is written to *tuple and 1 is returned;
 * 0 is returned when no range handed back by find_best_ips_proto_fast()
 * yields a usable tuple.
 *
 * Although mrr is const-qualified, the range flags are modified
 * temporarily: IP_NAT_RANGE_FULL is set on ranges that failed and is
 * cleared again on every range at the clear_fulls label before returning.
 * The early "return 1" in the POST_ROUTING branch happens before any flag
 * has been set.
 */
static int

get_unique_tuple(struct ip_conntrack_tuple *tuple,

            const struct ip_conntrack_tuple *orig_tuple,

            const struct ip_nat_multi_range *mrr,

            struct ip_conntrack *conntrack,

            unsigned int hooknum)

{

      struct ip_nat_protocol *proto

           = find_nat_proto(orig_tuple->dst.protonum);

      struct ip_nat_range *rptr;

      unsigned int i;

      int ret;

 

      /* Non-const alias of mrr: the flags of its ranges are written below. */
      struct ip_nat_multi_range *mr = (void *)mrr;

 

      /* POST_ROUTING only: first try to reuse a source mapping returned by find_appropriate_src(). NOTE(review): the original annotation calls this part confusing and relates it to P2P / UDP hole punching -- not verifiable from this code. */

      if (hooknum == NF_IP_POST_ROUTING) {

      /* Original annotation: struct ip_conntrack_manip holds one IP address and one protocol port -- definition not shown here. */

           struct ip_conntrack_manip *manip;

      /* Original annotation (helper not shown here, TODO confirm): find_appropriate_src() hashes orig_tuple with hash_by_src() and searches the bysource table; if it finds a connection whose source address and port both match and whose mapped address/port already lies in the requested range, it returns that connection's source mapping. */

           manip = find_appropriate_src(orig_tuple, mr);

           if (manip) {

                 /* Apply same source manipulation. */

                 *tuple = ((struct ip_conntrack_tuple)

                        { *manip, orig_tuple->dst });

                 DEBUGP("get_unique_tuple: Found current src map\n");

                 /* Accept the reused mapping only if ip_nat_used_tuple() reports the tuple as not in use. */

                 if (!ip_nat_used_tuple(tuple, conntrack))

                      return 1;

           }

      }

 

      /* orig_tuple is the tuple before translation; tuple receives the translated one. */

      *tuple = *orig_tuple;

      /* Keep trying ranges selected from mr until a unique tuple is found or no range is left (NULL). */

      while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))

             != NULL) {

           DEBUGP("Found best for "); DUMP_TUPLE(tuple);

      /* Original annotation of the constants (definitions not shown here):
         IP_NAT_MANIP_SRC             - perform SNAT
         IP_NAT_MANIP_DST             - perform DNAT
         IP_NAT_RANGE_MAP_IPS         - the range specifies IP addresses
         IP_NAT_RANGE_PROTO_SPECIFIED - the range specifies ports
         First case: no protocol range was specified, or the tuple already
         lies inside the specified one, and the tuple is not in use. */

           if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED)

                || proto->in_range(tuple, HOOK2MANIP(hooknum),

                            &rptr->min, &rptr->max))

               && !ip_nat_used_tuple(tuple, conntrack)) {

                 ret = 1;

                 goto clear_fulls;

           } else {

                 if (proto->unique_tuple(tuple, rptr,

                                  HOOK2MANIP(hooknum),

                                  conntrack)) {

                      /* Must be unique. */

                      IP_NF_ASSERT(!ip_nat_used_tuple(tuple,

                                             conntrack));

                      ret = 1;

                      goto clear_fulls;

                 } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {

                      /* Try implicit source NAT; protocol

                                   may be able to play with ports to

                                   make it unique. */

                      struct ip_nat_range r

                            = { IP_NAT_RANGE_MAP_IPS,

                                tuple->src.ip, tuple->src.ip,

                                { 0 }, { 0 } };

                      DEBUGP("Trying implicit mapping\n");

                      if (proto->unique_tuple(tuple, &r,

                                       IP_NAT_MANIP_SRC,

                                       conntrack)) {

                            /* Must be unique. */

                            IP_NF_ASSERT(!ip_nat_used_tuple

                                       (tuple, conntrack));

                            ret = 1;

                            goto clear_fulls;

                      }

                 }

                 DEBUGP("Protocol can't get unique tuple %u.\n",

                        hooknum);

           }

 

           /* Eliminate that from range, and try again. */

           rptr->flags |= IP_NAT_RANGE_FULL;

           *tuple = *orig_tuple;

      }

 

      ret = 0;

 

 clear_fulls:

      /* Clear full flags. */

      IP_NF_ASSERT(mr->rangesize >= 1);

      for (i = 0; i < mr->rangesize; i++)

           mr->range[i].flags &= ~IP_NAT_RANGE_FULL;

 

      return ret;

}

相關文章
相關標籤/搜索