@(Linux network)
闡述了結構體之間的關聯
在net->netns_ipv4結構體中,包含了以下幾個xt_table
struct xt_table { struct list_head list;//xt_table list /* What hooks you will enter on */ // 此table中包含的chain,譬如nat table就不包含forward chain // 在hook point,會去執行相應nf_hook[][] list上的hook_fn. 而不一樣的hook_fn會去查對應模塊的table. // 就是說, nat 註冊的hook_fn 不會去在nat table中查找forward chain unsigned int valid_hooks; /* Man behind the curtain... */ // 具體存儲table 內容 struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; u_int8_t af; /* address/protocol family */ int priority; /* hook order */ /* A unique name... */ // 表名 const char name[XT_TABLE_MAXNAMELEN]; };
下面來看private指針指向的結構體,這個結構體中具體存儲了xt_entry結構體,每一個xt_entry對應一條規則。
struct xt_table_info { /* Size per table */ // table 的大小 unsigned int size; /* Number of entries: FIXME. --RR */ // 包含的entry個數 unsigned int number; /* Initial number of entries. Needed for module usage count */ // table的起始entry offset unsigned int initial_entries; /* Entry points and underflows */ // hook_entry[] 存儲了每一個chain 對應的起始entry offset // underflow[] 存儲了每一個chain 最後一個entry的offset unsigned int hook_entry[NF_INET_NUMHOOKS]; unsigned int underflow[NF_INET_NUMHOOKS]; /* * Number of user chains. Since tables cannot have loops, at most * @stacksize jumps (number of user chains) can possibly be made. */ // scott added: // we can only jump form hook chains to user chains, PRE_ROUTING-> POST_ROUTING are not supported. // so stacksize is the number of user chains and the max value of jump times. // And also, jumpstack can only store child chain entry. // 當chain之間發生jump的時候(A->B),只須要將A中當前的entry push進stack便可。因此jumpstack最大隻須要存儲user chains個數的entry. // 若是B chain的某個entry target爲XT_RETURN,則從stack中pop出A中的entry,並順着A chain往下匹配. unsigned int stacksize; unsigned int __percpu *stackptr; // 等同於 jumpstack[cpuid][entry*] void ***jumpstack; //保存的entries unsigned char entries[0] __aligned(8); };
ipt_entry具體地定義了每一條規則,例如:
iptables -t nat -A PREROUTING -s 12.12.12.12 -p tcp --dport 808 -j DROP
/* This structure defines each of the firewall rules. Consists of 3 parts which are*/ // 1) general IP header stuff // 2) match specific stuff // 3) the target to perform if the rule matches struct ipt_entry { //存放了標準匹配 struct ipt_ip ip; /* Mark with fields that we care about. */ unsigned int nfcache; /* Size of ipt_entry + matches */ // 指定了target 元素的地址 // target_offset = elems + sizeof(xt_match) * numof(matchs) __u16 target_offset; /* Size of ipt_entry + matches + target */ // 下一個xt_entry的地址 __u16 next_offset; /* Back pointer */ // 從哪一個entry 過來的 unsigned int comefrom; /* Packet and byte counters. */ // 計數 packet 和 byte? struct xt_counters counters; /* The matches (if any), then the target. */ // elems中保存了xt_match 和 xt_target. // xt_match 和 xt_target 地址偏移在 target_offset指定 unsigned char elems[0]; };
ipt_entry 看起來還是比較簡單的,下面這張圖對ipt_entry解釋得比較清楚:
注意,ipt_ip結構中存放的是標準匹配,如 ip , netmask ,protocol,interface 等,擴展匹配等才存放在xt_entry_match結構中。
/* Yes, Virginia, you have to zero the padding. */ struct ipt_ip { /* Source and destination IP addr */ struct in_addr src, dst; /* Mask for src and dest IP addr */ struct in_addr smsk, dmsk; char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; /* Protocol, 0 = ANY */ __u16 proto; /* Flags word */ //SYN, ACK, FIN, RST ... __u8 flags; /* Inverse flags */ __u8 invflags; };
xt_entry_match 中主要保存了拓展match,在iptables中用-m 表示,在下例中,使用拓展socket match:
iptables -t nat -A PREROUTING -m socket -j DIVERT
/*
 * Header that precedes each extension match stored in a rule.  The same
 * leading bytes are viewed through different members of the union `u` by
 * userspace and by the kernel.
 */
struct xt_entry_match {
	union {
		/* View filled in by userspace: the match is identified by name. */
		struct {
			__u16 match_size;

			/* Used by userspace */
			char name[XT_EXTENSION_MAXNAMELEN];
			__u8 revision;
		} user;

		/* View used by the kernel: a pointer to the match extension. */
		struct {
			__u16 match_size;

			/* Used inside the kernel */
			struct xt_match *match;
		} kernel;

		/* Total length */
		__u16 match_size;
	} u;

	/* Trailing variable-length data (presumably the match's own parameters). */
	unsigned char data[0];
};
注意: 在/usr/include/下的頭文件,提供的都是用戶態編寫程序的接口。只有真正kernel src中提供的include文件,才是編寫內核代碼時需要引入的頭文件。
例如 xt_entry_match,在/usr/include/linux/netfilter/x_tables.h中和內核代碼 /usr/src/kernels/3.10.0-514.el7.x86_64/include/linux/netfilter/都有定義,但是只有內核代碼文件中才包含xt_match結構的定義。用戶文件中是沒有此結構定義的。
這塊是用戶和內核共同操作的一塊地址。當我們通過iptables添加規則的時候,用戶態也會創建xt_entry_match結構,並將user.name設置爲match名稱,內核拿到此結構後,通過name的值,找到匹配的xt_match結構,並使kernel.match指向它。也就是說,name的值會被覆蓋,但是沒有關係,因爲存儲在table chain中的時候,xt_entry_match不需要name,只需要xt_match即可。
這裏我們需要關注的是xt_match結構體,其具體指向了一個匹配
xt_match是內核的匹配模塊,需要先註冊,才能使用。
/*
 * Example: register a match extension before any rule can use it.
 * pktsize_match is presumably a struct xt_match defined by the example
 * module -- it is not shown here.
 */
xt_register_match(&pktsize_match);
也就是說,xt_match結構體是公用的,當創建多個xt_entry_match的時候,其指向的都是match list中的該match, 但是xt_entry_match是不一樣的。
struct xt_match { //extend match list? struct list_head list; //match name const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ /* Arguments changed since 2.6.9, as this must now handle non-linear skb, using skb_header_pointer and skb_ip_make_writable. */ //match 函數,判斷sk_buff是否知足該match,入參爲sk_buff報文 以及 //xt_action_param 用來match和target的參數 bool (*match)(const struct sk_buff *skb, struct xt_action_param *); /* Called when user tries to insert an entry of this type. */ // 新建match entry的時候check int (*checkentry)(const struct xt_mtchk_param *); /* Called when entry of this type deleted. */ // 刪除該類型match 的時候調用 void (*destroy)(const struct xt_mtdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int matchsize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; };
注意: 這裏重要的是match函數指針:
/*
 * The match callback member of struct xt_match: given the packet (skb) and
 * the xt_action_param parameters, it returns true or false.
 */
bool (*match)(const struct sk_buff *skb, struct xt_action_param *);
skb 爲需要處理的報文,xt_action_param則指向了匹配規則,網口信息等。這個後面還需要看這個參數是怎麼構建以及傳入的。
match或者target操作的時候,傳入的參數。這個結構體是被xt_match和xt_target公用的,所以不少參數採用union
/**
 * struct xt_action_param - parameters for matches/targets
 *
 * @match:	the match extension
 * @target:	the target extension
 * @matchinfo:	per-match data
 * @targinfo:	per-target data
 * @in:		input netdevice
 * @out:	output netdevice
 * @fragoff:	packet is a fragment, this is the data offset
 * @thoff:	position of transport header relative to skb->data
 * @hooknum:	hook number given packet came from
 * @family:	Actual NFPROTO_* through which the function is invoked
 *		(helpful when match->family == NFPROTO_UNSPEC)
 *
 * Fields written to by extensions:
 *
 * @hotdrop:	drop packet if we had inspection problems
 *
 * Network namespace obtainable using dev_net(in/out)
 *
 * The structure is shared by matches and targets, which is why the
 * extension pointer and its data pointer are unions.
 */
struct xt_action_param {
	union {
		const struct xt_match *match;
		const struct xt_target *target;
	};
	union {
		/* The parameters configured for this match / target. */
		const void *matchinfo, *targinfo;
	};
	const struct net_device *in, *out;

	/* Offset of a fragmented packet. */
	int fragoff;
	unsigned int thoff;

	/* Which hook point the packet came from. */
	unsigned int hooknum;
	u_int8_t family;

	/* hotdrop = 1 drops the packet immediately. */
	bool hotdrop;
};
創建iptables規則時指定的動作,xt_entry_target依賴xt_target,extend target 需要register到xt_af[pf].target list.
xt_entry_target , xt_target 和 xt_entry_match,xt_match的結構相似。
/*
 * Header that precedes the target stored in a rule.  Laid out like
 * xt_entry_match: userspace and the kernel view the same leading bytes
 * through different members of the union `u`.
 */
struct xt_entry_target {
	union {
		/* View filled in by userspace: the target is identified by name. */
		struct {
			__u16 target_size;

			/* Used by userspace */
			char name[XT_EXTENSION_MAXNAMELEN];
			__u8 revision;
		} user;

		/* View used by the kernel: a pointer to the target extension. */
		struct {
			__u16 target_size;

			/* Used inside the kernel */
			struct xt_target *target;
		} kernel;

		/* Total length */
		__u16 target_size;
	} u;

	/* Trailing variable-length data (presumably the target's own parameters). */
	unsigned char data[0];
};
/* Registration hooks for targets. */ struct xt_target { struct list_head list; const char name[XT_EXTENSION_MAXNAMELEN]; u_int8_t revision; /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ unsigned int (*target)(struct sk_buff *skb, const struct xt_action_param *); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be called. */ /* Should return 0 on success or an error code otherwise (-Exxxx). */ int (*checkentry)(const struct xt_tgchk_param *); /* Called when entry of this type deleted. */ void (*destroy)(const struct xt_tgdtor_param *); #ifdef CONFIG_COMPAT /* Called when userspace align differs from kernel space one */ void (*compat_from_user)(void *dst, const void *src); int (*compat_to_user)(void __user *dst, const void *src); #endif /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; const char *table; unsigned int targetsize; #ifdef CONFIG_COMPAT unsigned int compatsize; #endif unsigned int hooks; unsigned short proto; unsigned short family; RH_KABI_RESERVE(1) RH_KABI_RESERVE(2) RH_KABI_RESERVE(3) RH_KABI_RESERVE(4) };