tcp 客戶端 synack的接收 以及 相互connect

接收入口

tcp_v4_rcv

    |--> tcp_v4_do_rcv

               |-> tcp_rcv_state_process

                         |-> tcp_rcv_synsent_state_process

1. 狀態爲ESTABLISHED時,用tcp_rcv_established()接收處理。
2. 狀態爲LISTEN時,說明這個sock處於監聽狀態,用於被動打開的接收處理,包括SYN和ACK。
3. 當狀態不爲ESTABLISHED或TIME_WAIT時,用tcp_rcv_state_process()處理。

/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
/* Once the TCP layer has received a segment, passed the basic checks
 * and located the sock that owns it, every state except FIN_WAIT_2 and
 * TIME_WAIT is dispatched through tcp_v4_do_rcv().
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
	/*
	 * We really want to reject the packet as early as possible
	 * if:
	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
	 *  o There is an MD5 option and we're not expecting one
	 */
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
		TCP_CHECK_TIMER(sk);
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		/* Server side: this segment is either the first-handshake
		 * SYN or the third-handshake ACK. */
		struct sock *nsk = tcp_v4_hnd_req(sk, skb);

		if (!nsk)
			goto discard;
		if (nsk != sk) {
			/* For a first-handshake SYN, nsk is the listening
			 * "parent" sk; for the final ACK it is the newly
			 * created "child" sk. */
			if (tcp_child_process(sk, nsk, skb)) {
				/* tcp_child_process() itself ends up in
				 * tcp_rcv_state_process(). */
				rsk = nsk;
				goto reset;
			}
			return 0;	/* final ACK of the handshake: done */
		}
		/* First-handshake SYN: fall through to the slow path. */
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	/* Reaching here means either a client receiving SYN+ACK or a
	 * server receiving SYN. */
	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
	TCP_CHECK_TIMER(sk);
	return 0;

reset:
	tcp_v4_send_reset(rsk, skb);
discard:
	kfree_skb(skb);
	/* Be careful here. If this function gets more complicated and
	 * gcc suffers from register pressure on the x86, sk (in %ebx)
	 * might be destroyed here. This current version compiles correctly,
	 * but you have been warned.
	 */
	return 0;

csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;
}

當客戶端connect()以後,sock進入TCP_SYN_SENT狀態,並插入到ehash中, 若是是阻塞socket則connect()等待握手完成
本文考慮收到服務端synack的過程,也就是客戶端握手的第二階段。

發送SYN段後,鏈接的狀態變爲SYN_SENT。此時若是收到SYNACK段,處理函數爲tcp_rcv_state_process()。

對於協議棧的接收路徑:

  • tcp_v4_rcv
    • ->__inet_lookup_skb() //在ehash中找到TCP_SYN_SENT狀態的sk
    • ->!sock_owned_by_user() //connect()即便阻塞也不佔有鎖
      • ->!tcp_prequeue() //對於synack,不會排入prequeue隊列
      • ->tcp_v4_do_rcv()
        • ->tcp_rcv_state_process() //進入TCP_SYN_SENT狀態處理邏輯
          • -> tcp_rcv_synsent_state_process

總體代碼先摺疊

/* Master TCP state machine: handles every state except ESTABLISHED and
 * TIME_WAIT.  Reached from tcp_v4_do_rcv() directly, or via
 * tcp_child_process() for a freshly created child sock.
 *
 * Return: 0 when the segment was consumed or queued, non-zero when the
 * caller must answer with a RST.
 *
 * NOTE(review): this copy uses the two-argument signature of newer
 * kernels, while the tcp_v4_do_rcv() excerpt above calls it with four
 * arguments — the two snippets come from different kernel versions.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcphdr *th = tcp_hdr(skb);
	struct request_sock *req;
	int queued = 0;
	bool acceptable;

	switch (sk->sk_state) {
	case TCP_CLOSE:
		goto discard;

	case TCP_LISTEN:	/* server side receiving a segment */
		/* In LISTEN only a SYN is acceptable.  An ACK cannot belong
		 * to an established connection yet, so return 1 and let the
		 * caller send a RST; a RST is silently dropped. */
		if (th->ack)
			return 1;

		if (th->rst)
			goto discard;

		if (th->syn) {
			if (th->fin)
				goto discard;
			/* Hand the SYN to the conn_request hook
			 * (ipv4_specific->tcp_v4_conn_request, installed at
			 * socket creation, see tcp_v4_init_sock()); it
			 * allocates the request_sock for this embryonic
			 * connection. */
			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
				return 1;
			consume_skb(skb);
			return 0;
		}
		goto discard;

	case TCP_SYN_SENT:	/* client side receiving SYN+ACK */
		/* tcp_rcv_synsent_state_process():
		 *  - parses options (SACK, TFO, wscale, MSS, timestamps);
		 *  - on ACK runs tcp_ack() (may confirm Fast Open data) and
		 *    tcp_finish_connect(): SYN_SENT -> ESTABLISHED;
		 *  - stores a Fast Open cookie when present and chooses
		 *    between immediate and delayed ACK;
		 *  - a bare SYN means crossed connect()s:
		 *    SYN_SENT -> SYN_RECV and a SYNACK is sent. */
		tp->rx_opt.saw_tstamp = 0;
		queued = tcp_rcv_synsent_state_process(sk, skb, th);
		if (queued >= 0)
			return queued;

		/* Do step6 onward by hand. */
		tcp_urg(sk, skb, th);
		__kfree_skb(skb);
		tcp_data_snd_check(sk);
		return 0;
	}

	tp->rx_opt.saw_tstamp = 0;
	req = tp->fastopen_rsk;
	if (req) {
		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
			     sk->sk_state != TCP_FIN_WAIT1);
		if (!tcp_check_req(sk, skb, req, true))
			goto discard;
	}

	if (!th->ack && !th->rst && !th->syn)
		goto discard;

	if (!tcp_validate_incoming(sk, skb, th, 0))
		return 0;

	/* step 5: check the ACK field */
	/* tcp_ack() > 0 means the ACK was processed successfully, e.g. the
	 * normal third step of the three-way handshake. */
	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
				      FLAG_UPDATE_TS_RECENT) > 0;

	switch (sk->sk_state) {
	case TCP_SYN_RECV:	/* new connection completing the handshake */
		if (!acceptable)
			return 1;

		if (!tp->srtt_us)
			tcp_synack_rtt_meas(sk, req);

		/* We arrive here from tcp_child_process() in
		 * tcp_v4_do_rcv(); tcp_check_req() already created the new
		 * child sock.
		 * Once we leave TCP_SYN_RECV, we no longer need req
		 * so release it. */
		if (req) {
			tp->total_retrans = req->num_retrans;
			reqsk_fastopen_remove(sk, req, false);	/* reclaim fastopen req */
		} else {
			/* Make sure socket is routed, for correct metrics. */
			icsk->icsk_af_ops->rebuild_header(sk);
			tcp_init_congestion_control(sk);
			tcp_mtup_init(sk);
			tp->copied_seq = tp->rcv_nxt;
			tcp_init_buffer_space(sk);
		}
		smp_mb();
		tcp_set_state(sk, TCP_ESTABLISHED);	/* SYN_RECV -> ESTABLISHED */

		/* sk_state_change == sock_def_wakeup (set up in
		 * sock_init_data together with sk_data_ready,
		 * sk_write_space, sk_error_report, sk_destruct): it fires
		 * ep_poll_callback(), putting the sock on epoll's ready
		 * list and waking blocked waiters; epoll then runs
		 * ep_send_events -> ep_scan_ready_list ->
		 * ep_send_events_proc -> ep_item_poll -> tcp_poll.
		 *
		 * Note, that this wakeup is only for marginal crossed SYN
		 * case.  Passively open sockets are not waked up, because
		 * sk->sk_sleep == NULL and sk->sk_socket == NULL. */
		sk->sk_state_change(sk);

		/* Signal processes that want to write: the socket can now
		 * send data. */
		if (sk->sk_socket)
			sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);

		/* Initialize send-side sequence/window bookkeeping from the
		 * received ACK segment. */
		tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
		tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

		if (tp->rx_opt.tstamp_ok)
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

		if (req) {
			/* Re-arm the timer because data may have been sent out.
			 * This is similar to the regular data transmission case
			 * when new data has just been ack'ed.
			 *
			 * (TFO) - we could try to be more aggressive and
			 * retransmitting any data sooner based on when they
			 * are sent out.
			 */
			tcp_rearm_rto(sk);
		} else
			tcp_init_metrics(sk);

		tcp_update_pacing_rate(sk);

		/* Prevent spurious tcp_cwnd_restart() on first data packet */
		tp->lsndtime = tcp_time_stamp;

		tcp_initialize_rcv_mss(sk);
		tcp_fast_path_on(tp);	/* compute header-prediction flags */
		break;

	case TCP_FIN_WAIT1: {
		/* ACK handling in FIN_WAIT1: once all sent data is acked we
		 * move to FIN_WAIT2.  If no wait is wanted there
		 * (linger2 < 0) or out-of-window data arrived, the
		 * connection is closed immediately.  Otherwise the wait is
		 * compared with TCP_TIMEWAIT_LEN: longer waits arm a
		 * FIN_WAIT2 timer, shorter ones hand the sock to the
		 * TIME_WAIT machinery (substate still FIN_WAIT2) which arms
		 * its own timer. */
		struct dst_entry *dst;
		int tmo;

		/* If we enter the TCP_FIN_WAIT1 state and we are a
		 * Fast Open socket and this is the first acceptable
		 * ACK we have received, this would have acknowledged
		 * our SYNACK so stop the SYNACK timer.
		 */
		if (req) {
			/* Return RST if ack_seq is invalid.
			 * Note that RFC793 only says to generate a
			 * DUPACK for it but for TCP Fast Open it seems
			 * better to treat this case like TCP_SYN_RECV
			 * above.
			 */
			if (!acceptable)
				return 1;
			/* We no longer need the request sock. */
			reqsk_fastopen_remove(sk, req, false);
			tcp_rearm_rto(sk);
		}

		/* Not all sent data has been acknowledged yet. */
		if (tp->snd_una != tp->write_seq)
			break;

		tcp_set_state(sk, TCP_FIN_WAIT2);	/* enter FIN_WAIT2 */
		sk->sk_shutdown |= SEND_SHUTDOWN;	/* shut down the send side */

		dst = __sk_dst_get(sk);
		if (dst)	/* confirm the cached route */
			dst_confirm(dst);

		if (!sock_flag(sk, SOCK_DEAD)) {
			/* Wake up lingering close(): sock is not DEAD, the
			 * state changed, wake any waiting process. */
			sk->sk_state_change(sk);
			break;
		}

		/* linger2 < 0: no FIN_WAIT2 wait wanted; also abort when a
		 * segment (data or FIN) beyond the expected sequence
		 * arrived. */
		if (tp->linger2 < 0 ||
		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
		     after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
			tcp_done(sk);	/* close the connection */
			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
			return 1;
		}

		tmo = tcp_fin_time(sk);	/* FIN_WAIT2 wait time */
		if (tmo > TCP_TIMEWAIT_LEN) {
			/* Longer than TIMEWAIT_LEN: arm the FIN_WAIT2 timer
			 * (reusing the keepalive timer) for the excess. */
			inet_csk_reset_keepalive_timer(sk,
						       tmo - TCP_TIMEWAIT_LEN);
		} else if (th->fin || sock_owned_by_user(sk)) {
			/* Bad case. We could lose such FIN otherwise.
			 * It is not a big problem, but it looks confusing
			 * and not so rare event. We still can lose it now,
			 * if it spins in bh_lock_sock(), but it is really
			 * marginal case.
			 */
			inet_csk_reset_keepalive_timer(sk, tmo);
		} else {
			/* Wait < TIMEWAIT_LEN: hand over to TIME_WAIT. */
			tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
			goto discard;
		}
		break;
	}

	case TCP_CLOSING:
		if (tp->snd_una == tp->write_seq) {
			tcp_time_wait(sk, TCP_TIME_WAIT, 0);
			goto discard;
		}
		break;

	case TCP_LAST_ACK:
		if (tp->snd_una == tp->write_seq) {
			tcp_update_metrics(sk);
			tcp_done(sk);
			goto discard;
		}
		break;
	}

	/* step 6: check the URG bit */
	tcp_urg(sk, skb, th);

	/* step 7: process the segment text */
	switch (sk->sk_state) {
	case TCP_CLOSE_WAIT:
	case TCP_CLOSING:
	case TCP_LAST_ACK:
		if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
			break;
	case TCP_FIN_WAIT1:
	case TCP_FIN_WAIT2:
		/* FIN_WAIT2 with data arriving before the timer fires: a
		 * segment carrying FIN moves us straight to TIME_WAIT.
		 *
		 * RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
			    after(TCP_SKB_CB(skb)->end_seq - th->fin,
				  tp->rcv_nxt)) {
				NET_INC_STATS(sock_net(sk),
					      LINUX_MIB_TCPABORTONDATA);
				tcp_reset(sk);
				return 1;
			}
		}
		/* Fall through */
	case TCP_ESTABLISHED:
		/* Queue any payload (e.g. when the client used
		 * TCP_DEFER_ACCEPT and sent data with the final ACK). */
		tcp_data_queue(sk, skb);
		queued = 1;
		break;
	}

	/* tcp_data could move socket to TIME-WAIT */
	if (sk->sk_state != TCP_CLOSE) {
		tcp_data_snd_check(sk);	/* give pending data a send chance */
		tcp_ack_snd_check(sk);	/* flush a delayed ACK if one is due */
	}

	if (!queued) {
discard:
		tcp_drop(sk, skb);
	}
	return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);
View Code

參考:https://blog.csdn.net/zhangskd/article/details/47380761

http://www.cnhalo.net/2016/06/13/linux-tcp-synack-rcv/

tcp_rcv_synsent_state_process()用於SYN_SENT狀態的處理,具體又分兩種場景。
(1) 接收到SYNACK
通常狀況下會收到服務端的SYNACK,處理以下:
檢查ack_seq是否合法。若是使用了時間戳選項,檢查回顯的時間戳是否合法。檢查TCP的標誌位是否合法。若是SYNACK是合法的,更新sock的各類信息。

  把鏈接的狀態設置爲TCP_ESTABLISHED,喚醒調用connect()的進程。判斷是立刻發送ACK,仍是延遲發送。

(2) 接收到SYN
本端以前發送出一個SYN,如今又接收到了一個SYN,雙方同時向對端發起創建鏈接的請求。
處理以下:把鏈接狀態置爲SYN_RECV。更新sock的各類信息。構造和發送SYNACK。接着對端也會迴應SYNACK,以後的處理流程和服務器端接收ACK相似。

當tcp_rcv_synsent_state_process()的返回值大於0時,會致使上層調用函數發送一個被動的RST。

Q:那麼什麼狀況下此函數的返回值會大於0?

A:收到一個ACK段,但ack_seq的序號不正確,或者回顯的時間戳不正確。

 

分析:tcp_rcv_synsent_state_process

對於TCP_SYN_SENT狀態的sock,會調用tcp_rcv_synsent_state_process來進行處理

  • 解析tcp選項,獲取服務端的支持狀況, 好比sack, TFO, wscale, MSS, timestamp等
  • 若是有ack, 進行tcp_ack, 這時候可能fastopen確認了以前的數據
  • 調用tcp_finish_connect,TCP_SYN_SENT->TCP_ESTABLISHED
  • 若是包含fastopen cookie則保存
  • 判斷是否須要當即ack仍是延時ack
  • 若是包裏沒有ack,只有syn,則表示相互connect, TCP_SYN_SENT->TCP_SYN_RECV, 併發送synack
/* SYN_SENT handler: the client side of the handshake, or a
 * simultaneous open.
 *
 *  - parse the TCP options to learn the peer's capabilities
 *    (SACK, TFO, wscale, MSS, timestamps, ...);
 *  - on ACK: run tcp_ack() (may confirm Fast Open data), then
 *    tcp_finish_connect(): SYN_SENT -> ESTABLISHED; save a Fast Open
 *    cookie if present; choose immediate vs delayed ACK;
 *  - on SYN without ACK (crossed connect()s): SYN_SENT -> SYN_RECV
 *    and send a SYNACK.
 *
 * Return: 0 if the skb was consumed, -1 if the caller should continue
 * processing it, >0 if the caller must reply with a RST (invalid
 * ack_seq or invalid echoed timestamp).
 */
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
					 const struct tcphdr *th)
{
	struct inet_connection_sock *icsk = inet_csk(sk);	/* client sk */
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_fastopen_cookie foc = { .len = -1 };
	int saved_clamp = tp->rx_opt.mss_clamp;

	/* Parse options; may carry a Fast Open cookie. */
	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
	if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
		tp->rx_opt.rcv_tsecr -= tp->tsoffset;	/* repair-mode offset */

	if (th->ack) {
		/* rfc793:
		 * "If the state is SYN-SENT then
		 *    first check the ACK bit
		 *      If the ACK bit is set
		 *	  If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
		 *        a reset (unless the RST bit is set, if so drop
		 *        the segment and return)"
		 */
		/* snd_una was initialized to the SYN's seq, so the valid
		 * ACK is syn+1 (or more with Fast Open); an ack_seq above
		 * snd_nxt acks data that was never sent. */
		if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) ||
		    after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))
			goto reset_and_undo;

		/* retrans_stamp was recorded when the SYN went out; the
		 * echoed timestamp must fall within [retrans_stamp, now]. */
		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
			     tcp_time_stamp)) {
			NET_INC_STATS(sock_net(sk),
				      LINUX_MIB_PAWSACTIVEREJECTED);
			goto reset_and_undo;
		}

		/* Now ACK is acceptable.
		 *
		 * "If the RST bit is set
		 *    If the ACK was acceptable then signal the user "error:
		 *    connection reset", drop the segment, enter CLOSED state,
		 *    delete TCB, and return."
		 */
		if (th->rst) {
			tcp_reset(sk);	/* enter TCP_CLOSE */
			goto discard;	/* drop the packet */
		}

		/* rfc793:
		 * "fifth, if neither of the SYN or RST bits is set then
		 * drop the segment and return."
		 *
		 * See note below!
		 *                                        --ANK(990513)
		 */
		if (!th->syn)
			goto discard_and_undo;

		/* rfc793:
		 * "If the SYN bit is on ...
		 *  are acceptable then ...
		 *  (our SYN has been ACKed), change the connection
		 *  state to ESTABLISHED..."
		 */
		tcp_ecn_rcv_synack(tp, th);

		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
		tcp_ack(sk, skb, FLAG_SLOWPATH);	/* may confirm Fast Open data */

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled.
		 */
		tp->snd_wnd = ntohs(th->window);	/* peer's advertised window */

		if (!tp->rx_opt.wscale_ok) {
			/* Peer does not support window scaling: clamp the
			 * window we advertise to an unscaled 16-bit value. */
			tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535U);
		}

		if (tp->rx_opt.saw_tstamp) {
			/* The SYN carried a timestamp option. */
			tp->rx_opt.tstamp_ok = 1;
			tp->tcp_header_len =	/* header grows by the option */
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;	/* mss shrinks */
			tcp_store_ts_recent(tp);	/* save echo value */
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		/* Peer supports SACK and the system enables FACK. */
		if (tcp_is_sack(tp) && sysctl_tcp_fack)
			tcp_enable_fack(tp);

		tcp_mtup_init(sk);	/* peer MSS known: init MTU-probe range */
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);	/* peer-MSS estimate, capped at 536 */

		/* Remember, tcp_poll() does not lock socket!
		 * Change state from SYN-SENT only after copied_seq
		 * is initialized.
		 */
		tp->copied_seq = tp->rcv_nxt;	/* next seq userspace will read */
		smp_mb();

		/* SYN_SENT -> ESTABLISHED plus the connection-complete
		 * initialization; data can be sent from here on. */
		tcp_finish_connect(sk, skb);

		/* Fast Open handling: store the cookie; -1 when some data
		 * was unconfirmed and has been retransmitted. */
		if ((tp->syn_fastopen || tp->syn_data) &&
		    tcp_rcv_fastopen_synack(sk, skb, &foc))
			return -1;

		if (sk->sk_write_pending ||		/* data waiting to be sent */
		    icsk->icsk_accept_queue.rskq_defer_accept ||	/* TCP_DEFER_ACCEPT: delay the ACK until data */
		    icsk->icsk_ack.pingpong) {		/* delayed-ack (pingpong) mode */
			/* Delay the ACK so it can ride along with data.
			 *
			 * Save one ACK. Data will be ready after
			 * several ticks, if write_pending is set.
			 *
			 * It may be deleted, but with this feature tcpdumps
			 * look so _wonderfully_ clever, that I was not able
			 * to stand against the temptation 8)     --ANK
			 */
			inet_csk_schedule_ack(sk);	/* mark an ACK as pending */
			icsk->icsk_ack.lrcvtime = tcp_time_stamp;	/* record time */
			tcp_enter_quickack_mode(sk);	/* speed up slow start */
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,	/* rearm delack timer */
						  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
			tcp_drop(sk, skb);
			return 0;
		} else {
			tcp_send_ack(sk);	/* no reason to wait: ACK now */
		}
		return -1;
	}

	/* No ACK in the segment */
	if (th->rst) {
		/* rfc793:
		 * "If the RST bit is set
		 *
		 *      Otherwise (no ACK) drop the segment and return."
		 */
		goto discard_and_undo;
	}

	/* PAWS check. */
	if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
	    tcp_paws_reject(&tp->rx_opt, 0))
		goto discard_and_undo;

	if (th->syn) {
		/* We see SYN without ACK. It is attempt of
		 * simultaneous connect with crossed SYNs (both ends called
		 * connect(); can even be a connect to self).
		 *
		 * We sent a SYN and are in SYN_SENT; receiving a SYN moves
		 * us to SYN_RECV.
		 */
		tcp_set_state(sk, TCP_SYN_RECV);

		if (tp->rx_opt.saw_tstamp) {
			tp->rx_opt.tstamp_ok = 1;
			tcp_store_ts_recent(tp);	/* peer ts: echo on next send */
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
		} else {
			tp->tcp_header_len = sizeof(struct tcphdr);
		}

		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;	/* next expected seq */
		tp->copied_seq = tp->rcv_nxt;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;	/* left edge of rcv window */

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 * never scaled (no window-scale factor during handshake).
		 */
		tp->snd_wnd    = ntohs(th->window);	/* peer's receive window */
		tp->snd_wl1    = TCP_SKB_CB(skb)->seq;	/* seq of last window update */
		tp->max_window = tp->snd_wnd;	/* largest peer window seen */

		tcp_ecn_rcv_syn(tp, th);

		tcp_mtup_init(sk);	/* MTU init / mss update */
		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
		tcp_initialize_rcv_mss(sk);	/* estimate of peer's send MSS */

		tcp_send_synack(sk);	/* answer the crossed SYN */
#if 0
		/* Note, we could accept data and URG from this segment.
		 * There are no obstacles to make this (except that we must
		 * either change tcp_recvmsg() to prevent it from returning data
		 * before 3WHS completes per RFC793, or employ TCP Fast Open).
		 *
		 * However, if we ignore data in ACKless segments sometimes,
		 * we have no reasons to accept it sometimes.
		 * Also, seems the code doing it in step6 of tcp_rcv_state_process
		 * is not flawless. So, discard packet for sanity.
		 * Uncomment this return to process the data.
		 */
		return -1;
#else
		goto discard;
#endif
	}

	/* "fifth, if neither of the SYN or RST bits is set then
	 * drop the segment and return."
	 */

discard_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	goto discard;

reset_and_undo:
	tcp_clear_options(&tp->rx_opt);
	tp->rx_opt.mss_clamp = saved_clamp;
	return 1;
}

同時打開時,在SYN_SENT狀態,收到SYN段後,狀態變爲SYN_RECV,而後發送SYNACK。以後若是收到合法的SYNACK後,就能完成鏈接的創建。

 

/* tcp_finish_connect() completes an active open.  It:
 *  1. moves the sock from SYN_SENT to ESTABLISHED;
 *  2. initializes TCP variables from the cached route metrics;
 *  3. picks up the default congestion-control algorithm;
 *  4. sizes the send and receive buffers;
 *  5. arms the keepalive timer when SO_KEEPALIVE is set;
 *  6. wakes up processes on the socket's wait queue (i.e. the
 *     process blocked in connect());
 *  7. sends SIGIO to the async-notification queue, if used, to
 *     signal the socket is writable.
 */
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_set_state(sk, TCP_ESTABLISHED);	/* SYN_SENT -> ESTABLISHED */

	if (skb) {
		/* Cache the input route; sk_rx_dst_set is
		 * inet_sk_rx_dst_set for IPv4. */
		icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
		security_inet_conn_established(sk, skb);
	}

	/* Make sure socket is routed, for correct metrics. */
	icsk->icsk_af_ops->rebuild_header(sk);	/* check/rebuild the route */

	tcp_init_metrics(sk);	/* seed from cached tcp metrics */

	tcp_init_congestion_control(sk);	/* run the cc algorithm's init hook */

	/* Prevent spurious tcp_cwnd_restart() on first data
	 * packet.
	 */
	tp->lsndtime = tcp_time_stamp;	/* record last data-send time */

	tcp_init_buffer_space(sk);	/* size buffers from peer's parameters */

	if (sock_flag(sk, SOCK_KEEPOPEN))	/* SO_KEEPALIVE: arm keepalive timer */
		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

	/* Peer sent no window-scale factor: header prediction (one of the
	 * fast-path preconditions) can be enabled right away. */
	if (!tp->rx_opt.snd_wscale)
		__tcp_fast_path_on(tp, tp->snd_wnd);
	else
		tp->pred_flags = 0;

	if (!sock_flag(sk, SOCK_DEAD)) {
		/* sock_def_wakeup: wake the process blocked in connect() */
		sk->sk_state_change(sk);
		/* async users get SIGIO: socket is now writable */
		sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
	}
}
相關文章
相關標籤/搜索