網絡協議棧學習(二)建立 socket

  下面通過學習linux 1.2.13源碼進一步理解socket通訊機制。對該版本源碼的學習主要參考《Linux內核網絡棧源代碼情景分析》(曹桂平 編著)。

  要理解socket的本質,就要理解當調用socket函數時,該函數到底建立了什麼?返回了什麼?

/* User-level socket() prototype; the kernel-side counterpart quoted in this article is sock_socket(). */
int  socket(int family, int type, int protocol);

  socket 函數爲用戶層函數,該函數對應的內核函數爲sock_socket(socket.c文件),源碼如下:

/*
 *    Kernel-side handler for socket(): look up the protocol family,
 *    validate the socket type, allocate a socket, let the family's
 *    create hook initialise it, then attach it to a file descriptor.
 *
 *    Returns the new file descriptor on success. On failure returns
 *    -EINVAL (unknown family, unsupported type, negative protocol, or
 *    get_fd() reported a negative value), -ENOSR (sock_alloc() returned
 *    NULL), or the negative value returned by the family's create hook.
 */
static int sock_socket(int family, int type, int protocol)
{
    struct proto_ops *ops = NULL;
    struct socket *sock;
    int slot, err, fd;

    /* Scan the registered protocol families for the one requested. */
    for (slot = 0; slot < NPROTO; slot++) {
        if (pops[slot] != NULL && pops[slot]->family == family) {
            ops = pops[slot];
            break;
        }
    }
    if (ops == NULL)
        return -EINVAL;

    /*
     *    Only socket types we know how to manipulate are accepted, and
     *    the protocol number must not be negative. The family can still
     *    reject the protocol later.
     */
    switch (type) {
    case SOCK_STREAM:
    case SOCK_DGRAM:
    case SOCK_SEQPACKET:
    case SOCK_RAW:
    case SOCK_PACKET:
        break;
    default:
        return -EINVAL;
    }
    if (protocol < 0)
        return -EINVAL;

    /*
     *    Allocate the socket and let the family set things up. If the
     *    protocol is 0, the family is instructed to select an
     *    appropriate default.
     */
    sock = sock_alloc();
    if (sock == NULL) {
        printk("NET: sock_socket: no more sockets\n");
        /* Was: EAGAIN, but we are out of system resources! */
        return -ENOSR;
    }

    sock->type = type;
    sock->ops = ops;

    err = sock->ops->create(sock, protocol);
    if (err < 0) {
        sock_release(sock);
        return err;
    }

    fd = get_fd(SOCK_INODE(sock));
    if (fd < 0) {
        sock_release(sock);
        return -EINVAL;
    }

    return fd;
}

    sock_socket 函數完成以下工作:

(1)分配socket、sock結構,這兩個結構在網絡棧的不同層次表示一個套接字鏈接。

(2)分配inode、file結構用於普通文件操作。

(3)分配一個文件描述符並返回給應用程序作爲以後的操作句柄。

  sock_alloc 函數用於分配一個inode節點,並返回該節點中socket結構的指針。

/*
 *    Obtain an empty inode, mark it as a socket inode owned by the
 *    current task, and initialise the struct socket embedded in it.
 *    Returns a pointer to that embedded socket, or NULL when no inode
 *    could be obtained.
 */
struct socket *sock_alloc(void)
{
    struct inode *ino = get_empty_inode();
    struct socket *s;

    if (ino == NULL)
        return NULL;

    /* Inode side: flag it as a socket and record the owner. */
    ino->i_mode = S_IFSOCK;
    ino->i_sock = 1;
    ino->i_uid = current->uid;
    ino->i_gid = current->gid;

    /* The socket lives inside the inode's union; no separate allocation. */
    s = &ino->u.socket_i;

    s->state = SS_UNCONNECTED;
    s->flags = 0;

    /* Nothing is attached to a fresh socket yet. */
    s->ops = NULL;
    s->data = NULL;
    s->conn = NULL;
    s->iconn = NULL;
    s->next = NULL;
    s->fasync_list = NULL;

    /* Cross-links between the socket and its inode. */
    s->wait = &ino->i_wait;
    s->inode = ino;        /* "backlink": we could use pointer arithmetic instead */

    sockets_in_use++;
    return s;
}

 inode的定義如下:

/* include/fs.h */
/*
 * In-kernel inode. Besides the generic bookkeeping fields it carries a
 * union `u` of type-specific data; for sockets the `socket_i` member
 * holds the struct socket itself, which is what sock_alloc() hands out.
 */
struct inode {
    dev_t        i_dev;
    unsigned long    i_ino;
    umode_t        i_mode;        /* sock_alloc() stores S_IFSOCK here */
    nlink_t        i_nlink;
    uid_t        i_uid;        /* sock_alloc() copies current->uid */
    gid_t        i_gid;        /* sock_alloc() copies current->gid */
    dev_t        i_rdev;
    off_t        i_size;
    time_t        i_atime;
    time_t        i_mtime;
    time_t        i_ctime;
    unsigned long    i_blksize;
    unsigned long    i_blocks;
    unsigned long    i_version;
    struct semaphore i_sem;
    struct inode_operations * i_op;
    struct super_block * i_sb;
    struct wait_queue * i_wait;    /* sock_alloc() points sock->wait at this field */
    struct file_lock * i_flock;
    struct vm_area_struct * i_mmap;
    struct inode * i_next, * i_prev;
    struct inode * i_hash_next, * i_hash_prev;
    struct inode * i_bound_to, * i_bound_by;
    struct inode * i_mount;
    unsigned short i_count;        /* incremented by get_fd() when a file is attached */
    unsigned short i_wcount;
    unsigned short i_flags;
    unsigned char i_lock;
    unsigned char i_dirt;
    unsigned char i_pipe;
    unsigned char i_sock;        /* set to 1 by sock_alloc() for socket inodes */
    unsigned char i_seek;
    unsigned char i_update;
    /* Type-specific payload; only one member is meaningful for a given inode. */
    union {
        struct pipe_inode_info pipe_i;
        struct minix_inode_info minix_i;
        struct ext_inode_info ext_i;
        struct ext2_inode_info ext2_i;
        struct hpfs_inode_info hpfs_i;
        struct msdos_inode_info msdos_i;
        struct umsdos_inode_info umsdos_i;
        struct iso_inode_info isofs_i;
        struct nfs_inode_info nfs_i;
        struct xiafs_inode_info xiafs_i;
        struct sysv_inode_info sysv_i;
        struct socket socket_i;    /* embedded socket; sock_alloc() returns its address */
        void * generic_ip;
    } u;
};

  inode 結構是文件系統的一個結構體,該結構體中的成員變量u指明瞭該inode結構具體的文件類型,當inode是用於socket通訊時,u中使用的成員就是socket_i。sock_alloc 的作用就是創建inode結構體,然後返回socket_i的地址。至於具體如何分配inode涉及到文件系統方面的知識,這裏暫不討論。

  當協議族爲AF_INET時,ops->create 將調用inet_create(struct socket*sock, int protocol)函數。該函數將建立一個sock結構體並使得socket的data指針指向該sock結構體。

/*
 *    Create the INET-level `struct sock` that backs a BSD `struct socket`.
 *
 *    sock     - socket being set up; sock->type selects the transport.
 *    protocol - requested protocol number; 0 asks for the default for
 *               stream/seqpacket (TCP) and datagram (UDP) sockets, and is
 *               rejected for raw and packet sockets.
 *
 *    Returns 0 on success, otherwise a negative errno:
 *      -ENOBUFS          kmalloc() of the sock failed
 *      -EPROTONOSUPPORT  protocol does not match the socket type, or is 0
 *                        for SOCK_RAW / SOCK_PACKET
 *      -EPERM            SOCK_RAW / SOCK_PACKET requested and suser() is false
 *      -ESOCKTNOSUPPORT  unknown socket type
 *      or the non-zero value returned by the protocol's init hook.
 *
 *    NOTE: the "......" lines are elisions made by the article; this is
 *    not the complete function body.
 */
static int inet_create(struct socket *sock, int protocol)
{
    struct sock *sk;
    struct proto *prot;
    int err;

    sk = (struct sock *) kmalloc(sizeof(*sk), GFP_KERNEL);
    if (sk == NULL) 
        return(-ENOBUFS);
    sk->num = 0;
    sk->reuse = 0;
    /* Pick the transport protocol from the socket type; every error path frees sk. */
    switch(sock->type) 
    {
        case SOCK_STREAM:
        case SOCK_SEQPACKET:
            /* A non-zero protocol must be TCP; 0 defaults to TCP. */
            if (protocol && protocol != IPPROTO_TCP) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            protocol = IPPROTO_TCP;
            sk->no_check = TCP_NO_CHECK;
            prot = &tcp_prot;
            break;

        case SOCK_DGRAM:
            /* A non-zero protocol must be UDP; 0 defaults to UDP. */
            if (protocol && protocol != IPPROTO_UDP) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            protocol = IPPROTO_UDP;
            sk->no_check = UDP_NO_CHECK;
            prot=&udp_prot;
            break;
      
        case SOCK_RAW:
            /* Raw sockets require suser() and an explicit protocol. */
            if (!suser()) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPERM);
            }
            if (!protocol) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            prot = &raw_prot;
            sk->reuse = 1;
            sk->no_check = 0;    /*
                         * Doesn't matter no checksum is
                         * performed anyway.
                         */
            sk->num = protocol;
            break;

        case SOCK_PACKET:
            /* Packet sockets have the same requirements as raw sockets. */
            if (!suser()) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPERM);
            }
            if (!protocol) 
            {
                kfree_s((void *)sk, sizeof(*sk));
                return(-EPROTONOSUPPORT);
            }
            prot = &packet_prot;
            sk->reuse = 1;
            sk->no_check = 0;    /* Doesn't matter no checksum is
                         * performed anyway.
                         */
            sk->num = protocol;
            break;

        default:
            kfree_s((void *)sk, sizeof(*sk));
            return(-ESOCKTNOSUPPORT);
    }
    sk->socket = sock;    /* back-pointer from the sock to its socket */
#ifdef CONFIG_TCP_NAGLE_OFF
    sk->nonagle = 1;
#else    
    sk->nonagle = 0;
#endif  
    sk->type = sock->type;
    sk->stamp.tv_sec=0;
    sk->protocol = protocol;
        ...... 
    sk->timer.function = &net_timer;
    skb_queue_head_init(&sk->back_log);
    sk->blog = 0;
    sock->data =(void *) sk; // the socket's data pointer now refers to the sock
    sk->dummy_th.doff = sizeof(sk->dummy_th)/4;
       ......
    /*
     * NOTE(review): `prot` is chosen above but no assignment to sk->prot is
     * visible in this excerpt; presumably it happens in an elided part.
     * Confirm against the full source before relying on sk->prot here.
     */
    if (sk->prot->init) 
    {
        err = sk->prot->init(sk);
        if (err != 0) 
        {
            destroy_sock(sk);
            return(err);
        }
    }
    return(0);
}

  最後調用get_fd 返回一個文件描述符給上層應用。

/* socket.c */
static int get_fd(struct inode *inode)
{
    int fd;
    struct file *file;

    /*
     *    Find a file descriptor suitable for return to the user. 
     */

    file = get_empty_filp(); // 獲取一個閒置的file結構
    if (!file) 
        return(-1);

    for (fd = 0; fd < NR_OPEN; ++fd)
        if (!current->files->fd[fd]) 
            break;
    if (fd == NR_OPEN) 
    {
        file->f_count = 0;
        return(-1);
    }

    FD_CLR(fd, &current->files->close_on_exec);
        current->files->fd[fd] = file;  
    file->f_op = &socket_file_ops; // socket 文件操做
    file->f_mode = 3;
    file->f_flags = O_RDWR;
    file->f_count = 1;
    file->f_inode = inode;
    if (inode) 
        inode->i_count++;
    file->f_pos = 0;
    return(fd);
}

  get_fd 用於爲網絡套接字分配一個文件描述符,分配描述符的同時需要一個file結構,每個file結構都需要一個inode結構對應。內核維護一個file結構數組,get_empty_filp 函數即通過檢查該數組,獲取一個閒置的成員。f_op 字段的賦值實現了網絡操作的普通文件接口。如果調用write、read函數進行操作就會調用相應的sock_write 和 sock_read 函數。

  如何根據文件描述符fd找到相應的sock?

相關文章
相關標籤/搜索