Socket網絡編程--簡單Web服務器(1)

時間 2019-11-21

原文鏈接

　　這一次的Socket系列準備講Web服務器。就是編寫一個簡單的Web服務器，具體怎麼做呢？我也不是很清楚流程，因此我找來了一個開源的小的Web服務器--tinyhttpd。這個服務器才500多行的代碼，使用C語言。這一小節就不講別的內容了。就對這個程序進行一些註釋和講解了。

　　主函數:

 1 int main(void)
 2 {
 3     int server_sock = -1;
 4     u_short port = 0;
 5     int client_sock = -1;
 6     struct sockaddr_in client_name;
 7     int client_name_len = sizeof(client_name);
 8     pthread_t newthread;
 9 
10     server_sock = startup(&port);//Web服務器打開指定端口
11     printf("httpd running on port %d\n", port);
12 
13     while (1)
14     {
15         client_sock = accept(server_sock,(struct sockaddr *)&client_name,&client_name_len);
16         if (client_sock == -1)
17             error_die("accept");
18         if (pthread_create(&newthread , NULL, accept_request, client_sock) != 0)
19             perror("pthread_create");
20     }
21     close(server_sock);
22     return(0);
23 }

　　從主函數咱們能夠知道，這個服務器是對於每個客戶端的鏈接都採用一個線程對其處理。上面對應的startup函數是對指定的端口進行socket的建立，綁定，監聽。

　　startup函數:

 1 int startup(u_short *port)
 2 {
 3     int httpd = 0;
 4     struct sockaddr_in name;
 5 
 6     httpd = socket(PF_INET, SOCK_STREAM, 0);
 7     if (httpd == -1)
 8         error_die("socket");
 9     memset(&name, 0, sizeof(name));
10     name.sin_family = AF_INET;
11     name.sin_port = htons(*port);
12     name.sin_addr.s_addr = htonl(INADDR_ANY);
13     if (bind(httpd, (struct sockaddr *)&name, sizeof(name)) < 0)
14         error_die("bind");
15     if (*port == 0)  /* if dynamically allocating a port */
16     {
17         int namelen = sizeof(name);
18         if (getsockname(httpd, (struct sockaddr *)&name, &namelen) == -1)
19             error_die("getsockname");
20         *port = ntohs(name.sin_port);
21     }
22     if (listen(httpd, 5) < 0)
23         error_die("listen");
24     return(httpd);
25 }

　　對於上面的getsockname函數：若是傳進來的port爲0，前面的bind並不會失敗，而是由系統自動分配一個當前可用的端口，因此要在bind以後使用getsockname函數來獲取這個Socket套接字實際綁定的地址和端口。此時返回的端口就是隨機的。

　　接下來就是一個對每一個客戶端鏈接的處理函數。

　　accept_request函數

 1 void accept_request(int client)
 2 {
 3     char buf[1024];
 4     int numchars;
 5     char method[255];
 6     char url[255];
 7     char path[512];
 8     size_t i, j;
 9     struct stat st;
10     int cgi = 0;      /* becomes true if server decides this is a CGI
11                        * program */
12     char *query_string = NULL;
13 
14     numchars = get_line(client, buf, sizeof(buf));//獲取第一行客戶端的請求 GET / HTTP/1.1 相似這樣的
15     i = 0; j = 0;
16     while (!ISspace(buf[j]) && (i < sizeof(method) - 1))//獲取第一個單詞，通常爲GET或POST 兩種請求方法
17     {
18         method[i] = buf[j];
19         i++; j++;
20     }
21     method[i] = '\0';
22 
23     if (strcasecmp(method, "GET") && strcasecmp(method, "POST"))//若是不是GET或POST方法的，那麼就回復一個不支持的請求方法頁面。話說若是本身寫服務器能夠加本身的請求方法。不過有個問題就是瀏覽器是沒有的？怎麼辦，看來還要本身弄個小的瀏覽器
24     {
25         unimplemented(client);
26         return;
27     }
28 
29     if (strcasecmp(method, "POST") == 0)//若是是使用POST方法，那麼就必定是cgi程序
30         cgi = 1;
31 
32     i = 0;
33     while (ISspace(buf[j]) && (j < sizeof(buf)))//取出空格
34         j++;
35     // GET / HTTP/1.1  接下來是取第二個字符串，第二個串是這次請求的頁面地址
36     while (!ISspace(buf[j]) && (i < sizeof(url) - 1) && (j < sizeof(buf)))
37     {
38         url[i] = buf[j];
39         i++; j++;
40     }
41     url[i] = '\0';
42 
43     if (strcasecmp(method, "GET") == 0)//若是是GET方法,GET方法和POST方法是有點區別的，GET方法是經過URL請求來傳遞用戶的數據，將表單等各個字段名稱與內容，以成對的字符串進行鏈接來傳遞參數的。
44         //例如 http://www.baidu.com/s?wd=cnblogs 這個URL就是使用百度搜索cnblogs的URL地址，baidu搜索怎麼知道我在輸入框中輸入的是什麼數據？就是經過這樣的一個參數來告訴它的。通常參數都是在?(問號)後面的。
45     {
46         query_string = url;
47         while ((*query_string != '?') && (*query_string != '\0'))//一直讀，直到遇到問號
48             query_string++;
49         if (*query_string == '?')//若是有問號，就表示可能要調用cgi程序了，不是簡單的靜態HTML頁面的。
50         {
51             cgi = 1;
52             *query_string = '\0';
53             query_string++;
54         }
55     }
56 
57     sprintf(path, "htdocs%s", url);//這個是web服務器的主目錄
58     if (path[strlen(path) - 1] == '/')
59         strcat(path, "index.html");//若是輸入的網址沒有指定網頁，那麼默認使用index.html這個頁面
60     if (stat(path, &st) == -1) {//根據文件名，獲取該文件的文件信息，若是爲-1，表示獲取該文件的文件信息失敗，可能的問題是沒有該文件，或是權限什麼的問題,具體失敗的緣由能夠查看errno
61         while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
62             numchars = get_line(client, buf, sizeof(buf));
63         not_found(client);//返回一個not found 404的頁面了
64     }
65     else
66     {
67         if ((st.st_mode & S_IFMT) == S_IFDIR)//若是該文件名對應的是一個目錄，那麼就訪問該目錄下的默認主頁index.html，這裏若是是jsp，就是index.jsp什麼的。
68             strcat(path, "/index.html");
69         if ((st.st_mode & S_IXUSR) || (st.st_mode & S_IXGRP) || (st.st_mode & S_IXOTH))//判斷該文件的執行權限問題
70             cgi = 1;
71         if (!cgi)//若是不是cgi程序，而是一個簡單的靜態頁面
72             serve_file(client, path);
73         else//一個cgi程序
74             execute_cgi(client, path, method, query_string);
75     }
76 
77     close(client);
78 }

　　關於GET和POST的區別，能夠參考別的博客，這裏就不詳解了。只說一個咱們處理時要注意的問題，那就是GET方法的參數是在URL地址中。而Post 方法經過 HTTP post 機制，將表單內各字段名稱與其內容放置在 HTTP 請求頭(header)以後的消息體(body)內一塊兒傳送給服務器端交由 action 屬性所指的程序處理，該程序會經過標準輸入(stdin)方式，將表單的數據讀出並加以處理。說的有點抽象，仍是上幾張圖片比較容易看吧。

　　這一張是get方法的(使用百度搜索功能，搜索的關鍵字是使用get方法提交)

　　這一張是post方法的(使用一個遊戲的登陸界面，該登陸界面的賬號和密碼的提交方式是使用POST方式)

　　能夠看到，在Hypertext Transfer Protocol後面有個Line-based text data。能夠看到有個這樣的字符串，username=...&passwd=...&serverid=...竟然明文傳輸，這個遊戲太不厚道了，伐開心了，我一直不知道。咱們能夠看到上面的Content-Length:53 就表示在\r\n\r\n後面會有接着的53個字符要接收。這個看起來是否是跟應答信息很像啊。

　　提示:經過get方法提交數據，可能會帶來安全性的問題。好比一個登錄頁面。當經過get方法提交數據時，用戶名和密碼將出如今URL上。
　　1.登錄頁面能夠被瀏覽器緩存；
　　2.其餘人能夠訪問客戶的這臺機器。
　　那麼，別人便可以從瀏覽器的歷史記錄中，讀取到此客戶的帳號和密碼。所以，在某些狀況下，get方法會帶來嚴重的安全性問題。所以在Form中，建議使用post方法。

　　好，咱們繼續講解其餘的函數了。

　　serve_file函數，就是對一個簡單的HTML靜態頁面進行返回

 1 void serve_file(int client, const char *filename)
 2 {
 3     FILE *resource = NULL;
 4     int numchars = 1;
 5     char buf[1024];
 6 
 7     buf[0] = 'A'; buf[1] = '\0';
 8     while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
 9         numchars = get_line(client, buf, sizeof(buf));//從上面的圖咱們能夠看到還有一些請求信息如Connection,Cache-Control,User-Agent,Accept等等的信息，這些在這個簡單Web服務器中就忽略了。若是要增長功能，就可使用這些信息。如最簡單的判斷使用的瀏覽器類型，操做系統等。
10 
11     resource = fopen(filename, "r");//根據GET 後面的文件名，將文件打開。
12     if (resource == NULL)
13         not_found(client);
14     else
15     {
16         headers(client, filename);//發送一個應答頭信息
17         cat(client, resource);//逐字符發送
18     }
19     fclose(resource);
20 }

　　cat函數，這個就不用講了。就是一個發送send

 1 void cat(int client, FILE *resource)
 2 {
 3     char buf[1024];
 4 
 5     fgets(buf, sizeof(buf), resource);
 6     while (!feof(resource))
 7     {
 8         send(client, buf, strlen(buf), 0);
 9         fgets(buf, sizeof(buf), resource);
10     }
11 }

　　還有一個關鍵的函數，execute_cgi這個函數，用來執行cgi程序的。

 1 void execute_cgi(int client, const char *path, const char *method, const char *query_string)
 2 {
 3     char buf[1024];
 4     int cgi_output[2];
 5     int cgi_input[2];
 6     pid_t pid;
 7     int status;
 8     int i;
 9     char c;
10     int numchars = 1;
11     int content_length = -1;
12 
13     buf[0] = 'A'; buf[1] = '\0';
14     if (strcasecmp(method, "GET") == 0)//同什麼的serve_file函數，對那些請求頭進行忽略
15     {
16         while ((numchars > 0) && strcmp("\n", buf))  /* read & discard headers */
17             numchars = get_line(client, buf, sizeof(buf));
18     }
19     else    /* POST方法 */
20     {
21         numchars = get_line(client, buf, sizeof(buf));
22         while ((numchars > 0) && strcmp("\n", buf))//這裏一樣是忽略請求頭
23         {
24             buf[15] = '\0';
25             if (strcasecmp(buf, "Content-Length:") == 0)//可是考慮到在請求頭後面還有信息要讀，而信息的大小就在這裏。這個Content-Length後面，也就是上面截圖是所說的。看了這個代碼是否是對剛纔說的有了更深的理解了
26                 content_length = atoi(&(buf[16]));//獲取後面字符的個數
27             numchars = get_line(client, buf, sizeof(buf));
28         }
29         //注意到了這裏後Post請求頭後面的附帶信息尚未讀出來，要在下面纔讀取。
30         if (content_length == -1) {
31             bad_request(client);
32             return;
33         }
34     }
35 
36     sprintf(buf, "HTTP/1.0 200 OK\r\n");
37     send(client, buf, strlen(buf), 0);
38 
39     if (pipe(cgi_output) < 0) {//建立管道，方便程序或進程之間的數據通訊
40         cannot_execute(client);
41         return;
42     }
43     if (pipe(cgi_input) < 0) {
44         cannot_execute(client);
45         return;
46     }
47     //子進程中，用剛纔初始化的管道替換掉標準輸入標準輸出，將請求參數加到環境變量中，調用execl執行cgi程序得到輸出。
48     if ( (pid = fork()) < 0 ) {
49         cannot_execute(client);
50         return;
51     }
52     if (pid == 0)  /* child: CGI script */
53     {
54         char meth_env[255];
55         char query_env[255];
56         char length_env[255];
57 
58         dup2(cgi_output[1], 1);//將文件描述符爲1(stdout)的句柄複製到output中
59         dup2(cgi_input[0], 0);//將文件描述符爲0(stdin)的句柄複製到input中
60         close(cgi_output[0]);//關閉output的讀端
61         close(cgi_input[1]);//關閉input的寫端
62         sprintf(meth_env, "REQUEST_METHOD=%s", method);
63         putenv(meth_env);//putenv保存到環境變量中
64         if (strcasecmp(method, "GET") == 0) {
65             sprintf(query_env, "QUERY_STRING=%s", query_string);
66             putenv(query_env);
67         }
68         else {   /* POST */
69             sprintf(length_env, "CONTENT_LENGTH=%d", content_length);
70             putenv(length_env);
71         }
72         execl(path, path, NULL);//保存在環境變量中的數據，還有parent進行的write到cgi_input[1]中的數據，都是存在的，能夠在cgi程序自己中進行判斷。看起來有點複雜，我到時候實現就實現個簡單的吧。
73         exit(0);
74     } else {    /* parent */
75         close(cgi_output[1]);//關閉output的寫端
76         close(cgi_input[0]);//關閉input的讀端
77         if (strcasecmp(method, "POST") == 0)//Post方式，讀取後面尚未讀的附帶信息
78             for (i = 0; i < content_length; i++) {
79                 recv(client, &c, 1, 0);
80                 write(cgi_input[1], &c, 1);//讀取到的信息一個一個字符寫到input的寫端
81             }
82         while (read(cgi_output[0], &c, 1) > 0)//循環讀取output的讀端，而後發送個客戶端，注意這裏接收的是cgi程序的輸出(也就是打印在stdin上的數據)
83             send(client, &c, 1, 0);
84 
85         close(cgi_output[0]);
86         close(cgi_input[1]);
87         waitpid(pid, &status, 0);//等待子進程結束
88     }
89 }

　　上面子進程中調用execl執行cgi程序的地方，原來的代碼就是那樣，但聽說好像是錯的。應該是:execl(path,參數列表,NULL);而參數列表對於get方法就是query_string，而對於post方法就沒有參數，它的參數是在父進程中把讀到的數據寫入cgi_input[1]、再經過stdin進行輸入，因此cgi程序要手動從標準輸入stdin讀取數據。如今重要的函數都基本完了，接下來就是幾個應答信息頭。

　　400 Bad Request

 1 void bad_request(int client)
 2 {
 3     char buf[1024];
 4 
 5     sprintf(buf, "HTTP/1.0 400 BAD REQUEST\r\n");
 6     send(client, buf, sizeof(buf), 0);
 7     sprintf(buf, "Content-type: text/html\r\n");
 8     send(client, buf, sizeof(buf), 0);
 9     sprintf(buf, "\r\n");
10     send(client, buf, sizeof(buf), 0);
11     sprintf(buf, "<P>Your browser sent a bad request, ");
12     send(client, buf, sizeof(buf), 0);
13     sprintf(buf, "such as a POST without a Content-Length.\r\n");
14     send(client, buf, sizeof(buf), 0);
15 }

　　500 Internal Server Error

 1 void cannot_execute(int client)
 2 {
 3     char buf[1024];
 4 
 5     sprintf(buf, "HTTP/1.0 500 Internal Server Error\r\n");
 6     send(client, buf, strlen(buf), 0);
 7     sprintf(buf, "Content-type: text/html\r\n");
 8     send(client, buf, strlen(buf), 0);
 9     sprintf(buf, "\r\n");
10     send(client, buf, strlen(buf), 0);
11     sprintf(buf, "<P>Error prohibited CGI execution.\r\n");
12     send(client, buf, strlen(buf), 0);
13 }

　　200 OK

 1 void headers(int client, const char *filename)
 2 {
 3     char buf[1024];
 4     (void)filename;  /* could use filename to determine file type */
 5 
 6     strcpy(buf, "HTTP/1.0 200 OK\r\n");
 7     send(client, buf, strlen(buf), 0);
 8     strcpy(buf, SERVER_STRING);
 9     send(client, buf, strlen(buf), 0);
10     sprintf(buf, "Content-Type: text/html\r\n");
11     send(client, buf, strlen(buf), 0);
12     strcpy(buf, "\r\n");
13     send(client, buf, strlen(buf), 0);
14 }

　　404 Not Found

 1 void not_found(int client)
 2 {
 3     char buf[1024];
 4 
 5     sprintf(buf, "HTTP/1.0 404 NOT FOUND\r\n");
 6     send(client, buf, strlen(buf), 0);
 7     sprintf(buf, SERVER_STRING);
 8     send(client, buf, strlen(buf), 0);
 9     sprintf(buf, "Content-Type: text/html\r\n");
10     send(client, buf, strlen(buf), 0);
11     sprintf(buf, "\r\n");
12     send(client, buf, strlen(buf), 0);
13     sprintf(buf, "<HTML><TITLE>Not Found</TITLE>\r\n");
14     send(client, buf, strlen(buf), 0);
15     sprintf(buf, "<BODY><P>The server could not fulfill\r\n");
16     send(client, buf, strlen(buf), 0);
17     sprintf(buf, "your request because the resource specified\r\n");
18     send(client, buf, strlen(buf), 0);
19     sprintf(buf, "is unavailable or nonexistent.\r\n");
20     send(client, buf, strlen(buf), 0);
21     sprintf(buf, "</BODY></HTML>\r\n");
22     send(client, buf, strlen(buf), 0);
23 }

　　501 Method Not Implemented

 1 void unimplemented(int client)
 2 {
 3     char buf[1024];
 4 
 5     sprintf(buf, "HTTP/1.0 501 Method Not Implemented\r\n");
 6     send(client, buf, strlen(buf), 0);
 7     sprintf(buf, SERVER_STRING);
 8     send(client, buf, strlen(buf), 0);
 9     sprintf(buf, "Content-Type: text/html\r\n");
10     send(client, buf, strlen(buf), 0);
11     sprintf(buf, "\r\n");
12     send(client, buf, strlen(buf), 0);
13     sprintf(buf, "<HTML><HEAD><TITLE>Method Not Implemented\r\n");
14     send(client, buf, strlen(buf), 0);
15     sprintf(buf, "</TITLE></HEAD>\r\n");
16     send(client, buf, strlen(buf), 0);
17     sprintf(buf, "<BODY><P>HTTP request method not supported.\r\n");
18     send(client, buf, strlen(buf), 0);
19     sprintf(buf, "</BODY></HTML>\r\n");
20     send(client, buf, strlen(buf), 0);
21 }

　　這個簡單的服務器目前應該是不支持圖片聲音等非文本信息(到我本身寫時，不知道能不能實現)。總的來講，此次對整個HTTP協議的處理過程，還有Web服務器的內部實現簡單的進行了解。接下來的幾個小節，我就本身參考這個程序，本身寫一個。固然代碼確定沒有這個程序那麼簡練。不過若是能夠實現，仍是不錯的。到時候對我開發web服務器時遇到的問題再進行講解。

　　參考資料:

　　GET,POST的區別 http://blog.sina.com.cn/s/blog_50e4caf701009eys.html

　　什麼是CGI http://www.doc88.com/p-173100939493.html

　　本文地址: http://www.cnblogs.com/wunaozai/p/3926033.html