# Download the nginx 1.14.0 source tarball, unpack it, and enter the source tree.
wget https://nginx.org/download/nginx-1.14.0.tar.gz
tar -zxvf nginx-1.14.0.tar.gz
cd nginx-1.14.0
安裝依賴庫:
# Install the build prerequisites: a C/C++ compiler plus the PCRE, zlib and
# OpenSSL libraries together with their development headers.
# -y answers the yum confirmation prompt automatically.
yum install -y gcc-c++
yum install -y pcre pcre-devel
yum install -y zlib zlib-devel
yum install -y openssl openssl-devel

# Configure with the default options, compile, then install.
./configure
make
make install
nginx命令:
start nginx(Windows 下啓動;Linux 下直接執行 nginx 即可)
nginx -s stop(關閉)
nginx -s reload(重新載入配置,無需停止服務)
nginx負載均衡配置:
# nginx.conf - reverse-proxy / load-balancing example.
# Layout: main (global) context -> events block -> http block -> server block
# -> location blocks.

#user  nobody;                          # main (global) context
worker_processes  1;

#error_log  logs/error.log;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;


events {                                # events block
    worker_connections  1024;
}


http {                                  # http block
    include       mime.types;           # http-level (global) settings
    default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    #access_log  logs/access.log  main;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  65;

    #gzip  on;

    # Load-balancing pool. A per-server weight can be added to control the
    # share of traffic each backend receives, e.g.:
    #     server 127.0.0.1:8080 weight=1;
    upstream myserver.com {
        server 127.0.0.1:8080;
    }

    server {                            # server block
        # Anti-crawler rules (server-level settings)
        include anti_spider.conf;       # load the anti-crawler configuration

        listen       80;
        server_name  211.67.160.21;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        # Forward requests to the upstream pool defined above.
        # NOTE(review): this case-insensitive regex matches every request URI,
        # and nginx uses the first matching regex location in file order, so
        # the regex locations further down look unreachable - confirm intent.
        location ~* ^.+$ {              # location block
            proxy_pass  http://myserver.com;
            allow all;
        }

        error_page 404 https://www.baidu.com;

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }

        # proxy the PHP scripts to Apache listening on 127.0.0.1:80
        #
        #location ~ \.php$ {
        #    proxy_pass   http://127.0.0.1;
        #}

        # pass the PHP scripts to FastCGI server listening on 127.0.0.1:9000
        #
        #location ~ \.php$ {
        #    root           html;
        #    fastcgi_pass   127.0.0.1:9000;
        #    fastcgi_index  index.php;
        #    fastcgi_param  SCRIPT_FILENAME  /scripts$fastcgi_script_name;
        #    include        fastcgi_params;
        #}

        # deny access to .htaccess files, if Apache's document root
        # concurs with nginx's one
        #
        #location ~ /\.ht {
        #    deny  all;
        #}

        # Static assets by file extension, cached by clients for 30 days.
        # NOTE(review): "D:/register" is a Windows path - adjust the root when
        # deploying on Linux.
        location ~* ^.+\.(html|jpg|jpeg|gif|png|ico|css|js)$ {
            root D:/register;
            expires 30d;
            break;
        }

        # Everything under /static/, cached by clients for 30 days.
        location ~ ^/static/ {
            root D:/register;
            expires 30d;
            break;
        }

        # Remaining requests are handed to a FastCGI backend.
        # NOTE(review): the backend address uses port 80, the same port this
        # server block listens on - confirm this is the intended target.
        location ~ ^/ {
            fastcgi_pass 127.0.0.1:80;
            fastcgi_param PATH_INFO $fastcgi_script_name;
            fastcgi_param REQUEST_METHOD $request_method;
            fastcgi_param QUERY_STRING $query_string;
            fastcgi_param CONTENT_TYPE $content_type;
            fastcgi_param CONTENT_LENGTH $content_length;
            fastcgi_param SERVER_PROTOCOL $server_protocol;
            fastcgi_param SERVER_PORT $server_port;
            fastcgi_param SERVER_NAME $server_name;
            fastcgi_pass_header Authorization;
            fastcgi_intercept_errors off;
        }
    }


    # another virtual host using mix of IP-, name-, and port-based configuration
    #
    #server {
    #    listen       8000;
    #    listen       somename:8080;
    #    server_name  somename  alias  another.alias;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}


    # HTTPS server
    #
    #server {
    #    listen       443 ssl;
    #    server_name  localhost;

    #    ssl_certificate      cert.pem;
    #    ssl_certificate_key  cert.key;

    #    ssl_session_cache    shared:SSL:1m;
    #    ssl_session_timeout  5m;

    #    ssl_ciphers  HIGH:!aNULL:!MD5;
    #    ssl_prefer_server_ciphers  on;

    #    location / {
    #        root   html;
    #        index  index.html index.htm;
    #    }
    #}

}
nginx反爬蟲:
# anti_spider.conf - request filters meant to be included inside a server block.
# Each rule answers 403 Forbidden when it matches.

# Block scraping tools such as Scrapy (case-insensitive match).
if ($http_user_agent ~* (Scrapy|Curl|HttpClient)) {
    return 403;
}

# Block the listed user agents; the trailing ^$ alternative also rejects
# requests whose User-Agent is empty (case-sensitive match).
if ($http_user_agent ~ "WinHttp|WebZIP|FetchURL|node-superagent|java/|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Java|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|HttpClient|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Linguee Bot|^$" ) {
    return 403;
}

# Block every request method other than GET, HEAD and POST.
if ($request_method !~ ^(GET|HEAD|POST)$) {
    return 403;
}

# Block a single IP address:
#deny 123.45.6.7;
# Block a whole /8 range, 123.0.0.1 through 123.255.255.254:
#deny 123.0.0.0/8;
# Block a /16 range, 123.45.0.1 through 123.45.255.254:
#deny 123.45.0.0/16;
# Block a /24 range, 123.45.6.1 through 123.45.6.254:
#deny 123.45.6.0/24;

# The addresses below are known abusive clients.
#deny 58.95.66.0/24;
常見垃圾UA列表:
> FeedDemon 內容採集
> BOT/0.1 (BOT for JCE) SQL 注入
> CrawlDaddy SQL 注入
> Java 內容採集
> Jullo 內容採集
> Feedly 內容採集
> UniversalFeedParser 內容採集
> ApacheBench CC 攻擊器
> Swiftbot 無用爬蟲
> YandexBot 無用爬蟲
> AhrefsBot 無用爬蟲
> YisouSpider 無用爬蟲
> jikeSpider 無用爬蟲
> MJ12bot 無用爬蟲
> ZmEu phpMyAdmin 漏洞掃描
> WinHttp 採集、CC 攻擊
> EasouSpider 無用爬蟲
> HttpClient TCP 攻擊
> Microsoft URL Control 掃描
> YYSpider 無用爬蟲
> jaunty WordPress 爆破掃描器
> oBot 無用爬蟲
> Python-urllib 內容採集
> Indy Library 掃描
> FlightDeckReports Bot 無用爬蟲
> Linguee Bot 無用爬蟲