// TypeScript compiler configuration for the crawler examples.
// Kept one option per line: when this file is collapsed onto a single line,
// the commented-out "sourceRoot" entry swallows everything after it.
{
  "compilerOptions": {
    "target": "ES6",
    "module": "commonjs",
    "noEmitOnError": true,
    "noImplicitAny": true,
    "experimentalDecorators": true,
    "sourceMap": false,
    // "sourceRoot": "./",
    "outDir": "./out"
  },
  "exclude": [
    "node_modules"
  ]
}
import superagent = require('superagent');
import cheerio = require('cheerio');

/**
 * Issues an HTTP GET for `url` through superagent and exposes the
 * callback-style result as a promise.
 *
 * @param url Address to request.
 * @returns A promise that resolves with the superagent response, or rejects
 *          with the error superagent reported (the error is logged first).
 */
export const remote_get = function (url: string) {
    return new Promise<superagent.Response>((fulfil, fail) => {
        const request = superagent.get(url);
        request.end((error, response) => {
            if (error) {
                // Log before rejecting so the failure is visible even if the
                // caller never attaches a rejection handler.
                console.log(error)
                fail(error);
                return;
            }
            fulfil(response);
        });
    });
}
import api = require('./api');

/**
 * Downloads the Baidu home page and prints the response body text.
 * A failed request is caught and logged here instead of being left as an
 * unhandled promise rejection.
 */
const go = async () => {
    try {
        const res = await api.remote_get('http://www.baidu.com/');
        console.log(res.text);
    } catch (err) {
        console.log(err);
    }
}

go();
import api = require('./api');
import cheerio = require('cheerio');

// Site origin WITHOUT a trailing slash. Topic hrefs on the index page are
// assumed to be root-relative (start with '/'), so appending them to an
// origin that ends in '/' would produce 'http://cnodejs.org//topic/...'.
const site_origin = 'http://cnodejs.org';

/**
 * Fetches the cnodejs.org index page and prints two parallel arrays:
 * the topic titles and the absolute URL of each topic.
 */
const go = async () => {
    const res = await api.remote_get(site_origin + '/');
    const $ = cheerio.load(res.text);
    const urls: string[] = [];
    const titles: string[] = [];
    $('.topic_title_wrapper').each((index, element) => {
        const link = $(element).find('.topic_title').first();
        const href = link.attr('href');
        if (!href) {
            // No usable link in this wrapper: skip it so that titles and urls
            // stay aligned and no '...undefined' URL is emitted.
            return;
        }
        titles.push(link.text().trim());
        urls.push(site_origin + href);
    });
    console.log(titles, urls);
}

// Report a failed request instead of leaving an unhandled rejection.
go().catch(err => console.log(err));
import api = require('./api');
import cheerio = require('cheerio');

/**
 * Fetches the cnodejs.org index page, then downloads every linked topic
 * concurrently and prints the text of each topic's first `.topic_content`
 * element. Output order follows request completion, not page order.
 *
 * The returned promise settles only after all topic requests are done
 * (or rejects as soon as one of them fails).
 */
const go = async () => {
    const res = await api.remote_get('http://cnodejs.org/');
    const $ = cheerio.load(res.text);

    // Collect the links synchronously: cheerio's each() does not wait for an
    // async callback, so the downloads must be started and awaited separately.
    const urls: string[] = [];
    $('.topic_title_wrapper').each((index, element) => {
        const href = $(element).find('.topic_title').first().attr('href');
        if (href) {
            urls.push('http://cnodejs.org' + href);
        }
    });

    // All requests start immediately; Promise.all makes their completion and
    // their failures observable to the caller.
    await Promise.all(urls.map(async (url) => {
        const res_content = await api.remote_get(url);
        const $_content = cheerio.load(res_content.text);
        console.log($_content('.topic_content').first().text());
    }));
}

// Report a failed request instead of leaving an unhandled rejection.
go().catch(err => console.log(err));
/**
 * Builds a timer-backed promise that can be awaited to pause an async flow.
 *
 * @param senconds Delay in seconds; scaled by 1000 to get the millisecond
 *                 value handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
export const wait_seconds = function (senconds: number) {
    const delay_ms = senconds * 1000;
    return new Promise(done => {
        // The resolver itself is the timer callback, so it is invoked
        // without arguments when the delay elapses.
        setTimeout(done, delay_ms);
    });
}
import superagent = require('superagent');
import cheerio = require('cheerio');

/**
 * Performs an HTTP GET with superagent and wraps the callback result in a
 * promise.
 *
 * @param url Address to request.
 * @returns A promise resolving with the superagent response; it rejects with
 *          the reported error, which is logged first.
 */
export const remote_get = function (url: string) {
    return new Promise<superagent.Response>(function (resolve, reject) {
        superagent.get(url)
            .end(function (err, res) {
                if (!err) {
                    resolve(res);
                } else {
                    console.log(err)
                    reject(err);
                }
            });
    });
}

/**
 * Fetches the cnodejs.org index page and collects the absolute URL of every
 * topic link found in a `.topic_title_wrapper` element.
 *
 * Declared `async` because the body awaits the index request; without it the
 * `await` does not compile.
 *
 * @returns A promise resolving with the topic URLs in page order.
 */
export const get_index_urls = async function () {
    const res = await remote_get('http://cnodejs.org/');
    const $ = cheerio.load(res.text);
    const urls: string[] = [];
    // Plain synchronous callback: each() ignores promises returned by its
    // callback, and nothing in here needs to await.
    $('.topic_title_wrapper').each((index, element) => {
        const href = $(element).find('.topic_title').first().attr('href');
        if (href) {
            // Skip wrappers without a link instead of emitting '...undefined'.
            urls.push('http://cnodejs.org' + href);
        }
    });
    return urls;
}

/**
 * Downloads one topic page and extracts the text of its first
 * `.topic_content` element.
 *
 * @param url Absolute URL of the topic page.
 * @returns A promise resolving with the extracted text.
 */
export const get_content = async function (url: string) {
    const res = await remote_get(url);
    const $ = cheerio.load(res.text);
    return $('.topic_content').first().text();
}
import api = require('./api');
import helper = require('./helper');
import cheerio = require('cheerio');

/**
 * Crawls cnodejs.org topic by topic: obtains the topic URLs, then for each
 * one waits a second and prints the topic's content text.
 *
 * The index page is requested only once, inside `api.get_index_urls()`; the
 * extra request whose result was never used has been removed.
 */
const go = async () => {
    const urls = await api.get_index_urls();
    for (const url of urls) {
        // Pause between requests so the topics are fetched one at a time,
        // one second apart.
        await helper.wait_seconds(1);
        const text = await api.get_content(url);
        console.log(text);
    }
}

// Report a failed request instead of leaving an unhandled rejection.
go().catch(err => console.log(err));
import * as mongoose from 'mongoose';

// Open the MongoDB connection when this module is first loaded. The options
// object passes server.poolSize = 20 to mongoose.
mongoose.connect('mongodb://127.0.0.1/cnodejs_data', {
    server: { poolSize: 20 }
}, function (err) {
    if (err) {
        // Say why the process is terminating; exiting silently leaves no
        // clue that the database connection was the problem.
        console.error(err);
        process.exit(1);
    }
});

// models
export const Article = require('./article');
/**
 * Shape of an article record: its title, source URL and body text.
 *
 * Fields use the primitive `string` type rather than the boxed `String`
 * wrapper, so values read from an article can be used wherever a plain
 * string is expected.
 */
interface IArticle {
    title: string;
    url: string;
    text: string;
}

export = IArticle;
import mongoose = require('mongoose');
import IArticle = require('./IArticle');

/** Article fields combined with the members of a mongoose document. */
interface IArticleModel extends IArticle, mongoose.Document { }

// Field definitions for the schema: three string-typed fields.
const article_fields = {
    title: { type: String },
    url: { type: String },
    text: { type: String },
};

const ArticleSchema = new mongoose.Schema(article_fields);

// Model registered under the name "Article"; it is this module's export.
const Article = mongoose.model<IArticleModel>('Article', ArticleSchema);

export = Article;
import superagent = require('superagent');
import cheerio = require('cheerio');
import models = require('./models');

const Article = models.Article;

/**
 * Performs an HTTP GET with superagent and wraps the callback result in a
 * promise.
 *
 * @param url Address to request.
 * @returns A promise resolving with the superagent response, or rejecting
 *          with the error superagent reported.
 */
export const remote_get = function (url: string) {
    return new Promise<superagent.Response>((resolve, reject) => {
        superagent.get(url)
            .end(function (err, res) {
                if (!err) {
                    resolve(res);
                } else {
                    reject(err);
                }
            });
    });
}

/**
 * Fetches the cnodejs.org index page and collects the absolute URL of every
 * topic link found in a `.topic_title_wrapper` element.
 *
 * @returns A promise resolving with the topic URLs in page order.
 */
export const get_index_urls = async function () {
    const res = await remote_get('http://cnodejs.org/');
    const $ = cheerio.load(res.text);
    const urls: string[] = [];
    $('.topic_title_wrapper').each((index, element) => {
        const href = $(element).find('.topic_title').first().attr('href');
        if (href) {
            // Skip wrappers without a link instead of emitting '...undefined'.
            urls.push('http://cnodejs.org' + href);
        }
    });
    return urls;
}

/**
 * Downloads one topic page, builds an Article from it (content text, title
 * with the two badge labels stripped, and the source URL) and saves it.
 *
 * @param url Absolute URL of the topic page.
 * @returns A promise that resolves once the article has been saved and
 *          rejects if the request or the save fails.
 */
export const fetch_content = async function (url: string) {
    const res = await remote_get(url);
    const $ = cheerio.load(res.text);
    const article = new Article();
    article.text = $('.topic_content').first().text();
    article.title = $('.topic_full_title').first().text().replace('置頂', '').replace('精華', '').trim();
    article.url = url;
    console.log('獲取成功:' + article.title);
    // Await the save so a persistence error reaches the caller and the caller
    // does not move on while the write is still in flight.
    await article.save();
}
import api = require('./api');
import helper = require('./helper');
import cheerio = require('cheerio');

/**
 * Entry point: walks the topic URLs one by one, pausing a second before each
 * fetch. Any error stops the walk and is logged; the closing message is
 * printed in either case.
 */
const main = async () => {
    try {
        const topic_urls = await api.get_index_urls();
        for (const topic_url of topic_urls) {
            await helper.wait_seconds(1);
            await api.fetch_content(topic_url);
        }
    } catch (failure) {
        console.log(failure);
    }
    console.log('完畢!');
};

main();