各類網站的資源都有防盜鏈處理。下載流程分爲三步:第一步,獲取資源的真實鏈接;第二步,分析防盜鏈機制;第三步,對資源去重。
NicoNico 視頻資源採用 Cookies 校驗,破解難度大且麻煩,因此借助第三方網站 http://www.movies-downloader.com,它提供了一個 POST 方式的搜索 API 可以使用。在爬取的時候發現,老資源都會統一指向同一個溫馨提示視頻資源,簡單地校驗視頻的 md5 就可以去重了。
BiliBili 視頻資源採用鏈接地址解密的辦法,解析同樣可以借助第三方 http://www.flvurl.cn/。目前發現資源的重複率不高。
Tumblr 最簡單,什麼加密都沒有,直接下載就可以了。但是重複資源非常多,可以同時對封面和頭像計算 md5 來做去重處理。
/** * Created by ray on 2017/6/18. * 獲取 BILIBILI 視頻真實地址 */ @Service public class FlvUrlService { @Value("${flvurl.appId}") String appIdKey; @Autowired RestTemplate restTemplate; private String baseUrl = "http://api.v2.flvurl.cn/parse/"; private Logger logger = LoggerFactory.getLogger(this.getClass()); public JSONObject parse (String url){ String reqUrl = baseUrl + "?appid=" + appIdKey + "&type=vod&url=" + url; String string = restTemplate.getForObject(reqUrl , String.class); JSONObject jsonObject = JSON.parseObject(string,JSONObject.class); return jsonObject; } public FlvUrlRecModel parseToObject (String url){ String reqUrl = baseUrl + "?appid=" + appIdKey + "&type=vod&url=" + url; String string = ""; FlvUrlRecModel jsonObject = null; try { string = restTemplate.getForObject(reqUrl , String.class); jsonObject = JSON.parseObject(string,FlvUrlRecModel.class); return jsonObject; } catch (Exception e){ logger.error(reqUrl); return null; } }
/** * Created by ray on 2017/7/23. * 獲取NICONICO 視頻真實地址 */ @Service public class NicoVideoUrlService { @Autowired RestTemplate restTemplate; //post private String baseUrl = "http://www.movies-downloader.com/a.cgi"; private Logger logger = LoggerFactory.getLogger(this.getClass()); public NicoVideoRecModel parse (String url){ //url = "http://www.nicovideo.jp/watch/sm31610292"; NicoVideoRecModel recModel = null; String reqUrl = baseUrl; try { //head HttpHeaders headers = new HttpHeaders(); headers.setContentType(MediaType.APPLICATION_FORM_URLENCODED); //body MultiValueMap<String, String> map = new LinkedMultiValueMap<String, String>(); map.add("url", url); //請求封裝 HttpEntity<MultiValueMap<String, String>> request = new HttpEntity<MultiValueMap<String, String>>(map, headers); String body = restTemplate.postForEntity(reqUrl,request,String.class).getBody(); //logger.info("req map:" + map.toString() + " body:" + body); Html html = new Html(body); String aid = url.substring(url.lastIndexOf("/") + 1,url.length()); String title = html.xpath("//b/text()").toString(); String imgsrc = html.xpath("//img/@src").toString(); String href = html.xpath("//a/@href").toString().split("=")[1]; String videoUrl = ""; videoUrl = URLDecoder.decode(href,"UTF-8"); recModel = new NicoVideoRecModel(); recModel.setAid(aid); recModel.setTitle(title); recModel.setImgsrc(imgsrc); recModel.setVideoUrl(videoUrl); logger.info("Nico Api:" + map.toString() + " body:" + JSON.toJSONString(recModel)); } catch (Exception e){ logger.error(reqUrl); } return recModel; } }
原文:http://raychow.linkfun.top/2017/12/22/archives/9_javaSpring/spriderDown/index/