您的位置:首页 > Web前端 > Node.js

nodejs 写爬虫爬取最近上映电影数据

2017-01-20 19:21 369 查看
直接上代码:这是基于 https 模块封装的请求模块(crawler.js)

'use strict'
//引入内建和第三方模块
const https = require("https")
const url = require("url")
const Promise = require("bluebird")

/**
 * Issue an HTTPS GET request and collect the response body as a UTF-8 string.
 *
 * @param {string|URL|object} url - Request target, passed unchanged to `https.get`.
 * @returns {Promise<string>} Resolves with the complete response body when the
 *   status code is 200. Rejects with an Error when the status code is anything
 *   else, or when the request or the response stream emits an error.
 */
function start(url){
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      const statusCode = res.statusCode

      if (statusCode !== 200) {
        const error = new Error(`请求失败.\ncode:${statusCode}`)
        console.log(error)
        // Discard the unread body so the underlying socket can be released.
        res.resume()
        // Settle the promise; otherwise callers would wait forever on a bad status.
        reject(error)
        return
      }

      res.setEncoding("utf-8")
      let body = ""

      res.on("data", (chunk) => {
        body += chunk
      })

      res.on("end", () => {
        resolve(body)
      })

      // A failure while streaming the body must also settle the promise.
      res.on("error", reject)
    }).on("error", (e) => {
      reject(e)
      console.log("获取数据出错")
    })
  })
}

// Expose `start` on this module's exports so other files can call it after require().
exports.start = start

主程序入口:

const crawler = require("./crawler")
const querystring = require("querystring")
const cheerio = require("cheerio")
const fs = require("fs")
//url
const douban_url = "https://movie.douban.com/nowplaying/chengdu/"

// Download the Douban "now playing" page, pull title/score/poster URL out of
// every list item, and write the collected records to ./get_data/data.txt as JSON.
crawler.start(douban_url).then((html) => {
  const $ = cheerio.load(html)
  const movies = []
  $('#nowplaying .list-item').each((i, e) => {
    const item = $(e)
    movies.push({
      title: item.attr("data-title") + "\n",
      score: item.attr("data-score") + "\n",
      // Search for the <img> inside the current item. A document-wide selector
      // would return the first poster on the page for every movie.
      src: item.find("img").attr("src") + "\n",
    })
  })
  fs.writeFileSync("./get_data/data.txt", JSON.stringify(movies))
}).catch((err) => {
  // Report fetch/parse/write failures instead of leaving an unhandled rejection.
  console.error(err)
  process.exitCode = 1
})


效果展示:

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  node 爬虫