您的位置:首页 > 其它

一种通用数据采集的schema定义形式

2015-02-09 16:39 218 查看
{
"name": "凤凰金融",
"notice": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
  "comments": "网站通告"
},
"url": {
"data": "attribute",
"value": "http://www.fengjr.com/financing/list?type=cx",
"comments": "本平台数据的采集URL"
},
"project": {
"data": "url",
"url": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"template": ""
},
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"detail": {
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"amount": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
}
}
},
"member": {
"data": "sub_item",
"sub_item": {
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"src-save": 0,
"url": {
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"template": ""
}
},
"detail": {
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"amount": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
}
}
},
"src-save": 1
}


补充:

{
"name": "凤凰金融",
"notice": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"url": {
"data": "attribute",
"value": "http://www.fengjr.com/financing/list?type=cx"
},
"project": {
"data": "url",
"url": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"template": ""
},
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"detail": {
"name": "网贷列表",
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"amount": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
}
}
},
"member": {
"data": "sub_item",
"sub_item": {
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"src-save": 0,
"url": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
],
"template": ""
}
},
"detail": {
"name": "会员材料",
"title": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
},
"amount": {
"data": "attribute",
"matcher": [
{
"match": "xpath",
"pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
}
]
}
}
},
"src-save": 1,

  "crawler": {

      "handler":"httpClient|selenium",
      "results":"html|json|text",
      "next_page": {
        "matcher": [
          {
            "match": "xpath",
            "pattern": "//*[@id=\"page-financing\"]/div[1]/div[5]/div/div/div[3]"
          }
         ],
        "template": ""
      },
      "history": "re-crawl|skip|stop"
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: