perl 爬取铜板街
2016-02-26 20:09
190 查看
use LWP::UserAgent;
use utf8;
use HTML::TreeBuilder;
use Data::Dumper;
{
    # Crawl the Tongbanjie product list.
    #
    # Reads the number of list pages from the pager links on page 1, then
    # fetches every page and prints, one value per line, the product titles,
    # the product rates and the product terms found on it.  The HTML of the
    # most recently fetched page is also saved to data.html.
    #
    # Dies if page 1 cannot be fetched, if the page count cannot be read, or
    # if data.html cannot be written.  A failed fetch of a later page is
    # reported on STDERR and that page is skipped.
    #
    # NOTE(review): this script body appears twice in the file, so running the
    # file as-is crawls the site twice -- confirm whether that is intended.
    #
    # The bare block keeps the strictures and the lexicals below from leaking
    # into the rest of the file.
    use strict;
    use warnings;
    use HTML::TreeBuilder::XPath;

    my $snapshot_file = 'data.html';
    my $pause_seconds = 10;    # delay between pages, to avoid being blocked

    # The scraped text is non-ASCII, so STDOUT needs an explicit encoding.
    # Only add the layer when none is present: pushing :encoding twice would
    # double-encode the output.
    binmode STDOUT, ':encoding(UTF-8)'
        unless grep { /^(?:utf8|encoding)/ } PerlIO::get_layers( \*STDOUT );

    # One browser-like user agent is enough for the whole crawl.
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");

    # Fetch $url and parse it.
    # Returns ($tree, undef) on success or (undef, $reason) on failure.
    my $load_tree = sub {
        my ($url) = @_;

        my $response = $ua->get($url);
        return ( undef, $response->status_line )
            unless $response->is_success;

        my $html = $response->decoded_content;
        return ( undef, 'response body could not be decoded' )
            unless defined $html;

        # Save the page, and close the handle straight away so the data is
        # really on disk.  "or" (not "||") is required for the check to
        # apply to open() rather than to the file name.
        open my $out, '>:encoding(UTF-8)', $snapshot_file
            or die "open data file failed:$!";
        print {$out} $html;
        close $out
            or die "close data file failed:$!";

        # Parse the in-memory text instead of re-reading the file: parse()
        # takes the already decoded string, and eof() marks the end of input.
        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse($html);
        $tree->eof;
        return ( $tree, undef );
    };

    my ( $first_tree, $first_error )
        = $load_tree->('https://product.tongbanjie.com/list.html?pageNo=1#list_wrap');
    die $first_error unless defined $first_tree;

    # The page count is taken from the text of the second-to-last <a> element.
    # NOTE(review): this depends on the site's pager markup -- confirm.
    my @links      = $first_tree->find_by_tag_name("a");
    my $pager_text = @links >= 2 ? $links[-2]->as_text : '';
    $first_tree->delete;

    my ($total_pages) = $pager_text =~ /\A\s*(\d+)/;
    die "could not read the page count from the pager links\n"
        unless defined $total_pages;

    # Walk every list page and print its product data.
    for my $page ( 1 .. $total_pages ) {
        my ( $tree, $page_error )
            = $load_tree->("https://product.tongbanjie.com/list.html?pageNo=$page#list_wrap");

        if ( defined $tree ) {
            # Product names, rates and terms, in that order.
            my @title = $tree->findvalues('/html/body//div[@class="title"]/a');
            my @rate  = $tree->findvalues('/html/body//div[@class="income"]/p[@class="numBox"]');
            my @date  = $tree->findvalues('/html/body//div[@class="day floatleft"]/span[@class="numBox"]');

            print "$_\n" for @title, @rate, @date;

            # TODO: insert the products into the database here.  Presumably
            # @title, @rate and @date are parallel (same index = same
            # product) -- verify before relying on it.

            $tree->delete;    # release the parse tree
        }
        else {
            warn "page $page skipped: $page_error\n";
        }

        # No need to wait once the last page has been handled.
        sleep($pause_seconds) if $page < $total_pages;
    }
}
use utf8;
use HTML::TreeBuilder;
use Data::Dumper;
{
    # Crawl the Tongbanjie product list.
    #
    # Reads the number of list pages from the pager links on page 1, then
    # fetches every page and prints, one value per line, the product titles,
    # the product rates and the product terms found on it.  The HTML of the
    # most recently fetched page is also saved to data.html.
    #
    # Dies if page 1 cannot be fetched, if the page count cannot be read, or
    # if data.html cannot be written.  A failed fetch of a later page is
    # reported on STDERR and that page is skipped.
    #
    # NOTE(review): this script body appears twice in the file, so running the
    # file as-is crawls the site twice -- confirm whether that is intended.
    #
    # The bare block keeps the strictures and the lexicals below from leaking
    # into the rest of the file.
    use strict;
    use warnings;
    use HTML::TreeBuilder::XPath;

    my $snapshot_file = 'data.html';
    my $pause_seconds = 10;    # delay between pages, to avoid being blocked

    # The scraped text is non-ASCII, so STDOUT needs an explicit encoding.
    # Only add the layer when none is present: pushing :encoding twice would
    # double-encode the output.
    binmode STDOUT, ':encoding(UTF-8)'
        unless grep { /^(?:utf8|encoding)/ } PerlIO::get_layers( \*STDOUT );

    # One browser-like user agent is enough for the whole crawl.
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    $ua->agent("Mozilla/8.0");

    # Fetch $url and parse it.
    # Returns ($tree, undef) on success or (undef, $reason) on failure.
    my $load_tree = sub {
        my ($url) = @_;

        my $response = $ua->get($url);
        return ( undef, $response->status_line )
            unless $response->is_success;

        my $html = $response->decoded_content;
        return ( undef, 'response body could not be decoded' )
            unless defined $html;

        # Save the page, and close the handle straight away so the data is
        # really on disk.  "or" (not "||") is required for the check to
        # apply to open() rather than to the file name.
        open my $out, '>:encoding(UTF-8)', $snapshot_file
            or die "open data file failed:$!";
        print {$out} $html;
        close $out
            or die "close data file failed:$!";

        # Parse the in-memory text instead of re-reading the file: parse()
        # takes the already decoded string, and eof() marks the end of input.
        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse($html);
        $tree->eof;
        return ( $tree, undef );
    };

    my ( $first_tree, $first_error )
        = $load_tree->('https://product.tongbanjie.com/list.html?pageNo=1#list_wrap');
    die $first_error unless defined $first_tree;

    # The page count is taken from the text of the second-to-last <a> element.
    # NOTE(review): this depends on the site's pager markup -- confirm.
    my @links      = $first_tree->find_by_tag_name("a");
    my $pager_text = @links >= 2 ? $links[-2]->as_text : '';
    $first_tree->delete;

    my ($total_pages) = $pager_text =~ /\A\s*(\d+)/;
    die "could not read the page count from the pager links\n"
        unless defined $total_pages;

    # Walk every list page and print its product data.
    for my $page ( 1 .. $total_pages ) {
        my ( $tree, $page_error )
            = $load_tree->("https://product.tongbanjie.com/list.html?pageNo=$page#list_wrap");

        if ( defined $tree ) {
            # Product names, rates and terms, in that order.
            my @title = $tree->findvalues('/html/body//div[@class="title"]/a');
            my @rate  = $tree->findvalues('/html/body//div[@class="income"]/p[@class="numBox"]');
            my @date  = $tree->findvalues('/html/body//div[@class="day floatleft"]/span[@class="numBox"]');

            print "$_\n" for @title, @rate, @date;

            # TODO: insert the products into the database here.  Presumably
            # @title, @rate and @date are parallel (same index = same
            # product) -- verify before relying on it.

            $tree->delete;    # release the parse tree
        }
        else {
            warn "page $page skipped: $page_error\n";
        }

        # No need to wait once the last page has been handled.
        sleep($pause_seconds) if $page < $total_pages;
    }
}
相关文章推荐
- GesturesAndEventHandle常用手势
- 多态之override与final
- 排列问题
- java安全(一)DES 的简单使用和加密过程(原理)
- bsoj 3175 【HNOI2010】弹飞绵羊
- perl 数组跟哈希
- perl 登录人人,并发送帖子
- MFC消息机制---消息映射
- 驾照考试:理论考试注意事项
- 光谱学
- perl 文件句柄
- mysql参数设置与查看
- 一个下音乐的方法
- 模拟计算器进行四则运算(同等优先级)(内测第2届第3题)
- 微信现在越来越难开了,有什么办法可以快速开很多微信号呢
- 驾照考试:科目二考试注意事项
- 第一篇博文
- 用V4包中的DrawerLayout实现下拉刷…
- 模仿微信6.0的界面效果
- 自定义的上下拉刷新和SwipeListVie…