CURL抓取网页内容并用正则提取。
2015-09-05 10:36
429 查看
[php] view
plaincopy
<?php
// Declare the response as UTF-8 HTML; the script below converts the fetched
// page to UTF-8 before echoing anything from it.
header("Content-Type:text/html;charset=UTF-8");
/*
 * Minimal cURL wrapper for fetching a web page.
 *
 * The constructor stores the option map for a single URL; exec() performs
 * the transfer and returns what curl_exec() produced.
 */
class Curl
{
    /* Option map handed to curl_setopt_array() by exec(). */
    public $setopt;

    /* Result of the most recent exec() call. */
    public $data;

    /*
     * Prepare the transfer options for $url.
     *
     * The response is returned instead of printed (CURLOPT_RETURNTRANSFER)
     * and redirects are followed (CURLOPT_FOLLOWLOCATION).
     */
    function __construct($url)
    {
        $options = array();
        $options[CURLOPT_URL] = (string) $url;
        $options[CURLOPT_RETURNTRANSFER] = true;
        $options[CURLOPT_FOLLOWLOCATION] = true;

        $this->setopt = $options;
    }

    /*
     * Run the transfer with the stored options.
     *
     * Returns the value of curl_exec(): the response body as a string, or
     * false when the transfer failed. The same value is kept in $this->data.
     */
    function exec()
    {
        $handle = curl_init();
        curl_setopt_array($handle, $this->setopt);

        $response = curl_exec($handle);
        curl_close($handle);

        $this->data = $response;

        return $response;
    }
}
/*
 * Regex-based lookups on fetched HTML.
 *
 *   id()        - find an element by its id attribute
 *   tagName()   - find the content of the first element with a given tag
 *   className() - find an element by one of its class names
 *
 * Every method returns null when nothing matches. These are regular
 * expressions, not an HTML parser: an element that contains a nested
 * element of the same tag is cut at the first closing tag.
 */
class Preg
{
    /*
     * Return the first element (opening tag through closing tag) whose id
     * attribute equals $id, or null when there is none.
     */
    public function id($data, $id)
    {
        return $this->findElementByAttribute($data, 'id', $id, false);
    }

    /*
     * Return the inner content of the first <$tag> element, with
     * surrounding whitespace removed, or null when there is none.
     *
     * The tag name is matched case-insensitively and the content may span
     * several lines.
     */
    public function tagName($data, $tag)
    {
        // Escape the caller's text so regex metacharacters are taken literally.
        $name = preg_quote((string) $tag, '/');
        if ($name === '') {
            return null;
        }

        // "(?:\s[^>]*)?>" keeps <p> from matching <pre>; the lazy "(.*?)"
        // stops at the first closing tag instead of the last one.
        $pattern = '/<' . $name . '(?:\s[^>]*)?>\s*(.*?)\s*<\/' . $name . '\s*>/is';

        if (preg_match($pattern, (string) $data, $found) !== 1) {
            return null;
        }

        return $found[1];
    }

    /*
     * Return the first element (opening tag through closing tag) whose
     * class attribute contains $class as a whole class name, or null when
     * there is none.
     */
    public function className($data, $class)
    {
        return $this->findElementByAttribute($data, 'class', $class, true);
    }

    /*
     * Shared matcher behind id() and className().
     *
     * $attribute is a literal attribute name. When $isTokenList is true the
     * attribute value is treated as a whitespace-separated list and $value
     * only has to be one of its entries; otherwise the value must be equal
     * to $value. Quoted ("..." or '...') and unquoted values are accepted.
     */
    private function findElementByAttribute($data, $attribute, $value, $isTokenList)
    {
        $literal = preg_quote((string) $value, '/');
        if ($literal === '') {
            return null;
        }

        if ($isTokenList) {
            $doubleQuoted = '"(?:[^"]*\s)?' . $literal . '(?:\s[^"]*)?"';
            $singleQuoted = '\'(?:[^\']*\s)?' . $literal . '(?:\s[^\']*)?\'';
        } else {
            $doubleQuoted = '"' . $literal . '"';
            $singleQuoted = '\'' . $literal . '\'';
        }
        // An unquoted value ends at whitespace, "/" or ">".
        $unquoted = $literal . '(?=[\s>\/])';

        // \1 ties the closing tag to the tag name captured at the start.
        $pattern = '/<([a-zA-Z][a-zA-Z0-9]*)(?:\s[^>]*?)?\s' . $attribute
            . '\s*=\s*(?:' . $doubleQuoted . '|' . $singleQuoted . '|' . $unquoted . ')'
            . '[^>]*>.*?<\/\1\s*>/s';

        if (preg_match($pattern, (string) $data, $found) !== 1) {
            return null;
        }

        return $found[0];
    }
}
// Fetch the page. exec() returns whatever curl_exec() produced, which is
// false when the transfer failed.
$c = new Curl('www.corker.cc');
$data = $c->exec();
$preg = new Preg();

if (is_string($data) && $data !== '') {
    // An empty pattern with the /u modifier only matches when the subject is
    // valid UTF-8, so this tells us whether a conversion is needed at all.
    // NOTE(review): anything that is not valid UTF-8 is assumed to be GB2312,
    // as in the original script - confirm against the target site.
    if (preg_match('//u', $data) !== 1) {
        // iconv() raises a notice on bytes it cannot convert; the notice is
        // suppressed and the false return value is checked instead.
        $data = @iconv("gb2312", "utf-8", $data);
    }

    // Print the page title only when there is usable text to search.
    if (is_string($data)) {
        echo $preg->tagName($data, 'title');
    }
}
?>
plaincopy
<?php
// Declare the response as UTF-8 HTML; the script below converts the fetched
// page to UTF-8 before echoing anything from it.
header("Content-Type:text/html;charset=UTF-8");
/*
 * Minimal cURL wrapper for fetching a web page.
 *
 * The constructor stores the option map for a single URL; exec() performs
 * the transfer and returns what curl_exec() produced.
 */
class Curl
{
    /* Option map handed to curl_setopt_array() by exec(). */
    public $setopt;

    /* Result of the most recent exec() call. */
    public $data;

    /*
     * Prepare the transfer options for $url.
     *
     * The response is returned instead of printed (CURLOPT_RETURNTRANSFER)
     * and redirects are followed (CURLOPT_FOLLOWLOCATION).
     */
    function __construct($url)
    {
        $options = array();
        $options[CURLOPT_URL] = (string) $url;
        $options[CURLOPT_RETURNTRANSFER] = true;
        $options[CURLOPT_FOLLOWLOCATION] = true;

        $this->setopt = $options;
    }

    /*
     * Run the transfer with the stored options.
     *
     * Returns the value of curl_exec(): the response body as a string, or
     * false when the transfer failed. The same value is kept in $this->data.
     */
    function exec()
    {
        $handle = curl_init();
        curl_setopt_array($handle, $this->setopt);

        $response = curl_exec($handle);
        curl_close($handle);

        $this->data = $response;

        return $response;
    }
}
/*
 * Regex-based lookups on fetched HTML.
 *
 *   id()        - find an element by its id attribute
 *   tagName()   - find the content of the first element with a given tag
 *   className() - find an element by one of its class names
 *
 * Every method returns null when nothing matches. These are regular
 * expressions, not an HTML parser: an element that contains a nested
 * element of the same tag is cut at the first closing tag.
 */
class Preg
{
    /*
     * Return the first element (opening tag through closing tag) whose id
     * attribute equals $id, or null when there is none.
     */
    public function id($data, $id)
    {
        return $this->findElementByAttribute($data, 'id', $id, false);
    }

    /*
     * Return the inner content of the first <$tag> element, with
     * surrounding whitespace removed, or null when there is none.
     *
     * The tag name is matched case-insensitively and the content may span
     * several lines.
     */
    public function tagName($data, $tag)
    {
        // Escape the caller's text so regex metacharacters are taken literally.
        $name = preg_quote((string) $tag, '/');
        if ($name === '') {
            return null;
        }

        // "(?:\s[^>]*)?>" keeps <p> from matching <pre>; the lazy "(.*?)"
        // stops at the first closing tag instead of the last one.
        $pattern = '/<' . $name . '(?:\s[^>]*)?>\s*(.*?)\s*<\/' . $name . '\s*>/is';

        if (preg_match($pattern, (string) $data, $found) !== 1) {
            return null;
        }

        return $found[1];
    }

    /*
     * Return the first element (opening tag through closing tag) whose
     * class attribute contains $class as a whole class name, or null when
     * there is none.
     */
    public function className($data, $class)
    {
        return $this->findElementByAttribute($data, 'class', $class, true);
    }

    /*
     * Shared matcher behind id() and className().
     *
     * $attribute is a literal attribute name. When $isTokenList is true the
     * attribute value is treated as a whitespace-separated list and $value
     * only has to be one of its entries; otherwise the value must be equal
     * to $value. Quoted ("..." or '...') and unquoted values are accepted.
     */
    private function findElementByAttribute($data, $attribute, $value, $isTokenList)
    {
        $literal = preg_quote((string) $value, '/');
        if ($literal === '') {
            return null;
        }

        if ($isTokenList) {
            $doubleQuoted = '"(?:[^"]*\s)?' . $literal . '(?:\s[^"]*)?"';
            $singleQuoted = '\'(?:[^\']*\s)?' . $literal . '(?:\s[^\']*)?\'';
        } else {
            $doubleQuoted = '"' . $literal . '"';
            $singleQuoted = '\'' . $literal . '\'';
        }
        // An unquoted value ends at whitespace, "/" or ">".
        $unquoted = $literal . '(?=[\s>\/])';

        // \1 ties the closing tag to the tag name captured at the start.
        $pattern = '/<([a-zA-Z][a-zA-Z0-9]*)(?:\s[^>]*?)?\s' . $attribute
            . '\s*=\s*(?:' . $doubleQuoted . '|' . $singleQuoted . '|' . $unquoted . ')'
            . '[^>]*>.*?<\/\1\s*>/s';

        if (preg_match($pattern, (string) $data, $found) !== 1) {
            return null;
        }

        return $found[0];
    }
}
// Fetch the page. exec() returns whatever curl_exec() produced, which is
// false when the transfer failed.
$c = new Curl('www.corker.cc');
$data = $c->exec();
$preg = new Preg();

if (is_string($data) && $data !== '') {
    // An empty pattern with the /u modifier only matches when the subject is
    // valid UTF-8, so this tells us whether a conversion is needed at all.
    // NOTE(review): anything that is not valid UTF-8 is assumed to be GB2312,
    // as in the original script - confirm against the target site.
    if (preg_match('//u', $data) !== 1) {
        // iconv() raises a notice on bytes it cannot convert; the notice is
        // suppressed and the false return value is checked instead.
        $data = @iconv("gb2312", "utf-8", $data);
    }

    // Print the page title only when there is usable text to search.
    if (is_string($data)) {
        echo $preg->tagName($data, 'title');
    }
}
?>
相关文章推荐
- 二级c++——基本操作题小结
- 【codeforces 29A】Spit Problem
- 将你的Vim 打造成轻巧强大的IDE
- hdu1500 (排序+单调队列优化 )
- window 给链接加下划线或取消下划线
- Ajax-JS
- HDU 1150
- Unity3d通用工具类之生成文件的MD5
- POJ 2083 Fractal
- 基于SCN的查询(AS OF SCN)
- Unity3d通用工具类之生成文件的MD5
- c语言中static作用
- HDU 5202
- Android View.OnTouchListener 的子类,AutoScrollHelper,ZoomButtonsController,ListViewAutoScrollHelper
- IIS安装设置-Sql Sever2008安装设置-移动端设置等必读
- python题目——认识*与**,判断函数输出
- freebsd上安装nginx+php记录
- 网络虚拟化相关
- Hadoop 运行wordcount 实例
- ThinkCMF框架写接口时不能正确输入json字符串而是带上了网页源码怎么办?