curl post登录 抓取重定向网址
2011-10-12 10:52
134 查看
现在的网站都做得贼拉恶心,boss要做个抓取程序,别人的网站要求登录,登录入口就一个,用curl写了个,死活不行,抓包一看才发现丫的做了n次跳转,无耻呀~~这不是逼我嘛~~找了n久也没找到现成的~~唉,自个动手吧~~~存个档
<?php
// Raise PHP's execution time limit to 1000 seconds for this script.
set_time_limit(1000);

// Timeout value (presumably seconds -- confirm against wherever it is consumed).
$timeout = 1000;

// Temporary file used to hold cookies for the cURL session. It is created in
// the system temp directory rather than the current working directory, so
// session cookies never land in a possibly web-accessible or read-only folder.
// NOTE: tempnam() returns false on failure; callers should be prepared for that.
$cookie_jar = tempnam(sys_get_temp_dir(), 'cookie');
/**
 * Execute a cURL handle, following HTTP redirects by hand.
 *
 * The handle is always executed with CURLOPT_HEADER and CURLOPT_RETURNTRANSFER
 * forced on, because the response headers are needed to find the redirect
 * target. The caller's own preferences are expressed through the
 * $curlopt_returntransfer / $curlopt_header arguments instead.
 *
 * The same handle (and therefore the same request method and body) is reused
 * for every hop; only CURLOPT_URL is changed.
 *
 * @param resource $ch                      cURL handle, already configured by the caller.
 * @param int      $redirects               Counter of redirects followed so far (by reference;
 *                                          incremented on every hop). Pass a variable set to 0.
 * @param bool     $curlopt_returntransfer  true: return the response as a string;
 *                                          false: return only a success flag.
 * @param int      $curlopt_maxredirs       Maximum number of redirects to follow.
 * @param bool     $curlopt_header          true: keep the response headers in the returned string.
 *
 * @return string|bool The response (when $curlopt_returntransfer is true), true on success
 *                     (when it is false), or false if the transfer failed or the redirect
 *                     limit was reached while the server still asked for another redirect.
 */
function curl_redirect_exec($ch, &$redirects, $curlopt_returntransfer = false, $curlopt_maxredirs = 10, $curlopt_header = false) {
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $data = curl_exec($ch);
    if ($data === false) {
        return false;
    }

    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Use the header size reported by cURL instead of splitting on the first
    // blank line: the output may contain several header blocks (for example
    // "100 Continue") and may have no body at all.
    $header_size = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $headers = (string) substr($data, 0, $header_size);

    if (in_array($http_code, array(301, 302, 303, 307, 308), true)) {
        if ($redirects >= $curlopt_maxredirs) {
            // The server wants another hop but the budget is used up.
            return false;
        }

        // Only the last header block belongs to the response that carries
        // the redirect we are about to follow.
        $blocks = preg_split('/\r?\n\r?\n/', trim($headers));
        $last_block = (string) end($blocks);

        $target = false;
        // Header names are case-insensitive (HTTP/2 sends them in lower case).
        if (preg_match('/^(?:Location|URI):[ \t]*(.*?)[ \t]*\r?$/mi', $last_block, $matches)) {
            $base = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
            $target = _curl_redirect_resolve_url($base, $matches[1]);
        }

        if ($target !== false) {
            curl_setopt($ch, CURLOPT_URL, $target);
            $redirects++;
            return curl_redirect_exec($ch, $redirects, $curlopt_returntransfer, $curlopt_maxredirs, $curlopt_header);
        }
        // No usable Location header: fall through and treat this response as final.
    }

    if (!$curlopt_header) {
        // Strip every header block, leaving just the body (possibly empty).
        $data = (string) substr($data, $header_size);
    }

    if ($curlopt_returntransfer) {
        return $data;
    }
    return curl_errno($ch) === 0;
}

// Guarded so that pasting or including this block more than once does not
// redeclare the helper.
if (!function_exists('_curl_redirect_resolve_url')) {
    /**
     * Turn a Location header value into an absolute URL.
     *
     * @param string $base     URL of the request that produced the redirect.
     * @param string $location Raw Location header value (absolute or relative).
     *
     * @return string|false Absolute URL, or false if it cannot be determined.
     */
    function _curl_redirect_resolve_url($base, $location) {
        if ($location === '') {
            return false;
        }
        // Already absolute (scheme://...).
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) {
            return $location;
        }

        $parts = parse_url($base);
        if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }

        // Scheme-relative: //host/path
        if (substr($location, 0, 2) === '//') {
            return $parts['scheme'] . ':' . $location;
        }

        $authority = $parts['scheme'] . '://';
        if (isset($parts['user'])) {
            $authority .= $parts['user'];
            if (isset($parts['pass'])) {
                $authority .= ':' . $parts['pass'];
            }
            $authority .= '@';
        }
        $authority .= $parts['host'];
        if (isset($parts['port'])) {
            $authority .= ':' . $parts['port'];
        }

        $path = isset($parts['path']) ? $parts['path'] : '/';

        // Host-relative: /path
        if ($location[0] === '/') {
            return $authority . $location;
        }
        // Query-only: ?a=b keeps the current path.
        if ($location[0] === '?') {
            return $authority . $path . $location;
        }

        // Path-relative: replace the last segment of the base path.
        $slash = strrpos($path, '/');
        $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);
        return $authority . $dir . $location;
    }
}
// Browser-like User-Agent string sent with the login request.
$useragent="Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)";
// URL-encoded form fields posted to the login endpoint (placeholder credentials).
$request="user=user&pwd=pwd";
// Login endpoint (placeholder; replace with the real absolute URL).
$loginurl ="login.asp";

$ch = curl_init();
// Options are applied in array order, which is kept identical to the original
// sequence of curl_setopt() calls.
// NOTE(review): CURLOPT_NOBODY is set before CURLOPT_POST; how the two interact
// depends on the libcurl version -- confirm whether the body should be skipped.
curl_setopt_array($ch, array(
    CURLOPT_URL               => $loginurl,
    CURLOPT_REFERER           => 'index.asp',
    CURLOPT_USERAGENT         => $useragent,
    CURLOPT_FOLLOWLOCATION    => 1,   // let cURL follow the redirect chain itself
    CURLOPT_UNRESTRICTED_AUTH => 1,
    CURLOPT_NOBODY            => 1,
    CURLOPT_HEADER            => false,
    CURLOPT_RETURNTRANSFER    => 1,
    CURLOPT_COOKIEFILE        => $cookie_jar,  // read cookies / enable the cookie engine
    CURLOPT_COOKIEJAR         => $cookie_jar,  // write received cookies back on close
    CURLOPT_TIMEOUT           => $timeout,     // apply the configured timeout to the transfer
    CURLOPT_POST              => 1,
    CURLOPT_POSTFIELDS        => $request,
));

$response = curl_exec($ch);
if ($response === false) {
    // Surface the failure instead of silently continuing.
    trigger_error('cURL login request failed: ' . curl_error($ch), E_USER_WARNING);
}

// Transfer details; $info['url'] holds the last effective URL, i.e. where the
// redirect chain ended.
$info = curl_getinfo($ch);
curl_close($ch);

// Best-effort removal of the temporary cookie file.
@unlink($cookie_jar);
?>
<?php
// Raise PHP's execution time limit to 1000 seconds for this script.
set_time_limit(1000);

// Timeout value (presumably seconds -- confirm against wherever it is consumed).
$timeout = 1000;

// Temporary file used to hold cookies for the cURL session. It is created in
// the system temp directory rather than the current working directory, so
// session cookies never land in a possibly web-accessible or read-only folder.
// NOTE: tempnam() returns false on failure; callers should be prepared for that.
$cookie_jar = tempnam(sys_get_temp_dir(), 'cookie');
/**
 * Execute a cURL handle, following HTTP redirects by hand.
 *
 * The handle is always executed with CURLOPT_HEADER and CURLOPT_RETURNTRANSFER
 * forced on, because the response headers are needed to find the redirect
 * target. The caller's own preferences are expressed through the
 * $curlopt_returntransfer / $curlopt_header arguments instead.
 *
 * The same handle (and therefore the same request method and body) is reused
 * for every hop; only CURLOPT_URL is changed.
 *
 * @param resource $ch                      cURL handle, already configured by the caller.
 * @param int      $redirects               Counter of redirects followed so far (by reference;
 *                                          incremented on every hop). Pass a variable set to 0.
 * @param bool     $curlopt_returntransfer  true: return the response as a string;
 *                                          false: return only a success flag.
 * @param int      $curlopt_maxredirs       Maximum number of redirects to follow.
 * @param bool     $curlopt_header          true: keep the response headers in the returned string.
 *
 * @return string|bool The response (when $curlopt_returntransfer is true), true on success
 *                     (when it is false), or false if the transfer failed or the redirect
 *                     limit was reached while the server still asked for another redirect.
 */
function curl_redirect_exec($ch, &$redirects, $curlopt_returntransfer = false, $curlopt_maxredirs = 10, $curlopt_header = false) {
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $data = curl_exec($ch);
    if ($data === false) {
        return false;
    }

    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Use the header size reported by cURL instead of splitting on the first
    // blank line: the output may contain several header blocks (for example
    // "100 Continue") and may have no body at all.
    $header_size = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $headers = (string) substr($data, 0, $header_size);

    if (in_array($http_code, array(301, 302, 303, 307, 308), true)) {
        if ($redirects >= $curlopt_maxredirs) {
            // The server wants another hop but the budget is used up.
            return false;
        }

        // Only the last header block belongs to the response that carries
        // the redirect we are about to follow.
        $blocks = preg_split('/\r?\n\r?\n/', trim($headers));
        $last_block = (string) end($blocks);

        $target = false;
        // Header names are case-insensitive (HTTP/2 sends them in lower case).
        if (preg_match('/^(?:Location|URI):[ \t]*(.*?)[ \t]*\r?$/mi', $last_block, $matches)) {
            $base = (string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
            $target = _curl_redirect_resolve_url($base, $matches[1]);
        }

        if ($target !== false) {
            curl_setopt($ch, CURLOPT_URL, $target);
            $redirects++;
            return curl_redirect_exec($ch, $redirects, $curlopt_returntransfer, $curlopt_maxredirs, $curlopt_header);
        }
        // No usable Location header: fall through and treat this response as final.
    }

    if (!$curlopt_header) {
        // Strip every header block, leaving just the body (possibly empty).
        $data = (string) substr($data, $header_size);
    }

    if ($curlopt_returntransfer) {
        return $data;
    }
    return curl_errno($ch) === 0;
}

// Guarded so that pasting or including this block more than once does not
// redeclare the helper.
if (!function_exists('_curl_redirect_resolve_url')) {
    /**
     * Turn a Location header value into an absolute URL.
     *
     * @param string $base     URL of the request that produced the redirect.
     * @param string $location Raw Location header value (absolute or relative).
     *
     * @return string|false Absolute URL, or false if it cannot be determined.
     */
    function _curl_redirect_resolve_url($base, $location) {
        if ($location === '') {
            return false;
        }
        // Already absolute (scheme://...).
        if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $location)) {
            return $location;
        }

        $parts = parse_url($base);
        if ($parts === false || !isset($parts['scheme'], $parts['host'])) {
            return false;
        }

        // Scheme-relative: //host/path
        if (substr($location, 0, 2) === '//') {
            return $parts['scheme'] . ':' . $location;
        }

        $authority = $parts['scheme'] . '://';
        if (isset($parts['user'])) {
            $authority .= $parts['user'];
            if (isset($parts['pass'])) {
                $authority .= ':' . $parts['pass'];
            }
            $authority .= '@';
        }
        $authority .= $parts['host'];
        if (isset($parts['port'])) {
            $authority .= ':' . $parts['port'];
        }

        $path = isset($parts['path']) ? $parts['path'] : '/';

        // Host-relative: /path
        if ($location[0] === '/') {
            return $authority . $location;
        }
        // Query-only: ?a=b keeps the current path.
        if ($location[0] === '?') {
            return $authority . $path . $location;
        }

        // Path-relative: replace the last segment of the base path.
        $slash = strrpos($path, '/');
        $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);
        return $authority . $dir . $location;
    }
}
// Browser-like User-Agent string sent with the login request.
$useragent="Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C)";
// URL-encoded form fields posted to the login endpoint (placeholder credentials).
$request="user=user&pwd=pwd";
// Login endpoint (placeholder; replace with the real absolute URL).
$loginurl ="login.asp";

$ch = curl_init();
// Options are applied in array order, which is kept identical to the original
// sequence of curl_setopt() calls.
// NOTE(review): CURLOPT_NOBODY is set before CURLOPT_POST; how the two interact
// depends on the libcurl version -- confirm whether the body should be skipped.
curl_setopt_array($ch, array(
    CURLOPT_URL               => $loginurl,
    CURLOPT_REFERER           => 'index.asp',
    CURLOPT_USERAGENT         => $useragent,
    CURLOPT_FOLLOWLOCATION    => 1,   // let cURL follow the redirect chain itself
    CURLOPT_UNRESTRICTED_AUTH => 1,
    CURLOPT_NOBODY            => 1,
    CURLOPT_HEADER            => false,
    CURLOPT_RETURNTRANSFER    => 1,
    CURLOPT_COOKIEFILE        => $cookie_jar,  // read cookies / enable the cookie engine
    CURLOPT_COOKIEJAR         => $cookie_jar,  // write received cookies back on close
    CURLOPT_TIMEOUT           => $timeout,     // apply the configured timeout to the transfer
    CURLOPT_POST              => 1,
    CURLOPT_POSTFIELDS        => $request,
));

$response = curl_exec($ch);
if ($response === false) {
    // Surface the failure instead of silently continuing.
    trigger_error('cURL login request failed: ' . curl_error($ch), E_USER_WARNING);
}

// Transfer details; $info['url'] holds the last effective URL, i.e. where the
// redirect chain ended.
$info = curl_getinfo($ch);
curl_close($ch);

// Best-effort removal of the temporary cookie file.
@unlink($cookie_jar);
?>
相关文章推荐
- 获取CURL模拟POST提交之后重定向的网址
- PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)
- perl中设置POST登录时的重定向
- PHP的cURL库功能简介 抓取网页、POST数据及其他
- 微博第三方登录 curl post传值 处理返回的json数据
- php使用curl存取cookie进行登录抓取数据示例
- 转载:PHP的cURL库功能简介:抓取网页,POST数据及其他
- CURL 抓取网页内容,模拟post
- curl抓取网页内容时的重定向和乱码解决
- PHP的cURL库功能简介:抓取网页,POST数据及其他
- curl抓取页面时遇到重定向的解决方法
- 使用HttpClient 4.3.4 自动登录并抓取中国联通用户基本信息和账单数据,GET/POST/Cookie
- PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)
- 使用C#抓取页面----GET方法,POST方法,抓取登录页面
- PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)
- PHP的cURL库功能简介:抓取网页,POST数据及其他
- PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)
- 一个完整的 curl post登录带验证码的代码
- PHP的CURL方法curl_setopt()函数案例介绍(抓取网页,POST数据)