C语言调用curl库抓取网页图片
2016-07-18 19:41
393 查看
思路是先用curl抓取网页源码,然后以关键字寻找出图片网址。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
void get_key_from_str(char *origin, char *str1, char *str2, char *key);
int main(int argc, char **argv)
{
CURL *curl;
FILE *fp = NULL, *fp_read = NULL;
int file_size = 0;
char *tmp = NULL;
int flag = 0;
char key[1024] = {0};
char str1[128] = {0};
char str2[128] = {0};
if ((fp = fopen("test.txt", "w")) == NULL)
{
return 1;
}
curl = curl_easy_init();
if (curl)
{
flag = 1;
curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //curl设置网址
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); //将网页源码存在文件中,好像只能存文件,不能存字符串
curl_easy_perform(curl);//开始抓取
curl_easy_cleanup(curl);
fclose(fp);
fp_read = fopen("test.txt", "rb");
fseek(fp_read, 0, SEEK_END); //把指针移到文本最后
file_size = ftell(fp_read) ;//算出文本大小
fseek(fp_read, 0, SEEK_SET); //再把指针移到最前面
tmp = (char *)malloc(file_size * sizeof(char)); //malloc一段内存
printf("file_size:%d\n", file_size);
fread(tmp, file_size, sizeof(char), fp_read); //读取文件
fclose(fp_read);
snprintf(str1, sizeof(str1), "img src=\"");
snprintf(str2, sizeof(str2), "\"");
get_key_from_str(tmp, str1, str2, key); //取得网址
printf("key:%s\n", key);
free(tmp);
}
if (!flag)
{
fclose(fp);
}
return 0;
}
/*
 * Extract the text between two delimiters.
 *
 * Finds the first occurrence of str1 in origin, then the first occurrence of
 * str2 after it, and copies the bytes in between into key as a NUL-terminated
 * string. If either delimiter is missing (or any input pointer is NULL), key
 * is set to the empty string.
 *
 * origin, str1, str2: NUL-terminated strings.
 * key: output buffer; the caller must make it large enough to hold the
 *      extracted text plus the terminator (no size is passed in, so this
 *      function cannot bound the copy itself).
 */
void get_key_from_str(char *origin, char *str1, char *str2, char *key)
{
    char *p = NULL;
    char *q = NULL;
    size_t len = 0;

    if (key == NULL)
    {
        return;
    }
    key[0] = '\0';
    if (origin == NULL || str1 == NULL || str2 == NULL)
    {
        return;
    }

    p = strstr(origin, str1);
    if (p == NULL)
    {
        return;
    }
    p += strlen(str1); /* step past the opening delimiter */

    q = strstr(p, str2);
    if (q == NULL)
    {
        return;
    }

    len = (size_t)(q - p);
    memcpy(key, p, len);
    key[len] = '\0'; /* terminate exactly at the end of the copied text */
}
gcc -g -Wall main.c -o test -lcurl
./test url
即可
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
void get_key_from_str(char *origin, char *str1, char *str2, char *key);
int main(int argc, char **argv)
{
CURL *curl;
FILE *fp = NULL, *fp_read = NULL;
int file_size = 0;
char *tmp = NULL;
int flag = 0;
char key[1024] = {0};
char str1[128] = {0};
char str2[128] = {0};
if ((fp = fopen("test.txt", "w")) == NULL)
{
return 1;
}
curl = curl_easy_init();
if (curl)
{
flag = 1;
curl_easy_setopt(curl, CURLOPT_URL, argv[1]); //curl设置网址
curl_easy_setopt(curl, CURLOPT_WRITEDATA, fp); //将网页源码存在文件中,好像只能存文件,不能存字符串
curl_easy_perform(curl);//开始抓取
curl_easy_cleanup(curl);
fclose(fp);
fp_read = fopen("test.txt", "rb");
fseek(fp_read, 0, SEEK_END); //把指针移到文本最后
file_size = ftell(fp_read) ;//算出文本大小
fseek(fp_read, 0, SEEK_SET); //再把指针移到最前面
tmp = (char *)malloc(file_size * sizeof(char)); //malloc一段内存
printf("file_size:%d\n", file_size);
fread(tmp, file_size, sizeof(char), fp_read); //读取文件
fclose(fp_read);
snprintf(str1, sizeof(str1), "img src=\"");
snprintf(str2, sizeof(str2), "\"");
get_key_from_str(tmp, str1, str2, key); //取得网址
printf("key:%s\n", key);
free(tmp);
}
if (!flag)
{
fclose(fp);
}
return 0;
}
/*
 * Extract the text between two delimiters.
 *
 * Finds the first occurrence of str1 in origin, then the first occurrence of
 * str2 after it, and copies the bytes in between into key as a NUL-terminated
 * string. If either delimiter is missing (or any input pointer is NULL), key
 * is set to the empty string.
 *
 * origin, str1, str2: NUL-terminated strings.
 * key: output buffer; the caller must make it large enough to hold the
 *      extracted text plus the terminator (no size is passed in, so this
 *      function cannot bound the copy itself).
 */
void get_key_from_str(char *origin, char *str1, char *str2, char *key)
{
    char *p = NULL;
    char *q = NULL;
    size_t len = 0;

    if (key == NULL)
    {
        return;
    }
    key[0] = '\0';
    if (origin == NULL || str1 == NULL || str2 == NULL)
    {
        return;
    }

    p = strstr(origin, str1);
    if (p == NULL)
    {
        return;
    }
    p += strlen(str1); /* step past the opening delimiter */

    q = strstr(p, str2);
    if (q == NULL)
    {
        return;
    }

    len = (size_t)(q - p);
    memcpy(key, p, len);
    key[len] = '\0'; /* terminate exactly at the end of the copied text */
}
gcc -g -Wall main.c -o test -lcurl
./test url
即可
相关文章推荐
- C语言调用curl库抓取网页图片
- ACM知识点 之 贪心(4)部分背包问题
- Leetcode 20. Valid Parentheses (Easy) (cpp)
- ROS学习之 cpp节点句柄
- C++字符串和字符串结束标志
- c++中typename和class的区别介绍
- 位运算
- realloc、calloc和malloc的用法、区别。实现原理
- 用VS2010编写的C++程序,在其他电脑上无法运行的问题
- C语言指针与函数之三数比较大小并排序
- C语言的HelloWorld 过渡到 OC的HelloWorld
- 法国Nao机器人在C++开发中如何使用外部库
- 服务端与客户端握手规则,心跳逻辑
- jhash的C++实现
- jhash的C++实现
- jhash的C++实现
- jhash的C++实现
- jhash的C++实现
- jhash的C++实现
- ROS学习之 cpp定时器