您的位置:首页 > 理论基础 > 计算机网络

[HTTP]_[C/C++]_[获取URL里的域名主体]

2014-03-03 21:59 453 查看
场景:

1.有时候需要获取域名主体来做一些记录操作或公司名称记录,比如域名 baidu.com 的主体是 baidu。

2.在 blog 下载工具(博客下载备份导出工具)里,需要根据不同的域名主体来调用相应的下载脚本,域名主体同时也是包名。

#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <string>
#include "domain.h"
#include "assert.h"

using namespace std;

// Returns true when `suffix` (for example ".com.cn") is a complete entry of
// the ';'-delimited suffix list gDomainPart. A plain strstr() is not enough:
// it would also accept a prefix of an entry (".com.c") or text that spans
// two entries (".com;.net").
static bool IsKnownDomainSuffix(const char* suffix)
{
	const size_t suffix_len = strlen(suffix);
	if (suffix_len == 0 || strchr(suffix, ';') != NULL)
	{
		return false;
	}

	for (const char* hit = strstr(gDomainPart, suffix);
		 hit != NULL;
		 hit = strstr(hit + 1, suffix))
	{
		const bool starts_entry = (hit == gDomainPart) || (hit[-1] == ';');
		const bool ends_entry = (hit[suffix_len] == ';') || (hit[suffix_len] == '\0');
		if (starts_entry && ends_entry)
		{
			return true;
		}
	}
	return false;
}

// Extracts the main body of the domain from a URL, e.g. "csdn" from
// "http://blog.csdn.net/infoworld/1.html" or "test" from "www.test.com.cn".
//
// url1: NUL-terminated URL, with or without a leading "http://" or
//       "https://". Anything from the first '/', '?', '#' or ':' after the
//       optional scheme is ignored (path, query, fragment, port).
//
// Returns a newly allocated string that the caller must release with free().
// Returns NULL when url1 is NULL, when the host part is empty, or when memory
// allocation fails. A host without any dot is returned unchanged.
//
// Side effect: echoes url1 to standard output.
char* GetDomainTopPart(const char* url1)
{
	if (url1 == NULL)
	{
		return NULL;
	}
	std::cout << url1 << std::endl;

	// Work on a private copy so separators can be overwritten with NULs.
	char* url = strdup(url1);
	if (url == NULL)
	{
		return NULL;
	}

	// 1. Isolate the host: skip a known scheme, then cut at the first
	//    character that cannot belong to the host name.
	char* host = url;
	static const char* const kSchemes[] = { "http://", "https://" };
	for (size_t i = 0; i < sizeof(kSchemes) / sizeof(kSchemes[0]); ++i)
	{
		const size_t scheme_len = strlen(kSchemes[i]);
		if (strncmp(host, kSchemes[i], scheme_len) == 0)
		{
			host += scheme_len;
			break;
		}
	}
	host[strcspn(host, "/?#:")] = '\0';

	// 2. Walk the labels from the right. The last label is always treated as
	//    (part of) the suffix; the last two labels are treated as the suffix
	//    when together they form an entry of gDomainPart.
	char* domain_top = NULL;
	char* last_dot = strrchr(host, '.');
	if (last_dot == NULL)
	{
		// No dot at all (e.g. "localhost"): the host itself is the body.
		if (*host != '\0')
		{
			domain_top = strdup(host);
		}
		free(url);
		return domain_top;
	}

	*last_dot = '\0';
	char* second_dot = strrchr(host, '.');
	if (second_dot != NULL)
	{
		// Restore the last dot so second_dot spells the two-label suffix.
		*last_dot = '.';
		if (IsKnownDomainSuffix(second_dot))
		{
			// Two-label suffix such as ".com.cn": the body is the label
			// immediately before it.
			*second_dot = '\0';
			char* third_dot = strrchr(host, '.');
			domain_top = strdup(third_dot != NULL ? third_dot + 1 : host);
		}
		else
		{
			// One-label suffix: the body is the label before the last dot.
			*last_dot = '\0';
			domain_top = strdup(second_dot + 1);
		}
	}
	else
	{
		// Exactly two labels ("blogjava.net"): the first one is the body.
		domain_top = strdup(host);
	}

	free(url);
	return domain_top;
}

// Self-test driver: prints the suffix list and the program path, then checks
// GetDomainTopPart against a fixed table of URLs and expected domain bodies.
int main(int argc, char const *argv[])
{
	// One row per check: the URL handed to GetDomainTopPart and the domain
	// body it is expected to produce.
	struct UrlCase
	{
		const char* url;
		const char* expected;
	};
	static const UrlCase kCases[] = {
		{ "http://blog.csdn.net/infoworld/1.html", "csdn" },
		{ "blog.csdn.net/infoworld/1.html", "csdn" },
		{ "http://tomcat-oracle.iteye.com/blog/2020634", "iteye" },
		{ "http://www.blogjava.net/paulwong/archive/2014/02/23/410200.html", "blogjava" },
		{ "http://blogjava.net/paulwong/archive/2014/02/23/410200.html", "blogjava" },
		{ "http://test.com.cn/paulwong/archive/2014/02/23/410200.html", "test" },
		{ "http://www.test.com.cn/paulwong/archive/2014/02/23/410200.html", "test" },
	};
	const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);

	cout << gDomainPart << endl;
	cout << argv[0] << endl;

	for (size_t i = 0; i < case_count; ++i)
	{
		// The result is heap-allocated by GetDomainTopPart; release it after
		// the comparison.
		char* domain_top = GetDomainTopPart(kCases[i].url);
		assert(strcmp(domain_top, kCases[i].expected) == 0);
		free(domain_top);
	}

	return 0;
}


域名后缀头文件 domain.h:

// ';'-delimited list of recognised domain suffixes. Every entry is preceded
// by ';' and the list ends with a trailing ';'. The adjacent string literals
// below are concatenated by the compiler into one string.
static const char* gDomainPart =
	// generic and Chinese top-level suffixes
	";.com;.net;.org;.hk;.cn"
	// two-label suffixes under .cn
	";.com.cn;.net.cn;.org.cn;.gov.cn"
	// further single-label suffixes
	";.biz;.info;.cc;.tv;.mobi;.name;.asia"
	";.tw;.sh;.ac;.io;.tm;.travel"
	";.ws;.us;.sc;.mn;.ag;.vc;.la;.bz"
	";.in;.cm;.co;.tel;.me;.pro"
	// two-label suffixes under .hk / .tw, then the final entry and terminator
	";.com.hk;.com.tw;.pw;";


输出:

;.com;.net;.org;.hk;.cn;.com.cn;.net.cn;.org.cn;.gov.cn;.biz;.info;.cc;.tv;.mobi;.name;.asia;.tw;.sh;.ac;.io;.tm;.travel;.ws;.us;.sc;.mn;.ag;.vc;.la;.bz;.in;.cm;.co;.tel;.me;.pro;.com.hk;.com.tw;.pw;
C:\workspace\script-test\test_blog\src\test_domain
http://blog.csdn.net/infoworld/1.html
blog.csdn.net/infoworld/1.html
http://tomcat-oracle.iteye.com/blog/2020634
http://www.blogjava.net/paulwong/archive/2014/02/23/410200.html
http://blogjava.net/paulwong/archive/2014/02/23/410200.html
http://test.com.cn/paulwong/archive/2014/02/23/410200.html
http://www.test.com.cn/paulwong/archive/2014/02/23/410200.html
[Finished in 0.1s]
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐