您的位置:首页 > 编程语言 > C语言/C++

Accelerated C++ .P110 find_urls

2010-10-13 14:30 148 查看
/*
  * Accelerated C++, p. 110
  * Find every valid URL in a document.
  * Input:  const string& s   a string holding the entire document text
  * Output: vector<string>    the collection of all valid URLs found
*/

#include<algorithm>
#include<cctype>
#include<cstdio>
#include<iostream>
#include<string>
#include<vector>

using std::vector;
using std::string;
using std::cin;
using std::cout;
using std::endl;

// Returns an iterator to the first character in [b, e) that cannot be part
// of a URL, or e when every character in the range is a URL character.
string::const_iterator url_end(string::const_iterator b,string::const_iterator);
// Returns an iterator to the first letter of the protocol name of the first
// URL found in [b, e), or e when the range contains no URL.
string::const_iterator url_beg(string::const_iterator b,string::const_iterator e);

/*
 * Collects every URL contained in the text s.
 *
 * The text is scanned left to right: url_beg locates the start of the next
 * URL, url_end locates the position just past it, and the characters in
 * between are stored. Scanning then resumes right after the stored URL.
 *
 * Returns the URLs in the order in which they occur in s (empty when s
 * contains none).
 */
vector<string> find_urls(const string& s)
{
    typedef string::const_iterator iter;

    vector<string> found;
    const iter stop = s.end();
    iter cursor = s.begin();

    while(cursor != stop)
    {
        cursor = url_beg(cursor,stop);
        if(cursor == stop)
            break;                      // no further URL in the remaining text

        // [cursor, url_stop) is exactly one URL
        const iter url_stop = url_end(cursor,stop);
        found.push_back(string(cursor,url_stop));

        // continue the search behind the URL just stored
        cursor = url_stop;
    }

    return found;
}

/*
 * Tells whether c is a character that may NOT appear in a URL.
 *
 * A character is accepted as part of a URL when it is alphanumeric or one
 * of the punctuation characters listed in url_ch; everything else
 * (whitespace, quotes, '<', non-ASCII bytes in the "C" locale, ...) is
 * rejected.
 *
 * Returns true when c cannot be part of a URL, false otherwise.
 */
bool not_url_char(char c)
{
    // punctuation that is allowed inside a URL in addition to letters and digits
    static const std::string url_ch = "~;/?:@=&$-_.+!*(),";

    // The <cctype> classifiers require a value representable as unsigned
    // char; a plain char holding a non-ASCII byte may be negative, which
    // would be undefined behaviour without the cast.
    if(std::isalnum(static_cast<unsigned char>(c)))
        return false;

    return std::find(url_ch.begin(),url_ch.end(),c) == url_ch.end();
}

/*
 * Finds the end of a URL that starts at b.
 *
 * b, e  the range [b, e) to examine
 *
 * Returns an iterator to the first character in [b, e) for which
 * not_url_char is true, or e when no such character exists.
 */
string::const_iterator url_end(string::const_iterator b,string::const_iterator e)
{
    // Qualified call: an unqualified find_if is only found through
    // argument-dependent lookup, which fails on implementations where
    // string::const_iterator is a plain pointer.
    return std::find_if(b,e,not_url_char);
}

/*
 * Finds the start of the first URL in [b, e).
 *
 * A URL is recognised by the separator "://" that is preceded by at least
 * one letter (the protocol name) and followed by at least one character
 * that is valid in a URL.
 *
 * b, e  the range [b, e) to examine
 *
 * Returns an iterator to the first letter of the protocol name, or e when
 * the range contains no URL.
 */
string::const_iterator url_beg(string::const_iterator b,string::const_iterator e)
{
    static const string sep = "://";

    typedef string::const_iterator iter;

    iter i = b;

    // i always denotes the next occurrence of the separator
    while((i = std::search(i,e,sep.begin(),sep.end())) != e)
    {
        // the separator must be neither at the very start nor at the very
        // end of the range, otherwise there is no room for a protocol
        // name or for the rest of the URL
        if(i != b && i+sep.size() != e)
        {
            // walk backwards over the letters of the protocol name;
            // the cast keeps isalpha well-defined for non-ASCII bytes
            iter beg = i;
            while(beg != b && std::isalpha(static_cast<unsigned char>(beg[-1])))
                --beg;

            // at least one letter before and one URL character after
            if(beg != i && !not_url_char(i[sep.size()]))
                return beg;
        }

        // this separator was not part of a URL; skip past it
        i += sep.size();
    }

    return e;
}

/*
 * Reads the file "1.htm", extracts every URL from its contents and prints
 * one URL per line on standard output.
 *
 * Returns 0 on success, 1 when "1.htm" cannot be opened.
 */
int main(void)
{
    // Redirect standard input to the file. When freopen fails the stream is
    // no longer usable, so report the reason and stop.
    if(!std::freopen("1.htm","r",stdin))
    {
        std::perror("1.htm");
        return 1;
    }

    string content;
    string str;

    while(getline(cin,str))
    {
        content += str;
        // getline drops the line terminator; put one back so that a URL at
        // the end of a line is not glued to the text starting the next line
        content += '\n';
    }

    const vector<string> urls = find_urls(content);

    for(vector<string>::const_iterator it = urls.begin(); it != urls.end(); ++it)
    {
        cout << *it << '\n';
    }

    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: