您的位置:首页 > Web前端 > HTML

利用正则表达式将html网页数据变成Web Service

2004-10-05 16:02 555 查看
这次的题目很简单,中国银行有一个查当天汇率的网页(http://www.bank-of-china.com/info/qpindex.shtml),不过是传统的HTML格式,而其又没有提供XML格式或者WebService查询。现在如果希望其他的信息系统能够随时读取其中的数据,那么最方便的莫过于中行提供一个WebService接口供大家调用,这也是典型的安全的WebService应用。可惜中行没有做,那么我们能不能自己来做呢?当然可以,只要用程序分析其HTML网页,就可以很容易地读取其中的数据。文本分析,当然要看我们的"Regular Expression"(呵呵,其实这才是写这个程序的真实目的 -- 应用正则表达式)。


2004/09/30 有效期至2004/10/07

1453.1500 1492.6400
港币105.9700 105.3300 106.2900 106.1100
美元826.4200 821.4500 828.9000 827.6600
瑞士法郎655.9300 641.1400 659.2200
新加坡元488.7600 477.2600 490.2300
瑞典克朗112.4900 109.8400 112.8300
丹麦克朗136.5900 133.3700 137.0000
挪威克朗121.9500 119.0800 122.3100
日元7.4344 7.3785 7.4717 7.4519
加拿大元650.8000 635.4800 652.7600
澳大利亚元591.9900 578.6400 594.9600
欧元1019.6400 1010.9600 1022.7000 1019.7000
澳门元103.2200 102.6000 103.5300
菲律宾比索14.6700 14.3300 14.7200
泰国铢19.9000 19.4300 19.9600
新西兰元553.7000 555.3600


@"<tr bgcolor='#\w+' ><td height='20'>(?<currency>.*)</td>\s*" +
@"<td height='20'><p align='right'>(?<bankbuytt>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<buynotes>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<sell>\d*.?\d*)( )+.?</td>\s*" +
@"<td height='20'><p align='right'>(?<base>\d*.?\d*)( )+.?</td>\s*"



using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Diagnostics;
using System.Web;
using System.Net;
using System.Web.Services;
using System.Xml;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

namespace ChinaBank
{
    /// <summary>
    /// Web service that downloads the Bank of China exchange-rate HTML page,
    /// extracts the quote rows with a regular expression and republishes them
    /// as an XML document or a DataSet.
    /// </summary>
    public class ForeignExchange : System.Web.Services.WebService
    {
        /// <summary>Address of the HTML page the quotes are scraped from.</summary>
        private const string RatesPageUrl = "http://www.bank-of-china.com/info/whjrpj.html";

        /// <summary>
        /// Matches one table row of the rates page: a currency name followed by
        /// four right-aligned numeric cells. Built once because
        /// RegexOptions.Compiled makes construction expensive.
        /// </summary>
        /// <remarks>
        /// The decimal point is escaped so that only digits and an optional
        /// '.' are captured, and the currency capture is lazy so it cannot
        /// run across several cells that share a line.
        /// NOTE(review): the "( )+" filler is kept exactly as published; the
        /// original page may have used "&amp;nbsp;" there - confirm against the
        /// live HTML.
        /// </remarks>
        private static readonly Regex RateRowPattern = new Regex(
            @"<tr bgcolor='#\w+' ><td height='20'>(?<currency>.*?)</td>\s*" +
            @"<td height='20'><p align='right'>(?<bankbuytt>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<buynotes>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<sell>\d*\.?\d*)( )+.?</td>\s*" +
            @"<td height='20'><p align='right'>(?<base>\d*\.?\d*)( )+.?</td>\s*"
            , RegexOptions.Compiled);

        public ForeignExchange()
        {
            //CODEGEN: This call is required by the ASP.NET Web Services Designer
            InitializeComponent();
        }

        #region Component Designer generated code

        //Required by the Web Services Designer
        private IContainer components = null;

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        protected override void Dispose( bool disposing )
        {
            if(disposing && components != null)
            {
                components.Dispose();
            }
            base.Dispose( disposing );
        }

        #endregion

        /// <summary>Returns the current quotes as an XML document.</summary>
        [WebMethod]
        public XmlDataDocument GetForeignExchangeRates()
        {
            return getXmlDoc();
        }

        /// <summary>Returns the current quotes as a DataSet ("Exchange" / "ForeignExchange").</summary>
        [WebMethod]
        public DataSet GetForeignExchangeRatesDataSet()
        {
            return getXmlDoc().DataSet;
        }

        /// <summary>Returns the raw HTML of the bank's rates page.</summary>
        [WebMethod]
        public string GetBankPage()
        {
            return getWebContent( RatesPageUrl );
        }

        // private methods

        /// <summary>
        /// Downloads <paramref name="url"/> and decodes the bytes as GB2312 text.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The decoded page content.</returns>
        private string getWebContent( string url )
        {
            using( WebClient client = new WebClient() )
            {
                byte[] buffer = client.DownloadData( url );
                return Encoding.GetEncoding("GB2312").GetString( buffer, 0, buffer.Length );
            }
        }

        /// <summary>
        /// Converts one captured numeric cell into a rate.
        /// </summary>
        /// <param name="text">Captured cell text; may be empty.</param>
        /// <returns>
        /// The parsed number divided by 100 (presumably the page quotes per 100
        /// units of foreign currency - confirm), or 0 when the cell is empty or
        /// not a valid number.
        /// </returns>
        private static double parseRate( string text )
        {
            double value;
            // Invariant culture: the page always uses '.' as the decimal
            // separator, whatever the server's regional settings are.
            if( text.Length > 0 &&
                Double.TryParse( text,
                                 System.Globalization.NumberStyles.Float,
                                 System.Globalization.CultureInfo.InvariantCulture,
                                 out value ) )
            {
                return value / 100;
            }
            return 0;
        }

        /// <summary>
        /// Downloads the rates page and turns every matching table row into a
        /// record of the "ForeignExchange" table.
        /// </summary>
        /// <returns>An XML view over the populated "Exchange" DataSet.</returns>
        private XmlDataDocument getXmlDoc()
        {
            string webcontent = getWebContent( RatesPageUrl );

            // Prepare the DataSet
            DataSet ds = new DataSet("Exchange");
            DataTable dt = new DataTable("ForeignExchange");
            ds.Tables.Add( dt );
            dt.Columns.Add( "Currency", typeof(string) );
            dt.Columns.Add( "BankBuyTT", typeof(double) );
            dt.Columns.Add( "BankBuyNotes", typeof(double) );
            dt.Columns.Add( "BankSell", typeof(double) );
            dt.Columns.Add( "Baseline", typeof(double) );

            // The document is bound to the DataSet, so rows added below are
            // visible through the XML view as well.
            XmlDataDocument xmldoc = new XmlDataDocument( ds );

            for( Match m = RateRowPattern.Match( webcontent ); m.Success; m = m.NextMatch() )
            {
                DataRow row = dt.NewRow();
                row["Currency"] = m.Groups["currency"].Value;
                row["BankBuyTT"] = parseRate( m.Groups["bankbuytt"].Value );
                row["BankBuyNotes"] = parseRate( m.Groups["buynotes"].Value );
                row["BankSell"] = parseRate( m.Groups["sell"].Value );
                row["Baseline"] = parseRate( m.Groups["base"].Value );
                dt.Rows.Add( row );
            }
            return xmldoc;
        }
    }
}

客户端也很容易,只要用wsdl生成了相应的WebService Proxy后,直接调用就行了,由于我让Server端返回了DataSet,因此客户端直接用DataGrid来显示DataSet即可,非常Easy,在这个问题上客户端没有什么技术关键点。

using System;
using System.Threading;
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;

namespace BankDataClient
{
    /// <summary>
    /// Main window of the sample client: asks the ForeignExchange web service
    /// for the current quotes and shows the returned DataSet in a DataGrid.
    /// </summary>
    public class frmMainBankRates : System.Windows.Forms.Form
    {
        /// <summary>Service address used when the configuration file has no "url" setting.</summary>
        private const string DefaultServiceUrl = "http://www.dancefires.com/ChinaBank/ForeignExchange.asmx";

        private System.Windows.Forms.DataGrid dataGrid1;
        private System.Windows.Forms.Button btnConnect;
        private System.Data.DataSet ds;
        private BankDataClient.com.dancefires.www.ForeignExchange proxy = new BankDataClient.com.dancefires.www.ForeignExchange();
        private System.Windows.Forms.TextBox txtUrl;
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        public frmMainBankRates()
        {
            // Required for Windows Form Designer support
            InitializeComponent();

            // Prefer the address from the "url" appSetting; fall back to the
            // built-in one when it is missing or empty, so the proxy is never
            // handed a null URL.
            // NOTE(review): the published listing assigned the configured
            // value and then unconditionally overwrote it with the hard-coded
            // address - confirm that configured-first is the intended order.
            string configuredUrl = System.Configuration.ConfigurationSettings.AppSettings["url"];
            if( configuredUrl != null && configuredUrl.Length > 0 )
            {
                proxy.Url = configuredUrl;
            }
            else
            {
                proxy.Url = DefaultServiceUrl;
            }
            txtUrl.Text = proxy.Url;
        }

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        protected override void Dispose( bool disposing )
        {
            if( disposing )
            {
                if(components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose( disposing );
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            this.dataGrid1 = new System.Windows.Forms.DataGrid();
            this.ds = new System.Data.DataSet();
            this.btnConnect = new System.Windows.Forms.Button();
            this.txtUrl = new System.Windows.Forms.TextBox();
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).BeginInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).BeginInit();
            this.SuspendLayout();
            //
            // dataGrid1
            //
            this.dataGrid1.DataMember = "";
            this.dataGrid1.DataSource = this.ds;
            this.dataGrid1.HeaderForeColor = System.Drawing.SystemColors.ControlText;
            this.dataGrid1.Location = new System.Drawing.Point(32, 48);
            this.dataGrid1.Name = "dataGrid1";
            this.dataGrid1.Size = new System.Drawing.Size(480, 256);
            this.dataGrid1.TabIndex = 0;
            //
            // ds
            //
            this.ds.DataSetName = "Exchange";
            this.ds.Locale = new System.Globalization.CultureInfo("zh-CN");
            //
            // btnConnect
            //
            this.btnConnect.Location = new System.Drawing.Point(432, 16);
            this.btnConnect.Name = "btnConnect";
            this.btnConnect.TabIndex = 1;
            this.btnConnect.Text = "连接";
            this.btnConnect.Click += new System.EventHandler(this.btnConnect_Click);
            //
            // txtUrl
            //
            this.txtUrl.Location = new System.Drawing.Point(32, 16);
            this.txtUrl.Name = "txtUrl";
            this.txtUrl.Size = new System.Drawing.Size(384, 20);
            this.txtUrl.TabIndex = 2;
            this.txtUrl.Text = "";
            //
            // frmMainBankRates
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(5, 13);
            this.ClientSize = new System.Drawing.Size(544, 318);
            // Without these the controls are created but never shown.
            this.Controls.Add(this.txtUrl);
            this.Controls.Add(this.btnConnect);
            this.Controls.Add(this.dataGrid1);
            this.Name = "frmMainBankRates";
            this.Text = "Foreign Exchange Rates of Bank of China";
            ((System.ComponentModel.ISupportInitialize)(this.dataGrid1)).EndInit();
            ((System.ComponentModel.ISupportInitialize)(this.ds)).EndInit();
            this.ResumeLayout(false);
        }
        #endregion

        /// <summary>
        /// Click handler of the connect button: reloads the grid.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the handler body was missing from the published
        /// listing; calling UpdateDataGrid on the UI thread is the minimal
        /// reconstruction.
        /// </remarks>
        private void btnConnect_Click(object sender, System.EventArgs e)
        {
            UpdateDataGrid();
        }

        /// <summary>
        /// Calls the web service at the address typed into the URL box and
        /// binds the returned "ForeignExchange" table to the grid. The inputs
        /// are locked while the call runs; any failure is shown in a message
        /// box and the inputs are unlocked again either way.
        /// </summary>
        private void UpdateDataGrid()
        {
            btnConnect.Enabled = false;
            txtUrl.ReadOnly = true;
            try
            {
                proxy.Url = txtUrl.Text;
                ds = proxy.GetForeignExchangeRatesDataSet();
                dataGrid1.SetDataBinding( ds, "ForeignExchange" );
            }
            catch( Exception err )
            {
                MessageBox.Show( err.Message );
            }
            finally
            {
                txtUrl.ReadOnly = false;
                btnConnect.Enabled = true;
            }
        }

        /// <summary>
        /// Application entry point: shows the main form.
        /// </summary>
        [STAThread]
        static void Main( string[] args )
        {
            Application.Run( new frmMainBankRates() );
        }
    }
}

有了这个例子,应该可以从中了解最基本的XML, WebService, Regular Expression, DataSet, DataGrid的知识。



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息