您的位置:首页 > Web前端 > HTML

HTML标签过滤方案

2008-11-18 10:37 211 查看
取决于不同的解决机制:

解决方案1:

在数据库中存入标签的HTML转换符,按照原样输出。

在存入数据库之前,先对输入做HTML编码:Server.HtmlEncode(txtboxName.Text);(C#区分大小写,方法名和属性名需按此大小写书写。)

解决方案2:

在数据库中不存入HTML标签的有关信息,过滤HTML标签,只显示文字。

存入数据库之前先调用过滤函数即可:checkStr(txtboxName.Text)、StripHTML(txtboxName.Text)或者NoHTML(txtboxName.Text);(下面提供三个过滤函数,任选一个。需添加命名空间:using System.Text.RegularExpressions;其中NoHTML用到了HttpContext,还需要using System.Web;)

以下代码均调试通过:

/// <summary>
/// HTML filter, method 1: removes script, iframe and frameset elements together
/// with their content, neutralizes script-scheme href values and inline "on..."
/// event attributes, then strips every remaining tag so only text is left.
/// </summary>
/// <param name="html">Markup to filter. May be null or empty.</param>
/// <returns>
/// The filtered text, or an empty string when <paramref name="html"/> is null or empty.
/// </returns>
public string checkStr(string html)
{
    // Regex.Replace throws ArgumentNullException for null input; there is nothing to filter anyway.
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions ignoreCase =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Remove each <script>...</script> element. The quantifier is lazy so that one match
    // ends at the nearest closing tag instead of swallowing all text up to the last one.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<script\b[\s\S]*?</script *>", "", ignoreCase);

    // Remove href values that use a "...script:" scheme (javascript:, vbscript:).
    // [^>] keeps the match inside a single tag, so ordinary text after a link survives.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"\shref\s*=\s*[^>]*?script\s*:", "", ignoreCase);

    // Rename inline event-handler attributes (onclick=, onerror=, ...) so they cannot fire.
    // Only an attribute-shaped "on<name>=" is matched, not arbitrary prose containing " on" and "=".
    html = System.Text.RegularExpressions.Regex.Replace(html, @"\son\w+\s*=", " _disibledevent=", ignoreCase);

    // Remove <iframe> and <frameset> elements, again stopping at the nearest closing tag.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<iframe\b[\s\S]*?</iframe *>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<frameset\b[\s\S]*?</frameset *>", "", ignoreCase);

    // Remove <img ...> tags.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"\<img[^\>]+\>", "", ignoreCase);

    // Remove paragraph tags (closing first, then opening), keeping their text.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"</p>", "", ignoreCase);
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<p>", "", ignoreCase);

    // Remove every remaining tag. After this pass no complete <...> pair is left,
    // so no separate handling of <strong> is needed.
    html = System.Text.RegularExpressions.Regex.Replace(html, @"<[^>]*>", "", ignoreCase);

    // Drop non-breaking spaces, both as the entity and as the U+00A0 character.
    // NOTE(review): the published listing removed a plain " " here, which glues all
    // words together; presumably the literal was the nbsp entity rendered by the
    // web page. Confirm against the intended behavior.
    html = html.Replace("&nbsp;", "").Replace("\u00A0", "");

    return html;
}

#region 过滤掉html代码

/// <summary>
/// HTML filter, method 2: removes script elements and tags, decodes a handful of
/// common character entities, and finally deletes any angle brackets and CRLF
/// sequences that remain.
/// </summary>
/// <param name="strHtml">Source text containing HTML. May be null or empty.</param>
/// <returns>
/// The text with markup removed, or an empty string when
/// <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    // Regex.Replace throws ArgumentNullException for null input; there is nothing to strip anyway.
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // aryReg[i] is replaced by aryRep[i]; the two arrays must stay the same length
    // and in the same order. The patterns are applied top to bottom.
    string[] aryReg =
    {
        // Whole script elements. [\s\S] (rather than '.') also matches line breaks,
        // so scripts that span several lines are removed with their content.
        @"<script[^>]*?>[\s\S]*?</script>",
        // Opening/closing tags, including quoted attribute values.
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // A line break followed by further whitespace.
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // Any other numeric entity is dropped.
        @"&#(\d+);",
        // A comment terminator becomes a line break, so that the next pattern
        // (which runs to the end of the line) can remove the comment.
        @"-->",
        @"<!--.*\n"
    };

    string[] aryRep =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\xa1", // chr(161)
        "\xa2", // chr(162)
        "\xa3", // chr(163)
        "\xa9", // chr(169)
        "",
        "\r\n",
        ""
    };

    string strOutput = strHtml;

    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = System.Text.RegularExpressions.Regex.Replace(
            strOutput,
            aryReg[i],
            aryRep[i],
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise the brackets and line breaks are never actually removed.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");

    return strOutput;
}

#endregion

/// <summary>
/// HTML filter, method 3: removes script elements, tags and comment markers,
/// decodes a handful of common character entities, deletes any remaining angle
/// brackets and CRLF sequences, then HTML-encodes and trims the result.
/// </summary>
/// <param name="Htmlstring">Source text containing HTML. May be null or empty.</param>
/// <returns>
/// The HTML-encoded, trimmed text with markup removed, or an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    // Regex.Replace throws ArgumentNullException for null input; there is nothing to strip anyway.
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script elements. [\s\S] (rather than '.') also matches line breaks,
    // so scripts that span several lines are removed with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>[\s\S]*?</script>", "", RegexOptions.IgnoreCase);

    // Remove tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove leftover comment terminators, then unterminated comment openers up to end of line.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode common character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Any other numeric entity is dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise the brackets and line breaks are never actually removed.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Use the static HttpUtility encoder rather than HttpContext.Current.Server:
    // HttpContext.Current is null outside an active request, which would make this
    // method throw NullReferenceException when called from a background thread or a test.
    Htmlstring = System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();

    return Htmlstring;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: