您的位置:首页 > 编程语言 > C#

C#中查询字符串中是否包含指定字符/字符串,使用IndexOf还是Contains?

2010-03-28 18:27 836 查看
C#中查询字符串中是否包含指定字符/串,使用IndexOf还是Contains?这是一个很常见的命题,以前也没有注意,今天QQ群里有人提起,于是就做了下试验,代码如下:

using System;
using System.Diagnostics;

namespace ConsoleApplication1
{
    /// <summary>
    /// Micro-benchmark that times <c>string.Contains(string)</c> against the
    /// single-argument <c>string.IndexOf(string)</c> for a target located at the
    /// start, in the middle, and at the very end of a fixed source string.
    /// </summary>
    class Program
    {
        /// <summary>Number of calls timed per measurement.</summary>
        private const int N = 10000000;

        /// <summary>
        /// Runs the three measurement pairs and prints the elapsed time of each,
        /// then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // The trailing '$' is a sentinel that occurs nowhere else in the string.
            // Without it no character is unique to the end: the previous "last
            // character" target "u" first occurs at index 20 (inside the alphabet),
            // so that case never scanned the whole string.
            string source = "abcdefghijklmnopqrstuvwxyz0123456789C#"
                + "中查询字符串中是否包含指定字符/串,使用IndexOf还是Contains?.uonun$";

            string target = "a";
            Console.WriteLine("目标为第一个字符时:");
            TestContains(source, target);
            TestIndexOf(source, target);
            Console.WriteLine();

            Console.WriteLine("目标为中部某个字符时:");
            target = "中";
            TestContains(source, target);
            TestIndexOf(source, target);
            Console.WriteLine();

            Console.WriteLine("目标为最后一个字符时:");
            // Derive the target from the string itself so it cannot drift out of sync.
            target = source[source.Length - 1].ToString();
            Debug.Assert(
                source.IndexOf(target, StringComparison.Ordinal) == source.Length - 1,
                "The last-character target must not occur earlier in the source.");
            TestContains(source, target);
            TestIndexOf(source, target);

            Console.WriteLine("执行完毕,按任意键退出...");
            Console.ReadKey();
        }

        /// <summary>
        /// Times <see cref="N"/> calls of <c>source.IndexOf(target)</c> and prints
        /// the elapsed milliseconds.
        /// </summary>
        /// <param name="source">String to search in.</param>
        /// <param name="target">String to search for.</param>
        private static void TestIndexOf(string source, string target)
        {
            // A local stopwatch keeps each measurement independent of the others.
            Stopwatch watch = Stopwatch.StartNew();
            for (int i = 0; i < N; i++)
            {
                // Deliberately the default overload: it is the subject of the comparison.
                source.IndexOf(target);
            }
            watch.Stop();
            Console.WriteLine("IndexOf: " + watch.ElapsedMilliseconds + "ms");
        }

        /// <summary>
        /// Times <see cref="N"/> calls of <c>source.Contains(target)</c> and prints
        /// the elapsed milliseconds.
        /// </summary>
        /// <param name="source">String to search in.</param>
        /// <param name="target">String to search for.</param>
        private static void TestContains(string source, string target)
        {
            Stopwatch watch = Stopwatch.StartNew();
            for (int i = 0; i < N; i++)
            {
                source.Contains(target);
            }
            watch.Stop();
            Console.WriteLine("Contains: " + watch.ElapsedMilliseconds + "ms");
        }
    }
}

得到的结果是:

目标为第一个字符时:
Contains: 973ms
IndexOf: 1343ms

目标为中部某个字符时:
Contains: 1813ms
IndexOf: 8602ms

目标为最后一个字符时:
Contains: 1433ms
IndexOf: 5094ms
执行完毕,按任意键退出...

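可以看出,使用Contains方法的效率比IndexOf的效率高很多。(注:上面这组结果中,第三组测试所用的目标 "u" 在 source 的字母表部分(索引 20)就已首次出现,早于"中"首次出现的索引 38,所以这一组实际测到的并不是最后一个字符,其耗时也因此低于第二组。)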

疑问:Contains 调用了 IndexOf,反倒比 IndexOf 还快?

正如评论中@Vampire_D 所说,Contains 调用了 IndexOf,如果 Contains 比 IndexOf 还快,简直不科学啊!不过仔细一看一对比你就会知道,正是 IndexOf 的第二个参数在捣鬼。

.NET 中 Contains 和 IndexOf 分别是这样实现的:

// Type: System.String
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

// .NET 中字符串的 Contains 和 IndexOf 方法的实现:

/// <summary>
/// Reports whether <paramref name="value"/> occurs in this string: returns true
/// when the ordinal IndexOf search returns a non-negative index.
/// </summary>
/// <param name="value">The string to look for.</param>
/// <returns>True if the ordinal search finds <paramref name="value"/>; otherwise false.</returns>
public bool Contains(string value)
{
// The comparison is explicitly StringComparison.Ordinal, not the current culture.
return this.IndexOf(value, StringComparison.Ordinal) >= 0;
}

/// <summary>
/// Searches this string for <paramref name="value"/> by delegating to the
/// current culture's CompareInfo.IndexOf.
/// </summary>
/// <param name="value">The string to look for.</param>
/// <returns>The value returned by CompareInfo.IndexOf for the current culture.</returns>
public int IndexOf(string value)
{
// No StringComparison is passed: the search goes through CultureInfo.CurrentCulture.
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value);
}

/// <summary>
/// Searches this string for <paramref name="value"/> starting at
/// <paramref name="startIndex"/>, delegating to the current culture's
/// CompareInfo.IndexOf.
/// </summary>
/// <param name="value">The string to look for.</param>
/// <param name="startIndex">Position passed to CompareInfo.IndexOf as the search start.</param>
/// <returns>The value returned by CompareInfo.IndexOf for the current culture.</returns>
public int IndexOf(string value, int startIndex)
{
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex);
}

/// <summary>
/// Searches a range of this string for <paramref name="value"/> using the
/// current culture with CompareOptions.None, after validating the range.
/// </summary>
/// <param name="value">The string to look for.</param>
/// <param name="startIndex">Start of the range; must be in [0, Length].</param>
/// <param name="count">Length of the range; must be in [0, Length - startIndex].</param>
/// <returns>The value returned by CompareInfo.IndexOf for the current culture.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="count"/> is outside the ranges above.
/// </exception>
public int IndexOf(string value, int startIndex, int count)
{
// Range checks only; this overload performs no explicit null check on value.
if (startIndex < 0 || startIndex > this.Length)
throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
if (count < 0 || count > this.Length - startIndex)
throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
else
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
}

/// <summary>
/// Searches the whole string for <paramref name="value"/> using the given
/// comparison; forwards to the four-argument overload with the range
/// (0, Length).
/// </summary>
/// <param name="value">The string to look for.</param>
/// <param name="comparisonType">Comparison mode forwarded unchanged.</param>
/// <returns>The result of the four-argument IndexOf overload.</returns>
public int IndexOf(string value, StringComparison comparisonType)
{
return this.IndexOf(value, 0, this.Length, comparisonType);
}

/// <summary>
/// Searches from <paramref name="startIndex"/> to the end of the string using
/// the given comparison; forwards to the four-argument overload with
/// count = Length - startIndex.
/// </summary>
/// <param name="value">The string to look for.</param>
/// <param name="startIndex">Start of the search range.</param>
/// <param name="comparisonType">Comparison mode forwarded unchanged.</param>
/// <returns>The result of the four-argument IndexOf overload.</returns>
public int IndexOf(string value, int startIndex, StringComparison comparisonType)
{
return this.IndexOf(value, startIndex, this.Length - startIndex, comparisonType);
}

/// <summary>
/// Core IndexOf overload: validates the arguments, then dispatches on
/// <paramref name="comparisonType"/> to the matching CompareInfo/TextInfo search.
/// </summary>
/// <param name="value">The string to look for; must not be null.</param>
/// <param name="startIndex">Start of the range; must be in [0, Length].</param>
/// <param name="count">Length of the range; must be non-negative with startIndex + count within Length.</param>
/// <param name="comparisonType">Selects the culture and compare options used for the search.</param>
/// <returns>The value returned by the selected search routine.</returns>
/// <exception cref="ArgumentNullException"><paramref name="value"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="startIndex"/> or <paramref name="count"/> is out of range.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="comparisonType"/> is not a handled value.</exception>
public int IndexOf(string value, int startIndex, int count, StringComparison comparisonType)
{
if (value == null)
throw new ArgumentNullException("value");
if (startIndex < 0 || startIndex > this.Length)
throw new ArgumentOutOfRangeException("startIndex", Environment.GetResourceString("ArgumentOutOfRange_Index"));
if (count < 0 || startIndex > this.Length - count)
throw new ArgumentOutOfRangeException("count", Environment.GetResourceString("ArgumentOutOfRange_Count"));
switch (comparisonType)
{
// Culture-sensitive searches: current culture, with or without case folding.
case StringComparison.CurrentCulture:
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
case StringComparison.CurrentCultureIgnoreCase:
return CultureInfo.CurrentCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
// Same options, but against the invariant culture.
case StringComparison.InvariantCulture:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.None);
case StringComparison.InvariantCultureIgnoreCase:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.IgnoreCase);
// Ordinal is routed through the invariant culture's CompareInfo, but with
// CompareOptions.Ordinal rather than CompareOptions.None. This is the path Contains takes.
case StringComparison.Ordinal:
return CultureInfo.InvariantCulture.CompareInfo.IndexOf(this, value, startIndex, count, CompareOptions.Ordinal);
// Case-insensitive ordinal search uses a dedicated TextInfo helper instead of CompareInfo.
case StringComparison.OrdinalIgnoreCase:
return TextInfo.IndexOfStringOrdinalIgnoreCase(this, value, startIndex, count);
default:
throw new ArgumentException(Environment.GetResourceString("NotSupported_StringComparison"), "comparisonType");
}
}


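可以看出,Contains 固定使用 StringComparison.Ordinal(序号比较:按字符的数值逐个比较,不考虑语言文化规则;在上面的反编译代码中,这一分支是通过 CultureInfo.InvariantCulture.CompareInfo 并传入 CompareOptions.Ordinal 实现的),而 IndexOf 在不带 StringComparison 参数时,默认使用 CultureInfo.CurrentCulture 并以 CompareOptions.None 进行区分文化的比较,这直接导致了比较过程的复杂性,从而影响了两个方法的执行效率(以及结果——我没有去验证什么情况下结果会不一致)。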

进一步地,我们来看看使用这两个不同的 CultureInfo 到底有什么不同?

CultureInfo.InvariantCulture:它是 CultureInfo 中的一个静态变量,直接在 CultureInfo 的静态构造里实例化了,并且只需要实例化一次。

// Type: System.Globalization.CultureInfo
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

//------------CultureInfo.InvariantCulture----------

// Backing field returned by the InvariantCulture property; assigned in the static constructor.
private static CultureInfo m_InvariantCultureInfo;

/// <summary>
/// Type initializer: creates the read-only invariant culture instance (if not
/// already set) and initializes the user default culture and UI culture fields.
/// </summary>
static CultureInfo()
{
if (CultureInfo.m_InvariantCultureInfo == null)
// sbyte.MaxValue is 127 (0x7F); presumably the invariant culture's LCID. TODO confirm.
CultureInfo.m_InvariantCultureInfo = new CultureInfo((int) sbyte.MaxValue, false)
{
m_isReadOnly = true
};
// Both user defaults are first pointed at the invariant culture, then replaced by the
// Init* results. NOTE(review): presumably so the Init* calls never observe null fields; confirm.
CultureInfo.m_userDefaultCulture = CultureInfo.m_userDefaultUICulture = CultureInfo.m_InvariantCultureInfo;
CultureInfo.m_userDefaultCulture = CultureInfo.InitUserDefaultCulture();
CultureInfo.m_userDefaultUICulture = CultureInfo.InitUserDefaultUICulture();
}

/// <summary>
/// Gets the invariant culture: a plain read of the static field
/// m_InvariantCultureInfo, with no further work per access.
/// </summary>
public static CultureInfo InvariantCulture
{
get
{
return CultureInfo.m_InvariantCultureInfo;
}
}

//-----------CultureInfo.CurrentCulture--------
/// <summary>
/// Gets the current thread's culture by forwarding to
/// Thread.CurrentThread.CurrentCulture on every access.
/// </summary>
public static CultureInfo CurrentCulture
{
get
{
return Thread.CurrentThread.CurrentCulture;
}
}

// other code...


再看 CultureInfo.CurrentCulture:它是当前线程的 CultureInfo,要获取这个属性的值会有一系列的操作,请见下面的代码,Thread.CurrentThread.CurrentCulture:

// Type: System.Threading.Thread
// Assembly: mscorlib, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
// MVID: 00788FED-9041-4DB2-AB5B-0952A06515B1
// Assembly location: C:\Windows\Microsoft.NET\Framework\v2.0.50727\mscorlib.dll

/// <summary>
/// Gets or sets the culture of this thread.
/// The getter falls back to CultureInfo.UserDefaultCulture when no culture has
/// been stored or when the native lookup does not yield one.
/// The setter demands the ControlThread security permission and rejects null.
/// </summary>
/// <exception cref="ArgumentNullException">The assigned value is null.</exception>
public CultureInfo CurrentCulture
{
get
{
// No culture stored on this thread: use the user default.
if (this.m_CurrentCulture == null)
return CultureInfo.UserDefaultCulture;
CultureInfo safeCulture = (CultureInfo) null;
// Ask the runtime for a culture instance for the current domain ID; fall back to the
// user default if the call reports failure or leaves safeCulture null.
if (!Thread.nativeGetSafeCulture(this, Thread.GetDomainID(), false, ref safeCulture) || safeCulture == null)
return CultureInfo.UserDefaultCulture;
else
return safeCulture;
}
[SecurityPermission(SecurityAction.Demand, ControlThread = true)] set
{
if (value == null)
throw new ArgumentNullException("value");
// NOTE(review): CheckNeutral presumably rejects neutral cultures; not shown here, confirm.
CultureInfo.CheckNeutral(value);
// Passes the culture's LCID to the native thread-locale setter before storing it.
CultureInfo.nativeSetThreadLocale(value.LCID);
value.StartCrossDomainTracking();
this.m_CurrentCulture = value;
}
}


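因此在使用两个不同的 CultureInfo 进行字符/字符串比较时,所花的代价就很不同了,因此执行效率上就有了较大差别。不过需要注意,获取 CurrentCulture 的开销每次调用只发生一次,而上面的测试结果显示两者的差距会随着目标位置靠后而明显拉大,这说明更主要的差别在于比较方式本身:Contains 走的是 CompareOptions.Ordinal(按字符数值逐个比较),IndexOf(string) 走的是 CompareOptions.None(按文化规则进行的语言学比较),后者的每一步比较都要昂贵得多。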

结论:不特殊考虑 CultureInfo 时,Contains(string) 的执行效率比 IndexOf(string) 高;如果既需要得到索引位置又想要同样的效率,可以使用 IndexOf(value, StringComparison.Ordinal),这正是 Contains 内部的调用方式。

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐