您的位置:首页 > 其它

汉字拼音的一个解决方法(初具使用价值)

2006-10-25 10:09 435 查看
Author:水如烟
这个版本已有使用价值。如果要求不高,基本满足需要。一并贴出来,也对比一下。
这个版本的汉字库已纳入程序资源里头,大小为324K。
主要代码如下:

Namespace Businness.PinYin
Public Class SpellInformation
Private gTone As String
Private gSpellInput As String
Private gSpellExpress As String

''' <summary>
''' 声调
''' </summary>
Public Property Tone() As String
Get
Return gTone
End Get
Set(ByVal value As String)
gTone = value.Trim
End Set
End Property

''' <summary>
''' 拼音输入码
''' </summary>
Public Property SpellInput() As String
Get
Return gSpellInput
End Get
Set(ByVal value As String)
gSpellInput = value.Trim
End Set
End Property

''' <summary>
''' 拼音
''' </summary>
Public Property SpellExpress() As String
Get
Return gSpellExpress
End Get
Set(ByVal value As String)
gSpellExpress = value.Trim
End Set
End Property

Sub New()
End Sub

''' <param name="tone">声调</param>
''' <param name="spellinput">拼音输入码</param>
''' <param name="spellexpress">拼音</param>
Sub New(ByVal tone As String, ByVal spellinput As String, ByVal spellexpress As String)
Me.Tone = tone
Me.SpellInput = spellinput
Me.SpellExpress = spellexpress
End Sub

''' <param name="spellexpress">拼音</param>
Public Overrides Function Equals(ByVal spellexpress As Object) As Boolean
Return Me.SpellExpress.Equals(spellexpress)
End Function

Public Overrides Function ToString() As String
Return String.Format("拼音码 {0,-6} 声调 {1} 拼音 {2}", Me.SpellInput, Me.Tone, Me.SpellExpress)
End Function
End Class

End Namespace

Namespace Businness.PinYin
Public Class Word
Private gValue As String

''' <param name="word">单个汉字</param>
Sub New(ByVal word As String)
Me.gValue = word
End Sub

''' <summary>
''' 汉字
''' </summary>
Public ReadOnly Property Value() As String
Get
Return gValue
End Get
End Property

Public ReadOnly Property Code() As String
Get
Return Common.Code(gValue)
End Get
End Property

Private gSpellList As New Dictionary(Of String, SpellInformation)

''' <summary>
''' 拼音集
''' </summary>
Public ReadOnly Property Spells() As SpellInformation()
Get
Dim tmp(gSpellList.Count - 1) As SpellInformation
gSpellList.Values.CopyTo(tmp, 0)
Return tmp
End Get
End Property

''' <summary>
''' 是否多音字
''' </summary>
Public ReadOnly Property IsMutiSpell() As Boolean
Get
Return gSpellList.Count > 1
End Get
End Property

Public Sub AddSpell(ByVal spell As SpellInformation)
If String.IsNullOrEmpty(spell.SpellExpress) Then Exit Sub

If Me.gSpellList.ContainsKey(spell.SpellExpress) Then Exit Sub

Me.gSpellList.Add(spell.SpellExpress, spell)
End Sub

Public Sub AddSpell(ByVal tone As String, ByVal spellinput As String, ByVal spellexpress As String)
Dim tmp As New SpellInformation(tone, spellinput, spellexpress)

AddSpell(tmp)
End Sub

Public ReadOnly Property DefaultSpell() As SpellInformation
Get
If Me.gSpellList.Count = 0 Then Return Nothing
Return Me.Spells(0)
End Get
End Property

Public ReadOnly Property DefaultSpellExpress() As String
Get
If DefaultSpell Is Nothing Then Return Nothing
Return Me.DefaultSpell.SpellExpress
End Get
End Property

Public ReadOnly Property DefaultSpellInput() As String
Get
If DefaultSpell Is Nothing Then Return Nothing
Return Me.DefaultSpell.SpellInput
End Get
End Property

Public ReadOnly Property DefaultTone() As String
Get
If DefaultSpell Is Nothing Then Return Nothing
Return Me.DefaultSpell.Tone
End Get
End Property

Public ReadOnly Property AllSpellExpress() As String
Get
If Me.gSpellList.Count = 0 Then Return Nothing

If Not Me.IsMutiSpell Then Return Me.DefaultSpellExpress

Dim tmp(gSpellList.Count - 1) As String
For i As Integer = 0 To gSpellList.Count - 1
tmp(i) = Me.Spells(i).SpellExpress
Next

Return String.Join(" ", tmp)
End Get
End Property

Public ReadOnly Property AllSpellInput() As String
Get
If Me.gSpellList.Count = 0 Then Return Nothing

If Not Me.IsMutiSpell Then Return Me.DefaultSpellInput

Dim tmp(gSpellList.Count - 1) As String
For i As Integer = 0 To gSpellList.Count - 1
tmp(i) = Me.Spells(i).SpellInput
Next

Return String.Join(" ", tmp)
End Get
End Property

Public ReadOnly Property AllTone() As String
Get
If Me.gSpellList.Count = 0 Then Return Nothing

If Not Me.IsMutiSpell Then Return Me.DefaultTone

Dim tmp(gSpellList.Count - 1) As String
For i As Integer = 0 To gSpellList.Count - 1
tmp(i) = Me.Spells(i).Tone
Next

Return String.Join(" ", tmp)
End Get
End Property

Public Overrides Function ToString() As String
Dim mBuilder As New System.Text.StringBuilder
For Each spell As SpellInformation In Me.Spells
mBuilder.AppendLine(String.Concat(Me.Value, " ", spell.ToString))
Next
Return mBuilder.ToString
End Function
End Class

End Namespace

Imports System.IO
Imports System.Text.RegularExpressions

Namespace Businness.PinYin
Public Class PYService
Private gDataSet As dsPinYin

''' <summary>
''' 汉字表
''' </summary>
Public ReadOnly Property PinYinTable() As dsPinYin.PinYinDataTable
Get
Return gDataSet.PinYin
End Get
End Property

''' <summary>
''' 单个汉字信息
''' </summary>
''' <param name="word">单个汉字</param>
Public Function GetWord(ByVal word As String) As Word
Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(GetCode(word))

Return RowConverter(mRow)
End Function

Private Function RowConverter(ByVal row As dsPinYin.PinYinRow) As Word
If row Is Nothing Then Return Nothing

Dim mWord As New Word(row.汉字)

Dim mSpellExpressArray() As String = row.拼音.Split(" "c)
Dim mToneArray() As String = row.声调.Split(" "c)
Dim mSpellInputArray() As String = row.拼音码.Split(" "c)

For i As Integer = 0 To mSpellExpressArray.Length - 1
mWord.AddSpell(mToneArray(i), mSpellInputArray(i), mSpellExpressArray(i))
Next

Return mWord
End Function

Private Sub RowUpdate(ByVal word As Word)
Dim mWord As Word = GetWord(word.Value)
If mWord Is Nothing Then
Dim tmpRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.AddPinYinRow(word.Value, word.Code, "", "", "", True)
mWord = RowConverter(tmpRow)
End If

For Each spell As SpellInformation In word.Spells
mWord.AddSpell(spell)
Next

Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(mWord.Code)
With mRow
.拼音码 = mWord.AllSpellInput
.拼音 = mWord.AllSpellExpress
.声调 = mWord.AllTone
.单音 = Not mWord.IsMutiSpell
End With

End Sub

Public Sub Load()
UpdateFromTxt()
End Sub

'文件存放的格式是:汉字,拼音,音调,拼音码
Private Sub UpdateFromTxt()
Me.gDataSet = New dsPinYin

LzmTW.uSystem.uCollections.CommonServices.MoveNext(Of String)(My.Resources.pinyin.Split(CChar(vbCrLf)), AddressOf Action)

Me.gDataSet.AcceptChanges()
End Sub

'文件存放的格式是:汉字,拼音,音调,拼音码
Private Sub Action(ByVal line As String)

Dim mArray As String()
mArray = line.Split(","c)

If mArray.Length <> 4 Then Exit Sub

Dim mWord As String = mArray(0).Trim
Dim mSpellExpress As String = mArray(1).Trim
Dim mTone As String = mArray(2).Trim
Dim mSpellInput As String = mArray(3).Trim

Dim mWordInformation As New Word(mWord)
mWordInformation.AddSpell(mTone, mSpellInput, mSpellExpress)

RowUpdate(mWordInformation)

End Sub

''' <summary>
''' 将字符串转为拼音
''' </summary>
''' <param name="line">字符串</param>
''' <param name="isgetfirst">如是多音字,取第一个拼音</param>
''' <param name="forInput">是则查拼音码,否则查拼音</param>
Public Function ToPinyin(ByVal line As String, ByVal isgetfirst As Boolean, ByVal forInput As Boolean) As String
Dim mBuilder As New Text.StringBuilder

For Each s As Char In line.ToCharArray
If Common.IsSingleWord(s) Then
mBuilder.Append(GetPinyin(s, isgetfirst, forInput))
Else
mBuilder.Append(s)
End If
Next

Return mBuilder.ToString
End Function

Private Function GetPinyin(ByVal word As String, ByVal isgetfirst As Boolean, ByVal forInput As Boolean) As String
Dim mResult As String

Dim mWord As Word = GetWord(word)

If isgetfirst Or Not mWord.IsMutiSpell Then
If forInput Then
mResult = mWord.DefaultSpellInput
Else
mResult = mWord.DefaultSpellExpress
End If

Else
If forInput Then

Dim tmpList(-1) As String

For Each spell As SpellInformation In mWord.Spells
If Array.IndexOf(tmpList, spell.SpellInput) = -1 Then
LzmTW.uSystem.uCollections.CommonServices.Append(tmpList, spell.SpellInput)
End If
Next

If tmpList.Length = 1 Then
mResult = mWord.DefaultSpellInput
Else
mResult = String.Format("({0})", String.Join(" ", tmpList))
End If
Else
mResult = String.Format("({0})", mWord.AllSpellExpress)
End If

End If

Return mResult
End Function

''' <summary>
''' 按拼音查字
''' </summary>
''' <param name="pinyin">拼音</param>
Public Function WordArray(ByVal pinyin As String) As String()
Dim mRows As dsPinYin.PinYinRow() = CType(Me.gDataSet.PinYin.Select(String.Format("拼音码 LIKE '%{0}%'", pinyin)), dsPinYin.PinYinRow())
Dim mResult(-1) As String
For i As Integer = 0 To mRows.Length - 1
If Array.IndexOf(mRows(i).拼音码.Split(" "c), pinyin) <> -1 Then
LzmTW.uSystem.uCollections.CommonServices.Append(mResult, mRows(i).汉字)
End If
Next
Return mResult
End Function

''' <summary>
''' 按拼音查字
''' </summary>
''' <param name="pinyin">拼音</param>
Public Function Words(ByVal pinyin As String) As String
Return String.Concat(WordArray(pinyin))
End Function

Public Function GetCode(ByVal word As String) As String
Return Common.Code(word)
End Function

End Class
End Namespace

调用的代码

Public Class Form1
Dim gPinyinService As New LzmTW.Businness.PinYin.PYService

Private Sub ButtonLoad_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles ButtonLoad.Click
gPinyinService.Load()
Me.DataGridView1.DataSource = gPinyinService.PinYinTable
End Sub

Private Sub ButtonTran_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles ButtonTran.Click
Me.RichTextBox2.Text = gPinyinService.ToPinyin(Me.RichTextBox1.Text, Me.CheckBox1.Checked, Me.CheckBox2.Checked)
End Sub

Private Sub ButtonWord_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles ButtonWord.Click
Me.RichTextBox3.Text = gPinyinService.Words(Me.TextBoxPinyin.Text)
End Sub

Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
Me.RichTextBox3.Text = gPinyinService.GetWord(Me.TextBox1.Text).ToString
End Sub
End Class

效果图:















下载方案:代码

Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1330615

平常中,经常用到汉字转拼音,比如批量生成姓名->拼音作为登录帐号。

这个方法只是简单的利用汉字拼音库。至于怎么找这个库,网上多有介绍。在最后提供下载的方案中也提供了这个库文本文件。

主要代码如下:

Imports System.IO
Imports System.Text.RegularExpressions

Namespace Businness.PinYin
Public Class PYService
Private gDataSet As New dsPinYin

''' <summary>
''' 汉字表
''' </summary>
Public ReadOnly Property PinYinTable() As dsPinYin.PinYinDataTable
Get
Return gDataSet.PinYin
End Get
End Property

Private gTxtFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.txt"
Private gxmlFile As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase & "pinyin.xml"

Private gRegex As New Regex("(?<Word>^[/u4e00-/u9fa5]+)(?<PingYin>.*)")

''' <summary>
''' 加载汉字库,文件名为pinyin.xml,在程序当前目录下
''' </summary>
Public Sub Load()
If Not IO.File.Exists(gxmlFile) Then
Throw New Exception(String.Format("文件{0}不存在", gxmlFile))
End If

DataSetInitialize()
gDataSet.ReadXml(gxmlFile)
End Sub

''' <summary>
''' 从汉字文件中更新,文件名为pinyin.txt,在程序当前目录下
''' </summary>
''' <remarks></remarks>
Public Sub Update()
If Not IO.File.Exists(gTxtFile) Then
Throw New Exception(String.Format("文件{0}不存在", gTxtFile))
End If

UpdateFromTxt(gTxtFile)
End Sub

''' <summary>
''' 保存汉字库,文件为pingyin.xml,在程序当前目录下
''' </summary>
''' <remarks></remarks>
Public Sub Save()
gDataSet.WriteXml(gxmlFile)
End Sub

Private Sub DataSetInitialize()
'在更新或读入时,清除
Me.gDataSet.Clear()
Me.gDataSet.AcceptChanges()
End Sub

Private Sub UpdateFromTxt(ByVal file As String)
DataSetInitialize()

Dim mLine As String
Dim mBuilder As New System.Text.StringBuilder
Dim mReader As New IO.StreamReader(file, System.Text.Encoding.Default)
Do
mLine = mReader.ReadLine
Add(mLine)
Loop Until String.IsNullOrEmpty(mLine)

mReader.Close()
mReader.Dispose()

Me.gDataSet.PinYin.AcceptChanges()
End Sub

Private Sub Add(ByVal line As String)
If line Is Nothing Then Exit Sub

With gRegex.Match(line)

If .Success Then

'只取单字,不取词组
If .Groups("Word").Value.Length = 1 Then
Add(.Groups("Word").Value, .Groups("PingYin").Value)
End If

End If
End With
End Sub

Private Sub Add(ByVal word As String, ByVal py As String)
'多音的,拼音间用单个空枨符隔开

py = py.Trim.Replace(" ", " ")

Dim mCode As String = ChineseCode(word)
Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(mCode)
If mRow Is Nothing Then
Me.gDataSet.PinYin.AddPinYinRow(word, mCode, py)
Else
Dim pyArray() As String = py.Split(" "c)
For Each s As String In pyArray
If Not mRow.拼音.Contains(s) Then
mRow.拼音 = String.Concat(mRow.拼音, " " & s)
End If
Next
End If

End Sub

''' <summary>
''' 将字符串转为拼音
''' </summary>
''' <param name="line">字符串</param>
''' <param name="isgetfirst">如是多音字,取第一个拼音</param>
Public Function ToPinyin(ByVal line As String, ByVal isgetfirst As Boolean) As String
Dim mBuilder As New Text.StringBuilder

For Each s As Char In line.ToCharArray
If IsTrue(s) Then
mBuilder.Append(GetPinyin(s, isgetfirst))
Else
mBuilder.Append(s)
End If
Next

Return mBuilder.ToString
End Function

Private Function GetPinyin(ByVal word As String, ByVal isgetfirst As Boolean) As String
Dim mResult As String = word

Dim mArray As String() = PinYinArray(ChineseCode(word)) '取拼音组
If Not mArray Is Nothing Then
If mArray.Length = 1 Or isgetfirst Then
mResult = mArray(0) '单音的
Else

mResult = String.Format("({0})", String.Join(",", mArray)) '多音的用括号括住,拼音间用逗号隔开
End If
End If

Return mResult
End Function

'取拼音组
Private Function PinYinArray(ByVal code As String) As String()
Dim mRow As dsPinYin.PinYinRow = Me.gDataSet.PinYin.FindBy代码(code)
If mRow Is Nothing Then Return Nothing

Return mRow.拼音.Split(" "c)
End Function

''' <summary>
''' 按拼音查字
''' </summary>
''' <param name="pinyin">拼音</param>
Public Function WordArray(ByVal pinyin As String) As String()
Dim mRows As dsPinYin.PinYinRow() = CType(Me.gDataSet.PinYin.Select(String.Format("拼音 LIKE '%{0}%'", pinyin)), dsPinYin.PinYinRow())
Dim mResult(-1) As String
For i As Integer = 0 To mRows.Length - 1
If Array.IndexOf(mRows(i).拼音.Split(" "c), pinyin) <> -1 Then
Me.Append(mResult, mRows(i).汉字)
End If
Next
Return mResult
End Function

''' <summary>
''' 按拼音查字
''' </summary>
''' <param name="pinyin">拼音</param>
Public Function Words(ByVal pinyin As String) As String
Return String.Concat(WordArray(pinyin))
End Function

''' <summary>
''' 汉字代码
''' </summary>
''' <param name="word">单个汉字</param>
Public Shared Function ChineseCode(ByVal word As String) As String
If Not IsTrue(word) Then Return Nothing

Dim bytes() As Byte = System.Text.Encoding.Default.GetBytes(word)

Return String.Concat(Hex(bytes(0)), Hex(bytes(1)))
End Function

''' <summary>
''' 是否是单个汉字
''' </summary>
''' <param name="word">字符</param>
Public Shared Function IsTrue(ByVal word As String) As Boolean
If word Is Nothing Then Return False

Return System.Text.RegularExpressions.Regex.IsMatch(word, "^[/u4e00-/u9fa5]$")
End Function

Private Sub Append(ByRef collection As String(), ByVal value As String)
ReDim Preserve collection(collection.Length)
collection(collection.Length - 1) = value
End Sub
End Class
End Namespace

效果图:









下载方案:代码 版本二

Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1329538
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐