C#编写的日英字典程序 - 基于EDict
2011-07-06 10:06
633 查看
关于EDict, 详细资料可以访问 http://www.csse.monash.edu.au/~jwb/edict_doc_old.html
EDict的文件组织形式:
FORMAT: EDICT's format is that of the original "EDICT" format used by the early PC Japanese word-processor MOKE (Mark's Own Kanji Editor). It uses EUC-JP coding for kana and kanji; however, this can be converted to JIS (ISO-2022-JP) or Shift-JIS by any of the several conversion programs around. It is a text file with one entry per line. The format of entries is:
KANJI [KANA] /English_1/English_2/.../
or
KANA /English_1/.../
主要代码:
MainForm.cs
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace EdictLookup
{
/// <summary>
/// Main window of the dictionary program. It loads the dictionary when the form
/// loads and re-runs the lookup whenever the lookup button is clicked or one of
/// the two check boxes changes.
/// </summary>
public partial class MainForm : Form
{
    private Edict myEdict;

    public MainForm()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Creates the dictionary object and loads the dictionary selected by the
    /// "full dictionary" check box.
    /// </summary>
    private void MainForm_Load(object sender, EventArgs e)
    {
        myEdict = new Edict();
        if (!this.TryLoadDictionary())
        {
            // The form is already closing; do not touch its controls any further.
            return;
        }

        // Select() rather than Focus(): the Control.Focus documentation recommends
        // Select()/ActiveControl for application code, and Focus() cannot take
        // effect while the form is not yet visible, as is the case during Load.
        this.edit_input.Select();
    }

    /// <summary>
    /// Looks up the current input text.
    /// </summary>
    private void btn_lookup_Click(object sender, EventArgs e)
    {
        this.RunLookup();
    }

    /// <summary>
    /// Switches between the full and the sub dictionary, then repeats the lookup
    /// against the newly loaded dictionary.
    /// </summary>
    private void cb_fulldic_CheckedChanged(object sender, EventArgs e)
    {
        if (!this.TryLoadDictionary())
        {
            // Loading failed and the form is closing: skip the lookup instead of
            // searching stale data and filling a list on a closed form.
            return;
        }

        this.RunLookup();
    }

    /// <summary>
    /// Repeats the lookup with the new exact/fuzzy setting.
    /// </summary>
    private void cb_exact_CheckedChanged(object sender, EventArgs e)
    {
        this.RunLookup();
    }

    /// <summary>
    /// Loads the dictionary selected by the "full dictionary" check box.
    /// On failure the error is shown to the user and the form is closed.
    /// </summary>
    /// <returns>true when the dictionary was loaded; false when loading failed.</returns>
    private bool TryLoadDictionary()
    {
        try
        {
            myEdict.EdictLoad(this.cb_fulldic.Checked);
            return true;
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.ToString(), "Error!", MessageBoxButtons.OK, MessageBoxIcon.Error);
            this.Close();
            return false;
        }
    }

    /// <summary>
    /// Runs a lookup for the current input text and shows the matches in the
    /// result list, honouring the "exact" check box.
    /// </summary>
    private void RunLookup()
    {
        myEdict.EdictLookup(this.edit_input.Text, this.list_result, this.cb_exact.Checked);
    }
}
/// <summary>
/// In-memory EDICT dictionary. The whole dictionary file is decoded from EUC-JP
/// into a single string, and lookups scan that string for the search text and
/// return the complete line (entry) around every match.
/// </summary>
public class Edict
{
    private const string DIC_SUB_FILENAME = "edict_sub"; // file name of the edict_sub (common words) dictionary
    private const string DIC_FILENAME = "edict"; // file name of the full edict dictionary
    private const string DIC_ENCODING = "EUC-JP"; // text encoding of both dictionary files

    // Complete text of the loaded dictionary, one entry per line.
    // Empty until EdictLoad succeeds, so a lookup before loading simply finds nothing.
    private string dicText = string.Empty;

    /// <summary>
    /// Loads a dictionary file into memory, replacing the previously loaded one.
    /// </summary>
    /// <param name="full">true to load the full dictionary, false to load the sub dictionary.</param>
    /// <remarks>
    /// I/O and encoding exceptions propagate to the caller. The previously loaded
    /// text is only replaced after the new file has been read completely.
    /// </remarks>
    public void EdictLoad(bool full)
    {
        string fileName = full ? DIC_FILENAME : DIC_SUB_FILENAME;

        // File.ReadAllText opens the file, reads it to the end and closes it even
        // when decoding fails. The result has exactly as many characters as the file
        // decodes to, with no padding from a buffer sized by byte count.
        this.dicText = File.ReadAllText(fileName, Encoding.GetEncoding(DIC_ENCODING));
    }

    /// <summary>
    /// Looks up <paramref name="search"/> and shows the matching entries in
    /// <paramref name="result"/>.
    /// </summary>
    /// <param name="search">Text to look for; null or empty shows "Empty Input!".</param>
    /// <param name="result">List box that is cleared and filled with the matching entries.</param>
    /// <param name="exact">
    /// true to match only entries whose headword or bracketed reading equals the
    /// search text; false to match entries containing the search text anywhere.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
    public void EdictLookup(string search, ListBox result, bool exact)
    {
        if (result == null)
        {
            throw new ArgumentNullException("result");
        }

        result.Items.Clear();

        if (string.IsNullOrEmpty(search))
        {
            result.Items.Add("Empty Input!");
            return;
        }

        List<string> entries = exact ? this.FindExact(search) : this.FindFuzzy(search);
        if (entries.Count == 0)
        {
            result.Items.Add("Not Found!");
            return;
        }

        result.Items.AddRange(entries.ToArray());
    }

    /// <summary>
    /// Returns every entry that contains <paramref name="search"/> anywhere in its line.
    /// </summary>
    private List<string> FindFuzzy(string search)
    {
        List<string> entries = new List<string>();
        string text = this.dicText;
        int index = 0;

        // Ordinal comparison: dictionary text must match code unit by code unit,
        // independent of the current culture's collation rules.
        while (index < text.Length
            && (index = text.IndexOf(search, index, StringComparison.Ordinal)) != -1)
        {
            int lineStart = text.LastIndexOf('\n', index) + 1;
            int lineEnd;
            entries.Add(ExtractLine(text, lineStart, out lineEnd));

            // Continue after this line so an entry that contains the search text
            // several times is listed only once.
            index = lineEnd + 1;
        }

        return entries;
    }

    /// <summary>
    /// Returns every entry whose headword (the text before the first space) or
    /// whose bracketed reading "[...]" equals <paramref name="search"/>.
    /// Headword matches come first, followed by reading matches.
    /// </summary>
    private List<string> FindExact(string search)
    {
        List<string> entries = new List<string>();
        string text = this.dicText;
        int lineEnd;

        // Headword match: the line starts with the search text followed by a space.
        string headword = search + ' ';

        // The first line has no preceding '\n', so it is checked separately.
        if (text.StartsWith(headword, StringComparison.Ordinal))
        {
            entries.Add(ExtractLine(text, 0, out lineEnd));
        }

        string headwordAfterNewline = '\n' + headword;
        int index = 0;
        while (index < text.Length
            && (index = text.IndexOf(headwordAfterNewline, index, StringComparison.Ordinal)) != -1)
        {
            entries.Add(ExtractLine(text, index + 1, out lineEnd));

            // Resume at the terminating '\n': it may begin the next matching line.
            index = lineEnd;
        }

        // Reading match: the search text enclosed in square brackets.
        string reading = '[' + search + ']';
        index = 0;
        while (index < text.Length
            && (index = text.IndexOf(reading, index, StringComparison.Ordinal)) != -1)
        {
            int lineStart = text.LastIndexOf('\n', index) + 1;
            string entry = ExtractLine(text, lineStart, out lineEnd);

            // Skip lines that the headword pass above has already listed.
            bool alreadyListed =
                string.CompareOrdinal(text, lineStart, headword, 0, headword.Length) == 0;
            if (!alreadyListed)
            {
                entries.Add(entry);
            }

            index = lineEnd + 1;
        }

        return entries;
    }

    /// <summary>
    /// Returns the line of <paramref name="text"/> that starts at
    /// <paramref name="lineStart"/>, without its line terminator.
    /// </summary>
    /// <param name="text">Text to read from.</param>
    /// <param name="lineStart">Index of the first character of the line.</param>
    /// <param name="lineEnd">
    /// Receives the index of the terminating '\n', or the text length when the
    /// last line has no terminator.
    /// </param>
    private static string ExtractLine(string text, int lineStart, out int lineEnd)
    {
        lineEnd = text.IndexOf('\n', lineStart);
        if (lineEnd == -1)
        {
            // Last entry of a file that does not end with a newline.
            lineEnd = text.Length;
        }

        // Drop the '\r' of a "\r\n" terminator so it does not end up in the entry.
        return text.Substring(lineStart, lineEnd - lineStart).TrimEnd('\r');
    }
}
}
完整源代码:
EdictLookup.rar
在bin\Release中已包含 edict(完整字典文件) 和 edict_sub(常用字典文件) 文件
EDict的文件组织形式:
FORMAT: EDICT's format is that of the original "EDICT" format used by the early PC Japanese word-processor MOKE (Mark's Own Kanji Editor). It uses EUC-JP coding for kana and kanji; however, this can be converted to JIS (ISO-2022-JP) or Shift-JIS by any of the several conversion programs around. It is a text file with one entry per line. The format of entries is:
KANJI [KANA] /English_1/English_2/.../
or
KANA /English_1/.../
(NB: Only the KANJI and KANA are in EUC; all the other characters, including spaces, must be ASCII.) The English translations were initially deliberately brief, as the application of the dictionary was expected to be primarily on-line look-ups, etc. Over time the translations have become more extended. The EDICT file is not intended to have its entries in any particular order. In fact it almost always is in order as a by-product of the update method I use; however, there is no guarantee of this. (The order is almost always JIS + alphabetical, starting with the head-word.)
主要代码:
MainForm.cs
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace EdictLookup
{
/// <summary>
/// Main window of the dictionary program. It loads the dictionary when the form
/// loads and re-runs the lookup whenever the lookup button is clicked or one of
/// the two check boxes changes.
/// </summary>
public partial class MainForm : Form
{
    private Edict myEdict;

    public MainForm()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Creates the dictionary object and loads the dictionary selected by the
    /// "full dictionary" check box.
    /// </summary>
    private void MainForm_Load(object sender, EventArgs e)
    {
        myEdict = new Edict();
        if (!this.TryLoadDictionary())
        {
            // The form is already closing; do not touch its controls any further.
            return;
        }

        // Select() rather than Focus(): the Control.Focus documentation recommends
        // Select()/ActiveControl for application code, and Focus() cannot take
        // effect while the form is not yet visible, as is the case during Load.
        this.edit_input.Select();
    }

    /// <summary>
    /// Looks up the current input text.
    /// </summary>
    private void btn_lookup_Click(object sender, EventArgs e)
    {
        this.RunLookup();
    }

    /// <summary>
    /// Switches between the full and the sub dictionary, then repeats the lookup
    /// against the newly loaded dictionary.
    /// </summary>
    private void cb_fulldic_CheckedChanged(object sender, EventArgs e)
    {
        if (!this.TryLoadDictionary())
        {
            // Loading failed and the form is closing: skip the lookup instead of
            // searching stale data and filling a list on a closed form.
            return;
        }

        this.RunLookup();
    }

    /// <summary>
    /// Repeats the lookup with the new exact/fuzzy setting.
    /// </summary>
    private void cb_exact_CheckedChanged(object sender, EventArgs e)
    {
        this.RunLookup();
    }

    /// <summary>
    /// Loads the dictionary selected by the "full dictionary" check box.
    /// On failure the error is shown to the user and the form is closed.
    /// </summary>
    /// <returns>true when the dictionary was loaded; false when loading failed.</returns>
    private bool TryLoadDictionary()
    {
        try
        {
            myEdict.EdictLoad(this.cb_fulldic.Checked);
            return true;
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.ToString(), "Error!", MessageBoxButtons.OK, MessageBoxIcon.Error);
            this.Close();
            return false;
        }
    }

    /// <summary>
    /// Runs a lookup for the current input text and shows the matches in the
    /// result list, honouring the "exact" check box.
    /// </summary>
    private void RunLookup()
    {
        myEdict.EdictLookup(this.edit_input.Text, this.list_result, this.cb_exact.Checked);
    }
}
/// <summary>
/// In-memory EDICT dictionary. The whole dictionary file is decoded from EUC-JP
/// into a single string, and lookups scan that string for the search text and
/// return the complete line (entry) around every match.
/// </summary>
public class Edict
{
    private const string DIC_SUB_FILENAME = "edict_sub"; // file name of the edict_sub (common words) dictionary
    private const string DIC_FILENAME = "edict"; // file name of the full edict dictionary
    private const string DIC_ENCODING = "EUC-JP"; // text encoding of both dictionary files

    // Complete text of the loaded dictionary, one entry per line.
    // Empty until EdictLoad succeeds, so a lookup before loading simply finds nothing.
    private string dicText = string.Empty;

    /// <summary>
    /// Loads a dictionary file into memory, replacing the previously loaded one.
    /// </summary>
    /// <param name="full">true to load the full dictionary, false to load the sub dictionary.</param>
    /// <remarks>
    /// I/O and encoding exceptions propagate to the caller. The previously loaded
    /// text is only replaced after the new file has been read completely.
    /// </remarks>
    public void EdictLoad(bool full)
    {
        string fileName = full ? DIC_FILENAME : DIC_SUB_FILENAME;

        // File.ReadAllText opens the file, reads it to the end and closes it even
        // when decoding fails. The result has exactly as many characters as the file
        // decodes to, with no padding from a buffer sized by byte count.
        this.dicText = File.ReadAllText(fileName, Encoding.GetEncoding(DIC_ENCODING));
    }

    /// <summary>
    /// Looks up <paramref name="search"/> and shows the matching entries in
    /// <paramref name="result"/>.
    /// </summary>
    /// <param name="search">Text to look for; null or empty shows "Empty Input!".</param>
    /// <param name="result">List box that is cleared and filled with the matching entries.</param>
    /// <param name="exact">
    /// true to match only entries whose headword or bracketed reading equals the
    /// search text; false to match entries containing the search text anywhere.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
    public void EdictLookup(string search, ListBox result, bool exact)
    {
        if (result == null)
        {
            throw new ArgumentNullException("result");
        }

        result.Items.Clear();

        if (string.IsNullOrEmpty(search))
        {
            result.Items.Add("Empty Input!");
            return;
        }

        List<string> entries = exact ? this.FindExact(search) : this.FindFuzzy(search);
        if (entries.Count == 0)
        {
            result.Items.Add("Not Found!");
            return;
        }

        result.Items.AddRange(entries.ToArray());
    }

    /// <summary>
    /// Returns every entry that contains <paramref name="search"/> anywhere in its line.
    /// </summary>
    private List<string> FindFuzzy(string search)
    {
        List<string> entries = new List<string>();
        string text = this.dicText;
        int index = 0;

        // Ordinal comparison: dictionary text must match code unit by code unit,
        // independent of the current culture's collation rules.
        while (index < text.Length
            && (index = text.IndexOf(search, index, StringComparison.Ordinal)) != -1)
        {
            int lineStart = text.LastIndexOf('\n', index) + 1;
            int lineEnd;
            entries.Add(ExtractLine(text, lineStart, out lineEnd));

            // Continue after this line so an entry that contains the search text
            // several times is listed only once.
            index = lineEnd + 1;
        }

        return entries;
    }

    /// <summary>
    /// Returns every entry whose headword (the text before the first space) or
    /// whose bracketed reading "[...]" equals <paramref name="search"/>.
    /// Headword matches come first, followed by reading matches.
    /// </summary>
    private List<string> FindExact(string search)
    {
        List<string> entries = new List<string>();
        string text = this.dicText;
        int lineEnd;

        // Headword match: the line starts with the search text followed by a space.
        string headword = search + ' ';

        // The first line has no preceding '\n', so it is checked separately.
        if (text.StartsWith(headword, StringComparison.Ordinal))
        {
            entries.Add(ExtractLine(text, 0, out lineEnd));
        }

        string headwordAfterNewline = '\n' + headword;
        int index = 0;
        while (index < text.Length
            && (index = text.IndexOf(headwordAfterNewline, index, StringComparison.Ordinal)) != -1)
        {
            entries.Add(ExtractLine(text, index + 1, out lineEnd));

            // Resume at the terminating '\n': it may begin the next matching line.
            index = lineEnd;
        }

        // Reading match: the search text enclosed in square brackets.
        string reading = '[' + search + ']';
        index = 0;
        while (index < text.Length
            && (index = text.IndexOf(reading, index, StringComparison.Ordinal)) != -1)
        {
            int lineStart = text.LastIndexOf('\n', index) + 1;
            string entry = ExtractLine(text, lineStart, out lineEnd);

            // Skip lines that the headword pass above has already listed.
            bool alreadyListed =
                string.CompareOrdinal(text, lineStart, headword, 0, headword.Length) == 0;
            if (!alreadyListed)
            {
                entries.Add(entry);
            }

            index = lineEnd + 1;
        }

        return entries;
    }

    /// <summary>
    /// Returns the line of <paramref name="text"/> that starts at
    /// <paramref name="lineStart"/>, without its line terminator.
    /// </summary>
    /// <param name="text">Text to read from.</param>
    /// <param name="lineStart">Index of the first character of the line.</param>
    /// <param name="lineEnd">
    /// Receives the index of the terminating '\n', or the text length when the
    /// last line has no terminator.
    /// </param>
    private static string ExtractLine(string text, int lineStart, out int lineEnd)
    {
        lineEnd = text.IndexOf('\n', lineStart);
        if (lineEnd == -1)
        {
            // Last entry of a file that does not end with a newline.
            lineEnd = text.Length;
        }

        // Drop the '\r' of a "\r\n" terminator so it does not end up in the entry.
        return text.Substring(lineStart, lineEnd - lineStart).TrimEnd('\r');
    }
}
}
完整源代码:
EdictLookup.rar
在bin\Release中已包含 edict(完整字典文件) 和 edict_sub(常用字典文件) 文件
相关文章推荐
- CSharp make plugin(Addin) program C# 编写基于插件的程序
- 用VB编写基于控制台的程序
- 一个简单的AJAX实现,基于C#的ASP.Net,包括服务器端的程序代码
- 一个简单的 C# 语言编写的 WIN32 程序
- 编写高质量代码改善C#程序的157个建议——建议108:将类型标识为sealed
- 用c#编写通过无线网(例如CDMA)传输数据的通信程序
- 编写高质量代码改善C#程序的157个建议[C#闭包的陷阱、委托、事件、事件模型]
- 如何利用C#编写网页投票器程序 如何使用代理来投票 代理IP来投票
- 编写高质量代码改善C#程序的157个建议——建议136:优先使用后缀表示已有类型的新版本
- 也来玩转Skype ——基于Skype4Com API编写Skype外壳程序
- 用TCL编写了一个生成密码字典的小测试程序,共享一下
- 编写高质量代码改善C#程序的157个建议——建议142:总是提供有意义的命名
- c#/.net 基于文件流FileStream读写的文本操作小程序
- 也来玩转Skype ——基于Skype4Java API编写Skype外壳程序入门介绍
- 《C#入门经典(第6版)》读书笔记2_第二章:编写C#程序
- 基于ArcGIS10.0和Oracle10g的空间数据管理平台十八(C#开发)-数据字典编辑
- 使用C#编写不同的"Hello World"程序(转)
- C#编写程序监测某个文件夹内是否有文件进行了增,删,改的动作
- 几个DSP高手的经验介绍,编写基于DSP程序的注意事项
- C#中编写多线程程序之起步