您的位置:首页 > 编程语言 > Lua

笔划输入法查找算法示例(Lua实现)

2012-08-04 17:58 381 查看
公司同事最近利用谷歌拼音输入法源代码实现了自己的拼音输入法,经过了解,最核心的就是一个trie(词典树)的构造和检索(这里就不多介绍trie树了,google一搜一大把),于是今天就想实现一个笔划输入法。大概的想法是:

找一个所有汉字或者一、二级汉字的笔顺数据库

用Lua将数据库读进来,构造一个trie树
每一个节点存一个笔划

每一个节点带一个子节点集合

每一个节点带一个汉字集合,表示到这一级时所有笔划组成的完整汉字

检索时根据用户输入的笔划,检索到一个节点,然后按笔划顺序遍历子树
遍历子树可以给出以这些笔划开头的所有汉字,但是总不能一下子全部显示出来,所以需要一个迭代器,每调用一次给出一个候选汉字。这个迭代器用C实现比较复杂,但是用Lua实现简直就是小意思:直接将遍历子树的函数封装到一个coroutine中,每找到一个汉字就 yield(汉字) 即可

笔顺数据库

CSDN上可以下载到 http://download.csdn.net/detail/yyjlan/3766691

下载的mdb格式,我不太喜欢,Lua也不太喜欢。由于luasql支持odbc,所以可以将mdb文件加入到odbc数据源,然后载入后转成sqlite3的格式,方便以后使用,转换代码如下

require "luasql.odbc"
require "luasql.sqlite3"

-- One-off conversion script: copy every row of the hzbs table from the
-- Access database (reached through ODBC) into a new SQLite database file.

-- Render a value as a SQL string literal. Embedded single quotes are doubled
-- so the value cannot terminate the literal early; nil becomes NULL.
local function sql_text(value)
  if value == nil then
    return "NULL"
  end
  local escaped = tostring(value):gsub("'", "''")
  return "'" .. escaped .. "'"
end

-- Render a value as a SQL numeric literal. Anything that does not convert to
-- a number (including nil) becomes NULL instead of being spliced in verbatim.
local function sql_number(value)
  local number = tonumber(value)
  if number == nil then
    return "NULL"
  end
  return tostring(number)
end

odbc_env = assert(luasql.odbc())

-- The Access file must first be added as a user DSN named "hzbs"
-- (Control Panel -> Administrative Tools -> Data Sources).
odbc_conn = assert(odbc_env:connect("hzbs"))
odbc_cur = assert(odbc_conn:execute("SELECT * FROM hzbs;"))

sqlite_env = assert(luasql.sqlite3())
sqlite_conn = assert(sqlite_env:connect("hzbs.sqlite3.db"))
assert(sqlite_conn:execute("CREATE TABLE hzbs (id INTEGER primary key, hanzi TEXT, stroke_number INTEGER, stroke_order TEXT, unicode TEXT, gbk TEXT);"))
sqlite_conn:setautocommit(false) -- start transaction

local insert_template =
  "INSERT INTO hzbs(id, hanzi, stroke_number, stroke_order, unicode, gbk) VALUES(%s,%s,%s,%s,%s,%s);"

record = {}
while odbc_cur:fetch(record, "n") do
  -- Column order of the source table: id, hanzi, stroke_number,
  -- stroke_order, unicode, gbk.
  local statement = string.format(insert_template,
    sql_number(record[1]),
    sql_text(record[2]),
    sql_number(record[3]),
    sql_text(record[4]),
    sql_text(record[5]),
    sql_text(record[6]))
  local ok, err = sqlite_conn:execute(statement)
  if not ok then
    -- Report the row instead of dropping it silently, then keep converting.
    io.stderr:write("failed to insert row id=", tostring(record[1]), ": ",
      tostring(err), "\n")
  end
end

sqlite_conn:commit() -- commit the transaction
sqlite_conn:close()
sqlite_env:close() -- the original never released the SQLite environment

odbc_cur:close()
odbc_conn:close()
odbc_env:close()


构造子树与检索

多的不说,直接看代码吧。代码写得有点乱,不过凑合看是没什么问题的。要运行代码必须要先安装 LuaForWindows

require "luasql.sqlite3"
require "wx"

-- Marker for user-visible text: hands its argument back unchanged.
function _T(s) return s end

-- Stroke codes (the equivalent of a C enum).
-- stroke_root marks the trie root and is not a real stroke; the five real
-- strokes are numbered 1..stroke_max.
local stroke_root = 0
local stroke_heng = 1
local stroke_shu = 2
local stroke_pie = 3
local stroke_na = 4
local stroke_zhe = 5
local stroke_max = stroke_zhe

-- Display glyph for each real stroke, indexed by stroke code.
local stroke_text = {
  [stroke_heng] = _T"一",
  [stroke_shu] = _T"丨",
  [stroke_pie] = _T"丿",
  [stroke_na] = _T"丶",
  [stroke_zhe] = _T"乛",
}

-- Create an empty trie node for the given stroke code.
-- Fields:
--   stroke   the stroke code this node stands for
--   subnodes child nodes, keyed by the next stroke code
--   hanzis   characters whose full stroke order ends at this node
--            (several characters can share one stroke order)
function new_node(stroke)
  local node = {}
  node.stroke = stroke
  node.subnodes = {}
  node.hanzis = {}
  return node
end

-- Create an empty trie: a single root node tagged with stroke_root.
function new_trie()
  local root = new_node(stroke_root)
  return root
end

-- Insert one character into the trie.
--   node         the trie node to start from (normally the root)
--   stroke_order string of digits, one stroke code per character
--   hanzi        the character to store
-- Walks the path spelled by stroke_order, creating any node that does not
-- exist yet, and appends hanzi to the list of the node where the path ends.
function insert_hanzi(node, stroke_order, hanzi)
  local current = node
  for position = 1, #stroke_order do
    local stroke = tonumber(stroke_order:sub(position, position))
    local child = current.subnodes[stroke]
    if not child then
      child = new_node(stroke)
      current.subnodes[stroke] = child
    end
    current = child
  end
  table.insert(current.hanzis, hanzi)
end

-- Look up the node reached by following the stroke codes in the `strokes`
-- array from `root`. Returns that node, or nil when `strokes` is empty or
-- the path does not exist in the trie.
function find_node(root, strokes)
  if #strokes < 1 then
    return nil
  end

  local current = root
  for _, stroke in ipairs(strokes) do
    local child = current.subnodes[stroke]
    if not child then
      return nil
    end
    current = child
  end
  return current
end

-- Build a trie from the hzbs table of the SQLite database `db_name`.
-- Every row contributes its hanzi under the path given by its stroke_order.
-- Returns the trie root. Raises an error when the database cannot be opened
-- or queried.
function db_to_trie(db_name)
  local env = assert(luasql.sqlite3())

  local conn, connect_err = env:connect(db_name)
  if not conn then
    env:close()
    error("cannot open database " .. tostring(db_name) .. ": " .. tostring(connect_err))
  end

  local cur, query_err = conn:execute("SELECT hanzi,stroke_order FROM hzbs;")
  if not cur then
    conn:close()
    env:close()
    error("cannot read table hzbs: " .. tostring(query_err))
  end

  local trie = new_trie()

  -- Kept local: the original assigned a global named `record` here by accident.
  local record = {}
  while cur:fetch(record, "a") do
    -- A row with a NULL column cannot be indexed; skip it rather than crash.
    if record.stroke_order ~= nil and record.hanzi ~= nil then
      insert_hanzi(trie, record.stroke_order, record.hanzi)
    end
  end

  cur:close()
  conn:close()
  env:close()

  return trie
end

-- Build an enumerator over every character stored at or below `root`.
-- Returns a function; each call gives the next character. A node's own
-- characters come first, then its children in stroke order 1..stroke_max,
-- depth first. Once the subtree is exhausted, or if the traversal coroutine
-- fails, the function returns nil.
function get_hanzi_enumerator(root)
  local function traverse(node)
    local hanzis = node.hanzis
    for index = 1, #hanzis do
      coroutine.yield(hanzis[index])
    end

    for stroke = 1, stroke_max do
      local child = node.subnodes[stroke]
      if child then
        traverse(child)
      end
    end
  end

  local walker = coroutine.create(function ()
    traverse(root)
  end)

  return function ()
    local ok, hanzi = coroutine.resume(walker)
    -- ok is false once the coroutine is dead; hanzi is nil on the final
    -- resume, where the traversal finishes without yielding.
    if ok and hanzi ~= nil then
      return hanzi
    end
    return nil
  end
end

---------------------------------------------------------------
-- GUI
-- new_id() returns a fresh window id on every call, counting upwards from
-- wx.wxID_HIGHEST + 1. The counter is private to the closure.
local new_id
do
  local last_id = wx.wxID_HIGHEST
  new_id = function ()
    last_id = last_id + 1
    return last_id
  end
end

-- Top-level dialog, the panel that parents every control, and the vertical
-- sizer the rows below are added to.
-- NOTE: dialog and panel are assigned without `local`, so they are globals.
dialog = wx.wxDialog(wx.NULL, new_id(), _T"Lua笔划输入法演示",
wx.wxDefaultPosition, wx.wxDefaultSize)
panel = wx.wxPanel(dialog, wx.wxID_ANY)
local main_sizer = wx.wxBoxSizer(wx.wxVERTICAL)

-- Stroke buttons (heng, shu, pie, na, zhe).
-- Each button uses its stroke code as its window id and the matching glyph
-- from stroke_text as its label.
local stroke_label = wx.wxStaticText(panel, new_id(), _T"可选笔划")

local button_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
button_sizer:Add(stroke_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
for stroke = stroke_heng, stroke_zhe do
  local stroke_button = wx.wxButton(panel, stroke, stroke_text[stroke])
  button_sizer:Add(stroke_button, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
end

main_sizer:Add(button_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)

-- Input row: a read-only text box showing the strokes entered so far, plus
-- the backspace ("退格") and clear ("清除") buttons.
local input_label = wx.wxStaticText(panel, new_id(), _T"输入笔划")
local input_textctrl = wx.wxTextCtrl(panel, new_id(), "",
wx.wxDefaultPosition, wx.wxDefaultSize, wx.wxTE_READONLY)
local input_backspace_button = wx.wxButton(panel, new_id(), _T"退格")
-- The clear button uses the stock id wx.wxID_CANCEL rather than new_id().
local input_clear_button = wx.wxButton(panel, wx.wxID_CANCEL, _T"清除")

local input_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
input_sizer:Add(input_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
input_sizer:Add(input_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
input_sizer:Add(input_backspace_button, 0, wx.wxALL, 5)
input_sizer:Add(input_clear_button, 0, wx.wxALL, 5)
main_sizer:Add(input_sizer, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)

-- Candidate characters row: label and the horizontal sizer that will hold
-- the candidate buttons.
local candidate_label = wx.wxStaticText(panel, new_id(), _T"备选汉字")
local candidate_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
candidate_sizer:Add(candidate_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)

-- Number of candidate buttons shown at once.
local candidate_number = 5
-- Create `num` empty-labelled candidate buttons and add them to
-- candidate_sizer.
-- Returns an array of the buttons with two extra fields:
--   start_id  window id of the first button
--   end_id    window id of the last button
-- The ids come from consecutive new_id() calls, so they form a contiguous
-- range and a button's index is (id - start_id + 1).
function create_candidate_btn(num)
  local textctrls = {}
  for i = 1, num do
    textctrls[i] = wx.wxButton(panel, new_id(), "")
    candidate_sizer:Add(textctrls[i], 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)
  end
  textctrls.start_id = textctrls[1]:GetId()
  -- Derive the range from the `num` argument; the original used the outer
  -- candidate_number, which is only right when the two happen to be equal.
  textctrls.end_id = textctrls.start_id + num - 1
  return textctrls
end
-- Build the candidate buttons and place the whole row in the main layout.
local candidate_textctrls = create_candidate_btn(candidate_number)
main_sizer:Add(candidate_sizer, 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)

-- Output area: multi-line text box that receives the characters the user picks.
local output_textctrl = wx.wxTextCtrl(panel, new_id(), "", wx.wxDefaultPosition,
wx.wxSize(0, 100), wx.wxTE_MULTILINE)
local output_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
output_sizer:Add(output_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
main_sizer:Add(output_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 0)

-- Apply the finished layout to the dialog.
-- NOTE(review): main_sizer is installed on the dialog although the controls
-- are children of panel; confirm this is intended.
main_sizer:SetSizeHints(dialog)
dialog:SetSizer(main_sizer)

-- Required: without this handler the program does not exit properly.
-- Destroys the dialog when its window is closed, then lets the event
-- continue to any further handlers.
local function on_dialog_close(event)
  dialog:Destroy()
  event:Skip()
end

dialog:Connect(wx.wxEVT_CLOSE_WINDOW, on_dialog_close)

-- Load the stroke database into the trie.
local trie = db_to_trie("hzbs.sqlite3.db")

-- Array of the stroke codes entered so far (global).
input_strokes = {}
-- Enumerator function for the current candidates, or nil when there is no
-- valid input (global).
get_next_candidate = nil

-- Refresh the labels of the candidate buttons.
-- With no enumerator (get_next_candidate is nil) every label is blanked.
-- Otherwise each button takes the next character from the enumerator, and
-- buttons left over once it runs dry are blanked.
function update_candidate()
  local has_candidates = get_next_candidate ~= nil
  for _, textctrl in ipairs(candidate_textctrls) do
    local label = ""
    if has_candidates then
      label = get_next_candidate() or ""
    end
    textctrl:SetLabel(label)
  end
end

-- Show the strokes entered so far in the input text box, as their glyphs
-- separated by single spaces.
function update_input()
  local glyphs = {}
  for _, stroke in ipairs(input_strokes) do
    glyphs[#glyphs + 1] = stroke_text[stroke]
  end

  local display = table.concat(glyphs, " ")
  input_textctrl:SetValue(display)
end

-- Append one stroke to the current input.
-- If the extended stroke sequence has no node in the trie, the stroke is
-- dropped again and nothing else changes. Otherwise a fresh candidate
-- enumerator is created for the node and both displays are refreshed.
function insert_stroke(stroke)
  input_strokes[#input_strokes + 1] = stroke

  local node = find_node(trie, input_strokes)
  if node == nil then
    -- Not a valid input: undo the append. (The original marks this spot
    -- with a "BEEP" placeholder; no sound is played.)
    table.remove(input_strokes)
    return
  end

  get_next_candidate = get_hanzi_enumerator(node)
  update_input()
  update_candidate()
end

-- Drop the most recently entered stroke (backspace) and refresh both
-- displays. The candidate enumerator is rebuilt for the shortened input, or
-- set to nil when that input has no node in the trie.
function remove_stroke()
  table.remove(input_strokes)

  local node = find_node(trie, input_strokes)
  if node ~= nil then
    get_next_candidate = get_hanzi_enumerator(node)
  else
    get_next_candidate = nil
  end

  update_input()
  update_candidate()
end

-- Reset the input: forget all entered strokes, discard the candidate
-- enumerator, and refresh both displays.
function clear_stroke()
  get_next_candidate = nil
  input_strokes = {}

  update_input()
  update_candidate()
end

-- Single click handler for every button in the dialog, dispatching on the
-- id of the button that was clicked:
--   stroke button       -> append that stroke (its id is the stroke code)
--   candidate button    -> append its label to the output, then reset input
--   backspace button    -> remove the last stroke
--   clear button        -> reset the input
dialog:Connect(wx.wxID_ANY, wx.wxEVT_COMMAND_BUTTON_CLICKED,
  function(event)
    local id = event:GetId()
    -- Check both ends of the stroke range: the original tested only
    -- `id <= stroke_max`, which also matched zero and negative ids.
    if id >= stroke_heng and id <= stroke_max then
      insert_stroke(id)
    elseif id >= candidate_textctrls.start_id and id <= candidate_textctrls.end_id then
      local index = id - candidate_textctrls.start_id + 1
      output_textctrl:AppendText(candidate_textctrls[index]:GetLabel())
      clear_stroke()
    elseif id == input_backspace_button:GetId() then
      remove_stroke()
    elseif id == input_clear_button:GetId() then
      clear_stroke()
    end
  end)

-- Keyboard input.
--   numpad 7 / 8 / 9 / 4 / 5 -> heng / shu / pie / na / zhe
--   backspace                -> remove the last stroke
--   "1" .. "<candidate_number>" -> pick the candidate at that position
-- The key-to-action table is built once here; the original rebuilt it, and
-- every closure in it, on each key press.
-- NOTE(review): the handler does not call event:Skip() for keys it does not
-- recognise; confirm that is intended.
do
  local key_actions = {
    [wx.WXK_NUMPAD7] = function () insert_stroke(stroke_heng) end,
    [wx.WXK_NUMPAD8] = function () insert_stroke(stroke_shu) end,
    [wx.WXK_NUMPAD9] = function () insert_stroke(stroke_pie) end,
    [wx.WXK_NUMPAD4] = function () insert_stroke(stroke_na) end,
    [wx.WXK_NUMPAD5] = function () insert_stroke(stroke_zhe) end,
    [wx.WXK_BACK] = function () remove_stroke() end,
  }

  -- Digit keys are addressed by their character code, starting at "1".
  local first_digit = string.byte("1")
  for i = 1, candidate_number do
    key_actions[i - 1 + first_digit] = function ()
      output_textctrl:AppendText(candidate_textctrls[i]:GetLabel())
      clear_stroke()
    end
  end

  dialog:Connect(wx.wxID_ANY, wx.wxEVT_KEY_DOWN, function (event)
    local action = key_actions[event:GetKeyCode()]
    if action then
      action()
    end
  end)
end

-- wxWidgets quirk (author's note): key presses in child windows are not
-- delivered to the main window, so they have to be forwarded like this.
-- Recursively visits every descendant window of `parent` and sets
-- `processer` as its next event handler.
function process_children_keydown_event(parent, processer)
  local children = parent:GetChildren()
  local last_index = children:GetCount() - 1

  for index = 0, last_index do
    local child = children:Item(index):GetData():DynamicCast("wxWindow")
    child:SetNextHandler(processer)
    process_children_keydown_event(child, processer)
  end
end

-- Make every descendant window of the dialog pass its events on to the dialog.
process_children_keydown_event(dialog, dialog)

dialog:Centre()
dialog:Show(true)
input_textctrl:SetFocus() -- author's note: no beep when the focus is set here

-- Run the wxWidgets event loop.
wx.wxGetApp():MainLoop()


打包下载

源代码包和sqlite3数据库可以在这里下载
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: