Python语言实现哈夫曼编码
2017-12-02 20:32
155 查看
汉语版:使用python实现huffman编码是一个能够很快地实现。所以我们选择使用python来实现我们这个程序。 l
E-version: we will use python to realize this program called huffman encoding and decoding. why we use python, because in python we can finish this program faster then other codes. this program are not the final implementation. actually, this is the first
version i commit to git. i changed a lot in the least version . so if you run those codes on your environment. some problems may be exist; don`t worry, the first four drafts are right, you can build everything based on them. so good lucky to you.
I:实现节点类
II:为每一个节点赋权值
def create_nodes(frequencies):
return [Node(freq) for freq in frequencies]
III:创建哈夫曼树
def create_huffman_tree(nodes):
queue = nodes[:]
while len(queue) > 1:
queue.sort(key=lambda item: item.freq)
node_left = queue.pop(0)
node_right = queue.pop(0)
node_father = Node(node_left.freq + node_right.freq)
node_father.left = node_left
node_father.right = node_right
node_left.father = node_father
node_right.father = node_father
queue.append(node_father)
queue[0].father = None
return queue[0]
III:遍历叶节点
def huffman_encoding(nodes, root):
codes = [''] * len(nodes)
for i in range(len(nodes)):
node_tmp = nodes[i]
while node_tmp != root:
if node_tmp.is_left():
codes[i] = '0' + codes[i]
else:
codes[i] = '1' + codes[i]
node_tmp = node_tmp.father
return codes
IV:获取字符出现的频数
# 获取字符出现的频数
def count_frequency(input_string):
# 用于存放字符
char_store = []
# 用于存放频数
freq_store = []
# 解析字符串
for index in range(len(input_string)):
if char_store.count(input_string[index]) > 0:
temp = int(freq_store[char_store.index(input_string[index])])
temp = temp + 1
freq_store[char_store.index(input_string[index])] = temp
else:
char_store.append(input_string[index])
freq_store.append(1)
# 返回字符列表和频数列表
return char_store, freq_store
V:获取字符、频数的列表
# 获取字符、频数的列表
def get_char_frequency(char_store=[], freq_store=[]):
# 用于存放char_frequency
char_frequency = []
for item in zip(char_store, freq_store):
temp = (item[0], item[1])
char_frequency.append(temp)
return char_frequency
VI:将字符转换成哈夫曼编码
# 将字符转换成huffman编码
def get_huffman_file(input_string, char_frequency, codes):
# 逐个字符替换
file_content = ''
for index in range(len(input_string)):
for item in zip(char_frequency, codes):
if input_string[index] == item[0][0]:
file_content = file_content + item[1]
file_name = 'huffman_' + str(uuid.uuid1())+'.txt'
with open(file_name, 'w+') as destination:
destination.write(file_content)
return file_name
VII:解压缩哈夫曼文件
# 解压缩huffman文件
def decode_huffman(input_string, char_store, freq_store):
encode = ''
decode = ''
for index in range(len(input_string)):
encode = encode + input_string[index]
for item in zip(char_store, freq_store):
if encode == item[1]:
decode = decode + item[0]
encode = ''
return decode
VIII:计算压缩比(写错了,可以自行改写)
# 计算压缩比
def get_encode_ration(codes):
# 计算所需要的二进制个数
h_length = 0
for item in codes:
h_length = h_length + len(item)
t_length = bin_middle(len(codes))*len(codes)
ratio = t_length/h_length
return str(ratio)[0:3]
# 计算所在的二进制空间
def bin_middle(number):
n, i = 1, 0
while n < number:
n = n * 2
i = i + 1
return i
最后:Django文件接收,并返回
def upload(request):
ctx = {}
if request.method == "POST":
file_name = str(request.FILES['file'])
if not file
96f0
_name.endswith('txt'):
ctx['fail'] = 'file format exist wrong!'
else:
file = request.FILES['file']
ctx['success'] = 'Successful'
input_string = tool.read_file(tool.save_file(file))
char_store, freq_store = tool.count_frequency(input_string)
char_frequency = tool.get_char_frequency(char_store, freq_store)
nodes = huf.create_nodes([item[1] for item in char_frequency])
root = huf.create_huffman_tree(nodes)
codes = huf.huffman_encoding(nodes, root)
save_file_name = tool.get_huffman_file(input_string, char_frequency, codes)
for item in zip(char_frequency, codes):
print('Character:%s freq:%-2d encoding: %s', item[0][0], item[0][1], item[1])
ctx['node'] = char_frequency
def file_iterator(files, chunk_size=512):
with open(files) as f:
while True:
c = f.read(chunk_size)
if c:
yield c
else:
break
the_file_name = tool.get_encode_ration(codes)+'_'+str(uuid.uuid1())+'.txt'
response = StreamingHttpResponse(file_iterator(save_file_name))
response['Content-Type'] = 'application/octet-stream'
response['Content-Disposition'] = 'attachment;filename="{0}"'.format(the_file_name)
return response
E-version: we will use python to realize this program called huffman encoding and decoding. why we use python, because in python we can finish this program faster then other codes. this program are not the final implementation. actually, this is the first
version i commit to git. i changed a lot in the least version . so if you run those codes on your environment. some problems may be exist; don`t worry, the first four drafts are right, you can build everything based on them. so good lucky to you.
I:实现节点类
class Node: def __init__(self,freq): self.left = None self.right = None self.father = None self.freq = freq def is_left(self): return self.father.left == self
II:为每一个节点赋权值
def create_nodes(frequencies):
return [Node(freq) for freq in frequencies]
III:创建哈夫曼树
def create_huffman_tree(nodes):
queue = nodes[:]
while len(queue) > 1:
queue.sort(key=lambda item: item.freq)
node_left = queue.pop(0)
node_right = queue.pop(0)
node_father = Node(node_left.freq + node_right.freq)
node_father.left = node_left
node_father.right = node_right
node_left.father = node_father
node_right.father = node_father
queue.append(node_father)
queue[0].father = None
return queue[0]
III:遍历叶节点
def huffman_encoding(nodes, root):
codes = [''] * len(nodes)
for i in range(len(nodes)):
node_tmp = nodes[i]
while node_tmp != root:
if node_tmp.is_left():
codes[i] = '0' + codes[i]
else:
codes[i] = '1' + codes[i]
node_tmp = node_tmp.father
return codes
IV:获取字符出现的频数
# 获取字符出现的频数
def count_frequency(input_string):
# 用于存放字符
char_store = []
# 用于存放频数
freq_store = []
# 解析字符串
for index in range(len(input_string)):
if char_store.count(input_string[index]) > 0:
temp = int(freq_store[char_store.index(input_string[index])])
temp = temp + 1
freq_store[char_store.index(input_string[index])] = temp
else:
char_store.append(input_string[index])
freq_store.append(1)
# 返回字符列表和频数列表
return char_store, freq_store
V:获取字符、频数的列表
# 获取字符、频数的列表
def get_char_frequency(char_store=[], freq_store=[]):
# 用于存放char_frequency
char_frequency = []
for item in zip(char_store, freq_store):
temp = (item[0], item[1])
char_frequency.append(temp)
return char_frequency
VI:将字符转换成哈夫曼编码
# 将字符转换成huffman编码
def get_huffman_file(input_string, char_frequency, codes):
# 逐个字符替换
file_content = ''
for index in range(len(input_string)):
for item in zip(char_frequency, codes):
if input_string[index] == item[0][0]:
file_content = file_content + item[1]
file_name = 'huffman_' + str(uuid.uuid1())+'.txt'
with open(file_name, 'w+') as destination:
destination.write(file_content)
return file_name
VII:解压缩哈夫曼文件
# 解压缩huffman文件
def decode_huffman(input_string, char_store, freq_store):
encode = ''
decode = ''
for index in range(len(input_string)):
encode = encode + input_string[index]
for item in zip(char_store, freq_store):
if encode == item[1]:
decode = decode + item[0]
encode = ''
return decode
VIII:计算压缩比(写错了,可以自行改写)
# 计算压缩比
def get_encode_ration(codes):
# 计算所需要的二进制个数
h_length = 0
for item in codes:
h_length = h_length + len(item)
t_length = bin_middle(len(codes))*len(codes)
ratio = t_length/h_length
return str(ratio)[0:3]
# 计算所在的二进制空间
def bin_middle(number):
n, i = 1, 0
while n < number:
n = n * 2
i = i + 1
return i
最后:Django文件接收,并返回
def upload(request):
ctx = {}
if request.method == "POST":
file_name = str(request.FILES['file'])
if not file
96f0
_name.endswith('txt'):
ctx['fail'] = 'file format exist wrong!'
else:
file = request.FILES['file']
ctx['success'] = 'Successful'
input_string = tool.read_file(tool.save_file(file))
char_store, freq_store = tool.count_frequency(input_string)
char_frequency = tool.get_char_frequency(char_store, freq_store)
nodes = huf.create_nodes([item[1] for item in char_frequency])
root = huf.create_huffman_tree(nodes)
codes = huf.huffman_encoding(nodes, root)
save_file_name = tool.get_huffman_file(input_string, char_frequency, codes)
for item in zip(char_frequency, codes):
print('Character:%s freq:%-2d encoding: %s', item[0][0], item[0][1], item[1])
ctx['node'] = char_frequency
def file_iterator(files, chunk_size=512):
with open(files) as f:
while True:
c = f.read(chunk_size)
if c:
yield c
else:
break
the_file_name = tool.get_encode_ration(codes)+'_'+str(uuid.uuid1())+'.txt'
response = StreamingHttpResponse(file_iterator(save_file_name))
response['Content-Type'] = 'application/octet-stream'
response['Content-Disposition'] = 'attachment;filename="{0}"'.format(the_file_name)
return response
相关文章推荐
- OpenCV3 Python语言实现 笔记3
- Python语言基于Tensorflow实现RNN(预测)
- nginx tomcat集群配置实现无痛重启服务教程python语言版本
- Python语言实现获取主机名根据端口杀死进程
- Python的另外几种语言实现
- 判断平面内一点和三角形位置关系的算法和python语言的程序实现
- 设计模式十四(享元模式flyweight,python语言实现)
- [通用技术]在不同语言中用协程实现全排列算法(C++/Lua/Python/C#)
- Python语言实现将图片转化为html页面
- Python语言实现获取主机名根据端口杀死进程
- python语言实现postgresql数据库自动备份
- OpenCV3计算机视觉Python语言实现(三):使用OpenCV3处理图像
- Jython--python语言在Java中的完全实现
- C#, Java, PHP, Python和Javascript几种语言的AES加密解密实现[转载]
- 实例讲解hadoop中的map/reduce查询(python语言实现)
- OpenCV 3计算机视觉:Python语言实现(原书第2版) pdf+项目源代码
- python中使用and和or来实现其它语言中的?号表达式
- python实现哈夫曼编码
- C#, Java, PHP, Python和Javascript几种语言的AES加密解密实现
- Python语言实现百度语音识别API的使用实例