您的位置:首页 > 其它

项目:文件压缩及解压缩

2016-07-13 23:36 375 查看
项目描述:实现文件的压缩及解压缩。 

开发平台:VS2013 

开发技术:堆,Huffman树,文件输入输出函数

项目特点:

1.统计文件中字符出现的次数,利用数据结构堆建造Huffman树,出现次数多的编码短,出现次数少的编码长。 

2.根据建造好的Huffman树形成编码,以对文件进行压缩。

3.将文件中出现的字符以及它们出现的次数写入配置文件,以便后续的解压缩。

4.根据配置文件读取相关信息重建Huffman树,对压缩后的文件进行译码。

项目成果:该文件压缩可将8k文件大概压缩至6k,解压缩可无错误地还原原文件。

注:由于文件较大,不易用眼睛看出原文件和解压后文件是否完全相同,这里借助软件Beyond Compare来检查两文件是否完全相同

项目代码:

FileCompress.h

#pragma once

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include <iostream>
#include <string>

#include "Heap.h"

using namespace std;
// One node of a binary tree that carries a weight of type T.
// A freshly constructed node has no children; callers link nodes together
// by assigning _left / _right directly.
template<class T>
struct HaffManTreeNode
{
    HaffManTreeNode* _left;    // left child, null until linked
    HaffManTreeNode* _right;   // right child, null until linked
    T                _weight;  // copy of the value passed to the constructor

    // Builds a child-less node whose weight is a copy of `w`.
    HaffManTreeNode(const T& w)
        : _left(nullptr), _right(nullptr), _weight(w)
    {}
};
// Bookkeeping record for one byte value: the byte itself, how many times it
// occurs, and the code string assigned to it.
// All comparisons and the addition look at _count only.
struct CharInfo
{
    typedef unsigned long LongType;

    unsigned char _ch;     // byte value this record describes
    LongType      _count;  // number of occurrences
    std::string   _code;   // code assigned to the byte (empty until generated)

    // Creates a record with the given count. _ch is zero-initialised so that
    // records produced by operator+ never carry an indeterminate byte.
    CharInfo(LongType count = 0)
        : _ch(0)
        , _count(count)
    {}

    // Returns a record whose count is the sum of both counts; _ch and _code
    // of the result are not meaningful (0 and empty).
    CharInfo operator +(const CharInfo& right) const
    {
        return CharInfo(_count + right._count);
    }

    // The operands are taken by reference: a CharInfo owns a std::string and
    // copying it for every comparison is wasted work.
    bool operator !=(const CharInfo& right) const
    {
        return _count != right._count;
    }

    bool operator >(const CharInfo& right) const
    {
        return _count > right._count;
    }

    bool operator <(const CharInfo& right) const
    {
        return _count < right._count;
    }
};
template<class T>
class HaffManTree
{

typedef HaffManTreeNode<T> Node;
public:
HaffManTree(const T* a, size_t n, const T& invaid)
{
struct NodeCompare
{
bool operator()(Node* l, Node* r)
{
return l->_weight < r->_weight;
}
};

Heap<Node*, NodeCompare> minHeap;

for (size_t i = 0; i < n; i++)
{
if (a[i] != invaid)
{
minHeap.Push(new Node(a[i]));
}
}
while (minHeap.Size()>1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
minHeap.Push(parent);
}
_root = minHeap.Top();
}

Node *GetRoot()
{
return _root;
}

protected:
Node* _root;
};

class FileCompress
{
public:
FileCompress()
{
for (int i = 0; i < 256; i++)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
}
}
void Compress(string filename)
{
FILE *fout = fopen(filename.c_str(), "rb");
assert(fout);
char ch = fgetc(fout);//读取文件字符
while (ch != EOF)
{
_infos[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
CharInfo invalid;
string code;
HaffManTree<CharInfo> tree(_infos, 256, invalid);//建立哈夫曼树
GenerateHaffManCode(tree.GetRoot(), code);//创建哈夫曼编码

//压缩
string compressName = filename + ".haffman";
FILE* fin = fopen(compressName.c_str(), "wb");
fseek(fout, 0, SEEK_SET);
ch = fgetc(fout);
char value = 0;
int size = 0;
while (ch != EOF)
{
string code = _infos[(unsigned char)ch]._code;
for (int i = 0; i < code.size(); i++)
{
if (code[i] == '1')
{
value |= 1;
}
++size;
if (size == 8)
{
fputc(value, fin);
value = 0;
size = 0;
}
value <<= 1;
}

ch = fgetc(fout);
}
if (size > 0)
{
value <<= 7 - size;
fputc(value, fin);
}
//写匹配文件

string configFile = filename += ".config";
FILE* fconfig = fopen(configFile.c_str(),"wb");
string line;
for (int i = 0; i < 256; i++)
{
if (_infos[i]._count>0)
{
line += _infos[i]._ch;
line += ",";
char buf[1024];
_itoa(_infos[i]._count, buf, 10);
line += buf;
line += '\n';
fputs(line.c_str(), fconfig);
}
line.clear();
}

fclose(fout);
fclose(fin);
fclose(fconfig);
}
void GenerateHaffManCode(HaffManTreeNode<CharInfo>* root, string code)
{
if (root == NULL)
return;
if (root->_left == NULL && root->_right == NULL)
{
_infos[root->_weight._ch]._code = code;
return;
}
GenerateHaffManCode(root->_left, code + '0');
GenerateHaffManCode(root->_right, code + '1');

}
bool ReadLine(FILE* fout,string& line)
{
char ch = fgetc(fout);
if (ch==EOF)
{
return false;
}
while (ch!=EOF && ch !='\n')
{
line += ch;
ch = fgetc(fout);
}
return true;
}
void UnCompress(string filename)
{
//读配置文件
string configname = filename + ".config";
FILE* fconfig = fopen(configname.c_str(),"rb");
assert(fconfig);
string line;

while(ReadLine(fconfig, line))
{
if (line.empty())
{
line += '\n';
}
else
{
unsigned char ch = line[0];
_infos[ch]._count = atoi(line.substr(2).c_str());
line.clear();
}
}
CharInfo invalid;
HaffManTree<CharInfo> tree(_infos, 256, invalid);
string compressFile = filename + ".haffman";

FILE *fout = fopen(compressFile.c_str(), "rb");
assert(fout);
string uncompress = filename += "com";
FILE *fin = fopen(uncompress.c_str(), "wb");
unsigned char ch = fgetc(fout);
int pos = 7;
HaffManTreeNode<CharInfo>* root = tree.GetRoot();
HaffManTreeNode<CharInfo>* cur = root;
int count = root->_weight._count;
while (ch != EOF)
{
if (ch & (1 << pos))
{
cur = cur->_right;
}
else
cur = cur->_left;
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch, fin);
count--;
cur = root;
}

if (pos == 0)
{
ch = fgetc(fout);
pos = 8;
}
pos--;
if (count == 0)
break;
}

fclose(fout);
fclose(fin);
fclose(fconfig);
}
protected:
CharInfo _infos[256];
};

// Compresses the file named "input" in the current working directory.
// Declared inline because it is defined in a header: without it, every
// translation unit that includes the header would emit its own definition
// and the program would fail to link.
inline void TestCompressFile()
{
    FileCompress file;
    file.Compress("input");
}
// Decompresses the data previously produced for the file named "input".
// Declared inline because it is defined in a header: without it, every
// translation unit that includes the header would emit its own definition
// and the program would fail to link.
inline void TestUnCompressFile()
{
    FileCompress file;
    file.UnCompress("input");
}


堆文件:

#pragma once
#include <vector>
#include <assert.h>
#include<iostream>
using namespace std;

// Comparison functor: true when `l < r`.
// operator() is const so the functor can also be invoked through a const
// object or const reference (e.g. when stored inside a container).
template <class T>
struct Less
{
    bool operator() (const T& l, const T& r) const
    {
        return l < r;
    }
};

// Comparison functor: true when `l > r`.
// operator() is const so the functor can also be invoked through a const
// object or const reference (e.g. when stored inside a container).
template <class T>
struct Greater
{
    bool operator() (const T& l, const T& r) const
    {
        return l > r;
    }
};

// Binary heap stored in a std::vector.
//
// Compare()(x, y) == true means x must sit closer to the top than y, so the
// default Greater<T> gives a max-heap and Less<T> gives a min-heap.
template<class T, class Compare = Greater<T>>
class Heap
{
public:
    Heap()
    {}

    // Builds a heap from the `size` elements starting at `a`.
    Heap(const T* a, size_t size)
    {
        assert(a);
        _a.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _a.push_back(a[i]);
        }

        // Sift down every node that has a child, from the last such node up
        // to and including the root (index 0). Counting with i = index + 1
        // keeps the arithmetic unsigned-safe for sizes 0 and 1.
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _AdjustDown(i - 1);
        }
    }

    // Inserts a copy of `x`.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(_a.size() - 1);
    }

    // Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());

        // Move the top to the back, drop it, then repair from the root.
        std::swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();

        _AdjustDown(0);
    }

    // Top element. The heap must not be empty.
    T& Top()
    {
        assert(!_a.empty());
        return _a[0];
    }

    const T& Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    size_t Size() const
    {
        return _a.size();
    }

    bool Empty() const
    {
        return _a.empty();
    }

protected:
    // Moves the element at `parent` down until neither child should be
    // above it.
    void _AdjustDown(size_t parent)
    {
        // Start with the left child.
        size_t child = parent * 2 + 1;

        Compare com;
        while (child < _a.size())
        {
            // Pick whichever child should be closer to the top.
            if (child + 1 < _a.size()
                && com(_a[child + 1], _a[child]))
            {
                ++child;
            }

            // If that child belongs above the parent, swap and keep going.
            if (com(_a[child], _a[parent]))
            {
                std::swap(_a[child], _a[parent]);
                parent = child;
                child = parent * 2 + 1;
            }
            else
            {
                break;
            }
        }
    }

    // Moves the element at `child` up while it should be above its parent.
    void _AdjustUp(size_t child)
    {
        Compare com;

        while (child > 0)
        {
            const size_t parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            std::swap(_a[child], _a[parent]);
            child = parent;
        }
    }

protected:
    std::vector<T> _a;  // heap-ordered storage; index 0 is the top
};

// Builds a min-heap from a fixed array, pushes one more value and checks
// that the smallest value stays on top.
// Declared inline because it is defined in a header.
inline void TestHeap()
{
    int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };

    Heap<int, Less<int>> hp1(a, sizeof(a) / sizeof(a[0]));
    assert(hp1.Top() == 10);

    hp1.Push(20);
    assert(hp1.Size() == 11);
    assert(hp1.Top() == 10);
}

// Priority queue adapter over Heap<T> with its default comparator.
// Push and Pop forward to the heap; Top, Size and Empty expose the state a
// caller needs in order to actually consume the queue.
template<class T>
class PriorityQueue
{
public:
    // Inserts a copy of `x`.
    void Push(const T& x)
    {
        _hp.Push(x);
    }

    // Removes the element currently on top.
    void Pop()
    {
        _hp.Pop();
    }

    // Element currently on top. The queue must not be empty.
    T& Top()
    {
        return _hp.Top();
    }

    // Number of stored elements.
    size_t Size()
    {
        return _hp.Size();
    }

    // True when nothing is stored.
    bool Empty()
    {
        return _hp.Empty();
    }

protected:
    Heap<T> _hp;
};

// Sift-down step of a max-heap stored in a[0..n): moves a[parent] down until
// it is not smaller than either of its children.
// Declared inline because it is defined in a header; std::swap is qualified
// so the function does not depend on a using-directive being in effect.
inline void _AdjustDown(int a[], int n, int parent)
{
    int child = parent * 2 + 1;  // left child
    while (child < n)
    {
        // Prefer the right child when it exists and is larger.
        if (child + 1 < n && a[child + 1] > a[child])
        {
            ++child;
        }

        if (a[parent] < a[child])
        {
            std::swap(a[parent], a[child]);
            parent = child;
            child = 2 * parent + 1;
        }
        else
        {
            break;
        }
    }
}

// Sorts a[0..n) into ascending order with heap sort.
// Declared inline because it is defined in a header.
inline void HeapSort(int a[], size_t n)
{
    assert(a);

    // Nothing to do for 0 or 1 elements. Returning here also avoids
    // computing (n - 2) / 2 in unsigned arithmetic, which wraps around.
    if (n < 2)
    {
        return;
    }

    const int count = static_cast<int>(n);

    // Build a max-heap: sift down every node that has a child.
    for (int i = (count - 2) / 2; i >= 0; --i)
    {
        _AdjustDown(a, count, i);
    }

    // Repeatedly move the current maximum to the end of the unsorted range
    // and restore the heap on what remains.
    for (int last = count - 1; last > 0; --last)
    {
        std::swap(a[0], a[last]);

        _AdjustDown(a, last, 0);
    }
}

// Sorts a fixed array with HeapSort and prints the elements back to back on
// standard output.
// Declared inline because it is defined in a header.
inline void TestHeapSort()
{
    int a[] = { 2, 1, 4, 5, 0, 6, 3, 7, 8, 9 };

    // Derive the length from the array instead of repeating it as a literal.
    const size_t count = sizeof(a) / sizeof(a[0]);

    HeapSort(a, count);
    for (size_t i = 0; i < count; i++)
    {
        std::cout << a[i];
    }
}


测试代码:

#include"FileCompress.h"
#include<iostream>
#include<windows.h>
using namespace std;

int main()
{
int begin = GetTickCount();
TestCompressFile();
TestUnCompressFile();
int end = GetTickCount();

cout << "begin:" << begin << endl;
cout << "end:" << end << endl;

cout << end-begin<< endl;
cout << getchar() << endl;
return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息