题目5:
压缩算法
题目说明:
试给出压缩算法,被压缩数据为
a,b,c
……
z
等
26
个英文字母随机出现的文本(每个字母出现的概率为
1/26
),计算出你设计的程序的平均压缩比
注意:原文本自己编写程序随机生成,要求大小
10K
以上
平均压缩比为
10
次结果的平均值(程序跑
10
次即可,当然,原始文本需要生成
10
次)
===================================================================================================================
本想用神经网络方法解决,但因为时间太紧肯定做不出来,即使作出来要训练,调整参数也很费时间。
退而求其次,打算用huffman算法,结果一顿搜资料,又耽误了不少时间,再编写调试也来不及了,而且编的低效程序还不如不编。况且,前提是26个字母出现概率相同,huffman算法应该起不了很大作用。
后来,我就只好用了最初级的,把字母重新编码,即把原来用八位保存的char改为5位保存的二进制数,直接把二进制写进文件。
代码如下,欢迎各位批判:
头文件: compress.h
#include<iostream>
#include<fstream>
#include<sstream>
#include<string>
#include<cstdlib>
#include<ctime>
#include<map>
#include<cassert>
#include<bitset>
using namespace std;
/*
*本程序的思想是将26个字母进行重新编码,从00000到11001,则每个char类型可以只用5位就可以存放了,节省了3/8的空间
*把该5位二进制以二进制形式存放到文件中,具体是每八位二进制把它转化为char类型,存放到压缩文件中
*/
class Compress{
public:
Compress(string fileName):m_fileName(fileName){initMap();}
void generRandomFile();
void compressFile();
void decompressFile();
public:
private:
void initMap(); //初始化字母与编码的映射关系
void getfile(string &str, string fileName); //一次读取整个普通文件并存放到str中
void getBfile(string &str, string fileName); //一次读取整个二进制文件并存放到str中
char transToChar(string str); //把string类型的二进制串转化为char类型
void trans2Binary(char ch, string &bin); //把char类型的字符转化为它的二进制形式,存放到string类型中
private:
string m_fileName; //存放要处理的文件名
map<char, string> m_alph2Binary; //存放字母与编码的映射关系
};
头文件 compress.cpp:
#include"compress.h"
//初始化字母与编码的映射表
void Compress::initMap()
{
m_alph2Binary['a'] = "00000";
m_alph2Binary['b'] = "00001";
m_alph2Binary['c'] = "00010";
m_alph2Binary['d'] = "00011";
m_alph2Binary['e'] = "00100";
m_alph2Binary['f'] = "00101";
m_alph2Binary['g'] = "00110";
m_alph2Binary['h'] = "00111";
m_alph2Binary['i'] = "01000";
m_alph2Binary['j'] = "01001";
m_alph2Binary['k'] = "01010";
m_alph2Binary['l'] = "01011";
m_alph2Binary['m'] = "01100";
m_alph2Binary['n'] = "01101";
m_alph2Binary['o'] = "01110";
m_alph2Binary['p'] = "01111";
m_alph2Binary['q'] = "10000";
m_alph2Binary['r'] = "10001";
m_alph2Binary['s'] = "10010";
m_alph2Binary['t'] = "10011";
m_alph2Binary['u'] = "10100";
m_alph2Binary['v'] = "10101";
m_alph2Binary['w'] = "10110";
m_alph2Binary['x'] = "10111";
m_alph2Binary['y'] = "11000";
m_alph2Binary['z'] = "11001";
}
//产生随机字母组成的文件
void Compress::generRandomFile()
{
string tmpname = "output/" + m_fileName+ ".txt";
m_fileName = tmpname;
ofstream outfile(tmpname.c_str());
string alphabet("abcdefghijklmnopqrstuvwxyz");
srand((unsigned)time(NULL)); //设定随机数发生公式的种子值
for(int i = 0; i < 10000; i++)
{
int num = rand()%26;
outfile << alphabet[num];
}
}
//压缩文件,压缩文件存放方式为.by后缀
void Compress::compressFile()
{
string text;
getfile(text , m_fileName);
string binaryText = "";
string fileName = m_fileName + ".by";
ofstream outfile(fileName.c_str(),ios_base::binary);
for(int i = 0; i < text.size(); i++)
{
if(m_alph2Binary.count(text[i]))
{
binaryText += m_alph2Binary[text[i]];
}
else
{
cerr << "wrong!" << endl;
}
}
for(int i = 0; i < binaryText.size(); i += 8)
{
char newch = transToChar(binaryText.substr(i, 8));
outfile << newch;
}
outfile.close();
}
//读取文件,一次把整个文件读到string类型中去
void Compress::getfile(string &str, string fileName)
{
ifstream infile(fileName.c_str());
assert(infile.is_open());
istreambuf_iterator<char> beg(infile), end;
string stri(beg, end);
str = stri;
}
//读取二进制文件,并存放到二进制文件中去
void Compress::getBfile(string &str, string fileName)
{
ifstream infile(fileName.c_str(), ios_base::binary);
assert(infile.is_open());
istreambuf_iterator<char> beg(infile), end;
string stri(beg, end);
str = stri;
}
//把8位二进制转化为char类型
char Compress::transToChar(string str)
{
char result = 0x00;
char value = 0x1;
for(int i = 7; i >= 0; i--)
{
if(str[i] == '1')
{
result += value;
}
value *= 2;
}
return result;
}
//解压文件,在原文件后加后缀.by
void Compress::decompressFile()
{
string fileName = m_fileName + ".by";
ifstream infile(fileName.c_str(), ios_base::binary);
assert(infile.is_open());
string text;
string binaryText;
string origText = "";
string tmp;
getBfile(text, fileName);
for(int i= 0; i < text.size(); i++)
{
trans2Binary(text[i], tmp); //把压缩文件每个char转化为string类型的二进制形式
binaryText += tmp;
}
map<string, char> bin2Char;
map<char, string>::iterator beg = m_alph2Binary.begin();
while(beg != m_alph2Binary.end())
{
bin2Char.insert(pair<string, char>(beg->second, beg->first));
beg++;
}
for(int i = 0; i < binaryText.size(); i += 5) //把string类型的二进制进行还原,每5位一组,反映射到原字母上去
{
if(bin2Char.count(binaryText.substr(i, 5)))
{
origText += bin2Char[binaryText.substr(i, 5)];
}
else
cerr << "wrong!" << endl;
}
fileName += ".og";
ofstream outfile(fileName.c_str()); //输出解压前的文件
outfile << origText;
}
//把压缩文件中的char类型转化成string类型的二进制形式
void Compress::trans2Binary(char ch, string &bin)
{
unsigned long ll = ch;
bitset<8> bs(ll);
ostringstream os;
os << bs;
bin = os.str();
}
主文件main.cpp:
#include"compress.h"
void main()
{
string fileName;
cout << "please enter the name of the file" << endl;
cin >> fileName;
Compress compress(fileName); //新建压缩类
compress.generRandomFile(); //产生随机文件
compress.compressFile(); //压缩文件
compress.decompressFile(); //解压文件
}