您的位置:首页 > 编程语言 > Python开发

Python边学边用--BT客户端实现之BitTorrent文件解析

2012-09-30 11:20 597 查看
BitTorrent文件使用bencode编码,其中包括了4种数据类型:
'd' 开头表示是dict类型,'e'表示结束
'l' (小写字母L)开头表示是list类型,'e'表示结束
'i'开头表示是integer类型,'e'表示结束,可以表示负数
以数字开头表示string类型,数字为string的长度,长度与string内容之间以':'分隔
默认所有text类型的属性为utf-8编码,但是大多数BitTorrent文件包含codepage和encoding属性,用来指定text的编码格式
BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html
以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。


torrent_file.py

import os
from datetime import tzinfo
from datetime import datetime

import bcodec

# Size argument passed to file.read() by TorrentFile.read_file(); a negative
# size makes read() return everything up to end-of-file.
_READ_MAX_LEN = -1

class BTFormatError(Exception):
    """Raised when file content cannot be used as BitTorrent metainfo.

    Derives from Exception (not BaseException) so that ordinary
    ``except Exception`` handlers see it; BaseException is reserved for
    interpreter-level exits such as KeyboardInterrupt and SystemExit.
    """

class TorrentFile(object):
    """Accessor for the metainfo dictionary decoded from a .torrent file.

    Call read_file() first.  Until a file has been loaded the metainfo is
    an empty dictionary, so every getter returns None or an empty list.
    """

    def __init__(self):
        # Per-instance state, so separate TorrentFile objects never share
        # a metainfo dictionary.
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode the torrent file at ``filename``.

        Raises:
            IOError: the file cannot be opened or read.
            BTFormatError: the content does not decode to a non-empty
                bencoded dictionary.
        """
        # 'with' closes the handle even if read() raises.
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)

        metainfo = bcodec.bdcode(data)
        if not metainfo or not isinstance(metainfo, dict):
            raise BTFormatError(
                '%s is not a bencoded metainfo dictionary' % filename)
        self.__file_name = filename
        self.__metainfo = metainfo

    def __is_singlefile(self):
        """Return True when the 'info' dictionary has a 'length' entry.

        get_files() reads 'length' from 'info', so the single-file test
        has to look there too rather than at the top-level dictionary.
        """
        info = self.__get_meta_top('info')
        return isinstance(info, dict) and 'length' in info

    def __decode_text(self, text):
        """Decode a raw text attribute with the torrent's declared encoding.

        The 'encoding' attribute wins, then 'codepage' (as ``cp<N>``),
        then utf-8.  Returns None for empty/missing text, and the raw
        text unchanged when it cannot be decoded or the declared codec
        is unknown.
        """
        if not text:
            return None

        encoding = self.get_encoding()
        if not encoding:
            codepage = self.get_codepage()
            encoding = 'cp' + str(codepage) if codepage else 'utf-8'

        try:
            return text.decode(encoding)
        except (ValueError, LookupError):
            # ValueError covers UnicodeDecodeError; LookupError is raised
            # for a codec name Python does not know.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo value for ``key``, or None."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return the value for ``key`` inside 'info', or None."""
        info = self.__get_meta_top('info')
        if isinstance(info, dict):
            return info.get(key)
        return None

    def get_codepage(self):
        """Return the raw 'codepage' attribute, or None."""
        return self.__get_meta_top('codepage')

    def get_encoding(self):
        """Return the raw 'encoding' attribute, or None."""
        return self.__get_meta_top('encoding')

    def get_announces(self):
        """Return the tracker information found in the metainfo.

        The result holds ``[announce]`` when 'announce' is present,
        followed by the 'announce-list' value when that is present.
        Absent attributes contribute nothing (no None placeholders).
        """
        announces = []
        announce = self.__get_meta_top('announce')
        if announce:
            announces.append([announce])
        announce_list = self.__get_meta_top('announce-list')
        if announce_list:
            # NOTE(review): the list is kept as ONE nested element to
            # preserve the existing return shape; if callers expect a
            # flat list of tiers this should become extend() -- confirm.
            announces.append(announce_list)
        return announces

    def get_publisher(self):
        """Return the decoded 'publisher' attribute, or None."""
        return self.__decode_text(self.__get_meta_top('publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' attribute, or None."""
        return self.__decode_text(self.__get_meta_top('publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' attribute, or None."""
        return self.__decode_text(self.__get_meta_top('created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime, or None."""
        utc_date = self.__get_meta_top('creation date')
        if utc_date is None:
            return None
        return datetime.utcfromtimestamp(utc_date)

    def get_comment(self):
        """Return the raw (undecoded) 'comment' attribute, or None."""
        return self.__get_meta_top('comment')

    def get_nodes(self):
        """Return the raw 'nodes' attribute, or None."""
        return self.__get_meta_top('nodes')

    def get_piece_length(self):
        """Return 'piece length' from the 'info' dictionary, or None."""
        return self.__get_meta_info('piece length')

    def get_files(self):
        """Describe every file in the torrent.

        Returns a list of dictionaries with the keys:
            'name':   list of path components (for multi-file torrents
                      the torrent's own name comes first),
            'length': file length in bytes,
            'pieces': the 20-byte piece hashes covering the file.  A
                      piece that straddles two files is listed under
                      both of them.

        An empty list is returned when 'pieces', 'name' or
        'piece length' is missing.  In multi-file torrents, processing
        stops at the first malformed entry and the files collected so
        far are returned.
        """
        hash_len = 20  # size of one entry in the 'pieces' string (BEP 3)

        files = []
        pieces = self.__get_meta_info('pieces')
        name = self.__decode_text(self.__get_meta_info('name'))
        piece_length = self.get_piece_length()

        # piece_length is a divisor below, so it must be non-zero too.
        if not pieces or not name or not piece_length:
            return files

        if self.__is_singlefile():
            file_length = self.__get_meta_info('length')
            if not file_length:
                return files

            # Ceiling division: a trailing partial piece still has a hash.
            pieces_num = (file_length + piece_length - 1) // piece_length
            if hash_len * pieces_num > len(pieces):
                return files

            file_pieces = [pieces[i * hash_len:(i + 1) * hash_len]
                           for i in range(pieces_num)]
            files.append({'name': [name],
                          'length': file_length,
                          'pieces': file_pieces})
            return files

        meta_files = self.__get_meta_info('files')
        if not meta_files:
            return files

        # Files are laid out back to back; 'offset' is the byte position
        # of the current file within that concatenation.
        offset = 0
        for one_file in meta_files:
            if 'path' not in one_file or 'length' not in one_file:
                break

            path_list = [name]
            path_list.extend(self.__decode_text(part)
                             for part in one_file['path'])
            file_length = one_file['length']

            first_piece = offset // piece_length
            offset += file_length
            if file_length > 0:
                # Index one past the piece holding the file's last byte.
                end_piece = (offset - 1) // piece_length + 1
            else:
                end_piece = first_piece

            if end_piece * hash_len > len(pieces):
                break

            file_pieces = [pieces[i * hash_len:(i + 1) * hash_len]
                           for i in range(first_piece, end_piece)]
            files.append({'name': path_list,
                          'length': file_length,
                          'pieces': file_pieces})

        return files

if __name__ == '__main__':
    # Command-line demo: dump the metainfo of one torrent file.
    import sys

    # Torrent to dump: the first command-line argument, or the sample file
    # this script was originally hard-wired to.
    filename = sys.argv[1] if len(sys.argv) > 1 else r".\huapi2.1.torrent"
    torrent = TorrentFile()

    def show(label, value):
        # A single pre-formatted argument prints identically whether
        # 'print' is the Python 2 statement or the Python 3 function; the
        # space reproduces the separator of ``print label, value``.
        print("%s %s" % (label, value))

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % reason)
        # Nothing was loaded, so there is nothing meaningful to dump.
        sys.exit(1)
    print("end to read file")

    show("announces: ", torrent.get_announces())
    show("peace length:", torrent.get_piece_length())
    show("code page:", torrent.get_codepage())
    show("encoding:", torrent.get_encoding())
    show("publisher:", torrent.get_publisher())
    show("publisher url:", torrent.get_publisher_url())
    show("creater:", torrent.get_creater())
    show("creation date:", torrent.get_creation_date())
    show("commnent:", torrent.get_comment())
    show("nodes:", torrent.get_nodes())
    for one_file in torrent.get_files():
        show('file name:', '\\'.join(one_file['name']))
        show('file length:', one_file['length'])
        show('pieces:', list(one_file['pieces']))



bcodec.py

1 '''
2 Created on 2012-9-30
3
4 @author: ddt
5 '''
def bdcode(data):
    """Decode the first bencoded value found in ``data``.

    ``data`` is copied into a fresh list of its elements, so the caller's
    object is left untouched while the reader consumes the copy.  Returns
    the decoded value, or None when nothing could be decoded.
    """
    return _read_chunk(list(data))
9
def _read_chunk(data):
    """Decode the value at the front of ``data`` by its leading character.

    A digit starts a string, 'd' a dictionary, 'i' an integer and 'l' a
    list; the matching reader consumes the value from ``data``.  Returns
    None, leaving ``data`` untouched, when ``data`` is empty or starts
    with any other character.
    """
    if len(data) < 1:
        return None

    marker = data[0]
    if marker.isdigit():
        return _read_string(data)
    if marker == 'd':
        return _read_dict(data)
    if marker == 'i':
        return _read_integer(data)
    if marker == 'l':
        return _read_list(data)
    return None
30
def _read_dict(data):
    """Consume a bencoded dictionary ('d' ... 'e') from the front of ``data``.

    Returns the decoded dict, or None when the input is malformed: no
    leading 'd', a key that is not a string, an entry that fails to
    decode, or a missing closing 'e'.  ``data`` is consumed up to the
    point where decoding stopped.
    """
    if len(data) == 0 or data.pop(0) != 'd':
        return None

    chunk = {}
    while len(data) > 0 and data[0] != 'e':
        key = _read_chunk(data)
        if not isinstance(key, str):
            return None

        value = _read_chunk(data)
        # None is the readers' only failure signal.  Falsy values such as
        # 0, '', [] and {} are valid bencoded data and must be kept, so
        # the check is against None rather than truthiness.
        if value is None:
            return None

        chunk[key] = value

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    return chunk
51
def _read_list(data):
    """Consume a bencoded list ('l' ... 'e') from the front of ``data``.

    Returns the decoded list, or None when the input is malformed: no
    leading 'l', an element that fails to decode, or a missing closing
    'e'.  ``data`` is consumed up to the point where decoding stopped.
    """
    if len(data) == 0 or data.pop(0) != 'l':
        return None

    chunk = []
    while len(data) > 0 and data[0] != 'e':
        value = _read_chunk(data)
        # Only None means "decode failed"; 0, '', [] and {} are valid
        # elements and must not abort the list.
        if value is None:
            return None
        chunk.append(value)

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    return chunk
69
def _read_string(data):
    """Consume a bencoded string (``<length>:<content>``) from ``data``.

    Returns the content as a str ('' for ``0:``), or None when the length
    prefix is missing, the ':' separator is missing, or fewer than
    ``length`` elements remain.  The length digits (and the separator,
    once matched) are consumed from ``data`` even when None is returned.
    """
    digits = []
    while len(data) > 0 and data[0].isdigit():
        digits.append(data.pop(0))

    # Without any digit int('') would raise ValueError; report the
    # malformed input with None like every other failure path.
    if not digits:
        return None

    if len(data) == 0 or data.pop(0) != ':':
        return None

    str_len = int(''.join(digits))
    if str_len > len(data):
        return None

    value = data[:str_len]
    del data[:str_len]
    return ''.join(value)
86
def _read_integer(data):
    """Consume a bencoded integer (``i<digits>e``, optional '-') from ``data``.

    Returns the value as an int, or None when the input is malformed:
    shorter than the minimal ``i2e`` form, no leading 'i', a first
    character that is neither '-' nor a digit, a '-' with no digits
    after it, or a missing closing 'e'.  Because 0 is a valid result,
    callers must compare against None rather than test truthiness.
    """
    if len(data) < len('i2e') or data.pop(0) != 'i':
        return None

    sign = data.pop(0)
    if sign != '-' and not sign.isdigit():
        return None

    literal = [sign]
    while len(data) > 0 and data[0].isdigit():
        literal.append(data.pop(0))

    if len(data) == 0 or data.pop(0) != 'e':
        return None

    # A lone '-' ("i-e") would make int() raise ValueError; treat it as
    # malformed input like the other failure paths.
    if literal == ['-']:
        return None

    return int(''.join(literal))


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: