您的位置:首页 > 编程语言 > Python开发

python多线程模块--threading三部曲(一)

2017-05-10 11:55 393 查看
Threading(一)


当时你还小,手动把一个大文件分成了两个。添了几行代码,简陋的多线程。程序用时少了一半吧,你还笑了。<纪念第一次threading-傻>

注:linux分割文件:

split -l 300 large_file new_file    # 按行数分割:每个输出文件 300 行,new_file 为输出文件名的前缀

split -b 10m large_file new_file    # 按大小分割:每个输出文件 10 MB,new_file 为输出文件名的前缀

import threading
import urllib2
import sys
import csv
import re
import datetime

def read_csv(csv_file):
    """Build the list of article URLs described by *csv_file*.

    Only the first CSV column of each row is used.  It is expected to hold
    one string whose fields are joined by ``'#=:'``.  A row with at least
    ten such fields yields one URL assembled from fields 7, 8, 9 and the
    last field; every other row (including blank lines) is skipped.

    :param csv_file: path of the input CSV file.
    :returns: list of URL strings, in file order.
    """
    # The csv module consumes byte strings on Python 2 but text on Python 3
    # (where the file is decoded with the platform default encoding).
    if sys.version_info[0] < 3:
        source = open(csv_file, 'rb')
    else:
        source = open(csv_file, 'r', newline='')

    urls = []
    with source as f:
        # Strip NUL characters up front; otherwise the reader fails with
        # "csv.Error: line contains NULL byte".
        rows = csv.reader(line.replace('\0', '') for line in f)
        for row in rows:
            if not row:
                # A blank line comes back as an empty list, so row[0]
                # would raise IndexError.
                continue
            fields = row[0].split('#=:')
            if len(fields) >= 10:
                # Part of the URL is deliberately masked by the author.
                urls.append(
                    'http://mp.weixin.??. =' + fields[7]
                    + '==&mid=' + fields[8]
                    + '&idx=' + fields[-1]
                    + '&sn=' + fields[9]
                )
    return urls

def _first_match(pattern, html):
    """Return the first group *pattern* captures in *html*, or ``'none'``."""
    found = pattern.findall(html)
    return found[0] if found else 'none'


def get_html(csv_file):
    """Fetch every URL produced by ``read_csv(csv_file)`` and scrape it.

    For each URL one output row is built:

    * on success: ``[biz, nickname, meta_value]`` where the last two are
      the first regex matches in the page, or ``'none'`` when absent;
    * on a failed request: ``[biz, url]`` so the URL can be retried later.

    ``biz`` is the text between the first and second ``'='`` of the URL.
    A 1-based progress counter is printed for every URL.

    :param csv_file: path of the input CSV file, passed to ``read_csv``.
    :returns: list of rows (lists of strings), one per URL.
    """
    # Compile once instead of on every iteration.
    name_re = re.compile(
        r'<strong class="profile_nickname">(.*?)</strong>', re.S | re.M)
    value_re = re.compile(
        r'<span class="profile_meta_value">(.*?)</span>', re.S | re.M)

    l_out = []
    for n, url_wx in enumerate(read_csv(csv_file), 1):
        print(n)  # progress indicator
        row = [url_wx.split('=')[1]]
        try:
            response = urllib2.urlopen(url_wx)
            try:
                html = response.read()
            finally:
                # Release the connection promptly instead of waiting for GC.
                response.close()
        except Exception:
            # Best effort: keep the URL of the failed request in the output.
            # `Exception` (not a bare except) lets Ctrl-C / SystemExit
            # still stop the run.
            row.append(url_wx)
        else:
            row.append(_first_match(name_re, html))
            row.append(_first_match(value_re, html))
        l_out.append(row)
    return l_out
def _write_rows(csv_file, out_path):
    """Scrape *csv_file* with ``get_html`` and write the rows to *out_path*."""
    rows = get_html(csv_file)
    # `with` guarantees the output is flushed and closed, even if a write
    # fails part-way through.
    with open(out_path, 'w+') as file_result:
        csv.writer(file_result).writerows(rows)


def write_csv(csv_file):
    """Scrape the URLs listed in *csv_file* into ``kol_result_one.csv``.

    :param csv_file: path of the input CSV file, passed to ``get_html``.
    """
    _write_rows(csv_file, 'kol_result_one.csv')


def write_csv_2(csv_file):
    """Scrape the URLs listed in *csv_file* into ``kol_result_two.csv``.

    :param csv_file: path of the input CSV file, passed to ``get_html``.
    """
    _write_rows(csv_file, 'kol_result_two.csv')
# Create two worker threads, one per input file; the file paths are the
# first and second command-line arguments.
t1 = threading.Thread(target=write_csv, args=(sys.argv[1],))
t2 = threading.Thread(target=write_csv_2, args=(sys.argv[2],))
threads = [t1, t2]

def main():
    """Run all threads in the module-level ``threads`` list and time them.

    Starts every thread, waits for all of them to finish, then prints the
    seconds component of the elapsed time.
    """
    starttime = datetime.datetime.now()
    for t in threads:
        t.start()
    # Wait for the workers.  Without join() the end time is taken right
    # after the threads are launched, so the printed duration would not
    # include the actual work.
    for t in threads:
        t.join()
    endtime = datetime.datetime.now()
    # Print the elapsed time (timedelta.seconds: whole seconds, excluding
    # any full days).
    print((endtime - starttime).seconds)


if __name__ == '__main__':
    main()

进化后的代码,随后跟进.
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  多线程 python linux