您的位置:首页 > 编程语言

淘宝店铺图片数据迁移核心代码

2011-03-18 17:27 375 查看
核心代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.OleDb;
using System.Threading;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace ConsoleA
{

class Image
{
public string Key { get; set; }
public string ImageUrl { get; set; }
}
class Program
{
static Dictionary<int, List<string>> PathDic = new Dictionary<int, List<string>>();
static Dictionary<int, List<Image>> ImgDic = new Dictionary<int, List<Image>>();
static string DBPATH = @"E:\精品\1\318.xls";

//小叶
static string tb1="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&uid=billowye&site=cntaobao&s=1&charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小叶 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&uid=billowye&site=cntaobao&s=1&charset=utf-8\"></A>";
static string rtb1 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=hubaochan&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=hubaochan&s=1\" alt=\"点击这里给我发消息\" /></a>";

static string tb2="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&uid=%E7%B1%B3%E5%BE%B7%E5%88%9B%E6%84%8F&site=cntaobao&s=1&charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小米 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&uid=米德创意&site=cntaobao&s=1&charset=utf-8\" width=77 height=18></A>";
static string rtb2 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=zengrunfeng&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=zengrunfeng&s=1\" alt=\"点击这里给我发消息\" /></a>";

static string tb3="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&uid=happyluck77&site=cntaobao&s=1&charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小伍 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&uid=happyluck77&site=cntaobao&s=1&charset=utf-8\"></A>";
static string rtb3 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=gensonzeng&s=1\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=gensonzeng&s=1\" alt=\"点击这里给我发消息\" /></a>";

static string tb4="<A href=\"http://amos.im.alisoft.com/msg.aw?v=2&uid=happyluck77&site=cntaobao&s=1&charset=utf-8\" target=_blank><IMG border=0 alt=客服在线:小静 align=absMiddle src=\"http://amos.im.alisoft.com/online.aw?v=2&uid=世纪天使的故事&site=cntaobao&s=1&charset=utf-8\"></A>";
static string rtb4 = "<a target=\"_blank\" href=\"http://amos1.taobao.com/msg.ww?v=2&uid=zengrunfeng&s=1&zkaction=1000\" ><img border=\"0\" src=\"http://amos1.taobao.com/online.ww?v=2&uid=zengrunfeng&s=1\" alt=\"点击这里给我发消息\" /></a>";
static string dianpu = @"http://micc.taobao.com/view_page-64967943.htm";
static string rdianpu = @"http://fav365.taobao.com/view_page-70328410.htm";

static int replacecount = 1;
static void Main(string[] args)
{

// DownLoad();

T2DataTableToDict();
// Console.Read();

DataTable dt = ExcelToDT(DBPATH, "t");

int count = 1;

Dictionary<string, string> dict = T2DataTableToDict();
foreach (DataRow dr in dt.Rows)//原数据
{
string strdesc = dr["宝贝描述"].ToString();
List<Image> pics = new List<Image>();
var images = GetHtmlImageUrlList(strdesc);//取出图
foreach (string item in images)
{
Image myimg = new Image() { ImageUrl = item, Key = MiccFileKey(item) };
pics.Add(myimg);
}

foreach (KeyValuePair<string, string> item in dict)
{
foreach (Image img in pics)
{
if (item.Key == img.Key)
{
strdesc = strdesc.Replace(img.ImageUrl, item.Value);
Console.WriteLine(img.ImageUrl + "替换为" + item.Value);
replacecount++;
}
}
}

strdesc = strdesc.Replace(dianpu, rdianpu);

//strdesc.Contains(tb1);

//strdesc = strdesc.Replace(tb1,rtb1);
//strdesc = strdesc.Replace(tb2, rtb2);
//strdesc = strdesc.Replace(tb3, rtb3);
//strdesc = strdesc.Replace(tb4, rtb4);

strdesc = strdesc.Replace("uid=billowye", "uid=hubaochan");
strdesc = strdesc.Replace("uid=米德创意", "uid=zengrunfeng");
strdesc = strdesc.Replace("uid=%E7%B1%B3%E5%BE%B7%E5%88%9B%E6%84%8F", "uid=zengrunfeng");
strdesc = strdesc.Replace("uid=happyluck77", "uid=gensonzeng");

strdesc = strdesc.Replace("小叶", "小胡");
strdesc = strdesc.Replace("小米", "小曾");

UPdateDT(strdesc, dr["Id"].ToString(), DBPATH);

Console.WriteLine("完成第" + count + "条替换");
count++;

}
Console.WriteLine("完成");
Console.WriteLine(replacecount + "条替换");
Console.Read();

}

public static void DownLoad()
{
DataTable dt = ExcelToDT(DBPATH, "t");
SavePictureFromUrl(dt);
}

/// <summary>
/// 获取t2 table
/// </summary>
/// <returns></returns>
public static Dictionary<string, string> T2DataTableToDict()
{

DataTable dt = ExcelToDT(DBPATH, "t2");

Dictionary<string, string> dict = new Dictionary<string, string>();
foreach (DataRow dr in dt.Rows)
{
string str = dr[0].ToString();
string[] strArray = str.Split(new char[] { '^' }, StringSplitOptions.RemoveEmptyEntries);
string key = strArray[1].Replace(";", "").Trim();
try
{
dict.Add(key, strArray[0].Trim());
}
catch (Exception ex)
{
Console.WriteLine(key);
Console.WriteLine("出错" + ex.Message);
}

}
return dict;
}

/// <summary>
/// 取得HTML中所有图片的 URL。
/// </summary>
/// <param name="sHtmlText">HTML代码</param>
/// <returns>图片的URL列表</returns>
public static string[] GetHtmlImageUrlList(string sHtmlText)
{
// 定义正则表达式用来匹配 img 标签
Regex regImg = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

// 搜索匹配的字符串
MatchCollection matches = regImg.Matches(sHtmlText);

int i = 0;
string[] sUrlList = new string[matches.Count];

// 取得匹配项列表
foreach (Match match in matches)
{

sUrlList[i++] = match.Groups["imgUrl"].Value;
}

return sUrlList;
}

static string MiccFileKey(string miccfile)
{
miccfile = miccfile.Trim();
if (miccfile.StartsWith("http://", StringComparison.InvariantCulture))
{
string tmp = miccfile.Replace("http://", "");
tmp = tmp.Replace("/", "@");
if (tmp.Length > 50)
{
tmp = tmp.Substring(0, 50);
}

return tmp;
}
throw new Exception(miccfile);

}

static DataTable ExcelToDT(string Path, string tableName)
{

string strConn = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + Path + ";" + "Extended Properties='Excel 8.0;'";
OleDbConnection conn = new OleDbConnection(strConn);
if (conn.State != ConnectionState.Open)
{
conn.Open();
}
string strExcel = "";
OleDbDataAdapter myCommand = null;
DataSet ds = null;
strExcel = "select * from [" + tableName + "$]";
myCommand = new OleDbDataAdapter(strExcel, strConn);
ds = new DataSet();
myCommand.Fill(ds, "table1");
conn.Close();
return ds.Tables["table1"];

}

static void UPdateDT(string desc, string id, string Path)
{

string strConn = "Provider=Microsoft.Jet.OLEDB.4.0;" + "Data Source=" + Path + ";" + "Extended Properties='Excel 8.0;'";
OleDbConnection conn = new OleDbConnection(strConn);

conn.Open();

OleDbCommand myCommand = null;

try
{
string updatesql = "Update [t$] set 宝贝描述=@desc,城市='佛山',省='广东' where ID=" + id;
myCommand = new OleDbCommand(updatesql, conn);
myCommand.Parameters.Add(new OleDbParameter("desc", desc));
int result = myCommand.ExecuteNonQuery();
if (result == 1)
{
Console.WriteLine("更新数据库成功");
}
}
catch (Exception ex)
{
Console.WriteLine(id);
}
finally
{
conn.Close();
}

}

static void SavePictureFromUrl(DataTable dt)
{

List<string> PathList = new List<string>();
List<string> tempPathList = new List<string>();
int pathTempNum = 0;
int DicKey = 0;
foreach (DataRow row in dt.Rows)
{
if (pathTempNum == 0)
{
tempPathList = new List<string>();
}
string description = row["宝贝描述"].ToString();

foreach (string str in GetHtmlImageUrlList(description))
{
PathList.Add(str);
tempPathList.Add(str);
pathTempNum++;
}

if (pathTempNum > 100)
{
PathDic.Add(DicKey, tempPathList);
DicKey++;
pathTempNum = 0;
}
}
ThreadPool.SetMaxThreads(100, 100);
foreach (int key in PathDic.Keys)
{
ThreadPool.QueueUserWorkItem(new WaitCallback(SavePicFromDic), key);
}
}

static void SavePicFromDic(object DicKey)
{
foreach (string path in PathDic[Convert.ToInt32(DicKey)])
{
Console.WriteLine(DicKey.ToString() + path);
SavePictureFromHTTP(path, @"E:\精品\excel\ConsoleA\ConsoleA\taobao18\" + path.Substring(7).Replace('/', '@'));
}
}
static void SavePictureFromHTTP(string url, string path)
{
try
{
long fileLength = 0;
WebRequest webReq = WebRequest.Create(url);
WebResponse webRes = webReq.GetResponse();
fileLength = webRes.ContentLength;
Stream srm = webRes.GetResponseStream();
StreamReader srmReader = new StreamReader(srm);
byte[] bufferbyte = new byte[fileLength];
int allByte = (int)bufferbyte.Length;
int startByte = 0;
while (fileLength > 0)
{
int downByte = srm.Read(bufferbyte, startByte, allByte);
if (downByte == 0) { break; };
startByte += downByte;
allByte -= downByte;
}
if (!File.Exists(path))
{
string tempPath = path;
FileStream fs = new FileStream(tempPath, FileMode.OpenOrCreate, FileAccess.Write);
fs.Write(bufferbyte, 0, bufferbyte.Length);
srm.Close();
srmReader.Close();
fs.Close();
Console.WriteLine("已经下载" + url);
}
else
{
//path = path.Insert(path.LastIndexOf('.'), Guid.NewGuid().ToString());
}

}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
}

}
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: