您的位置:首页 > 其它

产品id替换为其分词结果

2015-11-05 15:42 211 查看
ReplaceItem2.java将final_example中的每个产品的id替换为其分词结果(分词结果用空格隔开,产品用逗号隔开),(输出结果:ExampleToTerms.txt)

然后,对每行中的每个分词结果,到产品库中找出最相似的产品(可以先按相似度排序,也可以设定阈值,例如相似度 > 0.8)。每一行都要到产品库中进行匹配,并把该行匹配到的所有商品放在同一行,用逗号隔开。

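关于推荐个数的选取问题:首先,每个产品都有一组相似产品,必须按相似度排序,可以用 map 存储:id -> commoncount/count-foreitem(也可以用两个数组存)。如果推荐 example 只有 1 个,那么必须推荐 200 条,不管第 200 条与之的相似度到底有多低;如果推荐 example 为 n 个,则第一个 example 推荐 $200 \cdot \frac{2i}{n + n^2}$ 条(取 i = n),其后的 example 依次取 i = n-1, …, 1(因为 $\sum_{i=1}^{n} i = \frac{n + n^2}{2}$,各 example 的推荐条数之和仍为 200)。此时同样不管相似度的大小,只根据原先的相似度配比来选择个数。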

package test;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import redis.clients.jedis.Jedis;

/**
 * Replaces product ids with their word-segmentation ("terms") strings.
 *
 * <p>The program works in two phases:
 * <ol>
 *   <li>{@link #getStrings()} reads the item dictionary file (one item per line,
 *       space-separated: {@code item_id cat_id terms}) and stores every item in Redis,
 *       keyed by its id, as a Java-serialized {@code HashMap}.</li>
 *   <li>{@link #main(String[])} reads the example file (comma-separated item ids per line),
 *       looks each id up in Redis and appends the item's terms to the output file. Commas
 *       inside the terms are replaced by spaces; every item is followed by a comma and every
 *       input line ends with a newline.</li>
 * </ol>
 */
public class ReplaceItem2 {

    /** Redis endpoint shared by the loading and the lookup phase. */
    private static final String REDIS_HOST = "10.20.100.5";
    private static final int REDIS_PORT = 6379;

    /** Item dictionary: one {@code item_id cat_id terms} record per line. */
    private static final String ITEMS_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/dim_items.txt";

    /** Input examples: comma-separated item ids, one example per line. */
    private static final String EXAMPLES_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/final_example1.txt";

    /** Output file; results are appended to it. */
    private static final String OUTPUT_FILE =
            "/public/home/dsj/Public/sundujing/fpgrowth/ExampleToTerms1.txt";

    /** Item id that {@link #main(String[])} looks up once as a smoke test after loading. */
    private static final String PROBE_ITEM_ID = "116";

    /**
     * Loads the item dictionary file into Redis.
     *
     * <p>Each line is split on single spaces; the first three fields become the
     * {@code item_id}, {@code cat_id} and {@code terms} entries of a map that is serialized
     * and stored under the item id. Lines with fewer than three fields are reported on
     * stderr and skipped. I/O failures are printed and end the load early; they are not
     * propagated to the caller.
     */
    public static void getStrings() {
        // try-with-resources closes the reader and the Redis connection on every path.
        try (Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT);
             BufferedReader reader = new BufferedReader(new InputStreamReader(
                     new FileInputStream(ITEMS_FILE), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(" ");
                if (fields.length < 3) {
                    System.err.println("Skipping malformed item line: " + line);
                    continue;
                }
                // Keep HashMap and these exact keys: they define the stored payload format.
                Map<String, Object> item = new HashMap<String, Object>();
                item.put("item_id", fields[0]);
                item.put("cat_id", fields[1]);
                item.put("terms", fields[2]);

                byte[] payload = writeObject(item);
                if (payload == null) {
                    // Serialization failure was already reported by writeObject.
                    continue;
                }
                // One write per item is enough; the key is the item id.
                jedis.set(fields[0].getBytes(StandardCharsets.UTF_8), payload);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Binary search over item maps ordered by their numeric {@code item_id}.
     *
     * <p>An element matches when its {@code item_id} string equals {@code des}; otherwise
     * both ids are parsed as doubles to decide which half to continue in.
     *
     * @param list items sorted in ascending numeric order of their {@code "item_id"} entry;
     *             every entry must be a numeric string
     * @param des  the item id to look for
     * @return the index of the matching element, or -1 if there is none
     * @throws NumberFormatException if an id that has to be compared is not numeric
     */
    public static int binarySearch(ArrayList<Map<String, Object>> list, String des) {
        int low = 0;
        int high = list.size() - 1;
        while (low <= high) {
            // Unsigned shift keeps the midpoint correct even if low + high overflows int.
            int middle = (low + high) >>> 1;
            Object candidate = list.get(middle).get("item_id");
            if (des.equals(candidate)) {
                return middle;
            } else if (Double.parseDouble(des) < Double.parseDouble((String) candidate)) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return -1;
    }

    /**
     * Binary search in an ascending {@code int} array.
     *
     * @param srcArray sorted array to search
     * @param des      value to look for
     * @return the index of an element equal to {@code des}, or -1 if there is none
     */
    public static int binarySearch1(int[] srcArray, int des) {
        int low = 0;
        int high = srcArray.length - 1;
        while (low <= high) {
            // Unsigned shift keeps the midpoint correct even if low + high overflows int.
            int middle = (low + high) >>> 1;
            if (des == srcArray[middle]) {
                return middle;
            } else if (des < srcArray[middle]) {
                high = middle - 1;
            } else {
                low = middle + 1;
            }
        }
        return -1;
    }

    /**
     * Serializes an object with Java native serialization.
     *
     * @param obj the object to serialize
     * @return the serialized bytes, or {@code null} if serialization failed (the failure is
     *         printed to stderr)
     */
    private static byte[] writeObject(Object obj) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // Closing the object stream flushes everything into the buffer before it is read.
        try (ObjectOutputStream out = new ObjectOutputStream(buffer)) {
            out.writeObject(obj);
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        }
        System.out.println("序列化成功。");
        return buffer.toByteArray();
    }

    /**
     * Deserializes an object written by {@link #writeObject(Object)}.
     *
     * <p>NOTE(review): this uses Java native deserialization on bytes fetched from Redis.
     * That is only safe while the Redis instance is trusted; consider a data-only format
     * (e.g. a Redis hash or JSON) if that ever stops being true.
     *
     * @param array the serialized bytes; may be {@code null} (e.g. key not present in Redis)
     * @return the deserialized object, or {@code null} if {@code array} is {@code null} or
     *         cannot be deserialized
     */
    private static Object readObject(byte[] array) {
        if (array == null) {
            return null;
        }
        try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(array))) {
            Object value = in.readObject();
            if (value != null) {
                System.out.println("反序列化成功。");
                System.out.println(value);
            }
            return value;
        } catch (IOException | ClassNotFoundException | RuntimeException ex) {
            // Corrupt payloads can surface as runtime exceptions too; callers rely on
            // getting null back instead of an exception.
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Fetches an item record from Redis.
     *
     * @param jedis  open Redis connection
     * @param itemId id of the item to look up
     * @return the stored item map, or {@code null} if the id is unknown or the stored value
     *         is not a map
     */
    @SuppressWarnings("unchecked") // values are written by getStrings() as Map<String, Object>
    private static Map<String, Object> fetchItem(Jedis jedis, String itemId) {
        Object value = readObject(jedis.get(itemId.getBytes(StandardCharsets.UTF_8)));
        if (value instanceof Map) {
            return (Map<String, Object>) value;
        }
        return null;
    }

    /**
     * Appends text to a file, creating the file if necessary.
     *
     * <p>The text is written as UTF-8 so that it matches the encoding used to read the input
     * files, independent of the platform default charset. I/O failures are printed to stderr
     * and not propagated.
     *
     * @param fileName path of the file to append to
     * @param content  text to append
     */
    public static void appendMethod(String fileName, String content) {
        // The 'true' flag opens the file in append mode.
        try (OutputStreamWriter writer = new OutputStreamWriter(
                new FileOutputStream(fileName, true), StandardCharsets.UTF_8)) {
            writer.write(content);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Entry point: loads the item dictionary into Redis, then converts every line of the
     * example file from item ids to item terms and appends the result to the output file.
     *
     * <p>Ids that are empty or have no terms stored in Redis are reported on stderr and
     * skipped instead of aborting the whole run.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        getStrings();

        try (Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT)) {
            // Smoke test: show the category of one known item to confirm the load worked.
            Map<String, Object> probe = fetchItem(jedis, PROBE_ITEM_ID);
            System.out.println("id" + (probe == null ? null : probe.get("cat_id")));

            try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                    new FileInputStream(EXAMPLES_FILE), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Build the whole output line first so the file is opened once per line.
                    StringBuilder outputLine = new StringBuilder();
                    for (String rawId : line.split(",")) {
                        System.out.println(rawId);
                        String itemId = rawId.trim();
                        if (itemId.isEmpty()) {
                            continue;
                        }
                        Map<String, Object> item = fetchItem(jedis, itemId);
                        Object terms = (item == null) ? null : item.get("terms");
                        if (!(terms instanceof String)) {
                            System.err.println(
                                    "No terms stored for item id '" + itemId + "'; skipping");
                            continue;
                        }
                        System.out.println("id" + terms);
                        // Terms are space-separated in the output; the comma separates items.
                        // Every item is followed by a comma (including the last one) to keep
                        // the established output format.
                        outputLine.append(((String) terms).replace(',', ' ')).append(',');
                    }
                    outputLine.append('\n');
                    appendMethod(OUTPUT_FILE, outputLine.toString());
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

}






内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: