约会网站预测函数
2017-11-09 15:46
148 查看
# coding: utf-8
# Dating-site match prediction with a k-nearest-neighbours (kNN) classifier.
import operator

from numpy import *

try:
    # Nothing in this script plots, so a missing matplotlib must not stop
    # the classifier from running.
    import matplotlib
    import matplotlib.pyplot as plt
except ImportError:
    matplotlib = None
    plt = None


def createDataSet():
    """Return a tiny hard-coded sample: a (4, 2) point array and its labels."""
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest rows.

    Args:
        inX: Feature vector to classify (same width as a row of ``dataSet``).
        dataSet: 2-D array holding one training sample per row.
        labels: Sequence with the label of each row of ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes.  On a tie, the label that was seen
        first among the nearest neighbours wins (the sort below is stable).

    Raises:
        IndexError: If ``k`` is below 1 or larger than the number of rows.
    """
    # Broadcasting subtracts inX from every row; the sign does not matter
    # because the differences are squared.
    diffMat = dataSet - asarray(inX)
    distances = (diffMat ** 2).sum(axis=1) ** 0.5  # Euclidean distance per row
    sortedDistIndicies = distances.argsort()  # row indices, nearest first

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]  # label of the i-th nearest row
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # Order the (label, votes) pairs by vote count, highest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Parse a tab-separated data file into a feature matrix and label list.

    Each non-blank line supplies three numeric features in its first three
    fields and an integer class label in its last field.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an ``(n, 3)`` float array
        and a list of ``n`` integer labels.
    """
    # The context manager closes the file even if parsing fails; blank
    # lines (for example a trailing newline) are skipped instead of crashing.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = zeros((len(rows), 3))
    classLabelVector = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(value) for value in fields[0:3]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` to the range [0, 1].

    Args:
        dataSet: 2-D numeric array with one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)`` where
        ``normDataSet = (dataSet - minVals) / ranges``.  A column whose
        values are all equal reports a range of 1, so both this function and
        callers that divide by ``ranges`` avoid a 0/0 (NaN) result.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    spans = maxVals - minVals
    ranges = where(spans == 0, 1.0, spans)
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals


def datingClassTest(hoRatio=0.10, filename='datingTestSet2.txt', k=3):
    """Hold-out evaluation of the classifier on a dating data file.

    The first ``hoRatio`` fraction of the rows is used as the test set and
    the remaining rows as training data.

    Args:
        hoRatio: Fraction of rows held out for testing.
        filename: Data file in the format read by ``file2matrix``.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The error rate (misclassified test rows / test rows) as a float.

    Raises:
        ValueError: If the hold-out set would contain no rows.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)  # number of test samples
    if numTestVecs < 1:
        raise ValueError(
            "hold-out set is empty: %d rows with hoRatio=%s" % (m, hoRatio))

    errorCount = 0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m],
                                     k)
        print("the classifier came back with : %d , the real answer is : %d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1

    errorRate = errorCount / float(numTestVecs)
    print("the total error rate is : %f" % errorRate)
    return errorRate


def classifyPerson(filename='datingTestSet2.txt', k=3):
    """Prompt for one person's three features and print the predicted liking.

    Args:
        filename: Training data file in the format read by ``file2matrix``.
        k: Number of neighbours passed to ``classify0``.

    Returns:
        The predicted description string (it is also printed).
    """
    # NOTE(review): indexing with ``label - 1`` assumes the file uses the
    # labels 1, 2 and 3 in this order -- confirm against the data file.
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(input("percentage of time spent playing video games?"))
    ffMiles = float(input("frequent flier miles earned per year?"))
    iceCream = float(input("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # NOTE(review): presumably the file's columns are miles, game time,
    # ice cream, in that order -- verify against the data file.
    inArr = array([ffMiles, percentTats, iceCream])
    # Scale the query with the training set's min/range before classifying.
    classifierResult = classify0((inArr - minVals) / ranges,
                                 normMat, datingLabels, k)
    answer = resultList[classifierResult - 1]
    print("You will probably like this person :", answer)
    return answer


if __name__ == '__main__':
    # Only prompt when executed as a script, never on import.
    classifyPerson()
输出结果:
相关文章推荐
- 《机器学习实战》程序清单2-5 约会网站预测函数
- K近邻法及简单的约会网站预测系统(一)
- K近邻改进约会网站(五):使用算法进行预测
- HEVC函数入门(4)——指针,帧内预测,滤波
- 用xdebug的函数跟踪功能测试网站性能
- 用go开发的足球预测分析网站上线了
- FormatRemoteUrl函数之asp实现格式化成当前网站完整的URL-将相对地址转换为绝对地址的代码
- 连接到网站的函数
- kNN近邻算法改善约会网站配对效果案例
- 《机器学习实战》学习笔记-[2]-K近邻_网站约会实例
- 帧内预测之函数Intra16x16_Mode_Decision的分析与理解
- 机器学习实战——k-邻近算法:约会网站
- javascript+css好多网站用的选星星实现打分功能的函数
- k近邻 - 改进约会网站的匹配效果
- 在线函数绘制网站x2
- php获取Alexa网站排名、流量、访问量、页面浏览量代码函数分享
- Machine Learning in Action_CH2_2_使用kNN改进约会网站的配对效果
- tensorflow 学习笔记13 RNN LSTM结构预测正弦(sin)函数
- 基于KNN算法的约会网站配对效果 python3.2
- 机器学习实践-k近邻算法-约会网站配对源码