
Naive Bayes Classification in Python

Given the following training samples:
('Nobody owns water.','good');

('the quick rabbit jumps fences','good');

('buy pharmaceuticals','bad');

('make quick money at the online casino','bad');

('the quick brown fox jumps','good');

How do we decide whether a new sample such as 'quick money' is good or bad? The most common approach is naive Bayes classification.

The naive Bayes procedure is roughly as follows:
1. From the training samples, estimate how likely each word is to belong to each category, i.e. compute the word's per-category document frequency (the fraction of that category's documents that contain the word).
2. For each word in the text to be classified, look up that frequency; multiply all of these values together under the naive independence assumption, then multiply by P(category). The result is the score of the text for that category, as in the worked example below.
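Concretely, the score of a category is P(category) × ∏ P(word | category), and the text is assigned to the category with the highest score. As a hand-worked sketch on the five samples above (using the 0.5-weighted smoothing that weightedprob applies in the code below), 'quick money' scores about 0.6 × 0.625 × 0.25 ≈ 0.094 for 'good' and 0.4 × 0.5 × 0.5 = 0.1 for 'bad', so it ends up classified as 'bad'.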
import re


def getwords(doc):
    # Split the text on non-alphanumeric characters and keep the
    # lowercased words that are between 3 and 19 characters long.
    splitter = re.compile(r'\W+')
    words = [s.lower() for s in splitter.split(doc) if 2 < len(s) < 20]
    # Each distinct word is a feature, counted once per document.
    return dict([(w, 1) for w in words])

def sampletrain(cl):
    # Feed the five labelled training samples to a classifier instance.
    cl.train('Nobody owns water.', 'good')
    cl.train('the quick rabbit jumps fences', 'good')
    cl.train('buy pharmaceuticals', 'bad')
    cl.train('make quick money at the online casino', 'bad')
    cl.train('the quick brown fox jumps', 'good')

class classifier:
    def __init__(self, getfeatures, filename=None):
        # Counts of feature/category combinations,
        # e.g. {'quick': {'good': 2, 'bad': 1}}
        self.fc = {}
        # Counts of documents seen in each category, e.g. {'good': 3, 'bad': 2}
        self.cc = {}
        # Function used to extract features from an item (here: getwords)
        self.getfeatures = getfeatures
        # Minimum factor by which the best category must beat the others
        self.thresholds = {}

    def setthreshold(self, cat, t):
        self.thresholds[cat] = t

    def getthreshold(self, cat):
        if cat not in self.thresholds:
            return 1.0
        return self.thresholds[cat]

    def classify(self, item, default=None):
        # Score the item against every category (prob is supplied by
        # the subclass) and remember the best-scoring one.
        probs = {}
        maxprob = 0.0
        best = None
        for cat in self.categories():
            probs[cat] = self.prob(item, cat)
            if probs[cat] > maxprob:
                maxprob = probs[cat]
                best = cat
        if best is None:
            return default

        # Only accept the winner if it beats every other category
        # by at least its threshold factor.
        for cat in probs:
            if cat == best:
                continue
            if probs[cat] * self.getthreshold(best) > probs[best]:
                return default
        return best

    def incf(self, f, cat):
        # Increase the count of a feature/category pair
        self.fc.setdefault(f, {})
        self.fc[f].setdefault(cat, 0)
        self.fc[f][cat] += 1

    def incc(self, cat):
        # Increase the count of documents in a category
        self.cc.setdefault(cat, 0)
        self.cc[cat] += 1

    def fcount(self, f, cat):
        # How many documents of this category contained the feature
        if f in self.fc and cat in self.fc[f]:
            return float(self.fc[f][cat])
        return 0.0

    def catcount(self, cat):
        # How many documents belong to this category
        if cat in self.cc:
            return float(self.cc[cat])
        return 0.0

    def totalcount(self):
        # Total number of documents trained on
        return sum(self.cc.values())

    def categories(self):
        # All categories seen so far
        return self.cc.keys()

    def train(self, item, cat):
        # Extract the features and update the counts for this category
        features = self.getfeatures(item)
        for f in features:
            self.incf(f, cat)
        self.incc(cat)

    def fprob(self, f, cat):
        # Pr(feature | category): the fraction of documents in this
        # category that contain the feature
        if self.catcount(cat) == 0:
            return 0.0
        return self.fcount(f, cat) / self.catcount(cat)

    def weightedprob(self, f, cat, prf, weight=1.0, ap=0.5):
        # Smooth the raw probability towards an assumed prior (ap=0.5)
        # so that words seen only a few times do not dominate the result.
        basicprob = prf(f, cat)
        # Total number of times this feature appears in any category
        totals = sum([self.fcount(f, c) for c in self.categories()])
        return ((weight * ap) + (totals * basicprob)) / (weight + totals)

class naivebayes(classifier):
    def docprob(self, item, cat):
        # Pr(document | category): multiply the weighted probabilities of
        # all features, assuming they are independent (the "naive" part)
        features = self.getfeatures(item)
        p = 1.0
        for f in features:
            p *= self.weightedprob(f, cat, self.fprob)
        return p

    def prob(self, item, cat):
        # Pr(category | document) is proportional to
        # Pr(document | category) * Pr(category)
        catprob = self.catcount(cat) / self.totalcount()
        docprob = self.docprob(item, cat)
        return docprob * catprob
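
Putting it together, here is a minimal usage sketch. All names come from the code above; the expected outputs in the comments are hand-computed from the five training samples and are approximate:

if __name__ == '__main__':
    # Train on the five labelled samples, then classify the new text
    cl = naivebayes(getwords)
    sampletrain(cl)

    print(cl.prob('quick money', 'good'))   # about 0.094
    print(cl.prob('quick money', 'bad'))    # about 0.1
    print(cl.classify('quick money', default='unknown'))   # 'bad'

    # Require 'bad' to beat the other categories by a factor of 3
    # before committing to it; otherwise fall back to the default.
    cl.setthreshold('bad', 3.0)
    print(cl.classify('quick money', default='unknown'))   # 'unknown'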