solr5.5(6)——debugQuery的打分分析
2017-08-03 17:59
399 查看
目前在了解打分机制
我的条件
q:pro_name:Evod AND pro_brand:53
得到结果为
"7": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 6) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=6,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 6, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=6)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 6) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=6,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 6, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=6)\n",
"69": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 68) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=68,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 68, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=68)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 68) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=68,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 68, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=68)\n",
"873": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 872) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=872,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 872, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=872)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 872) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=872,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 872, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=872)\n",
"874": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 873) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=873,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 873, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=873)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 873) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=873,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 873, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=873)\n",
"875": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 874) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=874,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 874, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=874)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 874) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=874,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 874, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=874)\n",
"876": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 875) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=875,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 875, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=875)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 875) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=875,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 875, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=875)\n",
"877": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 876) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=876,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 876, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=876)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 876) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=876,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 876, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=876)\n",
"878": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 877) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=877,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 877, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=877)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 877) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=877,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 877, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=877)\n",
"879": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 878) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=878,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 878, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=878)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 878) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=878,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 878, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=878)\n",
"880": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 879) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=879,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 879, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=879)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 879) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=879,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 879, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=879)\n"现在我只取一条来分析
4.6345463 = sum of:
2.4654682 = weight(pro_name:evod in 6) [ClassicSimilarity], result of:
2.4654682 = score(doc=6,freq=1.0), product of:
0.80325437 = queryWeight, product of:
4.910959 = idf(docFreq=187, maxDocs=9390)
0.16356365 = queryNorm
3.0693493 = fieldWeight in 6, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
4.910959 = idf(docFreq=187, maxDocs=9390)
0.625 = fieldNorm(doc=6)
2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 6) [ClassicSimilarity], result of:
2.1690784 = score(doc=6,freq=1.0), product of:
0.5956361 = queryWeight, product of:
3.6416166 = idf(docFreq=668, maxDocs=9390)
0.16356365 = queryNorm
3.6416166 = fieldWeight in 6, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
3.6416166 = idf(docFreq=668, maxDocs=9390)
1.0 = fieldNorm(doc=6)一看就明白这个默认使用的相似度类是ClassicSimilarity,不明白的可以去
org.apache.lucene.search.similarities.ClassicSimilarity看下源码
总打分计算
具体到上面的测试来讲,每个文档有两个域:pro_name和pro_brand,最终匹配分值=查询语句在两个域中的得分之和。即最终结果4.6345463= 2.4654682 + 2.1690784。
每个域的打分计算
先说pro_name=Evod的得分
field的score得分 = 查询权重queryWeight * 域权重fieldWeight
2.4654682 = 0.80325437 * 3.0693493;
queryWeight的计算
这个数值的计算在TFIDFSimilarity
// TFIDFSimilarity (IDFStats).normalize: combines the query-wide normalization
// factor with this term's boost and idf into the final query-side weight.
public void normalize(float queryNorm, float boost) {
this.boost = boost;
this.queryNorm = queryNorm;
// queryWeight = queryNorm * boost * idf — this is the 0.80325437 seen in the explain output.
this.queryWeight = queryNorm * boost * this.idf.getValue();
// value multiplies queryWeight by idf once more (idf appears twice in the score product).
this.value = this.queryWeight * this.idf.getValue();
}这样一看就明白
queryWeight = queryNorm * boost * idf ,而lucene中boost默认为1
0.80325437 = 4.910959 * 1 * 0.16356365
idf的计算
idf(逆文档频率)衡量的是项在整个索引中的稀有程度:包含该项的文档越少,idf越大。它在ClassicSimilarity中的计算方式为
// ClassicSimilarity.idf: inverse document frequency.
// idf = ln(numDocs / (docFreq + 1)) + 1 — rarer terms get a larger idf.
// The +1 in the denominator avoids division by zero when docFreq is 0.
public float idf(long docFreq, long numDocs) {
return (float)(Math.log((double)numDocs / (double)(docFreq + 1L)) + 1.0D);
}
Math.log(number)这个方法相当于数学的ln(number)
docFreq是根据指定关键字进行检索,检索到的Document的数量,我们测试的docFreq=187;numDocs是指索引文件中总共的Document的数量,我们测试的numDocs=9390。
queryNorm的计算
计算方法在ClassicSimilarity
// ClassicSimilarity.queryNorm: 1 / sqrt(sum of squared term weights).
// This scales scores so that results of different queries are roughly comparable;
// it does not change the relative ranking within one query.
public float queryNorm(float sumOfSquaredWeights) {
return (float)(1.0D / Math.sqrt((double)sumOfSquaredWeights));
}Math.sqrt(number)这个方法相当于数学的√number
这里,sumOfSquaredWeights的计算是在org.apache.lucene.search.TermQuery.TermWeight类中的sumOfSquaredWeights方法实现:
// TermQuery.TermWeight.sumOfSquaredWeights: each term contributes
// (idf * boost)^2 to the sum that feeds queryNorm above.
public float sumOfSquaredWeights() {
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}其实默认情况下,sumOfSquaredWeights = idf * idf,因为Lucene中默认的boost = 1.0。
sumOfSquaredWeights = 4.910959 * 4.910959 + 3.6416166 * 3.6416166 = 37.3789
然后计算queryNorm = 1.0D / Math.sqrt(37.3789) = 0.1635
fieldWeight的计算
在org/apache/lucene/search/similarities/TFIDFSimilarity.java的explainField方法中有
// TFIDFSimilarity.explainField: builds the "fieldWeight" branch of the
// explain tree as the product tf * idf * fieldNorm.
private Explanation explainField(int doc, Explanation freq, TFIDFSimilarity.IDFStats stats, NumericDocValues norms) {
Explanation tfExplanation = Explanation.match(this.tf(freq.getValue()), "tf(freq=" + freq.getValue() + "), with freq of:", new Explanation[]{freq});
// fieldNorm is decoded from the stored norms (computed at index time);
// it defaults to 1.0 when the field was indexed without norms.
Explanation fieldNormExpl = Explanation.match(norms != null?this.decodeNormValue(norms.get(doc)):1.0F, "fieldNorm(doc=" + doc + ")", new Explanation[0]);
return Explanation.match(tfExplanation.getValue() * stats.idf.getValue() * fieldNormExpl.getValue(), "fieldWeight in " + doc + ", product of:", new Explanation[]{tfExplanation, stats.idf, fieldNormExpl});
}fieldWeight = tf * idf * fieldNorm
tf和idf的计算参考前面的,fieldNorm的计算在索引的时候确定了,此时直接从索引文件中读取,这个方法并没有给出直接的计算。如果使用ClassicSimilarity的话,它实际上就是lengthNorm,域越长的话Norm越小,在org/apache/lucene/search/similarities/ClassicSimilarity.java里面有关于它的计算:
// ClassicSimilarity.lengthNorm: computed once at index time and stored as the
// field norm; longer fields get a smaller norm (boost / sqrt(numTerms)).
public float lengthNorm(FieldInvertState state) {
int numTerms;
// Optionally exclude tokens at the same position (e.g. synonyms) from the length.
if(this.discountOverlaps) {
numTerms = state.getLength() - state.getNumOverlap();
} else {
numTerms = state.getLength();
}
return state.getBoost() * (float)(1.0D / Math.sqrt((double)numTerms));
}
我的条件
q:pro_name:Evod AND pro_brand:53
得到结果为
"7": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 6) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=6,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 6, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=6)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 6) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=6,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 6, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=6)\n",
"69": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 68) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=68,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 68, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=68)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 68) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=68,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 68, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=68)\n",
"873": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 872) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=872,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 872, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=872)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 872) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=872,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 872, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=872)\n",
"874": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 873) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=873,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 873, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=873)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 873) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=873,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 873, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=873)\n",
"875": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 874) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=874,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 874, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=874)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 874) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=874,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 874, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=874)\n",
"876": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 875) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=875,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 875, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=875)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 875) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=875,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 875, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=875)\n",
"877": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 876) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=876,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 876, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=876)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 876) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=876,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 876, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=876)\n",
"878": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 877) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=877,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 877, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=877)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 877) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=877,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 877, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=877)\n",
"879": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 878) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=878,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 878, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=878)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 878) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=878,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 878, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=878)\n",
"880": "\n4.6345463 = sum of:\n 2.4654682 = weight(pro_name:evod in 879) [ClassicSimilarity], result of:\n 2.4654682 = score(doc=879,freq=1.0), product of:\n 0.80325437 = queryWeight, product of:\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.0693493 = fieldWeight in 879, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 4.910959 = idf(docFreq=187, maxDocs=9390)\n 0.625 = fieldNorm(doc=879)\n 2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 879) [ClassicSimilarity], result of:\n 2.1690784 = score(doc=879,freq=1.0), product of:\n 0.5956361 = queryWeight, product of:\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 0.16356365 = queryNorm\n 3.6416166 = fieldWeight in 879, product of:\n 1.0 = tf(freq=1.0), with freq of:\n 1.0 = termFreq=1.0\n 3.6416166 = idf(docFreq=668, maxDocs=9390)\n 1.0 = fieldNorm(doc=879)\n"现在我只取一条来分析
4.6345463 = sum of:
2.4654682 = weight(pro_name:evod in 6) [ClassicSimilarity], result of:
2.4654682 = score(doc=6,freq=1.0), product of:
0.80325437 = queryWeight, product of:
4.910959 = idf(docFreq=187, maxDocs=9390)
0.16356365 = queryNorm
3.0693493 = fieldWeight in 6, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
4.910959 = idf(docFreq=187, maxDocs=9390)
0.625 = fieldNorm(doc=6)
2.1690784 = weight(pro_brand:`\b\u0000\u0000\u00005 in 6) [ClassicSimilarity], result of:
2.1690784 = score(doc=6,freq=1.0), product of:
0.5956361 = queryWeight, product of:
3.6416166 = idf(docFreq=668, maxDocs=9390)
0.16356365 = queryNorm
3.6416166 = fieldWeight in 6, product of:
1.0 = tf(freq=1.0), with freq of:
1.0 = termFreq=1.0
3.6416166 = idf(docFreq=668, maxDocs=9390)
1.0 = fieldNorm(doc=6)一看就明白这个默认使用的相似度类是ClassicSimilarity,不明白的可以去
org.apache.lucene.search.similarities.ClassicSimilarity看下源码
总打分计算
具体到上面的测试来讲,每个文档有两个域:pro_name和pro_brand,最终匹配分值=查询语句在两个域中的得分之和。即最终结果4.6345463= 2.4654682 + 2.1690784。
每个域的打分计算
先说pro_name=Evod的得分
field的score得分 = 查询权重queryWeight * 域权重fieldWeight
2.4654682 = 0.80325437 * 3.0693493;
queryWeight的计算
这个数值的计算在TFIDFSimilarity
// TFIDFSimilarity (IDFStats).normalize: combines the query-wide normalization
// factor with this term's boost and idf into the final query-side weight.
public void normalize(float queryNorm, float boost) {
this.boost = boost;
this.queryNorm = queryNorm;
// queryWeight = queryNorm * boost * idf — this is the 0.80325437 seen in the explain output.
this.queryWeight = queryNorm * boost * this.idf.getValue();
// value multiplies queryWeight by idf once more (idf appears twice in the score product).
this.value = this.queryWeight * this.idf.getValue();
}这样一看就明白
queryWeight = queryNorm * boost * idf ,而lucene中boost默认为1
0.80325437 = 4.910959 * 1 * 0.16356365
idf的计算
idf(逆文档频率)衡量的是项在整个索引中的稀有程度:包含该项的文档越少,idf越大。它在ClassicSimilarity中的计算方式为
// ClassicSimilarity.idf: inverse document frequency.
// idf = ln(numDocs / (docFreq + 1)) + 1 — rarer terms get a larger idf.
// The +1 in the denominator avoids division by zero when docFreq is 0.
public float idf(long docFreq, long numDocs) {
return (float)(Math.log((double)numDocs / (double)(docFreq + 1L)) + 1.0D);
}
Math.log(number)这个方法相当于数学的ln(number)
docFreq是根据指定关键字进行检索,检索到的Document的数量,我们测试的docFreq=187;numDocs是指索引文件中总共的Document的数量,我们测试的numDocs=9390。
queryNorm的计算
计算方法在ClassicSimilarity
// ClassicSimilarity.queryNorm: 1 / sqrt(sum of squared term weights).
// This scales scores so that results of different queries are roughly comparable;
// it does not change the relative ranking within one query.
public float queryNorm(float sumOfSquaredWeights) {
return (float)(1.0D / Math.sqrt((double)sumOfSquaredWeights));
}Math.sqrt(number)这个方法相当于数学的√number
这里,sumOfSquaredWeights的计算是在org.apache.lucene.search.TermQuery.TermWeight类中的sumOfSquaredWeights方法实现:
// TermQuery.TermWeight.sumOfSquaredWeights: each term contributes
// (idf * boost)^2 to the sum that feeds queryNorm above.
public float sumOfSquaredWeights() {
queryWeight = idf * getBoost(); // compute query weight
return queryWeight * queryWeight; // square it
}其实默认情况下,sumOfSquaredWeights = idf * idf,因为Lucene中默认的boost = 1.0。
sumOfSquaredWeights = 4.910959 * 4.910959 + 3.6416166 * 3.6416166 = 37.3789
然后计算queryNorm = 1.0D / Math.sqrt(37.3789) = 0.1635
fieldWeight的计算
在org/apache/lucene/search/similarities/TFIDFSimilarity.java的explainField方法中有
// TFIDFSimilarity.explainField: builds the "fieldWeight" branch of the
// explain tree as the product tf * idf * fieldNorm.
private Explanation explainField(int doc, Explanation freq, TFIDFSimilarity.IDFStats stats, NumericDocValues norms) {
Explanation tfExplanation = Explanation.match(this.tf(freq.getValue()), "tf(freq=" + freq.getValue() + "), with freq of:", new Explanation[]{freq});
// fieldNorm is decoded from the stored norms (computed at index time);
// it defaults to 1.0 when the field was indexed without norms.
Explanation fieldNormExpl = Explanation.match(norms != null?this.decodeNormValue(norms.get(doc)):1.0F, "fieldNorm(doc=" + doc + ")", new Explanation[0]);
return Explanation.match(tfExplanation.getValue() * stats.idf.getValue() * fieldNormExpl.getValue(), "fieldWeight in " + doc + ", product of:", new Explanation[]{tfExplanation, stats.idf, fieldNormExpl});
}fieldWeight = tf * idf * fieldNorm
tf和idf的计算参考前面的,fieldNorm的计算在索引的时候确定了,此时直接从索引文件中读取,这个方法并没有给出直接的计算。如果使用ClassicSimilarity的话,它实际上就是lengthNorm,域越长的话Norm越小,在org/apache/lucene/search/similarities/ClassicSimilarity.java里面有关于它的计算:
// ClassicSimilarity.lengthNorm: computed once at index time and stored as the
// field norm; longer fields get a smaller norm (boost / sqrt(numTerms)).
public float lengthNorm(FieldInvertState state) {
int numTerms;
// Optionally exclude tokens at the same position (e.g. synonyms) from the length.
if(this.discountOverlaps) {
numTerms = state.getLength() - state.getNumOverlap();
} else {
numTerms = state.getLength();
}
return state.getBoost() * (float)(1.0D / Math.sqrt((double)numTerms));
}
相关文章推荐
- Lucene/Solr打分机制的分析
- 结合源码分析Solr&Lucene查询打分的工作流程
- Lucene/Solr打分机制的分析(二)
- cloudsolrclient的add方法分析
- solr dataimport 数据导入源码分析(五)
- Solr4.8.0源码分析(18)之缓存机制(一)
- Solr1.3的启动过程分析一
- solr dataimport 数据导入源码分析(十三)
- solr dataimport 数据导入源码分析(十四)
- Solr笔记(2)_Schema.xml和solrconfig.xml分析
- SolrLucene优劣势分析
- mariadb 5.5 threadpool 源码分析
- mysql5.5、5.6统计信息采样配置及分析表
- solr的高亮空指针异常分析
- Solr4.8.0源码分析(20)之SolrCloud的Recovery策略(一)
- solr学习之六--文本分析
- Solr4.8.0源码分析(23)之SolrCloud的Recovery策略(四)
- Lucene/Solr/ElasticSearch搜索问题案例分析
- solr multivalue的实现分析 推荐
- solr5.5教程-Analyzer、Tokenizer、Filter