solr中如何定义自己的解析器插件(QParserPlugin)
2012-04-11 12:28
232 查看
/*****************************************************/ >solr中如何定义自己的解析器插件 /*****************************************************/ 0.为什么要自定义自己的解析器插件 /*****************************************************/ // 因为solr默认的LuceneQParserPlugin解析器插件是不支持很多高级查询的如 // SpanQuery,MoreLikeThis等,如果要使用这些特性就必须自定的解析器插件。 /*****************************************************/ 1.显示自定义解析器插件的方法 /*****************************************************/ //定义的插件必须继承自org.apache.solr.search.QParserPlugin; public class MyParserPlugin extends QParserPlugin { @Override public void init(NamedList args) { } @Override public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { //创建出内部类就可以了 //return new MyParserPlugin.MyParser(qstr, localParams, params, req); } //继承自org.apache.solr.search.Qparser; private class MyParser extends QParser{ public MyParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } @Override public Query parse() throws ParseException { //返回org.apache.lucene.search.Query;就可以了其他的不用管 return null; } } } //如何配置自定义的解析器插件 // <queryParser name="simple" class="com.zyb.javacc.QParserPlugin" /> \ // 然后在查询的时候需要指定defType这个属性进行指定是哪个解析器来解析你的检索式 // 如:http://localhost:8080/testSolr/select/q=*:*&defType=simple; // 或者也可以在指定的查询处理器中默认的指定也是可以的 //<requestHandler name="search" class="solr.SearchHandler" default="true"> // <lst name="defaults"> // <str name="defType">simple</str> //通过在这里指定后就不用在Url后面显示的指定 // <str name="echoParams">explicit</str> // <int name="rows">10</int> // </lst> //</requestHandler> /*****************************************************/ 2.原理说明 /*****************************************************/ public abstract class QParserPlugin implements NamedListInitializedPlugin { /** internal use - name of the default parser */ public static String DEFAULT_QTYPE = LuceneQParserPlugin.NAME;//solr默认的解析器插件 /** internal use - name to class mappings of builtin parsers */ //solr中存在的查询解析器插件 public static final Object[] standardPlugins = { // LuceneQParserPlugin.NAME, LuceneQParserPlugin.class, OldLuceneQParserPlugin.NAME, OldLuceneQParserPlugin.class, FunctionQParserPlugin.NAME, FunctionQParserPlugin.class, PrefixQParserPlugin.NAME, PrefixQParserPlugin.class, BoostQParserPlugin.NAME, BoostQParserPlugin.class, DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class, ExtendedDismaxQParserPlugin.NAME, ExtendedDismaxQParserPlugin.class, FieldQParserPlugin.NAME, FieldQParserPlugin.class, RawQParserPlugin.NAME, RawQParserPlugin.class, TermQParserPlugin.NAME, TermQParserPlugin.class, NestedQParserPlugin.NAME, NestedQParserPlugin.class, FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class, SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class, SpatialBoxQParserPlugin.NAME, SpatialBoxQParserPlugin.class, }; /** return a {@link QParser} */ public abstract QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req); //采用lucene解析器的实现 public class LuceneQParserPlugin extends QParserPlugin { public static String NAME = "lucene"; public void init(NamedList args) { } @Override public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { // return new LuceneQParser(qstr, localParams, params, req); } } //继承自QParser会带来很多好处因为他已经帮我们实现了部分我们不需要实现的东西我们只需要关心怎么去生成 //org.apache.lucene.search.Query这个接口的实现类就可以了 //QParser是一个抽象类他可以通过this.qstr得到检索式如*:*等 class LuceneQParser extends QParser { String sortStr; // SolrQueryParser lparser;//SolrQueryParser还是继承自org.apache.lucene.queryParser;所以还是采用的lucene //的原有解析器他是不能够满足做spanQuery,moreLikeThis这样的查询的 //除非自己写扩展的查询解析器插件才可以实现 public LuceneQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } //查询解析器的目的就是为了返回一个org.apache.lucene.search.Query的实现类就可以了 //那么这就是和solr无关的东西了只要构造了Query的实现类的对象就可以顺利的做任何类型的查询 @Override public Query parse() throws ParseException { String qstr = getString(); String defaultField = getParam(CommonParams.DF); if (defaultField==null) { defaultField = getReq().getSchema().getDefaultSearchFieldName(); } lparser = new SolrQueryParser(this, defaultField); // these could either be checked & set here, or in the SolrQueryParser constructor String opParam = getParam(QueryParsing.OP); if (opParam != null) { lparser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR); } else { // try to get default operator from schema QueryParser.Operator operator = getReq().getSchema().getSolrQueryParser(null).getDefaultOperator(); lparser.setDefaultOperator(null == operator ? QueryParser.Operator.OR : operator); } return lparser.parse(qstr); } @Override public String[] getDefaultHighlightFields() { return new String[]{lparser.getField()}; } } //这里继承好像没什么用处 class OldLuceneQParser extends LuceneQParser { String sortStr; public OldLuceneQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { super(qstr, localParams, params, req); } @Override public Query parse() throws ParseException { // handle legacy "query;sort" syntax if (getLocalParams() == null) { String qstr = getString(); sortStr = getParams().get(CommonParams.SORT); if (sortStr == null) { // sort may be legacy form, included in the query string List<String> commands = StrUtils.splitSmart(qstr,';'); if (commands.size() == 2) { qstr = commands.get(0); sortStr = commands.get(1); } else if (commands.size() == 1) { // This is need to support the case where someone sends: "q=query;" qstr = commands.get(0); } else if (commands.size() > 2) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "If you want to use multiple ';' in the query, use the 'sort' param."); } } setString(qstr); } return super.parse(); } @Override public SortSpec getSort(boolean useGlobal) throws ParseException { SortSpec sort = super.getSort(useGlobal); if (sortStr != null && sortStr.length()>0 && sort.getSort()==null) { Sort oldSort = QueryParsing.parseSort(sortStr, getReq()); if( oldSort != null ) { sort.sort = oldSort; } } return sort; } } } /*****************************************************/ 3.solr查询内部实现 /*****************************************************/ org.apache.solr.handler.component.searchHandler.@Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception, ParseException, InstantiationException, IllegalAccessException { // int sleep = req.getParams().getInt("sleep",0); // if (sleep > 0) {log.error("SLEEPING for " + sleep); Thread.sleep(sleep);} //ResponseBuilder rb = new ResponseBuilder();//在查询的时候最重要的对象 rb.req = req; rb.rsp = rsp; rb.components = components; rb.setDebug(req.getParams().getBool(CommonParams.DEBUG_QUERY, false)); final RTimer timer = rb.isDebug() ? new RTimer() : null; if (timer == null) { // non-debugging prepare phase //for( SearchComponent c : components ) { // c.prepare(rb);//准备查询组件 //} } else { // debugging prepare phase RTimer subt = timer.sub( "prepare" ); // for( SearchComponent c : components ) { // rb.setTimer( subt.sub( c.getName() ) ); //c.prepare(rb); // rb.getTimer().stop(); // } subt.stop(); } if (rb.shards == null) { // a normal non-distributed request // The semantics of debugging vs not debugging are different enough that // it makes sense to have two control loops if(!rb.isDebug()) { // Process 根据组件处理查询请求 //for( SearchComponent c : components ) { // c.process(rb); // } } else { // Process RTimer subt = timer.sub( "process" ); //for( SearchComponent c : components ) { // rb.setTimer( subt.sub( c.getName() ) ); // c.process(rb); // rb.getTimer().stop(); //} subt.stop(); timer.stop(); // add the timing info if( rb.getDebugInfo() == null ) { rb.setDebugInfo( new SimpleOrderedMap<Object>() ); } rb.getDebugInfo().add( "timing", timer.asNamedList() ); } } else { // a distributed request HttpCommComponent comm = new HttpCommComponent(); if (rb.outgoing == null) { rb.outgoing = new LinkedList<ShardRequest>(); } rb.finished = new ArrayList<ShardRequest>(); int nextStage = 0; do { rb.stage = nextStage; nextStage = ResponseBuilder.STAGE_DONE; // call all components for( SearchComponent c : components ) { // the next stage is the minimum of what all components report nextStage = Math.min(nextStage, c.distributedProcess(rb)); } // check the outgoing queue and send requests while (rb.outgoing.size() > 0) { // submit all current request tasks at once while (rb.outgoing.size() > 0) { ShardRequest sreq = rb.outgoing.remove(0); sreq.actualShards = sreq.shards; if (sreq.actualShards==ShardRequest.ALL_SHARDS) { sreq.actualShards = rb.shards; } sreq.responses = new ArrayList<ShardResponse>(); // TODO: map from shard to address[] for (String shard : sreq.actualShards) { ModifiableSolrParams params = new ModifiableSolrParams(sreq.params); params.remove(ShardParams.SHARDS); // not a top-level request params.remove("indent"); params.remove(CommonParams.HEADER_ECHO_PARAMS); params.set(ShardParams.IS_SHARD, true); // a sub (shard) request String shardHandler = req.getParams().get(ShardParams.SHARDS_QT); if (shardHandler == null) { params.remove(CommonParams.QT); } else { params.set(CommonParams.QT, shardHandler); } comm.submit(sreq, shard, params); } } // now wait for replies, but if anyone puts more requests on // the outgoing queue, send them out immediately (by exiting // this loop) while (rb.outgoing.size() == 0) { ShardResponse srsp = comm.takeCompletedOrError(); if (srsp == null) break; // no more requests to wait for // Was there an exception? If so, abort everything and // rethrow if (srsp.getException() != null) { comm.cancelAll(); if (srsp.getException() instanceof SolrException) { throw (SolrException)srsp.getException(); } else { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, srsp.getException()); } } rb.finished.add(srsp.getShardRequest()); // let the components see the responses to the request for(SearchComponent c : components) { c.handleResponses(rb, srsp.getShardRequest()); } } } for(SearchComponent c : components) { c.finishStage(rb); } // we are done when the next stage is MAX_VALUE } while (nextStage != Integer.MAX_VALUE); } } org.apache.solr.handler.component.QueryComponent.public void prepare(ResponseBuilder rb) throws IOException { //ResponseBuilder在查询的时候是在关键的对象 SolrQueryRequest req = rb.req; //现在的参数样子 //{params(indent=on&start=0&q=*:*&version=2.2&rows=10), // defaults(echoParams=explicit&rows=10&defType=simple)} // defaults表示在requestHandler节点中定义的默认参数 SolrParams params = req.getParams(); if (!params.getBool(COMPONENT_NAME, true)) { return; } SolrQueryResponse rsp = rb.rsp; // Set field flags String fl = params.get(CommonParams.FL); int fieldFlags = 0; if (fl != null) { fieldFlags |= SolrPluginUtils.setReturnFields(fl, rsp); } rb.setFieldFlags( fieldFlags ); //String defType = params.get(QueryParsing.DEFTYPE,QParserPlugin.DEFAULT_QTYPE); if (rb.getQueryString() == null) { rb.setQueryString( params.get( CommonParams.Q ) ); } try { //根据defType得到自定的解析插件对象 //QParser parser = QParser.getParser(rb.getQueryString(), defType, req); //rb.setQuery( parser.getQuery() );//通过getQuery()然后调用parse(str) //返回生成查询对象,关键在于怎么写 //rb.setSortSpec( parser.getSort(true) ); //rb.setQparser(parser); String[] fqs = req.getParams().getParams(CommonParams.FQ); if (fqs!=null && fqs.length!=0) { List<Query> filters = rb.getFilters(); if (filters==null) { filters = new ArrayList<Query>(fqs.length); } for (String fq : fqs) { if (fq != null && fq.trim().length()!=0) { QParser fqp = QParser.getParser(fq, null, req); filters.add(fqp.getQuery()); } } // only set the filters if they are not empty otherwise // fq=&someotherParam= will trigger all docs filter for every request // if filter cache is disabled if (!filters.isEmpty()) { rb.setFilters( filters ); } } } catch (ParseException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } // TODO: temporary... this should go in a different component. //如果有分布式查询就得到shards参数通过,进行切割 //String shards = params.get(ShardParams.SHARDS); //if (shards != null) { // List<String> lst = StrUtils.splitSmart(shards, ",", true); // rb.shards = lst.toArray(new String[lst.size()]); //} //String shards_rows = params.get(ShardParams.SHARDS_ROWS); //if(shards_rows != null) { //rb.shards_rows = Integer.parseInt(shards_rows); //} //String shards_start = params.get(ShardParams.SHARDS_START); //if(shards_start != null) { //rb.shards_start = Integer.parseInt(shards_start); //} boolean grouping = params.getBool(GroupParams.GROUP, false); if (!grouping) { return; } SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand(); SolrIndexSearcher searcher = rb.req.getSearcher(); GroupingSpecification groupingSpec = new GroupingSpecification(); rb.setGroupingSpec(groupingSpec); //TODO: move weighting of sort Sort groupSort = searcher.weightSort(cmd.getSort()); // groupSort defaults to sort String groupSortStr = params.get(GroupParams.GROUP_SORT); if (groupSort == null) { groupSort = new Sort(); } //TODO: move weighting of sort Sort sortWithinGroup = groupSortStr == null ? groupSort : searcher.weightSort(QueryParsing.parseSort(groupSortStr, req)); groupingSpec.setSortWithinGroup(sortWithinGroup); groupingSpec.setGroupSort(groupSort); String formatStr = params.get(GroupParams.GROUP_FORMAT, Grouping.Format.grouped.name()); Grouping.Format responseFormat; try { responseFormat = Grouping.Format.valueOf(formatStr); } catch (IllegalArgumentException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Illegal %s parameter", GroupParams.GROUP_FORMAT)); } groupingSpec.setResponseFormat(responseFormat); groupingSpec.setFields(params.getParams(GroupParams.GROUP_FIELD)); groupingSpec.setQueries(params.getParams(GroupParams.GROUP_QUERY)); groupingSpec.setGroupOffset(params.getInt(GroupParams.GROUP_OFFSET, 0)); groupingSpec.setGroupLimit(params.getInt(GroupParams.GROUP_LIMIT, 1)); groupingSpec.setOffset(rb.getSortSpec().getOffset()); groupingSpec.setLimit(rb.getSortSpec().getCount()); groupingSpec.setIncludeGroupCount(params.getBool(GroupParams.GROUP_TOTAL_COUNT, false)); groupingSpec.setMain(params.getBool(GroupParams.GROUP_MAIN, false)); groupingSpec.setNeedScore((cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0); groupingSpec.setTruncateGroups(params.getBool(GroupParams.GROUP_TRUNCATE, false)); } /*****************************************************/
相关文章推荐
- rcp(插件开发) 如何查找自己定义的扩展点
- rcp(插件开发) 如何查找自己定义的扩展点
- 如何定义自己的异常类
- 如何使用MITK定义的插件
- 关于ext用到了自己定义的插件时间(精确到秒)一些注意的地方
- cordova开发自己定义插件
- Jquery--如何创建自己的插件,详细注释版本
- 如何给自己的eclipse插件增加trace功能
- 如何使用Arrays.sort()对自己定义的类进行排序
- 如何自己编译生成hadoop的eclipse插件,如hadoop-eclipse-plugin-2.6.0.jar
- solr添加IK分词和自己定义词库
- 如何编写自己的插件?
- 如何在Visual Studio中开发自己的代码生成器插件
- 如何使用MITK定义的插件
- 如何定义一个自己的可复用的JS文件
- 编写自己的插件如何减少css文件的引入
- 如何去定义一个jquery插件
- 教你如何使用Struts2拦截器并且定义自己的拦截器
- CodeIgniter如何定义自己的Helper和Library
- Java如何定义自己的exception