您的位置:首页 > Web前端 > Node.js

Nodejs:mongo-connector同步mongo数据;使用elasticsearch搜索

2017-03-01 00:00 1171 查看
摘要: 使用mongo-connector同步数据到elasticsearch;使用ik分词,实现同义词,拼音搜索;用supervisor管理进程;

我使用的是elasticsearch5.2.1版本的,对应的ik和pinyin也是5.2.1;

1,下载并安装elasticsearch;

见:https://www.elastic.co/guide/en/elasticsearch/reference/current/_installation.html

2,下载并安装mongo-connector;

见:https://github.com/mongodb-labs/mongo-connector

pip install mongo-connector

Target System
Install Command
MongoDB
pip install mongo-connector

Elasticsearch 1.x
pip install 'mongo-connector[elastic]'

Amazon Elasticsearch 1.x Service
pip install 'mongo-connector[elastic-aws]'

Elasticsearch 2.x
pip install 'mongo-connector[elastic2]'

Amazon Elasticsearch 2.x Service
pip install 'mongo-connector[elastic2-aws]'

Elasticsearch 5.x
pip install 'mongo-connector[elastic5]'

Solr
pip install 'mongo-connector[solr]'

3,安装对应版本的doc_manager

Elasticsearch 1.x: https://github.com/mongodb-labs/elastic-doc-manager

Elasticsearch 2.x and 5.x: https://github.com/mongodb-labs/elastic2-doc-manager

通过配置文件启动mongo-connector;配置文件(例如 /etc/mongo-connector-conf.json)的内容如下,其中以"__"开头的键表示该选项被禁用(不生效):

{

"mainAddress": "localhost:27018",
"oplogFile": "/var/log/oplog.timestamp",
"noDump": false,
"batchSize": -1,
"verbosity": 0,
"continueOnError": false,

"logging": {
"type": "file",
"filename": "/var/log/mongo-connector.log",
"__format": "%(asctime)s [%(levelname)s] %(name)s:%(lineno)d - %(message)s",
"__rotationWhen": "D",
"__rotationInterval": 1,
"__rotationBackups": 10,

"__type": "syslog",
"__host": "localhost:514"
},

"namespaces": {
"database.doc1": true,
"database.doc2": true
},

"docManagers": [
{
"docManager": "elastic2_doc_manager",
"targetURL": "user_name:pass@127.0.0.1:9200",
"__bulkSize": 1000,
"__uniqueKey": "_id",
"__autoCommitInterval": null,
"args": {
"clientOptions": {"timeout": 200}
}
}
]
}

配置完成后,在supervisord中配置启动命令:

mongo-connector -c /etc/mongo-connector-conf.json

详见下文第7步"安装supervisord,配置启动项"。

4,安装ik和pinyin插件并创建映射;

sudo wget https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v5.2.1/elasticsearch-analysis-ik-5.2.1.zip
sudo wget https://github.com/medcl/elasticsearch-analysis-pinyin/releases/download/v5.2.1/elasticsearch-analysis-pinyin-5.2.1.zip

具体安装见:https://github.com/medcl/elasticsearch-analysis-ik

没有安装mvn请安装mvn;这里不做详细描述;

创建映射:

需注意:synonyms_path 是相对于 elasticsearch/config/ 目录的路径,即下面配置的同义词文件实际位于 elasticsearch/config/analysis/synonym.txt

//若安装了认证插件x-pack,操作时需带上用户名和密码
curl -XPUT 'username:pass@localhost:9200/my_index?pretty' -H 'Content-Type: application/json' -d'
{
"settings": {
"analysis": {
"analyzer": {
"ik_smart_analyzer": {
"type":      "custom",
"tokenizer": "ik_smart",
"filter" : ["lowercase","synonym"]
},
"ik_max_analyzer": {
"type":      "custom",
"tokenizer": "ik_max_word",
"filter" : ["lowercase","synonym"]
},
"pinyin_analyzer": {
"type":      "custom",
"tokenizer": "ik_smart",
"filter" : ["my_pinyin","word_delimiter"]
}
},
"filter" : {
"synonym" : {
"type" : "synonym",
"ignore_case":true,
"synonyms_path" : "analysis/synonym.txt"
},
"my_pinyin": {
"type": "pinyin",
"first_letter" : "none",
"padding_char": " "
}
}
}
}
}
'

analysis/synonym.txt文件:

js,javascript
番茄,西红柿

为字段应用分词:先在 fulltext 这个 type 中为字段设置 pinyin 子字段;但使用 mongo-connector 同步数据后,同步所用的 type(下面的 my_type)并不会自动带上 pinyin 子字段的映射,因此需要为该 type 再创建一次映射;

curl -XPOST http://username:pass@localhost:9200/my_index/fulltext/_mapping -d'
{
"fulltext": {
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}
}'

curl -XPOST http://username:pass@localhost:9200/my_index/my_type/_mapping -d'
{
"_all": {
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"term_vector": "no",
"store": "false"
},
"properties": {
"title": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
},
"tags": {
"type": "text",
"analyzer": "ik_smart_analyzer",
"search_analyzer": "ik_smart_analyzer",
"include_in_all": "true",
"boost": 10,
"fields": {
"pinyin": {
"type": "text",
"store": "no",
"term_vector": "with_offsets",
"analyzer": "pinyin_analyzer",
"boost": 10
}
}
}
}
}'


5,在浏览器中测试分词效果

测试ik和同义词

http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=今天天气真好
http://localhost:9200/my_index/_analyze?analyzer=ik_smart_analyzer&text=js

测试拼音:

http://localhost:9200/my_index/_analyze?analyzer=pinyin_analyzer&text=今天天气真好


6,启用mongo副本集,此命令只为本地测试用:

//创建副本集
sudo mongod --replSet rs0 --port 27018 --dbpath /data/mongodb/data27018

//启动副本集,此为示例,后面使用supervisord统一管理
sudo mongod --port 27018 --dbpath  /data/mongodb/data27018 --replSet rs0

副本集启动成功后,进入mongo shell,初始化副本集

config = { _id:"rs0", members:[
{_id:0,host:"127.0.0.1:27018"}
]};


rs.initiate(config)

可使用rs.help()查看帮助。

7,安装supervisord,配置启动项:

详见:http://supervisord.org

安装好supervisord后,配置supervisord.conf:

;elasticsearch进程管理
[program:elasticsearch]
command = /usr/local/elasticsearch-5.2.1/bin/elasticsearch
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/es.err.log
stdout_logfile=/var/log/es.out.log

;mongo副本集进程管理
[program:mongo-replset]
command =/usr/bin/mongod --port 27018 --dbpath  /data/mongodb/data27018 --replSet rs0
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_replset.err.log
stdout_logfile=/var/log/mongo_replset.out.log

;mongo-connector进程管理
[program:mongo-connector]
command =mongo-connector -c /etc/mongo-connector-conf.json
user = user
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/mongo_connector.err.log
stdout_logfile=/var/log/mongo_connector.out.log

;主程序
[program:myapp]
command = node server.js
environment=NODE_ENV=development   ;环境变量:开发环境
user = lele
directory = /Users/wo/WebstormProjects/myapp
stopsignal = TERM
autostart = true
autorestart = true
stderr_logfile=/var/log/myapi.err.log
stdout_logfile=/var/log/myapi.out.log


注意:配置文件中的log文件需要手动创建;

启动supervisord:

//启动守护程序
sudo supervisord -c /etc/supervisord.conf
//启动所有服务
sudo supervisorctl restart all

//启动单个服务
sudo supervisorctl restart elasticsearch

若启动失败,可查看supervisor日志排错;

启动成功后,可根据elasticsearch的log文件查看运行状态以及是否同步;

8,测试查询

curl -XGET 'http://localhost:9200/database/col1/_search?q=tags:text'


9,node端的查询

安装elasticsearch-js:https://github.com/elastic/elasticsearch-js

elasticsearch-js API:

https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html

连接elasticsearch

// Load the elasticsearch-js client library.
var elasticsearch = require('elasticsearch');
// Create the shared client used by the search code below.
// It talks to the Elasticsearch node at localhost:9200, with the client's
// log level set to 'trace'.
var client = new elasticsearch.Client({
host: 'localhost:9200',
log: 'trace'
});

一个例子:

exports.search = function (word,size, callback) {

//查询 status==100 AND (visible==v1 OR visible==v2)
//^表示elasticsearch boost运算符
//~表示elasticsearch fuzzy运算符
var query_str = "status:100 AND (visible:v1 OR visible:v2) AND 西红柿^20好吃吗 ~1 NOT _id:00001"

var body = {
size:size,
query: {
query_string: {
//查询的字段
fields: ["title^20", "tags^15","title.pinyin", "tags.pinyin"],
query: query_str,
use_dis_max: true
}
},
//设置返回
_source: ["title", "tags", "digest", "top"],
//设置排序
sort:[
"_score",
{ "top" : "desc" }
],
//设置高亮
highlight: {
pre_tags: ['<em>'],
post_tags: ['</em>'],
fields: {
title: {},
condition: {},
digest: {}
},
require_field_match: false
}
};

client.search({
index: 'my_index',
type: 'my_type',
body: body
}).then(function (resp) {
callback(resp.hits);
}, function (err) {
console.trace(err.message);
callback(null);
});
};

写的比较简单,以代码为主,给大家一个参考;

参考资料:

elasticsearch query string:

https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html

elasticsearch-js API:

https://www.elastic.co/guide/en/elasticsearch/client/javascript-api/current/api-reference.html

supervisord:http://supervisord.org

mongodb 最新版本高可用解决方案-replica sets副本集部署详细过程

ElasticSearch5中文分词(IK)

使用 Elasticsearch 实现博客站内搜索:

https://imququ.com/post/elasticsearch.html
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息