您的位置：首页 > 编程语言 > Java开发

Stanford NLP自然语言处理demo，附maven dependency

2015-10-16 10:15 429 查看

################################################ Demo ######################################

/*

* To change this license header, choose License Headers in Project Properties.

* To change this template file, choose Tools | Templates

* and open the template in the editor.

*/

package framework.webapp.commons.utils;

import edu.stanford.nlp.ling.CoreAnnotations;

import edu.stanford.nlp.ling.CoreLabel;

import edu.stanford.nlp.pipeline.Annotation;

import edu.stanford.nlp.pipeline.StanfordCoreNLP;

import edu.stanford.nlp.util.CoreMap;

import java.util.ArrayList;

import java.util.List;

import java.util.Properties;

import org.apache.commons.logging.Log;

import org.apache.commons.logging.LogFactory;

/**

* 将句子中的名词取出，复数形式自动转为单数

* @author mly

*/

public class NLPUtil {

private static final Log log = LogFactory.getLog(NLPUtil.class);

public static StanfordCoreNLP pipeline;

static {

// creates a StanfordCoreNLP object, with POS tagging, lemmatization, NER, parsing, and coreference resolution

Properties props = new Properties();

props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

pipeline = new StanfordCoreNLP(props);

}

public static StanfordCoreNLP getStanfordCoreNLP() {

if (pipeline == null) {

pipeline = new StanfordCoreNLP();

}

return pipeline;

}

public static List<String> getTagsForSentence(String text) {

Annotation document = new Annotation(text);

// run all Annotators on this text

getStanfordCoreNLP().annotate(document);

// these are all the sentences in this document

// a CoreMap is essentially a Map that uses class objects as keys and has values with custom types

List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

StringBuilder sb = new StringBuilder();

List<String> tags = new ArrayList<String>();

for (CoreMap sentence : sentences) {

// traversing the words in the current sentence

// a CoreLabel is a CoreMap with additional token-specific methods

4000

String prevNeToken = "O";

String currNeToken = "O";

boolean newToken = true;

for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {

currNeToken = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

String word = token.get(CoreAnnotations.TextAnnotation.class);

String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

System.out.println("word:" + word + " currNeToken:" + currNeToken+ " pos:" +pos);

if (currNeToken.equals("NUMBER")) {

continue;

}

// Strip out "O"s completely, makes code below easier to understand

if (currNeToken.equals("O")) {

if (pos.startsWith("NN")) {

if(pos.equals("NNS")){

tags.add(InflectorUtil.getInstance().singularize(word));

}else{

tags.add(word);

}

}

if (!prevNeToken.equals("O") && (sb.length() > 0)) {

log.info("'"+sb.toString()+"' is a "+prevNeToken);

tags.add(sb.toString());

sb.setLength(0);

newToken = true;

}

continue;

}

if (newToken) {

prevNeToken = currNeToken;

newToken = false;

sb.append(word);

continue;

}

if (currNeToken.equals(prevNeToken)) {

sb.append(" " + word);

} else {

log.info("'"+sb.toString()+"' is a "+prevNeToken);

tags.add(sb.toString());

sb.setLength(0);

newToken = true;

}

prevNeToken = currNeToken;

}

if (!prevNeToken.equals("O") && (sb.length() > 0)) {

//handleEntity(prevNeToken, sb, tokens);

log.info("'" + sb.toString() + "' is a " + prevNeToken);

tags.add(sb.toString());

sb.setLength(0);

newToken = true;

}

}

log.info(tags.toString());

return tags;

}
}

################################################ Demo end ######################################

################################################ maven dependency ######################################

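<!-- CoreNLP 类库本身 -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.5.2</version>
</dependency>

<!-- 模型文件（models classifier 只包含模型，需与上面的主依赖同时引入） -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.5.2</version>
<classifier>models</classifier>
</dependency>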

################################################ maven dependency end ######################################

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签： java nlp

相关文章推荐

新的分享

章节导航