stanford parser 中文句法分析
2014-01-05 22:26
330 查看
来自/article/10935474.html
为了进行中文句法分析，使用了 Stanford Parser。开始时，一头茫然。搜索网上资源也有很多异常，勉强处理了下，现将可以运行的代码粘贴如下，希望对于用到的人有所帮助：
[java] view
plaincopy
import java.util.*;
import java.io.StringReader;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
/**
 * Demo of Chinese constituency and dependency parsing with the Stanford
 * Lexicalized Parser and the {@code chinesePCFG} model.
 *
 * <p>With a filename argument the file is sentence-split, tokenized and
 * parsed ({@link #demoDP}); with no argument a hard-coded, pre-tokenized
 * Chinese sentence is parsed ({@link #demoAPI}).
 */
class ParserDemo {

  public static void main(String[] args) {
    LexicalizedParser lp =
        LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
    if (args.length > 0) {
      demoDP(lp, args[0]);
    } else {
      demoAPI(lp);
    }
  }

  /**
   * Sentence-splits and tokenizes {@code filename} with
   * {@link DocumentPreprocessor}, then prints each sentence's
   * phrase-structure tree and its CC-processed typed dependencies.
   *
   * @param lp       a loaded lexicalized parser (here: the Chinese PCFG model)
   * @param filename path of a plain-text file to parse
   */
  public static void demoDP(LexicalizedParser lp, String filename) {
    // BUG FIX: the original hard-coded PennTreebankLanguagePack (English),
    // which yields wrong grammatical structures for the Chinese model.
    // Use the language pack matching the loaded parser, as demoAPI does.
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    // You could also create a tokenizer here (as below) and pass it
    // to DocumentPreprocessor.
    for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
      Tree parse = lp.apply(sentence);
      parse.pennPrint();
      System.out.println();
      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
      // Parameterized type instead of the original raw Collection.
      Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
      System.out.println(tdl);
      System.out.println();
    }
  }

  /**
   * Parses a fixed, already-tokenized Chinese sentence and prints the tree
   * and typed dependencies in several formats.
   *
   * @param lp a loaded lexicalized parser (here: the Chinese PCFG model)
   */
  public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words.
    String[] sent = { "我", "是", "一名", "好", "学生", "。" };
    List<CoreLabel> rawWords = new ArrayList<CoreLabel>();
    for (String word : sent) {
      CoreLabel l = new CoreLabel();
      l.setWord(word);
      rawWords.add(l);
    }
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();
    // Derive the language pack from the parser so the grammatical-structure
    // conversion matches the (Chinese) model.
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();
    // Print once more via TreePrint: penn tree plus collapsed dependencies.
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed", tlp);
    tp.printTree(parse);
  }

  private ParserDemo() {} // static methods only
}
运行结果:
[plain] view
plaincopy
Loading parser from serialized file edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz ... done [3.3 sec].
(ROOT
(IP
(NP (PN 我))
(VP (VC 是)
(NP
(QP (CD 一名))
(ADJP (JJ 好))
(NP (NN 学生))))
(PU 。)))
[top(是-2, 我-1), root(ROOT-0, 是-2), nummod(学生-5, 一名-3), amod(学生-5, 好-4), attr(是-2, 学生-5)]
(ROOT
(IP
(NP (PN 我))
(VP (VC 是)
(NP
(QP (CD 一名))
(ADJP (JJ 好))
(NP (NN 学生))))
(PU 。)))
top(是-2, 我-1)
root(ROOT-0, 是-2)
nummod(学生-5, 一名-3)
amod(学生-5, 好-4)
attr(是-2, 学生-5)
为了进行中文句法分析，使用了 Stanford Parser。开始时，一头茫然。搜索网上资源也有很多异常，勉强处理了下，现将可以运行的代码粘贴如下，希望对于用到的人有所帮助：
[java] view
plaincopy
import java.util.*;
import java.io.StringReader;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
/**
 * Demo of Chinese constituency and dependency parsing with the Stanford
 * Lexicalized Parser and the {@code chinesePCFG} model.
 *
 * <p>With a filename argument the file is sentence-split, tokenized and
 * parsed ({@link #demoDP}); with no argument a hard-coded, pre-tokenized
 * Chinese sentence is parsed ({@link #demoAPI}).
 */
class ParserDemo {

  public static void main(String[] args) {
    LexicalizedParser lp =
        LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz");
    if (args.length > 0) {
      demoDP(lp, args[0]);
    } else {
      demoAPI(lp);
    }
  }

  /**
   * Sentence-splits and tokenizes {@code filename} with
   * {@link DocumentPreprocessor}, then prints each sentence's
   * phrase-structure tree and its CC-processed typed dependencies.
   *
   * @param lp       a loaded lexicalized parser (here: the Chinese PCFG model)
   * @param filename path of a plain-text file to parse
   */
  public static void demoDP(LexicalizedParser lp, String filename) {
    // BUG FIX: the original hard-coded PennTreebankLanguagePack (English),
    // which yields wrong grammatical structures for the Chinese model.
    // Use the language pack matching the loaded parser, as demoAPI does.
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    // You could also create a tokenizer here (as below) and pass it
    // to DocumentPreprocessor.
    for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
      Tree parse = lp.apply(sentence);
      parse.pennPrint();
      System.out.println();
      GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
      // Parameterized type instead of the original raw Collection.
      Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed(true);
      System.out.println(tdl);
      System.out.println();
    }
  }

  /**
   * Parses a fixed, already-tokenized Chinese sentence and prints the tree
   * and typed dependencies in several formats.
   *
   * @param lp a loaded lexicalized parser (here: the Chinese PCFG model)
   */
  public static void demoAPI(LexicalizedParser lp) {
    // This option shows parsing a list of correctly tokenized words.
    String[] sent = { "我", "是", "一名", "好", "学生", "。" };
    List<CoreLabel> rawWords = new ArrayList<CoreLabel>();
    for (String word : sent) {
      CoreLabel l = new CoreLabel();
      l.setWord(word);
      rawWords.add(l);
    }
    Tree parse = lp.apply(rawWords);
    parse.pennPrint();
    System.out.println();
    // Derive the language pack from the parser so the grammatical-structure
    // conversion matches the (Chinese) model.
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
    List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
    System.out.println(tdl);
    System.out.println();
    // Print once more via TreePrint: penn tree plus collapsed dependencies.
    TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed", tlp);
    tp.printTree(parse);
  }

  private ParserDemo() {} // static methods only
}
运行结果:
[plain] view
plaincopy
Loading parser from serialized file edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz ... done [3.3 sec].
(ROOT
(IP
(NP (PN 我))
(VP (VC 是)
(NP
(QP (CD 一名))
(ADJP (JJ 好))
(NP (NN 学生))))
(PU 。)))
[top(是-2, 我-1), root(ROOT-0, 是-2), nummod(学生-5, 一名-3), amod(学生-5, 好-4), attr(是-2, 学生-5)]
(ROOT
(IP
(NP (PN 我))
(VP (VC 是)
(NP
(QP (CD 一名))
(ADJP (JJ 好))
(NP (NN 学生))))
(PU 。)))
top(是-2, 我-1)
root(ROOT-0, 是-2)
nummod(学生-5, 一名-3)
amod(学生-5, 好-4)
attr(是-2, 学生-5)
相关文章推荐
- stanford parser中文句法分析时注意的问题
- stanford parser 中文句法分析
- Stanford parser入门1:单句中文句法分析
- 中文依存句法分析
- 使用Stanford CoreNLP的Python封装包处理中文(分词、词性标注、命名实体识别、句法树、依存句法分析)
- 【已解决】win10环境下基于nltk搭建stanford parser环境,进行中文依存句法分析
- 中文依存句法分析概述及应用
- 十二、教你如何利用强大的中文语言技术平台做依存句法和语义依存分析
- 我的中文句法分析器
- ZH奶酪:中文依存句法分析概述及应用
- 中文句法分析器 下载
- 中文依存句法分析概述及应用
- 【已解决】win10环境下基于nltk搭建stanford parser环境,进行中文依存句法分析
- 中文依存句法分析概述及应用
- fudannlp - 开源中文自然语言处理工具包|中文分词|词性标注|依存句法分析|指代消解 - Google Project Hosting
- Stanford parser入门2:中文句法路径提取
- 中文依存句法分析概述及应用
- 深入分析 Java 中的中文编码问题
- Java Web--深入分析中文编码问题
- 【Python】Python在文本分析中将中文和非中文进行分割