stanford coreNLP CRFClassifier 模型加载和序列化
2017-07-05 09:52
567 查看
源代码位置:edu.stanford.nlp.ie.crf.CRFClassifier
模型加载
loadClassifier(String loadPath, Properties props)
/** * Loads a classifier from the file, classpath resource, or URL specified by loadPath. If loadPath ends in * .gz, uses a GZIPInputStream. */ //seg here ,ner here public void loadClassifier(String loadPath, Properties props) throws ClassCastException, IOException, ClassNotFoundException { InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(loadPath); Timing t = new Timing(); loadClassifier(is, props); is.close(); t.done(log, "Loading classifier from " + loadPath); }
loadClassifier(ObjectInputStream ois, Properties props)
/**
 * Loads a classifier from the specified ObjectInputStream. This version works
 * quietly (unless VERBOSE is true). If props is non-null then any properties
 * it specifies override those in the serialized file. However, only some
 * properties are sensible to change (you shouldn't change how features are
 * defined).
 * <p>
 * <i>Note:</i> This method does not close the ObjectInputStream. (But earlier
 * versions of the code used to, so beware....)
 *
 * @param ois   stream positioned at the start of a serialized CRF model
 * @param props optional property overrides applied after the flags are read (may be null)
 * @throws ClassCastException     if the serialized data contains unexpected types
 * @throws IOException            if the stream cannot be read
 * @throws ClassNotFoundException if a serialized class is missing from the classpath
 */
@Override
@SuppressWarnings( { "unchecked" })
// can't have right types in deserialization
//seg here,ner here
public void loadClassifier(ObjectInputStream ois, Properties props) throws ClassCastException, IOException,
    ClassNotFoundException {
  Object o = ois.readObject();
  // TODO: when we next break serialization, get rid of this fork and only read the List<Index> (i.e., keep first case)
  if (o instanceof List) {
    labelIndices = (List<Index<CRFLabel>>) o;
  } else {
    // older serialized models stored an array of label indices rather than a List
    Index<CRFLabel>[] indexArray = (Index<CRFLabel>[]) o;
    labelIndices = new ArrayList<>(indexArray.length);
    Collections.addAll(labelIndices, indexArray);
  }
  classIndex = (Index<String>) ois.readObject();
  featureIndex = (Index<String>) ois.readObject();
  flags = (SeqClassifierFlags) ois.readObject();
  if (flags.useEmbedding) {
    embeddings = (Map<String, double[]>) ois.readObject();
  }
  Object featureFactory = ois.readObject();
  if (featureFactory instanceof List) {
    // BUG FIX: the original cast the still-unassigned field featureFactories to
    // itself (a no-op self-assignment), discarding the list just read from the
    // stream. Cast the deserialized object instead.
    featureFactories = ErasureUtils.uncheckedCast(featureFactory);
  } else if (featureFactory instanceof FeatureFactory) {
    // single-factory legacy format: wrap it in a one-element list
    featureFactories = Generics.newArrayList();
    featureFactories.add((FeatureFactory) featureFactory);
  } else if (featureFactory instanceof Integer) {
    // current format (2014): a count followed by that many FeatureFactory
    // objects, since writing the list didn't work (see note in serializeClassifier)
    int size = (Integer) featureFactory;
    featureFactories = Generics.newArrayList(size);
    for (int i = 0; i < size; ++i) {
      featureFactory = ois.readObject();
      if (!(featureFactory instanceof FeatureFactory)) {
        throw new RuntimeIOException("Should have FeatureFactory but got " + featureFactory.getClass());
      }
      featureFactories.add((FeatureFactory) featureFactory);
    }
  }
  // apply caller-supplied overrides AFTER the serialized flags have been read,
  // so props wins over the stored configuration
  if (props != null) {
    flags.setProperties(props, false);
  }
  windowSize = ois.readInt();
  weights = (double[][]) ois.readObject();
  Set<String> lcWords = (Set<String>) ois.readObject();
  if (lcWords instanceof MaxSizeConcurrentHashSet) {
    knownLCWords = (MaxSizeConcurrentHashSet<String>) lcWords;
  } else {
    // older models stored a plain Set; wrap it in the bounded concurrent variant
    knownLCWords = new MaxSizeConcurrentHashSet<>(lcWords);
  }
  reinit();
  // NOTE(review): this read condition (cutoff > 0) must mirror the null check used
  // on the write side in serializeClassifier — confirm they cannot disagree
  if (flags.labelDictionaryCutoff > 0) {
    labelDictionary = (LabelDictionary) ois.readObject();
  }
  if (VERBOSE) {
    log.info("windowSize=" + windowSize);
    log.info("flags=\n" + flags);
  }
}
模型序列化
/**
 * Serialize the classifier to the given ObjectOutputStream.
 * <br>
 * (Since the classifier is a processor, we don't want to serialize the
 * whole classifier but just the data that represents a classifier model.)
 * <p>
 * The write order here must exactly mirror the read order in
 * {@code loadClassifier(ObjectInputStream, Properties)}.
 *
 * @param oos stream to write the model to; not closed by this method
 * @throws RuntimeIOException wrapping any IOException from the stream
 */
@Override
public void serializeClassifier(ObjectOutputStream oos) {
  try {
    oos.writeObject(labelIndices);
    oos.writeObject(classIndex);
    oos.writeObject(featureIndex);
    oos.writeObject(flags);
    // embeddings are written only when the flags say they are in use,
    // matching the conditional read on the load side
    if (flags.useEmbedding) {
      oos.writeObject(embeddings);
    }
    // For some reason, writing out the array of FeatureFactory
    // objects doesn't seem to work. The resulting classifier
    // doesn't have the lexicon (distsim object) correctly saved. So now custom write the list:
    // a boxed Integer count (writeObject, so the loader can distinguish formats
    // via instanceof), followed by each factory individually.
    oos.writeObject(featureFactories.size());
    for (FeatureFactory ff : featureFactories) {
      oos.writeObject(ff);
    }
    oos.writeInt(windowSize);
    oos.writeObject(weights);
    oos.writeObject(knownLCWords);
    // NOTE(review): written when labelDictionary != null, but the loader reads it
    // when flags.labelDictionaryCutoff > 0 — these conditions look like they could
    // disagree and corrupt the stream; confirm they are kept in sync elsewhere
    if (labelDictionary != null) {
      oos.writeObject(labelDictionary);
    }
  } catch (IOException e) {
    // preserve the cause so callers see the underlying I/O failure
    throw new RuntimeIOException(e);
  }
}
相关文章推荐
- Stanford CoreNLP API
- [译] 第二十天:Stanford CoreNLP - 用Java对Twitter进行情感分析
- Eclipse下使用Stanford CoreNLP的方法
- 将Stanford CoreNLP的解析结果构造为json格式
- Stanford CoreNLP--Named Entities Recognizer(NER)
- [置顶] 使用Stanford CoreNLP工具包处理中文
- 用 Python 和 Stanford CoreNLP 进行中文自然语言处理
- Stanford CoreNLP 介绍
- 用Python+StanfordCoreNLP做中文命名实体分析
- Stanford CoreNLP--功能列表
- 使用Stanford CoreNLP工具包处理中文
- 斯坦福大学Stanford coreNLP 宾州树库依存句法标注体系
- 中文语料下Stanford CoreNLP开发环境配置和各组件使用例子
- 斯坦福 stanford coreNLP 中的PCFG parser-lexparser
- 命令行调用StanfordCoreNLP3.8.0中文+JDK1.9版本
- Stanford CoreNLP – a suite of core NLP tools
- Stanford CoreNLP学习日记1
- 如何使用Stanford CoreNlp做中文情感分析
- Eclipse下使用Stanford CoreNLP的方法
- stanford CoreNLP 命名实体识别NER学习笔记