您的位置:首页 > 其它

Stanford CoreNLP CRFClassifier 模型加载和序列化

2017-07-05 09:52 567 查看
源代码位置:edu.stanford.nlp.ie.crf.CRFClassifier

模型加载

loadClassifier(String loadPath, Properties props)


/**
 * Loads a classifier from the file, classpath resource, or URL specified by loadPath. If loadPath ends in
 * .gz, uses a GZIPInputStream.
 * <p>
 * The input stream opened here is always closed before this method returns, including when
 * deserialization fails with an exception.
 *
 * @param loadPath file path, classpath resource, or URL of the serialized classifier
 * @param props properties forwarded unchanged to the stream-based {@code loadClassifier}
 * @throws ClassCastException propagated from the stream-based {@code loadClassifier}
 * @throws IOException if the stream cannot be opened, read, or closed
 * @throws ClassNotFoundException propagated from the stream-based {@code loadClassifier}
 */
// seg here, ner here
public void loadClassifier(String loadPath, Properties props) throws ClassCastException, IOException, ClassNotFoundException {
  // try-with-resources: the stream must not leak when loading throws part-way through.
  try (InputStream is = IOUtils.getInputStreamFromURLOrClasspathOrFileSystem(loadPath)) {
    Timing t = new Timing();
    loadClassifier(is, props);
    t.done(log, "Loading classifier from " + loadPath);
  }
}


loadClassifier(ObjectInputStream ois, Properties props)

/**
 * Loads a classifier from the specified ObjectInputStream. This version works
 * quietly (unless VERBOSE is true). If props is non-null then any properties
 * it specifies override those in the serialized file. However, only some
 * properties are sensible to change (you shouldn't change how features are
 * defined).
 * <p>
 * Objects are read from the stream in this order: label indices, class index,
 * feature index, flags, embeddings (only if {@code flags.useEmbedding}), feature
 * factories, window size, weights, known lower-case words, and finally the label
 * dictionary (only if {@code flags.labelDictionaryCutoff > 0}).
 * <p>
 * <i>Note:</i> This method does not close the ObjectInputStream. (But earlier
 * versions of the code used to, so beware....)
 *
 * @param ois stream positioned at the start of a serialized classifier; left open
 * @param props overrides applied to the deserialized flags; may be null
 * @throws ClassCastException if an object read from the stream does not have the expected type
 * @throws IOException if reading from the stream fails
 * @throws ClassNotFoundException if a serialized class cannot be resolved
 * @throws RuntimeIOException if an entry of the counted feature-factory format is not a FeatureFactory
 */
@Override
@SuppressWarnings( { "unchecked" })
// can't have right types in deserialization
// seg here, ner here
public void loadClassifier(ObjectInputStream ois, Properties props) throws ClassCastException, IOException,
    ClassNotFoundException {
  Object o = ois.readObject();
  // TODO: when we next break serialization, get rid of this fork and only read the List<Index> (i.e., keep first case)
  if (o instanceof List) {
    labelIndices = (List<Index<CRFLabel>>) o;
  } else {
    // Older format: the label indices were serialized as an array.
    Index<CRFLabel>[] indexArray = (Index<CRFLabel>[]) o;
    labelIndices = new ArrayList<>(indexArray.length);
    Collections.addAll(labelIndices, indexArray);
  }
  classIndex = (Index<String>) ois.readObject();
  featureIndex = (Index<String>) ois.readObject();
  flags = (SeqClassifierFlags) ois.readObject();
  // This check uses the flags exactly as serialized; props have not been applied yet.
  if (flags.useEmbedding) {
    embeddings = (Map<String, double[]>) ois.readObject();
  }

  // The feature factories have been stored in three different shapes over time.
  Object featureFactory = ois.readObject();
  if (featureFactory instanceof List) {
    // Cast the object that was just read. Casting the featureFactories field itself
    // would silently discard the deserialized list.
    featureFactories = ErasureUtils.uncheckedCast(featureFactory);
  } else if (featureFactory instanceof FeatureFactory) {
    featureFactories = Generics.newArrayList();
    featureFactories.add((FeatureFactory) featureFactory);
  } else if (featureFactory instanceof Integer) {
    // this is the current format (2014) since writing list didn't work (see note in serializeClassifier).
    int size = (Integer) featureFactory;
    featureFactories = Generics.newArrayList(size);
    for (int i = 0; i < size; ++i) {
      featureFactory = ois.readObject();
      if (!(featureFactory instanceof FeatureFactory)) {
        throw new RuntimeIOException("Should have FeatureFactory but got " + featureFactory.getClass());
      }
      featureFactories.add((FeatureFactory) featureFactory);
    }
  }

  // log.info("properties passed into CRF's loadClassifier are:" + props);
  if (props != null) {
    flags.setProperties(props, false);
  }

  windowSize = ois.readInt();
  weights = (double[][]) ois.readObject();

  // Reuse the deserialized set when it already has the required type; otherwise copy it.
  Set<String> lcWords = (Set<String>) ois.readObject();
  if (lcWords instanceof MaxSizeConcurrentHashSet) {
    knownLCWords = (MaxSizeConcurrentHashSet<String>) lcWords;
  } else {
    knownLCWords = new MaxSizeConcurrentHashSet<>(lcWords);
  }

  reinit();

  // NOTE(review): this check runs after props have been applied to flags, so an
  // overridden labelDictionaryCutoff decides whether another object is read here.
  // Confirm this matches what the serializer wrote.
  if (flags.labelDictionaryCutoff > 0) {
    labelDictionary = (LabelDictionary) ois.readObject();
  }

  if (VERBOSE) {
    log.info("windowSize=" + windowSize);
    log.info("flags=\n" + flags);
  }
}

模型序列化

/**
 * Writes the model data of this classifier to the given ObjectOutputStream.
 * <br>
 * (Only the data that represents the classifier model is written, not the
 * whole classifier object, because the classifier is a processor.)
 * <p>
 * Write order: label indices, class index, feature index, flags, embeddings
 * (only if {@code flags.useEmbedding}), the number of feature factories followed
 * by each factory, window size, weights, known lower-case words, and the label
 * dictionary (only if it is non-null).
 *
 * @param oos stream to write the model to; this method does not close it
 * @throws RuntimeIOException wrapping any IOException raised while writing
 */
@Override
public void serializeClassifier(ObjectOutputStream oos) {
  try {
    oos.writeObject(labelIndices);
    oos.writeObject(classIndex);
    oos.writeObject(featureIndex);
    oos.writeObject(flags);
    if (flags.useEmbedding) {
      oos.writeObject(embeddings);
    }

    // Writing the collection of FeatureFactory objects in one go did not work:
    // the resulting classifier did not have the lexicon (distsim object) saved
    // correctly. So the count is written first (boxed as an Integer), followed
    // by each factory as its own object.
    final int factoryCount = featureFactories.size();
    oos.writeObject(factoryCount);
    for (int idx = 0; idx < factoryCount; idx++) {
      oos.writeObject(featureFactories.get(idx));
    }

    oos.writeInt(windowSize);
    oos.writeObject(weights);
    oos.writeObject(knownLCWords);

    // The label dictionary is optional and is only written when present.
    if (labelDictionary != null) {
      oos.writeObject(labelDictionary);
    }
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe);
  }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息