Skip to content

Commit 8ded013

Browse files
committed
Set the sentenceIdx on each CoreLabel *before* building the SemanticGraph, so that the hashCodes don't get messed up by setting the sentenceIdx afterwards
1 parent 9914d88 commit 8ded013

File tree

1 file changed

+9 -15 lines changed

1 file changed

+9 -15 lines changed

src/edu/stanford/nlp/pipeline/CoNLLUReader.java

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -326,15 +326,14 @@ public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument doc) {
326326
// build sentences
327327
List<CoreMap> sentences = new ArrayList<>();
328328
for (CoNLLUSentence sent : doc.sentences) {
329-
sentences.add(convertCoNLLUSentenceToCoreMap(doc, sent));
329+
// pass in the sentences.size() so we can build the CoreLabels with the correct sentIndex()
330+
// this way, we don't mess up the hashCodes later
331+
sentences.add(convertCoNLLUSentenceToCoreMap(doc, sent, sentences.size()));
330332
}
331333
// set sentences
332334
finalAnnotation.set(CoreAnnotations.SentencesAnnotation.class, sentences);
333335
// build document wide CoreLabels list
334-
// TODO: do we need to put new SentenceIndexAnnotations on each of the IndexedWords?
335-
// TODO: what about document annotation?
336-
// We should confirm that setting the SentenceIndexAnnotation like this isn't
337-
// distorting any of the SemanticGraphs
336+
// TODO: should we set document annotation?
338337
List<CoreLabel> tokens = new ArrayList<>();
339338
finalAnnotation.set(CoreAnnotations.TokensAnnotation.class, tokens);
340339
int documentIdx = 0;
@@ -351,15 +350,9 @@ public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument doc) {
351350
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
352351
token.set(CoreAnnotations.TokenBeginAnnotation.class, documentIdx);
353352
token.set(CoreAnnotations.TokenEndAnnotation.class, documentIdx + 1);
354-
token.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIdx);
355353
tokens.add(token);
356354
documentIdx++;
357355
}
358-
if (sentence.containsKey(CoreAnnotations.EmptyTokensAnnotation.class)) {
359-
for (CoreLabel token : sentence.get(CoreAnnotations.EmptyTokensAnnotation.class)) {
360-
token.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIdx);
361-
}
362-
}
363356
sentenceIdx++;
364357
}
365358
// make sure to set docText AFTER all the above processing
@@ -389,9 +382,10 @@ public static final String rebuildMisc(Map<String, String> miscKeyValues) {
389382
/**
390383
* Convert a single ten column CoNLLU line into a CoreLabel
391384
*/
392-
public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
385+
public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line, int sentenceIdx) {
393386
List<String> fields = Arrays.asList(line.split("\t"));
394387
CoreLabel cl = new CoreLabel();
388+
cl.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIdx);
395389

396390
String indexField = fields.get(CoNLLU_IndexField);
397391
int sentenceTokenIndex;
@@ -522,12 +516,12 @@ public CoreLabel convertLineToCoreLabel(CoNLLUSentence sentence, String line) {
522516
/**
523517
* Convert a list of CoNLL-U token lines into a sentence CoreMap
524518
**/
525-
public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence sentence) {
519+
public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence sentence, int sentenceIdx) {
526520
List<String> lines = sentence.tokenLines;
527521
// create CoreLabels
528522
List<CoreLabel> coreLabels = new ArrayList<CoreLabel>();
529523
for (String line : lines) {
530-
CoreLabel cl = convertLineToCoreLabel(sentence, line);
524+
CoreLabel cl = convertLineToCoreLabel(sentence, line, sentenceIdx);
531525
coreLabels.add(cl);
532526
}
533527
for (int i = 1 ; i < coreLabels.size() ; i++) {
@@ -570,7 +564,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
570564

571565
List<CoreLabel> emptyLabels = new ArrayList<CoreLabel>();
572566
for (String line : sentence.emptyLines) {
573-
CoreLabel cl = convertLineToCoreLabel(sentence, line);
567+
CoreLabel cl = convertLineToCoreLabel(sentence, line, sentenceIdx);
574568
emptyLabels.add(cl);
575569
}
576570

0 commit comments

Comments
 (0)