@@ -25,6 +25,10 @@ public class CoNLLUReader {
25
25
**/
26
26
// TODO: we should handle field 8, DEPS, for enhanced dependencies
27
27
// doing that requires processing the empty nodes somehow
28
+ // TODO: read sent_id?
29
+ // TODO: read comments in general
30
+ // TODO: MWT should have after/before set to ""
31
+ // TODO: reconsider the newline as the after on the last word
28
32
public static final int CoNLLU_IndexField = 0 ;
29
33
public static final int CoNLLU_WordField = 1 ;
30
34
public static final int CoNLLU_LemmaField = 2 ;
@@ -275,7 +279,7 @@ public Annotation convertCoNLLUDocumentToAnnotation(CoNLLUDocument doc) {
275
279
if (sentenceIdx > 0 ) {
276
280
// for now we're treating a CoNLL-U document as sentences separated by newline
277
281
// so every sentence after the first should have a newline as the previous character
278
- sentence .get (CoreAnnotations .TokensAnnotation .class ).get (0 ).setBefore (" \n " );
282
+ sentence .get (CoreAnnotations .TokensAnnotation .class ).get (0 ).setBefore (System . lineSeparator () );
279
283
}
280
284
for (CoreLabel token : sentence .get (CoreAnnotations .TokensAnnotation .class )) {
281
285
token .set (CoreAnnotations .TokenBeginAnnotation .class , documentIdx );
@@ -382,7 +386,7 @@ public CoreMap convertCoNLLUSentenceToCoreMap(CoNLLUDocument doc, CoNLLUSentence
382
386
coreLabels .add (cl );
383
387
}
384
388
// the last token should have a newline after
385
- coreLabels .get (coreLabels .size () - 1 ).setAfter (" \n " );
389
+ coreLabels .get (coreLabels .size () - 1 ).setAfter (System . lineSeparator () );
386
390
// set before
387
391
coreLabels .get (0 ).setBefore ("" );
388
392
for (int i = 1 ; i < coreLabels .size () ; i ++) {
0 commit comments