Java Code Examples for de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData#setDocumentId()

The following examples show how to use de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData#setDocumentId(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CasMergeSuiteTest.java    From webanno with Apache License 2.0 8 votes vote down vote up
/**
 * Writes the given curator CAS with {@link WebannoTsv3XWriter} into this test's output folder
 * and asserts that the resulting {@code curator.tsv} has the same content as the
 * {@code curator.tsv} in the reference folder.
 *
 * <p>The comparison is skipped (JUnit assumption) when no reference file exists.
 *
 * @param curatorCas the CAS to serialize; its document id is set to {@code "curator"}
 * @throws Exception if running the writer pipeline or reading either file fails
 */
private void writeAndAssertEquals(JCas curatorCas)
    throws Exception
{
    String outputDir = "target/test-output/" + testContext.getClassName();
    outputDir = outputDir + "/" + referenceFolder.getName();

    // Presumably the writer derives the output file name from the document id, which is why
    // "curator.tsv" is expected below -- confirm against WebannoTsv3XWriter.
    DocumentMetaData.get(curatorCas).setDocumentId("curator");
    runPipeline(curatorCas,
            createEngineDescription(WebannoTsv3XWriter.class,
                    WebannoTsv3XWriter.PARAM_TARGET_LOCATION, outputDir,
                    WebannoTsv3XWriter.PARAM_OVERWRITE, true));

    File expectedFile = new File(referenceFolder, "curator.tsv");
    assumeTrue("No reference data available for this test.", expectedFile.exists());

    File producedFile = new File(outputDir, "curator.tsv");

    // Read the reference first, then the produced file, and compare them verbatim.
    assertEquals(
            FileUtils.readFileToString(expectedFile, "UTF-8"),
            FileUtils.readFileToString(producedFile, "UTF-8"));
}
 
Example 2
Source File: LoadFactAnnotations.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Populates the given CAS for a single fact: sets the id of the CAS's existing
 * {@link DocumentMetaData} to the decimal form of {@code fact} and builds the document
 * content through a {@link JCasBuilder} to which the literal text {@code "fact"} is added.
 *
 * @param aCAS the CAS to fill; it must already contain exactly one {@link DocumentMetaData},
 *             since it is looked up with {@code selectSingle}
 * @param fact numeric fact identifier used as the document id
 * @throws CollectionException if the JCas view cannot be obtained from {@code aCAS}
 * @throws SQLException declared by the signature; nothing in this body raises it directly
 */
private void convert(CAS aCAS, int fact) throws CollectionException, SQLException {
  final JCas view;
  try {
    view = aCAS.getJCas();
  } catch (CASException cause) {
    throw new CollectionException(cause);
  }

  JCasBuilder builder = new JCasBuilder(view);
  JCasUtil.selectSingle(view, DocumentMetaData.class).setDocumentId(String.valueOf(fact));
  builder.add("fact");
  builder.close();
}
 
Example 3
Source File: Document.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Copies this document's settings into the given JCas as an {@link AidaDocumentSettings}
 * annotation, adds the mention annotations (if any), and makes sure the JCas carries a
 * {@link DocumentMetaData} with the same document id.
 *
 * <p>Optional settings (language, chunking strategy, input format) are only copied when they
 * are set on this document.
 *
 * @param jcas the JCas to receive the settings
 * @throws IllegalArgumentException if the JCas already has a language other than
 *         {@code "x-unspecified"} that differs from this document's language
 * @throws IOException declared by the signature; presumably raised by one of the delegated
 *         calls -- not visible from this method
 */
public void addSettingstoJcas(JCas jcas) throws IOException {
  AidaDocumentSettings ads = new AidaDocumentSettings(jcas);

  if (this.getLanguage() != null) {
    String language = this.getLanguage().toString();
    String casLanguage = jcas.getDocumentLanguage();
    if (casLanguage != null && !casLanguage.equals("x-unspecified") && !casLanguage.equals(language)) {
      throw new IllegalArgumentException("Language in JCas and language in settings are different");
    }
    ads.setLanguage(language);
    jcas.setDocumentLanguage(ads.getLanguage());
  }

  if (this.getDocChunkStrategy() != null) {
    ads.setDocChunkStrategy(this.getDocChunkStrategy().toString());
  }

  ads.setDocumentId(this.getDocumentId());

  // Guard on this document's own value (the one dereferenced below), matching the language
  // and chunk-strategy guards above. The previous check looked at the freshly created
  // settings object instead, so it did not protect the toString() call.
  if (this.getDocumentInputFormat() != null) {
    ads.setDocumentInputFormat(this.getDocumentInputFormat().toString());
  }

  ads.setEncoding(this.getEncoding());

  if (disambiguationSettings != null) {
    disambiguationSettings.addToJCas(ads, jcas);
  }
  ads.addToIndexes();

  if (annotations != null) {
    annotations.addMentionsToJCas(jcas);
  }

  // Only create document metadata when the JCas does not have one yet.
  if (!exists(jcas, DocumentMetaData.class)) {
    DocumentMetaData md = new DocumentMetaData(jcas);
    md.setDocumentId(ads.getDocumentId());
    md.addToIndexes();
  }
}
 
Example 4
Source File: Step0bTextSegmenterA.java    From argument-reasoning-comprehension-task with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new English JCas for the given argument: the argument's text becomes the document
 * text, and a {@link DocumentMetaData} is created carrying the argument's id and the title
 * taken from its debate metadata.
 *
 * @param argument source of the document text, id, and debate title
 * @return the initialized JCas
 * @throws UIMAException if the JCas cannot be created
 */
private static JCas initializeJCas(StandaloneArgument argument)
        throws UIMAException
{
    JCas cas = JCasFactory.createJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText(argument.getText());

    // Metadata is attached after language and text have been set on the CAS.
    DocumentMetaData meta = DocumentMetaData.create(cas);
    meta.setDocumentId(argument.getId());
    meta.setDocumentTitle(argument.getDebateMetaData().getTitle());
    meta.addToIndexes();

    return cas;
}
 
Example 5
Source File: JSONReader.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the next JSON resource and fills the CAS with it.
 *
 * <p>The document text is the title (when a title key is configured and the field is
 * present) followed by an empty line and the content. The document id is taken from the
 * configured id field and falls back to the resource path when no id key is configured or
 * the field is absent.
 *
 * <p>A field that is missing from the JSON (or is an explicit JSON {@code null}) is treated
 * as absent: it is logged at debug level and skipped instead of causing a
 * {@link NullPointerException}.
 *
 * @param cas the CAS to populate
 * @throws IOException if the resource cannot be read or parsed
 * @throws CollectionException if the JCas view cannot be obtained from {@code cas}
 */
@Override
public void getNext(CAS cas) throws IOException, CollectionException {
  Resource res = nextFile();
  String name = res.getPath();
  JsonNode json = objectMapper.readTree(res.getInputStream());

  String title = null;
  if (titleKey != null) {
    title = textOrNull(json, titleKey);
  }
  String content = textOrNull(json, contentKey);

  StringBuilder sb = new StringBuilder();
  if (title != null) {
    sb.append(title).append(System.lineSeparator()).append(System.lineSeparator());
  } else {
    logger.debug(res.getPath() + " does not have a title field.");
  }

  if (content != null) {
    sb.append(content);
  } else {
    logger.debug(res.getPath() + " does not have a content field.");
  }

  JCas jcas;
  try {
    jcas = cas.getJCas();
  }
  catch (CASException e) {
    throw new CollectionException(e);
  }

  // Set doc id, falling back to the resource path when the JSON provides none.
  String id = null;
  if (idKey != null) {
    id = textOrNull(json, idKey);
  }
  if (id == null) {
    id = name;
  }

  DocumentMetaData dmd = new DocumentMetaData(jcas);
  dmd.addToIndexes();
  dmd.setDocumentId(id);
  jcas.setDocumentLanguage(getLanguage());
  jcas.setDocumentText(sb.toString());
}

/**
 * Returns the text value of the named field, or {@code null} when the field is missing or
 * holds a JSON {@code null}.
 */
private static String textOrNull(JsonNode json, String fieldName) {
  JsonNode field = json.get(fieldName);
  if (field == null || field.isNull()) {
    return null;
  }
  return field.asText();
}
 
Example 6
Source File: WordPressXMLReader.java    From ambiverse-nlu with Apache License 2.0 4 votes vote down vote up
/**
 * Reads one sub-document from the current XML stream into the given JCas.
 *
 * <p>Walks the stream while the reader is deeper than the root level, handing the text of
 * each element to {@code processText} together with the name of the enclosing tag, and
 * stops when the end tag of the sub-document root is reached. Afterwards the collected text
 * becomes the document text and a {@link DocumentMetaData} is created with document id,
 * document URI ({@code fileUri#docId}) and collection id.
 *
 * <p>Document id: when a doc-id tag is configured, the id must be found exactly once in the
 * sub-document (from the configured attribute, or from the element text when no attribute
 * name is configured). Otherwise the id is the file name without its extension (or the whole
 * file name if it has none), followed by {@code "-"} and the running document counter.
 *
 * @param jcas the JCas to populate
 * @throws XMLStreamException if the underlying XML reader fails
 * @throws IOException declared by the signature; presumably raised by a helper
 * @throws CollectionException if the document id is missing, empty, or given more than once
 */
private void parseSubDocument(JCas jcas) throws XMLStreamException, IOException, CollectionException {
  if (this.language != null) {
    jcas.setDocumentLanguage(this.language);
  }

  // Names of the currently open elements, innermost first.
  LinkedList<String> openTagStack = new LinkedList<>();
  String docTag = this.seekSubDocumentRoot();
  StringBuilder documentText = new StringBuilder();
  String docId = null;

  while (this.xmlReader.hasNext() && this.xmlReader.getDepth() > 1) {
    if (this.xmlReader.isStartElement()) {
      String tagName;
      if (!this.xmlReader.getPrefix().isEmpty()) {
        tagName = this.xmlReader.getPrefix() + ":" + this.xmlReader.getName().getLocalPart();
      } else {
        tagName = this.xmlReader.getName().getLocalPart();
      }
      openTagStack.push(tagName);

      // The id comes either from an attribute of the doc-id element ...
      String candidateId = null;
      if (this.isDocIdElement(tagName) && this.docIdAttributeName != null) {
        candidateId = this.xmlReader.getAttributeValue((String) null, this.docIdAttributeName);
      }

      this.xmlReader.next();
      String text = this.collectText();
      if (text.length() > 0) {
        // ... or, when no attribute name is configured, from the element's text.
        if (this.isDocIdElement(tagName) && this.docIdAttributeName == null) {
          candidateId = text;
        }
        this.processText(jcas, tagName, text, documentText);
      }

      if (candidateId != null) {
        // NOTE(review): these two exceptions use the (messageKey, arguments) constructor,
        // while the "missing" case below names the XmlReader_Messages bundle explicitly --
        // confirm whether the bundle name should be passed here as well.
        if (docId != null) {
          throw new CollectionException("multiple_doc_id_error", new Object[] { this.docIdTag });
        }

        if (candidateId.length() == 0) {
          throw new CollectionException("empty_doc_id_error", new Object[] { this.docIdTag });
        }

        docId = candidateId;
      }
    } else if (this.xmlReader.isCharacters()) {
      // No explicit next() here: collectText() is presumably what advances the reader.
      String tagName = openTagStack.peek();
      String text = this.collectText();
      if (text.length() != 0) {
        this.processText(jcas, tagName, text, documentText);
      }
    } else if (this.xmlReader.isEndElement()) {
      String tagName = this.xmlReader.getName().getLocalPart();
      if (docTag.equals(tagName)) {
        // End of this sub-document: step to the next tag and stop.
        this.xmlReader.nextTag();
        break;
      }
      openTagStack.poll();
      this.xmlReader.next();
    } else if (this.xmlReader.getEventType() == XMLStreamConstants.CDATA) {
      String tagName = openTagStack.peek();
      String text = this.xmlReader.getText();
      if (text.length() != 0) {
        this.processText(jcas, tagName, text, documentText);
      }
      this.xmlReader.next();
    } else {
      // Any other event (e.g. a comment or processing instruction) carries no document
      // content. Skip it; without advancing here the loop would never make progress.
      this.xmlReader.next();
    }
  }

  jcas.setDocumentText(documentText.toString());

  File currentFile = (File) this.xmlFiles.get(this.currentParsedFile);
  String fileName = currentFile.getName();
  int extensionStart = fileName.lastIndexOf('.');
  if (this.docIdTag != null) {
    if (docId == null) {
      throw new CollectionException("de.tudarmstadt.ukp.dkpro.core.io.xml.XmlReader_Messages", "missing_doc_id_error",
          new Object[] { this.docIdTag });
    }
  } else if (extensionStart >= 0) {
    docId = fileName.substring(0, extensionStart) + "-" + this.iDoc;
  } else {
    // File name without an extension: use it whole so the id is never left null.
    docId = fileName + "-" + this.iDoc;
  }

  String docUri = currentFile.toURI().toString();
  DocumentMetaData docMetaData = DocumentMetaData.create(jcas);
  docMetaData.setDocumentId(docId);
  docMetaData.setDocumentUri(docUri + "#" + docId);
  docMetaData.setCollectionId(this.collectionId);
}