Java Code Examples for org.apache.uima.cas.CAS#getDocumentText()

The following examples show how to use org.apache.uima.cas.CAS#getDocumentText(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: ArkTweetTokenizerFixed.java    From argument-reasoning-comprehension-task with Apache License 2.0 6 votes vote down vote up
/**
 * Tokenizes the document text with Twokenize and calls {@code createTokenAnnotation} for
 * every token.
 *
 * <p>Twokenize only returns the token strings, so the offsets of each token are recovered by
 * searching for it in the document text, starting at the end of the previous token.
 *
 * @param cas
 *            the CAS whose document text is tokenized
 * @throws AnalysisEngineProcessException
 *             if a token returned by the tokenizer cannot be located in the document text
 */
@Override
public void process(CAS cas)
        throws AnalysisEngineProcessException
{
    String text = cas.getDocumentText();

    // NOTE: Twokenize provides a API call that performs a normalization first - this would
    // require a mapping to the text how it is present in the CAS object. Due to HTML escaping
    // that would become really messy, we use the call which does not perform any normalization
    List<String> tokens = Twokenize.tokenize(text);
    int offset = 0;
    for (String token : tokens) {
        int start = text.indexOf(token, offset);
        if (start < 0) {
            // indexOf() reports "not found" as -1; continuing would create an annotation
            // with a negative begin and silently corrupt all following offsets.
            throw new AnalysisEngineProcessException(new IllegalStateException(
                    "Token [" + token + "] not found in document text at or after offset "
                            + offset));
        }
        int end = start + token.length();
        createTokenAnnotation(cas, start, end);
        offset = end;
    }

}
 
Example 2
Source File: CasDumpWriter.java    From uima-uimafit with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a dump of the given CAS to {@code out}: a begin marker, the output of
 * {@code processView} for every view, and an end marker. Afterwards the stream is flushed and
 * the {@code iCas} counter is incremented.
 *
 * @param aCAS
 *          the CAS whose views are dumped
 */
@Override
public void process(CAS aCAS) throws AnalysisEngineProcessException {
  out.println("======== CAS " + iCas + " begin ==================================");
  out.println();

  for (Iterator<CAS> views = aCAS.getViewIterator(); views.hasNext();) {
    CAS currentView = views.next();
    processView(currentView);

    // Views without document text may still carry a sofa data stream; dump that as well.
    boolean hasDocumentText = currentView.getDocumentText() != null;
    if (!hasDocumentText && currentView.getSofaDataStream() != null) {
      processSofaData(currentView);
    }
  }

  out.println("======== CAS " + iCas + " end ==================================");
  out.println();
  out.println();
  out.flush();

  iCas++;
}
 
Example 3
Source File: TestAnnotator.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Remembers the document text of the given CAS and the given result specification in the
 * {@code lastDocument} and {@code lastResultSpec} fields.
 *
 * @param aCAS
 *          the CAS whose document text is recorded
 * @param aResultSpec
 *          the result specification that is recorded
 * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#process(CAS,ResultSpecification)
 */
public void process(CAS aCAS, ResultSpecification aResultSpec) throws AnnotatorProcessException {
  final String documentText = aCAS.getDocumentText();

  // keep the inputs of the most recent call available through the fields
  lastDocument = documentText;
  lastResultSpec = aResultSpec;
}
 
Example 4
Source File: SimpleRunCPM.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Callback invoked once a document has been processed.
 * <p>
 * If the status reports exceptions, their stack traces are printed and nothing is counted.
 * Otherwise {@code entityCount} is incremented and the length of the document text, if the CAS
 * has one, is added to {@code size}.
 * 
 * @param aCas
 *          CAS corresponding to the completed processing
 * @param aStatus
 *          EntityProcessStatus that holds the status of all the events for aEntity
 */
public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
  if (aStatus.isException()) {
    for (Object failure : aStatus.getExceptions()) {
      ((Throwable) failure).printStackTrace();
    }
    return;
  }

  entityCount++;

  String documentText = aCas.getDocumentText();
  if (documentText == null) {
    // no text in this CAS, so there is nothing to add to the size
    return;
  }
  size += documentText.length();
}
 
Example 5
Source File: SimpleRunCPE.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Invoked when processing of a document has finished.
 * <p>
 * A failed document only has the stack traces of its exceptions printed. A successful one
 * increments {@code entityCount} and adds the length of its document text (when present) to
 * {@code size}.
 * 
 * @param aCas
 *          CAS corresponding to the completed processing
 * @param aStatus
 *          EntityProcessStatus that holds the status of all the events for aEntity
 */
public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
  boolean failed = aStatus.isException();
  if (failed) {
    for (Object problem : aStatus.getExceptions()) {
      Throwable cause = (Throwable) problem;
      cause.printStackTrace();
    }
    return;
  }

  entityCount++;
  String text = aCas.getDocumentText();
  if (text != null) {
    int textLength = text.length();
    size += textLength;
  }
}
 
Example 6
Source File: TestAnnotator.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the document text of the given CAS in {@code lastDocument} and the given result
 * specification in {@code lastResultSpec}.
 *
 * @param aCAS
 *          the CAS providing the document text
 * @param aResultSpec
 *          the result specification passed to this call
 * @see org.apache.uima.analysis_engine.annotator.TextAnnotator#process(CAS,ResultSpecification)
 */
public void process(CAS aCAS, ResultSpecification aResultSpec) throws AnnotatorProcessException {
  final String currentText = aCAS.getDocumentText();

  // expose what this call received through the fields
  lastDocument = currentText;
  lastResultSpec = aResultSpec;
}
 
Example 7
Source File: SimpleCasGenerator.java    From uima-uimaj with Apache License 2.0 5 votes vote down vote up
/**
 * Records the document text of the given CAS and the current result specification in the
 * {@code lastDocument} and {@code lastResultSpec} fields and resets both counters to zero.
 *
 * @param aCas
 *          the CAS whose document text is recorded
 */
public void process(CAS aCas) throws AnalysisEngineProcessException {
  final String documentText = aCas.getDocumentText();
  lastDocument = documentText;
  lastResultSpec = getResultSpecification();

  // start counting from zero again for this input CAS
  mCount = 0;
  docCount = 0;
}
 
Example 8
Source File: WebAnnoCasUtil.java    From webanno with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code DocumentMetaData} annotation spanning the document text (or an empty span if
 * the CAS has no text) and adds it to the indexes.
 *
 * <p>If the CAS returns a document annotation, its language, begin and end are copied to the new
 * annotation and the document annotation is removed from the indexes.
 *
 * @param aCas
 *            the CAS in which the annotation is created
 * @return the newly created and indexed annotation
 */
public static FeatureStructure createDocumentMetadata(CAS aCas)
{
    Type metadataType = getType(aCas, DocumentMetaData.class);
    String documentText = aCas.getDocumentText();
    int textLength = documentText != null ? documentText.length() : 0;

    FeatureStructure dmd = aCas.createAnnotation(metadataType, 0, textLength);

    FeatureStructure da = aCas.getDocumentAnnotation();
    if (da == null) {
        if (documentText != null) {
            FSUtil.setFeature(dmd, FEATURE_BASE_NAME_BEGIN, 0);
            FSUtil.setFeature(dmd, FEATURE_BASE_NAME_END, textLength);
        }
    }
    else {
        // Take over the information of the existing document annotation, then drop it
        String language = FSUtil.getFeature(da, FEATURE_BASE_NAME_LANGUAGE, String.class);
        FSUtil.setFeature(dmd, FEATURE_BASE_NAME_LANGUAGE, language);

        Integer begin = FSUtil.getFeature(da, FEATURE_BASE_NAME_BEGIN, Integer.class);
        FSUtil.setFeature(dmd, FEATURE_BASE_NAME_BEGIN, begin);

        Integer end = FSUtil.getFeature(da, FEATURE_BASE_NAME_END, Integer.class);
        FSUtil.setFeature(dmd, FEATURE_BASE_NAME_END, end);

        aCas.removeFsFromIndexes(da);
    }

    aCas.addFsToIndexes(dmd);
    return dmd;
}
 
Example 9
Source File: BlueAnnotationViewer.java    From bluima with Apache License 2.0 4 votes vote down vote up
/**
 * Writes the default text view of the given CAS as inline XML to {@code inline.xml} in the
 * output directory and lets the generator process the style map and the inline XML file.
 *
 * <p>If the default view has no document text, a message is printed to {@code System.err} and
 * nothing is generated.
 *
 * @param jCas
 *            the JCas whose annotations are rendered
 * @param typeSystem
 *            not used by this method
 * @param styleMapFile
 *            the style map passed to the generator
 * @param outputDirectory
 *            the directory that receives the generated files; created if necessary
 * @throws IOException
 *             if any step fails; the original exception is attached as cause
 */
public void createHtml(JCas jCas, TypeSystem typeSystem, File styleMapFile,
        File outputDirectory) throws IOException {
    try {

        FileUtils.forceMkdir(outputDirectory);
        generator.setOutputDirectory(outputDirectory);

        CAS cas = jCas.getCas();

        // get the specified view
        cas = cas.getView(this.defaultCasViewName);

        CAS defaultView = cas.getView(CAS.NAME_DEFAULT_SOFA);
        if (defaultView.getDocumentText() == null) {
            System.err
                    .println("The HTML and XML Viewers can only view the default text document, which was not found in this CAS.");
            return;
        }

        // generate inline XML
        File inlineXmlFile = new File(outputDirectory, "inline.xml");
        String xmlAnnotations = new CasToInlineXml()
                .generateXML(defaultView);
        FileOutputStream outStream = new FileOutputStream(inlineXmlFile);
        try {
            outStream.write(xmlAnnotations.getBytes("UTF-8"));
        } finally {
            // release the file handle even when writing fails
            outStream.close();
        }

        // generate HTML view
        generator.processStyleMap(styleMapFile);
        generator.processDocument(inlineXmlFile);

    } catch (Exception ex) {
        throw new IOException("cannot create html annotationviewer", ex);
    }
}
 
Example 10
Source File: StringMatchingRecommender.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up dictionary matches starting at every token of every sentence and collects them as
 * one {@code Sample} per sentence.
 *
 * @param aDocNo
 *            the document number stored in each sample
 * @param aCas
 *            the CAS providing the text, sentences and tokens
 * @param aDict
 *            the dictionary that is queried at each token start
 * @return one sample per sentence, holding the sentence tokens and the matching spans
 */
private List<Sample> predict(int aDocNo, CAS aCas, Trie<DictEntry> aDict)
{
    boolean requireEndAtTokenBoundary = !CHARACTERS
            .equals(getRecommender().getLayer().getAnchoringMode());
    boolean requireSingleSentence = !getRecommender().getLayer().isCrossSentence();

    Type sentenceType = getType(aCas, Sentence.class);
    Type tokenType = getType(aCas, Token.class);

    List<Sample> samples = new ArrayList<>();
    String documentText = aCas.getDocumentText();

    for (Annotation sentence : aCas.<Annotation>select(sentenceType)) {
        List<Span> matches = new ArrayList<>();
        List<Annotation> sentenceTokens = aCas.<Annotation>select(tokenType)
                .coveredBy(sentence).asList();

        for (Annotation token : sentenceTokens) {
            int begin = token.getBegin();
            Trie<DictEntry>.Node node = aDict.getNode(documentText, begin);
            if (node == null) {
                continue;
            }

            int end = begin + node.level;

            // Matches running past the end of the sentence are dropped unless allowed
            if (requireSingleSentence && end > sentence.getEnd()) {
                continue;
            }

            // The match has to end exactly where some token starting here ends
            if (requireEndAtTokenBoundary && aCas.<Annotation>select(tokenType)
                    .startAt(token).noneMatch(candidate -> candidate.getEnd() == end)) {
                continue;
            }

            String coveredText = documentText.substring(begin, end);
            for (LabelStats stats : node.value.getBest(maxRecommendations)) {
                String label = stats.getLabel();
                // identity comparison on purpose: avoids collision with user labels
                if (label == UNKNOWN_LABEL) {
                    label = null;
                }
                matches.add(new Span(begin, end, coveredText, label, stats.getRelFreq()));
            }
        }

        samples.add(new Sample(aDocNo, aCas.getDocumentText(), sentenceTokens, matches));
    }

    return samples;
}
 
Example 11
Source File: AnnotationViewerDialog.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Loads a serialized CAS from a file and shows it in the selected viewer.
 *
 * <p>The file is deserialized into a newly created CAS and the view named by
 * {@code defaultCasViewName} is selected. Depending on the flags, that view is shown in a Java
 * annotation viewer dialog, or the default text view is written as inline XML to
 * {@code inline.xml} in the viewer directory and opened in the browser, either directly (XML)
 * or as the {@code index.html} produced by the annotation view generator (HTML). Any exception
 * is reported through {@code displayError}.
 *
 * @param inputDirPath the directory containing the file to view
 * @param fileName the name of the file to view
 * @param typeSystem the type system used to create the CAS
 * @param aTypesToDisplay the types passed to the Java viewer as displayed types
 * @param javaViewerRBisSelected whether the Java viewer is selected
 * @param javaViewerUCRBisSelected whether the Java viewer with colors read from the style map
 *        file is selected
 * @param xmlRBisSelected whether the XML view is selected; only consulted when neither Java
 *        viewer is selected, and if false the HTML view is generated
 * @param styleMapFile the style map file
 * @param viewerDirectory the directory that receives the generated files
 */
public void launchThatViewer(String inputDirPath, String fileName, TypeSystem typeSystem,
        final String[] aTypesToDisplay, boolean javaViewerRBisSelected,
        boolean javaViewerUCRBisSelected, boolean xmlRBisSelected, File styleMapFile,
        File viewerDirectory) {
  try {

    File xcasFile = new File(inputDirPath, fileName);
    // create a new CAS
    CAS cas = CasCreationUtils.createCas(Collections.EMPTY_LIST, typeSystem, UIMAFramework
            .getDefaultPerformanceTuningProperties());
    // deserialize XCAS into CAS
    try (InputStream xcasInStream = new FileInputStream(xcasFile)) {
      XmlCasDeserializer.deserialize(xcasInStream, cas, true);
    }
    
    //get the specified view
    cas = cas.getView(this.defaultCasViewName);

    // launch appropriate viewer
    if (javaViewerRBisSelected || javaViewerUCRBisSelected) { // JMP
      // record preference for next time
      med1.setViewType(javaViewerRBisSelected ? "Java Viewer" : "JV User Colors");

      // create tree viewer component
      CasAnnotationViewer viewer = new CasAnnotationViewer();
      viewer.setDisplayedTypes(aTypesToDisplay);
      if (javaViewerUCRBisSelected)
        getColorsForTypesFromFile(viewer, styleMapFile);
      else
        viewer.setHiddenTypes(new String[] { "uima.cpm.FileLocation" });
      // launch viewer in a new dialog
      viewer.setCAS(cas);
      JDialog dialog = new JDialog(AnnotationViewerDialog.this, "Annotation Results for "
              + fileName + " in " + inputDirPath); // JMP
      dialog.getContentPane().add(viewer);
      dialog.setSize(850, 630);
      dialog.pack();
      // setVisible(true) is the non-deprecated replacement for show()
      dialog.setVisible(true);
    } else {
      CAS defaultView = cas.getView(CAS.NAME_DEFAULT_SOFA);
      if (defaultView.getDocumentText() == null) {
        displayError("The HTML and XML Viewers can only view the default text document, which was not found in this CAS.");
        return;
      }
      // generate inline XML
      File inlineXmlFile = new File(viewerDirectory, "inline.xml");
      CasToInlineXml casToInlineXml = new CasToInlineXml();
      casToInlineXml.setFormattedOutput(false);
      String xmlAnnotations = casToInlineXml.generateXML(defaultView);
      // close the stream even if writing fails so the file handle is not leaked
      try (FileOutputStream outStream = new FileOutputStream(inlineXmlFile)) {
        outStream.write(xmlAnnotations.getBytes(StandardCharsets.UTF_8));
      }

      if (xmlRBisSelected) // JMP passed in
      {
        // record preference for next time
        med1.setViewType("XML");

        BrowserUtil.openUrlInDefaultBrowser(inlineXmlFile.getAbsolutePath());
      } else
      // HTML view
      {
        med1.setViewType("HTML");
        // generate HTML view
        // first process style map if not done already
        if (!processedStyleMap) {
          if (!styleMapFile.exists()) {
            annotationViewGenerator.autoGenerateStyleMapFile(
                    promptForAE().getAnalysisEngineMetaData(), styleMapFile);
          }
          annotationViewGenerator.processStyleMap(styleMapFile);
          processedStyleMap = true;
        }
        annotationViewGenerator.processDocument(inlineXmlFile);
        File genFile = new File(viewerDirectory, "index.html");
        // open in browser
        BrowserUtil.openUrlInDefaultBrowser(genFile.getAbsolutePath());
      }
    }

    // end LTV here

  } catch (Exception ex) {
    displayError(ex);
  }
}
 
Example 12
Source File: AeroRemoteApiController.java    From webanno with Apache License 2.0 4 votes vote down vote up
/**
 * Imports an uploaded annotation file into a CAS and verifies that it is compatible with the
 * source document it is uploaded for.
 *
 * <p>The two are considered compatible if their texts are equal (ignoring one trailing line
 * break) and if the sentence and token annotations have the same counts and offsets. The text
 * of the returned CAS is overwritten with the text of the initial CAS.
 *
 * @param aProjectId
 *            the ID of the project containing the source document
 * @param aDocumentId
 *            the ID of the source document
 * @param aFile
 *            the uploaded annotation file
 * @param aFormatId
 *            the format of the uploaded file; {@code FORMAT_DEFAULT} is used if absent
 * @return the CAS imported from the uploaded file
 * @throws RemoteApiException
 *             if the format is not supported or the uploaded document is not compatible with
 *             the source document
 */
private CAS createCompatibleCas(long aProjectId, long aDocumentId, MultipartFile aFile,
        Optional<String> aFormatId)
    throws RemoteApiException, ClassNotFoundException, IOException, UIMAException
{
    Project project = getProject(aProjectId);
    SourceDocument document = getDocument(project, aDocumentId);

    // Check if the format is supported
    String format = aFormatId.orElse(FORMAT_DEFAULT);
    if (!importExportService.getReadableFormatById(format).isPresent()) {
        throw new UnsupportedFormatException(
                "Format [%s] not supported. Acceptable formats are %s.", format,
                importExportService.getReadableFormats().stream()
                        .map(FormatSupport::getId).sorted().collect(Collectors.toList()));
    }

    // Convert the uploaded annotation document into a CAS
    File tmpFile = null;
    CAS annotationCas;
    try {
        tmpFile = File.createTempFile("upload", ".bin");
        aFile.transferTo(tmpFile);
        annotationCas = importExportService.importCasFromFile(tmpFile, project, format);
    }
    finally {
        if (tmpFile != null) {
            FileUtils.forceDelete(tmpFile);
        }
    }
    
    // Check if the uploaded file is compatible with the source document. They are compatible
    // if the text is the same and if all the token and sentence annotations have the same
    // offsets.
    CAS initialCas = documentService.createOrReadInitialCas(document);
    String initialText = initialCas.getDocumentText();
    String annotationText = annotationCas.getDocumentText();
    
    // If any of the texts contains trailing line breaks, we ignore that. We assume at the
    // moment that nobody will have created annotations over that trailing line breaks.
    initialText = StringUtils.chomp(initialText);
    annotationText = StringUtils.chomp(annotationText);
    
    if (ObjectUtils.notEqual(initialText, annotationText)) {
        int diffIndex = StringUtils.indexOfDifference(initialText, annotationText);
        // StringUtils.substring is null-safe and clamps the end index, so a CAS without
        // text yields a proper error instead of a NullPointerException
        String expected = StringUtils.substring(initialText, diffIndex, diffIndex + 20);
        String actual = StringUtils.substring(annotationText, diffIndex, diffIndex + 20);
        throw new IncompatibleDocumentException(
                "Text of annotation document does not match text of source document at offset "
                        + "[%d]. Expected [%s] but found [%s].",
                diffIndex, expected, actual);
    }
    
    // Just in case we really had to chomp off a trailing line break from the annotation CAS,
    // make sure we copy over the proper text from the initial CAS
    // NOT AT HOME THIS YOU SHOULD TRY
    // SETTING THE SOFA STRING FORCEFULLY FOLLOWING THE DARK SIDE IS!
    forceOverwriteSofa(annotationCas, initialCas.getDocumentText());
    
    Collection<AnnotationFS> annotationSentences = selectSentences(annotationCas);
    Collection<AnnotationFS> initialSentences = selectSentences(initialCas);
    if (annotationSentences.size() != initialSentences.size()) {
        throw new IncompatibleDocumentException(
                "Expected [%d] sentences, but annotation document contains [%d] sentences.",
                initialSentences.size(), annotationSentences.size());
    }
    assertCompatibleOffsets(initialSentences, annotationSentences);
    
    Collection<AnnotationFS> annotationTokens = selectTokens(annotationCas);
    Collection<AnnotationFS> initialTokens = selectTokens(initialCas);
    if (annotationTokens.size() != initialTokens.size()) {
        // report the token counts here, not the sentence counts
        throw new IncompatibleDocumentException(
                "Expected [%d] tokens, but annotation document contains [%d] tokens.",
                initialTokens.size(), annotationTokens.size());
    }
    assertCompatibleOffsets(initialTokens, annotationTokens);
    
    return annotationCas;
}
 
Example 13
Source File: SymbolIndexedDocument.java    From biomedicus with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a symbol indexed document from the {@code ViewIndex} annotations of a view.
 *
 * <p>Each annotation becomes a symbol location holding its destination name, its distance from
 * the end of the previous annotation, its length and its position in the list. In addition, for
 * every destination name the destination index of each annotation is mapped to that position.
 *
 * @param originalDocumentView the view containing the {@code ViewIndex} annotations.
 * @return The newly created symbol indexed document.
 */
public static SymbolIndexedDocument fromView(CAS originalDocumentView) {
  Type viewIndexType = originalDocumentView.getTypeSystem()
      .getType("edu.umn.biomedicus.rtfuima.type.ViewIndex");
  Feature nameFeature = viewIndexType.getFeatureByBaseName("destinationName");
  Feature indexFeature = viewIndexType.getFeatureByBaseName("destinationIndex");

  AnnotationIndex<AnnotationFS> viewIndexes = originalDocumentView
      .getAnnotationIndex(viewIndexType);

  List<SymbolLocation> symbolLocations = new ArrayList<>();
  Map<String, Map<Integer, Integer>> destinationMap = new HashMap<>();

  int previousEnd = 0;
  for (AnnotationFS viewIndex : viewIndexes) {
    int begin = viewIndex.getBegin();
    int end = viewIndex.getEnd();
    String destinationName = viewIndex.getStringValue(nameFeature);

    // position this symbol will have in the list once it has been added
    int symbolIndex = symbolLocations.size();
    symbolLocations.add(
        new SymbolLocation(destinationName, begin - previousEnd, end - begin, symbolIndex));

    int destinationIndex = viewIndex.getIntValue(indexFeature);
    destinationMap
        .computeIfAbsent(destinationName, name -> new HashMap<>())
        .put(destinationIndex, symbolIndex);

    previousEnd = end;
  }

  return new SymbolIndexedDocument(symbolLocations, destinationMap,
      originalDocumentView.getDocumentText());
}
 
Example 14
Source File: TestAnnotator2.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the document text of the given CAS in the {@code lastDocument} field.
 *
 * @param aCAS
 *          the CAS whose document text is recorded
 */
public void process(CAS aCAS) {
  final String documentText = aCAS.getDocumentText();
  // keep the text of the most recently processed CAS
  lastDocument = documentText;
}
 
Example 15
Source File: PersonTitleAnnotator.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
   * Annotates a document by handing its text to {@code annotateRange}.
   * <p>
   * Nothing is annotated when the result specification does not contain the type
   * {@code example.PersonTitle} for the document language; the first time this happens a warning
   * is printed and logged. Otherwise either the whole document text is searched, or, when a
   * containing type is configured, only the text covered by annotations of that type.
   * 
   * @param aCAS
   *          CAS containing document text and previously discovered annotations, and to which new
   *          annotations are to be written.
   * @throws AnalysisEngineProcessException
   *           wrapping any exception raised during processing
   * 
   * @see CasAnnotator_ImplBase#process(CAS)
   */
  public void process(CAS aCAS) throws AnalysisEngineProcessException {
    try {
      boolean typeRequested = getResultSpecification().containsType("example.PersonTitle",
          aCAS.getDocumentLanguage());
      if (!typeRequested) {
        // Nothing to do for this document; tell the user once why there is no output.
        if (warningMsgShown) {
          return;
        }
        String m = String.format(
            "No output is being produced by the PersonTitleAnnotator because the Result Specification did not contain" +
            " a request for the type example.PersonTitle with the language '%s'%n" +
            "  (Note: this message will only be shown once.)%n", 
            aCAS.getDocumentLanguage());
        System.err.println(m);
        logger.log(Level.WARNING, m);
        warningMsgShown = true;
        return;
      }

      if (mContainingType != null) {
        // Search only within annotations of type mContainingType, passing the begin offset
        // of each annotation along with its covered text.
        for (Annotation container : aCAS.<Annotation>select(mContainingType)) {
          annotateRange(aCAS, container.getCoveredText(), container.getBegin());
        }
        return;
      }

      // Search the whole document for PersonTitle annotations
      annotateRange(aCAS, aCAS.getDocumentText(), 0);
    } catch (Exception e) {
      throw new AnalysisEngineProcessException(e);
    }
  }
 
Example 16
Source File: RegExAnnotator.java    From uima-uimaj with Apache License 2.0 4 votes vote down vote up
/**
 * Invokes this annotator's analysis logic. This annotator uses the java regular expression
 * package to find annotations using the regular expressions defined by its configuration
 * parameters.
 * <p>
 * Every range returned by {@code getRangesToAnnotate} is searched with all patterns of every
 * type contained in the result specification. Each match yields an annotation covering either
 * the match or, if {@code mAnnotateEntireContainingAnnotation} is set, the whole range.
 * 
 * @param aCAS
 *          the CAS to process 
 * @throws AnalysisEngineProcessException
 *           if a failure occurs during processing.
 * 
 * @see CasAnnotator_ImplBase#process(CAS)
 */
public void process(CAS aCAS) throws AnalysisEngineProcessException {
  try {
    String docText = aCAS.getDocumentText();
    // Determine which regions of the document we are going to annotate
    int[] rangesToAnnotate = getRangesToAnnotate(aCAS);

    // We treat the rangesToAnnotate array as a list of (start,end) offset
    // pairs. Iterate through all of these pairs.
    for (int i = 0; i < rangesToAnnotate.length; i += 2) {
      int startPos = rangesToAnnotate[i];
      int endPos = rangesToAnnotate[i + 1];
      // get the substring of text to be annotated
      String subText = docText.substring(startPos, endPos);

      // iterate over all annotation types for which we have patterns
      for (int j = 0; j < mCASTypes.length; j++) {
        // see if the ResultSpec contains this type
        if (getResultSpecification().containsType(mCASTypes[j].getName(),aCAS.getDocumentLanguage()) || getResultSpecification().containsType(mCASTypes[j].getName())) {
          // try to match each pattern that we have for this annotation type
          for (int k = 0; k < mPatterns[j].length; k++) {
            int pos = 0;
            Matcher matcher = mPatterns[j][k].matcher(subText);
            while (pos < subText.length() && matcher.find(pos)) {
              getContext().getLogger().log(Level.FINER,
                      "RegEx match found: [" + matcher.group() + "]");
              // match found; extract locations of start and end of match
              // (or of entire containing annotation, if that option is on)
              int annotStart, annotEnd;
              if (mAnnotateEntireContainingAnnotation) {
                annotStart = startPos;
                annotEnd = endPos;
              } else {
                annotStart = startPos + matcher.start();
                annotEnd = startPos + matcher.end();
              }
              // create Annotation in CAS
              FeatureStructure fs = aCAS.createAnnotation(mCASTypes[j], annotStart, annotEnd);
              aCAS.getIndexRepository().addFS(fs);
              pos = annotEnd - startPos;
              // A zero-length match ends where it starts, so resuming at its end would
              // find the very same match again and loop forever; step past it instead.
              if (matcher.start() == matcher.end()) {
                pos++;
              }
            }
          }
        }
      }
    }
  } catch (Exception e) {
    throw new AnalysisEngineProcessException(e);
  }
}
 
Example 17
Source File: CASArtifact.java    From biomedicus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a document for the given CAS view, passing the view's name and document text to the
 * superclass constructor.
 *
 * @param view the CAS view this document is created for
 * @param labelAdapters the label adapters stored with this document, may be null
 */
CASDocument(CAS view, @Nullable LabelAdapters labelAdapters) {
  super(view.getViewName(), view.getDocumentText());
  this.labelAdapters = labelAdapters;
  this.view = view;
}
 
Example 18
Source File: PdfAnnoRendererTest.java    From inception with Apache License 2.0 4 votes vote down vote up
/**
 * Tests if given offsets for PDFAnno can be converted to offsets for the document in INCEpTION
 */
@Test
public void testConvertToDocumentOffset() throws Exception
{
    String file = "src/test/resources/tcf04-karin-wl.xml";
    String pdftxt;
    // close the scanner (and the underlying file) once the content has been read
    try (Scanner scanner = new Scanner(
            new File("src/test/resources/rendererTestPdfExtract.txt"))) {
        // "\\Z" matches at the end of the input, so next() returns the whole file as one token
        pdftxt = scanner.useDelimiter("\\Z").next();
    }
    PdfExtractFile pdfExtractFile = new PdfExtractFile(pdftxt, new HashMap<>());

    CAS cas = JCasFactory.createJCas().getCas();
    CollectionReader reader = CollectionReaderFactory.createReader(TcfReader.class,
        TcfReader.PARAM_SOURCE_LOCATION, file);
    reader.getNext(cas);

    AnnotatorState state = new AnnotatorStateImpl(Mode.ANNOTATION);
    state.setPagingStrategy(new SentenceOrientedPagingStrategy());
    state.getPreferences().setWindowSize(10);
    state.setProject(project);

    DocumentModel documentModel = new DocumentModel(cas.getDocumentText());
    // List of PDFAnno offsets
    // indices represent line numbers in the PDFExtractFile for the according character
    List<Offset> offsets = new ArrayList<>();
    offsets.add(new Offset(3, 3));
    offsets.add(new Offset(3, 4));
    offsets.add(new Offset(3, 5));
    offsets.add(new Offset(3, 6));
    offsets.add(new Offset(3, 7));
    offsets.add(new Offset(3, 8));
    offsets.add(new Offset(6, 8));
    offsets.add(new Offset(7, 7));
    offsets.add(new Offset(7, 8));
    offsets.add(new Offset(8, 8));
    offsets.add(new Offset(8, 13));
    offsets.add(new Offset(28, 28));
    offsets.add(new Offset(28, 30));
    offsets.add(new Offset(35, 38));
    // convert to offsets for document in INCEpTION
    List<Offset> docOffsets =
        PdfAnnoRenderer.convertToDocumentOffsets(offsets, documentModel, pdfExtractFile);
    List<Offset> expectedOffsets = new ArrayList<>();
    expectedOffsets.add(new Offset(0, 0));
    expectedOffsets.add(new Offset(0, 1));
    expectedOffsets.add(new Offset(0, 2));
    expectedOffsets.add(new Offset(0, 3));
    expectedOffsets.add(new Offset(0, 4));
    expectedOffsets.add(new Offset(0, 6));
    expectedOffsets.add(new Offset(3, 6));
    expectedOffsets.add(new Offset(4, 4));
    expectedOffsets.add(new Offset(4, 6));
    expectedOffsets.add(new Offset(6, 6));
    expectedOffsets.add(new Offset(6, 11));
    expectedOffsets.add(new Offset(29, 29));
    expectedOffsets.add(new Offset(29, 31));
    expectedOffsets.add(new Offset(38, 41));
    assertThat(docOffsets).isEqualTo(expectedOffsets);
}