Java Code Examples for org.apache.lucene.analysis.tokenattributes.CharTermAttribute#buffer()

The following examples show how to use org.apache.lucene.analysis.tokenattributes.CharTermAttribute#buffer(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: MinHashFilterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Drains the given {@link TokenStream} and returns every term as a String.
 *
 * @param ts the token stream to consume; it is reset, fully read, ended and
 *           closed by this method
 * @return the tokens in stream order
 * @throws IOException if the underlying stream fails
 */
private ArrayList<String> getTokens(TokenStream ts) throws IOException {
  ArrayList<String> tokens = new ArrayList<>();
  // The attribute instance is reused across incrementToken() calls, so a
  // single lookup outside the loop is sufficient (the original fetched it
  // on every iteration).
  CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
  try {
    ts.reset();
    while (ts.incrementToken()) {
      // buffer() may be larger than the token; copy only length() chars.
      tokens.add(new String(termAttribute.buffer(), 0, termAttribute.length()));
    }
    ts.end();
  } finally {
    // Close even when reset()/incrementToken() throws — the original
    // leaked the stream on any failure inside the loop.
    ts.close();
  }
  return tokens;
}
 
Example 2
Source File: AutoPhrasingTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Advances the wrapped input stream one token and returns a private copy of
 * its term characters, or {@code null} when the stream is exhausted or no
 * term attribute is present.
 *
 * @return a freshly allocated char[] holding the next term, or null
 * @throws IOException if the wrapped stream fails
 */
private char[] nextToken() throws IOException {
    // Guard clauses: stream exhausted, or no term attribute to read from.
    if (!input.incrementToken()) {
        return null;
    }
    CharTermAttribute termAttr = getTermAttribute();
    if (termAttr == null) {
        return null;
    }
    // Copy only the live portion of the (possibly oversized) term buffer so
    // the caller owns an exact-length snapshot.
    int len = termAttr.length();
    char[] copy = new char[len];
    System.arraycopy(termAttr.buffer(), 0, copy, 0, len);
    return copy;
}
 
Example 3
Source File: SolrInformationServer.java    From SearchServices with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Fetches the text content of a node property from the Alfresco repository
 * and adds it (plus transformation metadata and, optionally, a MinHash
 * fingerprint) to the given Solr document.
 *
 * @param doc           the Solr input document being built
 * @param propertyQName the content property to fetch
 * @param dbId          the node database id
 * @param locale        locale string used to prefix the stored field value
 * @throws AuthenticationException if the repository call cannot authenticate
 * @throws IOException             on transport or analysis failures
 */
private void addContentPropertyToDocUsingAlfrescoRepository(
        SolrInputDocument doc,
        QName propertyQName,
        long dbId,
        String locale) throws AuthenticationException, IOException
{
    long start = System.nanoTime();

    // Expensive call to be done with ContentTracker
    try (GetTextContentResponse response = repositoryClient.getTextContent(dbId, propertyQName, null)) {
        addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_STATUS, response);
        addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_EXCEPTION, response);
        addContentPropertyMetadata(doc, propertyQName, AlfrescoSolrDataModel.SpecializedFieldType.TRANSFORMATION_TIME, response);

        final String textContent = textContentFrom(response);

        if (fingerprintHasBeenEnabledOnThisInstance && !textContent.isBlank()) {
            Analyzer analyzer = core.getLatestSchema().getFieldType("min_hash").getIndexAnalyzer();
            // try-with-resources: the original leaked the TokenStream if
            // reset()/incrementToken() threw before the explicit close().
            try (TokenStream ts = analyzer.tokenStream("dummy_field", textContent)) {
                CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    // Hex-encode each char of the min-hash term; one Solr
                    // field value per token.
                    StringBuilder tokenBuff = new StringBuilder();
                    char[] buff = termAttribute.buffer();

                    for (int i = 0; i < termAttribute.length(); i++) {
                        tokenBuff.append(Integer.toHexString(buff[i]));
                    }
                    doc.addField(FINGERPRINT_FIELD, tokenBuff.toString());
                }
                ts.end();
            }
        }

        this.getTrackerStats().addDocTransformationTime(System.nanoTime() - start);

        // Stored value format: \u0000<language>\u0000<text> — consumed by the
        // Alfresco data model when reading the field back.
        String storedField = dataModel.getStoredContentField(propertyQName);
        doc.setField(storedField, "\u0000" + languageFrom(locale) + "\u0000" + textContent);

        dataModel.getIndexedFieldNamesForProperty(propertyQName)
                .getFields()
                .forEach(field -> addFieldIfNotSet(doc, field.getField()));
    }
}
 
Example 4
Source File: ICUTransformFilter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Caches the term attribute together with its backing buffer and current
 * length so the transform can later operate on the term text in place.
 *
 * @param token the term attribute of the current token
 */
void setText(final CharTermAttribute token) {
  this.token = token;
  this.length = token.length();
  this.buffer = token.buffer();
}
 
Example 5
Source File: JsonPreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Override
public String toFormattedString(Field f) throws IOException {
  // Serializes a pre-analyzed field to the JSON format: version, optional
  // stored string/binary value, and one map per token carrying the term
  // plus any recognized attributes.
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    // Stored content may be present as a string, as binary, or both.
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      // Walk every attribute class attached to the stream and dispatch on
      // its type. Term text is collected separately (cTerm/tTerm) so the
      // char-term form can take precedence below.
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute catt = (CharTermAttribute)att;
          // Copy only the live length() chars of the (oversized) buffer.
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          // Remaining attribute kinds are serialized under well-known keys;
          // anything unrecognized falls back to class name -> toString().
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            // Empty payloads are skipped entirely.
            if (p != null && p.length > 0) {
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      // Prefer the char-term form of the token text over the bytes form.
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  // -1 = no line-length wrapping in the JSON output.
  return JSONUtil.toJSON(map, -1);
}
 
Example 6
Source File: IcuTransformTokenFilter.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Remembers the current token's attribute and snapshots its buffer and
 * length for the upcoming in-place transform.
 *
 * @param token the term attribute of the token about to be transformed
 */
void setText(final CharTermAttribute token) {
    this.token = token;
    this.length = token.length();
    this.buffer = token.buffer();
}