org.apache.lucene.util.Attribute Java Examples

The following examples show how to use org.apache.lucene.util.Attribute. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConcatenatingTokenStream.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a single {@link AttributeSource} carrying the attribute classes of every given stream.
 *
 * <p>The result starts as a clone of the first stream's attributes. Each later stream then has
 * all of its attribute classes added to the result and its state copied into it.
 *
 * @param sources the streams to combine; the first element seeds the result
 * @return the combined attribute source
 * @throws IllegalArgumentException if adding or copying attributes raises an
 *         {@code IllegalArgumentException}; the original exception is kept as the cause
 */
private static AttributeSource combineSources(TokenStream... sources) {
  final AttributeSource combined = sources[0].cloneAttributes();
  try {
    int index = 1;
    while (index < sources.length) {
      final TokenStream current = sources[index];
      for (Iterator<Class<? extends Attribute>> classes = current.getAttributeClassesIterator(); classes.hasNext(); ) {
        combined.addAttribute(classes.next());
      }
      // check attributes can be captured
      current.copyTo(combined);
      index++;
    }
    return combined;
  } catch (IllegalArgumentException e) {
    throw new IllegalArgumentException("Attempted to concatenate TokenStreams with different attribute types", e);
  }
}
 
Example #2
Source File: NumericTokenizer.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Make this tokenizer get attributes from the delegate token stream.
 *
 * @param source the attribute source every attribute request is forwarded to
 * @return a factory whose instances are the ones obtained from {@code source.addAttribute}
 */
private static final AttributeFactory delegatingAttributeFactory(final AttributeSource source) {
    final AttributeFactory delegating = new AttributeFactory() {
        @Override
        public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
            // Hand back the instance held by the delegate instead of creating a new one.
            final Attribute shared = source.addAttribute(attClass);
            return (AttributeImpl) shared;
        }
    };
    return delegating;
}
 
Example #3
Source File: NumericTokenizer.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
protected NumericTokenizer(NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException {
    super(delegatingAttributeFactory(numericTokenStream));
    this.numericTokenStream = numericTokenStream;
    // Add attributes from the numeric token stream, this works fine because the attribute factory delegates to numericTokenStream
    for (Iterator<Class<? extends Attribute>> it = numericTokenStream.getAttributeClassesIterator(); it.hasNext();) {
        addAttribute(it.next());
    }
    this.extra = extra;
    this.buffer = buffer;
    started = true;
}
 
Example #4
Source File: TransportAnalyzeAction.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the reflected key/value pairs of a stream's attributes, leaving out the term,
 * position increment, offset and type attributes.
 *
 * @param stream current TokenStream
 * @param includeAttributes lower-cased keys to keep; {@code null} or empty keeps every key
 * @return Map&lt;key value&gt; sorted by key; {@code BytesRef} values are stored as strings
 */
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
    final Map<String, Object> collected = new TreeMap<>();

    stream.reflectWith(new AttributeReflector() {
        @Override
        public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            final boolean skippedType = CharTermAttribute.class.isAssignableFrom(attClass)
                    || PositionIncrementAttribute.class.isAssignableFrom(attClass)
                    || OffsetAttribute.class.isAssignableFrom(attClass)
                    || TypeAttribute.class.isAssignableFrom(attClass);
            if (skippedType) {
                return;
            }

            final boolean filterActive = includeAttributes != null && !includeAttributes.isEmpty();
            if (filterActive && !includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
                return;
            }

            // BytesRef values are recorded through their string form.
            final Object stored = (value instanceof BytesRef) ? value.toString() : value;
            collected.put(key, stored);
        }
    });

    return collected;
}
 
Example #5
Source File: Test2BTerms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies {@code MyTermAttributeImpl} for {@code TermToBytesRefAttribute}, rejects any
 * {@code CharTermAttribute} request, and forwards everything else to the delegate factory.
 */
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  if (attClass == TermToBytesRefAttribute.class) {
    return new MyTermAttributeImpl();
  } else if (CharTermAttribute.class.isAssignableFrom(attClass)) {
    throw new IllegalArgumentException("no");
  } else {
    return delegate.createAttributeInstance(attClass);
  }
}
 
Example #6
Source File: OpenNLPPOSTaggerFilter.java    From jate with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Emits the next buffered token with its POS tag added to the payload metadata.
 *
 * <p>On the first call all tokens are gathered via {@code walkTokens()} and tagged via
 * {@code createTags}. Each call then restores one buffered token's attributes into this
 * stream and rewrites the payload with the tag for that token.
 *
 * @return {@code true} if a token was emitted; {@code false} when no words were gathered or
 *         all buffered tokens have been emitted
 */
@Override
public boolean incrementToken() throws IOException {
    // NOTE(review): a clearAttributes() call was commented out here in the original; confirm it is not needed.
    if (first) {
        // gather all tokens from doc
        final String[] gatheredWords = walkTokens();
        if (gatheredWords.length == 0) {
            return false;
        }
        // tagging
        posTags = createTags(gatheredWords);
        first = false;
        tokenIdx = 0;
    }

    final boolean exhausted = tokenIdx == tokenAttrs.size();
    if (exhausted) {
        resetParams();
        return false;
    }

    final AttributeSource buffered = tokenAttrs.get(tokenIdx);
    for (Iterator<? extends Class<? extends Attribute>> classes = buffered.getAttributeClassesIterator(); classes.hasNext(); ) {
        final Class<? extends Attribute> attributeClass = classes.next();
        if (hasAttribute(attributeClass)) {
            continue;
        }
        addAttribute(attributeClass);
    }
    buffered.copyTo(this);

    // Start from the metadata already in the payload, if any, then add the POS tag.
    final BytesRef currentPayload = exitingPayload.getPayload();
    final MWEMetadata metadata;
    if (currentPayload == null) {
        metadata = new MWEMetadata();
    } else {
        metadata = MWEMetadata.deserialize(currentPayload.utf8ToString());
    }
    metadata.addMetaData(MWEMetadataType.POS, posTags[tokenIdx]);
    exitingPayload.setPayload(new BytesRef(MWEMetadata.serialize(metadata)));

    tokenIdx++;
    return true;
}
 
Example #7
Source File: TransportExtendedAnalyzeAction.java    From elasticsearch-extended-analyze with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the reflected key/value pairs of a stream's attributes, grouped by attribute class
 * name. The term, position increment, offset and type attributes are left out.
 *
 * @param stream current TokenStream
 * @param includeAttributes lower-cased simple class names of the attributes to keep;
 *        {@code null} or empty keeps every attribute
 * @param shortAttrName if true, group under the text after the last '.' of the class name
 *        instead of the fully qualified class name
 * @return nested map: attribute class name -&gt; (key -&gt; value); {@code BytesRef} values are
 *         stored as strings
 */
private Map<String, Map<String, Object>> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes, final boolean shortAttrName) {
    final Map<String, Map<String, Object>> extendedAttributes = new TreeMap<>();

    stream.reflectWith(new AttributeReflector() {
        @Override
        public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
            if (CharTermAttribute.class.isAssignableFrom(attClass)
                    || PositionIncrementAttribute.class.isAssignableFrom(attClass)
                    || OffsetAttribute.class.isAssignableFrom(attClass)
                    || TypeAttribute.class.isAssignableFrom(attClass)) {
                return;
            }

            // Lower-case with a fixed locale so the filter match does not depend on the JVM default locale.
            if (includeAttributes != null && !includeAttributes.isEmpty()
                    && !includeAttributes.contains(attClass.getSimpleName().toLowerCase(java.util.Locale.ROOT))) {
                return;
            }

            // Use the same name for lookup and storage. Looking up by the full name while
            // storing under the short name replaced the per-class map on every call, so only
            // the last key of each attribute class survived.
            final String className = attClass.getName();
            final String groupName = shortAttrName
                    ? className.substring(className.lastIndexOf(".") + 1)
                    : className;

            Map<String, Object> currentAttributes = extendedAttributes.get(groupName);
            if (currentAttributes == null) {
                currentAttributes = new HashMap<>();
                extendedAttributes.put(groupName, currentAttributes);
            }

            if (value instanceof BytesRef) {
                final BytesRef p = (BytesRef) value;
                value = p.toString();
            }
            currentAttributes.put(key, value);
        }
    });

    return extendedAttributes;
}
 
Example #8
Source File: LegacyNumericTokenStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Forwards attribute creation to the delegate factory, except that any
 * {@code CharTermAttribute} request is rejected with an {@code IllegalArgumentException}.
 */
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
  final boolean charTermRequested = CharTermAttribute.class.isAssignableFrom(attClass);
  if (!charTermRequested) {
    return delegate.createAttributeInstance(attClass);
  }
  throw new IllegalArgumentException("LegacyNumericTokenStream does not support CharTermAttribute.");
}
 
Example #9
Source File: JsonPreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a field to JSON: the version, the stored string and binary values (when the field
 * type is stored), and one map per token of the field's token stream.
 *
 * <p>Each token map holds the known attributes under their short keys, any other attribute
 * under its class name, and the term text under {@code TOKEN_KEY} when it is non-empty. The
 * {@code CharTermAttribute} text is preferred over the {@code TermToBytesRefAttribute} text.
 *
 * @param f the field to serialize
 * @return the JSON produced by {@code JSONUtil.toJSON(map, -1)}
 * @throws IOException declared by the signature; {@code incrementToken()} is the likely source
 */
@Override
public String toFormattedString(Field f) throws IOException {
  final Map<String,Object> root = new LinkedHashMap<>();
  root.put(VERSION_KEY, VERSION);

  if (f.fieldType().stored()) {
    final String storedString = f.stringValue();
    if (storedString != null) {
      root.put(STRING_KEY, storedString);
    }
    final BytesRef storedBinary = f.binaryValue();
    if (storedBinary != null) {
      root.put(BINARY_KEY, Base64.byteArrayToBase64(storedBinary.bytes, storedBinary.offset, storedBinary.length));
    }
  }

  final TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    final List<Map<String,Object>> tokenMaps = new LinkedList<>();
    while (ts.incrementToken()) {
      String charTerm = null;
      String bytesTerm = null;
      final Map<String,Object> tokenMap = new TreeMap<>();

      for (Iterator<Class<? extends Attribute>> classes = ts.getAttributeClassesIterator(); classes.hasNext(); ) {
        final Class<? extends Attribute> cl = classes.next();
        final Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }

        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          final CharTermAttribute charAtt = (CharTermAttribute) att;
          charTerm = new String(charAtt.buffer(), 0, charAtt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          bytesTerm = ((TermToBytesRefAttribute) att).getBytesRef().utf8ToString();
        } else if (cl.isAssignableFrom(FlagsAttribute.class)) {
          tokenMap.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
        } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
          final OffsetAttribute offsets = (OffsetAttribute) att;
          tokenMap.put(OFFSET_START_KEY, offsets.startOffset());
          tokenMap.put(OFFSET_END_KEY, offsets.endOffset());
        } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
          final BytesRef payload = ((PayloadAttribute) att).getPayload();
          // Null or empty payloads are left out of the token map.
          if (payload != null && payload.length > 0) {
            tokenMap.put(PAYLOAD_KEY, Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
          }
        } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
          tokenMap.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
        } else if (cl.isAssignableFrom(TypeAttribute.class)) {
          tokenMap.put(TYPE_KEY, ((TypeAttribute) att).type());
        } else {
          // Unknown attribute: keyed by class name, valued by its toString().
          tokenMap.put(cl.getName(), att.toString());
        }
      }

      final String term = (charTerm != null) ? charTerm : bytesTerm;
      if (term != null && !term.isEmpty()) {
        tokenMap.put(TOKEN_KEY, term);
      }
      tokenMaps.add(tokenMap);
    }
    root.put(TOKENS_KEY, tokenMaps);
  }

  return JSONUtil.toJSON(root, -1);
}
 
Example #10
Source File: SimplePreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes a field to the simple pre-analyzed text format.
 *
 * <p>The output is {@code VERSION}, a space, then (when the field type is stored and has a
 * string value) that value wrapped in {@code '='} with inner equals signs escaped as
 * {@code \=}. The tokens of the field's token stream follow, separated by single spaces.
 * Each token is written as the term text followed by comma-separated attributes:
 * {@code f=} flags (hex), {@code s=}/{@code e=} offsets, {@code p=} payload (hex),
 * {@code i=} position increment, {@code y=} type, and {@code className=value} for any other
 * attribute. The {@code CharTermAttribute} text is preferred over the
 * {@code TermToBytesRefAttribute} text.
 *
 * @param f the field to serialize
 * @return the formatted string
 * @throws IOException declared by the signature; {@code incrementToken()} is the likely source
 */
@Override
public String toFormattedString(Field f) throws IOException {
  StringBuilder sb = new StringBuilder();
  sb.append(VERSION + " ");
  if (f.fieldType().stored()) {
    String s = f.stringValue();
    if (s != null) {
      // encode the equals sign
      // String.replace is literal. replaceAll("=", "\\=") was a no-op, because a backslash in
      // a regex replacement string escapes the next character and so yields a plain '='.
      s = s.replace("=", "\\=");
      sb.append('=');
      sb.append(s);
      sb.append('=');
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    StringBuilder tok = new StringBuilder();
    boolean next = false;
    while (ts.incrementToken()) {
      // Separate tokens with one space; nothing before the first token.
      if (next) {
        sb.append(' ');
      } else {
        next = true;
      }
      tok.setLength(0);
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = escape(catt.buffer(), catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
          tTerm = escape(tTermChars, tTermChars.length);
        } else {
          // Every non-term attribute is comma-separated from the previous one.
          if (tok.length() > 0) {
            tok.append(',');
          }
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.append("f=").append(Integer.toHexString(((FlagsAttribute) att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.append("s=").append(((OffsetAttribute) att).startOffset()).append(",e=").append(((OffsetAttribute) att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              tok.append("p=").append(bytesToHex(p.bytes, p.offset, p.length));
            } else if (tok.length() > 0) {
              tok.setLength(tok.length() - 1); // remove the last comma
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.append("i=").append(((PositionIncrementAttribute) att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.append("y=").append(escape(((TypeAttribute) att).type()));
          } else {
            // Unknown attribute: written as className=escaped toString().
            tok.append(cl.getName()).append('=').append(escape(att.toString()));
          }
        }
      }
      String term = (cTerm != null) ? cTerm : tTerm;
      if (term != null && term.length() > 0) {
        // The term goes first, followed by a comma when attributes were written.
        if (tok.length() > 0) {
          tok.insert(0, term + ",");
        } else {
          tok.insert(0, term);
        }
      }
      sb.append(tok);
    }
  }
  return sb.toString();
}
 
Example #11
Source File: ITokenizer.java    From datawave with Apache License 2.0 2 votes vote down vote up
/**
 * Reports whether the tokenizer has the given attribute available.
 *
 * @param clazz
 *            the attribute class
 * @return true if the tokenizer has that attribute available
 */
boolean hasAttribute(Class<? extends Attribute> clazz);
 
Example #12
Source File: ITokenizer.java    From datawave with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the tokenizer's attribute for the given attribute class.
 *
 * @param <A>
 *            the attribute type, as fixed by {@code clazz}
 * @param clazz
 *            the attribute class
 * @return the Attribute for the specified class
 */
<A extends Attribute> A getAttribute(Class<A> clazz);