org.apache.lucene.analysis.tokenattributes.PayloadAttribute Java Examples

The following examples show how to use org.apache.lucene.analysis.tokenattributes.PayloadAttribute. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestConcatenatingTokenStream.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testInconsistentAttributes() throws IOException {

    AttributeFactory factory = newAttributeFactory();

    // One source registers only a payload attribute...
    final MockTokenizer payloadSource = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
    payloadSource.setReader(new StringReader("first words "));
    payloadSource.addAttribute(PayloadAttribute.class);

    // ...the other registers only a flags attribute.
    final MockTokenizer flagsSource = new MockTokenizer(factory, MockTokenizer.WHITESPACE, false);
    flagsSource.setReader(new StringReader("second words"));
    flagsSource.addAttribute(FlagsAttribute.class);

    // The concatenated stream must expose the union of both attribute sets.
    TokenStream ts = new ConcatenatingTokenStream(payloadSource, flagsSource);
    assertTrue(ts.hasAttribute(FlagsAttribute.class));
    assertTrue(ts.hasAttribute(PayloadAttribute.class));

    // Terms and offsets flow through in order across the two sources.
    assertTokenStreamContents(ts,
        new String[] { "first", "words", "second", "words" },
        new int[] { 0, 6, 12, 19 },
        new int[] { 5, 11, 18, 24 });

  }
 
Example #2
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the stream's next token has the given term text and payload
 * bytes ({@code expectPay == null} means "no payload expected").
 */
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
  CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
  PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payloadAtt.getPayload();
  if (payload != null) {
    // Bug fix: previously an unexpected payload with expectPay == null
    // produced an NPE on expectPay.length instead of a clean assert failure.
    assertTrue("payload " + payload + " was present but none was expected", expectPay != null);
    assertTrue(payload.length + " does not equal: " + expectPay.length, payload.length == expectPay.length);
    for (int i = 0; i < expectPay.length; i++) {
      assertTrue(expectPay[i] + " does not equal: " + payload.bytes[i + payload.offset],
          expectPay[i] == payload.bytes[i + payload.offset]);
    }
  } else {
    assertTrue("expectPay is not null and it should be", expectPay == null);
  }
}
 
Example #3
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Verifies integer payload decoding: "term|N" attaches encodeInt(N) to the term. */
public void testIntEncoding() throws Exception {
  String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new IntegerEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  // Expected term/payload pairs in stream order; a negative value means
  // the token carried no "|N" suffix and thus no payload.
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  int[] expectedValues = { -1, 1, 2, 3, -1, -1, -1, 5, 99, 83 };
  filter.reset();
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] pay = expectedValues[i] < 0 ? null : PayloadHelper.encodeInt(expectedValues[i]);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, pay);
  }
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example #4
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Verifies float payload decoding: "term|F" attaches encodeFloat(F) to the term. */
public void testFloatEncoding() throws Exception {
  String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(whitespaceMockTokenizer(test), '|', new FloatEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  // Expected term/payload pairs in stream order; null means no payload.
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  Float[] expectedValues = { null, 1.0f, 2.0f, 3.5f, 0.5f, null, null, 5.0f, 99.3f, 83.7f };
  filter.reset();
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] pay = expectedValues[i] == null ? null : PayloadHelper.encodeFloat(expectedValues[i]);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, pay);
  }
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example #5
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Verifies identity-encoded payloads: "term|TAG" attaches TAG's UTF-8 bytes. */
public void testPayloads() throws Exception {
  String test = "The quick|JJ red|JJ fox|NN jumped|VB over the lazy|JJ brown|JJ dogs|NN";
  DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
    (whitespaceMockTokenizer(test), 
     DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
  // Expected term/tag pairs in stream order; null tag means no payload.
  String[] expectedTerms = { "The", "quick", "red", "fox", "jumped", "over", "the", "lazy", "brown", "dogs" };
  String[] expectedTags = { null, "JJ", "JJ", "NN", "VB", null, null, "JJ", "JJ", "NN" };
  filter.reset();
  for (int i = 0; i < expectedTerms.length; i++) {
    byte[] pay = expectedTags[i] == null ? null : expectedTags[i].getBytes(StandardCharsets.UTF_8);
    assertTermEquals(expectedTerms[i], filter, termAtt, payAtt, pay);
  }
  assertFalse(filter.incrementToken());
  filter.end();
  filter.close();
}
 
Example #6
Source File: TestDelimitedPayloadTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Checks that a custom '*' delimiter yields a float payload of 0.1 per token. */
public void testDelim() throws Exception {
  Reader input = new StringReader("the*0.1 quick*0.1 red*0.1");
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(input);
  TokenStream stream = tokenFilterFactory("DelimitedPayload",
      "encoder", "float",
      "delimiter", "*").create(tokenizer);
  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAtt);
    byte[] encoded = payloadAtt.getPayload().bytes;
    assertNotNull(encoded);
    assertEquals(0.1f, PayloadHelper.decodeFloat(encoded), 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #7
Source File: TestDelimitedPayloadTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Checks the default '|' delimiter with the float encoder: payload decodes to 0.1. */
public void testEncoder() throws Exception {
  Reader input = new StringReader("the|0.1 quick|0.1 red|0.1");
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(input);
  TokenStream stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(tokenizer);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAtt);
    byte[] encoded = payloadAtt.getPayload().bytes;
    assertNotNull(encoded);
    assertEquals(0.1f, PayloadHelper.decodeFloat(encoded), 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #8
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test EdgeNGramFilterFactory on tokens with payloads: every edge n-gram
 * produced from "test" must inherit the original token's 0.1 float payload.
 */
public void testEdgeNGramFilterPayload() throws Exception {
  Reader input = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(input);
  // Attach the payload first, then fan the token out into 1-2 char grams.
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("EdgeNGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAtt);
    BytesRef payload = payloadAtt.getPayload();
    assertNotNull(payload);
    assertEquals(0.1f, PayloadHelper.decodeFloat(payload.bytes), 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #9
Source File: TestNGramFilters.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test NGramFilterFactory on tokens with payloads: every n-gram produced
 * from "test" must inherit the original token's 0.1 float payload.
 */
public void testNGramFilterPayload() throws Exception {
  Reader input = new StringReader("test|0.1");
  TokenStream stream = whitespaceMockTokenizer(input);
  // Attach the payload first, then fan the token out into 1-2 char grams.
  stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
  stream = tokenFilterFactory("NGram", "minGramSize", "1", "maxGramSize", "2").create(stream);

  stream.reset();
  while (stream.incrementToken()) {
    PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
    assertNotNull(payloadAtt);
    BytesRef payload = payloadAtt.getPayload();
    assertNotNull(payload);
    assertEquals(0.1f, PayloadHelper.decodeFloat(payload.bytes), 0.0f);
  }
  stream.end();
  stream.close();
}
 
Example #10
Source File: TestSnowball.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that SnowballFilter stems the term while leaving all other
 * token attributes (offsets, type, payload, increment, flags) untouched.
 */
public void testFilterTokens() throws Exception {
  SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
  CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
  OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
  TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
  PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
  PositionIncrementAttribute posIncAtt = filter.getAttribute(PositionIncrementAttribute.class);
  FlagsAttribute flagsAtt = filter.getAttribute(FlagsAttribute.class);

  // Bug fix: the return value was previously ignored; if the stream were
  // unexpectedly empty, the assertions below would inspect stale attributes.
  assertTrue(filter.incrementToken());

  assertEquals("accent", termAtt.toString());
  assertEquals(2, offsetAtt.startOffset());
  assertEquals(7, offsetAtt.endOffset());
  assertEquals("wrd", typeAtt.type());
  assertEquals(3, posIncAtt.getPositionIncrement());
  assertEquals(77, flagsAtt.getFlags());
  assertEquals(new BytesRef(new byte[]{0,1,2,3}), payloadAtt.getPayload());
}
 
Example #11
Source File: SpellingQueryConverter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Tokenizes {@code text} and appends one {@link Token} per emitted term to
 * {@code result}, shifting offsets by {@code offset} and forcing every
 * token's flags to {@code flagsAttValue}.
 *
 * @throws IOException if the analyzer's token stream fails
 */
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException {
  // Bug fix: try-with-resources guarantees the stream is closed even when
  // reset()/incrementToken() throws; the original leaked it on error.
  try (TokenStream stream = analyzer.tokenStream("", text)) {
    // TODO: support custom attributes
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
    PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
    OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
      token.setOffset(offset + offsetAtt.startOffset(), 
                      offset + offsetAtt.endOffset());
      token.setFlags(flagsAttValue); //overwriting any flags already set...
      token.setType(typeAtt.type());
      token.setPayload(payloadAtt.getPayload());
      token.setPositionIncrement(posIncAtt.getPositionIncrement());
      result.add(token);
    }
    stream.end();
  }
}
 
Example #12
Source File: PayloadTokenizer.java    From clue with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a tokenizer over {@code text}: the input is lower-cased and split
 * on commas, and the attributes needed to emit terms with payloads are
 * registered up front.
 *
 * @param text the raw input; also used to pre-size the term buffer
 * @throws IOException if setting the reader fails
 */
public PayloadTokenizer(String text)
        throws IOException {
  setReader(new StringReader(text));
  // NOTE(review): toLowerCase() without an explicit Locale uses the default
  // locale — confirm this is intended for the inputs this tokenizer sees.
  this.tokens = text.toLowerCase().split(",");
  
  termAttr = addAttribute(CharTermAttribute.class);
  termAttr.resizeBuffer(text.length()); // maximum size necessary is the size of the input
  payloadAttr = addAttribute(PayloadAttribute.class);
  // Fixed 4-byte payload buffer, presumably refilled per token — the
  // emitting code is outside this view.
  payload = new BytesRef(new byte[4]);
  positionAttr = addAttribute(PositionIncrementAttribute.class);
  offsetAttr = addAttribute(OffsetAttribute.class);
}
 
Example #13
Source File: TestAnalyzers.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the stream's tokens carry single-byte payloads whose first
 * byte counts up 1, 2, 3, ... in emission order.
 */
void verifyPayload(TokenStream ts) throws IOException {
  PayloadAttribute payloadAtt = ts.getAttribute(PayloadAttribute.class);
  ts.reset();
  byte expected = 1;
  while (ts.incrementToken()) {
    assertEquals(expected, payloadAtt.getPayload().bytes[0]);
    expected++;
  }
}
 
Example #14
Source File: MockPayloadAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input}, remembering the field name and resetting the
 * position/counter state; registers the attributes the filter works with.
 */
public MockPayloadFilter(TokenStream input, String fieldName) {
  super(input);
  this.fieldName = fieldName;
  // Reset per-stream counters (their use lives in code outside this view).
  pos = 0;
  i = 0;
  // Register (or reuse) the attributes this filter reads and writes.
  posIncrAttr = input.addAttribute(PositionIncrementAttribute.class);
  payloadAttr = input.addAttribute(PayloadAttribute.class);
  termAttr = input.addAttribute(CharTermAttribute.class);
}
 
Example #15
Source File: TestPayloads.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Borrows a byte[] from the pool, fills it with random data, and derives
 * the term text from those same bytes (decoded with the utf8 charset).
 * The {@code first} flag presumably makes the stream emit a single token —
 * incrementToken() is outside this view.
 */
PoolingPayloadTokenStream(ByteArrayPool pool) {
  this.pool = pool;
  payload = pool.get();
  generateRandomData(payload);
  term = new String(payload, 0, payload.length, utf8);
  first = true;
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAtt = addAttribute(CharTermAttribute.class);
}
 
Example #16
Source File: FieldInvertState.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sets attributeSource to a new instance and re-resolves the cached
 * attribute references; a no-op when the same source is passed again.
 */
void setAttributeSource(AttributeSource attributeSource) {
  if (this.attributeSource != attributeSource) {
    this.attributeSource = attributeSource;
    // The term-bytes attribute is looked up (getAttribute) rather than
    // created, while freq/posIncr/offset are created on demand (addAttribute).
    termAttribute = attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termFreqAttribute = attributeSource.addAttribute(TermFrequencyAttribute.class);
    posIncrAttribute = attributeSource.addAttribute(PositionIncrementAttribute.class);
    offsetAttribute = attributeSource.addAttribute(OffsetAttribute.class);
    // NOTE(review): getAttribute is used so a missing payload attribute is
    // presumably tolerated (null) — confirm against the AttributeSource
    // contract in this codebase.
    payloadAttribute = attributeSource.getAttribute(PayloadAttribute.class);
  }
}
 
Example #17
Source File: TestPayloads.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code in}, remembering the field name and the per-field payload
 * configuration; registers the payload and term attributes it will write.
 */
public PayloadFilter(TokenStream in, String fieldName, Map<String,PayloadData> fieldToData) {
  super(in);
  this.fieldToData = fieldToData;
  this.fieldName = fieldName;
  payloadAtt = addAttribute(PayloadAttribute.class);
  termAttribute = addAttribute(CharTermAttribute.class);
}
 
Example #18
Source File: DelimitedPayloadTokenFilterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the stream's next token has the given term text and payload
 * bytes, using caller-held attribute references ({@code expectPay == null}
 * means "no payload expected").
 */
void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
  assertTrue(stream.incrementToken());
  assertEquals(expected, termAtt.toString());
  BytesRef payload = payAtt.getPayload();
  if (payload != null) {
    // Bug fix: previously an unexpected payload with expectPay == null
    // produced an NPE on expectPay.length instead of a clean assert failure.
    assertTrue("payload " + payload + " was present but none was expected", expectPay != null);
    assertTrue(payload.length + " does not equal: " + expectPay.length, payload.length == expectPay.length);
    for (int i = 0; i < expectPay.length; i++) {
      assertTrue(expectPay[i] + " does not equal: " + payload.bytes[i + payload.offset],
          expectPay[i] == payload.bytes[i + payload.offset]);
    }
  } else {
    assertTrue("expectPay is not null and it should be", expectPay == null);
  }
}
 
Example #19
Source File: TestPayloadSpanUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input} and seeds the term sets the filter consults:
 * "entities" and "nopayload" presumably control which terms get payloads —
 * the logic lives in incrementToken(), outside this view.
 */
public PayloadFilter(TokenStream input) {
  super(input);
  pos = 0;
  entities.add("xx");
  entities.add("one");
  nopayload.add("nopayload");
  nopayload.add("np");
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example #20
Source File: SimpleQueryConverter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Whitespace-tokenizes the query and returns one Token per term, copying
 * every attribute (offsets, flags, payload, increment, type) onto it.
 */
@Override
public Collection<Token> convert(String origQuery) {
  Collection<Token> converted = new HashSet<>();

  try (WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); TokenStream ts = analyzer.tokenStream("", origQuery)) {
    // TODO: support custom attributes
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsets = ts.addAttribute(OffsetAttribute.class);
    TypeAttribute type = ts.addAttribute(TypeAttribute.class);
    FlagsAttribute flags = ts.addAttribute(FlagsAttribute.class);
    PayloadAttribute payload = ts.addAttribute(PayloadAttribute.class);
    PositionIncrementAttribute increment = ts.addAttribute(PositionIncrementAttribute.class);

    ts.reset();

    while (ts.incrementToken()) {
      Token token = new Token();
      token.copyBuffer(term.buffer(), 0, term.length());
      token.setOffset(offsets.startOffset(), offsets.endOffset());
      token.setFlags(flags.getFlags());
      token.setPayload(payload.getPayload());
      token.setPositionIncrement(increment.getPositionIncrement());
      token.setType(type.type());
      converted.add(token);
    }
    ts.end();
    return converted;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #21
Source File: TestPayloadSpans.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps {@code input} and seeds the term sets the filter consults:
 * "entities" and "nopayload" presumably control which terms get payloads —
 * the logic lives in incrementToken(), outside this view.
 */
public PayloadFilter(TokenStream input) {
  super(input);
  pos = 0;
  entities.add("xx");
  entities.add("one");
  nopayload.add("nopayload");
  nopayload.add("np");
  termAtt = addAttribute(CharTermAttribute.class);
  posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  payloadAtt = addAttribute(PayloadAttribute.class);
}
 
Example #22
Source File: SimplePreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes the field (optional stored value plus the analyzed token
 * stream) into the "simple" pre-analyzed text format: an optional
 * {@code =stored=} section followed by space-separated tokens, each token
 * being a comma-separated list of term text and single-letter attributes.
 */
@Override
public String toFormattedString(Field f) throws IOException {
  StringBuilder sb = new StringBuilder();
  sb.append(VERSION + " ");
  if (f.fieldType().stored()) {
    String s = f.stringValue();
    if (s != null) {
      // Encode the equals sign. Bug fix: the original used
      // s.replaceAll("=", "\\=") — in a regex *replacement* string the
      // backslash merely escapes the '=', so '=' was replaced with itself
      // and never escaped. String.replace is literal and yields "\=".
      s = s.replace("=", "\\=");
      sb.append('=');
      sb.append(s);
      sb.append('=');
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    StringBuilder tok = new StringBuilder();
    boolean next = false;
    while (ts.incrementToken()) {
      if (next) {
        sb.append(' ');
      } else {
        next = true;
      }
      tok.setLength(0);
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          // Term text from the char-term attribute.
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = escape(catt.buffer(), catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          // Term text recovered from the byte form.
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
          tTerm = escape(tTermChars, tTermChars.length);
        } else {
          if (tok.length() > 0) tok.append(',');
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.append("f=").append(Integer.toHexString(((FlagsAttribute) att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.append("s=").append(((OffsetAttribute) att).startOffset()).append(",e=").append(((OffsetAttribute) att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              tok.append("p=").append(bytesToHex(p.bytes, p.offset, p.length));
            } else if (tok.length() > 0) {
              tok.setLength(tok.length() - 1); // remove the last comma
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.append("i=").append(((PositionIncrementAttribute) att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.append("y=").append(escape(((TypeAttribute) att).type()));
          } else {
            // Unknown attribute: fall back to className=escapedValue.
            tok.append(cl.getName()).append('=').append(escape(att.toString()));
          }
        }
      }
      // Prefer the char-term text; fall back to the bytes-derived text.
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        if (tok.length() > 0) {
          tok.insert(0, term + ",");
        } else {
          tok.insert(0, term);
        }
      }
      sb.append(tok);
    }
  }
  return sb.toString();
}
 
Example #23
Source File: SimplePreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Rebuilds a captured attribute state from a parsed token: sets the term
 * text, applies each single-letter attribute key from the Tok, and finally
 * captures and returns the state (clearing the shared source afterwards).
 */
private static AttributeSource.State createState(AttributeSource a, Tok state, int tokenEnd) {
  a.clearAttributes();
  CharTermAttribute termAtt = a.addAttribute(CharTermAttribute.class);
  char[] tokChars = state.token.toString().toCharArray();
  termAtt.copyBuffer(tokChars, 0, tokChars.length);
  // Default start offset assumes the token ends at tokenEnd; explicit
  // "s"/"e" keys below may override either end.
  int tokenStart = tokenEnd - state.token.length();
  for (Entry<String, String> e : state.attr.entrySet()) {
    String k = e.getKey();
    if (k.equals("i")) {
      // position increment
      int incr = Integer.parseInt(e.getValue());
      PositionIncrementAttribute posIncr = a.addAttribute(PositionIncrementAttribute.class);
      posIncr.setPositionIncrement(incr);
    } else if (k.equals("s")) {
      // explicit start offset
      tokenStart = Integer.parseInt(e.getValue());
    } else if (k.equals("e")) {
      // explicit end offset
      tokenEnd = Integer.parseInt(e.getValue());
    } else if (k.equals("y")) {
      // token type
      TypeAttribute type = a.addAttribute(TypeAttribute.class);
      type.setType(e.getValue());
    } else if (k.equals("f")) {
      // flags, hex-encoded
      FlagsAttribute flags = a.addAttribute(FlagsAttribute.class);
      int f = Integer.parseInt(e.getValue(), 16);
      flags.setFlags(f);
    } else if (k.equals("p")) {
      // payload, hex-encoded; empty or undecodable payloads are dropped
      PayloadAttribute p = a.addAttribute(PayloadAttribute.class);
      byte[] data = hexToBytes(e.getValue());
      if (data != null && data.length > 0) {
        p.setPayload(new BytesRef(data));
      }
    } else {
      // unknown attribute
    }
  }
  // handle offset attr
  OffsetAttribute offset = a.addAttribute(OffsetAttribute.class);
  offset.setOffset(tokenStart, tokenEnd);
  State resState = a.captureState();
  a.clearAttributes();
  return resState;
}
 
Example #24
Source File: JsonPreAnalyzedParser.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Serializes the field (version, optional stored string/binary values, and
 * the analyzed token stream) into the JSON pre-analyzed format: a map with
 * one entry per token, each token a map keyed by short attribute constants.
 */
@Override
public String toFormattedString(Field f) throws IOException {
  Map<String,Object> map = new LinkedHashMap<>();
  map.put(VERSION_KEY, VERSION);
  if (f.fieldType().stored()) {
    // Stored content goes out as a plain string and/or base64 binary.
    String stringValue = f.stringValue();
    if (stringValue != null) {
      map.put(STRING_KEY, stringValue);
    }
    BytesRef binaryValue = f.binaryValue();
    if (binaryValue != null) {
      map.put(BINARY_KEY, Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
    }
  }
  TokenStream ts = f.tokenStreamValue();
  if (ts != null) {
    List<Map<String,Object>> tokens = new LinkedList<>();
    while (ts.incrementToken()) {
      Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
      String cTerm = null;
      String tTerm = null;
      Map<String,Object> tok = new TreeMap<>();
      while (it.hasNext()) {
        Class<? extends Attribute> cl = it.next();
        Attribute att = ts.getAttribute(cl);
        if (att == null) {
          continue;
        }
        if (cl.isAssignableFrom(CharTermAttribute.class)) {
          // Term text from the char-term attribute.
          CharTermAttribute catt = (CharTermAttribute)att;
          cTerm = new String(catt.buffer(), 0, catt.length());
        } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
          // Term text recovered from the byte form.
          TermToBytesRefAttribute tatt = (TermToBytesRefAttribute)att;
          tTerm = tatt.getBytesRef().utf8ToString();
        } else {
          if (cl.isAssignableFrom(FlagsAttribute.class)) {
            tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute)att).getFlags()));
          } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
            tok.put(OFFSET_START_KEY, ((OffsetAttribute)att).startOffset());
            tok.put(OFFSET_END_KEY, ((OffsetAttribute)att).endOffset());
          } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
            BytesRef p = ((PayloadAttribute)att).getPayload();
            if (p != null && p.length > 0) {
              // Payload bytes are base64-encoded; empty payloads are skipped.
              tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
            }
          } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
            tok.put(POSINCR_KEY, ((PositionIncrementAttribute)att).getPositionIncrement());
          } else if (cl.isAssignableFrom(TypeAttribute.class)) {
            tok.put(TYPE_KEY, ((TypeAttribute)att).type());
          } else {
            // Unknown attribute: fall back to className=toString().
            tok.put(cl.getName(), att.toString());
          }
        }
      }
      // Prefer the char-term text; fall back to the bytes-derived text.
      String term = null;
      if (cTerm != null) {
        term = cTerm;
      } else {
        term = tTerm;
      }
      if (term != null && term.length() > 0) {
        tok.put(TOKEN_KEY, term);
      }
      tokens.add(tok);
    }
    map.put(TOKENS_KEY, tokens);
  }
  return JSONUtil.toJSON(map, -1);
}
 
Example #25
Source File: Token.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Exposes the base attributes plus this Token's flags and payload fields
 * to the reflector.
 */
@Override
public void reflectWith(AttributeReflector reflector) {
  super.reflectWith(reflector);
  reflector.reflect(FlagsAttribute.class, "flags", flags);
  reflector.reflect(PayloadAttribute.class, "payload", payload);
}
 
Example #26
Source File: Token.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Copies base state, flags, and payload to {@code target}.
 */
@Override
public void copyTo(AttributeImpl target) {
  super.copyTo(target);
  ((FlagsAttribute) target).setFlags(flags);
  // Deep-copy the payload so the target does not share mutable bytes.
  ((PayloadAttribute) target).setPayload((payload == null) ? null : BytesRef.deepCopyOf(payload));
}
 
Example #27
Source File: MtasPreAnalyzedParser.java    From mtas with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the serialized MtasUpdateRequestProcessorResult from {@code reader}
 * and converts each result item into a captured AttributeSource state on
 * {@code res.states}; also copies the stored string/binary values when any
 * items are present.
 */
@Override
public ParseResult parse(Reader reader, AttributeSource parent)
    throws IOException {
  ParseResult res = new ParseResult();

  // get MtasUpdateRequestProcessorResult
  StringBuilder sb = new StringBuilder();
  char[] buf = new char[128];
  int cnt;
  while ((cnt = reader.read(buf)) > 0) {
    sb.append(buf, 0, cnt);
  }
  Iterator<MtasUpdateRequestProcessorResultItem> iterator;

  try (
      MtasUpdateRequestProcessorResultReader result = new MtasUpdateRequestProcessorResultReader(
          sb.toString());) {
    iterator = result.getIterator();
    if (iterator != null && iterator.hasNext()) {
      res.str = result.getStoredStringValue();
      res.bin = result.getStoredBinValue();
    } else {
      // No items: return an empty result. NOTE(review): this close() is
      // redundant — the try-with-resources header already closes result.
      res.str = null;
      res.bin = null;
      result.close();
      return res;
    }
    parent.clearAttributes();
    while (iterator.hasNext()) {
      MtasUpdateRequestProcessorResultItem item = iterator.next();
      // Only the fields present on the item are copied into attributes.
      if (item.tokenTerm != null) {
        CharTermAttribute catt = parent.addAttribute(CharTermAttribute.class);
        catt.append(item.tokenTerm);
      }
      if (item.tokenFlags != null) {
        FlagsAttribute flags = parent.addAttribute(FlagsAttribute.class);
        flags.setFlags(item.tokenFlags);
      }
      if (item.tokenPosIncr != null) {
        PositionIncrementAttribute patt = parent
            .addAttribute(PositionIncrementAttribute.class);
        patt.setPositionIncrement(item.tokenPosIncr);
      }
      if (item.tokenPayload != null) {
        PayloadAttribute p = parent.addAttribute(PayloadAttribute.class);
        p.setPayload(new BytesRef(item.tokenPayload));
      }
      if (item.tokenOffsetStart != null && item.tokenOffsetEnd != null) {
        OffsetAttribute offset = parent.addAttribute(OffsetAttribute.class);
        offset.setOffset(item.tokenOffsetStart, item.tokenOffsetEnd);
      }
      // capture state and add to result
      State state = parent.captureState();
      res.states.add(state.clone());
      // reset for reuse
      parent.clearAttributes();
    }
  } catch (IOException e) {
    // ignore — NOTE(review): parse errors are swallowed and only logged at
    // debug level; callers receive a partially-filled result.
    log.debug(e);
  }
  return res;
}
 
Example #28
Source File: OpenNLPTokenizer.java    From jate with GNU Lesser General Public License v3.0 4 votes vote down vote up
/** Serializes the MWE metadata context and attaches it as the token's payload. */
public void addPayloadAttribute(PayloadAttribute attribute, MWEMetadata ctx) {
    attribute.setPayload(new BytesRef(MWEMetadata.serialize(ctx)));
}
 
Example #29
Source File: MWEFilter.java    From jate with GNU Lesser General Public License v3.0 4 votes vote down vote up
/** Serializes the MWE metadata context and attaches it as the token's payload. */
public void addPayloadAttribute(PayloadAttribute attribute, MWEMetadata ctx) {
    attribute.setPayload(new BytesRef(MWEMetadata.serialize(ctx)));
}
 
Example #30
Source File: BaseTermVectorsFormatTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a stream of {@code len} random tokens drawn from the sample terms,
 * with random position increments, non-decreasing offsets, and random
 * payloads, plus the reverse-lookup maps and frequency histogram the test
 * uses for verification.
 */
public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) {
  terms = new String[len];
  termBytes = new BytesRef[len];
  positionsIncrements = new int[len];
  positions = new int[len];
  startOffsets = new int[len];
  endOffsets = new int[len];
  payloads = new BytesRef[len];
  for (int i = 0; i < len; ++i) {
    final int o = random().nextInt(sampleTerms.length);
    terms[i] = sampleTerms[o];
    termBytes[i] = sampleTermBytes[o];
    // First token must advance at least one position; later ones may stay put.
    positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10);
    if (i == 0) {
      startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
    } else {
      // Start offsets are non-decreasing; rarely take a large jump.
      startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
    }
    endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
  }

  // Derive absolute positions from the increments (first position is incr-1).
  for (int i = 0; i < len; ++i) {
    if (i == 0) {
      positions[i] = positionsIncrements[i] - 1;
    } else {
      positions[i] = positions[i - 1] + positionsIncrements[i];
    }
  }
  // Rarely share one payload instance across all tokens; otherwise each
  // token gets its own random payload.
  if (rarely()) {
    Arrays.fill(payloads, randomPayload());
  } else {
    for (int i = 0; i < len; ++i) {
      payloads[i] = randomPayload();
    }
  }

  // Reverse indexes: position -> token indices, start offset -> token indices.
  positionToTerms = new HashMap<>(len);
  startOffsetToTerms = new HashMap<>(len);
  for (int i = 0; i < len; ++i) {
    if (!positionToTerms.containsKey(positions[i])) {
      positionToTerms.put(positions[i], new HashSet<Integer>(1));
    }
    positionToTerms.get(positions[i]).add(i);
    if (!startOffsetToTerms.containsKey(startOffsets[i])) {
      startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
    }
    startOffsetToTerms.get(startOffsets[i]).add(i);
  }

  // Term frequency histogram over the generated terms.
  freqs = new HashMap<>();
  for (String term : terms) {
    if (freqs.containsKey(term)) {
      freqs.put(term, freqs.get(term) + 1);
    } else {
      freqs.put(term, 1);
    }
  }

  // "Permissive" impl presumably accepts offsets the default one would
  // reject — confirm against PermissiveOffsetAttributeImpl.
  addAttributeImpl(new PermissiveOffsetAttributeImpl());

  termAtt = addAttribute(CharTermAttribute.class);
  piAtt = addAttribute(PositionIncrementAttribute.class);
  oAtt = addAttribute(OffsetAttribute.class);
  pAtt = addAttribute(PayloadAttribute.class);
}