Java Code Examples for org.apache.lucene.analysis.synonym.SynonymMap

The following examples show how to use org.apache.lucene.analysis.synonym.SynonymMap. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestConcatenateGraphFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code ConcatenateGraphFilter} joins tokens with the supplied
 * {@code '-'} separator for the original keyword and for each of its synonyms.
 */
@Test
public void testSeparatorWithSynonyms() throws IOException {
  final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("three words synonym"), true);

  // Leading and trailing whitespace is part of the fixture.
  final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  source.setReader(new StringReader(" mykeyword another keyword   "));

  final SynonymGraphFilter graph = new SynonymGraphFilter(source, synonyms.build(), true);
  final ConcatenateGraphFilter concatenated = new ConcatenateGraphFilter(graph, '-', false, 100);

  final String[] expectedTerms = {
      "mykeyword-another-keyword",
      "mysynonym-another-keyword",
      "three words synonym-another-keyword"
  };
  final int[] expectedPosIncrements = {1, 0, 0};
  assertTokenStreamContents(concatenated, expectedTerms, null, null, expectedPosIncrements);
}
 
Example 2
Source Project: lucene-solr   Source File: TestConditionalTokenFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps a {@code SynonymGraphFilter} in a {@code SkipMatchingFilter} keyed on "c" and checks the
 * resulting terms, position increments and position lengths: the expected output carries the
 * synonym "f" for "a b" while "c d" is emitted without "g".
 */
public void testWrapGraphs() throws Exception {
  final TokenStream source = whitespaceMockTokenizer("a b c d e");

  // Parse two multi-word rules in Solr synonym syntax.
  final SynonymMap synonymMap;
  try (Analyzer ruleAnalyzer = new MockAnalyzer(random())) {
    final SolrSynonymParser ruleParser = new SolrSynonymParser(true, true, ruleAnalyzer);
    ruleParser.parse(new StringReader("a b, f\nc d, g"));
    synonymMap = ruleParser.build();
  }

  final TokenStream filtered =
      new SkipMatchingFilter(source, in -> new SynonymGraphFilter(in, synonymMap, true), "c");

  final String[] expectedTerms = {"f", "a", "b", "c", "d", "e"};
  final int[] expectedPosIncrements = {1, 0, 1, 1, 1, 1};
  final int[] expectedPosLengths = {2, 1, 1, 1, 1, 1};
  assertTokenStreamContents(filtered, expectedTerms, null, null, null,
      expectedPosIncrements, expectedPosLengths);
}
 
Example 3
/**
 * Blasts random strings through an analyzer whose synonym map is populated with random entries.
 *
 * <p>Each iteration builds a fresh {@code SynonymMap} (random dedup flag, at least 10 random
 * entries, random keep-original flags), wraps it in a {@code SynonymGraphFilter} followed by a
 * {@code RemoveDuplicatesTokenFilter}, and feeds it to {@code checkRandomData}.
 */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    // try-with-resources: the analyzer is closed even when checkRandomData throws,
    // which the previous trailing analyzer.close() call did not guarantee.
    try (Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    }) {
      checkRandomData(random(), analyzer, 200);
    }
  }
}
 
Example 4
Source Project: lucene-solr   Source File: TestLimitTokenPositionFilter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a {@code LimitTokenPositionFilter} with a maximum position of 3 over a synonym-expanded
 * stream, once with {@code consumeAll} set and once without, and checks the emitted terms and
 * position increments.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
  final boolean[] consumeAllModes = {true, false};
  for (final boolean consumeAll : consumeAllModes) {
    final MockTokenizer source = whitespaceMockTokenizer("one two three four five");
    // The tokenizer's consistency checks are only usable when every token is consumed.
    source.setEnableChecks(consumeAll);

    final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
    for (final String single : new String[]{"first", "alpha", "beguine"}) {
      synonyms.add(new CharsRef("one"), new CharsRef(single), true);
    }

    // One scratch builder is reused for both multi-word synonyms.
    final CharsRefBuilder scratch = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, scratch);
    synonyms.add(new CharsRef("one"), scratch.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, scratch);
    synonyms.add(new CharsRef("two"), scratch.get(), true);

    final SynonymMap builtMap = synonyms.build();
    @SuppressWarnings("deprecation")
    final TokenStream expanded = new SynonymFilter(source, builtMap, true);
    final TokenStream limited = new LimitTokenPositionFilter(expanded, 3, consumeAll);

    // "only", the 4th word of "and indubitably single only", is not emitted
    // because its position is greater than 3.
    final String[] expectedTerms = {
        "one", "first", "alpha", "beguine", "and",
        "two", "indubitably", "dopple",
        "three", "single", "ganger"
    };
    final int[] expectedPosIncrements = {1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0};
    assertTokenStreamContents(limited, expectedTerms, expectedPosIncrements);
  }
}
 
Example 5
/**
 * Swaps in a new synonym map and rebuilds every piece of state derived from it.
 *
 * <p>All fields assigned here were {@code final} in the original filter and were made
 * reassignable for this update logic. They must only be reassigned through this method;
 * assigning them elsewhere may introduce bugs.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if {@code synonymMap.fst} is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
    // Validate before touching any field so a rejected map leaves the previous state intact.
    if (synonymMap.fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }

    this.synonyms = synonymMap;
    this.fst = synonymMap.fst;
    this.fstReader = this.fst.getBytesReader();

    // The rolling buffers hold one slot more than the map's maxHorizontalContext.
    this.rollBufferSize = 1 + synonymMap.maxHorizontalContext;
    this.futureInputs = new DynamicSynonymFilter.PendingInput[this.rollBufferSize];
    this.futureOutputs = new DynamicSynonymFilter.PendingOutputs[this.rollBufferSize];
    for (int pos = 0; pos < this.rollBufferSize; ++pos) {
        this.futureInputs[pos] = new DynamicSynonymFilter.PendingInput();
        this.futureOutputs[pos] = new DynamicSynonymFilter.PendingOutputs();
    }

    // Diamond instead of the raw FST.Arc type.
    this.scratchArc = new FST.Arc<>();
}
 
Example 6
/**
 * Reloads the synonym map when {@code synonymFile} reports that a reload is needed and pushes
 * the new map to every filter registered under {@code indexName}.
 *
 * <p>A reload that yields no map, or a map without an FST, is logged and ignored. Any exception
 * is caught and logged so that it never propagates out of this method.
 */
@Override
public void run() {
    try {
        if (synonymFile.isNeedReloadSynonymMap()) {
            SynonymMap newSynonymMap = synonymFile.reloadSynonymMap();
            if (newSynonymMap == null || newSynonymMap.fst == null) {
                logger.error("Monitor thread reload remote synonym non-null! indexName:{} path:{}",
                        indexName, synonymFile.getLocation());
                return;
            }
            synonymMap = newSynonymMap;
            Iterator<SynonymDynamicSupport> filters = dynamicSynonymFilters.get(indexName).iterator();
            while (filters.hasNext()) {
                filters.next().update(synonymMap);
                logger.info("success reload synonym success! indexName:{} path:{}", indexName, synonymFile.getLocation());
            }
        }
    } catch (Exception e) {
        // Pass the exception as the trailing argument so its cause and stack trace reach the log;
        // previously it was dropped.
        logger.error("Monitor thread reload remote synonym error! indexName:{} path:{}",
                indexName, synonymFile.getLocation(), e);
    }
}
 
Example 7
/**
 * Resets the tokenizer's block and read-buffer state and, when the synonym loader reports a
 * change since {@code lastModified}, switches to the loader's current synonym map.
 *
 * @throws IllegalArgumentException if the reloaded map has a {@code null} FST
 */
@Override
public void reset() throws IOException {
    super.reset();

    // Per-stream scanning state.
    block.setLength(0);
    prevToken = null;
    readBufferIndex = BUFFER_SIZE;
    readBufferLen = 0;
    ch = 0;
    blkStart = 0;
    nextBlkStart = 0;

    final boolean synonymsChanged = synonymLoader != null && synonymLoader.isUpdate(lastModified);
    if (!synonymsChanged) {
        return;
    }

    // The timestamp is recorded even when the loader then returns no map.
    lastModified = synonymLoader.getLastModified();
    final SynonymMap reloaded = synonymLoader.getSynonymMap();
    if (reloaded == null) {
        return;
    }

    synonymMap = reloaded;
    fst = synonymMap.fst;
    if (fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }
    fstReader = fst.getBytesReader();
    scratchArc = new FST.Arc<>();
    clearAttributes();
}
 
Example 8
/**
 * Builds token stream components around an {@code NGramSynonymTokenizer} that reads its
 * synonyms from this object's {@code synonyms} field.
 *
 * @param fieldName field being analyzed (not used here)
 */
protected TokenStreamComponents createComponents(String fieldName) {
  // Loader stub: always hands back the already-built map and never creates one itself.
  final SynonymLoader fixedLoader = new SynonymLoader(null, null, expand, null) {
    @Override
    public SynonymMap getSynonymMap() {
      return synonyms;
    }

    @Override
    protected void createSynonymMap(boolean reload) {
      // nothing
    }
  };

  final Tokenizer source = new NGramSynonymTokenizer(n, delimiters, expand, true, fixedLoader);
  return new TokenStreamComponents(source);
}
 
Example 9
Source Project: crate   Source File: SynonymGraphTokenFilterFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the synonym map with an analyzer assembled from the preceding analysis chain and
 * returns a factory that applies a {@code SynonymGraphFilter} with that map.
 *
 * <p>The returned factory reports this factory's name. When the built map has no FST it passes
 * the incoming token stream through unchanged.
 */
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                                          List<TokenFilterFactory> previousTokenFilters,
                                                          Function<String, TokenFilterFactory> allFilters) {
    final Analyzer synonymAnalyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
    final SynonymMap synonymMap = buildSynonyms(synonymAnalyzer, getRulesFromSettings(environment));
    final String factoryName = name();

    return new TokenFilterFactory() {
        @Override
        public String name() {
            return factoryName;
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // A null fst means there are no synonyms to apply.
            if (synonymMap.fst == null) {
                return tokenStream;
            }
            return new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase);
        }
    };
}
 
Example 10
Source Project: crate   Source File: SynonymTokenFilterFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the synonym map with an analyzer assembled from the preceding analysis chain and
 * returns a factory that applies a {@code SynonymFilter} with that map.
 *
 * <p>The returned factory reports this factory's name. When the built map has no FST it passes
 * the incoming token stream through unchanged.
 */
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                                          List<TokenFilterFactory> previousTokenFilters,
                                                          Function<String, TokenFilterFactory> allFilters) {
    final Analyzer synonymAnalyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
    final SynonymMap synonymMap = buildSynonyms(synonymAnalyzer, getRulesFromSettings(environment));
    final String factoryName = name();

    return new TokenFilterFactory() {
        @Override
        public String name() {
            return factoryName;
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // Nothing to apply when the map carries no FST.
            if (synonymMap.fst == null) {
                return tokenStream;
            }
            return new SynonymFilter(tokenStream, synonymMap, false);
        }
    };
}
 
Example 11
Source Project: lucene-solr   Source File: TestConcatenateGraphFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds a single keyword that has one synonym through {@code SynonymFilter} and
 * {@code ConcatenateGraphFilter} and expects both the keyword and the synonym to be emitted.
 */
@Test
public void testWithSynonym() throws Exception {
  final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

  final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  source.setReader(new StringReader("mykeyword"));

  @SuppressWarnings("deprecation")
  final SynonymFilter expanded = new SynonymFilter(source, synonyms.build(), true);
  final ConcatenateGraphFilter concatenated = new ConcatenateGraphFilter(expanded);

  final String[] expectedTerms = {"mykeyword", "mysynonym"};
  final int[] expectedPosIncrements = {1, 0};
  assertTokenStreamContents(concatenated, expectedTerms, null, null, expectedPosIncrements);
}
 
Example 12
Source Project: lucene-solr   Source File: TestConcatenateGraphFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates a three-word input whose first word has a synonym and expects two outputs, one
 * per leading term, each joined with {@code SEP_LABEL}.
 */
@Test
public void testWithSynonyms() throws Exception {
  final SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);

  final Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  source.setReader(new StringReader("mykeyword another keyword"));

  @SuppressWarnings("deprecation")
  final SynonymFilter expanded = new SynonymFilter(source, synonyms.build(), true);
  final ConcatenateGraphFilter concatenated = new ConcatenateGraphFilter(expanded, SEP_LABEL, false, 100);

  // Both expected strings share the tail "another", "keyword" and differ only in the first term.
  final String[] leadingTerms = {"mykeyword", "mysynonym"};
  final String[] expectedOutputs = new String[leadingTerms.length];
  final CharsRefBuilder scratch = new CharsRefBuilder();
  for (int i = 0; i < leadingTerms.length; i++) {
    scratch.clear();
    scratch.append(leadingTerms[i]);
    scratch.append(SEP_LABEL);
    scratch.append("another");
    scratch.append(SEP_LABEL);
    scratch.append("keyword");
    expectedOutputs[i] = scratch.toCharsRef().toString();
  }

  assertTokenStreamContents(concatenated, expectedOutputs, null, null, new int[]{1, 0});
}
 
Example 13
Source Project: lucene-solr   Source File: TestConcatenateGraphFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the tokens produced when eight input tokens, each with one synonym and the original
 * kept, are concatenated, and expects 256 of them.
 *
 * <p>Each emitted token must carry a non-null, non-empty {@code BytesRef}.
 */
@Test
public void testValidNumberOfExpansions() throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  // Map "1".."256" to "1001".."1256"; only "1".."8" occur in the input below.
  for (int i = 0; i < 256; i++) {
    builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true);
  }
  StringBuilder valueBuilder = new StringBuilder();
  for (int i = 0 ; i < 8 ; i++) {
    valueBuilder.append(i+1);
    valueBuilder.append(" ");
  }
  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader(valueBuilder.toString()));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

  int count;
  try (ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter)) {
    stream.reset();
    ConcatenateGraphFilter.BytesRefBuilderTermAttribute attr = stream.addAttribute(ConcatenateGraphFilter.BytesRefBuilderTermAttribute.class);
    count = 0;
    while (stream.incrementToken()) {
      count++;
      assertNotNull(attr.getBytesRef());
      assertTrue(attr.getBytesRef().length > 0);
    }
  }
  // Expected value first, actual second, so a failure reports "expected:<256> but was:<count>".
  assertEquals(256, count);
}
 
Example 14
Source Project: lucene-solr   Source File: TestRandomChains.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Produces a {@code SynonymMap} filled with at least 10 random entries, drawing the dedup flag,
 * the strings and the keep-original flags from {@code random}.
 *
 * @param random source of randomness for the map's contents
 * @return the built map
 */
@Override public Object apply(Random random) {
  final SynonymMap.Builder synonyms = new SynonymMap.Builder(random.nextBoolean());
  for (int remaining = atLeast(10); remaining > 0; remaining--) {
    addSyn(synonyms, randomNonEmptyString(random), randomNonEmptyString(random), random.nextBoolean());
  }
  try {
    return synonyms.build();
  } catch (Exception buildFailure) {
    Rethrow.rethrow(buildFailure);
    return null; // unreachable code
  }
}
 
Example 15
/**
 * Invoked once during core initialization to set up the analysis components that depend on
 * the data held by this managed resource. Initializing them exactly once keeps text analysis
 * consistent, especially in a distributed environment, where no server should apply a
 * different set of stop words than the others.
 *
 * @param initArgs init arguments; "synonyms", "expand" and "format" entries are appended to it
 * @param res the managed resource supplying the synonym data and the resource loader
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate fails with an
 *         {@code IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
    throws SolrException
{
  final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
  mutableArgs.add("synonyms", getResourceId());
  mutableArgs.add("expand", "false");
  mutableArgs.add("format", "solr");

  // The filter factory takes plain string arguments, so flatten every value with toString().
  final Map<String,String> factoryArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : mutableArgs) {
    factoryArgs.put(arg.getKey(), arg.getValue().toString());
  }

  // The delegate factory gets its synonym mappings from the managed resource
  // through a custom parser rather than from a file.
  delegate = new SynonymGraphFilterFactory(factoryArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
      final ManagedSynonymParser managedParser =
          new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
      // Passing null is safe: nothing is actually parsed from an input Reader.
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException ioFailure) {
    throw new SolrException(ErrorCode.SERVER_ERROR, ioFailure);
  }
}
 
Example 16
Source Project: lucene-solr   Source File: ManagedSynonymFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Invoked once during core initialization to set up the analysis components that depend on
 * the data held by this managed resource. Initializing them exactly once keeps text analysis
 * consistent, especially in a distributed environment, where no server should apply a
 * different set of stop words than the others.
 *
 * @param initArgs init arguments; "synonyms", "expand" and "format" entries are appended to it
 * @param res the managed resource supplying the synonym data and the resource loader
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate fails with an
 *         {@code IOException}
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
    throws SolrException
{
  final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
  mutableArgs.add("synonyms", getResourceId());
  mutableArgs.add("expand", "false");
  mutableArgs.add("format", "solr");

  // The filter factory takes plain string arguments, so flatten every value with toString().
  final Map<String,String> factoryArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : mutableArgs) {
    factoryArgs.put(arg.getKey(), arg.getValue().toString());
  }

  // The delegate factory gets its synonym mappings from the managed resource
  // through a custom parser rather than from a file.
  delegate = new SynonymFilterFactory(factoryArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {
      final ManagedSynonymParser managedParser =
          new ManagedSynonymParser((SynonymManager) res, dedup, analyzer);
      // Passing null is safe: nothing is actually parsed from an input Reader.
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException ioFailure) {
    throw new SolrException(ErrorCode.SERVER_ERROR, ioFailure);
  }
}
 
Example 17
/**
 * Swaps in a new synonym map and refreshes the FST state derived from it.
 *
 * <p>All fields assigned here were {@code final} in the original filter and were made
 * reassignable for this update logic. They must only be reassigned through this method;
 * assigning them elsewhere may introduce bugs.
 *
 * @param synonymMap the new synonym map; its {@code fst} must be non-null
 * @throws IllegalArgumentException if {@code synonymMap.fst} is {@code null}
 */
@Override
public void update(SynonymMap synonymMap) {
    // Validate before touching any field so a rejected map leaves the previous state intact.
    if (synonymMap.fst == null) {
        throw new IllegalArgumentException("fst must be non-null");
    }

    this.synonyms = synonymMap;
    this.fst = synonymMap.fst;
    this.fstReader = this.fst.getBytesReader();
    // Diamond instead of the raw FST.Arc type.
    this.scratchArc = new FST.Arc<>();
}
 
Example 18
/**
 * Adds every term found for each search field in a terms response to the two builders via
 * {@code addTerm}.
 *
 * @param terms raw "terms" section, wrapped in a {@code TermsResponse}
 * @param fieldBuilder builder passed through to {@code addTerm}
 * @param termBuilder builder passed through to {@code addTerm}
 * @param searchFields field names to look up; fields with no term list are skipped
 */
private void addTerms( NamedList<NamedList<Number>> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
  final TermsResponse response = new TermsResponse( terms );
  for (final String field : searchFields) {
    final CharsRef fieldChars = new CharsRef( field );
    final List<TermsResponse.Term> fieldTerms = response.getTerms( field );
    if (fieldTerms == null) {
      continue;
    }
    for (final TermsResponse.Term entry : fieldTerms) {
      final String text = entry.getTerm();
      Log.debug( "Add distributed term: " + field + " = " + text );
      addTerm( fieldChars, text, fieldBuilder, termBuilder );
    }
  }
}
 
Example 19
/**
 * Builds {@code fieldMap} and {@code termMap} from the doc-values terms of every string field
 * of the request's searcher, plus any terms collected by {@code addDistributedTerms}.
 *
 * @param rb response builder giving access to the searcher
 */
private void buildFieldMap( ResponseBuilder rb ) throws IOException {
  Log.debug( "buildFieldMap" );
  final SolrIndexSearcher indexSearcher = rb.req.getSearcher();

  // Synonym maps are built from the SortedDocValues: each field value
  // (lower case, stemmed, synonyms looked up from synonyms.txt) maps to fieldValue.
  final SynonymMap.Builder fieldBuilder = new SynonymMap.Builder( true );
  final SynonymMap.Builder termBuilder = new SynonymMap.Builder( true );

  final ArrayList<String> searchFields = getStringFields( indexSearcher );
  for (final String field : searchFields) {
    Log.debug( "adding searchField " + field );
    final CharsRef fieldChars = new CharsRef( field );
    final SortedSetDocValues docValues =
        FieldCache.DEFAULT.getDocTermOrds( indexSearcher.getAtomicReader( ), field );
    if (docValues != null) {
      Log.debug( "got SortedSetDocValues for " + field );
      final TermsEnum values = docValues.termsEnum();
      while (values.next() != null) {
        final String fieldValue = values.term().utf8ToString( );
        addTerm( fieldChars, fieldValue, fieldBuilder, termBuilder );
      }
    }
  }

  addDistributedTerms( rb, fieldBuilder, termBuilder, searchFields );

  fieldMap = fieldBuilder.build( );
  termMap = termBuilder.build( );
}
 
Example 20
/**
 * Walks the terms returned for each search field and registers them with the two builders
 * through {@code addTerm}.
 *
 * @param terms raw "terms" section, wrapped in a {@code TermsResponse}
 * @param fieldBuilder builder passed through to {@code addTerm}
 * @param termBuilder builder passed through to {@code addTerm}
 * @param searchFields field names to look up; fields with no term list are skipped
 */
private void addTerms( NamedList<NamedList<Number>> terms, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
  final TermsResponse parsed = new TermsResponse( terms );
  for (final String name : searchFields) {
    final CharsRef nameChars = new CharsRef( name );
    final List<TermsResponse.Term> found = parsed.getTerms( name );
    if (found == null) {
      continue;
    }
    for (final TermsResponse.Term hit : found) {
      final String value = hit.getTerm();
      Log.debug( "Add distributed term: " + name + " = " + value );
      addTerm( nameChars, value, fieldBuilder, termBuilder );
    }
  }
}
 
Example 21
/**
 * Resets the filter's capture and buffer state and, when the synonym loader reports a change
 * since {@code lastModified}, switches to the loader's current synonym map.
 *
 * @throws IllegalArgumentException if the reloaded map has a {@code null} FST
 */
@Override
public void reset() throws IOException {
  super.reset();
  captureCount = 0;
  finished = false;
  inputSkipCount = 0;
  nextWrite = 0;
  nextRead = 0;

  // Normally these buffers reset themselves as they are consumed, but the caller may stop
  // early (or an exception may interrupt consumption), leaving stale state behind.
  for (final PendingInput pendingInput : futureInputs) {
    pendingInput.reset();
  }
  for (final PendingOutputs pendingOutputs : futureOutputs) {
    pendingOutputs.reset();
  }

  final boolean synonymsChanged = synonymLoader != null && synonymLoader.isUpdate(lastModified);
  if (!synonymsChanged) {
    return;
  }

  // The timestamp is recorded even when the loader then returns no map.
  lastModified = synonymLoader.getLastModified();
  final SynonymMap reloaded = synonymLoader.getSynonymMap();
  if (reloaded == null) {
    return;
  }

  synonyms = reloaded;
  fst = synonyms.fst;
  if (fst == null) {
    throw new IllegalArgumentException("fst must be non-null");
  }
  fstReader = fst.getBytesReader();
  scratchArc = new FST.Arc<>();
  clearAttributes();
}
 
Example 22
Source Project: crate   Source File: SynonymTokenFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses synonym rules with the parser matching the configured {@code format} and builds the
 * resulting map.
 *
 * @param analyzer analyzer handed to the parser
 * @param rules reader over the synonym rules
 * @return the built synonym map
 * @throws IllegalArgumentException wrapping any exception raised while parsing or building
 */
protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
    try {
        final boolean wordnetFormat = "wordnet".equalsIgnoreCase(format);
        if (wordnetFormat) {
            final ESWordnetSynonymParser wordnetParser =
                new ESWordnetSynonymParser(true, expand, lenient, analyzer);
            wordnetParser.parse(rules);
            return wordnetParser.build();
        }
        // Every other format value is handled by the Solr-syntax parser.
        final ESSolrSynonymParser solrParser =
            new ESSolrSynonymParser(true, expand, lenient, analyzer);
        solrParser.parse(rules);
        return solrParser.build();
    } catch (Exception e) {
        throw new IllegalArgumentException("failed to build synonyms", e);
    }
}
 
Example 23
/**
 * Adds one rule to the builder after replacing every run of spaces in the input and output
 * with a single U+0000 character (presumably the synonym map's word separator; confirm).
 *
 * @param b builder receiving the rule
 * @param input space-separated input phrase
 * @param output space-separated output phrase
 * @param keepOrig passed through to the builder
 */
private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
  final String inputWords = input.replaceAll(" +", "\u0000");
  final String outputWords = output.replaceAll(" +", "\u0000");
  b.add(new CharsRef(inputWords), new CharsRef(outputWords), keepOrig);
}
 
Example 24
Source Project: lucene-solr   Source File: TestRandomChains.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Registers one rule with the builder after replacing every run of spaces in the input and
 * output with a single U+0000 character (presumably the synonym map's word separator; confirm).
 *
 * @param b builder receiving the rule
 * @param input space-separated input phrase
 * @param output space-separated output phrase
 * @param keepOrig passed through to the builder
 */
private void addSyn(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
  final CharsRef inputRef = new CharsRef(input.replaceAll(" +", "\u0000"));
  final CharsRef outputRef = new CharsRef(output.replaceAll(" +", "\u0000"));
  b.add(inputRef, outputRef, keepOrig);
}
 
Example 25
/**
 * Creates the filter over {@code input} and installs the initial synonym map through
 * {@code update}.
 *
 * @param input token stream to wrap
 * @param synonyms initial synonym map, handed to {@code update}
 * @param ignoreCase stored in the {@code ignoreCase} field
 */
public DynamicSynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
    super(input);
    this.ignoreCase = ignoreCase;
    // All map-derived state is set up by update(), which is also the runtime reload entry point.
    update(synonyms);
}
 
Example 26
/**
 * Creates the filter over {@code input} and installs the initial synonym map through
 * {@code update}.
 *
 * @param input token stream to wrap
 * @param synonyms initial synonym map, handed to {@code update}
 * @param ignoreCase stored in the {@code ignoreCase} field
 */
public DynamicSynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
    super(input);
    this.ignoreCase = ignoreCase;

    // All map-derived state is set up by update(), which is also the runtime reload entry point.
    update(synonyms);
}
 
Example 27
/**
 * Looks up {@code phrase} in the FST of {@code termMap} and returns the word(s) mapped to it.
 *
 * <p>Spaces in the phrase are replaced with underscores, then the FST is followed one code
 * point at a time. If the walk ends on a final arc, the accumulated output is decoded into
 * words; a single word is returned as-is and several are joined with {@code fieldDelim}.
 *
 * @param termMap synonym map whose {@code fst} and {@code words} are read; the FST is used
 *        without a null check
 * @param phrase phrase to look up
 * @return the mapped field name(s), or {@code null} when the phrase has no complete match
 * @throws IOException declared for the FST and data-input calls made here
 */
private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IOException {
  Log.debug( "getMappedFieldName: '" + phrase + "'" );
  FST<BytesRef> fst = termMap.fst;
  FST.BytesReader fstReader = fst.getBytesReader();
  FST.Arc<BytesRef> scratchArc = new FST.Arc<>( );
  BytesRef scratchBytes = new BytesRef();
  CharsRefBuilder scratchChars = new CharsRefBuilder();
  ByteArrayDataInput bytesReader = new ByteArrayDataInput();

  // Start from the FST root with an empty accumulated output.
  BytesRef pendingOutput = fst.outputs.getNoOutput();
  fst.getFirstArc( scratchArc );
  BytesRef matchOutput = null;

  // NOTE(review): presumably the map's keys were stored with '_' between words - confirm against addTerm.
  String noSpPhrase = phrase.replace( ' ', '_' );
  int charPos = 0;
  while(charPos < noSpPhrase.length()) {
    final int codePoint = noSpPhrase.codePointAt( charPos );
    // scratchArc is both the arc to follow from and the arc that receives the result.
    if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) {
      Log.debug( "No FieldName for " + phrase );
      return null;
    }

    pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
    // Advance by one or two chars depending on whether the code point is supplementary.
    charPos += Character.charCount(codePoint);
  }

  if (scratchArc.isFinal()) {
    Log.debug( "creating matchOutput" );
    matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
    ArrayList<String> mappedFields = new ArrayList<String>( );
    bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length );

    // The first vInt is a header; its low bit is discarded and the rest is the output count.
    // NOTE(review): the low bit is presumably the keep-original flag of the synonym encoding - confirm.
    final int code = bytesReader.readVInt();
    final int count = code >>> 1;
    for( int outputIDX = 0; outputIDX < count; outputIDX++ ) {
      // Each following vInt is an id into termMap.words.
      termMap.words.get( bytesReader.readVInt(), scratchBytes );
      scratchChars.copyUTF8Bytes(scratchBytes);
      int lastStart = 0;
      final int chEnd = lastStart + scratchChars.length();
      // Split the decoded output on WORD_SEPARATOR; chIDX == chEnd flushes the last word.
      for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) {
        if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
          int outputLen = chIDX - lastStart;
          assert outputLen > 0: "output contains empty string: " + scratchChars;
          mappedFields.add( new String( scratchChars.chars(), lastStart, outputLen ) );
          lastStart = chIDX + 1;
        }
      }
    }

    if (mappedFields.size() == 1) {
      Log.debug( "returning mapped fieldName " + mappedFields.get( 0 ) );
      return mappedFields.get( 0 );
    }
    else {
      // Zero or several words: join them with fieldDelim (an empty string when there are none).
      StringBuilder fieldBuilder = new StringBuilder( );
      for (String fieldName : mappedFields ) {
        if (fieldBuilder.length() > 0) fieldBuilder.append( fieldDelim );
        fieldBuilder.append( fieldName );
      }
      Log.debug( "returning mapped fieldName " + fieldBuilder.toString( ) );
      return fieldBuilder.toString( );
    }
  }

  // Reached when every code point matched but the last arc is not final.
  // NOTE(review): the message mentions matchOutput although none was created on this path.
  Log.warn( "matchOutput but no FieldName for " + phrase );
  return null;
}
 
Example 28
/**
 * When the request is distributed, sends a terms request for {@code searchFields} to every
 * shard and adds the returned terms to the two builders via {@code addTerms}.
 *
 * <p>Shard responses that carry no payload (for example because the shard request failed) are
 * logged and skipped, so one failing shard does not abort the whole build with a
 * {@code NullPointerException}.
 *
 * @param rb response builder; supplies the searcher, the shard list and the request info
 * @param fieldBuilder builder passed through to {@code addTerms}
 * @param termBuilder builder passed through to {@code addTerms}
 * @param searchFields fields whose terms are requested from each shard
 */
private void addDistributedTerms( ResponseBuilder rb, SynonymMap.Builder fieldBuilder, SynonymMap.Builder termBuilder, ArrayList<String> searchFields ) throws IOException {
  SolrIndexSearcher searcher = rb.req.getSearcher();
  CoreContainer container = searcher.getCore().getCoreDescriptor().getCoreContainer();

  ShardHandlerFactory shardHandlerFactory = container.getShardHandlerFactory( );
  ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
  // Populates rb.isDistrib (and rb.shards), which are read below.
  shardHandler.checkDistributed( rb );

  Log.debug( "Is Distributed = " + rb.isDistrib );

  if( rb.isDistrib ) {
    // create a ShardRequest that contains a Terms Request.
    // don't send to this shard???
    ShardRequest sreq = new ShardRequest();
    sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
    sreq.actualShards = rb.shards;
    ModifiableSolrParams params = new ModifiableSolrParams( );

    // Ask for every term (no limit) of every search field, in index order.
    params.set( TermsParams.TERMS_LIMIT, -1);
    params.set( TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
    String[] fields = searchFields.toArray( new String[ searchFields.size( )] );
    params.set( TermsParams.TERMS_FIELD, fields );

    // Mark the sub-request as a non-distributed shard request handled by the terms handler.
    params.set( CommonParams.DISTRIB, "false" );
    params.set( ShardParams.IS_SHARD, true );
    params.set( ShardParams.SHARDS_PURPOSE, sreq.purpose );
    params.set( CommonParams.QT, termsHandler );
    params.set( TermsParams.TERMS, "true" );

    if (rb.requestInfo != null) {
      params.set("NOW", Long.toString(rb.requestInfo.getNOW().getTime()));
    }
    sreq.params = params;

    for (String shard : rb.shards ) {
      Log.debug( "sending request to shard " + shard );
      params.set(ShardParams.SHARD_URL, shard );
      shardHandler.submit( sreq, shard, params );
    }

    ShardResponse rsp = shardHandler.takeCompletedIncludingErrors( );
    if (rsp != null) {
      Log.debug( "got " + rsp.getShardRequest().responses.size( ) + " responses" );
      for ( ShardResponse srsp : rsp.getShardRequest().responses ) {
        Log.debug( "Got terms response from " + srsp.getShard( ));

        if (srsp.getException() != null) {
          Log.debug( "ShardResponse Exception!! " + srsp.getException( ) );
        }

        // A failed shard may carry no response payload; skip it instead of dereferencing null.
        if (srsp.getSolrResponse() == null || srsp.getSolrResponse().getResponse() == null) {
          Log.warn( "no terms response from shard " + srsp.getShard( ) + " - skipping" );
          continue;
        }

        @SuppressWarnings("unchecked")
        NamedList<NamedList<Number>> terms = (NamedList<NamedList<Number>>) srsp.getSolrResponse().getResponse().get("terms");
        if (terms != null) {
          addTerms( terms, fieldBuilder, termBuilder, searchFields );
        }
        else {
          Log.warn( "terms was NULL! - make sure that /terms request handler is defined in solrconfig.xml" );
        }
      }
    }
  }
}
 
Example 29
/**
 * Looks up {@code term} in the FST of {@code synMap} and returns the words of its outputs.
 *
 * <p>The term is split on single spaces and each token is followed through the FST one code
 * point at a time. Between tokens the {@code SynonymMap.WORD_SEPARATOR} arc is followed when it
 * exists. The last final arc seen at a token boundary supplies the match, which is then decoded
 * into individual words.
 *
 * @param synMap synonym map whose {@code fst} and {@code words} are read; the FST is used
 *        without a null check
 * @param term space-separated term to look up
 * @return the decoded synonym words, or {@code null} when a code point has no arc or no final
 *         arc was reached
 * @throws IOException declared for the FST and data-input calls made here
 */
private ArrayList<String> getSynonymsFor( SynonymMap synMap, String term ) throws IOException {
  Log.debug( "getSynonymsFor '" + term + "'" );

  FST<BytesRef> fst = synMap.fst;
  FST.BytesReader fstReader = fst.getBytesReader();
  FST.Arc<BytesRef> scratchArc = new FST.Arc<>( );
  BytesRef scratchBytes = new BytesRef();
  CharsRefBuilder scratchChars = new CharsRefBuilder();
  ByteArrayDataInput bytesReader = new ByteArrayDataInput();

  // Start from the FST root with an empty accumulated output.
  BytesRef pendingOutput = fst.outputs.getNoOutput();
  fst.getFirstArc( scratchArc );
  BytesRef matchOutput = null;

  String[] tokens = term.split( " " );
  for (int i = 0; i < tokens.length; i++) {

    int charPos = 0;
    while( charPos < tokens[i].length() ) {
      final int codePoint = tokens[i].codePointAt( charPos );
      // scratchArc is both the arc to follow from and the arc that receives the result.
      if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) {
        Log.debug( "No Synonym for " + term );
        return null;
      }

      pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
      // Advance by one or two chars depending on whether the code point is supplementary.
      charPos += Character.charCount(codePoint);
    }

    // A final arc at a token boundary is a candidate match; a later token may overwrite it.
    if (scratchArc.isFinal()) {
      matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
    }

    // NOTE(review): after following the separator arc this accumulates nextFinalOutput, whereas
    // the per-code-point loop above accumulates scratchArc.output - confirm which is intended.
    // NOTE(review): when no separator arc exists the loop still continues with the next token
    // from the current arc - verify that is the desired behavior.
    if (i < tokens.length-1 && fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) != null) {
      pendingOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
    }
  }

  if (matchOutput != null) {
    ArrayList<String> synonymLst = new ArrayList<String>( );
    bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length );

    // The first vInt is a header; its low bit is discarded and the rest is the output count.
    // NOTE(review): the low bit is presumably the keep-original flag of the synonym encoding - confirm.
    final int code = bytesReader.readVInt();
    final int count = code >>> 1;
    for( int outputIDX = 0; outputIDX < count; outputIDX++ ) {
      // Each following vInt is an id into synMap.words.
      synMap.words.get( bytesReader.readVInt(), scratchBytes);
      scratchChars.copyUTF8Bytes(scratchBytes);
      int lastStart = 0;
      final int chEnd = lastStart + scratchChars.length();
      // Split the decoded output on WORD_SEPARATOR; chIDX == chEnd flushes the last word.
      for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) {
        if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
          int outputLen = chIDX - lastStart;
          assert outputLen > 0: "output contains empty string: " + scratchChars;
          String synonym = new String( scratchChars.chars(), lastStart, outputLen );
          Log.debug( "got synonym '" + synonym + "'" );
          synonymLst.add( synonym );
          lastStart = chIDX + 1;
        }
      }
    }

    return synonymLst;
  }

  return null;
}
 
Example 30
/**
 * Resolves {@code phrase} against the FST of {@code termMap} and returns the word(s) it maps to.
 *
 * <p>Spaces in the phrase are replaced with underscores, then the FST is followed one code
 * point at a time. If the walk ends on a final arc, the accumulated output is decoded into
 * words; a single word is returned as-is and several are joined with {@code fieldDelim}.
 *
 * @param termMap synonym map whose {@code fst} and {@code words} are read; the FST is used
 *        without a null check
 * @param phrase phrase to look up
 * @return the mapped field name(s), or {@code null} when the phrase has no complete match
 * @throws IOException declared for the FST and data-input calls made here
 */
private String getMappedFieldName( SynonymMap termMap, String phrase ) throws IOException {
  Log.debug( "getMappedFieldName: '" + phrase + "'" );
  FST<BytesRef> fst = termMap.fst;
  FST.BytesReader fstReader = fst.getBytesReader();
  FST.Arc<BytesRef> scratchArc = new FST.Arc<>( );
  BytesRef scratchBytes = new BytesRef();
  CharsRefBuilder scratchChars = new CharsRefBuilder();
  ByteArrayDataInput bytesReader = new ByteArrayDataInput();

  // Begin at the FST root with nothing accumulated yet.
  BytesRef pendingOutput = fst.outputs.getNoOutput();
  fst.getFirstArc( scratchArc );
  BytesRef matchOutput = null;

  // NOTE(review): presumably the map's keys were stored with '_' between words - confirm against addTerm.
  String noSpPhrase = phrase.replace( ' ', '_' );
  int charPos = 0;
  while(charPos < noSpPhrase.length()) {
    final int codePoint = noSpPhrase.codePointAt( charPos );
    // The same arc instance serves as the source and as the destination of the lookup.
    if (fst.findTargetArc( codePoint, scratchArc, scratchArc, fstReader) == null) {
      Log.debug( "No FieldName for " + phrase );
      return null;
    }

    pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
    // Step over one or two chars, depending on whether the code point is supplementary.
    charPos += Character.charCount(codePoint);
  }

  if (scratchArc.isFinal()) {
    Log.debug( "creating matchOutput" );
    matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
    ArrayList<String> mappedFields = new ArrayList<String>( );
    bytesReader.reset( matchOutput.bytes, matchOutput.offset, matchOutput.length );

    // Header vInt: the low bit is dropped, the remaining bits give the number of outputs.
    // NOTE(review): the low bit is presumably the keep-original flag of the synonym encoding - confirm.
    final int code = bytesReader.readVInt();
    final int count = code >>> 1;
    for( int outputIDX = 0; outputIDX < count; outputIDX++ ) {
      // Every subsequent vInt is an id resolved through termMap.words.
      termMap.words.get( bytesReader.readVInt(), scratchBytes );
      scratchChars.copyUTF8Bytes(scratchBytes);
      int lastStart = 0;
      final int chEnd = lastStart + scratchChars.length();
      // Cut the decoded output at each WORD_SEPARATOR; chIDX == chEnd emits the trailing word.
      for( int chIDX = lastStart; chIDX <= chEnd; chIDX++ ) {
        if (chIDX == chEnd || scratchChars.charAt(chIDX) == SynonymMap.WORD_SEPARATOR) {
          int outputLen = chIDX - lastStart;
          assert outputLen > 0: "output contains empty string: " + scratchChars;
          mappedFields.add( new String( scratchChars.chars(), lastStart, outputLen ) );
          lastStart = chIDX + 1;
        }
      }
    }

    if (mappedFields.size() == 1) {
      Log.debug( "returning mapped fieldName " + mappedFields.get( 0 ) );
      return mappedFields.get( 0 );
    }
    else {
      // Zero or several words: concatenate with fieldDelim (yields "" when there are none).
      StringBuilder fieldBuilder = new StringBuilder( );
      for (String fieldName : mappedFields ) {
        if (fieldBuilder.length() > 0) fieldBuilder.append( fieldDelim );
        fieldBuilder.append( fieldName );
      }
      Log.debug( "returning mapped fieldName " + fieldBuilder.toString( ) );
      return fieldBuilder.toString( );
    }
  }

  // Reached when every code point matched but the last arc is not final.
  // NOTE(review): the message mentions matchOutput although none was created on this path.
  Log.warn( "matchOutput but no FieldName for " + phrase );
  return null;
}