Java Code Examples for org.apache.lucene.facet.FacetsConfig

The following examples show how to use org.apache.lucene.facet.FacetsConfig. They are extracted from open source projects; where known, the originating project, source file, and license are noted above each example.
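
Before the project examples, here is a minimal, self-contained sketch of the typical FacetsConfig workflow: configure dimensions, build() each document against a taxonomy writer, then count facets at search time. This sketch is not taken from any project below; it assumes a recent Lucene release (8.x/9.x) where ByteBuffersDirectory exists and a FacetsCollector can be passed directly to IndexSearcher.search.

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class FacetsConfigQuickStart {
  public static void main(String[] args) throws IOException {
    Directory indexDir = new ByteBuffersDirectory();
    Directory taxoDir = new ByteBuffersDirectory();

    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("Author", true); // a document may carry several Author values

    try (IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new StandardAnalyzer()));
         DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
      Document doc = new Document();
      doc.add(new FacetField("Author", "Bob"));
      doc.add(new FacetField("Author", "Lisa"));
      // build() rewrites the FacetFields into the drill-down terms and
      // doc-values fields that the facet counting classes expect at search time.
      writer.addDocument(config.build(taxoWriter, doc));
    }

    try (DirectoryReader reader = DirectoryReader.open(indexDir);
         TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir)) {
      FacetsCollector fc = new FacetsCollector();
      new IndexSearcher(reader).search(new MatchAllDocsQuery(), fc);
      Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
      System.out.println(facets.getTopChildren(10, "Author"));
    }
  }
}
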
Example 1
Source Project: linden   Source File: ShardWriter.java    License: Apache License 2.0
/**
 * Process an intermediate form by carrying out, on the Lucene instance of
 * the shard, the deletes and the inserts (a ram index) in the form.
 * @param form  the intermediate form containing deletes and a ram index
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
  if (facetsConfig != null) {
    DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
    // merge the taxonomies
    taxoWriter.addTaxonomy(form.getTaxoDirectory(), map);
    int[] ordinalMap = map.getMap();
    DirectoryReader reader = DirectoryReader.open(form.getDirectory());
    try {
      List<AtomicReaderContext> leaves = reader.leaves();
      int numReaders = leaves.size();
      AtomicReader[] wrappedLeaves = new AtomicReader[numReaders];
      for (int i = 0; i < numReaders; i++) {
        wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, facetsConfig);
      }
      writer.addIndexes(new MultiReader(wrappedLeaves));
    } finally {
      reader.close();
    }
  } else {
    writer.addIndexes(new Directory[] { form.getDirectory() });
  }
  numForms++;
}
 
Example 2
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  publishIndexDir = newDirectory();
  publishTaxoDir = newDirectory();
  handlerIndexDir = newMockDirectory();
  handlerTaxoDir = newMockDirectory();
  clientWorkDir = createTempDir("replicationClientTest");
  sourceDirFactory = new PerSessionDirectoryFactory(clientWorkDir);
  replicator = new LocalReplicator();
  callback = new IndexAndTaxonomyReadyCallback(handlerIndexDir, handlerTaxoDir);
  handler = new IndexAndTaxonomyReplicationHandler(handlerIndexDir, handlerTaxoDir, callback);
  client = new ReplicationClient(replicator, handler, sourceDirFactory);
  
  IndexWriterConfig conf = newIndexWriterConfig(null);
  conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()));
  publishIndexWriter = new IndexWriter(publishIndexDir, conf);
  publishTaxoWriter = new SnapshotDirectoryTaxonomyWriter(publishTaxoDir);
  config = new FacetsConfig();
  config.setHierarchical("A", true);
}
 
Example 3
Source Project: lucene-solr   Source File: TaxonomyMergeUtils.java    License: Apache License 2.0
/**
 * Merges the given taxonomy and index directories and commits the changes to
 * the given writers.
 */
public static void merge(Directory srcIndexDir, Directory srcTaxoDir, OrdinalMap map, IndexWriter destIndexWriter,
    DirectoryTaxonomyWriter destTaxoWriter, FacetsConfig srcConfig) throws IOException {
  
  // merge the taxonomies
  destTaxoWriter.addTaxonomy(srcTaxoDir, map);
  int[] ordinalMap = map.getMap();
  DirectoryReader reader = DirectoryReader.open(srcIndexDir);
  try {
    List<LeafReaderContext> leaves = reader.leaves();
    int numReaders = leaves.size();
    CodecReader[] wrappedLeaves = new CodecReader[numReaders];
    for (int i = 0; i < numReaders; i++) {
      wrappedLeaves[i] = SlowCodecReaderWrapper.wrap(new OrdinalMappingLeafReader(leaves.get(i).reader(), ordinalMap, srcConfig));
    }
    destIndexWriter.addIndexes(wrappedLeaves);
    
    // commit changes to taxonomy and index respectively.
    destTaxoWriter.commit();
    destIndexWriter.commit();
  } finally {
    reader.close();
  }
}
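
A hedged usage sketch for this helper. The variable names are illustrative: the source and destination directories and writers are assumed to be open, and srcConfig must be the FacetsConfig that the source index was built with. MemoryOrdinalMap is the in-heap OrdinalMap implementation also used in Examples 1 and 19.

// Hypothetical call site; srcIndexDir, srcTaxoDir, destIndexWriter and
// destTaxoWriter are assumed to already exist and be open.
DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
TaxonomyMergeUtils.merge(srcIndexDir, srcTaxoDir, map, destIndexWriter, destTaxoWriter, srcConfig);
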
 
Example 4
Source Project: lucene-solr   Source File: TaxonomyFacets.java    License: Apache License 2.0
@Override
public List<FacetResult> getAllDims(int topN) throws IOException {
  int[] children = getChildren();
  int[] siblings = getSiblings();
  int ord = children[TaxonomyReader.ROOT_ORDINAL];
  List<FacetResult> results = new ArrayList<>();
  while (ord != TaxonomyReader.INVALID_ORDINAL) {
    String dim = taxoReader.getPath(ord).components[0];
    FacetsConfig.DimConfig dimConfig = config.getDimConfig(dim);
    if (dimConfig.indexFieldName.equals(indexFieldName)) {
      FacetResult result = getTopChildren(topN, dim);
      if (result != null) {
        results.add(result);
      }
    }
    ord = siblings[ord];
  }

  // Sort by highest value, tie break by dim:
  Collections.sort(results, BY_VALUE_THEN_DIM);
  return results;
}
 
Example 5
Source Project: lucene-solr   Source File: TestTaxonomyFacetAssociations.java    License: Apache License 2.0
public void testMixedTypesInSameIndexField() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  Document doc = new Document();
  doc.add(new IntAssociationFacetField(14, "a", "x"));
  doc.add(new FloatAssociationFacetField(55.0f, "b", "y"));
  expectThrows(IllegalArgumentException.class, () -> {
    writer.addDocument(config.build(taxoWriter, doc));
  });
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 6
Source Project: lucene-solr   Source File: TestTaxonomyFacetAssociations.java    License: Apache License 2.0
public void testNoHierarchy() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  config.setHierarchical("a", true);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  Document doc = new Document();
  doc.add(new IntAssociationFacetField(14, "a", "x"));
  expectThrows(IllegalArgumentException.class, () -> {
    writer.addDocument(config.build(taxoWriter, doc));
  });

  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 7
Source Project: lucene-solr   Source File: TestTaxonomyFacetAssociations.java    License: Apache License 2.0
public void testRequireDimCount() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  config.setRequireDimCount("a", true);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  Document doc = new Document();
  doc.add(new IntAssociationFacetField(14, "a", "x"));
  expectThrows(IllegalArgumentException.class, () -> {
    writer.addDocument(config.build(taxoWriter, doc));
  });

  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
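
Examples 5 through 7 exercise configurations that config.build() rejects with IllegalArgumentException: int and float associations mixed in one index field, a hierarchical dimension used with associations, and requireDimCount combined with associations. For the first case, a hedged sketch of the usual fix is to give each association type its own index field, as Example 21 below also does. The field names are illustrative, and writer and taxoWriter are assumed to exist as in the tests above.

FacetsConfig config = new FacetsConfig();
config.setIndexFieldName("a", "$int_assoc");   // int associations stored separately
config.setIndexFieldName("b", "$float_assoc"); // float associations stored separately

Document doc = new Document();
doc.add(new IntAssociationFacetField(14, "a", "x"));
doc.add(new FloatAssociationFacetField(55.0f, "b", "y"));
writer.addDocument(config.build(taxoWriter, doc)); // no longer throws
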
 
Example 8
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 9
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
public void testDetectHierarchicalField() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path", "other"));
  expectThrows(IllegalArgumentException.class, () -> {
    config.build(taxoWriter, doc);
  });

  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 10
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
public void testDetectMultiValuedField() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  doc.add(new FacetField("a", "path2"));
  expectThrows(IllegalArgumentException.class, () -> {
    config.build(taxoWriter, doc);
  });

  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 11
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
public void testGetFacetResultsTwice() throws Exception {
  // LUCENE-4893: counts were multiplied as many times as getFacetResults was called.
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(new FacetField("a", "1"));
  doc.add(new FacetField("b", "1"));
  iw.addDocument(config.build(taxoWriter, doc));
  
  DirectoryReader r = DirectoryReader.open(iw);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

  Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
  
  List<FacetResult> res1 = facets.getAllDims(10);
  List<FacetResult> res2 = facets.getAllDims(10);
  assertEquals("calling getFacetResults twice should return the .equals()=true result", res1, res2);

  iw.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
 
Example 12
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
public void testChildCount() throws Exception {
  // LUCENE-4885: FacetResult.numValidDescendants was not set properly by FacetsAccumulator
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  FacetsConfig config = new FacetsConfig();
  for (int i = 0; i < 10; i++) {
    Document doc = new Document();
    doc.add(new FacetField("a", Integer.toString(i)));
    iw.addDocument(config.build(taxoWriter, doc));
  }
  
  DirectoryReader r = DirectoryReader.open(iw);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
  
  Facets facets = getAllFacets(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, newSearcher(r), taxoReader, config);
  
  assertEquals(10, facets.getTopChildren(2, "a").childCount);

  iw.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
 
Example 13
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, boolean withContent) throws Exception {
  for (int i = 0; i < 2; i++) {
    Document doc = new Document();
    if (withContent) {
      doc.add(new StringField("f", "a", Field.Store.NO));
    }
    if (config != null) {
      doc.add(new FacetField("A", Integer.toString(i)));
      indexWriter.addDocument(config.build(taxoWriter, doc));
    } else {
      indexWriter.addDocument(doc);
    }
  }
  
  indexWriter.commit();
}
 
Example 14
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0
private static Facets getAllFacets(String indexFieldName, IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
  if (random().nextBoolean()) {
    // Aggregate the facet counts:
    FacetsCollector c = new FacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), c);

    return new FastTaxonomyFacetCounts(taxoReader, config, c);
  } else {
    return new FastTaxonomyFacetCounts(indexFieldName, searcher.getIndexReader(), taxoReader, config);
  }
}
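
The comment inside this helper mentions the more common pattern: wrap a hits collector and a FacetsCollector with MultiCollector so a single search pass produces both. A hedged sketch of that pattern follows; the method and variable names are illustrative, and TopScoreDocCollector.create's signature varies across Lucene versions.

private static Facets searchWithFacets(IndexSearcher searcher, Query query,
    TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
  FacetsCollector fc = new FacetsCollector();
  TopScoreDocCollector hits = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
  // One pass over the matching docs fills both collectors.
  searcher.search(query, MultiCollector.wrap(hits, fc));
  return new FastTaxonomyFacetCounts(taxoReader, config, fc);
}
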
 
Example 15
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts2.java    License: Apache License 2.0
private static void addFacets(Document doc, FacetsConfig config, boolean updateTermExpectedCounts) 
    throws IOException {
  List<FacetField> docCategories = randomCategories(random());
  for (FacetField ff : docCategories) {
    doc.add(ff);
    String cp = ff.dim + "/" + ff.path[0];
    allExpectedCounts.put(cp, allExpectedCounts.get(cp) + 1);
    if (updateTermExpectedCounts) {
      termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
    }
  }
  // add 1 to each NO_PARENTS dimension
  allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
  allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
  allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
  if (updateTermExpectedCounts) {
    termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
    termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
    termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
  }
}
 
Example 16
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts2.java    License: Apache License 2.0
private static void indexDocsWithFacetsAndSomeTerms(IndexWriter indexWriter, TaxonomyWriter taxoWriter, 
                                                    Map<String,Integer> expectedCounts) throws IOException {
  Random random = random();
  int numDocs = atLeast(random, 2);
  FacetsConfig config = getConfig();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    boolean hasContent = random.nextBoolean();
    if (hasContent) {
      addField(doc);
    }
    addFacets(doc, config, hasContent);
    indexWriter.addDocument(config.build(taxoWriter, doc));
  }
  indexWriter.commit(); // flush a segment
}
 
Example 17
public void testNoScore() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  IndexWriter iw = new IndexWriter(indexDir, newIndexWriterConfig(new MockAnalyzer(random())));
  FacetsConfig config = new FacetsConfig();
  for (int i = 0; i < 4; i++) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("price", (i+1)));
    doc.add(new FacetField("a", Integer.toString(i % 2)));
    iw.addDocument(config.build(taxoWriter, doc));
  }
  
  DirectoryReader r = DirectoryReader.open(iw);
  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

  FacetsCollector sfc = new FacetsCollector();
  newSearcher(r).search(new MatchAllDocsQuery(), sfc);
  Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, sfc, DoubleValuesSource.fromLongField("price"));
  assertEquals("dim=a path=[] value=10.0 childCount=2\n  1 (6.0)\n  0 (4.0)\n", facets.getTopChildren(10, "a").toString());

  iw.close();
  IOUtils.close(taxoWriter, taxoReader, taxoDir, r, indexDir);
}
 
Example 18
Source Project: linden   Source File: IntermediateForm.java    License: Apache License 2.0
/**
 * This method is used by the index update mapper and processes a document
 * operation into the current intermediate form.
 * @param doc  input document operation
 * @throws IOException
 */
public void process(Term id, Document doc, Analyzer analyzer, FacetsConfig facetsConfig) throws IOException {
  if (writer == null) {
    // analyzer is null because we specify an analyzer with addDocument
    createWriter();
  }
  if (facetsConfig != null) {
    writer.updateDocument(id, facetsConfig.build(taxoWriter, doc), analyzer);
  } else {
    writer.updateDocument(id, doc, analyzer);
  }
  numDocs++;
}
 
Example 19
Source Project: linden   Source File: IntermediateForm.java    License: Apache License 2.0
/**
 * This method is used by the index update combiner and processes an
 * intermediate form into the current intermediate form. More specifically,
 * the input intermediate forms are a single-document ram index and/or a
 * single delete term.
 * @param form  the input intermediate form
 * @throws IOException
 */
public void process(IntermediateForm form, FacetsConfig facetsConfig) throws IOException {
  if (form.dir.ramBytesUsed() > 0 || form.taxoDir.ramBytesUsed() > 0) {
    if (writer == null) {
      createWriter();
    }

    if (facetsConfig != null) {
      DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
      // merge the taxonomies
      taxoWriter.addTaxonomy(form.taxoDir, map);
      int[] ordinalMap = map.getMap();
      DirectoryReader reader = DirectoryReader.open(form.dir);
      try {
        List<AtomicReaderContext> leaves = reader.leaves();
        int numReaders = leaves.size();
        AtomicReader[] wrappedLeaves = new AtomicReader[numReaders];
        for (int i = 0; i < numReaders; i++) {
          wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordinalMap, facetsConfig);
        }
        writer.addIndexes(new MultiReader(wrappedLeaves));
      } finally {
        reader.close();
      }
    } else {
      writer.addIndexes(new Directory[] { form.dir });
    }
    numDocs++;
  }
}
 
Example 20
Source Project: lucene-solr   Source File: RandomFacetSource.java    License: Apache License 2.0
@Override
public void configure(FacetsConfig config) {
  for (int i = 0; i < maxDims; i++) {
    config.setHierarchical(Integer.toString(i), true);
    config.setMultiValued(Integer.toString(i), true);
  }
}
 
Example 21
Source Project: lucene-solr   Source File: AssociationsFacetsExample.java    License: Apache License 2.0
/** Empty constructor */
public AssociationsFacetsExample() {
  config = new FacetsConfig();
  config.setMultiValued("tags", true);
  config.setIndexFieldName("tags", "$tags");
  config.setMultiValued("genre", true);
  config.setIndexFieldName("genre", "$genre");
}
 
Example 22
public IndexAndTaxonomyReadyCallback(Directory indexDir, Directory taxoDir) throws IOException {
  this.indexDir = indexDir;
  this.taxoDir = taxoDir;
  config = new FacetsConfig();
  config.setHierarchical("A", true);
  if (DirectoryReader.indexExists(indexDir)) {
    indexReader = DirectoryReader.open(indexDir);
    lastIndexGeneration = indexReader.getIndexCommit().getGeneration();
    taxoReader = new DirectoryTaxonomyReader(taxoDir);
  }
}
 
Example 23
Source Project: lucene-solr   Source File: SortedSetDocValuesFacetCounts.java    License: Apache License 2.0
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
  if (path.length != 1) {
    throw new IllegalArgumentException("path must be length=1");
  }
  int ord = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
  if (ord < 0) {
    return -1;
  }

  return counts[ord];
}
 
Example 24
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
  if (path.length != 1) {
    throw new IllegalArgumentException("path must be length=1");
  }
  int ord = (int) dv.lookupTerm(new BytesRef(FacetsConfig.pathToString(dim, path)));
  if (ord < 0) {
    return -1;
  }

  return counts.get(ord);
}
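
Examples 23 and 24 rely on FacetsConfig.pathToString to reconstruct the exact string under which a facet label was indexed in the doc-values field. A minimal round-trip sketch of that encoding (the literal values are illustrative):

String encoded = FacetsConfig.pathToString("Author", new String[] { "Bob" });
String[] parts = FacetsConfig.stringToPath(encoded);
// parts == { "Author", "Bob" }: the dimension plus path components,
// with the internal delimiter escaped inside "encoded".
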
 
Example 25
Source Project: lucene-solr   Source File: TaxonomyFacetSumValueSource.java    License: Apache License 2.0
/**
 * Aggregates float facet values from the provided
 *  {@link DoubleValuesSource}, and pulls ordinals from the
 *  provided {@link OrdinalsReader}.
 */
public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader,
                                   FacetsConfig config, FacetsCollector fc, DoubleValuesSource vs) throws IOException {
  super(ordinalsReader.getIndexFieldName(), taxoReader, config);
  this.ordinalsReader = ordinalsReader;
  sumValues(fc.getMatchingDocs(), fc.getKeepScores(), vs);
}
 
Example 26
Source Project: lucene-solr   Source File: IntTaxonomyFacets.java    License: Apache License 2.0
/** Sole constructor. */
protected IntTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
  super(indexFieldName, taxoReader, config);

  if (useHashTable(fc, taxoReader)) {
    sparseValues = new IntIntScatterMap();
    values = null;
  } else {
    sparseValues = null;
    values = new int[taxoReader.getSize()];
  }
}
 
Example 27
Source Project: lucene-solr   Source File: DirectoryTaxonomyWriter.java    License: Apache License 2.0
/**
 * Note that the methods calling addCategoryDocument() are synchronized, so
 * this method is effectively synchronized as well.
 */
private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException {
  // Before Lucene 2.9, position increments >=0 were supported, so we
  // added 1 to parent to allow the parent -1 (the parent of the root).
  // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
  // no longer enough, since 0 is not encoded consistently either (see
  // comment in SinglePositionTokenStream). But because we must be
  // backward-compatible with existing indexes, we can't just fix what
  // we write here (e.g., to write parent+2), and need to do a workaround
  // in the reader (which knows that anyway only category 0 has a parent
  // -1).    
  parentStream.set(Math.max(parent + 1, 1));
  Document d = new Document();
  d.add(parentStreamField);

  fullPathField.setStringValue(FacetsConfig.pathToString(categoryPath.components, categoryPath.length));
  d.add(fullPathField);

  // Note that we do not pass an Analyzer here because the fields that are
  // added to the Document are untokenized or contain their own TokenStream.
  // Therefore the IndexWriter's Analyzer has no effect.
  indexWriter.addDocument(d);
  int id = nextID++;

  // added a category document, mark that ReaderManager is not up-to-date
  shouldRefreshReaderManager = true;
  
  // also add to the parent array
  taxoArrays = getTaxoArrays().add(id, parent);

  // NOTE: this line must be executed last, or else the cache gets updated
  // before the parents array (LUCENE-4596)
  addToCache(categoryPath, id);

  return id;
}
 
Example 28
Source Project: lucene-solr   Source File: DirectoryTaxonomyWriter.java    License: Apache License 2.0
/**
 * Takes the categories from the given taxonomy directory, and adds the
 * missing ones to this taxonomy. Additionally, it fills the given
 * {@link OrdinalMap} with a mapping from the original ordinal to the new
 * ordinal.
 */
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
  ensureOpen();
  DirectoryReader r = DirectoryReader.open(taxoDir);
  try {
    final int size = r.numDocs();
    final OrdinalMap ordinalMap = map;
    ordinalMap.setSize(size);
    int base = 0;
    PostingsEnum docs = null;
    for (final LeafReaderContext ctx : r.leaves()) {
      final LeafReader ar = ctx.reader();
      final Terms terms = ar.terms(Consts.FULL);
      // TODO: share per-segment TermsEnum here!
      TermsEnum te = terms.iterator();
      while (te.next() != null) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
      }
      base += ar.maxDoc(); // no deletions, so we're ok
    }
    ordinalMap.addDone();
  } finally {
    r.close();
  }
}
 
Example 29
Source Project: lucene-solr   Source File: DirectoryTaxonomyReader.java    License: Apache License 2.0
@Override
public FacetLabel getPath(int ordinal) throws IOException {
  ensureOpen();
  
  // Since the cache is shared with DTR instances allocated from
  // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
  // instance recognizes. Therefore we do this check up front, before we hit
  // the cache.
  if (ordinal < 0 || ordinal >= indexReader.maxDoc()) {
    return null;
  }
  
  // TODO: can we use an int-based hash impl, such as IntToObjectMap,
  // wrapped as LRU?
  Integer catIDInteger = Integer.valueOf(ordinal);
  synchronized (categoryCache) {
    FacetLabel res = categoryCache.get(catIDInteger);
    if (res != null) {
      return res;
    }
  }
  
  Document doc = indexReader.document(ordinal);
  FacetLabel ret = new FacetLabel(FacetsConfig.stringToPath(doc.get(Consts.FULL)));
  synchronized (categoryCache) {
    categoryCache.put(catIDInteger, ret);
  }
  
  return ret;
}
 
Example 30
Source Project: lucene-solr   Source File: OrdinalMappingLeafReader.java    License: Apache License 2.0
/**
 * Wraps a LeafReader, mapping ordinals according to the ordinalMap, using
 * the provided {@link FacetsConfig} which was used to build the wrapped
 * reader.
 */
public OrdinalMappingLeafReader(LeafReader in, int[] ordinalMap, FacetsConfig srcConfig) {
  super(in);
  this.ordinalMap = ordinalMap;
  facetsConfig = new InnerFacetsConfig();
  facetFields = new HashSet<>();
  for (DimConfig dc : srcConfig.getDimConfigs().values()) {
    facetFields.add(dc.indexFieldName);
  }
  // always add the default indexFieldName. This is because FacetsConfig does
  // not explicitly record dimensions that were indexed under the default
  // DimConfig, unless they have a custom DimConfig.
  facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
}