Java Code Examples for org.apache.lucene.facet.FacetsConfig#stringToPath()

The following examples show how to use org.apache.lucene.facet.FacetsConfig#stringToPath(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DirectoryTaxonomyWriter.java | Project: lucene-solr (Apache License 2.0) | 5 votes
/**
 * Takes the categories from the given taxonomy directory, and adds the
 * missing ones to this taxonomy. Additionally, it fills the given
 * {@link OrdinalMap} with a mapping from the original ordinal to the new
 * ordinal.
 *
 * @param taxoDir directory holding the taxonomy index whose categories are read
 * @param map sized to the source reader's document count, then given one
 *        (original ordinal, new ordinal) pair per category term, and finally
 *        notified through {@code addDone()}
 * @throws IOException propagated from opening or reading the source taxonomy,
 *         or from adding a category to this taxonomy
 */
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
  ensureOpen();
  // try-with-resources closes the reader on every exit path and, unlike a
  // manual finally block, does not let a failing close() hide the exception
  // that caused the failure in the first place.
  try (DirectoryReader r = DirectoryReader.open(taxoDir)) {
    map.setSize(r.numDocs());
    int base = 0;
    PostingsEnum docs = null;
    for (final LeafReaderContext ctx : r.leaves()) {
      final LeafReader ar = ctx.reader();
      final Terms terms = ar.terms(Consts.FULL);
      // TODO: share per-segment TermsEnum here!
      TermsEnum te = terms.iterator();
      while (te.next() != null) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        // The term's first posting is a segment-local doc id; adding the
        // accumulated maxDoc of the preceding segments gives the ordinal
        // in the source taxonomy.
        map.addMapping(docs.nextDoc() + base, ordinal);
      }
      base += ar.maxDoc(); // no deletions, so we're ok
    }
    map.addDone();
  }
}
 
Example 2
Source File: DirectoryTaxonomyReader.java | Project: lucene-solr (Apache License 2.0) | 5 votes
@Override
public FacetLabel getPath(int ordinal) throws IOException {
  ensureOpen();

  // The category cache is shared with DTR instances allocated from
  // doOpenIfChanged, so an ordinal this instance does not recognize must be
  // rejected up front, before the cache is consulted.
  final boolean knownOrdinal = ordinal >= 0 && ordinal < indexReader.maxDoc();
  if (!knownOrdinal) {
    return null;
  }

  // TODO: can we use an int-based hash impl, such as IntToObjectMap,
  // wrapped as LRU?
  final Integer cacheKey = Integer.valueOf(ordinal);
  synchronized (categoryCache) {
    final FacetLabel cached = categoryCache.get(cacheKey);
    if (cached != null) {
      return cached;
    }
  }

  // Cache miss: rebuild the label from the stored full-path field. The index
  // read happens outside the lock; only the cache update is synchronized.
  final String storedPath = indexReader.document(ordinal).get(Consts.FULL);
  final FacetLabel label = new FacetLabel(FacetsConfig.stringToPath(storedPath));
  synchronized (categoryCache) {
    categoryCache.put(cacheKey, label);
  }
  return label;
}
 
Example 3
Source File: SortedSetDocValuesFacetCounts.java | Project: lucene-solr (Apache License 2.0) | 4 votes
/**
 * Builds the facet result for one dimension from the counts of the ordinals
 * in {@code ordRange} (both ends inclusive).
 *
 * @param dim dimension name placed in the returned result
 * @param ordRange first and last ordinal belonging to the dimension
 * @param topN capacity of the queue used to select the reported children
 * @return the result for the dimension, or {@code null} when no ordinal in
 *         the range has a positive count
 */
private final FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {
  TopOrdAndIntQueue topQueue = null;
  TopOrdAndIntQueue.OrdAndValue spare = null;
  int threshold = 0;
  int totalCount = 0;
  int nonZeroChildren = 0;

  for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
    final int count = counts[ord];
    if (count <= 0) {
      continue;
    }
    totalCount += count;
    nonZeroChildren++;
    if (count <= threshold) {
      // Not above the current threshold, so it is not offered to the queue.
      continue;
    }

    if (spare == null) {
      spare = new TopOrdAndIntQueue.OrdAndValue();
    }
    spare.ord = ord;
    spare.value = count;
    if (topQueue == null) {
      // Lazy init, so we don't create this for the
      // sparse case unnecessarily
      topQueue = new TopOrdAndIntQueue(topN);
    }
    // Whatever the queue hands back is recycled for the next candidate.
    spare = topQueue.insertWithOverflow(spare);
    if (topQueue.size() == topN) {
      threshold = topQueue.top().value;
    }
  }

  if (topQueue == null) {
    return null;
  }

  // Fill the array from the last slot towards the first while draining the
  // queue (NOTE(review): presumably the queue pops its smallest entry first,
  // which would leave the array in descending order - confirm).
  final LabelAndValue[] labelValues = new LabelAndValue[topQueue.size()];
  int slot = labelValues.length;
  while (--slot >= 0) {
    final TopOrdAndIntQueue.OrdAndValue entry = topQueue.pop();
    final String encodedPath = dv.lookupOrd(entry.ord).utf8ToString();
    final String[] pathParts = FacetsConfig.stringToPath(encodedPath);
    // The second path component is used as the child label.
    labelValues[slot] = new LabelAndValue(pathParts[1], entry.value);
  }

  return new FacetResult(dim, new String[0], totalCount, labelValues, nonZeroChildren);
}
 
Example 4
Source File: DefaultSortedSetDocValuesReaderState.java | Project: lucene-solr (Apache License 2.0) | 4 votes
/**
 * Creates this, pulling doc values from the specified field, and records in
 * {@code prefixToOrdRange} one {@link OrdRange} per run of consecutive
 * ordinals that share the same first path component.
 *
 * @throws IllegalArgumentException if the field has no doc values, if the
 *         value count exceeds {@code Integer.MAX_VALUE}, or if a term does
 *         not split into exactly two path components
 */
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
  this.field = field;
  this.reader = reader;

  // We need this to create thread-safe MultiSortedSetDV
  // per collector:
  final SortedSetDocValues docValues = getDocValues();
  if (docValues == null) {
    throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
  }
  final long totalValues = docValues.getValueCount();
  if (totalValues > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + totalValues);
  }
  valueCount = (int) totalValues;

  // TODO: we can make this more efficient if eg we can be
  // "involved" when OrdinalMap is being created?  Ie see
  // each term/ord it's assigning as it goes...
  String openDim = null;
  int openDimFirstOrd = -1;

  // TODO: this approach can work for full hierarchy?;
  // TaxoReader can't do this since ords are not in
  // "sorted order" ... but we should generalize this to
  // support arbitrary hierarchy:
  for (int ord = 0; ord < valueCount; ord++) {
    final String encoded = docValues.lookupOrd(ord).utf8ToString();
    final String[] path = FacetsConfig.stringToPath(encoded);
    if (path.length != 2) {
      throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(path) + " " + encoded);
    }

    final String dim = path[0];
    if (dim.equals(openDim)) {
      // Still inside the run of the current dimension.
      continue;
    }

    // A different dimension starts here: close the previous run, if any,
    // at the preceding ordinal and open a new one.
    if (openDim != null) {
      prefixToOrdRange.put(openDim, new OrdRange(openDimFirstOrd, ord - 1));
    }
    openDimFirstOrd = ord;
    openDim = dim;
  }

  // Close the final run, which extends to the last ordinal.
  if (openDim != null) {
    prefixToOrdRange.put(openDim, new OrdRange(openDimFirstOrd, valueCount - 1));
  }
}
 
Example 5
Source File: ConcurrentSortedSetDocValuesFacetCounts.java | Project: lucene-solr (Apache License 2.0) | 4 votes
/**
 * Builds the facet result for one dimension from the counts of the ordinals
 * in {@code ordRange} (both ends inclusive).
 *
 * <p>Each ordinal's count is read exactly once, so the value compared against
 * the threshold, the value added to the dimension total and the value stored
 * in the queue are always the same number.
 *
 * @param dim dimension name placed in the returned result
 * @param ordRange first and last ordinal belonging to the dimension
 * @param topN capacity of the queue used to select the reported children
 * @return the result for the dimension, or {@code null} when no ordinal in
 *         the range has a positive count
 */
private final FacetResult getDim(String dim, OrdRange ordRange, int topN) throws IOException {

  TopOrdAndIntQueue q = null;

  int bottomCount = 0;

  int dimCount = 0;
  int childCount = 0;

  TopOrdAndIntQueue.OrdAndValue reuse = null;
  for (int ord = ordRange.start; ord <= ordRange.end; ord++) {
    // Single read per ordinal instead of up to four counts.get(ord) calls.
    final int count = counts.get(ord);
    if (count > 0) {
      dimCount += count;
      childCount++;
      if (count > bottomCount) {
        if (reuse == null) {
          reuse = new TopOrdAndIntQueue.OrdAndValue();
        }
        reuse.ord = ord;
        reuse.value = count;
        if (q == null) {
          // Lazy init, so we don't create this for the
          // sparse case unnecessarily
          q = new TopOrdAndIntQueue(topN);
        }
        // Whatever the queue hands back is recycled for the next candidate.
        reuse = q.insertWithOverflow(reuse);
        if (q.size() == topN) {
          bottomCount = q.top().value;
        }
      }
    }
  }

  if (q == null) {
    return null;
  }

  // Fill the array from the last slot towards the first while draining the
  // queue (NOTE(review): presumably the queue pops its smallest entry first,
  // which would leave the array in descending order - confirm).
  LabelAndValue[] labelValues = new LabelAndValue[q.size()];
  for (int i = labelValues.length - 1; i >= 0; i--) {
    TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
    final BytesRef term = dv.lookupOrd(ordAndValue.ord);
    String[] parts = FacetsConfig.stringToPath(term.utf8ToString());
    // The second path component is used as the child label.
    labelValues[i] = new LabelAndValue(parts[1], ordAndValue.value);
  }

  return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
}