package org.apache.blur.analysis.type.spatial.lucene;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.prefix.PointPrefixTreeFieldCacheProvider;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.BytesRef;

import com.spatial4j.core.shape.Point;
import com.spatial4j.core.shape.Shape;

/**
 * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
 * subclasses are {@link RecursivePrefixTreeStrategy} and
 * {@link TermQueryPrefixTreeStrategy}. This strategy is most effective as a
 * fast approximate spatial search filter.
 * <h4>Characteristics:</h4>
 * <ul>
 * <li>Can index any shape; however only {@link RecursivePrefixTreeStrategy} can
 * effectively search non-point shapes.</li>
 * <li>Can index a variable number of shapes per field value. This strategy can
 * do it via multiple calls to
 * {@link #createIndexableFields(com.spatial4j.core.shape.Shape)} for a document
 * or by giving it some sort of Shape aggregate (e.g. JTS WKT MultiPoint). The
 * shape's boundary is approximated to a grid precision.</li>
 * <li>Can query with any shape. The shape's boundary is approximated to a grid
 * precision.</li>
 * <li>Only {@link org.apache.lucene.spatial.query.SpatialOperation#Intersects}
 * is supported. If only points are indexed then this is effectively equivalent
 * to IsWithin.</li>
 * <li>The strategy supports
 * {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)} even for
 * multi-valued data, so long as the indexed data is all points; the behavior is
 * undefined otherwise. However, <em>it will likely be removed in
 * the future</em> in lieu of using another strategy with a more scalable
 * implementation. Use of this call is the only circumstance in which a cache is
 * used. The cache is simple but as such it doesn't scale to large numbers of
 * points nor is it real-time-search friendly.</li>
 * </ul>
 * <h4>Implementation:</h4> The {@link SpatialPrefixTree} does most of the work,
 * for example returning a list of terms representing grids of various sizes for
 * a supplied shape. An important configuration item is
 * {@link #setDistErrPct(double)} which balances shape precision against
 * scalability. See those javadocs.
 *
 * @lucene.internal
 */
public abstract class PrefixTreeStrategy extends SpatialStrategy {
  protected final SpatialPrefixTree grid;
  private final Map<String, PointPrefixTreeFieldCacheProvider> provider = new ConcurrentHashMap<String, PointPrefixTreeFieldCacheProvider>();
  protected final boolean simplifyIndexedCells;
  protected int defaultFieldValuesArrayLen = 2;
  protected double distErrPct = SpatialArgs.DEFAULT_DISTERRPCT;// [ 0 TO 0.5 ]
  protected boolean docValue;

  public PrefixTreeStrategy(SpatialPrefixTree grid, String fieldName, boolean simplifyIndexedCells, boolean docValue) {
    super(grid.getSpatialContext(), fieldName);
    this.grid = grid;
    this.simplifyIndexedCells = simplifyIndexedCells;
    this.docValue = docValue;

   * A memory hint used by
   * {@link #makeDistanceValueSource(com.spatial4j.core.shape.Point)} for how
   * big the initial size of each Document's array should be. The default is 2.
   * Set this to slightly more than the default expected number of points per
   * document.
  public void setDefaultFieldValuesArrayLen(int defaultFieldValuesArrayLen) {
    this.defaultFieldValuesArrayLen = defaultFieldValuesArrayLen;

  public double getDistErrPct() {
    return distErrPct;

   * The default measure of shape precision affecting shapes at index and query
   * times. Points don't use this as they are always indexed at the configured
   * maximum precision (
   * {@link org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree#getMaxLevels()}
   * ); this applies to all other shapes. Specific shapes at index and query
   * time can use something different than this default value. If you don't set
   * a default then the default is {@link SpatialArgs#DEFAULT_DISTERRPCT} --
   * 2.5%.
   * @see org.apache.lucene.spatial.query.SpatialArgs#getDistErrPct()
  public void setDistErrPct(double distErrPct) {
    this.distErrPct = distErrPct;

  public Field[] createIndexableFields(Shape shape) {
    double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
    return createIndexableFields(shape, distErr);

  public Field[] createIndexableFields(Shape shape, double distErr) {
    int detailLevel = grid.getLevelForDistance(distErr);
    List<Cell> cells = grid.getCells(shape, detailLevel, true, simplifyIndexedCells);// intermediates
                                                                                     // cells
    Field docValueField = null;

    if (docValue) {
      Cell cell = cells.get(cells.size() - 1);
      docValueField = new SortedDocValuesField(getFieldName(), new BytesRef(cell.getTokenString()));

    // TODO is CellTokenStream supposed to be re-used somehow? see Uwe's
    // comments:

    Field field = new Field(getFieldName(), new CellTokenStream(cells.iterator()), FIELD_TYPE);
    return docValue ? new Field[] { docValueField, field } : new Field[] { field };

  /* Indexed, tokenized, not stored. */
  public static final FieldType FIELD_TYPE = new FieldType();

  static {

   * Outputs the tokenString of a cell, and if its a leaf, outputs it again with
   * the leaf byte.
  final static class CellTokenStream extends TokenStream {

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    private Iterator<Cell> iter = null;

    public CellTokenStream(Iterator<Cell> tokens) {
      this.iter = tokens;

    CharSequence nextTokenStringNeedingLeaf = null;

    public boolean incrementToken() {
      if (nextTokenStringNeedingLeaf != null) {
        termAtt.append((char) Cell.LEAF_BYTE);
        nextTokenStringNeedingLeaf = null;
        return true;
      if (iter.hasNext()) {
        Cell cell =;
        CharSequence token = cell.getTokenString();
        if (cell.isLeaf())
          nextTokenStringNeedingLeaf = token;
        return true;
      return false;


  public ValueSource makeDistanceValueSource(Point queryPoint) {
    PointPrefixTreeFieldCacheProvider p = provider.get(getFieldName());
    if (p == null) {
      synchronized (this) {// double checked locking idiom is okay since
                           // provider is threadsafe
        p = provider.get(getFieldName());
        if (p == null) {
          p = new PointPrefixTreeFieldCacheProvider(grid, getFieldName(), defaultFieldValuesArrayLen);
          provider.put(getFieldName(), p);

    return new ShapeFieldCacheDistanceValueSource(ctx, p, queryPoint);

  public SpatialPrefixTree getGrid() {
    return grid;