/**
 * Copyright (c) 2015 Lemur Consulting Ltd.
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package uk.co.flax.biosolr.pruning;

import java.util.Collection;
import java.util.Comparator;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

import uk.co.flax.biosolr.TreeFacetField;

/**
 * A simple {@link Pruner} implementation, which attempts to strip off the
 * least significant parent nodes, returning child nodes which either have
 * content themselves, or have direct children with content.
 *
 * @author mlp
 */
public class SimplePruner implements Pruner {
	 * The default number of child nodes with content required for a parent
	 * node to be considered "relevant".
	public static final int MIN_CHILD_COUNT = 3;
	 * The parameter used to pass the child count into the component.
	public static final String CHILD_COUNT_PARAM = "childCount";
	private final int minChildCount;
	public SimplePruner(int minChildCount) {
		this.minChildCount = minChildCount;

	public Collection<TreeFacetField> prune(Collection<TreeFacetField> unprunedTrees) {
		// Prune the trees
		Collection<TreeFacetField> pruned = stripNonRelevantTrees(unprunedTrees);
		// Now loop through the top-level nodes, making sure none of the entries
		// are included in another entry's children
		pruned = deduplicateTrees(pruned);
		return pruned;

	 * De-duplicate a collection of top-level trees by checking whether a top-level
	 * node exists in the children of any of the other nodes, and removing it if so.
	 * @param trees the collection of top-level facet trees.
	 * @return the de-duplicated collection.
	private Collection<TreeFacetField> deduplicateTrees(Collection<TreeFacetField> trees) {
		return trees.stream().filter(t -> !isFacetInChildren(t, 0, trees)).collect(Collectors.toList());
	 * Check whether a particular facet exists in the children of any other facets
	 * in a collection.
	 * @param facet the facet to check for.
	 * @param level the current level in the hierarchy, starting from 0.
	 * @param trees the collection of trees to check through.
	 * @return <code>true</code> if the facet is found in the child lists.
	private boolean isFacetInChildren(TreeFacetField facet, int level, Collection<TreeFacetField> trees) {
		boolean retVal = false;
		if (trees != null) {
			for (TreeFacetField tree : trees) {
				if ((level != 0 && tree.equals(facet)) || (isFacetInChildren(facet, level + 1, tree.getHierarchy()))) {
					retVal = true;
		return retVal;
	 * Prune a collection of facet trees, in order to remove nodes which are
	 * unlikely to be relevant. "Relevant" is defined here to be either
	 * entries with direct hits, or entries with a pre-defined number of
	 * child nodes with direct hits. This can remove several top-level
	 * layers from the tree which don't have direct hits.
	 * @param unprunedTrees the trees which need pruning.
	 * @return a sorted list of pruned trees.
	private Collection<TreeFacetField> stripNonRelevantTrees(Collection<TreeFacetField> unprunedTrees) {
		// Use a sorted set so the trees come out in count-descending order
		Set<TreeFacetField> pruned = new TreeSet<>(Comparator.reverseOrder());
		for (TreeFacetField tff : unprunedTrees) {
			if (tff.getCount() > 0) {
				// Relevant  - entry has direct hits
			} else if (checkChildCounts(tff)) {
				// Relevant - entry has a number of children with direct hits
			} else if (tff.hasChildren()) {
				// Not relevant at this level - recurse through children
		return pruned;
	 * Check whether the given tree has enough children with direct hits to 
	 * be included in the pruned tree.
	 * @param tree the facet tree.
	 * @return <code>true</code> if the tree has enough children to be 
	 * included.
	private boolean checkChildCounts(TreeFacetField tree) {
		long hitCount = 0;
		if (tree.hasChildren()) {
			hitCount = tree.getHierarchy().stream().filter(t -> t.getCount() > 0).count();
		return hitCount >= minChildCount;