/**
 * Copyright (c) 2015 Lemur Consulting Ltd.
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package uk.co.flax.biosolr.builders;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.co.flax.biosolr.FacetTreeParameters;
import uk.co.flax.biosolr.TreeFacetField;

/**
 * FacetTreeBuilder implementation that uses parent node IDs to build a
 * tree from the bottom node upwards.
 * <p>
 * Minimum required parameters for this tree builder are the node field,
 * either passed in local parameters or taken from the key value, and
 * the parent node field. {@link #initialiseParameters(SolrParams)} will
 * throw a SyntaxError if these values are not defined.
 * </p>
 * @author mlp
 */
public class ParentNodeFacetTreeBuilder extends AbstractFacetTreeBuilder {
	private static final Logger LOGGER = LoggerFactory.getLogger(ParentNodeFacetTreeBuilder.class);

	private String parentField;
	private int maxLevels;
	private final Set<String> docFields = new HashSet<>();
	public void initialiseParameters(SolrParams localParams) throws SyntaxError {
		// Initialise the common fields

		// Initialise the parent field - REQUIRED
		parentField = localParams.get(FacetTreeParameters.PARENT_FIELD_PARAM);
		if (StringUtils.isBlank(parentField)) {
			throw new SyntaxError("Missing parent field definition in " + localParams);
		//  Initialise the optional fields
		maxLevels = localParams.getInt(FacetTreeParameters.LEVELS_PARAM, 0);
		docFields.addAll(Arrays.asList(getNodeField(), parentField));
		if (hasLabelField()) {

	public List<TreeFacetField> processFacetTree(SolrIndexSearcher searcher, Map<String, Integer> facetMap)
			throws IOException {
		checkFieldsInSchema(searcher, docFields);
		// Extract the facet keys to a volatile set
		Set<String> facetKeys = new HashSet<>(facetMap.keySet());

		// Build a map of parent - child node IDs. This should contain the parents
		// of all our starting facet terms.
		Map<String, Set<String>> nodeChildren = findParentEntries(searcher, facetKeys);

		// Find the top nodes
		Set<String> topNodes = findTopLevelNodes(nodeChildren);
		LOGGER.debug("Found {} top level nodes", topNodes.size());

		// Convert to a list of TreeFacetFields
		return topNodes.parallelStream()
				.map(node -> buildAccumulatedEntryTree(0, node, nodeChildren, facetMap))
	 * Find all parent nodes for the given set of items.
	 * @param searcher the searcher for the collection being used.
	 * @param facetValues the starting set of node IDs.
	 * @return a map of nodes, keyed by their IDs.
	 * @throws IOException
	private Map<String, Set<String>> findParentEntries(SolrIndexSearcher searcher, Collection<String> facetValues)
			throws IOException {
		Map<String, Set<String>> nodeParentIds = new HashMap<>();

		Set<String> nodesFound = new HashSet<>();
		Set<String> nodeIds = new HashSet<>(facetValues);

		int count = 0;
		while (nodeIds.size() > 0 && (maxLevels == 0 || maxLevels >= count)) {
			// Find the direct parents for the current node IDs
			Map<String, Set<String>> parents = findParentIdsForNodes(searcher, nodeIds);

			// Get the parent IDs from all the retrieved nodes - these are the next set of
			// nodes whose parents should be found.
			nodeIds = parents.values().stream()
					.flatMap(v -> v.stream())
					.filter(id -> !nodesFound.contains(id))
			count ++;
		// Now, invert the map, so it's a map of parent->child IDs
		Map<String, Set<String>> parentChildIds = new HashMap<>();
		for (Entry<String, Set<String>> entry : nodeParentIds.entrySet()) {
			for (String parentId : entry.getValue()) {
				if (!parentChildIds.containsKey(parentId)) {
					parentChildIds.put(parentId, new HashSet<String>());

		return parentChildIds;
	private Map<String, Set<String>> findParentIdsForNodes(SolrIndexSearcher searcher, Collection<String> nodeIds) throws IOException {
		Map<String, Set<String>> parentIds = new HashMap<>();
		LOGGER.debug("Looking up parents for {} nodes", nodeIds.size());
		Query filter = buildFilterQuery(getNodeField(), nodeIds);
		LOGGER.trace("Filter query: {}", filter);
		DocSet docs = searcher.getDocSet(filter);
		for (DocIterator it = docs.iterator(); it.hasNext(); ) {
			Document doc = searcher.doc(it.nextDoc(), docFields);
			String nodeId = doc.get(getNodeField());
			Set<String> parentIdValues = new HashSet<>(Arrays.asList(doc.getValues(parentField)));
			parentIds.put(nodeId, parentIdValues);
			// Record the label, if required
			if (isLabelRequired(nodeId)) {
				recordLabel(nodeId, doc.getValues(getLabelField()));
		return parentIds;

	 * Build a filter query for a field using a set of values, taken from the keys
	 * of a {@link NamedList}.
	 * @param field
	 * @param values
	 * @return a filter string.
	private Query buildFilterQuery(String field, Collection<String> values) {
		BooleanQuery.Builder builder = new BooleanQuery.Builder().setDisableCoord(true);

			.map(v -> new TermQuery(new Term(field, v)))
			.forEach(tq -> builder.add(tq, Occur.SHOULD));

		return builder.build();

	 * Recursively build an accumulated facet entry tree.
	 * @param level current level in the tree (used for debugging/logging).
	 * @param fieldValue the current node value.
	 * @param hierarchyMap the map of nodes (either in the original facet set,
	 * or parents of those entries).
	 * @param facetCounts the facet counts, keyed by node ID.
	 * @return a {@link TreeFacetField} containing details for the current node and all
	 * sub-nodes down to the lowest leaf which has a facet count.
	private TreeFacetField buildAccumulatedEntryTree(int level, String fieldValue, Map<String, Set<String>> hierarchyMap,
			Map<String, Integer> facetCounts) {
		// Build the child hierarchy for this entry.
		// We use a reverse-ordered SortedSet so entries are returned in descending
		// order by their total count.
		SortedSet<TreeFacetField> childHierarchy = new TreeSet<>(Collections.reverseOrder());
		// childTotal is the total number of facet hits below this node
		long childTotal = 0;
		if (hierarchyMap.containsKey(fieldValue)) {
			// Loop through all the direct child URIs, looking for those which are in the annotation map
			for (String childId : hierarchyMap.get(fieldValue)) {
				if (!childId.equals(fieldValue)) {
					// Found a child of this node - recurse to build its facet tree
					LOGGER.trace("[{}] Building child tree for {}, with {} children", level, childId, 
							(hierarchyMap.containsKey(childId) ? hierarchyMap.get(childId).size(): 0));
					TreeFacetField childTree = buildAccumulatedEntryTree(level + 1, childId, hierarchyMap, facetCounts);
					// Only add to the total count if this node isn't already in the child hierarchy
					if (childHierarchy.add(childTree)) {
						childTotal += childTree.getTotal();
					LOGGER.trace("[{}] child tree total: {} - child Total {}, child count {}", level, childTree.getTotal(), childTotal, childHierarchy.size());
				} else {
					LOGGER.trace("[{}] found self-referring ID {}->{}", level, fieldValue, childId);

		// Build the accumulated facet entry
		LOGGER.trace("[{}] Building facet tree for {}", level, fieldValue);
		return new TreeFacetField(getLabel(fieldValue), fieldValue, getFacetCount(fieldValue, facetCounts), childTotal, childHierarchy);

	 * Get the count for the facet with the given key.
	 * @param key the key to look up.
	 * @param facetCounts the map of facet counts.
	 * @return the count, or <code>0</code> if the key does not exist in the map.
	private long getFacetCount(String key, Map<String, Integer> facetCounts) {
		if (facetCounts.containsKey(key)) {
			return facetCounts.get(key);
		return 0;
	protected Logger getLogger() {
		return LOGGER;