java source code of ElevateQueryComparer

quaerite-master
- pom.xml
- quaerite-analysis
  - src
    - main
      - resources
        log4j.properties
      - java
        org
        mitre
        quaerite
        analysis
        CompareAnalyzers.java
        EquivalenceSet.java
    - test
      - java
        org
        mitre
        quaerite
        analysis
        TestCompareAnalyzers.java
        TestEquivalenceSet.java
  - pom.xml
- LICENSE
- quaerite-examples
  - src
    - main
      - java
        org
        mitre
        quaerite
        examples
        IndexTMDB.java
    - test
      - java
        org
        mitre
        quaerite
        examples
        TestIndexTMDB.java
  - pom.xml
  - README_ES.md
  - example_files_advanced
    - movie_judgments_boosting.csv
    - es
      - experiment_features_es_1.json
      - experiments_es_1.json
  - README.md
  - example_files
    - movie_judgments.csv
    - movie_judgments_train.csv
    - es
      - es-7.x
        max_buckets.json
        tmdb_template.json
      - es-6.x
        tmdb_template.json
      - experiment_features_es_1.json
      - experiments_es_4.json
      - experiments_es_1.json
      - experiments_es_3.json
      - experiments_es_2.json
    - solr
      - experiment_features_solr_2.json
      - experiment_features_solr_4.json
      - experiment_features_solr_5.json
      - solr-8.x
        tmdb
        core.properties
        conf
        schema_extra_fields.xml
        protwords.txt
        synonyms.txt
        params.json
        stopwords.txt
        lang
        stopwords_tr.txt
        contractions_fr.txt
        stopwords_it.txt
        hyphenations_ga.txt
        stopwords_en.txt
        stopwords_es.txt
        stopwords_da.txt
        stopwords_ar.txt
        userdict_ja.txt
        stopwords_fa.txt
        stopwords_hy.txt
        contractions_ga.txt
        stopwords_hu.txt
        stopwords_pt.txt
        stopwords_nl.txt
        stopwords_gl.txt
        stopwords_hi.txt
        stopwords_lv.txt
        stopwords_fr.txt
        stemdict_nl.txt
        stopwords_ga.txt
        stopwords_de.txt
        contractions_ca.txt
        stopwords_fi.txt
        stopwords_bg.txt
        stopwords_no.txt
        stopwords_ro.txt
        stopwords_id.txt
        stopwords_eu.txt
        stopwords_sv.txt
        stopwords_el.txt
        stopwords_cz.txt
        stoptags_ja.txt
        stopwords_th.txt
        stopwords_ca.txt
        stopwords_ru.txt
        stopwords_ja.txt
        contractions_it.txt
        schema.xml
        schema_extra_types.xml
        solrconfig.xml
      - experiments_solr_1.json
      - experiment_features_solr_1.json
      - experiment_features_solr_3.json
      - solr-4.x
        tmdb
        core.properties
        conf
        _rest_managed.json
        mapping-ISOLatin1Accent.txt
        xslt
        updateXml.xsl
        example_atom.xsl
        example_rss.xsl
        example.xsl
        luke.xsl
        admin-extra.html
        update-script.js
        schema_extra_fields.xml
        protwords.txt
        synonyms.txt
        currency.xml
        spellings.txt
        stopwords.txt
        admin-extra.menu-top.html
        scripts.conf
        lang
        stopwords_tr.txt
        contractions_fr.txt
        stopwords_it.txt
        hyphenations_ga.txt
        stopwords_en.txt
        stopwords_es.txt
        stopwords_da.txt
        stopwords_ar.txt
        userdict_ja.txt
        stopwords_fa.txt
        stopwords_hy.txt
        contractions_ga.txt
        stopwords_hu.txt
        stopwords_pt.txt
        stopwords_nl.txt
        stopwords_gl.txt
        stopwords_hi.txt
        stopwords_lv.txt
        stopwords_fr.txt
        stopwords_ckb.txt
        stemdict_nl.txt
        stopwords_ga.txt
        stopwords_de.txt
        contractions_ca.txt
        stopwords_fi.txt
        stopwords_bg.txt
        stopwords_no.txt
        stopwords_ro.txt
        stopwords_id.txt
        stopwords_eu.txt
        stopwords_sv.txt
        stopwords_el.txt
        stopwords_cz.txt
        stoptags_ja.txt
        stopwords_th.txt
        stopwords_ca.txt
        stopwords_ru.txt
        stopwords_ja.txt
        contractions_it.txt
        clustering
        carrot2
        kmeans-attributes.xml
        lingo-attributes.xml
        stc-attributes.xml
        _schema_analysis_synonyms_english.json
        schema.xml
        velocity
        query_spatial.vm
        debug.vm
        query_group.vm
        main.css
        header.vm
        jquery.autocomplete.js
        suggest.vm
        facet_fields.vm
        footer.vm
        error.vm
        results_list.vm
        facet_pivot.vm
        mime_type_lists.vm
        facet_queries.vm
        facet_ranges.vm
        query_form.vm
        pagination_bottom.vm
        did_you_mean.vm
        README.txt
        browse.vm
        hit_plain.vm
        layout.vm
        tabs.vm
        pagination_top.vm
        hit.vm
        cluster_results.vm
        cluster.vm
        facets.vm
        jquery.autocomplete.css
        product_doc.vm
        richtext_doc.vm
        VM_global_library.vm
        head.vm
        hit_grouped.vm
        join_doc.vm
        query.vm
        schema_extra_types.xml
        mapping-FoldToASCII.txt
        solrconfig.xml
        _schema_analysis_stopwords_english.json
        admin-extra.menu-bottom.html
        elevate.xml
      - experiments_solr_2.json
      - solr-7.x
        tmdb
        core.properties
        conf
        schema_extra_fields.xml
        protwords.txt
        synonyms.txt
        params.json
        stopwords.txt
        lang
        stopwords_tr.txt
        contractions_fr.txt
        stopwords_it.txt
        hyphenations_ga.txt
        stopwords_en.txt
        stopwords_es.txt
        stopwords_da.txt
        stopwords_ar.txt
        userdict_ja.txt
        stopwords_fa.txt
        stopwords_hy.txt
        contractions_ga.txt
        stopwords_hu.txt
        stopwords_pt.txt
        stopwords_nl.txt
        stopwords_gl.txt
        stopwords_hi.txt
        stopwords_lv.txt
        stopwords_fr.txt
        stemdict_nl.txt
        stopwords_ga.txt
        stopwords_de.txt
        contractions_ca.txt
        stopwords_fi.txt
        stopwords_bg.txt
        stopwords_no.txt
        stopwords_ro.txt
        stopwords_id.txt
        stopwords_eu.txt
        stopwords_sv.txt
        stopwords_el.txt
        stopwords_cz.txt
        stoptags_ja.txt
        stopwords_th.txt
        stopwords_ca.txt
        stopwords_ru.txt
        stopwords_ja.txt
        contractions_it.txt
        schema.xml
        schema_extra_types.xml
        solrconfig.xml
    - NOTICE
    - movie_judgments_test.csv
- quaerite-parent
  - checkstyle.xml
  - pom.xml
- quaerite-cli
  - src
    - main
      - resources
        log4j.properties
      - java
        org
        mitre
        quaerite
        db
        QueryRunnerDBClient.java
        GADB.java
        TrainTestJudmentListPair.java
        ExperimentDB.java
        cli
        GenerateExperiments.java
        ComparePerQuery.java
        DumpResults.java
        RunExperiments.java
        RunGA.java
        QueryLoader.java
        AbstractCLI.java
        CopyIndex.java
        DumpExperiments.java
        AbstractExperimentRunner.java
        QuaeriteCLI.java
        FindFeatures.java
    - test
      - resources
        test-documents
        experiment_features_es_1.json
        experiment_features_solr_1.json
        qf.json
      - java
        org
        mitre
        quaerite
        cli
        TestESExamples.java
        TestSolrExamples.java
        TestGenerateSolrExperiments.java
        TestESExamplesAdvanced.java
        TestRunGA.java
        TestGenerateESExperiments.java
        TestExperimentDB.java
  - pom.xml
- README.md
- .gitignore
- quaerite-logs
  - src
    - main
      - java
        org
        tallison
        quaerite
        logs
        ExtractQueries.java
  - pom.xml
- quaerite-solr-tools
  - src
    - main
      - resources
        log4j.properties
      - java
        org
        mitre
        quaerite
        solrtools
        ElevateAnalysisEvaluator.java
        ElevateScraper.java
        ElevateQueryComparer.java
        Elevate.java
        WinnowAnalyzedElevate.java
        ElevateElevateComparer.java
  - pom.xml
- quaerite-connectors
  - src
    - main
      - java
        org
        mitre
        quaerite
        connectors
        IdGrabber.java
        ESClient.java
        HttpUtils.java
        SearchClientFactory.java
        SearchClientException.java
        ES6Client.java
        JsonResponse.java
        QueryRequest.java
        StoredDocument.java
        SolrClient.java
        Solr4Client.java
        SearchClient.java
    - test
      - resources
        log4j.properties
      - java
        org
        mitre
        quaerite
        connectors
        TestSolrClient.java
        TestESClient.java
  - pom.xml
- quaerite-core
  - src
    - main
      - java
        org
        mitre
        quaerite
        core
        QueryInfo.java
        SearchResultSet.java
        ExperimentSet.java
        ExperimentConfig.java
        Experiment.java
        GAConfig.java
        JudgmentList.java
        FacetResult.java
        util
        GAOperation.java
        StringUtil.java
        MapUtil.java
        CommandLineUtil.java
        JsonUtil.java
        MathUtil.java
        scorers
        AverageDocsReturned.java
        DiscountedCumulativeGain2005.java
        NDCG.java
        JudgmentScorer.java
        SummingScoreAggregator.java
        ZeroResults.java
        HighestRankReciprocal.java
        HighestRank.java
        DiscountedCumulativeGain2002.java
        RecallAtN.java
        SearchResultSetComparer.java
        DistributionalScoreAggregator.java
        TotalElapsedTime.java
        AbstractJudgmentScorer.java
        SearchResultSetScorer.java
        PrecisionAtN.java
        ExpectedReciprocalRank.java
        AtLeastOneAtN.java
        AverageElapsedTime.java
        Scorer.java
        TotalQueryTime.java
        TotalDocsReturned.java
        CumulativeGain.java
        QueryStrings.java
        Judgments.java
        features
        MultiMatchType.java
        ParameterizableStringListFeature.java
        URL.java
        BQ.java
        QueryOperator.java
        BF.java
        WeightableListFeature.java
        StringListFeature.java
        CustomHandler.java
        SimpleStringFeature.java
        QF.java
        ParameterizableString.java
        PF.java
        AbstractFeature.java
        Boost.java
        WeightableField.java
        PS2.java
        PF3.java
        DisMaxBoost.java
        ParamsMap.java
        PF2.java
        IntFeature.java
        FloatFeature.java
        StringFeature.java
        NegativeBoost.java
        Fuzziness.java
        PS3.java
        PS.java
        Feature.java
        factories
        IntFeatureFactory.java
        StringFeatureFactory.java
        ParameterizableStringListFactory.java
        BoostingQueryFactory.java
        FloatFeatureFactory.java
        QueryFactory.java
        WeightableListFeatureFactory.java
        FeatureFactory.java
        AbstractFeatureFactory.java
        FeatureFactories.java
        StringListFeatureFactory.java
        ParameterizableStringFactory.java
        CustomHandlerFactory.java
        QueryOperatorFeatureFactory.java
        TIE.java
        FQ.java
        queries
        SingleStringQuery.java
        TermQuery.java
        TermsQuery.java
        MultiFieldQuery.java
        Query.java
        MatchPhraseQuery.java
        AbstractSingleFieldQuery.java
        BooleanQuery.java
        PhraseQuery.java
        EDisMaxQuery.java
        MultiMatchQuery.java
        BooleanClause.java
        MatchAllDocsQuery.java
        BoostingQuery.java
        LuceneQuery.java
        DisMaxQuery.java
        MatchQuery.java
        ExperimentFactory.java
        stats
        ContrastResult.java
        TokenDF.java
        ExperimentNameScorePair.java
        ExperimentScorePair.java
        serializers
        QuerySerializer.java
        ScorerListSerializer.java
        AbstractFeatureSerializer.java
        FeatureFactorySerializer.java
    - test
      - resources
        test-documents
        experiment_features_solr_2.json
        experiment_features_solr_4.json
        experiments_solr_err.json
        experiments_solr_no_atN.json
        experiment_features_solr_5.json
        experiment_features_solr_3ex.json
        experiments_solr_1.json
        experiments_solr_queryOp.json
        experiment_features_es_1.json
        experiment_features_solr_1.json
        experiment_features_es_2.json
        boolean_query_solr.json
        experiment_features_solr_3.json
        experiments_es_1.json
        experiments_solr_2.json
        experiment_features_solr_queryOp.json
      - java
        org
        mitre
        quaerite
        core
        TestExperimentSet.java
        util
        TestMathUtil.java
        scorers
        TestScorerListSerializer.java
        TestExpectedReciprocalRank.java
        TestNDCG.java
        features
        factories
        TestParameterizableStringFactory.java
        TestQueryFactory.java
        TestParameterizableStringListFactory.java
        TestFeatureFactories.java
        TestStringListFeatureFactory.java
  - pom.xml

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
package org.mitre.quaerite.solrtools;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.input.BOMInputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.mitre.quaerite.connectors.QueryRequest;
import org.mitre.quaerite.connectors.SearchClient;
import org.mitre.quaerite.connectors.SearchClientFactory;
import org.mitre.quaerite.core.SearchResultSet;
import org.mitre.quaerite.core.queries.TermQuery;
import org.mitre.quaerite.core.util.MapUtil;

public class ElevateQueryComparer {
    /** Logger used for warnings and for the summary statistics printed by this tool. */
    static Logger LOG = Logger.getLogger(ElevateQueryComparer.class);

    /** Command line options accepted by {@link #main(String[])}; populated in the static block. */
    static Options OPTIONS = new Options();


    static {
        //-s (required): url of the Solr index to check the elevated ids against
        OPTIONS.addOption(
                Option.builder("s")
                        .hasArg().required().desc("solr url").build()
        );

        //-e/--elevate (required): the elevate xml file to analyze
        OPTIONS.addOption(
                Option.builder("e")
                        .longOpt("elevate")
                        .hasArg(true)
                        .required(true)
                        .desc("elevate file (xml)").build()
        );
        //-q/--queries (required): the file of queries
        OPTIONS.addOption(
                Option.builder("q")
                        .longOpt("queries")
                        .hasArg(true)
                        .required(true)
                        .desc("queries (with optional counts)").build()
        );
        //-d/--outputDirectory (optional): where the csv reports are written
        OPTIONS.addOption(
                Option.builder("d")
                        .longOpt("outputDirectory")
                        .hasArg(true)
                        .required(false)
                        .desc("directory to which to write reports").build()
        );

        //if you are analyzing e.g. top 10k of GoogleAnalytics
        //you'll need to supply the actual total number of queries
        OPTIONS.addOption(
                Option.builder("t")
                        .longOpt("totalQueries")
                        .hasArg(true)
                        .required(false)
                        .desc("denominator for total number of queries -- " +
                                "sum of queries is used if this is not specified").build()
        );
        //let's say you have a single solr index that hosts
        //several logical indices: "general", "catlovers", "doglovers",
        //and their ids include the logical index, e.g. general-1; catlovers-1
        //you may only want to focus on ids that match this regex:
        //~/(?i)general-\\d+/
        OPTIONS.addOption(
                Option.builder("r")
                        .longOpt("regex")
                        .hasArg(true)
                        .required(false)
                        .desc("regex to subset ids (in case of multiple logical " +
                                "indices stored in a single Solr index)").build()
        );
    }

    /**
     * Command line entry point.
     * <p>
     * Parses the arguments against {@link #OPTIONS} (printing the usage and
     * returning if they cannot be parsed), loads the queries and the elevate
     * file, writes the csv reports into the report directory (the current
     * directory unless {@code -d} is given), logs how many elevate entries and
     * elevated ids were found and finally compares the elevated ids
     * against the search index given with {@code -s}.
     *
     * @param args command line arguments
     * @throws Exception if loading the inputs, writing a report or querying the index fails
     */
    public static void main(String[] args) throws Exception {
        CommandLine cli;
        try {
            cli = new DefaultParser().parse(OPTIONS, args);
        } catch (ParseException e) {
            new HelpFormatter().printHelp(
                    "java -jar org.mitre.quaerite.solrtools.ElevateQueryComparer",
                    OPTIONS);
            return;
        }

        //optional filter on the elevated ids; null means "no filter"
        Matcher idMatcher = cli.hasOption("r")
                ? Pattern.compile(cli.getOptionValue("r")).matcher("")
                : null;
        Path reportsRoot = Paths.get(cli.hasOption("d") ? cli.getOptionValue("d") : ".");

        DecimalFormat df = new DecimalFormat("##.###%",
                DecimalFormatSymbols.getInstance(Locale.ROOT));
        //TODO: lowercase queries or run them through an analyzer from a specific field?
        QuerySet queries = loadQueries(Paths.get(cli.getOptionValue("q")));
        Map<String, Elevate> elevateMap =
                ElevateScraper.scrape(Paths.get(cli.getOptionValue("e")), idMatcher);

        //the denominator for the percentages: -t if given, otherwise the sum of the loaded counts
        int totalQueries = cli.hasOption("t")
                ? Integer.parseInt(cli.getOptionValue("t"))
                : queries.total;

        List<Query> sorted = new ArrayList<>(queries.queries.values());
        Collections.sort(sorted);

        if (!Files.isDirectory(reportsRoot)) {
            Files.createDirectories(reportsRoot);
        }

        dumpAllElevated(elevateMap, queries, totalQueries, df, reportsRoot);
        dumpElevatedQueries(sorted, totalQueries, elevateMap, df, reportsRoot);
        dumpElevatedButNoQueries(elevateMap.keySet(), queries.queries.keySet(), reportsRoot);
        dumpElevatedCountDistributions(elevateMap, reportsRoot);

        //count the elevated ids, both with and without duplicates across entries
        Set<String> uniqueIds = new HashSet<>();
        int totalIds = 0;
        for (Elevate elevate : elevateMap.values()) {
            List<String> docs = elevate.getIds();
            uniqueIds.addAll(docs);
            totalIds += docs.size();
        }
        LOG.info(String.format(Locale.US,
                "There are %s elevate entries", elevateMap.size()));
        LOG.info(String.format(Locale.US,
                "There are %s unique elevated document ids " +
                        "and %s total elevated document ids",
                uniqueIds.size(), totalIds));
        if (cli.hasOption("s")) {
            dumpElevateVsIndex(cli.getOptionValue("s"), sorted, elevateMap, df,
                    totalQueries, reportsRoot);
        }
    }

    /**
     * Writes {@code elevated_num_docs_histogram.csv}: for every distinct number
     * of document ids found in an elevate entry, the number of entries that
     * have exactly that many ids. Rows are written in the order returned by
     * {@code MapUtil.sortByDescendingValue}.
     *
     * @param elevateMap  elevate entries
     * @param reportsRoot directory in which the report is created
     * @throws IOException if the report cannot be written
     */
    private static void dumpElevatedCountDistributions(Map<String, Elevate> elevateMap,
                                                       Path reportsRoot) throws IOException {
        //number of ids in an entry -> number of entries with that many ids
        Map<Integer, Integer> histogram = new HashMap<>();
        for (Elevate elevate : elevateMap.values()) {
            Integer numOfDocs = elevate.getIds().size();
            Integer seen = histogram.get(numOfDocs);
            histogram.put(numOfDocs, seen == null ? 1 : seen + 1);
        }

        Path report = reportsRoot.resolve("elevated_num_docs_histogram.csv");
        try (BufferedWriter writer = Files.newBufferedWriter(report, StandardCharsets.UTF_8)) {
            writer.write("Number of Documents in an Elevate Entry,Number of Entries\n");
            for (Map.Entry<Integer, Integer> row
                    : MapUtil.sortByDescendingValue(histogram).entrySet()) {
                writer.write(row.getKey() + "," + row.getValue() + "\n");
            }
        }
    }

    /**
     * Writes {@code elevated.csv}: one row per elevate entry with the number of
     * times the elevated query appears in the query set and that count as a
     * share of {@code totalCount}. Entries without any ids are skipped with a
     * warning; entries whose query is not in the query set get a count of 0.
     *
     * @param elevateMap  elevated query -> elevate entry
     * @param queries     the loaded queries
     * @param totalCount  denominator for the percentage column
     * @param df          formatter for the percentage column
     * @param reportsRoot directory in which the report is created
     * @throws Exception if the report cannot be written
     */
    private static void dumpAllElevated(Map<String, Elevate> elevateMap,
                                        QuerySet queries, int totalCount,
                                        NumberFormat df,
                                        Path reportsRoot) throws Exception {

        try (BufferedWriter writer = Files.newBufferedWriter(
                reportsRoot.resolve("elevated.csv"), StandardCharsets.UTF_8
        )) {
            //the line break is appended after joining; joining it as a
            //fourth element would leave a trailing comma, i.e. an empty,
            //unnamed fourth header column that the data rows do not have
            writer.write(StringUtils.joinWith(",", "Elevated",
                    "QueryCount", "QueryPercentage") + "\n");
            for (Map.Entry<String, Elevate> entry : elevateMap.entrySet()) {
                String elevated = entry.getKey();
                if (entry.getValue().getIds().isEmpty()) {
                    LOG.warn("no ids for this elevated item >" + elevated + "<");
                    continue;
                }
                Query q = queries.queries.get(elevated);
                int cnt = (q == null) ? 0 : q.getCount();
                writer.write(
                        StringUtils.joinWith(",",
                                clean(elevated),
                                cnt,
                                df.format(((double) cnt / (double) totalCount))
                        ) + "\n"
                );
            }
        }
    }

    /**
     * Compares the elevated document ids against the search index.
     * <p>
     * Writes {@code elevated_vs_index.csv} with one row per (query, elevated id)
     * pair for every query in {@code sorted} that has an elevate entry, stating
     * whether the index contains the id. It then checks the ids of all
     * elevate entries, whether or not they were queried, and logs how many
     * ids are present in/missing from the index and how many entries have
     * no id at all in the index.
     *
     * @param searchServer url handed to {@code SearchClientFactory.getClient}
     * @param sorted       queries in the order in which rows should be written
     * @param elevateMap   elevated query -> elevate entry
     * @param df           formatter for the percentage column
     * @param totalCount   denominator for the percentage column
     * @param reportsRoot  directory in which the report is created
     * @throws Exception if the report cannot be written or a search fails
     */
    private static void dumpElevateVsIndex(String searchServer,
                                           List<Query> sorted,
                                           Map<String, Elevate> elevateMap,
                                           DecimalFormat df, int totalCount,
                                           Path reportsRoot) throws Exception {
        //NOTE(review): the client is never closed here -- confirm whether SearchClient needs it
        SearchClient searchClient = SearchClientFactory.getClient(searchServer);

        //results of the lookups made so far, so that every id is searched at most once
        Set<String> knownPresent = new HashSet<>();
        Set<String> knownMissing = new HashSet<>();
        try (BufferedWriter writer = Files.newBufferedWriter(
                reportsRoot.resolve("elevated_vs_index.csv"), StandardCharsets.UTF_8
        )) {
            //the line break is appended after joining; joining it as an
            //extra element would leave a trailing comma, i.e. an empty,
            //unnamed header column that the data rows do not have
            writer.write(StringUtils.joinWith(",", "Query", "Id",
                    "IndexContainsId",
                    "QueryCount", "QueryPercentage") + "\n");

            for (Query q : sorted) {
                Elevate e = elevateMap.get(q.getQ());
                if (e == null) {
                    continue;
                }
                for (String id : e.getIds()) {
                    String contains =
                            isInIndex(id, searchClient, knownPresent, knownMissing)
                                    ? "index contains" : "index missing";
                    writer.write(
                            StringUtils.joinWith(",",
                                    clean(q.getQ()),
                                    clean(id),
                                    contains,
                                    q.getCount(),
                                    df.format(((double) q.getCount() / (double) totalCount))
                            ) + "\n"
                    );
                }
            }
        }

        //now go through all the elevated entries irrespective of queries
        int zeroValidDocs = 0;
        int totalValidDocs = 0;
        int totalInvalidDocs = 0;
        Map<String, Integer> valid = new HashMap<>();
        Map<String, Integer> invalid = new HashMap<>();
        for (Elevate e : elevateMap.values()) {
            int v = 0;
            for (String id : e.getIds()) {
                //ids that are missing from the index have to be counted as
                //invalid; recording them as present would hide every missing
                //document that is not reached via a logged query
                if (isInIndex(id, searchClient, knownPresent, knownMissing)) {
                    v++;
                    increment(valid, id);
                    totalValidDocs++;
                } else {
                    increment(invalid, id);
                    totalInvalidDocs++;
                }
            }
            if (v == 0) {
                zeroValidDocs++;
            }
        }
        LOG.info(
                String.format(Locale.US,
                        "There are %s unique valid docs and %s " +
                                "total docs in the elevate file.",
                        valid.size(), totalValidDocs)
        );
        LOG.info(
                String.format(Locale.US,
                        "There are %s unique missing docs and %s " +
                                "missing docs in the elevate file.",
                        invalid.size(), totalInvalidDocs)
        );
        LOG.info(
                String.format(Locale.US,
                        "There are %s entries with zero valid docs.",
                        zeroValidDocs)
        );
    }

    /**
     * Tells whether the index contains {@code id}, searching the index only if
     * the id is in neither cache and recording the answer in the matching cache.
     */
    private static boolean isInIndex(String id, SearchClient searchClient,
                                     Set<String> knownPresent,
                                     Set<String> knownMissing) throws Exception {
        if (knownPresent.contains(id)) {
            return true;
        }
        if (knownMissing.contains(id)) {
            return false;
        }
        boolean found = indexContains(id, searchClient);
        if (found) {
            knownPresent.add(id);
        } else {
            knownMissing.add(id);
        }
        return found;
    }

    /**
     * Adds one to the counter stored under {@code k}, starting the counter
     * at 1 if the map has no value for the key yet.
     *
     * @param m counters by key
     * @param k key whose counter is increased
     */
    private static void increment(Map<String, Integer> m, String k) {
        Integer current = m.get(k);
        m.put(k, current == null ? 1 : current + 1);
    }

    /**
     * Writes {@code elevated_zero_queries.csv}: the elevated queries, in their
     * natural sort order, that do not occur in {@code queries}.
     *
     * @param elevated    the elevated queries
     * @param queries     the queries to look the elevated queries up in
     * @param reportsRoot directory in which the report is created
     * @throws IOException if the report cannot be written
     */
    private static void dumpElevatedButNoQueries(Set<String> elevated, Set<String> queries,
                                                 Path reportsRoot) throws IOException {
        Path report = reportsRoot.resolve("elevated_zero_queries.csv");
        try (BufferedWriter writer = Files.newBufferedWriter(report, StandardCharsets.UTF_8)) {
            writer.write("ElevatedQueryNotInQueryLog\n");
            List<String> ordered = new ArrayList<>(elevated);
            Collections.sort(ordered);
            for (String elevatedQuery : ordered) {
                if (queries.contains(elevatedQuery)) {
                    continue;
                }
                writer.write(clean(elevatedQuery));
                writer.write("\n");
            }
        }
    }

    /**
     * Writes {@code queries_elevated_or_not.csv}: one row per query, in the
     * order of {@code sorted}, stating whether the query has an entry in
     * {@code elevateMap}, together with the query's count and that count as a
     * share of {@code totalQueries}.
     *
     * @param sorted       queries in the order in which rows should be written
     * @param totalQueries denominator for the percentage column
     * @param elevateMap   elevated query -> elevate entry
     * @param df           formatter for the percentage column
     * @param reportsRoot  directory in which the report is created
     * @throws Exception if the report cannot be written
     */
    private static void dumpElevatedQueries(List<Query> sorted, int totalQueries,
                                            Map<String, Elevate> elevateMap,
                                            DecimalFormat df, Path reportsRoot) throws Exception {

        try (Writer writer = Files.newBufferedWriter(
                reportsRoot.resolve("queries_elevated_or_not.csv"),
                StandardCharsets.UTF_8)) {
            //header; the line break is appended after joining -- joining it
            //as an extra element would leave a trailing comma, i.e. an empty,
            //unnamed header column that the data rows do not have
            writer.write(
                    StringUtils.joinWith(",", "Query", "ElevatedOrNot",
                            "QueryCount", "QueryPercentage") + "\n"
            );
            for (Query q : sorted) {
                String elevated =
                        elevateMap.containsKey(q.getQ()) ? "elevated" : "not_elevated";
                writer.write(StringUtils.joinWith(",",
                        clean(q.getQ()),
                        elevated,
                        q.getCount(),
                        clean(df.format(((double) q.getCount() / (double) totalQueries)))
                ));
                writer.write("\n");
            }
        }
    }

    /**
     * Searches the index for the term {@code id} in the field {@code "id"}.
     *
     * @param id           document id to look for
     * @param searchClient client used to run the search
     * @return {@code true} if the search returned at least one id
     * @throws Exception if the search fails
     */
    private static boolean indexContains(String id, SearchClient searchClient) throws Exception {
        TermQuery byId = new TermQuery("id", id);
        SearchResultSet hits = searchClient.search(new QueryRequest(byId));
        return hits.getIds().size() > 0;
    }

    /**
     * Prepares a value for use as a single csv cell: a value containing a
     * comma, a line feed, a carriage return or a double quote is wrapped in
     * double quotes with every embedded double quote doubled; any other value
     * is returned as is.
     *
     * @param s the raw value, may be {@code null}
     * @return the cell text; the empty string for {@code null}
     */
    private static String clean(String s) {
        if (s == null) {
            return StringUtils.EMPTY;
        }
        boolean needsQuoting = s.indexOf(',') >= 0
                || s.indexOf('\n') >= 0
                || s.indexOf('\r') >= 0
                || s.indexOf('"') >= 0;
        if (!needsQuoting) {
            return s;
        }
        return "\"" + s.replace("\"", "\"\"") + "\"";
    }

    /**
     * Loads the queries and their counts from a csv file (Excel csv format,
     * UTF-8, an optional byte order mark is skipped). The first record is
     * the header and has to name the columns {@code query} and {@code count}.
     * <p>
     * A query that occurs more than once is reported with a warning; every
     * record is still handed to {@link QuerySet#set(String, int)}.
     *
     * @param file the csv file to read
     * @return the queries that were read
     * @throws Exception if the file cannot be read or a count is not an integer
     */
    private static QuerySet loadQueries(Path file) throws Exception {
        QuerySet querySet = new QuerySet();
        try (InputStream is = Files.newInputStream(file);
             Reader reader = new InputStreamReader(
                     new BOMInputStream(is), StandardCharsets.UTF_8)) {
            Iterable<CSVRecord> records = CSVFormat.EXCEL
                    .withFirstRecordAsHeader().parse(reader);
            for (CSVRecord record : records) {
                String q = record.get("query");
                int count = Integer.parseInt(record.get("count"));
                if (querySet.queries.containsKey(q)) {
                    LOG.warn("duplicate queries?! >" + q + "<");
                }

                querySet.set(q, count);
            }
        }
        LOG.info("loaded " + querySet.queries.size() + " queries");
        return querySet;
    }

    /**
     * Collects, per query, the ids that were added for it, in the order in
     * which they were added.
     */
    private static class ElevateSet {

        /** query -> ids added for that query */
        Map<String, List<String>> queryToIds = new HashMap<>();

        /**
         * Appends {@code id} to the ids of {@code query}, creating the list
         * for the query on first use.
         */
        public void add(String query, String id) {
            List<String> idsForQuery = queryToIds.get(query);
            if (idsForQuery != null) {
                idsForQuery.add(id);
                return;
            }
            idsForQuery = new ArrayList<>();
            queryToIds.put(query, idsForQuery);
            idsForQuery.add(id);
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("ElevateSet{");
            sb.append("queryToIds=").append(queryToIds);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * The queries keyed by their query string, plus the sum of all the
     * counts that were passed to {@link #set(String, int)}.
     */
    private static class QuerySet {
        /** sum of the counts of every call to set, including "(other)" */
        int total;
        /** query string -> query; never contains "(other)" */
        Map<String, Query> queries = new HashMap<>();

        /**
         * Stores the query under its query string, replacing any earlier
         * entry, and adds {@code count} to {@link #total}. The query
         * {@code "(other)"} only contributes to the total.
         */
        public void set(String query, int count) {
            boolean isOtherBucket = query.equals("(other)");
            if (!isOtherBucket) {
                Query entry = new Query(query, count);
                queries.put(query, entry);
            }
            total = total + count;
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("QuerySet{");
            sb.append("total=").append(total);
            sb.append(", queries=").append(queries);
            sb.append('}');
            return sb.toString();
        }
    }

    /**
     * A query string together with the number of times it was issued.
     * The natural order puts the highest count first and breaks ties by
     * the natural order of the query strings.
     */
    private static class Query implements Comparable<Query> {
        String q;
        int count = -1;

        public Query(String q, int count) {
            this.q = q;
            this.count = count;
        }

        /** @return the query string */
        public String getQ() {
            return q;
        }

        /** @return the number of times the query was issued */
        public int getCount() {
            return count;
        }

        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder("Query{");
            sb.append("q='").append(q).append('\'');
            sb.append(", count=").append(count);
            sb.append('}');
            return sb.toString();
        }

        @Override
        public int compareTo(Query other) {
            //descending by count ...
            int byCount = Integer.compare(other.count, count);
            if (byCount != 0) {
                return byCount;
            }
            //... then ascending by query string
            return q.compareTo(other.q);
        }

        @Override
        public boolean equals(Object o) {
            if (o == this) {
                return true;
            }
            if (o instanceof Query) {
                Query that = (Query) o;
                return count == that.count && q.equals(that.q);
            }
            return false;
        }

        @Override
        public int hashCode() {
            return Objects.hash(q, count);
        }
    }


}