/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler.admin; import java.io.IOException; import java.lang.invoke.MethodHandles; import java.nio.file.Paths; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Objects; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Function; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.StandardDirectoryReader; import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.MapWriter; import org.apache.solr.common.util.Utils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Estimates the raw size of all uncompressed indexed data by scanning term, docValues and * stored fields data. This utility also provides detailed statistics about term, docValues, * postings and stored fields distributions. */ public class IndexSizeEstimator { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String TERMS = "terms"; public static final String STORED_FIELDS = "storedFields"; public static final String NORMS = "norms"; public static final String DOC_VALUES = "docValues"; public static final String POINTS = "points"; public static final String TERM_VECTORS = "termVectors"; public static final String SUMMARY = "summary"; public static final String DETAILS = "details"; public static final String FIELDS_BY_SIZE = "fieldsBySize"; public static final String TYPES_BY_SIZE = "typesBySize"; public static final int DEFAULT_SAMPLING_THRESHOLD = 100_000; public static final float DEFAULT_SAMPLING_PERCENT = 5.0f; private final IndexReader reader; private final int topN; private final int maxLength; private final boolean withSummary; private final boolean withDetails; private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD; private float samplingPercent = DEFAULT_SAMPLING_PERCENT; private int samplingStep = 1; public static final class Estimate implements MapWriter { private final Map<String, Long> fieldsBySize; private final Map<String, Long> typesBySize; private final Map<String, Object> summary; private final Map<String, Object> details; public Estimate(Map<String, Long> fieldsBySize, Map<String, Long> typesBySize, Map<String, Object> summary, Map<String, Object> details) { Objects.requireNonNull(fieldsBySize); Objects.requireNonNull(typesBySize); this.fieldsBySize = fieldsBySize; this.typesBySize = typesBySize; this.summary = summary; this.details = details; } public Map<String, Long> getFieldsBySize() { return fieldsBySize; } public Map<String, Long> getTypesBySize() { return typesBySize; } public Map<String, String> getHumanReadableFieldsBySize() { LinkedHashMap<String, String> result = new LinkedHashMap<>(); fieldsBySize.forEach((field, size) -> result.put(field, RamUsageEstimator.humanReadableUnits(size))); return result; } public Map<String, String> getHumanReadableTypesBySize() { LinkedHashMap<String, String> result = new LinkedHashMap<>(); typesBySize.forEach((field, size) -> result.put(field, RamUsageEstimator.humanReadableUnits(size))); return result; } public Map<String, Object> getSummary() { return summary; } public Map<String, Object> getDetails() { return details; } @Override public void writeMap(EntryWriter ew) throws IOException { ew.put(FIELDS_BY_SIZE, fieldsBySize); ew.put(TYPES_BY_SIZE, typesBySize); if (summary != null) { ew.put(SUMMARY, summary); } if (details != null) { ew.put(DETAILS, details); } } } public IndexSizeEstimator(IndexReader reader, int topN, int maxLength, boolean withSummary, boolean withDetails) { this.reader = reader; this.topN = topN; this.maxLength = maxLength; this.withSummary = withSummary; this.withDetails = withDetails; } /** * Set the sampling threshold. If the index has more documents than this threshold * then only some values will be sampled and the totals will be extrapolated. * @param threshold size threshold (number of documents). Default value is {@link #DEFAULT_SAMPLING_THRESHOLD}. * Setting this to values <= 0 means no threshold (and no sampling). */ public void setSamplingThreshold(int threshold) { if (threshold <= 0) { threshold = Integer.MAX_VALUE; } this.samplingThreshold = threshold; } /** * Sampling percent (a number greater than 0 and less or equal to 100). When index size exceeds * the threshold then approximately only this percent of data will be retrieved from the index and the * totals will be extrapolated. * @param percent sample percent. Default value is {@link #DEFAULT_SAMPLING_PERCENT}. * @throws IllegalArgumentException when value is less than or equal to 0.0 or greater than 100.0, or * the sampling percent is so small that less than 10 documents would be sampled. */ public void setSamplingPercent(float percent) throws IllegalArgumentException { if (percent <= 0 || percent > 100) { throw new IllegalArgumentException("samplingPercent must be 0 < percent <= 100"); } if (reader.maxDoc() > samplingThreshold) { samplingStep = Math.round(100.0f / samplingPercent); if (log.isInfoEnabled()) { log.info("- number of documents {} larger than {}, sampling percent is {} and sampling step {}", reader.maxDoc(), samplingThreshold, samplingPercent, samplingStep); } if (reader.maxDoc() / samplingStep < 10) { throw new IllegalArgumentException("Out of " + reader.maxDoc() + " less than 10 documents would be sampled, which is too unreliable. Increase the samplingPercent."); } } this.samplingPercent = percent; } @SuppressWarnings({"unchecked"}) public Estimate estimate() throws Exception { Map<String, Object> details = new LinkedHashMap<>(); Map<String, Object> summary = new LinkedHashMap<>(); estimateStoredFields(details); estimateTerms(details); estimateNorms(details); estimatePoints(details); estimateTermVectors(details); estimateDocValues(details); estimateSummary(details, summary); if (samplingStep > 1) { details.put("samplingPercent", samplingPercent); details.put("samplingStep", samplingStep); } ItemPriorityQueue fieldSizeQueue = new ItemPriorityQueue(summary.size()); summary.forEach((field, perField) -> { long size = ((AtomicLong)((Map<String, Object>)perField).get("totalSize")).get(); if (size > 0) { fieldSizeQueue.insertWithOverflow(new Item(field, size)); } }); Map<String, Long> fieldsBySize = new LinkedHashMap<>(); fieldSizeQueue._forEachEntry((k, v) -> fieldsBySize.put((String)k, (Long)v)); Map<String, AtomicLong> typeSizes = new HashMap<>(); summary.forEach((field, perField) -> { Map<String, Object> perType = (Map<String, Object>)((Map<String, Object>)perField).get("perType"); perType.forEach((type, size) -> { if (type.contains("_lengths")) { AtomicLong totalSize = typeSizes.computeIfAbsent(type.replace("_lengths", ""), t -> new AtomicLong()); totalSize.addAndGet(((AtomicLong)size).get()); } }); }); ItemPriorityQueue typesSizeQueue = new ItemPriorityQueue(typeSizes.size()); typeSizes.forEach((type, size) -> { if (size.get() > 0) { typesSizeQueue.insertWithOverflow(new Item(type, size.get())); } }); Map<String, Long> typesBySize = new LinkedHashMap<>(); typesSizeQueue._forEachEntry((k, v) -> typesBySize.put((String)k, (Long)v)); // sort summary by field size Map<String, Object> newSummary = new LinkedHashMap<>(); fieldsBySize.keySet().forEach(k -> newSummary.put(String.valueOf(k), summary.get(k))); // convert everything to maps and primitives convert(newSummary); convert(details); return new Estimate(fieldsBySize, typesBySize, withSummary ? newSummary : null, withDetails ? details : null); } @SuppressWarnings({"unchecked"}) private void convert(Map<String, Object> result) { for (Map.Entry<String, Object> entry : result.entrySet()) { Object value = entry.getValue(); if (value instanceof ItemPriorityQueue) { ItemPriorityQueue queue = (ItemPriorityQueue)value; Map<String, Object> map = new LinkedHashMap<>(); queue.toMap(map); entry.setValue(map); } else if (value instanceof MapWriterSummaryStatistics) { MapWriterSummaryStatistics stats = (MapWriterSummaryStatistics)value; Map<String, Object> map = new LinkedHashMap<>(); stats.toMap(map); entry.setValue(map); } else if (value instanceof AtomicLong) { entry.setValue(((AtomicLong)value).longValue()); } else if (value instanceof Map) { // recurse convert((Map<String, Object>)value); } } } @SuppressWarnings({"unchecked"}) private void estimateSummary(Map<String, Object> details, Map<String, Object> summary) { log.info("- preparing summary..."); details.forEach((type, perType) -> { ((Map<String, Object>)perType).forEach((field, perField) -> { Map<String, Object> perFieldSummary = (Map<String, Object>)summary.computeIfAbsent(field, f -> new HashMap<>()); ((Map<String, Object>)perField).forEach((k, val) -> { if (val instanceof SummaryStatistics) { SummaryStatistics stats = (SummaryStatistics)val; if (k.startsWith("lengths")) { AtomicLong total = (AtomicLong)perFieldSummary.computeIfAbsent("totalSize", kt -> new AtomicLong()); total.addAndGet((long)stats.getSum()); } Map<String, Object> perTypeSummary = (Map<String, Object>)perFieldSummary.computeIfAbsent("perType", pt -> new HashMap<>()); AtomicLong total = (AtomicLong)perTypeSummary.computeIfAbsent(type + "_" + k, t -> new AtomicLong()); total.addAndGet((long)stats.getSum()); } }); }); }); } private void estimateNorms(Map<String, Object> result) throws IOException { log.info("- estimating norms..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext leafReaderContext : reader.leaves()) { LeafReader leafReader = leafReaderContext.reader(); FieldInfos fieldInfos = leafReader.getFieldInfos(); for (FieldInfo info : fieldInfos) { NumericDocValues norms = leafReader.getNormValues(info.name); if (norms == null) { continue; } Map<String, Object> perField = stats.computeIfAbsent(info.name, n -> new HashMap<>()); SummaryStatistics lengthSummary = (SummaryStatistics)perField.computeIfAbsent("lengths", s -> new MapWriterSummaryStatistics()); while (norms.advance(norms.docID() + samplingStep) != DocIdSetIterator.NO_MORE_DOCS) { for (int i = 0; i < samplingStep; i++) { lengthSummary.addValue(8); } } } } result.put(NORMS, stats); } private void estimatePoints(Map<String, Object> result) throws IOException { log.info("- estimating points..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext leafReaderContext : reader.leaves()) { LeafReader leafReader = leafReaderContext.reader(); FieldInfos fieldInfos = leafReader.getFieldInfos(); for (FieldInfo info : fieldInfos) { PointValues values = leafReader.getPointValues(info.name); if (values == null) { continue; } Map<String, Object> perField = stats.computeIfAbsent(info.name, n -> new HashMap<>()); SummaryStatistics lengthSummary = (SummaryStatistics)perField.computeIfAbsent("lengths", s -> new MapWriterSummaryStatistics()); lengthSummary.addValue(values.size() * values.getBytesPerDimension() * values.getNumIndexDimensions()); } } result.put(POINTS, stats); } private void estimateTermVectors(Map<String, Object> result) throws IOException { log.info("- estimating term vectors..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext leafReaderContext : reader.leaves()) { LeafReader leafReader = leafReaderContext.reader(); Bits liveDocs = leafReader.getLiveDocs(); for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) { if (liveDocs != null && !liveDocs.get(docId)) { continue; } Fields termVectors = leafReader.getTermVectors(docId); if (termVectors == null) { continue; } for (String field : termVectors) { Terms terms = termVectors.terms(field); if (terms == null) { continue; } estimateTermStats(field, terms, stats, true); } } } result.put(TERM_VECTORS, stats); } private void estimateDocValues(Map<String, Object> result) throws IOException { log.info("- estimating docValues..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext context : reader.leaves()) { LeafReader leafReader = context.reader(); FieldInfos fieldInfos = leafReader.getFieldInfos(); for (FieldInfo info : fieldInfos) { // binary countDocValues(stats, info.name, "binary", leafReader.getBinaryDocValues(info.name), values -> { try { BytesRef value = ((BinaryDocValues) values).binaryValue(); return value.length; } catch (IOException e) { // ignore } return 0; }); // numeric countDocValues(stats, info.name, "numeric", leafReader.getNumericDocValues(info.name), values -> 8); countDocValues(stats, info.name, "sorted", leafReader.getSortedDocValues(info.name), values -> { try { TermsEnum termsEnum = ((SortedDocValues) values).termsEnum(); BytesRef term; while ((term = termsEnum.next()) != null) { return term.length; } } catch (IOException e) { // ignore } return 0; }); countDocValues(stats, info.name, "sortedNumeric", leafReader.getSortedNumericDocValues(info.name), values -> ((SortedNumericDocValues) values).docValueCount() * 8); countDocValues(stats, info.name, "sortedSet", leafReader.getSortedSetDocValues(info.name), values -> { try { TermsEnum termsEnum = ((SortedSetDocValues) values).termsEnum(); BytesRef term; while ((term = termsEnum.next()) != null) { return term.length; } } catch (IOException e) { // ignore } return 0; }); } } result.put(DOC_VALUES, stats); } private void countDocValues(Map<String, Map<String, Object>> stats, String field, String type, DocIdSetIterator values, Function<DocIdSetIterator, Integer> valueLength) throws IOException { if (values == null) { return; } Map<String, Object> perField = stats.computeIfAbsent(field, n -> new HashMap<>()); SummaryStatistics lengthSummary = (SummaryStatistics)perField.computeIfAbsent("lengths_" + type, s -> new MapWriterSummaryStatistics()); while (values.advance(values.docID() + samplingStep) != DocIdSetIterator.NO_MORE_DOCS) { int len = valueLength.apply(values); for (int i = 0; i < samplingStep; i++) { lengthSummary.addValue(len); } } } private void estimateTerms(Map<String, Object> result) throws IOException { log.info("- estimating terms..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext context : reader.leaves()) { LeafReader leafReader = context.reader(); FieldInfos fieldInfos = leafReader.getFieldInfos(); for (FieldInfo info : fieldInfos) { Terms terms = leafReader.terms(info.name); if (terms == null) { continue; } estimateTermStats(info.name, terms, stats, false); } } result.put(TERMS, stats); } private void estimateTermStats(String field, Terms terms, Map<String, Map<String, Object>> stats, boolean isSampling) throws IOException { Map<String, Object> perField = stats.computeIfAbsent(field, n -> new HashMap<>()); SummaryStatistics lengthSummary = (SummaryStatistics)perField.computeIfAbsent("lengths_terms", s -> new MapWriterSummaryStatistics()); SummaryStatistics docFreqSummary = (SummaryStatistics)perField.computeIfAbsent("docFreqs", s -> new MapWriterSummaryStatistics()); SummaryStatistics totalFreqSummary = (SummaryStatistics)perField.computeIfAbsent("lengths_postings", s -> new MapWriterSummaryStatistics()); // TODO: add this at some point //SummaryStatistics impactsSummary = (SummaryStatistics)perField.computeIfAbsent("lengths_impacts", s -> new MapWriterSummaryStatistics()); SummaryStatistics payloadSummary = null; if (terms.hasPayloads()) { payloadSummary = (SummaryStatistics)perField.computeIfAbsent("lengths_payloads", s -> new MapWriterSummaryStatistics()); } ItemPriorityQueue topLen = (ItemPriorityQueue)perField.computeIfAbsent("topLen", s -> new ItemPriorityQueue(topN)); ItemPriorityQueue topTotalFreq = (ItemPriorityQueue)perField.computeIfAbsent("topTotalFreq", s -> new ItemPriorityQueue(topN)); TermsEnum termsEnum = terms.iterator(); BytesRef term; PostingsEnum postings = null; while ((term = termsEnum.next()) != null) { if (isSampling) { for (int i = 0; i < samplingStep; i++) { lengthSummary.addValue(term.length); docFreqSummary.addValue(termsEnum.docFreq()); totalFreqSummary.addValue(termsEnum.totalTermFreq()); } } else { lengthSummary.addValue(term.length); docFreqSummary.addValue(termsEnum.docFreq()); totalFreqSummary.addValue(termsEnum.totalTermFreq()); } if (terms.hasPayloads()) { postings = termsEnum.postings(postings, PostingsEnum.ALL); while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int freq = postings.freq(); for (int i = 0; i < freq; i++) { if (postings.nextPosition() < 0) { break; } BytesRef payload = postings.getPayload(); if (payload != null) { if (isSampling) { for (int k = 0; k < samplingStep; k++) { payloadSummary.addValue(payload.length); } } else { payloadSummary.addValue(payload.length); } } } } } String value = term.utf8ToString(); if (value.length() > maxLength) { value = value.substring(0, maxLength); } topLen.insertWithOverflow(new Item(value, term.length)); topTotalFreq.insertWithOverflow(new Item(value, termsEnum.totalTermFreq())); } } private void estimateStoredFields(Map<String, Object> result) throws IOException { log.info("- estimating stored fields..."); Map<String, Map<String, Object>> stats = new HashMap<>(); for (LeafReaderContext context : reader.leaves()) { LeafReader leafReader = context.reader(); EstimatingVisitor visitor = new EstimatingVisitor(stats, topN, maxLength, samplingStep); Bits liveDocs = leafReader.getLiveDocs(); if (leafReader instanceof CodecReader) { CodecReader codecReader = (CodecReader)leafReader; StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader(); // this instance may be faster for a full sequential pass StoredFieldsReader mergeInstance = storedFieldsReader.getMergeInstance(); for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) { if (liveDocs != null && !liveDocs.get(docId)) { continue; } mergeInstance.visitDocument(docId, visitor); } if (mergeInstance != storedFieldsReader) { mergeInstance.close(); } } else { for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) { if (liveDocs != null && !liveDocs.get(docId)) { continue; } leafReader.document(docId, visitor); } } } result.put(STORED_FIELDS, stats); } public static class Item { Object value; long size; public Item(Object value, long size) { this.value = value; this.size = size; } public String toString() { return "size=" + size + ", value=" + value; } } public static class MapWriterSummaryStatistics extends SummaryStatistics implements MapWriter { @Override public void writeMap(EntryWriter ew) throws IOException { ew.put("n", getN()); ew.put("min", getMin()); ew.put("max", getMax()); ew.put("sum", getSum()); ew.put("mean", getMean()); ew.put("geoMean", getGeometricMean()); ew.put("variance", getVariance()); ew.put("populationVariance", getPopulationVariance()); ew.put("stddev", getStandardDeviation()); ew.put("secondMoment", getSecondMoment()); ew.put("sumOfSquares", getSumsq()); ew.put("sumOfLogs", getSumOfLogs()); } } public static class ItemPriorityQueue extends PriorityQueue<Item> implements MapWriter { public ItemPriorityQueue(int maxSize) { super(maxSize); } @Override protected boolean lessThan(Item a, Item b) { return a.size < b.size; } public String toString() { StringBuilder sb = new StringBuilder(); Iterator<Item> it = iterator(); while (it.hasNext()) { if (sb.length() > 0) { sb.append('\n'); } sb.append(it.next()); } return sb.toString(); } // WARNING: destructive! empties the queue @Override public void writeMap(EntryWriter ew) throws IOException { Item[] items = new Item[size()]; int pos = size() - 1; while (size() > 0) { items[pos] = pop(); pos--; } for (Item item : items) { ew.put(String.valueOf(item.value), item.size); } } } private static class EstimatingVisitor extends StoredFieldVisitor { final Map<String, Map<String, Object>> stats; final int topN; final int maxLength; final int samplingStep; EstimatingVisitor(Map<String, Map<String, Object>> stats, int topN, int maxLength, int samplingStep) { this.stats = stats; this.topN = topN; this.maxLength = maxLength; this.samplingStep = samplingStep; } /** Process a binary field. * @param value newly allocated byte array with the binary contents. */ public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { // trim the value if needed int len = value != null ? value.length : 0; if (len > maxLength) { byte[] newValue = new byte[maxLength]; System.arraycopy(value, 0, newValue, 0, maxLength); value = newValue; } String strValue = new BytesRef(value).toString(); countItem(fieldInfo.name, strValue, len); } /** Process a string field. */ public void stringField(FieldInfo fieldInfo, String value) throws IOException { // trim the value if needed int len = value != null ? UnicodeUtil.calcUTF16toUTF8Length(value, 0, value.length()) : 0; if (value.length() > maxLength) { value = value.substring(0, maxLength); } countItem(fieldInfo.name, value, len); } /** Process a int numeric field. */ public void intField(FieldInfo fieldInfo, int value) throws IOException { countItem(fieldInfo.name, String.valueOf(value), 4); } /** Process a long numeric field. */ public void longField(FieldInfo fieldInfo, long value) throws IOException { countItem(fieldInfo.name, String.valueOf(value), 8); } /** Process a float numeric field. */ public void floatField(FieldInfo fieldInfo, float value) throws IOException { countItem(fieldInfo.name, String.valueOf(value), 4); } /** Process a double numeric field. */ public void doubleField(FieldInfo fieldInfo, double value) throws IOException { countItem(fieldInfo.name, String.valueOf(value), 8); } private void countItem(String field, Object value, int size) { Map<String, Object> perField = stats.computeIfAbsent(field, n -> new HashMap<>()); SummaryStatistics summary = (SummaryStatistics)perField.computeIfAbsent("lengths", s -> new MapWriterSummaryStatistics()); for (int i = 0; i < samplingStep; i++) { summary.addValue(size); } ItemPriorityQueue topNqueue = (ItemPriorityQueue)perField.computeIfAbsent("topLen", s-> new ItemPriorityQueue(topN)); topNqueue.insertWithOverflow(new Item(value, size)); } @Override public Status needsField(FieldInfo fieldInfo) throws IOException { return Status.YES; } } @SuppressForbidden(reason = "System.err and System.out required for a command-line utility") public static void main(String[] args) throws Exception { if (args.length == 0) { System.err.println("Usage: " + IndexSizeEstimator.class.getName() + " [-topN NUM] [-maxLen NUM] [-summary] [-details] <indexDir>"); System.err.println(); System.err.println("\t<indexDir>\tpath to the index (parent path of 'segments_N' file)"); System.err.println("\t-topN NUM\tnumber of top largest items to collect"); System.err.println("\t-maxLen NUM\ttruncate the largest items to NUM bytes / characters"); System.err.println(-1); } String path = null; int topN = 20; int maxLen = 100; boolean details = false; boolean summary = false; for (int i = 0; i < args.length; i++) { if (args[i].equals("-topN")) { topN = Integer.parseInt(args[++i]); } else if (args[i].equals("-maxLen")) { maxLen = Integer.parseInt(args[++i]); } else if (args[i].equals("-details")) { details = true; } else if (args[i].equals("-summary")) { summary = true; } else { path = args[i]; } } if (path == null) { System.err.println("ERROR: <indexDir> argument is required."); System.exit(-2); } Directory dir = FSDirectory.open(Paths.get(path)); DirectoryReader reader = StandardDirectoryReader.open(dir); IndexSizeEstimator stats = new IndexSizeEstimator(reader, topN, maxLen, summary, details); System.out.println(Utils.toJSONString(stats.estimate())); System.exit(0); } }