package mtas.codec.util; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; import mtas.analysis.token.MtasToken; import mtas.codec.MtasCodecPostingsFormat; import mtas.parser.function.util.MtasFunctionParserFunction; import mtas.search.spans.util.MtasSpanQuery; import mtas.codec.util.CodecComponent.ComponentField; import mtas.codec.util.CodecComponent.ComponentCollection; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.spans.SpanWeight; /** * The Class CodecUtil. */ public class CodecUtil { /** The Constant STATS_TYPE_GEOMETRICMEAN. */ public static final String STATS_TYPE_GEOMETRICMEAN = "geometricmean"; /** The Constant STATS_TYPE_KURTOSIS. */ public static final String STATS_TYPE_KURTOSIS = "kurtosis"; /** The Constant STATS_TYPE_MAX. */ public static final String STATS_TYPE_MAX = "max"; /** The Constant STATS_TYPE_MEAN. */ public static final String STATS_TYPE_MEAN = "mean"; /** The Constant STATS_TYPE_MIN. */ public static final String STATS_TYPE_MIN = "min"; /** The Constant STATS_TYPE_N. */ public static final String STATS_TYPE_N = "n"; /** The Constant STATS_TYPE_MEDIAN. */ public static final String STATS_TYPE_MEDIAN = "median"; /** The Constant STATS_TYPE_POPULATIONVARIANCE. */ public static final String STATS_TYPE_POPULATIONVARIANCE = "populationvariance"; /** The Constant STATS_TYPE_QUADRATICMEAN. */ public static final String STATS_TYPE_QUADRATICMEAN = "quadraticmean"; /** The Constant STATS_TYPE_SKEWNESS. */ public static final String STATS_TYPE_SKEWNESS = "skewness"; /** The Constant STATS_TYPE_STANDARDDEVIATION. */ public static final String STATS_TYPE_STANDARDDEVIATION = "standarddeviation"; /** The Constant STATS_TYPE_SUM. */ public static final String STATS_TYPE_SUM = "sum"; /** The Constant STATS_TYPE_SUMSQ. */ public static final String STATS_TYPE_SUMSQ = "sumsq"; /** The Constant STATS_TYPE_SUMOFLOGS. */ public static final String STATS_TYPE_SUMOFLOGS = "sumoflogs"; /** The Constant STATS_TYPE_VARIANCE. */ public static final String STATS_TYPE_VARIANCE = "variance"; /** The Constant STATS_TYPE_ALL. */ public static final String STATS_TYPE_ALL = "all"; /** The Constant STATS_FUNCTION_DISTRIBUTION. */ public static final String STATS_FUNCTION_DISTRIBUTION = "distribution"; /** The Constant SORT_TERM. */ public static final String SORT_TERM = "term"; /** The Constant SORT_ASC. */ public static final String SORT_ASC = "asc"; /** The Constant SORT_DESC. */ public static final String SORT_DESC = "desc"; /** The Constant STATS_FUNCTIONS. */ private static final List<String> STATS_FUNCTIONS = Arrays .asList(STATS_FUNCTION_DISTRIBUTION); /** The Constant STATS_TYPES. */ private static final List<String> STATS_TYPES = Arrays.asList( STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_KURTOSIS, STATS_TYPE_MAX, STATS_TYPE_MEAN, STATS_TYPE_MIN, STATS_TYPE_N, STATS_TYPE_MEDIAN, STATS_TYPE_POPULATIONVARIANCE, STATS_TYPE_QUADRATICMEAN, STATS_TYPE_SKEWNESS, STATS_TYPE_STANDARDDEVIATION, STATS_TYPE_SUM, STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS, STATS_TYPE_VARIANCE); /** The Constant STATS_BASIC_TYPES. */ private static final List<String> STATS_BASIC_TYPES = Arrays .asList(STATS_TYPE_N, STATS_TYPE_SUM, STATS_TYPE_MEAN); /** The Constant STATS_ADVANCED_TYPES. */ private static final List<String> STATS_ADVANCED_TYPES = Arrays.asList( STATS_TYPE_MAX, STATS_TYPE_MIN, STATS_TYPE_SUMSQ, STATS_TYPE_SUMOFLOGS, STATS_TYPE_GEOMETRICMEAN, STATS_TYPE_STANDARDDEVIATION, STATS_TYPE_VARIANCE, STATS_TYPE_POPULATIONVARIANCE, STATS_TYPE_QUADRATICMEAN); /** The Constant STATS_FULL_TYPES. */ private static final List<String> STATS_FULL_TYPES = Arrays .asList(STATS_TYPE_KURTOSIS, STATS_TYPE_MEDIAN, STATS_TYPE_SKEWNESS); /** The Constant STATS_BASIC. */ public static final String STATS_BASIC = "basic"; /** The Constant STATS_ADVANCED. */ public static final String STATS_ADVANCED = "advanced"; /** The Constant STATS_FULL. */ public static final String STATS_FULL = "full"; /** The Constant DATA_TYPE_LONG. */ public static final String DATA_TYPE_LONG = "long"; /** The Constant DATA_TYPE_DOUBLE. */ public static final String DATA_TYPE_DOUBLE = "double"; /** The fp stats items. */ private static Pattern fpStatsItems = Pattern .compile("(([^\\(,]+)(\\([^\\)]*\\))?)"); /** The fp stats function items. */ private static Pattern fpStatsFunctionItems = Pattern .compile("(([^\\(,]+)(\\(([^\\)]*)\\)))"); /** * Instantiates a new codec util. */ private CodecUtil() { // don't do anything } /** * Checks if is single position prefix. * * @param fieldInfo * the field info * @param prefix * the prefix * @return true, if is single position prefix * @throws IOException * Signals that an I/O exception has occurred. */ public static boolean isSinglePositionPrefix(FieldInfo fieldInfo, String prefix) throws IOException { if (fieldInfo == null) { throw new IOException("no fieldInfo"); } else { String info = fieldInfo.getAttribute( MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION); if (info == null) { throw new IOException("no " + MtasCodecPostingsFormat.MTAS_FIELDINFO_ATTRIBUTE_PREFIX_SINGLE_POSITION); } else { return Arrays.asList(info.split(Pattern.quote(MtasToken.DELIMITER))) .contains(prefix); } } } /** * Term value. * * @param term * the term * @return the string */ public static String termValue(String term) { int i = term.indexOf(MtasToken.DELIMITER); String value = null; if (i >= 0) { value = term.substring((i + MtasToken.DELIMITER.length())); value = (value.length() > 0) ? value : null; } return (value == null) ? null : value.replace("\u0000", ""); } /** * Term prefix. * * @param term * the term * @return the string */ public static String termPrefix(String term) { int i = term.indexOf(MtasToken.DELIMITER); String prefix = term; if (i >= 0) { prefix = term.substring(0, i); } return prefix.replace("\u0000", ""); } /** * Term prefix value. * * @param term * the term * @return the string */ public static String termPrefixValue(String term) { return (term == null) ? null : term.replace("\u0000", ""); } /** * Collect field. * * @param field * the field * @param searcher * the searcher * @param rawReader * the raw reader * @param fullDocList * the full doc list * @param fullDocSet * the full doc set * @param fieldStats * the field stats * @throws IllegalAccessException * the illegal access exception * @throws IllegalArgumentException * the illegal argument exception * @throws InvocationTargetException * the invocation target exception * @throws IOException * Signals that an I/O exception has occurred. */ public static void collectField(String field, IndexSearcher searcher, IndexReader rawReader, ArrayList<Integer> fullDocList, ArrayList<Integer> fullDocSet, ComponentField fieldStats, Status status) throws IllegalAccessException, IllegalArgumentException, InvocationTargetException, IOException { if (fieldStats != null) { IndexReader reader = searcher.getIndexReader(); HashMap<MtasSpanQuery, SpanWeight> spansQueryWeight = new HashMap<>(); // only if spanQueryList is not empty if (fieldStats.spanQueryList.size() > 0) { final float boost = 0; for (MtasSpanQuery sq : fieldStats.spanQueryList) { spansQueryWeight.put(sq, ((MtasSpanQuery) sq.rewrite(reader)) .createWeight(searcher, false, boost)); } } // collect CodecCollector.collectField(field, searcher, reader, rawReader, fullDocList, fullDocSet, fieldStats, spansQueryWeight, status); } } /** * Collect collection. * * @param reader * the reader * @param fullDocSet * the full doc set * @param collectionInfo * the collection info * @throws IOException * Signals that an I/O exception has occurred. */ public static void collectCollection(IndexReader reader, List<Integer> fullDocSet, ComponentCollection collectionInfo) throws IOException { if (collectionInfo != null) { CodecCollector.collectCollection(reader, fullDocSet, collectionInfo); } } /** * Creates the stats items. * * @param statsType * the stats type * @return the sorted set * @throws IOException * Signals that an I/O exception has occurred. */ static SortedSet<String> createStatsItems(String statsType) throws IOException { SortedSet<String> statsItems = new TreeSet<>(); SortedSet<String> functionItems = new TreeSet<>(); if (statsType != null) { Matcher m = fpStatsItems.matcher(statsType.trim()); while (m.find()) { String tmpStatsItem = m.group(2).trim(); if (STATS_TYPES.contains(tmpStatsItem)) { statsItems.add(tmpStatsItem); } else if (tmpStatsItem.equals(STATS_TYPE_ALL)) { for (String type : STATS_TYPES) { statsItems.add(type); } } else if (STATS_FUNCTIONS.contains(tmpStatsItem)) { if (m.group(3) == null) { throw new IOException("'" + tmpStatsItem + "' should be called as '" + tmpStatsItem + "()' with an optional argument"); } else { functionItems.add(m.group(1).trim()); } } else { throw new IOException("unknown statsType '" + tmpStatsItem + "'"); } } } if (statsItems.size() == 0 && functionItems.size() == 0) { statsItems.add(STATS_TYPE_SUM); statsItems.add(STATS_TYPE_N); statsItems.add(STATS_TYPE_MEAN); } if (functionItems.size() > 0) { statsItems.addAll(functionItems); } return statsItems; } /** * Creates the stats type. * * @param statsItems * the stats items * @param sortType * the sort type * @param functionParser * the function parser * @return the string */ static String createStatsType(Set<String> statsItems, String sortType, MtasFunctionParserFunction functionParser) { String statsType = STATS_BASIC; for (String statsItem : statsItems) { if (STATS_FULL_TYPES.contains(statsItem)) { statsType = STATS_FULL; break; } else if (STATS_ADVANCED_TYPES.contains(statsItem)) { statsType = STATS_ADVANCED; } else if (statsType != STATS_ADVANCED && STATS_BASIC_TYPES.contains(statsItem)) { statsType = STATS_BASIC; } else { Matcher m = fpStatsFunctionItems.matcher(statsItem.trim()); if (m.find()) { if (STATS_FUNCTIONS.contains(m.group(2).trim())) { statsType = STATS_FULL; break; } } } } if (sortType != null && STATS_TYPES.contains(sortType)) { if (STATS_FULL_TYPES.contains(sortType)) { statsType = STATS_FULL; } else if (STATS_ADVANCED_TYPES.contains(sortType)) { statsType = (statsType == null || statsType != STATS_FULL) ? STATS_ADVANCED : statsType; } } return statsType; } /** * Checks if is stats type. * * @param type * the type * @return true, if is stats type */ public static boolean isStatsType(String type) { return STATS_TYPES.contains(type); } }