org.apache.accumulo.core.data.Range Java Examples

The following examples show how to use org.apache.accumulo.core.data.Range. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AccumuloTemporalIndexer.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the intervals stored in the repository that match the given interval.
 * The scan is limited to rows prefixed by {@code query.getAsKeyBeginning()} and to the
 * {@code KeyParts.CQ_BEGIN} column qualifier, within the constraint's context when one is
 * set (otherwise the empty column family).
 * Indexing of intervals will probably change or be removed.
 * Per the original note, predicate and subject constraints are filtered on the client.
 */
@Override
public CloseableIteration<Statement, QueryEvaluationException> queryIntervalEquals(
        final TemporalInterval query, final StatementConstraints contraints)
        throws QueryEvaluationException {
    final Scanner scanner = getScanner();
    if (scanner != null) {
        // Restrict the scan to rows whose key begins with the interval's beginning key.
        scanner.setRange(Range.prefix(new Text(query.getAsKeyBeginning())));

        // The context, when present, is used as the column family; otherwise the empty family.
        final Text columnFamily = contraints.hasContext()
                ? new Text(contraints.getContext().toString())
                : new Text("");
        scanner.fetchColumn(columnFamily, new Text(KeyParts.CQ_BEGIN));
    }
    // The scanner (possibly null) is wrapped as-is; no constraint-aware wrapper is applied here.
    return getIteratorWrapper(scanner);
}
 
Example #2
Source File: DocumentAggregatingIterator.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Advances this iterator so that its next key is at or beyond {@code pointer}.
 *
 * @param pointer the key to move to
 * @throws IllegalStateException if the current next key is already at or beyond {@code pointer}
 * @throws IOException declared for the underlying {@code next()} / {@code seek(...)} calls
 */
public void move(Key pointer) throws IOException {
    // Moving backwards (or to the current position) is a caller error.
    final boolean alreadyAtOrPast = nextKey != null && nextKey.compareTo(pointer) >= 0;
    if (alreadyAtOrPast) {
        throw new IllegalStateException("Tried to call move when already at or beyond move point: topkey=" + nextKey + ", movekey=" + pointer);
    }

    // Source exhausted: nothing lies beyond the current key, so clear all cached state.
    if (!getSource().hasTop()) {
        nextKey = null;
        nextValue = null;
        document = null;
        return;
    }

    // The source already sits at or past the pointer: just load it as the next entry.
    if (getSource().getTopKey().compareTo(pointer) >= 0) {
        next();
        return;
    }

    // Otherwise re-seek from the pointer (inclusive) to the end of the original seek range.
    seek(new Range(pointer, true, seekRange.getEndKey(), seekRange.isEndKeyInclusive()), seekColumnFamilies, seekInclusive);
}
 
Example #3
Source File: DescendantCountFunction.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Seeks the source past a group of descendants when the matcher finds a match that starts
 * inside {@code childSuffix}.
 *
 * The seek range starts (inclusive) at the key built from {@code row}, {@code this.indexCf} and
 * {@code fiRootValue + <suffix up to the match start> + '0'}, and ends (exclusive) at {@code endKey}.
 *
 * @return {@code true} if a seek was performed, {@code false} if no qualifying match was found
 * @throws IOException if the underlying seek fails
 */
private boolean skipExcessiveNumberOfDescendants(final String childSuffix, final Matcher matcher, final Text row, final String fiRootValue, final Key endKey)
                throws IOException {
    // No match, or a match that does not start within the suffix: nothing to skip.
    if (!matcher.find() || matcher.start() >= childSuffix.length()) {
        return false;
    }

    // Portion of the child suffix preceding the match.
    final String matchedPrefix = childSuffix.substring(0, matcher.start());

    // Range that begins just past the matched child's descendants.
    final Text skipQualifier = new Text(fiRootValue + matchedPrefix + '0');
    final Range skipRange = new Range(new Key(row, this.indexCf, skipQualifier), true, endKey, false);

    // Seek to the next first-generation child, if one exists (no column family filter).
    this.source.seek(skipRange, Collections.<ByteSequence> emptySet(), false);
    return true;
}
 
Example #4
Source File: AccumuloClient.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the column constraints of a Presto domain into Accumulo Range objects.
 * Each ordered Presto range of the domain is mapped to one Accumulo Range.
 *
 * @param domain Optional domain; when empty, a single unbounded Range is returned
 * @param serializer Instance of an {@link AccumuloRowSerializer}
 * @return A collection of Accumulo Range objects
 * @throws TableNotFoundException If the Accumulo table is not found
 */
public static Collection<Range> getRangesFromDomain(Optional<Domain> domain, AccumuloRowSerializer serializer)
        throws TableNotFoundException
{
    if (domain.isPresent()) {
        ImmutableSet.Builder<Range> accumuloRanges = ImmutableSet.builder();
        for (io.prestosql.spi.predicate.Range prestoRange : domain.get().getValues().getRanges().getOrderedRanges()) {
            accumuloRanges.add(getRangeFromPrestoRange(prestoRange, serializer));
        }
        return accumuloRanges.build();
    }

    // No predicate pushdown: fall back to the full range
    return ImmutableSet.of(new Range());
}
 
Example #5
Source File: AccumuloCounterSource.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Command-line entry point: builds an {@code AccumuloCounterSource} over one row range and one
 * column family, wraps it in a {@code CounterDump} and prints the dump to standard output.
 *
 * Expected arguments, in order: instance, zookeepers, username, password, table, startRow,
 * endRow, columnFamily.
 *
 * @param args the eight positional arguments described above
 * @throws IllegalArgumentException if fewer than eight arguments are supplied
 */
public static void main(String[] args) throws AccumuloException, AccumuloSecurityException {
    // Fail with a readable usage message instead of a bare ArrayIndexOutOfBoundsException.
    final int requiredArgs = 8;
    if (args == null || args.length < requiredArgs) {
        throw new IllegalArgumentException("Expected " + requiredArgs
                        + " arguments: <instance> <zookeepers> <username> <password> <table> <startRow> <endRow> <columnFamily>");
    }

    String instance = args[0];
    String zookeepers = args[1];
    String username = args[2];
    String password = PasswordConverter.parseArg(args[3]);
    String table = args[4];
    String startRow = args[5];
    String endRow = args[6];
    String columnFamily = args[7];

    AccumuloCounterSource source = new AccumuloCounterSource(instance, zookeepers, username, password, table);
    source.addRange(new Range(startRow, endRow));
    source.addColumnFaily(columnFamily);

    CounterDump dumper = new CounterDump(source);
    System.out.println(dumper);
}
 
Example #6
Source File: DatawaveFieldIndexIteratorJexlTest.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the bounding field-index ranges for a lower-inclusive, upper-exclusive iterator with a
 * multi-character upper bound and checks the single resulting range.
 */
@Test
public void buildBoundingFiRange_notUpperInclusive_multiChar_test() throws IOException {
    DatawaveFieldIndexFilterIteratorJexl iteratorJexl = DatawaveFieldIndexFilterIteratorJexl.builder()
                    .upperInclusive(false)
                    .lowerInclusive(true)
                    .withMaxRangeSplit(1)
                    .withFieldName("FIELD")
                    .withFieldValue("a")
                    .withUpperBound("az")
                    .withIvaratorCacheDirs(cacheDirs)
                    .build();

    Text row = new Text("row");
    Text fiName = new Text("fi" + Constants.NULL + "FIELD");
    Text fieldValue = new Text("aa");
    Text fieldValueNullAppended = new Text("aa" + Constants.NULL);

    List<Range> boundingRanges = iteratorJexl.buildBoundingFiRanges(row, fiName, fieldValue);

    Assert.assertNotEquals(null, boundingRanges);
    Assert.assertEquals(1, boundingRanges.size());
    Range onlyRange = boundingRanges.get(0);

    // The end key is expected to be inclusive even though upperInclusive is false, because the
    // upper bound value has been decremented by one.
    Assert.assertTrue(onlyRange.isStartKeyInclusive());
    Assert.assertTrue(onlyRange.isEndKeyInclusive());

    Key expectedStart = new Key(row, fiName, fieldValueNullAppended);
    Key expectedEnd = new Key(row, fiName, new Text("ay" + Constants.MAX_UNICODE_STRING));
    Assert.assertEquals(expectedStart, onlyRange.getStartKey());
    Assert.assertEquals(expectedEnd, onlyRange.getEndKey());
}
 
Example #7
Source File: EnrichingIterator.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Seeks the wrapped iterator and caches its top key/value, enriching the entry when a top key
 * is available. Does nothing when there is no wrapped iterator.
 */
@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    if (this.subIter != null) {
        this.subIter.seek(range, columnFamilies, inclusive);

        if (!this.subIter.hasTop()) {
            // Wrapped iterator is exhausted for this range: clear the cached entry.
            this.topKey = null;
            this.topValue = null;
        } else {
            this.topKey = this.subIter.getTopKey();
            this.topValue = this.subIter.getTopValue();

            if (this.topKey != null) {
                this.enrich();
            }
        }
    }
}
 
Example #8
Source File: DatawaveFieldIndexCachingIteratorJexl.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether the most recently seeked range covers the given range.
 *
 * An endpoint of {@code r} is accepted when it equals the corresponding endpoint of the last
 * seeked range, or when the last seeked range contains it. A null (unbounded) endpoint is only
 * accepted when the last seeked range is unbounded on the same side.
 *
 * @param r
 *            the range to test
 * @return true if there is a last seeked range and it contains the passed in range
 */
protected boolean lastRangeSeekedContains(Range r) {
    if (this.lastRangeSeeked == null) {
        return false;
    }

    final Key start = r.getStartKey();
    final Key end = r.getEndKey();

    // Unbounded start cannot fit inside a bounded start.
    if (start == null && this.lastRangeSeeked.getStartKey() != null) {
        return false;
    }
    // Start differs from the last start and also falls outside the last range.
    if (!Objects.equal(start, this.lastRangeSeeked.getStartKey()) && !this.lastRangeSeeked.contains(start)) {
        return false;
    }
    // Unbounded end cannot fit inside a bounded end.
    if (end == null && this.lastRangeSeeked.getEndKey() != null) {
        return false;
    }
    // End differs from the last end and also falls outside the last range.
    if (!Objects.equal(end, this.lastRangeSeeked.getEndKey()) && !this.lastRangeSeeked.contains(end)) {
        return false;
    }

    return true;
}
 
Example #9
Source File: GeoTemporalTweetQuery.java    From OSTMap with Apache License 2.0 6 votes vote down vote up
/**
 * Builds one prefix range per (geohash, day, spreading byte) combination for the bounding box
 * and day span held by this query.
 *
 * Each row-key prefix is laid out as: 1 spreading byte, 2 bytes for the day (short), then the
 * geohash truncated to at most 8 characters.
 *
 * @return ranges for rowkeys [0-255][startDay-endDay][setOfGeohashes]
 */
private List<Range> getRangeList() {
    List<Range> rangeList = new ArrayList<>();

    Coverage coverage = GeoHash.coverBoundingBoxMaxHashes(north, west, south, east, 100);
    log.debug("coverage:  [size:" + coverage.getHashes().size() + ", ratio:" + coverage.getRatio() + "]");

    Set<String> hashes = coverage.getHashes();
    for (String hash : hashes) {
        // Truncate BEFORE sizing the buffer. Previously the buffer was allocated from the
        // untruncated length, so the first key built for a long hash carried trailing zero
        // bytes and produced a wrong prefix range.
        String truncatedHash = hash.length() > 8 ? hash.substring(0, 8) : hash;
        // Explicit charset so the key bytes do not depend on the platform default.
        byte[] hashBytes = truncatedHash.getBytes(java.nio.charset.StandardCharsets.UTF_8);

        for (short day = startDay; day <= endDay; day++) {
            for (int spreadingByte = 0; spreadingByte <= 255; spreadingByte++) {
                // 1 byte spreading prefix + 2 bytes day + hash bytes
                ByteBuffer startKey = ByteBuffer.allocate(3 + hashBytes.length);
                startKey.put((byte) spreadingByte).putShort(day).put(hashBytes);
                rangeList.add(Range.prefix(new Text(startKey.array())));
            }
        }
    }

    return rangeList;
}
 
Example #10
Source File: AccumuloChangelogStore.java    From accumulo-recipes with Apache License 2.0 6 votes vote down vote up
/**
 * Gets the actual change objects that live inside of the specified buckets.
 *
 * One single-row range is scanned per bucket; the row is the decimal rendering of
 * {@code truncatedReverseTimestamp(date.getTime(), bucketSize)}.
 *
 * @param buckets dates representing time increments (i.e. 15 minutes); must not be null
 * @param auths authorizations used for the scan; must not be null
 * @return a closeable iterable over the scanned entries transformed by {@code entityTransform};
 *         the caller is responsible for closing it
 * @throws RuntimeException wrapping {@link TableNotFoundException} if the table does not exist
 */
@Override
public CloseableIterable<Event> getChanges(Iterable<Date> buckets, Auths auths) {
    checkNotNull(buckets);
    checkNotNull(auths);

    // Build the ranges before opening the scanner so a failure while iterating the buckets
    // cannot leak an open BatchScanner.
    List<Range> ranges = new ArrayList<>();
    for (Date date : buckets) {
        ranges.add(new Range(String.format("%d", truncatedReverseTimestamp(date.getTime(), bucketSize))));
    }

    try {
        final BatchScanner scanner = connector.createBatchScanner(tableName, auths.getAuths(), config.getMaxQueryThreads());
        try {
            scanner.setRanges(ranges);
            return transform(closeableIterable(scanner), entityTransform);
        } catch (RuntimeException e) {
            // Setup failed after the scanner was opened: release it before propagating.
            scanner.close();
            throw e;
        }
    } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
    }
}
 
Example #11
Source File: AccumuloTemporalIndexer.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Finds intervals that begin after the end of the given interval.
 * The scan starts (exclusive) at the key that follows every key prefixed by the query
 * interval's end, which avoids matching the end date itself, and is unbounded above.
 * Indexing of intervals will probably change or be removed.
 * Currently predicate, subject and context constraints are filtered on the client.
 */
@Override
public CloseableIteration<Statement, QueryEvaluationException> queryIntervalAfter(
        final TemporalInterval queryInterval, final StatementConstraints constraints)
        throws QueryEvaluationException {

    final Scanner scanner = getScanner();
    if (scanner != null) {
        // Rows whose start date is greater than queryInterval.getHasEnd()
        final Text afterQueryEnd = Range.followingPrefix(new Text(queryInterval.getHasEnd().getAsKeyBytes()));
        scanner.setRange(new Range(new Key(afterQueryEnd), false, null, true));

        // The context, when present, is used as the column family; otherwise the empty family.
        final Text columnFamily = constraints.hasContext()
                ? new Text(constraints.getContext().toString())
                : new Text("");
        scanner.fetchColumn(columnFamily, new Text(KeyParts.CQ_BEGIN));
    }
    // TODO currently predicate, subject and context constraints are filtered on the clients
    return getIteratorWrapper(scanner);
}
 
Example #12
Source File: TermFrequencyIndexIteratorTest.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Scans a range whose start and end are the same field-index key (both inclusive) with a TLD
 * aggregator and checks the resulting document's dictionary.
 */
@Test
public void testScanMinorRangeTLD() throws Exception {
    Range singleKeyRange = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true,
                    getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true);
    TermFrequencyAggregator tldAggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(singleKeyRange, source, null, typeMetadata, true, null, tldAggregator);

    // position on the first document
    tfIterator.seek(null, null, true);
    Assert.assertTrue(tfIterator.hasTop());

    Document doc = tfIterator.document();
    Assert.assertNotNull(doc);
    Assert.assertEquals(2, doc.getDictionary().size());
    Assert.assertNotNull(doc.getDictionary().get("FOO"));
    Assert.assertNotNull(doc.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(doc.getDictionary().get("FOO").getData());
    Assert.assertTrue((doc.getDictionary().get("FOO").getData()).equals("baz"));
}
 
Example #13
Source File: AccumuloQueryRuleset.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the rules whose own range, on the given table layout, contains the supplied range.
 * The supplied range need not hold every row relevant to a matching rule, but every row inside
 * it is relevant to that rule.
 * @param layout Defines which table the range is meant to scan
 * @param range The Range of rows in that table
 * @return Any rules in this ruleset that match the given table and contain the given range
 * @throws IOException if the Range can't be resolved
 */
public List<CopyRule> getRules(final TABLE_LAYOUT layout, final Range range) throws IOException {
    final List<CopyRule> matches = new LinkedList<>();
    for (final CopyRule candidate : rules) {
        // Resolve the table layout and byte range the rule's statement maps to
        final Map.Entry<TABLE_LAYOUT, ByteRange> resolved = getRange(candidate.getStatement());
        final TABLE_LAYOUT candidateLayout = resolved.getKey();
        // Only rules for the same table can be related to the given range.
        if (candidateLayout.equals(layout)) {
            final ByteRange candidateBytes = resolved.getValue();
            final Text start = new Text(candidateBytes.getStart());
            final Text end = new Text(candidateBytes.getEnd());
            // The rule matches when the given range is contained in (or equal to) its range.
            if (rangeContainsRange(new Range(start, end), range)) {
                matches.add(candidate);
            }
        }
    }
    return matches;
}
 
Example #14
Source File: TupleToRangeTest.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Creates document ranges for three document ids in one shard and compares them with the
 * ranges produced by {@code makeTestRange} for the same ids.
 */
@Test
public void testGenerateDocumentRanges() {
    String shard = "20190314_0";
    IndexInfo indexInfo = new IndexInfo(Sets.newHashSet("docId0", "docId1", "docId2"));
    indexInfo.applyNode(queryNode);

    // Expected: one range per document id, in this order
    List<Range> expectedRanges = new ArrayList<>(3);
    for (String docId : new String[] {"docId0", "docId1", "docId2"}) {
        expectedRanges.add(makeTestRange(shard, docId));
    }

    // Actual ranges produced by the code under test
    Iterator<QueryPlan> actualPlans = TupleToRange.createDocumentRanges(queryNode, shard, indexInfo, config.isTldQuery());

    eval(expectedRanges, actualPlans);
}
 
Example #15
Source File: RangeBindingSetEntries.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Gathers the binding sets of every stored range that contains the key and whose start/end
 * column families pass {@code validateContext} against the key's column family.
 *
 * @param key the key to look up
 * @return the binding sets of all matching ranges; empty when none match
 */
public Collection<BindingSet> containsKey(Key key) {
    Set<BindingSet> matches = new HashSet<>();
    for (Range candidate : ranges.keySet()) {
        if (!candidate.contains(key)) {
            continue;
        }
        // range.contains(key) can be true when only the Row lies within the Range, even if the
        // key's ColumnFamily is outside the Range's ColumnFamily bounds, so the ColumnFamily is
        // checked separately against the start and end keys.
        boolean contextMatches = validateContext(key.getColumnFamily(), candidate.getStartKey().getColumnFamily(),
                candidate.getEndKey().getColumnFamily());
        if (contextMatches) {
            matches.addAll(ranges.get(candidate));
        }
    }
    return matches;
}
 
Example #16
Source File: UpgradeCounterValues.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Populates this tool's configuration fields from the command-line arguments.
 *
 * Each value of the ranges option is split on a comma (surrounding whitespace ignored) into a
 * start row and an end row, both exclusive. When the option is absent the whole table is scanned.
 *
 * @param args
 *            raw command-line arguments
 * @throws ParseException
 *             if the arguments cannot be parsed, or a range value does not have the form
 *             {@code start,end}
 */
private void parseConfig(String[] args) throws ParseException {
    CommandLine cl = new BasicParser().parse(options, args);
    instanceName = cl.getOptionValue(instanceNameOpt.getOpt());
    zookeepers = cl.getOptionValue(zookeeperOpt.getOpt());
    username = cl.getOptionValue(usernameOpt.getOpt());
    password = cl.getOptionValue(passwordOpt.getOpt());
    tableName = cl.getOptionValue(tableNameOpt.getOpt());
    ranges = new ArrayList<>();
    if (!cl.hasOption(rangesOpt.getOpt())) {
        System.out.println("NOTE: no ranges specified on the command line. Scanning the entire table.");
        ranges.add(new Range());
    } else {
        for (String rangeStr : cl.getOptionValues(rangesOpt.getOpt())) {
            String[] startEnd = rangeStr.split("\\s*,\\s*");
            // Report a malformed range as a parse error rather than an ArrayIndexOutOfBoundsException.
            if (startEnd.length < 2) {
                throw new ParseException("Invalid range '" + rangeStr + "': expected <startRow>,<endRow>");
            }
            ranges.add(new Range(startEnd[0], false, startEnd[1], false));
        }
        System.out.println("Using ranges: " + ranges);
    }

    if (cl.hasOption(bsThreadsOpt.getOpt())) {
        bsThreads = Integer.parseInt(cl.getOptionValue(bsThreadsOpt.getOpt()));
    }
    if (cl.hasOption(bwThreadsOpt.getOpt())) {
        bwThreads = Integer.parseInt(cl.getOptionValue(bwThreadsOpt.getOpt()));
    }
    if (cl.hasOption(bwMemoryOpt.getOpt())) {
        bwMemory = Long.parseLong(cl.getOptionValue(bwMemoryOpt.getOpt()));
    }
}
 
Example #17
Source File: AccumuloTemporalIndexer.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Gets statements where the date object is after the given queryInstant.
 * The scan starts (inclusive) at the key following every key prefixed by the query key and
 * ends (exclusive) after the constraint prefix, or is unbounded when there is no constraint
 * prefix.
 */
@Override
public CloseableIteration<Statement, QueryEvaluationException> queryInstantAfterInstant(
        final TemporalInstant queryInstant, final StatementConstraints constraints)
        throws QueryEvaluationException {
    final Query query = new Query() {
        @Override
        public Range getRange(final KeyParts keyParts) {
            // Begin just past everything that shares the query key as a prefix.
            final Text start = Range.followingPrefix(keyParts.getQueryKey());
            // A null end means no upper bound.
            final Text endAt = keyParts.constraintPrefix == null
                    ? null
                    : Range.followingPrefix(keyParts.constraintPrefix);
            return new Range(start, true, endAt, false);
        }
    };
    final ScannerBase scanner = query.doQuery(queryInstant, constraints);
    return getContextIteratorWrapper(scanner, constraints.getContext());
}
 
Example #18
Source File: BulkInputFormat.java    From datawave with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces, in place, every binned range with that range clipped to its tablet's data range.
 *
 * Truncating the ranges to within the tablets makes it easier to know what work needs to be
 * redone when failures occur and tablets have merged or split.
 *
 * @param binnedRanges
 *            ranges grouped by an outer string key and then by tablet extent; cleared and refilled
 *            with the clipped ranges
 */
private void clipRanges(Map<String,Map<KeyExtent,List<Range>>> binnedRanges) {
    Map<String,Map<KeyExtent,List<Range>>> clippedBins = new HashMap<>();

    for (Entry<String,Map<KeyExtent,List<Range>>> bin : binnedRanges.entrySet()) {
        Map<KeyExtent,List<Range>> clippedByTablet = new HashMap<>();

        for (Entry<KeyExtent,List<Range>> tabletEntry : bin.getValue().entrySet()) {
            KeyExtent extent = tabletEntry.getKey();
            Range tabletRange = extent.toDataRange();

            List<Range> clipped = new ArrayList<>();
            for (Range requested : tabletEntry.getValue()) {
                clipped.add(tabletRange.clip(requested));
            }
            clippedByTablet.put(extent, clipped);
        }

        clippedBins.put(bin.getKey(), clippedByTablet);
    }

    // Swap the clipped ranges into the caller's map
    binnedRanges.clear();
    binnedRanges.putAll(clippedBins);
}
 
Example #19
Source File: LockResolver.java    From fluo with Apache License 2.0 5 votes vote down vote up
/**
 * Scans the given row/column pairs and returns the entries whose column type is
 * {@code ColumnType.RLOCK}, after applying the {@code OpenReadLockIterator} scan iterator.
 *
 * For each row/column, the scanned range starts (inclusive) at the key for that row/column and
 * ends (exclusive) at a copy of that key whose timestamp is {@code ColumnType.LOCK.first()}.
 *
 * @param env environment supplying the Accumulo client, table and authorizations
 * @param rowColsToCheck columns to check, grouped by row
 * @return the matching entries, possibly empty
 */
static List<Entry<Key, Value>> getOpenReadLocks(Environment env,
    Map<Bytes, Set<Column>> rowColsToCheck) throws Exception {

  List<Range> lockRanges = new ArrayList<>();
  for (Entry<Bytes, Set<Column>> rowEntry : rowColsToCheck.entrySet()) {
    Bytes row = rowEntry.getKey();
    for (Column column : rowEntry.getValue()) {
      Key rangeStart = SpanUtil.toKey(new RowColumn(row, column));
      Key rangeEnd = new Key(rangeStart);
      rangeEnd.setTimestamp(ColumnType.LOCK.first());
      lockRanges.add(new Range(rangeStart, true, rangeEnd, false));
    }
  }

  List<Entry<Key, Value>> openReadLocks = new ArrayList<>();
  try (BatchScanner bscanner =
      env.getAccumuloClient().createBatchScanner(env.getTable(), env.getAuthorizations(), 1)) {

    bscanner.setRanges(lockRanges);
    bscanner.addScanIterator(new IteratorSetting(10, OpenReadLockIterator.class));

    // Keep only the read-lock entries from what the scan returns.
    for (Entry<Key, Value> scanned : bscanner) {
      if (ColumnType.from(scanned.getKey()) == ColumnType.RLOCK) {
        openReadLocks.add(scanned);
      }
    }
  }
  return openReadLocks;
}
 
Example #20
Source File: RangeSplit.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Restores this split from the stream: an int count followed by that many serialized ranges,
 * then an int count followed by that many UTF-encoded location strings.
 *
 * @param in
 *            the stream to read from
 * @throws IOException
 *             if reading from the stream fails
 */
public void readFields(DataInput in) throws IOException {
    ranges = Sets.newTreeSet();
    final int rangeCount = in.readInt();
    for (int r = 0; r < rangeCount; r++) {
        Range deserialized = new Range();
        deserialized.readFields(in);
        ranges.add(deserialized);
    }

    final int locationCount = in.readInt();
    locations = new String[locationCount];
    for (int l = 0; l < locationCount; l++) {
        locations[l] = in.readUTF();
    }
}
 
Example #21
Source File: AnyFieldScanner.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a scanner by forwarding every argument unchanged to the superclass constructor and
 * then installing a fresh {@code QueryUncaughtExceptionHandler}.
 *
 * @param tableName
 *            forwarded to the superclass
 * @param auths
 *            forwarded to the superclass
 * @param delegator
 *            forwarded to the superclass
 * @param maxResults
 *            forwarded to the superclass
 * @param settings
 *            forwarded to the superclass
 * @param options
 *            forwarded to the superclass
 * @param ranges
 *            forwarded to the superclass
 */
public AnyFieldScanner(String tableName, Set<Authorizations> auths, ResourceQueue delegator, int maxResults, Query settings,
                SessionOptions options, Collection<Range> ranges) {
    super(tableName, auths, delegator, maxResults, settings, options, ranges);
    // Use a handler local to this scanner instead of the one carried by settings, because
    // exceptions raised here may not be critical to the overall query execution.
    uncaughtExceptionHandler = new QueryUncaughtExceptionHandler();
}
 
Example #22
Source File: TLDEventDataFilterTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
// Drives a TLDEventDataFilter with a per-field limit of 1 on "field1" through repeated
// keep/apply/getSeekRange calls on the same key, and checks that a seek range is only produced
// on the third apply. The assertions depend on the exact order of the calls below.
// NOTE(review): the constructor arguments 3 and -1 presumably are the max-field-seek and
// max-key-seek thresholds (the test name suggests the former) - confirm against TLDEventDataFilter.
@Test
public void getSeekRange_maxFieldSeekNotEqualToLimit() {
    // limit "field1" to a single value
    Map<String,Integer> fieldLimits = new HashMap<>(1);
    fieldLimits.put("field1", 1);
    
    // mock expectations: the script reports no children and the visitor call returns null
    expect(mockScript.jjtGetNumChildren()).andReturn(0).anyTimes();
    expect(mockScript.jjtAccept(isA(EventDataQueryExpressionVisitor.class), eq(""))).andReturn(null);
    
    replayAll();
    
    Key key1 = new Key("row", "column", "field1" + Constants.NULL_BYTE_STRING + "value");
    // key2 is declared but not used by the assertions below
    Key key2 = new Key("row", "column", "field2" + Constants.NULL_BYTE_STRING + "value");
    filter = new TLDEventDataFilter(mockScript, mockAttributeFactory, null, null, 3, -1, fieldLimits, "LIMIT_FIELD", Collections.EMPTY_SET);
    
    assertTrue(filter.keep(key1));
    // increments counts = 1
    assertTrue(filter.apply(new AbstractMap.SimpleEntry<>(key1, null)));
    // no seek range is offered yet
    assertNull(filter.getSeekRange(key1, key1.followingKey(PartialKey.ROW), false));
    // does not increment counts so will still return true
    assertTrue(filter.keep(key1));
    // increments counts = 2 rejected by field count
    assertFalse(filter.apply(new AbstractMap.SimpleEntry<>(key1, null)));
    // still no seek range after the second apply
    assertNull(filter.getSeekRange(key1, key1.followingKey(PartialKey.ROW), false));
    
    // now fails
    assertFalse(filter.keep(key1));
    
    // see another key on apply to trigger the seek range
    assertFalse(filter.apply(new AbstractMap.SimpleEntry<>(key1, null)));
    Range seekRange = filter.getSeekRange(key1, key1.followingKey(PartialKey.ROW), false);
    assertNotNull(seekRange);
    // the seek range starts (inclusive) in the same row and column family as key1, with a
    // column qualifier of "field1" followed by \u0001
    assertEquals(seekRange.getStartKey().getRow(), key1.getRow());
    assertEquals(seekRange.getStartKey().getColumnFamily(), key1.getColumnFamily());
    assertEquals(seekRange.getStartKey().getColumnQualifier().toString(), "field1" + "\u0001");
    assertEquals(true, seekRange.isStartKeyInclusive());
    
    verifyAll();
}
 
Example #23
Source File: TLDTermFrequencyAggregatorTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Applies a TLD term-frequency aggregator whose keep-fields and filter reference FIELD2 to
 * entries that only carry FIELD1 and NEXT_DOC_FIELD, and checks that nothing is aggregated and
 * the iterator is left on the NEXT_DOC_FIELD entry.
 */
@Test
public void apply_buildDocOnlyKeepToKeep() throws IOException, ParseException {
    // Source data: two FIELD1 entries under 123.345.456 (and a child), then an entry for 124.345.456
    TreeMap<Key,Value> entries = Maps.newTreeMap();
    entries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    entries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456.1", 10), new Value());
    entries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());

    SortedKeyValueIterator<Key,Value> sourceIterator = new SortedMapIterator(entries);
    sourceIterator.seek(new Range(), null, true);

    // Aggregator that keeps only FIELD2
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TLDTermFrequencyAggregator(keepFields, filter, -1);

    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    Key result = aggregator.apply(sourceIterator, doc, attributeFactory);

    // no result key and an empty document
    assertTrue(result == null);
    assertTrue(doc.size() == 0);

    // the iterator must be positioned on the NEXT_DOC_FIELD entry
    assertTrue(sourceIterator.hasTop());
    assertTrue(sourceIterator.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
 
Example #24
Source File: RangeStreamTest.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Streams query plans for an OR of two terms on the indexed field FOO and checks that the
 * produced ranges are exactly the four expected document ranges.
 */
@Test
public void testBothIndexedPrune() throws Exception {
    ASTJexlScript script = JexlASTHelper.parseJexlQuery("(FOO == 'barter' || FOO == 'baggy')");

    // Query window: from the epoch until now
    config.setBeginDate(new Date(0));
    config.setEndDate(new Date(System.currentTimeMillis()));

    // FOO and NUM are both queried and indexed
    Multimap<String,Type<?>> dataTypes = HashMultimap.create();
    dataTypes.putAll("FOO", Sets.newHashSet(new LcNoDiacriticsType()));
    dataTypes.putAll("NUM", Sets.newHashSet(new NumberType()));
    config.setQueryFieldsDatatypes(dataTypes);
    config.setIndexedFields(dataTypes);

    MockMetadataHelper helper = new MockMetadataHelper();
    helper.setIndexedFields(dataTypes.keySet());

    // Two documents in each of two shards
    Set<Range> expectedRanges = Sets.newHashSet();
    for (String shard : new String[] {"20190314_1", "20190414_1"}) {
        for (String document : new String[] {"datatype1\u0000123", "datatype1\u0000345"}) {
            expectedRanges.add(makeTestRange(shard, document));
        }
    }

    RangeStream rangeStream = new RangeStream(config, new ScannerFactory(config.getConnector(), 1), helper).setLimitScanners(true);
    for (QueryPlan plan : rangeStream.streamPlans(script)) {
        for (Range produced : plan.getRanges()) {
            // every produced range must be one of the expected ranges, and only once
            assertTrue("Tried to remove unexpected range " + produced.toString() + " from expected ranges: " + expectedRanges.toString(),
                            expectedRanges.remove(produced));
        }
    }

    // nothing expected may be left over
    assertTrue("Expected ranges not found in query plan: " + expectedRanges.toString(), expectedRanges.isEmpty());
}
 
Example #25
Source File: QueryIteratorIT.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the index test with a seek range limited to document 123.345.456 and a query on
 * EVENT_FIELD4.
 */
@Test
public void index_documentSpecific_test() throws IOException {
    // document-specific seek range, then the query to evaluate
    index_test(getDocumentRange("123.345.456"), "EVENT_FIELD4 == 'd'", false, Collections.EMPTY_LIST, Collections.EMPTY_LIST);
}
 
Example #26
Source File: BaseTableCache.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the scanner to cover a single unbounded range and attaches a negated
 * {@code RegExFilter} named "skipFColumn" on the column-family pattern {@code ^f$}.
 *
 * @param scanner
 *            the scanner to configure
 */
public void setupScanner(BatchScanner scanner) {
    // one unbounded range
    scanner.setRanges(Lists.newArrayList(new Range()));

    // negated regex filter on the column family
    Map<String,String> filterOptions = new HashMap<>();
    filterOptions.put(RegExFilter.COLF_REGEX, "^f$");
    filterOptions.put("negate", "true");
    scanner.addScanIterator(new IteratorSetting(100, "skipFColumn", RegExFilter.class, filterOptions));
}
 
Example #27
Source File: QueryIteratorIT.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the term-frequency test over the shard range with a trailing-wildcard regex on
 * TF_FIELD1 wrapped in an exceeded-value-threshold marker, expecting the base event for
 * document 123.345.456.
 */
@Test
public void tf_exceededValue_trailingWildcard_shardRange_test() throws IOException {
    // the seek range here is the shard range, not a document-specific one
    tf_test(getShardRange(), "EVENT_FIELD1 =='a' && ((ExceededValueThresholdMarkerJexlNode = true) && (TF_FIELD1 =~ 'b.*'))",
                    getBaseExpectedEvent("123.345.456"), Collections.EMPTY_LIST, Collections.EMPTY_LIST);
}
 
Example #28
Source File: JobSetupUtil.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a range whose start and end rows are epoch-millisecond numbers into a range whose
 * rows are those instants formatted by {@code DateHelper.format}. The result is start-inclusive
 * and end-exclusive.
 *
 * @param dayRange
 *            range whose start and end key rows parse as longs
 * @param log
 *            not used by this method
 * @return the reformatted range
 */
public static Range formatReverseTimeRange(Range dayRange, Logger log) {
    final long startMillis = Long.parseLong(dayRange.getStartKey().getRow().toString());
    final long endMillis = Long.parseLong(dayRange.getEndKey().getRow().toString());

    return new Range(DateHelper.format(new Date(startMillis)), true, DateHelper.format(new Date(endMillis)), false);
}
 
Example #29
Source File: QueryIteratorIT.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the index test over the shard range with a query on EVENT_FIELD4 == 'e'.
 */
@Test
public void index_shardRange_miss_test() throws IOException {
    // the seek range here is the shard range, not a document-specific one
    index_test(getShardRange(), "EVENT_FIELD4 == 'e'", true, Collections.EMPTY_LIST, Collections.EMPTY_LIST);
}
 
Example #30
Source File: RangeFactory.java    From datawave with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a tld document range that can be passed to the {@link datawave.query.iterator.QueryIterator}
 *
 * Example: Given shard 20190314_4 and document docId0, will return tld doc range [20190314_4 docId0, 20190314_4 docId0xff)
 *
 * @param shard
 *            used as the row of both range keys
 * @param docId
 *            used as the column family of the start key; the end key's column family is the
 *            same value with MAX_UNICODE_STRING appended
 * @return a range that includes the start key and excludes the end key
 */
public static Range createTldDocumentSpecificRange(String shard, String docId) {
    // The start key is kept inclusive on purpose. Strictly speaking it should be excluded, but
    // with a non-inclusive start key the fi\x00 range built later in IndexIterator can no longer
    // tell "we want a single event" apart from "we were restarted and this is the last event
    // we returned".
    return new Range(new Key(shard, docId), true, new Key(shard, docId + MAX_UNICODE_STRING), false);
}