Java Code Examples for org.apache.accumulo.core.data.Key#getColumnFamily()

The following examples show how to use org.apache.accumulo.core.data.Key#getColumnFamily(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: FirstEntryInPrefixedRowIterator.java    From accumulo-recipes with Apache License 2.0 6 votes vote down vote up
@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // save parameters for future internal seeks
    latestRange = range;
    latestColumnFamilies = columnFamilies;
    latestInclusive = inclusive;
    lastRowFound = null;

    Key startKey = range.getStartKey();
    Range seekRange = new Range(startKey == null ? null : new Key(startKey.getRow(), startKey.getColumnFamily()), true, range.getEndKey(), range.isEndKeyInclusive());
    super.seek(seekRange, columnFamilies, inclusive);
    finished = false;

    if (getSource().hasTop()) {
        lastRowFound = getSource().getTopKey().getRow();
        if (range.beforeStartKey(getSource().getTopKey()))
            consume();
    }
}
 
Example 2
Source File: DocumentDataIterator.java    From datawave with Apache License 2.0 5 votes vote down vote up
protected boolean isEventKey(Key k) {
    Text cf = k.getColumnFamily();
    return cf.getLength() > 0
                    && cf.find("\u0000") != -1
                    && !((cf.charAt(0) == 'f' && cf.charAt(1) == 'i' && cf.charAt(2) == 0) || (cf.getLength() == 1 && cf.charAt(0) == 'd') || (cf
                                    .getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f'));
}
 
Example 3
Source File: BucketRollupIterator.java    From accumulo-recipes with Apache License 2.0 5 votes vote down vote up
@Override
public Key getTopKey() {
    Key topKey = super.getTopKey();

    long timestamp = reverseTimestampToNormalTime(Long.parseLong(topKey.getRow().toString()));

    Key retKey = new Key(new Text(truncatedReverseTimestamp(timestamp, bucketSize).toString()),
            topKey.getColumnFamily(), topKey.getColumnQualifier(),
            new Text(topKey.getColumnVisibility().toString()), topKey.getTimestamp());

    return retKey;
}
 
Example 4
Source File: RowHashingPartitioner.java    From datawave with Apache License 2.0 5 votes vote down vote up
@Override
public int getPartition(BulkIngestKey bKey, Value value, int reducers) {
    HashCodeBuilder hcb = new HashCodeBuilder(157, 41);
    Key key = bKey.getKey();
    Text cf = key.getColumnFamily();
    if (colFams.contains(cf)) {
        hcb.append(cf);
    }
    hcb.append(key.getRow());
    int partition = (hcb.toHashCode() >>> 1) % reducers;
    if (log.isTraceEnabled()) {
        log.trace("Returning " + partition + " for BIK " + bKey);
    }
    return partition;
}
 
Example 5
Source File: AccumuloFreeTextIndexer.java    From rya with Apache License 2.0 5 votes vote down vote up
/**
 * Checks to see if the provided term appears in other documents.
 * @param term the term to search for.
 * @param currentDocId the current document ID that the search term exists in.
 * @return {@code true} if the term was found in other documents. {@code false} otherwise.
 */
private boolean doesTermExistInOtherDocs(final String term, final int currentDocId, final Text docIdText) {
    try {
        final String freeTextDocTableName = getFreeTextDocTablename(conf);
        final Scanner scanner = getScanner(freeTextDocTableName);

        final String t = StringUtils.removeEnd(term, "*").toLowerCase();
        final Text queryTerm = ColumnPrefixes.getTermColFam(t);

        // perform query and read results
        scanner.fetchColumnFamily(queryTerm);

        for (final Entry<Key, Value> entry : scanner) {
            final Key key = entry.getKey();
            final Text row = key.getRow();
            final int rowId = Integer.parseInt(row.toString());
            // We only want to check other documents from the one we're deleting
            if (rowId != currentDocId) {
                final Text columnFamily = key.getColumnFamily();
                final String columnFamilyValue = columnFamily.toString();
                // Check that the value has the term prefix
                if (columnFamilyValue.startsWith(ColumnPrefixes.TERM_CF_PREFIX.toString())) {
                    final Text text = ColumnPrefixes.removePrefix(columnFamily);
                    final String value = text.toString();
                    if (value.equals(term)) {
                        return true;
                    }
                }
            }
        }
    } catch (final IOException e) {
        logger.error("Error searching for the existance of the term in other documents", e);
    }
    return false;
}
 
Example 6
Source File: TLDFieldIndexAggregator.java    From datawave with Apache License 2.0 5 votes vote down vote up
@Override
protected Key getSeekStartKey(Key current, ByteSequence pointer) {
    int lastNullIndex = current.getColumnQualifier().toString().lastIndexOf(Constants.NULL);
    lastNullIndex = current.getColumnQualifier().toString().lastIndexOf(Constants.NULL, lastNullIndex - 1);
    String prefix = current.getColumnQualifier().toString().substring(0, lastNullIndex + 1);
    return new Key(current.getRow(), current.getColumnFamily(), new Text(prefix + pointer + Constants.MAX_UNICODE_STRING));
}
 
Example 7
Source File: IndexOnlyKeyToDocumentData.java    From datawave with Apache License 2.0 5 votes vote down vote up
private Key newDocumentKey(final Key fieldKey, long timestamp) {
    final Text row = fieldKey.getRow();
    final Text cf = fieldKey.getColumnFamily();
    final Text cq = new Text();
    final Text visibility = new Text();
    return new Key(row, cf, cq, visibility, timestamp);
}
 
Example 8
Source File: FirstAndLastSeenIterator.java    From datawave with Apache License 2.0 4 votes vote down vote up
private Text constructColFamilyForResult(Key lastSeenKey, String lastSeenDate) {
    return lastSeenKey.getColumnFamily();
}
 
Example 9
Source File: QueryIterator.java    From datawave with Apache License 2.0 4 votes vote down vote up
public ValueComparator(Key metadata) {
    fi = (metadata == null ? new Text() : metadata.getColumnFamily());
}
 
Example 10
Source File: CollisionIT.java    From fluo with Apache License 2.0 4 votes vote down vote up
@Test
public void testLotsOfCollisions() throws Exception {

  Random rand = new Random(45734985);

  int[] nums = new int[1000];
  int[] expectedCounts = new int[5];

  for (int i = 0; i < nums.length; i++) {
    nums[i] = rand.nextInt(expectedCounts.length);
    expectedCounts[nums[i]]++;
  }

  try (LoaderExecutor loader = client.newLoaderExecutor()) {
    for (int num : nums) {
      loader.execute(new NumLoader(num));
    }
  }

  miniFluo.waitForObservers();

  long recentTS;

  try (Snapshot snapshot = client.newSnapshot()) {

    for (int i = 0; i < expectedCounts.length; i++) {
      String total = snapshot.gets(i + "", STAT_TOTAL);
      Assert.assertNotNull(total);
      Assert.assertEquals(expectedCounts[i], Integer.parseInt(total));
      String processed = snapshot.gets(i + "", STAT_PROCESSED);
      Assert.assertNotNull(processed);
      Assert.assertEquals(expectedCounts[i], Integer.parseInt(processed));
    }

    String allTotal = snapshot.gets("all", STAT_TOTAL);
    Assert.assertNotNull(allTotal);
    Assert.assertEquals(1000, Integer.parseInt(allTotal));

    recentTS = snapshot.getStartTimestamp();
  }

  long oldestTS = ZookeeperUtil.getGcTimestamp(config.getAppZookeepers());

  while (oldestTS < recentTS) {
    UtilWaitThread.sleep(300);
    oldestTS = ZookeeperUtil.getGcTimestamp(config.getAppZookeepers());
  }

  aClient.tableOperations().compact(getCurTableName(), null, null, true, true);

  Scanner scanner = aClient.createScanner(getCurTableName(), Authorizations.EMPTY);

  HashSet<String> rowCols = new HashSet<>();

  for (Entry<Key, Value> entry : scanner) {
    Key k = entry.getKey();
    String rowCol = k.getRow() + ":" + k.getColumnFamily() + ":" + k.getColumnQualifier() + ":"
        + String.format("%x", k.getTimestamp() & ColumnConstants.PREFIX_MASK);
    if (rowCols.contains(rowCol)) {
      System.err.println("DEBUG oldestTs : " + oldestTS + " recentTS : " + recentTS);
      Iterables.transform(scanner, e -> "DEBUG " + FluoFormatter.toString(e))
          .forEach(System.err::println);
    }
    Assert.assertFalse("Duplicate row col " + rowCol, rowCols.contains(rowCol));
    rowCols.add(rowCol);
  }
}
 
Example 11
Source File: DocumentIndexIntersectingIterator.java    From rya with Apache License 2.0 4 votes vote down vote up
protected Text getTerm(Key key) {
  return key.getColumnFamily();
}
 
Example 12
Source File: EventDataScanNestedIterator.java    From datawave with Apache License 2.0 4 votes vote down vote up
protected void findNextDocument() {
    topKey = null;
    
    try {
        Text cf = new Text();
        
        /*
         * Given that we are already at a document key, this method will continue to advance the underlying source until it is either exhausted (hasTop()
         * returns false), the returned key is not in the totalRange, and the current top key shares the same row and column family as the source's next
         * key.
         */
        while (topKey == null && source.hasTop()) {
            Key k = source.getTopKey();
            if (log.isTraceEnabled())
                log.trace("Sought to " + k);
            k.getColumnFamily(cf);
            
            if (!isEventKey(k)) {
                if (cf.find("fi\0") == 0) {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'fi')");
                    }
                    // Try to do an optimized jump over the field index
                    cf.set("fi\1");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()),
                                    columnFamilies, inclusive);
                } else if (cf.getLength() == 1 && cf.charAt(0) == 'd') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'd'");
                    }
                    // Try to do an optimized jump over the raw documents
                    cf.set("d\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()),
                                    columnFamilies, inclusive);
                } else if (cf.getLength() == 2 && cf.charAt(0) == 't' && cf.charAt(1) == 'f') {
                    if (log.isDebugEnabled()) {
                        log.debug("Seeking over 'tf'");
                    }
                    // Try to do an optimized jump over the term frequencies
                    cf.set("tf\0");
                    source.seek(new Range(new Key(source.getTopKey().getRow(), cf), false, totalRange.getEndKey(), totalRange.isEndKeyInclusive()),
                                    columnFamilies, inclusive);
                } else {
                    if (log.isDebugEnabled()) {
                        log.debug("Next()'ing over the current key");
                    }
                    source.next();
                }
            } else {
                if (dataTypeFilter.apply(source.getTopKey())) {
                    this.topKey = source.getTopKey();
                } else {
                    Range nextCF = new Range(nextStartKey(source.getTopKey()), true, totalRange.getEndKey(), totalRange.isEndKeyInclusive());
                    source.seek(nextCF, columnFamilies, inclusive);
                }
            }
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not seek in findNextDocument", e);
    }
}
 
Example 13
Source File: TermFrequencyAggregator.java    From datawave with Apache License 2.0 4 votes vote down vote up
@Override
protected Key getSeekStartKey(Key current, ByteSequence pointer) {
    // CQ = dataType\0UID\0Normalized field value\0Field name
    // seek to the next documents TF
    return new Key(current.getRow(), current.getColumnFamily(), new Text(pointer + Constants.NULL_BYTE_STRING + Constants.MAX_UNICODE_STRING));
}
 
Example 14
Source File: EventDataQueryFieldFilter.java    From datawave with Apache License 2.0 4 votes vote down vote up
@Override
public Key getStartKey(Key from) {
    return new Key(from.getRow(), from.getColumnFamily());
}
 
Example 15
Source File: EventDataQueryExpressionFilter.java    From datawave with Apache License 2.0 4 votes vote down vote up
@Override
public Key getStartKey(Key from) {
    return new Key(from.getRow(), from.getColumnFamily());
}
 
Example 16
Source File: AndingIterator.java    From rya with Apache License 2.0 4 votes vote down vote up
protected Text getTerm(final Key key) {
	return key.getColumnFamily();
}
 
Example 17
Source File: ConfigurableEventDataQueryFilter.java    From datawave with Apache License 2.0 4 votes vote down vote up
@Override
public Key getStartKey(Key from) {
    // don't delegate to the filter, we need an implementation here to override subclasses.
    return new Key(from.getRow(), from.getColumnFamily());
}
 
Example 18
Source File: AnyFieldScanner.java    From datawave with Apache License 2.0 3 votes vote down vote up
/**
 * Override this for your specific implementation.
 * 
 * In this specific implementation our row key will be the term, the column family will be the field name, and the column family will be the shard,so we
 * should have the following as our last key
 * 
 * bar FOO:20130101_0
 * 
 * so we should append a null so that we we don't skip shards. similarly, an assumption is made of the key structure within this class.
 * 
 * @param lastKey
 * @param previousRange
 */
public Range buildNextRange(final Key lastKey, final Range previousRange) {
    
    /**
     * This will re-seek the next column family when performing any field expansion.
     */
    Range r = new Range(new Key(lastKey.getRow(), new Text(lastKey.getColumnFamily() + "\u0000\uffff")), true, previousRange.getEndKey(),
                    previousRange.isEndKeyInclusive());
    if (log.isTraceEnabled())
        log.trace(r);
    return r;
    
}
 
Example 19
Source File: TLDEventDataFilter.java    From datawave with Apache License 2.0 2 votes vote down vote up
/**
 * Seek starting from the end of the current field
 * 
 * @param current
 *            the current key
 * @param fieldName
 *            the field name to be seeked
 * @param endKey
 *            the current seek end key
 * @param endKeyInclusive
 *            the current seek end key inclusive flag
 * @return a new range that begins at the end of the current field
 */
private Range getFieldSeek(Key current, String fieldName, Key endKey, boolean endKeyInclusive) {
    Key startKey = new Key(current.getRow(), current.getColumnFamily(), new Text(fieldName + "\u0001"));
    return new Range(startKey, true, endKey, endKeyInclusive);
}
 
Example 20
Source File: AndIterator.java    From accumulo-recipes with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the given key's dataLocation
 *
 * @param key
 * @return The given key's dataLocation
 */
protected Text getDataLocation(Key key) {
    return key.getColumnFamily();
}