/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.salesforce.hbase.index.covered.example;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map.Entry;

import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;

import com.google.common.collect.Lists;
import com.salesforce.hbase.index.covered.IndexUpdate;
import com.salesforce.hbase.index.covered.TableState;
import com.salesforce.hbase.index.scanner.Scanner;
import com.salesforce.phoenix.index.BaseIndexCodec;

/**
 * Index codec that builds one index update per configured {@link ColumnGroup}.
 * <p>
 * The index row key produced by {@link #composeRowKey(byte[], int, List)} has the layout:
 *
 * <pre>
 * [value 1]...[value n][primary row key][int length 1]...[int length n][int n]
 * </pre>
 *
 * {@link #getValues(byte[])} and {@link #checkRowKeyForAllNulls(byte[])} read that layout back,
 * starting from the trailing integers.
 */
public class CoveredColumnIndexCodec extends BaseIndexCodec {

  private static final byte[] EMPTY_BYTES = new byte[0];
  public static final byte[] INDEX_ROW_COLUMN_FAMILY = Bytes.toBytes("INDEXED_COLUMNS");

  private List<ColumnGroup> groups;

  /**
   * @param groups to initialize the codec with
   * @return an instance that is initialized with the given {@link ColumnGroup}s, for testing
   *         purposes
   */
  public static CoveredColumnIndexCodec getCodecForTesting(List<ColumnGroup> groups) {
    CoveredColumnIndexCodec codec = new CoveredColumnIndexCodec();
    // copy the list so the codec holds its own list instance
    codec.groups = Lists.newArrayList(groups);
    return codec;
  }

  /**
   * Load the column groups to index from the environment's configuration.
   * @param env environment whose configuration describes the column groups
   */
  @Override
  public void initialize(RegionCoprocessorEnvironment env) {
    groups = CoveredColumnIndexSpecifierBuilder.getColumns(env.getConfiguration());
  }

  /**
   * Build one {@link IndexUpdate} per configured column group for the current table state.
   * @param state current state of the primary table row
   * @return the updates, in the same order as the configured groups
   */
  @Override
  public Iterable<IndexUpdate> getIndexUpserts(TableState state) {
    List<IndexUpdate> updates = new ArrayList<IndexUpdate>();
    for (ColumnGroup group : groups) {
      updates.add(getIndexUpdateForGroup(group, state));
    }
    return updates;
  }

  /**
   * Build the index {@link Put} for a single column group.
   * @param group index information
   * @param state current state of the primary table row
   * @return the update that should be made to the index table; if the group's columns have no
   *         value bytes at all, the update obtained from the state is returned unmodified
   * @throws RuntimeException wrapping the {@link IOException} if the table state cannot be read
   */
  private IndexUpdate getIndexUpdateForGroup(ColumnGroup group, TableState state) {
    List<CoveredColumn> refs = group.getColumns();
    try {
      Pair<Scanner, IndexUpdate> stateInfo = state.getIndexedColumnsTableState(refs);
      Scanner kvs = stateInfo.getFirst();
      Pair<Integer, List<ColumnEntry>> columns;
      try {
        columns = getNextEntries(refs, kvs, state.getCurrentRowKey());
      } finally {
        // make sure we close the scanner, even if reading the entries failed
        kvs.close();
      }

      // no value bytes found for any column, so there is nothing to index
      if (columns.getFirst().intValue() == 0) {
        return stateInfo.getSecond();
      }

      // have all the column entries, so just turn it into a Put for the index row
      byte[] rowKey =
          composeRowKey(state.getCurrentRowKey(), columns.getFirst(), columns.getSecond());
      Put p = new Put(rowKey, state.getCurrentTimestamp());
      // add the columns to the put
      addColumnsToPut(p, columns.getSecond());

      // update the index info
      IndexUpdate update = stateInfo.getSecond();
      update.setTable(Bytes.toBytes(group.getTable()));
      update.setUpdate(p);
      return update;
    } catch (IOException e) {
      // keep the cause so the underlying failure is not lost
      throw new RuntimeException("Unexpected exception when getting state for columns: " + refs,
          e);
    }
  }

  /**
   * Add one column per entry to the index put. Each qualifier is the entry's position (as an int)
   * followed by the index qualifier of the covered column; the cell value is <tt>null</tt>.
   * @param indexInsert put to add the columns to
   * @param columns entries to add, in order
   */
  private static void addColumnsToPut(Put indexInsert, List<ColumnEntry> columns) {
    int count = 0;
    for (ColumnEntry column : columns) {
      byte[] qualifier = ArrayUtils.addAll(Bytes.toBytes(count++), toIndexQualifier(column.ref));
      indexInsert.add(INDEX_ROW_COLUMN_FAMILY, qualifier, null);
    }
  }

  /**
   * @param column column to convert
   * @return the column's family string, the {@link CoveredColumn#SEPARATOR} and then the column's
   *         qualifier, as a single byte array
   */
  private static byte[] toIndexQualifier(CoveredColumn column) {
    return ArrayUtils.addAll(Bytes.toBytes(column.familyString + CoveredColumn.SEPARATOR),
      column.getQualifier());
  }

  /**
   * Build one cleanup {@link IndexUpdate} per configured column group for the current table
   * state.
   * @param state current state of the primary table row
   * @return the deletes, in the same order as the configured groups
   */
  @Override
  public Iterable<IndexUpdate> getIndexDeletes(TableState state) {
    List<IndexUpdate> deletes = new ArrayList<IndexUpdate>();
    for (ColumnGroup group : groups) {
      deletes.add(getDeleteForGroup(group, state));
    }
    return deletes;
  }

  /**
   * Get all the deletes necessary for a group of columns - logically, the cleanup the index table
   * for a given index.
   * @param group index information
   * @param state current state of the primary table row
   * @return the cleanup for the given index; if the group's columns have no value bytes at all,
   *         the update obtained from the state is returned unmodified
   * @throws RuntimeException wrapping the {@link IOException} if the table state cannot be read
   */
  private IndexUpdate getDeleteForGroup(ColumnGroup group, TableState state) {
    List<CoveredColumn> refs = group.getColumns();
    try {
      Pair<Scanner, IndexUpdate> kvs = state.getIndexedColumnsTableState(refs);
      Pair<Integer, List<ColumnEntry>> columns;
      try {
        columns = getNextEntries(refs, kvs.getFirst(), state.getCurrentRowKey());
      } finally {
        // make sure we close the scanner reference, even if reading the entries failed
        kvs.getFirst().close();
      }

      // no change, just return the passed update
      if (columns.getFirst() == 0) {
        return kvs.getSecond();
      }

      // have all the column entries, so just turn it into a Delete for the index row
      byte[] rowKey =
          composeRowKey(state.getCurrentRowKey(), columns.getFirst(), columns.getSecond());
      Delete d = new Delete(rowKey);
      d.setTimestamp(state.getCurrentTimestamp());

      IndexUpdate update = kvs.getSecond();
      update.setUpdate(d);
      update.setTable(Bytes.toBytes(group.getTable()));
      return update;
    } catch (IOException e) {
      // keep the cause so the underlying failure is not lost
      throw new RuntimeException("Unexpected exception when getting state for columns: " + refs,
          e);
    }
  }

  /**
   * Get the next batch of primary table values for the given columns
   * @param refs columns to match against
   * @param kvs scanner over the primary table state; it is not closed by this method
   * @param currentRow row key of the primary table row being indexed
   * @return the total length of all values found and the entries to add for the index. A column
   *         without a matching value contributes an entry with an empty value.
   * @throws IOException if the scanner cannot be read
   */
  private Pair<Integer, List<ColumnEntry>> getNextEntries(List<CoveredColumn> refs, Scanner kvs,
      byte[] currentRow) throws IOException {
    int totalValueLength = 0;
    List<ColumnEntry> entries = new ArrayList<ColumnEntry>(refs.size());

    // pull out the latest state for each column reference, in order
    for (CoveredColumn ref : refs) {
      KeyValue first = ref.getFirstKeyValueForRow(currentRow);
      // we only care about the current value, so we can just snag the first one after the seek
      KeyValue next = kvs.seek(first) ? kvs.next() : null;

      // no more keys (or nothing returned after the seek), or the first key doesn't belong to
      // this column at all, so we have to put in a null entry
      if (next == null || !ref.matchesFamily(next.getFamily())
          || !ref.matchesQualifier(next.getQualifier())) {
        entries.add(new ColumnEntry(null, ref));
        continue;
      }

      byte[] firstValue = next.getValue();
      totalValueLength += firstValue.length;
      entries.add(new ColumnEntry(firstValue, ref));

      // here's where is gets a little tricky - we either need to decide if we should continue
      // adding entries (matches all qualifiers) or if we are done (matches a single qualifier)
      if (!ref.allColumns()) {
        continue;
      }

      // matches all columns, so we need to iterate until we hit the next column with a different
      // family than the current key
      byte[] lastQual = next.getQualifier();
      while ((next = kvs.next()) != null) {
        // different family, done with this column
        if (!ref.matchesFamily(next.getFamily())) {
          break;
        }
        byte[] nextQual = next.getQualifier();
        // we are still on the same qualifier - skip it, since we already added a column for it
        if (Arrays.equals(lastQual, nextQual)) {
          continue;
        }
        // this must match the qualifier since its an all-qualifiers specifier, so we add it
        byte[] v = next.getValue();
        totalValueLength += v.length;
        entries.add(new ColumnEntry(v, ref));
        // update the last qualifier to check against
        lastQual = nextQual;
      }
    }
    return new Pair<Integer, List<ColumnEntry>>(totalValueLength, entries);
  }

  /**
   * A value paired with the column it was read for. A <tt>null</tt> value is stored as an empty
   * array, so {@link #value} is never <tt>null</tt> after construction.
   */
  static class ColumnEntry {
    byte[] value = EMPTY_BYTES;
    CoveredColumn ref;

    public ColumnEntry(byte[] value, CoveredColumn ref) {
      this.value = value == null ? EMPTY_BYTES : value;
      this.ref = ref;
    }
  }

  /**
   * Compose the final index row key.
   * <p>
   * The key is each of the values, in order, followed by the primary key, then the length of each
   * value as an int and finally the number of values as an int, so each value can be deserialized
   * again. The trailing integers are assembled in a single buffer and appended once, rather than
   * growing the key one integer at a time.
   * @param pk primary key of the original row
   * @param length total number of bytes of all the values that should be added
   * @param values to use when building the key
   * @return the composed index row key
   */
  static byte[] composeRowKey(byte[] pk, int length, List<ColumnEntry> values) {
    // values followed by the primary key
    byte[] prefix = new byte[length + pk.length];
    // one int per value length, plus one int for the number of values
    byte[] suffix = new byte[(values.size() + 1) * Bytes.SIZEOF_INT];

    int pos = 0;
    int suffixPos = 0;
    for (ColumnEntry entry : values) {
      byte[] v = entry.value;
      // skip doing the copy attempt, if we don't need to
      if (v.length != 0) {
        System.arraycopy(v, 0, prefix, pos, v.length);
        pos += v.length;
      }
      System.arraycopy(Bytes.toBytes(v.length), 0, suffix, suffixPos, Bytes.SIZEOF_INT);
      suffixPos += Bytes.SIZEOF_INT;
    }

    // add the primary key to the end of the values
    System.arraycopy(pk, 0, prefix, pos, pk.length);

    // and the last integer is the number of values
    System.arraycopy(Bytes.toBytes(values.size()), 0, suffix, suffixPos, Bytes.SIZEOF_INT);

    return ArrayUtils.addAll(prefix, suffix);
  }

  /**
   * Essentially a short-cut from building a {@link Put}.
   * @param pk row key
   * @param timestamp timestamp of all the keyvalues
   * @param values expected value--column pair; a <tt>null</tt> value is treated as an empty value
   * @return a keyvalues that the index contains for a given row at a timestamp with the given value
   *         -- column pairs.
   */
  public static List<KeyValue> getIndexKeyValueForTesting(byte[] pk, long timestamp,
      List<Pair<byte[], CoveredColumn>> values) {
    int length = 0;
    List<ColumnEntry> expected = new ArrayList<ColumnEntry>(values.size());
    for (Pair<byte[], CoveredColumn> value : values) {
      ColumnEntry entry = new ColumnEntry(value.getFirst(), value.getSecond());
      // use the normalized value, which is never null, rather than the raw pair value
      length += entry.value.length;
      expected.add(entry);
    }

    byte[] rowKey = CoveredColumnIndexCodec.composeRowKey(pk, length, expected);
    Put p = new Put(rowKey, timestamp);
    CoveredColumnIndexCodec.addColumnsToPut(p, expected);

    // flatten the put's per-family lists into a single list
    List<KeyValue> kvs = new ArrayList<KeyValue>();
    for (Entry<byte[], List<KeyValue>> entry : p.getFamilyMap().entrySet()) {
      kvs.addAll(entry.getValue());
    }
    return kvs;
  }

  /**
   * Extract the indexed values from a row key built by {@link #composeRowKey(byte[], int, List)}.
   * @param bytes index row key to read
   * @return the values stored at the start of the row key, in order
   */
  public static List<byte[]> getValues(byte[] bytes) {
    // get the total number of keys in the bytes
    int keyCount = CoveredColumnIndexCodec.getPreviousInteger(bytes, bytes.length);
    List<byte[]> keys = new ArrayList<byte[]>(keyCount);
    int[] lengths = new int[keyCount];
    int pos = bytes.length - Bytes.SIZEOF_INT;

    // figure out the length of each key; we read backwards, so the last length comes first
    for (int i = keyCount - 1; i >= 0; i--) {
      lengths[i] = CoveredColumnIndexCodec.getPreviousInteger(bytes, pos);
      pos -= Bytes.SIZEOF_INT;
    }

    // the values are stored back-to-back from the start of the row key
    int current = 0;
    for (int length : lengths) {
      keys.add(Arrays.copyOfRange(bytes, current, current + length));
      current += length;
    }
    return keys;
  }

  /**
   * Read an integer from the preceding {@value Bytes#SIZEOF_INT} bytes
   * @param bytes array to read from
   * @param start start point, backwards from which to read. For example, if specifying "25", we
   *          would try to read an integer from 21 -> 25
   * @return an integer from the preceding {@value Bytes#SIZEOF_INT} bytes, if it exists.
   */
  private static int getPreviousInteger(byte[] bytes, int start) {
    return Bytes.toInt(bytes, start - Bytes.SIZEOF_INT);
  }

  /**
   * Check to see if an row key just contains a list of null values.
   * @param bytes row key to examine
   * @return <tt>true</tt> if all the values are zero-length, <tt>false</tt> otherwise
   */
  public static boolean checkRowKeyForAllNulls(byte[] bytes) {
    int keyCount = CoveredColumnIndexCodec.getPreviousInteger(bytes, bytes.length);
    int pos = bytes.length - Bytes.SIZEOF_INT;
    // walk the stored value lengths backwards; any positive length means a real value
    for (int i = 0; i < keyCount; i++) {
      int next = CoveredColumnIndexCodec.getPreviousInteger(bytes, pos);
      if (next > 0) {
        return false;
      }
      pos -= Bytes.SIZEOF_INT;
    }
    return true;
  }

  /**
   * @param m mutation to check; currently not examined
   * @return <tt>true</tt> if at least one column group is configured
   */
  @Override
  public boolean isEnabled(Mutation m) {
    // this could be a bit smarter, looking at the groups for the mutation, but we leave it at this
    // simple check for the moment.
    return groups.size() > 0;
  }
}