/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.phoenix.hbase.index.covered.filter; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Set; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.filter.FilterBase; import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr; import org.apache.phoenix.util.PhoenixKeyValueUtil; /** * Only allow the 'latest' timestamp of each family:qualifier pair, ensuring that they aren't * covered by a previous delete. This is similar to some of the work the ScanQueryMatcher does to * ensure correct visibility of keys based on deletes. * <p> * No actual delete {@link KeyValue}s are allowed to pass through this filter - they are always * skipped. * <p> * Note there is a little bit of conceptually odd behavior (though it matches the HBase * specifications) around point deletes ({@link KeyValue} of type {@link Type#Delete}. These deletes * only apply to a single {@link KeyValue} at a single point in time - they essentially completely * 'cover' the existing {@link Put} at that timestamp. However, they don't 'cover' any other * keyvalues at older timestamps. Therefore, if there is a point-delete at ts = 5, and puts at ts = * 4, and ts = 5, we will only allow the put at ts = 4. * <p> * Expects {@link KeyValue}s to arrive in sorted order, with 'Delete' {@link Type} {@link KeyValue}s * ({@link Type#DeleteColumn}, {@link Type#DeleteFamily}, {@link Type#Delete})) before their regular * {@link Type#Put} counterparts. */ public class ApplyAndFilterDeletesFilter extends FilterBase { List<ImmutableBytesPtr> families; private final DeleteTracker coveringDelete = new DeleteTracker(); private Hinter currentHint; private DeleteColumnHinter columnHint = new DeleteColumnHinter(); private DeleteFamilyHinter familyHint = new DeleteFamilyHinter(); /** * Setup the filter to only include the given families. This allows us to seek intelligently pass * families we don't care about. * @param families */ public ApplyAndFilterDeletesFilter(Set<ImmutableBytesPtr> families) { this.families = new ArrayList<ImmutableBytesPtr>(families); Collections.sort(this.families); } public DeleteTracker getDeleteTracker() { return coveringDelete; } private ImmutableBytesPtr getNextFamily(ImmutableBytesPtr family) { int index = Collections.binarySearch(families, family); //doesn't match exactly, be we can find the right next match //this is pretty unlikely, but just incase if(index < 0){ //the actual location of the next match index = -index -1; }else{ //its an exact match for a family, so we get the next entry index = index +1; } //now we have the location of the next entry if(index >= families.size()){ return null; } return families.get(index); } @Override public void reset(){ this.coveringDelete.reset(); } @Override public Cell getNextCellHint(Cell peeked){ return currentHint.getHint(PhoenixKeyValueUtil.maybeCopyCell(peeked)); } @Override public ReturnCode filterKeyValue(Cell next) { KeyValue nextKV = PhoenixKeyValueUtil.maybeCopyCell(next); switch (KeyValue.Type.codeToType(next.getTypeByte())) { /* * DeleteFamily will always sort first because those KVs (we assume) don't have qualifiers (or * rather are null). Therefore, we have to keep a hold of all the delete families until we get * to a Put entry that is covered by that delete (in which case, we are done with the family). */ case DeleteFamily: // track the family to delete. If we are updating the delete, that means we have passed all // kvs in the last column, so we can safely ignore the last deleteFamily, and just use this // one. In fact, it means that all the previous deletes can be ignored because the family must // not match anymore. // We could potentially have multiple deleteFamily for the same row and family // (e.g. upsert row+family, delete it, upsert again, delete again), // in which case we keep the first one since its timestamp dominates if (coveringDelete.deleteFamily == null || !CellUtil.matchingFamily(coveringDelete.deleteFamily, nextKV)) { this.coveringDelete.reset(); this.coveringDelete.deleteFamily = nextKV; } return ReturnCode.SKIP; case DeleteColumn: // similar to deleteFamily, all the newer deletes/puts would have been seen at this point, so // we can safely replace the more recent delete column with the more recent one this.coveringDelete.pointDelete = null; this.coveringDelete.deleteColumn = nextKV; return ReturnCode.SKIP; case Delete: // we are just deleting the single column value at this point. // therefore we just skip this entry and go onto the next one. The only caveat is that // we should still cover the next entry if this delete applies to the next entry, so we // have to keep around a reference to the KV to compare against the next valid entry this.coveringDelete.pointDelete = nextKV; return ReturnCode.SKIP; default: // no covering deletes if (coveringDelete.empty()) { return ReturnCode.INCLUDE; } if (coveringDelete.matchesFamily(nextKV)) { this.currentHint = familyHint; return ReturnCode.SEEK_NEXT_USING_HINT; } if (coveringDelete.matchesColumn(nextKV)) { // hint to the next column this.currentHint = columnHint; return ReturnCode.SEEK_NEXT_USING_HINT; } if (coveringDelete.matchesPoint(nextKV)) { return ReturnCode.SKIP; } } // none of the deletes matches, we are done return ReturnCode.INCLUDE; } /** * Get the next hint for a given peeked keyvalue */ interface Hinter { public abstract Cell getHint(Cell peek); } /** * Entire family has been deleted, so either seek to the next family, or if none are present in * the original set of families to include, seek to the "last possible key"(or rather our best * guess) and be done. */ class DeleteFamilyHinter implements Hinter { @Override public Cell getHint(Cell peeked) { // check to see if we have another column to seek ImmutableBytesPtr nextFamily = getNextFamily(new ImmutableBytesPtr(peeked.getFamilyArray(), peeked.getFamilyOffset(), peeked.getFamilyLength())); if (nextFamily == null) { return KeyValue.LOWESTKEY; } // there is a valid family, so we should seek to that return org.apache.hadoop.hbase.KeyValueUtil.createFirstOnRow(peeked.getRowArray(), peeked.getRowOffset(), peeked.getRowLength(), nextFamily.get(), nextFamily.getOffset(), nextFamily.getLength(), HConstants.EMPTY_BYTE_ARRAY, 0, 0); } } /** * Hint the next column-qualifier after the given keyvalue. We can't be smart like in the * ScanQueryMatcher since we don't know the columns ahead of time. */ class DeleteColumnHinter implements Hinter { @Override public Cell getHint(Cell kv) { return org.apache.hadoop.hbase.KeyValueUtil.createLastOnRow(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength(), kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength()); } } public static class DeleteTracker { public KeyValue deleteFamily; public KeyValue deleteColumn; public KeyValue pointDelete; public void reset() { this.deleteFamily = null; this.deleteColumn = null; this.pointDelete = null; } /** * Check to see if we should skip this {@link KeyValue} based on the family. * <p> * Internally, also resets the currently tracked "Delete Family" marker we are tracking if the * keyvalue is into another family (since CFs sort lexicographically, we can discard the current * marker since it must not be applicable to any more kvs in a linear scan). * @param next * @return <tt>true</tt> if this {@link KeyValue} matches a delete. */ public boolean matchesFamily(KeyValue next) { if (deleteFamily == null) { return false; } if (CellUtil.matchingFamily(deleteFamily, next)) { // falls within the timestamp range if (deleteFamily.getTimestamp() >= next.getTimestamp()) { return true; } } else { // only can reset the delete family because we are on to another family deleteFamily = null; } return false; } /** * @param next * @return */ public boolean matchesColumn(KeyValue next) { if (deleteColumn == null) { return false; } if (CellUtil.matchingFamily(deleteColumn, next) && CellUtil.matchingQualifier(deleteColumn, next)) { // falls within the timestamp range if (deleteColumn.getTimestamp() >= next.getTimestamp()) { return true; } } else { deleteColumn = null; } return false; } /** * @param next * @return */ public boolean matchesPoint(KeyValue next) { // point deletes only apply to the exact KV that they reference, so we only need to ensure // that the timestamp matches exactly. Because we sort by timestamp first, either the next // keyvalue has the exact timestamp or is an older (smaller) timestamp, and we can allow that // one. if (pointDelete != null && CellUtil.matchingFamily(pointDelete, next) && CellUtil.matchingQualifier(pointDelete, next)) { if (pointDelete.getTimestamp() == next.getTimestamp()) { return true; } // clear the point delete since the TS must not be matching pointDelete = null; } return false; } /** * @return <tt>true</tt> if no delete has been set */ public boolean empty() { return deleteFamily == null && deleteColumn == null && pointDelete == null; } } }