Java Code Examples for io.airlift.slice.Slice#length()

The following examples show how to use io.airlift.slice.Slice#length(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Chars.java    From presto with Apache License 2.0 7 votes vote down vote up
/**
 * Right-pads {@code slice} with space bytes until its code point count, as reported by
 * {@code countCodePoints}, reaches {@code length}.
 *
 * @param slice the text to pad
 * @param length the desired length in code points
 * @return {@code slice} itself when it already has {@code length} code points, otherwise a newly
 *         allocated slice holding the original bytes followed by the padding spaces
 * @throws IllegalArgumentException if {@code slice} already has more than {@code length} code points
 */
public static Slice padSpaces(Slice slice, int length)
{
    int codePoints = countCodePoints(slice);

    if (length < codePoints) {
        throw new IllegalArgumentException("pad length is smaller than slice length");
    }
    if (length == codePoints) {
        return slice;
    }

    // Every padding space occupies one byte, so the byte size grows by the number of missing code points.
    int originalBytes = slice.length();
    int paddedBytes = originalBytes + length - codePoints;

    Slice padded = Slices.allocate(paddedBytes);
    padded.setBytes(0, slice);

    int position = originalBytes;
    while (position < paddedBytes) {
        padded.setByte(position, ' ');
        position++;
    }
    return padded;
}
 
Example 2
Source File: VarbinaryFunctions.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes a hex string into the bytes it represents, two input bytes per output byte.
 *
 * @param slice the hex digits; its byte length must be even
 * @return a slice wrapping the decoded bytes
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if the input length is odd
 */
@Description("Decode hex encoded binary data")
@ScalarFunction("from_hex")
@LiteralParameters("x")
@SqlType(StandardTypes.VARBINARY)
public static Slice fromHexVarchar(@SqlType("varchar(x)") Slice slice)
{
    int hexLength = slice.length();
    if (hexLength % 2 != 0) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "invalid input length " + hexLength);
    }

    byte[] decoded = new byte[hexLength / 2];
    for (int target = 0; target < decoded.length; target++) {
        int source = target * 2;
        // The first digit of each pair is the high nibble, the second the low nibble.
        int high = hexDigitCharToInt(slice.getByte(source));
        int low = hexDigitCharToInt(slice.getByte(source + 1));
        decoded[target] = (byte) ((high << 4) | low);
    }
    return Slices.wrappedBuffer(decoded);
}
 
Example 3
Source File: StringEncoding.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Writes every position of {@code block} to {@code output}, closing one entry on
 * {@code encodeOutput} per position.
 *
 * <p>Null positions write no bytes. A zero-length value is written as the single marker byte
 * {@code HIVE_EMPTY_STRING_BYTE}; any other value is written as its raw bytes.
 */
@Override
public void encodeColumn(Block block, SliceOutput output, EncodeOutput encodeOutput)
{
    int position = 0;
    while (position < block.getPositionCount()) {
        if (!block.isNull(position)) {
            Slice value = type.getSlice(block, position);
            if (value.length() != 0) {
                output.writeBytes(value);
            }
            else {
                output.writeByte(HIVE_EMPTY_STRING_BYTE);
            }
        }
        // An entry is closed for every position, including nulls.
        encodeOutput.closeEntry();
        position++;
    }
}
 
Example 4
Source File: StringFunctions.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Locates the {@code instance}-th occurrence of {@code substring} in {@code string}, searching
 * backwards from the end of the string.
 *
 * @param string the text to search
 * @param substring the text to look for
 * @param instance which occurrence (counted from the end) to report; must be positive
 * @return the 1-based position, in code points, at which the requested occurrence starts;
 *         {@code 0} if there are fewer than {@code instance} occurrences; {@code 1} if
 *         {@code substring} is empty
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if {@code instance} is not positive
 */
private static long stringPositionFromEnd(Slice string, Slice substring, long instance)
{
    if (instance <= 0) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "'instance' must be a positive or negative number.");
    }
    if (substring.length() == 0) {
        return 1;
    }

    // Decode both operands once; they do not change while stepping through the matches.
    String haystack = string.toStringUtf8();
    String needle = substring.toStringUtf8();

    int foundInstances = 0;
    int index = haystack.length();
    do {
        // step backwards through string; searching from index - 1 skips the match found previously
        index = haystack.lastIndexOf(needle, index - 1);
        if (index < 0) {
            return 0;
        }
        foundInstances++;
    }
    while (foundInstances < instance);

    // lastIndexOf reports a UTF-16 code unit offset; convert it to a code point position so that
    // supplementary characters before the match are counted once rather than twice.
    return haystack.codePointCount(0, index) + 1;
}
 
Example 5
Source File: BooleanColumnWriter.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the row index stream for this column: one {@code RowGroupIndex} per entry in
 * {@code rowGroupColumnStatistics}, serialized through {@code metadataWriter}.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return a single-element list holding the {@code ROW_INDEX} stream and its data
 * @throws IOException declared for the metadata write
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter)
        throws IOException
{
    checkState(closed);

    List<BooleanStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    for (int i = 0; i < rowGroupColumnStatistics.size(); i++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int rowGroup = i;
        ColumnStatistics statistics = rowGroupColumnStatistics.get(rowGroup);
        BooleanStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(all -> all.get(rowGroup));
        indexBuilder.add(new RowGroupIndex(
                createBooleanColumnPositionList(compressed, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false);
    return ImmutableList.of(new StreamDataOutput(rowIndexes, rowIndexStream));
}
 
Example 6
Source File: ColorFunctions.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code value} in an ANSI color sequence: the escape for {@code color}, the text itself,
 * then {@code ANSI_RESET}.
 *
 * @param value the text to colorize
 * @param color the color value handed to {@code ansiColorEscape}
 * @return the colorized text as a UTF-8 slice
 */
@ScalarFunction
@LiteralParameters({"x", "y"})
@Constraint(variable = "y", expression = "min(2147483647, x + 15)")
// Color formatting uses 15 characters. Note that if the ansiColorEscape function implementation
// changes, this value may be invalidated.
@SqlType("varchar(y)")
public static Slice render(@SqlType("varchar(x)") Slice value, @SqlType(ColorType.NAME) long color)
{
    StringBuilder rendered = new StringBuilder(value.length());

    // escape sequence, then the text, then the reset sequence
    rendered.append(ansiColorEscape(color));
    rendered.append(value.toStringUtf8());
    rendered.append(ANSI_RESET);

    String colored = rendered.toString();
    return utf8Slice(colored);
}
 
Example 7
Source File: UuidOperators.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Casts a varchar to a UUID, stored as a slice wrapping the UUID's most and least significant
 * 64 bits.
 *
 * @param slice the textual UUID; must be exactly 36 bytes long
 * @return a slice wrapping the two longs of the parsed UUID
 * @throws PrestoException with {@code INVALID_CAST_ARGUMENT} if the text cannot be parsed as a
 *         UUID (the parse failure is attached as the cause) or if it parses but is not 36 bytes long
 */
@LiteralParameters("x")
@ScalarOperator(CAST)
@SqlType(StandardTypes.UUID)
public static Slice castFromVarcharToUuid(@SqlType("varchar(x)") Slice slice)
{
    try {
        java.util.UUID uuid = java.util.UUID.fromString(slice.toStringUtf8());
        // Parsing alone is not sufficient: the input is additionally required to be exactly 36 bytes long.
        if (slice.length() == 36) {
            return wrappedLongArray(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
        }
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Invalid UUID string length: " + slice.length());
    }
    catch (IllegalArgumentException e) {
        // Keep the parser's exception as the cause so the underlying reason is not lost.
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Cannot cast value to UUID: " + slice.toStringUtf8(), e);
    }
}
 
Example 8
Source File: ColorFunctions.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a short-form color of the shape {@code #rgb} (one hex digit per channel).
 *
 * @param color the candidate color text
 * @return the value of {@code rgb(red, green, blue)} narrowed to {@code int}, or {@code -1} if
 *         the input is not exactly four bytes, does not start with {@code '#'}, or contains a
 *         non-hex digit
 */
@VisibleForTesting
static int parseRgb(Slice color)
{
    boolean shortHexForm = color.length() == 4 && color.getByte(0) == '#';
    if (!shortHexForm) {
        return -1;
    }

    int[] channels = new int[3];
    for (int channel = 0; channel < channels.length; channel++) {
        int nibble = Character.digit((char) color.getByte(channel + 1), 16);
        if (nibble == -1) {
            return -1;
        }
        // replicate the nibble to turn a color of the form #rgb => #rrggbb (css semantics)
        channels[channel] = (nibble << 4) | nibble;
    }

    return (int) rgb(channels[0], channels[1], channels[2]);
}
 
Example 9
Source File: OrcInputStream.java    From spliceengine with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Decompresses the Snappy data in {@code in} into this stream's {@code buffer}, growing the
 * buffer first via {@code allocateOrGrowBuffer}.
 *
 * @param in the compressed input; its base object is cast to {@code byte[]}
 * @return the value returned by {@code Snappy.uncompress}
 * @throws IOException declared for the decompression calls
 * @throws IllegalArgumentException (via {@code checkArgument}) if the uncompressed length
 *         exceeds {@code maxBufferSize}
 */
private int decompressSnappy(Slice in)
        throws IOException
{
    // Translate the slice's address into an index within its backing byte array.
    byte[] compressed = (byte[]) in.getBase();
    int compressedOffset = (int) (in.getAddress() - ARRAY_BYTE_BASE_OFFSET);

    int uncompressedLength = Snappy.getUncompressedLength(compressed, compressedOffset);
    checkArgument(uncompressedLength <= maxBufferSize, "Snappy requires buffer (%s) larger than max size (%s)", uncompressedLength, maxBufferSize);
    allocateOrGrowBuffer(uncompressedLength, false);

    return Snappy.uncompress(compressed, compressedOffset, in.length(), buffer, 0);
}
 
Example 10
Source File: VarbinaryFunctions.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a 4-byte big endian two's complement value.
 *
 * @param slice the encoded value; must be exactly {@code Integer.BYTES} long
 * @return the int read from offset 0 with its byte order reversed, widened to {@code long}
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if the input is not 4 bytes long
 */
@Description("Decode bigint value from a 32-bit 2's complement big endian varbinary")
@ScalarFunction("from_big_endian_32")
@SqlType(StandardTypes.INTEGER)
public static long fromBigEndian32(@SqlType(StandardTypes.VARBINARY) Slice slice)
{
    int byteCount = slice.length();
    if (byteCount == Integer.BYTES) {
        // The raw int is byte-swapped to obtain the big endian interpretation.
        int raw = slice.getInt(0);
        return Integer.reverseBytes(raw);
    }
    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "expected 4-byte input, but got instead: " + byteCount);
}
 
Example 11
Source File: BinaryStatisticsBuilder.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Records one non-null value: adds its byte length to {@code sum} and increments
 * {@code nonNullValueCount}.
 *
 * @param value the value to account for; must not be null
 */
@Override
public void addValue(Slice value)
{
    int byteLength = requireNonNull(value, "value is null").length();

    nonNullValueCount++;
    sum += byteLength;
}
 
Example 12
Source File: UDFStringLevenshteinDistance.java    From hive-third-functions with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the code points in {@code slice}, failing when {@code tryGetCodePointAt} reports a
 * negative value for any position.
 *
 * @param slice the UTF-8 text to measure
 * @return the number of code points walked
 * @throws HiveException if a negative code point is reported
 */
private static int safeCountCodePoints(Slice slice) throws HiveException {
    int count = 0;
    int offset = 0;
    while (offset < slice.length()) {
        int codePoint = tryGetCodePointAt(slice, offset);
        if (codePoint < 0) {
            throw new HiveException("Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        // Advance by the encoded width of the code point just read.
        offset += lengthOfCodePoint(codePoint);
        count++;
    }
    return count;
}
 
Example 13
Source File: SliceDictionaryColumnWriter.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the row index stream for this column. When the writer is direct-encoded the call is
 * delegated to {@code directColumnWriter}; otherwise one {@code RowGroupIndex} is produced per
 * entry in {@code rowGroups} and serialized through {@code metadataWriter}.
 *
 * @param metadataWriter writer used to serialize the row group indexes
 * @return the index streams for this column
 * @throws IOException declared for the metadata write
 */
@Override
public List<StreamDataOutput> getIndexStreams(CompressedMetadataWriter metadataWriter)
        throws IOException
{
    checkState(closed);

    if (directEncoded) {
        return directColumnWriter.getIndexStreams(metadataWriter);
    }

    List<LongStreamCheckpoint> dataCheckpoints = dataStream.getCheckpoints();
    Optional<List<BooleanStreamCheckpoint>> presentCheckpoints = presentStream.getCheckpoints();

    ImmutableList.Builder<RowGroupIndex> indexBuilder = ImmutableList.builder();
    for (int i = 0; i < rowGroups.size(); i++) {
        // The lambda below needs an effectively final copy of the loop counter.
        final int rowGroup = i;
        ColumnStatistics statistics = rowGroups.get(rowGroup).getColumnStatistics();
        LongStreamCheckpoint dataCheckpoint = dataCheckpoints.get(rowGroup);
        Optional<BooleanStreamCheckpoint> presentCheckpoint = presentCheckpoints.map(all -> all.get(rowGroup));
        indexBuilder.add(new RowGroupIndex(
                createSliceColumnPositionList(compression != NONE, dataCheckpoint, presentCheckpoint),
                statistics));
    }

    Slice rowIndexes = metadataWriter.writeRowIndexes(indexBuilder.build());
    Stream rowIndexStream = new Stream(columnId, StreamKind.ROW_INDEX, rowIndexes.length(), false);
    return ImmutableList.of(new StreamDataOutput(rowIndexes, rowIndexStream));
}
 
Example 14
Source File: TestLongDecimalType.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a block holding a single value: {@code value} parsed as a {@code BigDecimal}, encoded
 * with {@code encodeScaledValue} and written through {@code TYPE}.
 *
 * @param value the decimal literal to encode
 * @return the built block
 */
private Block decimalAsBlock(String value)
{
    BigDecimal decimal = new BigDecimal(value);
    Slice encoded = encodeScaledValue(decimal);

    // Sized for exactly one entry of the encoded length.
    BlockBuilder singleValueBuilder = new VariableWidthBlockBuilder(null, 1, encoded.length());
    TYPE.writeSlice(singleValueBuilder, encoded);
    return singleValueBuilder.build();
}
 
Example 15
Source File: StringFunctions.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the code points in {@code slice}, failing when {@code tryGetCodePointAt} reports a
 * negative value for any position.
 *
 * @param slice the UTF-8 text to measure
 * @return the number of code points walked
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if a negative code point is reported
 */
private static int safeCountCodePoints(Slice slice)
{
    int total = 0;
    int cursor = 0;
    while (cursor < slice.length()) {
        int codePoint = tryGetCodePointAt(slice, cursor);
        if (codePoint < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        total++;
        // Move past the bytes occupied by the code point just read.
        cursor += lengthOfCodePoint(codePoint);
    }
    return total;
}
 
Example 16
Source File: TestRcFileReaderManual.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Reads column 0 (as SMALLINT) from the RCFile held in {@code data}, restricted to the given
 * byte range, and returns every value in read order.
 *
 * <p>To simplify the testing, a negative {@code offset} is moved to 0 (shrinking {@code length}
 * by the same amount), and {@code length} is truncated so the range does not run off the end of
 * the file.
 *
 * @param data the RCFile contents
 * @param offset start of the range to read
 * @param length size of the range to read
 * @return the values read from column 0
 * @throws IOException declared for the reader calls
 */
private static List<Integer> readValues(Slice data, int offset, int length)
        throws IOException
{
    // A negative offset shortens the range by the part that lies before the file start.
    int rangeLength = (offset < 0) ? length + offset : length;
    int rangeStart = Math.max(offset, 0);

    // Clip the range to the end of the file.
    if (rangeStart + rangeLength > data.length()) {
        rangeLength = data.length() - rangeStart;
    }

    RcFileReader reader = new RcFileReader(
            new SliceRcFileDataSource(data),
            new BinaryRcFileEncoding(),
            ImmutableMap.of(0, SMALLINT),
            new BogusRcFileCodecFactory(),
            rangeStart,
            rangeLength,
            DataSize.of(8, MEGABYTE));

    ImmutableList.Builder<Integer> collected = ImmutableList.builder();
    while (reader.advance() >= 0) {
        Block column = reader.readBlock(0);
        int row = 0;
        while (row < column.getPositionCount()) {
            collected.add((int) SMALLINT.getLong(column, row));
            row++;
        }
    }

    return collected.build();
}
 
Example 17
Source File: StreamDataOutput.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stream output backed by an already materialized slice.
 *
 * <p>Delegates to the other constructor with a writer function that copies all of
 * {@code slice} into the supplied output and returns the slice's length.
 *
 * @param slice the bytes to write when the stream is emitted
 * @param stream the stream description associated with these bytes
 */
public StreamDataOutput(Slice slice, Stream stream)
{
    this(
            sliceOutput -> {
                sliceOutput.writeBytes(slice);
                return slice.length();
            },
            stream);
}
 
Example 18
Source File: Re2JRegexpReplaceLambdaFunction.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces every match of {@code pattern} in {@code source} with the result of applying
 * {@code replaceFunction} to that match's capturing groups.
 *
 * <p>For each match the capturing groups 1..groupCount are appended to a block builder obtained
 * from {@code pageBuilder} (a null entry is appended for a group whose slice is null), the region
 * holding just those groups is passed to the lambda, and the lambda's result is appended to the
 * output in place of the matched text.
 *
 * @param source the text to search
 * @param pattern the compiled pattern
 * @param replaceFunction lambda receiving the capturing groups of a match and returning the replacement
 * @return {@code source} itself when there is no match; {@code null} as soon as the lambda
 *         returns null for any match; otherwise the text with all matches replaced
 */
@LiteralParameters("x")
@SqlType("varchar")
@SqlNullable
public Slice regexpReplace(
        @SqlType("varchar") Slice source,
        @SqlType(Re2JRegexpType.NAME) Re2JRegexp pattern,
        @SqlType("function(array(varchar), varchar(x))") UnaryFunctionInterface replaceFunction)
{
    // If there is no match we can simply return the original source without doing copy.
    Matcher matcher = pattern.matcher(source);
    if (!matcher.find()) {
        return source;
    }

    // Initial capacity is the source length; the output is a dynamic (growable) slice output.
    SliceOutput output = new DynamicSliceOutput(source.length());

    // Prepare a BlockBuilder that will be used to create the target block
    // that will be passed to the lambda function.
    // pageBuilder is not declared in this method; it is reset here once it reports being full.
    if (pageBuilder.isFull()) {
        pageBuilder.reset();
    }
    BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);

    int groupCount = matcher.groupCount();
    // Offset in source up to which bytes have already been consumed (copied or replaced).
    int appendPosition = 0;

    // The first match was already found above, hence do/while.
    do {
        int start = matcher.start();
        int end = matcher.end();

        // Append the un-matched part
        if (appendPosition < start) {
            output.writeBytes(source, appendPosition, start - appendPosition);
        }
        appendPosition = end;

        // Append the capturing groups to the target block that will be passed to lambda
        for (int i = 1; i <= groupCount; i++) {
            Slice matchedGroupSlice = matcher.group(i);
            if (matchedGroupSlice != null) {
                VARCHAR.writeSlice(blockBuilder, matchedGroupSlice);
            }
            else {
                blockBuilder.appendNull();
            }
        }
        pageBuilder.declarePositions(groupCount);
        // Only the groupCount positions just appended (the tail of the builder) belong to this match.
        Block target = blockBuilder.getRegion(blockBuilder.getPositionCount() - groupCount, groupCount);

        // Call the lambda function to replace the block, and append the result to output
        Slice replaced = (Slice) replaceFunction.apply(target);
        if (replaced == null) {
            // replacing a substring with null (unknown) makes the entire string null
            return null;
        }
        output.appendBytes(replaced);
    }
    while (matcher.find());

    // Append the rest of un-matched
    output.writeBytes(source, appendPosition, source.length() - appendPosition);
    return output.slice();
}
 
Example 19
Source File: MachineInput.java    From hive-third-functions with Apache License 2.0 4 votes vote down vote up
/**
 * Captures {@code slice} together with its base object, address and length in this instance's
 * fields.
 *
 * @param slice the input to wrap
 */
MachineInput(Slice slice) {
    this.slice = slice;
    // Snapshot the slice's coordinates into fields.
    this.length = slice.length();
    this.address = slice.getAddress();
    this.base = slice.getBase();
}
 
Example 20
Source File: JoniRegexpFunctions.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Expands the {@code replacement} template for one match and appends the result to {@code result}.
 *
 * <p>The template is scanned byte by byte. {@code $} introduces a group reference (by name in
 * braces, or by number), {@code \} makes the following byte literal, and every other byte is
 * copied as is.
 *
 * @param result output that receives the expanded bytes
 * @param source the text the match was found in; referenced groups are copied from it
 * @param pattern the regex, used to resolve group names to group numbers
 * @param region the begin/end offsets of each group for the current match
 * @param replacement the replacement template
 * @throws PrestoException with {@code INVALID_FUNCTION_ARGUMENT} if the template ends right after
 *         {@code $} or {@code \}, if {@code $} is followed by something other than {@code '{'} or
 *         a digit, or if the referenced group is unknown
 */
private static void appendReplacement(SliceOutput result, Slice source, Regex pattern, Region region, Slice replacement)
{
    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't starts with \ or $ is considered regular bytes

    // Cursor into the replacement template.
    int idx = 0;

    while (idx < replacement.length()) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') { // case 1 in the above comment
                idx++;
                int startCursor = idx;
                // Scan forward to the closing brace; the bytes in between are the group name.
                // NOTE(review): if no '}' is found the loop stops at the end of the replacement and the
                // remaining bytes are used as the group name; no explicit error is raised here for the
                // missing brace - confirm this is intended.
                while (idx < replacement.length()) {
                    nextByte = replacement.getByte(idx);
                    if (nextByte == '}') {
                        break;
                    }
                    idx++;
                }
                byte[] groupName = replacement.getBytes(startCursor, idx - startCursor);
                try {
                    backref = pattern.nameToBackrefNumber(groupName, 0, groupName.length, region);
                }
                catch (ValueException e) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group { " + new String(groupName, StandardCharsets.UTF_8) + " }");
                }
                // Step past the closing brace.
                idx++;
            }
            else { // case 2 in the above comment
                // The first digit is mandatory and must name an existing group.
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (region.numRegs <= backref) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < replacement.length()) { // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    // Stop before consuming a digit that would make the number exceed the available
                    // groups; that digit is then handled as a regular byte by the outer loop.
                    if (region.numRegs <= newBackref) {
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            int beg = region.beg[backref];
            int end = region.end[backref];
            if (beg != -1 && end != -1) { // append only when both offsets are set; otherwise the group is skipped for this match
                result.appendBytes(source.slice(beg, end - beg));
            }
        }
        else { // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                // A backslash makes the following byte literal; the backslash itself is dropped.
                idx++;
                if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                nextByte = replacement.getByte(idx);
            }
            result.appendByte(nextByte);
            idx++;
        }
    }
}