Java Code Examples for io.airlift.slice.Slice#toStringUtf8()

The following examples show how to use io.airlift.slice.Slice#toStringUtf8(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AccumuloRowSerializer.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Decodes the value stored at one position of a Block into a plain Java object. Acts as the
 * recursive helper for {@link AccumuloRowSerializer#getArrayFromBlock} and
 * {@link AccumuloRowSerializer#getMapFromBlock}.
 *
 * @param type Presto type of the value
 * @param block Block holding the value
 * @param position Position of the value inside the block
 * @return for array and map types, the result of the matching *FromBlock helper; for
 * Slice-backed types, a String when the type equals VARCHAR and the raw bytes otherwise;
 * for every other type, the native value as read by TypeUtils
 */
static Object readObject(Type type, Block block, int position)
{
    if (Types.isArrayType(type)) {
        return getArrayFromBlock(Types.getElementType(type), block.getObject(position, Block.class));
    }

    if (Types.isMapType(type)) {
        return getMapFromBlock(type, block.getObject(position, Block.class));
    }

    if (type.getJavaType() != Slice.class) {
        return TypeUtils.readNativeValue(type, block, position);
    }

    // Slice-backed value: only VARCHAR is turned into text, everything else stays binary
    Slice rawValue = (Slice) TypeUtils.readNativeValue(type, block, position);
    if (type.equals(VarcharType.VARCHAR)) {
        return rawValue.toStringUtf8();
    }
    return rawValue.getBytes();
}
 
Example 2
Source File: Re2JRegexp.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Compiles the given pattern twice: once as written, and once with a leading {@code .*?}
 * stripped (both fields share one compiled pattern when there is nothing to strip).
 *
 * @param dfaStatesLimit maximum number of DFA states passed to the RE2J options
 * @param dfaRetries number of DFA retries passed to the RE2J options
 * @param pattern regular expression, decoded as UTF-8
 */
public Re2JRegexp(int dfaStatesLimit, int dfaRetries, Slice pattern) {
    this.dfaStatesLimit = dfaStatesLimit;
    this.dfaRetries = dfaRetries;

    Options compileOptions = Options.builder()
            .setAlgorithm(DFA_FALLBACK_TO_NFA)
            .setMaximumNumberOfDFAStates(dfaStatesLimit)
            .setNumberOfDFARetries(dfaRetries)
            .setEventsListener(new RE2JEventsListener())
            .build();

    String fullPattern = pattern.toStringUtf8();
    re2jPattern = Pattern.compile(fullPattern, compileOptions);

    // Strip a leading .*? because the DFA's fast first-byte lookup of a potential match
    // does not work in Pattern.find() when the pattern starts with that prefix.
    java.util.regex.Matcher prefixMatcher = DOT_STAR_PREFIX_PATTERN.matcher(fullPattern);
    checkState(prefixMatcher.matches());
    String corePattern = prefixMatcher.group(CORE_PATTERN_INDEX);

    // Reuse the already compiled pattern when stripping changed nothing
    re2jPatternWithoutDotStartPrefix = corePattern.equals(fullPattern)
            ? re2jPattern
            : Pattern.compile(corePattern, compileOptions);
}
 
Example 3
Source File: UuidOperators.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Casts a varchar value to a UUID.
 *
 * <p>The text is parsed with {@link java.util.UUID#fromString(String)} and must in addition be
 * exactly 36 bytes long (NOTE(review): presumably to reject non-canonical spellings that
 * {@code fromString} tolerates -- confirm).
 *
 * @param slice varchar value to cast
 * @return a Slice wrapping the most and least significant 64 bits of the parsed UUID
 * @throws PrestoException with INVALID_CAST_ARGUMENT when the text cannot be parsed or does
 * not have length 36
 */
@LiteralParameters("x")
@ScalarOperator(CAST)
@SqlType(StandardTypes.UUID)
public static Slice castFromVarcharToUuid(@SqlType("varchar(x)") Slice slice)
{
    try {
        java.util.UUID uuid = java.util.UUID.fromString(slice.toStringUtf8());
        if (slice.length() == 36) {
            return wrappedLongArray(uuid.getMostSignificantBits(), uuid.getLeastSignificantBits());
        }
        // PrestoException is not an IllegalArgumentException, so this escapes the catch below
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Invalid UUID string length: " + slice.length());
    }
    catch (IllegalArgumentException e) {
        // Chain the parser's exception so the underlying reason is not lost
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Cannot cast value to UUID: " + slice.toStringUtf8(), e);
    }
}
 
Example 4
Source File: VarcharToTimestampWithTimeZoneCast.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Casts a varchar value to a short timestamp with time zone of the requested precision.
 * The value is trimmed before parsing; when the parsed text carries no zone, the zone of the
 * session's time zone key is used.
 *
 * @param precision timestamp precision (narrowed to int before use)
 * @param session session supplying the fallback time zone
 * @param value varchar value to cast
 * @return the result of toShort for the trimmed text
 * @throws PrestoException with INVALID_CAST_ARGUMENT when parsing raises IllegalArgumentException
 */
@LiteralParameters({"x", "p"})
@SqlType("timestamp(p) with time zone")
public static long castToShort(@LiteralParameter("p") long precision, ConnectorSession session, @SqlType("varchar(x)") Slice value)
{
    try {
        return toShort(
                (int) precision,
                trim(value).toStringUtf8(),
                timezone -> (timezone == null) ? session.getTimeZoneKey().getZoneId() : ZoneId.of(timezone));
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to timestamp: " + value.toStringUtf8(), e);
    }
}
 
Example 5
Source File: MatchQueryFunction.java    From presto-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a match query on MATCH_COLUMN_SEP for the given filter text and returns the
 * query's string form.
 *
 * @param filter text to match; may be null
 * @return the query rendered via toString() as a UTF-8 Slice, or null when filter is null
 */
@ScalarFunction("match_query")
@Description("es match_query")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchQuery(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter == null) {
        return null;
    }

    QueryBuilder query = QueryBuilders.matchQuery(MATCH_COLUMN_SEP, filter.toStringUtf8());
    String rendered = query.toString();
    return Slices.utf8Slice(rendered);
}
 
Example 6
Source File: TestVarBinaryMinAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the expected minimum over the big-endian byte encodings of the integers in
 * {@code [start, start + length)}.
 *
 * @return the smallest Slice (natural ordering) decoded as a UTF-8 string, or null when
 * length is 0
 */
@Override
protected Object getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    int end = start + length;
    Slice smallest = null;
    for (int value = start; value < end; value++) {
        Slice candidate = Slices.wrappedBuffer(Ints.toByteArray(value));
        if (smallest == null) {
            smallest = candidate;
        }
        else {
            smallest = Ordering.natural().min(smallest, candidate);
        }
    }
    return smallest.toStringUtf8();
}
 
Example 7
Source File: TimeOperators.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Casts a varchar value to TIME. Sessions using legacy timestamp semantics parse the text
 * with the session's time zone key; all others parse it without a time zone.
 *
 * @param session session deciding which parser is used
 * @param value varchar value to cast
 * @return the parsed time value
 * @throws PrestoException with INVALID_CAST_ARGUMENT when parsing raises IllegalArgumentException
 */
@ScalarOperator(CAST)
@LiteralParameters("x")
@SqlType(StandardTypes.TIME)
public static long castFromSlice(ConnectorSession session, @SqlType("varchar(x)") Slice value)
{
    try {
        return session.isLegacyTimestamp()
                ? DateTimeUtils.parseLegacyTime(session.getTimeZoneKey(), value.toStringUtf8())
                : parseTimeWithoutTimeZone(value.toStringUtf8());
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Value cannot be cast to time: " + value.toStringUtf8(), e);
    }
}
 
Example 8
Source File: IpAddressOperators.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Casts a varchar value to IPADDRESS, which is always stored as 16 bytes.
 *
 * <p>A 4-byte (IPv4) address is stored in the IPv4-mapped IPv6 layout: ten zero bytes,
 * two 0xff bytes, then the four address bytes. A 16-byte address is stored unchanged.
 *
 * @param slice textual IP address
 * @return a Slice wrapping the 16-byte representation
 * @throws PrestoException with INVALID_CAST_ARGUMENT when the text is not a valid address, or
 * with GENERIC_INTERNAL_ERROR when the parsed address is neither 4 nor 16 bytes long
 */
@LiteralParameters("x")
@ScalarOperator(CAST)
@SqlType(StandardTypes.IPADDRESS)
public static Slice castFromVarcharToIpAddress(@SqlType("varchar(x)") Slice slice)
{
    byte[] address;
    try {
        address = InetAddresses.forString(slice.toStringUtf8()).getAddress();
    }
    catch (IllegalArgumentException e) {
        // Chain the parser's exception so the underlying reason is not lost
        throw new PrestoException(INVALID_CAST_ARGUMENT, "Cannot cast value to IPADDRESS: " + slice.toStringUtf8(), e);
    }

    byte[] bytes;
    if (address.length == 4) {
        // IPv4-mapped IPv6: bytes 0-9 stay zero, 10-11 are 0xff, 12-15 hold the IPv4 address
        bytes = new byte[16];
        bytes[10] = (byte) 0xff;
        bytes[11] = (byte) 0xff;
        arraycopy(address, 0, bytes, 12, 4);
    }
    else if (address.length == 16) {
        bytes = address;
    }
    else {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Invalid InetAddress length: " + address.length);
    }

    return wrappedBuffer(bytes);
}
 
Example 9
Source File: JsonUtil.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Renders JSON for use in an error message, cutting it off after
 * MAX_JSON_LENGTH_IN_ERROR_MESSAGE bytes.
 *
 * @param json JSON bytes to render
 * @return the whole text when it fits, otherwise its first MAX_JSON_LENGTH_IN_ERROR_MESSAGE
 * bytes decoded as UTF-8 followed by "...(truncated)"
 */
public static String truncateIfNecessaryForErrorMessage(Slice json)
{
    if (json.length() > MAX_JSON_LENGTH_IN_ERROR_MESSAGE) {
        Slice head = json.slice(0, MAX_JSON_LENGTH_IN_ERROR_MESSAGE);
        return head.toStringUtf8() + "...(truncated)";
    }
    return json.toStringUtf8();
}
 
Example 10
Source File: Re2JRegexp.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the pattern as written and, separately, with any leading {@code .*?} removed.
 * When the pattern has no such prefix, both fields refer to the same compiled pattern.
 *
 * @param dfaStatesLimit maximum number of DFA states passed to the RE2J options
 * @param dfaRetries number of DFA retries passed to the RE2J options
 * @param pattern regular expression, decoded as UTF-8
 */
public Re2JRegexp(int dfaStatesLimit, int dfaRetries, Slice pattern)
{
    this.dfaStatesLimit = dfaStatesLimit;
    this.dfaRetries = dfaRetries;

    Options re2jOptions = Options.builder()
            .setAlgorithm(DFA_FALLBACK_TO_NFA)
            .setMaximumNumberOfDFAStates(dfaStatesLimit)
            .setNumberOfDFARetries(dfaRetries)
            .setEventsListener(new RE2JEventsListener())
            .build();

    String original = pattern.toStringUtf8();
    re2jPattern = Pattern.compile(original, re2jOptions);

    // A pattern prefixed with .*? defeats the DFA's fast lookup of the first byte of a
    // potential match inside Pattern.find(), so a prefix-free variant is kept as well.
    java.util.regex.Matcher prefixMatcher = DOT_STAR_PREFIX_PATTERN.matcher(original);
    checkState(prefixMatcher.matches());
    String stripped = prefixMatcher.group(CORE_PATTERN_INDEX);

    boolean hadPrefix = !stripped.equals(original);
    re2jPatternWithoutDotStartPrefix = hadPrefix ? Pattern.compile(stripped, re2jOptions) : re2jPattern;
}
 
Example 11
Source File: WordStemFunction.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Stems a word using the stemmer registered in STEMMERS for the given language.
 *
 * @param slice word to stem
 * @param language key used to look up the stemmer
 * @return the stemmed word
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when no stemmer is registered for the language
 */
@Description("Returns the stem of a word in the given language")
@ScalarFunction
@LiteralParameters("x")
@SqlType("varchar(x)")
public static Slice wordStem(@SqlType("varchar(x)") Slice slice, @SqlType("varchar(2)") Slice language)
{
    Supplier<SnowballProgram> stemmerFactory = STEMMERS.get(language);
    if (stemmerFactory != null) {
        SnowballProgram program = stemmerFactory.get();
        return wordStem(slice, program);
    }
    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Unknown stemmer language: " + language.toStringUtf8());
}
 
Example 12
Source File: TestVarBinaryMaxAggregation.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the expected maximum over the big-endian byte encodings of the integers in
 * {@code [start, start + length)}.
 *
 * @return the largest Slice (natural ordering) decoded as a UTF-8 string, or null when
 * length is 0
 */
@Override
protected Object getExpectedValue(int start, int length)
{
    if (length == 0) {
        return null;
    }

    int end = start + length;
    Slice largest = null;
    for (int value = start; value < end; value++) {
        Slice candidate = Slices.wrappedBuffer(Ints.toByteArray(value));
        if (largest == null) {
            largest = candidate;
        }
        else {
            largest = Ordering.natural().max(largest, candidate);
        }
    }
    return largest.toStringUtf8();
}
 
Example 13
Source File: StringFunctions.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the code points in a slice, failing on the first position where no code point
 * can be decoded.
 *
 * @param slice bytes to scan
 * @return number of code points found
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when tryGetCodePointAt reports a
 * negative value for some position
 */
private static int safeCountCodePoints(Slice slice)
{
    int count = 0;
    int offset = 0;
    while (offset < slice.length()) {
        int codePoint = tryGetCodePointAt(slice, offset);
        if (codePoint < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid UTF-8 encoding in characters: " + slice.toStringUtf8());
        }
        // Advance by the encoded width of the code point that was just read
        offset += lengthOfCodePoint(codePoint);
        count++;
    }
    return count;
}
 
Example 14
Source File: MatchQueryFunction.java    From presto-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the string form of a match query on MATCH_COLUMN_SEP for the supplied filter.
 *
 * @param filter text to match; may be null
 * @return the query's toString() output as a UTF-8 Slice, or null for a null filter
 */
@ScalarFunction("match_query")
@Description("es match_query")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchQuery(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter != null) {
        String text = filter.toStringUtf8();
        QueryBuilder query = QueryBuilders.matchQuery(MATCH_COLUMN_SEP, text);
        return Slices.utf8Slice(query.toString());
    }
    return null;
}
 
Example 15
Source File: GeoFunctions.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Parses WKT text into a geometry and clears its spatial reference.
 *
 * @param input WKT text, decoded as UTF-8
 * @return the parsed geometry with its spatial reference set to null
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when parsing raises IllegalArgumentException
 */
private static OGCGeometry geometryFromText(Slice input)
{
    final OGCGeometry parsed;
    try {
        parsed = OGCGeometry.fromText(input.toStringUtf8());
    }
    catch (IllegalArgumentException e) {
        throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Invalid WKT: " + input.toStringUtf8(), e);
    }

    parsed.setSpatialReference(null);
    return parsed;
}
 
Example 16
Source File: MatchQueryFunction.java    From presto-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a match-phrase query on MATCH_COLUMN_SEP for the given filter text and returns the
 * query's string form.
 *
 * @param filter phrase to match; may be null
 * @return the query rendered via toString() as a UTF-8 Slice, or null when filter is null
 */
@ScalarFunction("match_phrase")
@Description("es match_phrase")
@SqlType(StandardTypes.VARCHAR)
@SqlNullable
public static Slice matchPhrase(
        @SqlType(StandardTypes.VARCHAR) Slice filter)
{
    if (filter == null) {
        return null;
    }

    QueryBuilder query = QueryBuilders.matchPhraseQuery(MATCH_COLUMN_SEP, filter.toStringUtf8());
    String rendered = query.toString();
    return Slices.utf8Slice(rendered);
}
 
Example 17
Source File: SplitToMapFunction.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Splits {@code string} into entries on {@code entryDelimiter}, splits every entry into a key
 * and a value on {@code keyValueDelimiter}, and returns the pairs as a single map block.
 *
 * @param mapType map type used to read the finished entry back out of the block builder
 * @param string text to split
 * @param entryDelimiter separator between entries; must be non-empty
 * @param keyValueDelimiter separator between key and value; must be non-empty and differ from entryDelimiter
 * @return the map block built from the parsed pairs
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when an entry does not contain the
 * key-value delimiter exactly once, or when a key occurs more than once
 */
@SqlType("map(varchar,varchar)")
public Block splitToMap(@TypeParameter("map(varchar,varchar)") Type mapType, @SqlType(StandardTypes.VARCHAR) Slice string, @SqlType(StandardTypes.VARCHAR) Slice entryDelimiter, @SqlType(StandardTypes.VARCHAR) Slice keyValueDelimiter)
{
    checkCondition(entryDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "entryDelimiter is empty");
    checkCondition(keyValueDelimiter.length() > 0, INVALID_FUNCTION_ARGUMENT, "keyValueDelimiter is empty");
    checkCondition(!entryDelimiter.equals(keyValueDelimiter), INVALID_FUNCTION_ARGUMENT, "entryDelimiter and keyValueDelimiter must not be the same");

    Map<Slice, Slice> pairs = new HashMap<>();
    int cursor = 0;
    while (cursor < string.length()) {
        // An entry runs up to the next entry delimiter, or to the end of the input for the last one
        int delimiterAt = string.indexOf(entryDelimiter, cursor);
        boolean lastEntry = delimiterAt < 0;
        int entryLimit = lastEntry ? string.length() : delimiterAt;
        Slice entry = string.slice(cursor, entryLimit - cursor);

        int separatorAt = entry.indexOf(keyValueDelimiter);
        if (separatorAt < 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. Bad input: '" + entry.toStringUtf8() + "'");
        }

        int valueOffset = separatorAt + keyValueDelimiter.length();
        Slice entryKey = entry.slice(0, separatorAt);
        Slice entryValue = entry.slice(valueOffset, entry.length() - valueOffset);

        // A second delimiter inside the value means the entry had more than one
        if (entryValue.indexOf(keyValueDelimiter) >= 0) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Key-value delimiter must appear exactly once in each entry. Bad input: '" + entry.toStringUtf8() + "'");
        }
        if (pairs.containsKey(entryKey)) {
            throw new PrestoException(INVALID_FUNCTION_ARGUMENT, format("Duplicate keys (%s) are not allowed", entryKey.toStringUtf8()));
        }
        pairs.put(entryKey, entryValue);

        if (lastEntry) {
            break;
        }
        // The next entry begins right after the delimiter that ended this one
        cursor = delimiterAt + entryDelimiter.length();
    }

    if (pageBuilder.isFull()) {
        pageBuilder.reset();
    }
    BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(0);
    BlockBuilder singleMapBlockBuilder = blockBuilder.beginBlockEntry();
    pairs.forEach((mapKey, mapValue) -> {
        VARCHAR.writeSlice(singleMapBlockBuilder, mapKey);
        VARCHAR.writeSlice(singleMapBlockBuilder, mapValue);
    });
    blockBuilder.closeEntry();
    pageBuilder.declarePosition();

    int writtenPosition = blockBuilder.getPositionCount() - 1;
    return (Block) mapType.getObject(blockBuilder, writtenPosition);
}
 
Example 18
Source File: JoniRegexpFunctions.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Expands a replacement template for one regex match and appends the expansion to
 * {@code result}.
 *
 * <p>The template is scanned byte by byte. {@code $} introduces a group reference (named via
 * {@code ${name}} or numeric via {@code $N}), {@code \} makes the following byte literal, and
 * every other byte is copied through unchanged. Group contents are taken from {@code source}
 * using the offsets recorded in {@code region}.
 *
 * @param result output receiving the expanded bytes
 * @param source input the match was found in; group bytes are sliced from it
 * @param pattern compiled regex, used to resolve group names to numbers
 * @param region group begin/end offsets of the current match
 * @param replacement replacement template
 * @throws PrestoException with INVALID_FUNCTION_ARGUMENT when the template ends right after
 * {@code $} or {@code \}, when {@code $} is followed by something other than <code>{</code>
 * or a digit, or when a referenced group is unknown
 */
private static void appendReplacement(SliceOutput result, Slice source, Regex pattern, Region region, Slice replacement)
{
    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't starts with \ or $ is considered regular bytes

    int idx = 0;

    while (idx < replacement.length()) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') { // case 1 in the above comment
                idx++;
                int startCursor = idx;
                // Scan to the closing brace; if there is none the scan stops at the end of the
                // template and everything after "${" is treated as the group name
                while (idx < replacement.length()) {
                    nextByte = replacement.getByte(idx);
                    if (nextByte == '}') {
                        break;
                    }
                    idx++;
                }
                byte[] groupName = replacement.getBytes(startCursor, idx - startCursor);
                try {
                    backref = pattern.nameToBackrefNumber(groupName, 0, groupName.length, region);
                }
                catch (ValueException e) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group { " + new String(groupName, StandardCharsets.UTF_8) + " }");
                }
                // Step past the closing brace
                idx++;
            }
            else { // case 2 in the above comment
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (region.numRegs <= backref) {
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < replacement.length()) { // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    if (region.numRegs <= newBackref) {
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            int beg = region.beg[backref];
            int end = region.end[backref];
            if (beg != -1 && end != -1) { // copy the group only if it has offsets; an offset of -1 means the group doesn't exist in the current match and is skipped
                result.appendBytes(source.slice(beg, end - beg));
            }
        }
        else { // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                idx++;
                if (idx == replacement.length()) { // not using checkArgument because `.toStringUtf8` is expensive
                    throw new PrestoException(INVALID_FUNCTION_ARGUMENT, "Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                // The byte after the backslash is emitted as-is
                nextByte = replacement.getByte(idx);
            }
            result.appendByte(nextByte);
            idx++;
        }
    }
}
 
Example 19
Source File: TestJsonExtract.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Test helper: runs JsonExtract over the input with a JsonValueJsonExtractor-based extractor
 * for the given path.
 *
 * @param inputJson JSON document as text
 * @param jsonPath path expression handed to generateExtractor
 * @return the extracted value as a string, or null when extraction yields null
 */
private static String doJsonExtract(String inputJson, String jsonPath)
{
    Slice extracted = JsonExtract.extract(Slices.utf8Slice(inputJson), generateExtractor(jsonPath, new JsonValueJsonExtractor()));
    if (extracted == null) {
        return null;
    }
    return extracted.toStringUtf8();
}
 
Example 20
Source File: SliceUtils.java    From hive-third-functions with Apache License 2.0 4 votes vote down vote up
/**
 * Expands a replacement template for the matcher's current match and writes the expansion
 * to {@code so}.
 *
 * <p>The template is scanned byte by byte. {@code $} introduces a group reference (named via
 * {@code ${name}} or numeric via {@code $N}), {@code \} makes the following byte literal, and
 * every other byte is copied through unchanged.
 *
 * @param so output receiving the expanded bytes
 * @param replacement replacement template
 * @param matcher matcher supplying group contents, the group count and the named-group table
 * @throws IllegalArgumentException when the template ends right after {@code $} or {@code \},
 * or when {@code $} is followed by something other than <code>{</code> or a digit
 * @throws IndexOutOfBoundsException when a referenced group name or number is unknown
 */
static void appendReplacement(SliceOutput so, Slice replacement, Matcher matcher) {
    int idx = 0;

    // Handle the following items:
    // 1. ${name};
    // 2. $0, $1, $123 (group 123, if exists; or group 12, if exists; or group 1);
    // 3. \\, \$, \t (literal 't').
    // 4. Anything that doesn't starts with \ or $ is considered regular bytes
    while (idx < replacement.length()) {
        byte nextByte = replacement.getByte(idx);
        if (nextByte == '$') {
            idx++;
            if (idx == replacement.length()) {
                throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
            }
            nextByte = replacement.getByte(idx);
            int backref;
            if (nextByte == '{') { // case 1 in the above comment
                idx++;
                int startCursor = idx;
                // Scan to the closing brace; if there is none the scan stops at the end of the
                // template and everything after "${" is treated as the group name
                while (idx < replacement.length()) {
                    nextByte = replacement.getByte(idx);
                    if (nextByte == '}') {
                        break;
                    }
                    idx++;
                }
                String groupName = replacement.slice(startCursor, idx - startCursor).toStringUtf8();
                Integer namedGroupIndex = matcher.pattern().re2().namedGroupIndexes.get(groupName);
                if (namedGroupIndex == null) {
                    throw new IndexOutOfBoundsException("Illegal replacement sequence: unknown group " + groupName);
                }
                backref = namedGroupIndex;
                // Step past the closing brace
                idx++;
            } else { // case 2 in the above comment
                backref = nextByte - '0';
                if (backref < 0 || backref > 9) {
                    throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                if (matcher.groupCount() < backref) {
                    throw new IndexOutOfBoundsException("Illegal replacement sequence: unknown group " + backref);
                }
                idx++;
                while (idx < replacement.length()) { // Adaptive group number: find largest group num that is not greater than actual number of groups
                    int nextDigit = replacement.getByte(idx) - '0';
                    if (nextDigit < 0 || nextDigit > 9) {
                        break;
                    }
                    int newBackref = (backref * 10) + nextDigit;
                    if (matcher.groupCount() < newBackref) {
                        break;
                    }
                    backref = newBackref;
                    idx++;
                }
            }
            Slice group = matcher.group(backref);
            // A null group contributes nothing to the output
            if (group != null) {
                so.writeBytes(group);
            }
        } else { // case 3 and 4 in the above comment
            if (nextByte == '\\') {
                idx++;
                if (idx == replacement.length()) {
                    throw new IllegalArgumentException("Illegal replacement sequence: " + replacement.toStringUtf8());
                }
                // The byte after the backslash is emitted as-is
                nextByte = replacement.getByte(idx);
            }
            so.appendByte(nextByte);
            idx++;
        }
    }
}