Java Code Examples for org.apache.spark.unsafe.types.UTF8String#fromBytes()

The following examples show how to use org.apache.spark.unsafe.types.UTF8String#fromBytes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SparkParquetReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next binary value from the column and returns it as a {@link UTF8String}.
 *
 * @param ignored a previously returned value; this reader does not use it
 * @return the next column value as a UTF8String
 */
@Override
public UTF8String read(UTF8String ignored) {
  Binary next = column.nextBinary();
  ByteBuffer bytes = next.toByteBuffer();

  if (!bytes.hasArray()) {
    // No accessible backing array: fall back to the bytes handed out by the Binary itself.
    return UTF8String.fromBytes(next.getBytes());
  }

  // Address the readable region inside the backing array: array offset plus current position.
  int start = bytes.arrayOffset() + bytes.position();
  int length = bytes.remaining();
  return UTF8String.fromBytes(bytes.array(), start, length);
}
 
Example 2
Source File: RowDataReader.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a constant value into the representation used for the given type.
 *
 * <p>Only DECIMAL, STRING, FIXED and BINARY values are converted; values of any other type
 * are returned unchanged.
 *
 * @param type the type that decides which conversion is applied
 * @param value the constant to convert, may be {@code null}
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 */
private static Object convertConstant(Type type, Object value) {
  if (value == null) {
    return null;
  }

  switch (type.typeId()) {
    case DECIMAL: {
      BigDecimal decimal = (BigDecimal) value;
      return Decimal.apply(decimal);
    }

    case STRING: {
      if (!(value instanceof Utf8)) {
        return UTF8String.fromString(value.toString());
      }
      // Only the first getByteLength() bytes of the Utf8 array are passed on.
      Utf8 avroString = (Utf8) value;
      return UTF8String.fromBytes(avroString.getBytes(), 0, avroString.getByteLength());
    }

    case FIXED: {
      if (value instanceof byte[]) {
        return value;
      }
      if (value instanceof GenericData.Fixed) {
        return ((GenericData.Fixed) value).bytes();
      }
      return ByteBuffers.toByteArray((ByteBuffer) value);
    }

    case BINARY:
      return ByteBuffers.toByteArray((ByteBuffer) value);

    default:
      // Every other type is passed through as-is.
      return value;
  }
}
 
Example 3
Source File: PartitionKey.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of {@code obj} when it is a {@link UTF8String}; any other object is
 * returned as-is.
 *
 * @param obj the value to copy if needed
 * @return a UTF8String built from a fresh byte array, or {@code obj} itself
 */
private Object defensiveCopyIfNeeded(Object obj) {
  if (!(obj instanceof UTF8String)) {
    return obj;
  }

  // bytes backing the UTF8 string might be reused, so build the result from a private copy
  byte[] source = ((UTF8String) obj).getBytes();
  byte[] copy = Arrays.copyOf(source, source.length);
  return UTF8String.fromBytes(copy);
}
 
Example 4
Source File: SparkParquetReaders.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next binary value from the column and returns it as a {@link UTF8String}.
 *
 * @param ignored a previously returned value; this reader does not use it
 * @return the next column value as a UTF8String
 */
@Override
public UTF8String read(UTF8String ignored) {
  Binary value = column.nextBinary();
  ByteBuffer view = value.toByteBuffer();

  if (!view.hasArray()) {
    // The buffer exposes no backing array, so use the bytes provided by the Binary.
    return UTF8String.fromBytes(value.getBytes());
  }

  // The readable bytes start at arrayOffset() + position() and span remaining() bytes.
  int offset = view.arrayOffset() + view.position();
  int numBytes = view.remaining();
  return UTF8String.fromBytes(view.array(), offset, numBytes);
}
 
Example 5
Source File: PartitionKey.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Copies {@link UTF8String} values and passes every other object through untouched.
 *
 * @param obj the value to copy if needed
 * @return a UTF8String built from a copied byte array, or {@code obj} itself
 */
private Object defensiveCopyIfNeeded(Object obj) {
  boolean isString = obj instanceof UTF8String;
  if (!isString) {
    return obj;
  }

  // bytes backing the UTF8 string might be reused, so never hand out the original array
  UTF8String string = (UTF8String) obj;
  byte[] original = string.getBytes();
  return UTF8String.fromBytes(Arrays.copyOf(original, original.length));
}
 
Example 6
Source File: RandomData.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a random {@link UTF8String} of fewer than 50 bytes, drawing each byte from
 * {@code CHARS}.
 *
 * @param random the source of randomness for both the length and the characters
 * @return a UTF8String of random length made of characters picked from {@code CHARS}
 */
private static UTF8String randomString(Random random) {
  // The length is drawn first, then one character index per position.
  byte[] encoded = new byte[random.nextInt(50)];

  int pos = 0;
  while (pos < encoded.length) {
    int pick = random.nextInt(CHARS.length());
    encoded[pos] = (byte) CHARS.charAt(pick);
    pos += 1;
  }

  return UTF8String.fromBytes(encoded);
}
 
Example 7
Source File: SchemaConverters.java    From spark-bigquery-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Converts a single value into the representation used for the given BigQuery field.
 *
 * <p>Repeated fields are converted element by element into a {@link GenericArrayData}.
 * INTEGER, FLOAT, BOOLEAN, DATE, TIME and TIMESTAMP values are returned unchanged; STRING,
 * DATETIME and GEOGRAPHY values become {@link UTF8String}; BYTES become a byte array;
 * NUMERIC becomes a {@link Decimal}; RECORD values are converted via {@code convertAll}.
 *
 * @param field the field describing the value's type and mode
 * @param value the value to convert, may be {@code null}
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 * @throws IllegalStateException if the field's type is not one of the handled types
 */
static Object convert(Field field, Object value) {
    if (value == null) {
        return null;
    }

    if (field.getMode() == Field.Mode.REPEATED) {
        // rather than recurring down we strip off the repeated mode
        // Due to serialization issues, reconstruct the type using reflection:
        // See: https://github.com/googleapis/google-cloud-java/issues/3942
        LegacySQLTypeName fType = LegacySQLTypeName.valueOfStrict(field.getType().name());
        Field nestedField = Field.newBuilder(field.getName(), fType, field.getSubFields())
                // As long as this is not repeated it works, but technically arrays cannot contain
                // nulls, so select required instead of nullable.
                .setMode(Field.Mode.REQUIRED)
                .build();

        // The static type is Object; a repeated field's value is treated as a list of elements.
        @SuppressWarnings("unchecked")
        List<Object> valueList = (List<Object>) value;

        return new GenericArrayData(valueList.stream().map(v -> convert(nestedField, v)).collect(Collectors.toList()));
    }

    if (LegacySQLTypeName.INTEGER.equals(field.getType()) ||
            LegacySQLTypeName.FLOAT.equals(field.getType()) ||
            LegacySQLTypeName.BOOLEAN.equals(field.getType()) ||
            LegacySQLTypeName.DATE.equals(field.getType()) ||
            LegacySQLTypeName.TIME.equals(field.getType()) ||
            LegacySQLTypeName.TIMESTAMP.equals(field.getType())) {
        return value;
    }

    if (LegacySQLTypeName.STRING.equals(field.getType()) ||
            LegacySQLTypeName.DATETIME.equals(field.getType()) ||
            LegacySQLTypeName.GEOGRAPHY.equals(field.getType())) {
        // Utf8#getBytes() exposes the backing array, which is only valid through
        // getByteLength(); bound the string explicitly so trailing bytes are never included.
        Utf8 utf8 = (Utf8) value;
        return UTF8String.fromBytes(utf8.getBytes(), 0, utf8.getByteLength());
    }

    if (LegacySQLTypeName.BYTES.equals(field.getType())) {
        return getBytes((ByteBuffer) value);
    }

    if (LegacySQLTypeName.NUMERIC.equals(field.getType())) {
        // The bytes are read as an unscaled two's-complement integer with the NUMERIC scale.
        byte[] bytes = getBytes((ByteBuffer) value);
        BigDecimal b = new BigDecimal(new BigInteger(bytes), BQ_NUMERIC_SCALE);
        return Decimal.apply(b, BQ_NUMERIC_PRECISION, BQ_NUMERIC_SCALE);
    }

    if (LegacySQLTypeName.RECORD.equals(field.getType())) {
        return convertAll(field.getSubFields(),
                (GenericRecord) value,
                field.getSubFields().stream().map(Field::getName).collect(Collectors.toList()));
    }

    throw new IllegalStateException("Unexpected type: " + field.getType());
}
 
Example 8
Source File: SparkOrcValueReaders.java    From iceberg with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the value at {@code row} from a {@link BytesColumnVector} as a {@link UTF8String}.
 *
 * @param vector the column vector to read from; it is cast to {@link BytesColumnVector}
 * @param row the index of the row to read
 * @return a UTF8String over the row's byte array, start offset and length
 */
@Override
public UTF8String nonNullRead(ColumnVector vector, int row) {
  BytesColumnVector strings = (BytesColumnVector) vector;

  // Each row is described by its byte array plus a start offset and a length.
  byte[] bytes = strings.vector[row];
  int start = strings.start[row];
  int numBytes = strings.length[row];

  return UTF8String.fromBytes(bytes, start, numBytes);
}
 
Example 9
Source File: SparkValueReaders.java    From iceberg with Apache License 2.0 4 votes vote down vote up
private EnumReader(List<String> symbols) {
  this.symbols = new UTF8String[symbols.size()];
  for (int i = 0; i < this.symbols.length; i += 1) {
    this.symbols[i] = UTF8String.fromBytes(symbols.get(i).getBytes(StandardCharsets.UTF_8));
  }
}
 
Example 10
Source File: SimpleRow.java    From indexr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the string stored for the given column as a {@link UTF8String}.
 *
 * @param colId the index of the column to read
 * @return a UTF8String over the column's byte range within {@code data}
 */
@Override
public UTF8String getString(int colId) {
    // The column spans from the previous entry of sums (0 for the first column)
    // up to sums[colId].
    int begin = 0;
    if (colId != 0) {
        begin = sums[colId - 1];
    }
    int numBytes = sums[colId] - begin;

    // NOTE(review): offsets are applied to data.array() directly — confirm the buffer
    // has no non-zero array offset.
    return UTF8String.fromBytes(data.array(), begin, numBytes);
}