org.apache.arrow.vector.types.pojo.Field Java Examples

The following examples show how to use org.apache.arrow.vector.types.pojo.Field. Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
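
Before diving into the examples, here is a minimal, self-contained sketch of the Field API itself (all field and schema names are illustrative): a Field pairs a name with a FieldType (nullability, ArrowType, optional dictionary encoding) and, for nested types such as LIST and STRUCT, a list of child fields describing the element or member types.

import java.util.Arrays;
import java.util.Collections;

import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

public class FieldBasics {
    public static void main(String[] args) {
        // Nullable primitive field via the static factory.
        Field id = Field.nullable("id", new ArrowType.Int(64, true));

        // Full constructor: FieldType(nullable, type, dictionary) plus optional children.
        Field name = new Field("name",
                new FieldType(true, new ArrowType.Utf8(), null),
                null);

        // Nested types describe their element/member types as child fields.
        Field tags = new Field("tags",
                FieldType.nullable(new ArrowType.List()),
                Collections.singletonList(Field.nullable("element", new ArrowType.Utf8())));

        Schema schema = new Schema(Arrays.asList(id, name, tags));
        System.out.println(schema);
    }
}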
Example #1
Source File: Block.java    From aws-athena-query-federation with Apache License 2.0
@Override
public String toString()
{
    MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
    helper.add("rows", getRowCount());

    int rowsToPrint = this.vectorSchema.getRowCount() > 10 ? 10 : this.vectorSchema.getRowCount();
    for (Field next : this.schema.getFields()) {
        FieldReader thisReader = vectorSchema.getVector(next.getName()).getReader();
        List<String> values = new ArrayList<>();
        for (int i = 0; i < rowsToPrint; i++) {
            thisReader.setPosition(i);
            values.add(fieldToString(thisReader));
        }
        helper.add(next.getName(), values);
    }

    return helper.toString();
}
 
Example #2
Source File: ArrowUtils.java    From flink with Apache License 2.0
private static Field toArrowField(String fieldName, LogicalType logicalType) {
	FieldType fieldType = new FieldType(
		logicalType.isNullable(),
		logicalType.accept(LogicalTypeToArrowTypeConverter.INSTANCE),
		null);
	List<Field> children = null;
	if (logicalType instanceof ArrayType) {
		children = Collections.singletonList(toArrowField(
			"element", ((ArrayType) logicalType).getElementType()));
	} else if (logicalType instanceof RowType) {
		RowType rowType = (RowType) logicalType;
		children = new ArrayList<>(rowType.getFieldCount());
		for (RowType.RowField field : rowType.getFields()) {
			children.add(toArrowField(field.getName(), field.getType()));
		}
	}
	return new Field(fieldName, fieldType, children);
}
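
For instance, a Flink ARRAY<INT> column run through the method above becomes a LIST field whose single child describes the element type. A hand-built equivalent, as a rough sketch (the column name and element nullability are assumptions for illustration):

import java.util.Collections;

import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

public class NestedFieldSketch {
    public static void main(String[] args) {
        // Child field for the list element: a non-nullable 32-bit signed int.
        Field element = new Field("element",
                new FieldType(false, new ArrowType.Int(32, true), null),
                null);

        // Parent LIST field carrying the element as its only child.
        Field scores = new Field("scores",
                new FieldType(true, new ArrowType.List(), null),
                Collections.singletonList(element));

        System.out.println(scores);
    }
}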
 
Example #3
Source File: UserDefinedFunctionHandler.java    From aws-athena-query-federation with Apache License 2.0
private FieldWriterFactory makeFactory(Field field, List<ArrowValueProjector> valueProjectors, Method udfMethod)
{
    Object[] arguments = new Object[valueProjectors.size()];

    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    switch (fieldType) {
        case LIST:
        case STRUCT:
            return (FieldVector vector, Extractor extractor, ConstraintProjector ignored) ->
                    (Object inputRowNum, int outputRowNum) -> {
                        Object result = invokeMethod(udfMethod, arguments, (int) inputRowNum, valueProjectors);
                        BlockUtils.setComplexValue(vector, outputRowNum, FieldResolver.DEFAULT, result);
                        return true;    // push-down does not apply in UDFs
                    };

        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
 
Example #4
Source File: RedisRecordHandler.java    From aws-athena-query-federation with Apache License 2.0
private void loadHashRow(Jedis client, String keyString, BlockSpiller spiller, List<Field> fieldList)
{
    spiller.writeRows((Block block, int row) -> {
        boolean hashMatched = block.offerValue(KEY_COLUMN_NAME, row, keyString);

        Map<String, String> rawValues = new HashMap<>();
        //Glue only supports lowercase column names / also could do a better job only fetching the columns
        //that are needed
        client.hgetAll(keyString).forEach((key, entry) -> rawValues.put(key.toLowerCase(), entry));

        for (Field hfield : fieldList) {
            Object hvalue = ValueConverter.convert(hfield, rawValues.get(hfield.getName()));
            if (hashMatched && !block.offerValue(hfield.getName(), row, hvalue)) {
                return 0;
            }
        }

        return 1;
    });
}
 
Example #5
Source File: FileSystemCreateTableEntry.java    From dremio-oss with Apache License 2.0
@Override
public Writer getWriter(
    OpProps props,
    PhysicalOperator child
    ) throws IOException {
  if (child != null && child.getProps() != null && icebergTableProps != null) {
    BatchSchema writerSchema = child.getProps().getSchema();
    SchemaBuilder schemaBuilder = BatchSchema.newBuilder();
    // current parquet writer uses a few extra columns in the schema for partitioning and distribution
    // For iceberg, filter those extra columns
    for (Field field : writerSchema) {
      if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
        continue;
      }
      if (field.getName().equalsIgnoreCase(WriterPrel.BUCKET_NUMBER_FIELD)) {
        continue;
      }
      schemaBuilder.addField(field);
    }
    writerSchema = schemaBuilder.build();
    icebergTableProps.setFullSchema(writerSchema);
  }
  return formatPlugin.getWriter(child, location, plugin, options, props);
}
 
Example #6
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0
ParquetGroupConverter(
    ParquetColumnResolver columnResolver, OutputMutator mutator,
    GroupType schema,
    Collection<SchemaPath> columns,
    OptionManager options,
    List<Field> arrowSchema,
    Function<String, String> childNameResolver,
    SchemaDerivationHelper schemaHelper) {
  this.converters = Lists.newArrayList();
  this.mutator = mutator;
  this.schema = schema;
  this.columns = columns;
  this.options = options;
  this.arrowSchema = arrowSchema;
  this.childNameResolver = childNameResolver;
  this.schemaHelper = schemaHelper;
  this.columnResolver = columnResolver;
  this.maxFieldSizeLimit = Math.toIntExact(options.getOption(ExecConstants.LIMIT_FIELD_SIZE_BYTES));
}
 
Example #7
Source File: RedisRecordHandler.java    From aws-athena-query-federation with Apache License 2.0
private void loadZSetRows(Jedis client, String keyString, BlockSpiller spiller, List<Field> fieldList)
{
    if (fieldList.size() != 1) {
        throw new RuntimeException("Ambiguous field mapping, more than 1 field for ZSET value type.");
    }

    Field zfield = fieldList.get(0);
    String cursor = SCAN_POINTER_START;
    do {
        ScanResult<Tuple> result = client.zscan(keyString, cursor);
        cursor = result.getCursor();
        for (Tuple nextElement : result.getResult()) {
            spiller.writeRows((Block block, int rowNum) -> {
                Object zvalue = ValueConverter.convert(zfield, nextElement.getElement());
                boolean zsetMatched = block.offerValue(KEY_COLUMN_NAME, rowNum, keyString);
                zsetMatched &= block.offerValue(zfield.getName(), rowNum, zvalue);
                return zsetMatched ? 1 : 0;
            });
        }
    }
    while (cursor != null && !END_CURSOR.equals(cursor));
}
 
Example #8
Source File: ArrowRecordBatchLoader.java    From dremio-oss with Apache License 2.0
public static void load(RecordBatch recordBatch, VectorAccessible vectorAccessible, ArrowBuf body) {
  List<Field> fields = vectorAccessible.getSchema().getFields();
  List<FieldVector> fieldVectors = FluentIterable.from(vectorAccessible)
    .transform(new Function<VectorWrapper<?>, FieldVector>() {
      @Override
      public FieldVector apply(VectorWrapper<?> wrapper) {
        return (FieldVector) wrapper.getValueVector();
      }
    }).toList();
  try {
    ArrowRecordBatch arrowRecordBatch = deserializeRecordBatch(recordBatch, body);
    Iterator<ArrowFieldNode> nodes = arrowRecordBatch.getNodes().iterator();
    Iterator<ArrowBuf> buffers = arrowRecordBatch.getBuffers().iterator();
    for (int i = 0; i < fields.size(); ++i) {
      Field field = fields.get(i);
      FieldVector fieldVector = fieldVectors.get(i);
      loadBuffers(fieldVector, field, buffers, nodes);
    }
    if (buffers.hasNext()) {
      throw new IllegalArgumentException("not all buffers were consumed. " + buffers);
    }
  } catch (IOException e) {
    throw new RuntimeException("could not deserialize batch for " + vectorAccessible.getSchema(), e);
  }
}
 
Example #9
Source File: DocDBFieldResolver.java    From aws-athena-query-federation with Apache License 2.0
@Override
public Object getFieldValue(Field field, Object value)
{
    Types.MinorType minorType = Types.getMinorTypeForArrowType(field.getType());
    if (minorType == Types.MinorType.LIST) {
        return TypeUtils.coerce(field, ((Document) value).get(field.getName()));
    }
    else if (value instanceof Document) {
        Object rawVal = ((Document) value).get(field.getName());
        return TypeUtils.coerce(field, rawVal);
    }
    throw new RuntimeException("Expected LIST or Document type but found " + minorType);
}
 
Example #10
Source File: ComplexToJsonPrel.java    From dremio-oss with Apache License 2.0
@Override
public PhysicalOperator getPhysicalOperator(PhysicalPlanCreator creator) throws IOException {
  PhysicalOperator child = ((Prel) getInput()).getPhysicalOperator(creator);
  final SchemaBuilder builder = BatchSchema.newBuilder();
  for(Field f : child.getProps().getSchema()){
    builder.addField(f.getType().accept(new SchemaConverter(f)));
  }
  BatchSchema schema = builder.build();

  return new ComplexToJson(creator.props(this, null, schema, RESERVE, LIMIT), child);
}
 
Example #11
Source File: FlattenPrel.java    From dremio-oss with Apache License 2.0
@Override
public Field visit(ArrowType.List type) {
  if(field.getName().equals(column.getAsUnescapedPath())){
    Field child = field.getChildren().get(0);
    return new Field(field.getName(), child.isNullable(), child.getType(), child.getChildren());
  }
  return field;
}
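
Because nested fields carry their element and member types as children, a short recursive walk is often enough to inspect structures like the one being flattened above. A sketch (relying on getChildren() returning an empty list for leaf fields):

import org.apache.arrow.vector.types.pojo.Field;

public class FieldTreePrinter {
    public static void print(Field field, int depth) {
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            indent.append("  ");
        }
        // Print "name: type" at the current depth, then recurse into children.
        System.out.println(indent + field.getName() + ": " + field.getType());
        for (Field child : field.getChildren()) {
            print(child, depth + 1);
        }
    }
}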
 
Example #12
Source File: ElasticsearchTypeUtils.java    From aws-athena-query-federation with Apache License 2.0
/**
 * Create the appropriate field extractor used for extracting field values from a Document based on the field type.
 * @param field is used to determine which extractor to generate based on the field type.
 * @return a field extractor.
 */
protected Extractor makeExtractor(Field field)
{
    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());

    switch (fieldType) {
        case VARCHAR:
            return makeVarCharExtractor(field);
        case BIGINT:
            return makeBigIntExtractor(field);
        case INT:
            return makeIntExtractor(field);
        case SMALLINT:
            return makeSmallIntExtractor(field);
        case TINYINT:
            return makeTinyIntExtractor(field);
        case FLOAT8:
            return makeFloat8Extractor(field);
        case FLOAT4:
            return makeFloat4Extractor(field);
        case DATEMILLI:
            return makeDateMilliExtractor(field);
        case BIT:
            return makeBitExtractor(field);
        default:
            return null;
    }
}
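
The Types.getMinorTypeForArrowType(...) call used above is the usual way to collapse the ArrowType class hierarchy into an enum that can drive a switch. A minimal sketch (the field name is illustrative):

import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.Types;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;

public class MinorTypeDispatch {
    public static void main(String[] args) {
        Field price = Field.nullable("price",
                new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));

        Types.MinorType minor = Types.getMinorTypeForArrowType(price.getType());
        switch (minor) {
            case FLOAT8: // 8-byte floating point, i.e. a Java double
                System.out.println("use a double extractor");
                break;
            default:
                System.out.println("unsupported type: " + minor);
        }
    }
}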
 
Example #13
Source File: HiveNonVarcharCoercionReader.java    From dremio-oss with Apache License 2.0
protected void createCoercions() {
  for (Field field : targetSchema.getFields()) {
    final FieldReference inputRef = FieldReference.getWithQuotedRef(field.getName());
    final CompleteType targetType = CompleteType.fromField(field);
    if(targetType.isUnion() || targetType.isComplex()) {
      // do not add any expressions for non primitive fields
      exprs.add(null);
    } else {
      addExpression(field, inputRef);
    }
  }
}
 
Example #14
Source File: TestEasyScanOperatorCreator.java    From dremio-oss with Apache License 2.0
@Test
public void selectionIgnoresIncremental() {
  BatchSchema schema = mock(BatchSchema.class);
  when(schema.iterator())
      .thenReturn(Lists.newArrayList(Field.nullable("a1", new ArrowType.Bool()),
          Field.nullable(IncrementalUpdateUtils.UPDATE_COLUMN, new ArrowType.Bool())).iterator());
  assertTrue(EasyScanOperatorCreator.selectsAllColumns(schema,
    Lists.<SchemaPath>newArrayList(SchemaPath.getSimplePath("a1"))));
}
 
Example #15
Source File: ProjectOperator.java    From dremio-oss with Apache License 2.0
private List<NamedExpression> getExpressionList() {
  if (config.getExprs() != null) {
    return config.getExprs();
  }

  // project also supports the ComplexToJson operation. If we get here, we're in that state.
  final List<NamedExpression> exprs = Lists.newArrayList();
  for (final Field field : incoming.getSchema()) {
    CompleteType type = CompleteType.fromField(field);
    if (type.isComplex() || type.isUnion()) {
      final LogicalExpression convertToJson = FunctionCallFactory.createConvert(ConvertExpression.CONVERT_TO, "JSON", SchemaPath.getSimplePath(field.getName()));
      final String castFuncName = CastFunctions.getCastFunc(MinorType.VARCHAR);
      final List<LogicalExpression> castArgs = Lists.newArrayList();
      castArgs.add(convertToJson);  //input_expr
      /*
       * We are implicitly casting to VARCHAR so we don't have a max length,
       * using an arbitrary value. We trim down the size of the stored bytes
       * to the actual size so this size doesn't really matter.
       */
      castArgs.add(new ValueExpressions.LongExpression(TypeHelper.VARCHAR_DEFAULT_CAST_LEN)); //
      final FunctionCall castCall = new FunctionCall(castFuncName, castArgs);
      exprs.add(new NamedExpression(castCall, new FieldReference(field.getName())));
    } else {
      exprs.add(new NamedExpression(SchemaPath.getSimplePath(field.getName()), new FieldReference(field.getName())));
    }
  }
  return exprs;
}
 
Example #16
Source File: Ec2TableProviderTest.java    From aws-athena-query-federation with Apache License 2.0
protected void validateRow(Block block, int pos)
{
    for (FieldReader fieldReader : block.getFieldReaders()) {
        fieldReader.setPosition(pos);
        Field field = fieldReader.getField();

        if (field.getName().equals(getIdField())) {
            assertEquals(getIdValue(), fieldReader.readText().toString());
        }
        else {
            validate(fieldReader);
        }
    }
}
 
Example #17
Source File: StructGroupConverter.java    From dremio-oss with Apache License 2.0
public StructGroupConverter(
    ParquetColumnResolver columnResolver,
    String fieldName,
    OutputMutator mutator,
    StructWriter structWriter,
    GroupType schema,
    Collection<SchemaPath> columns,
    OptionManager options,
    List<Field> arrowSchema,
    SchemaDerivationHelper schemaHelper) {
  super(
      columnResolver,
      mutator,
      schema,
      columns,
      options,
      arrowSchema,
      new Function<String, String>() {
        @Override
        public String apply(String input) {
          // each child has its own name
          return input;
        }
      },
      schemaHelper);
  this.structWriter = structWriter;
  this.writerProvider = new StructWriterProvider(structWriter);

  convertChildren(fieldName);
}
 
Example #18
Source File: S3BucketsTableProviderTest.java    From aws-athena-query-federation with Apache License 2.0
protected void validateRow(Block block, int pos)
{
    for (FieldReader fieldReader : block.getFieldReaders()) {
        fieldReader.setPosition(pos);
        Field field = fieldReader.getField();

        if (field.getName().equals(getIdField())) {
            assertEquals(getIdValue(), fieldReader.readText().toString());
        }
        else {
            validate(fieldReader);
        }
    }
}
 
Example #19
Source File: JobDataFragmentWrapper.java    From dremio-oss with Apache License 2.0
public static ImmutableMap<String, Column> getColumnsFromSchema(BatchSchema schema){
  ImmutableMap.Builder<String, Column> columns = ImmutableMap.builder();
  for (int i = 0; i < schema.getFieldCount(); ++i) {
    final Field column = schema.getColumn(i);
    final String name = column.getName();
    final MajorType type = getMajorTypeForField(column);
    DataType dataType = DataTypeUtil.getDataType(type);

    columns.put(name, new Column(name, dataType, i));
  }

  return columns.build();
}
 
Example #20
Source File: VectorContainerWriter.java    From dremio-oss with Apache License 2.0
@Override
public <T extends FieldVector> T addOrGet(String childName, FieldType fieldType, Class<T> clazz) {
  try {
    Field field = new Field(childName, fieldType, null);
    final FieldVector v = mutator.addField(field, clazz);
    putChild(childName, v);
    return this.typeify(v, clazz);
  } catch (SchemaChangeException e) {
    throw new IllegalStateException(e);
  }
}
 
Example #21
Source File: WindowFunction.java    From dremio-oss with Apache License 2.0
@Override
boolean materialize(final NamedExpression ne, final VectorContainer batch, final ClassProducer producer) {
  final LogicalExpression aggregate = producer.materialize(ne.getExpr(), batch);
  if (aggregate == null) {
    return false;
  }

  // add corresponding ValueVector to container
  final Field output = aggregate.getCompleteType().toField(ne.getRef());
  batch.addOrGet(output);
  TypedFieldId outputId = batch.getValueVectorId(ne.getRef());
  writeAggregationToOutput = new ValueVectorWriteExpression(outputId, aggregate, true);

  return true;
}
 
Example #22
Source File: HbaseFieldResolver.java    From aws-athena-query-federation with Apache License 2.0
/**
 * @param field The Apache Arrow field we'd like to extract from the value.
 * @param val The value from which we'd like to extract the provided field.
 * @return Object containing the value for the requested field.
 * @see FieldResolver in the Athena Query Federation SDK
 */
@Override
public Object getFieldValue(Field field, Object val)
{
    if (!(val instanceof Result)) {
        String clazz = (val != null) ? val.getClass().getName() : "null";
        throw new IllegalArgumentException("Expected value of type Result but found " + clazz);
    }

    byte[] rawFieldValue = ((Result) val).getValue(family, field.getName().getBytes());
    return HbaseSchemaUtils.coerceType(isNative, field.getType(), rawFieldValue);
}
 
Example #23
Source File: UserDefinedFunctionHandler.java    From aws-athena-query-federation with Apache License 2.0
private GeneratedRowWriter createOutputRowWriter(Field outputField, List<ArrowValueProjector> valueProjectors, Method udfMethod)
{
    GeneratedRowWriter.RowWriterBuilder builder = GeneratedRowWriter.newBuilder();
    Extractor extractor = makeExtractor(outputField, valueProjectors, udfMethod);
    if (extractor != null) {
        builder.withExtractor(outputField.getName(), extractor);
    }
    else {
        builder.withFieldWriterFactory(outputField.getName(), makeFactory(outputField, valueProjectors, udfMethod));
    }
    return builder.build();
}
 
Example #24
Source File: UserDefinedFunctionHandler.java    From aws-athena-query-federation with Apache License 2.0
/**
 * Processes a group of rows. This method takes in a block of data (containing multiple rows), processes them, and
 * returns multiple rows of the output column in a block.
 * <p>
 * UDF methods are invoked row-by-row in a for loop. Arrow values are converted to Java Objects and then passed into
 * the UDF java method. This is not very efficient because we might potentially be doing a lot of data copying.
 * Advanced users could choose to override this method and directly deal with Arrow data to achieve better
 * performance.
 *
 * @param allocator arrow memory allocator
 * @param udfMethod the extracted java method matching the User-Defined-Function defined in Athena.
 * @param inputRecords input data in Arrow format
 * @param outputSchema output data schema in Arrow format
 * @return output data in Arrow format
 */
protected Block processRows(BlockAllocator allocator, Method udfMethod, Block inputRecords, Schema outputSchema)
        throws Exception
{
    int rowCount = inputRecords.getRowCount();

    List<ArrowValueProjector> valueProjectors = Lists.newArrayList();

    for (Field field : inputRecords.getFields()) {
        FieldReader fieldReader = inputRecords.getFieldReader(field.getName());
        ArrowValueProjector arrowValueProjector = ProjectorUtils.createArrowValueProjector(fieldReader);
        valueProjectors.add(arrowValueProjector);
    }

    Field outputField = outputSchema.getFields().get(0);
    GeneratedRowWriter outputRowWriter = createOutputRowWriter(outputField, valueProjectors, udfMethod);

    Block outputRecords = allocator.createBlock(outputSchema);
    outputRecords.setRowCount(rowCount);

    try {
        for (int rowNum = 0; rowNum < rowCount; ++rowNum) {
            outputRowWriter.writeRow(outputRecords, rowNum, rowNum);
        }
    }
    catch (Throwable t) {
        try {
            outputRecords.close();
        }
        catch (Exception e) {
            logger.error("Error closing output block", e);
        }
        throw t;
    }

    return outputRecords;
}
 
Example #25
Source File: ArrowConverter.java    From DataVec with Apache License 2.0
/**
 * Convert a DataVec {@link Schema}
 * to an Arrow {@link org.apache.arrow.vector.types.pojo.Schema}
 * @param schema the input schema
 * @return the schema for arrow
 */
public static org.apache.arrow.vector.types.pojo.Schema toArrowSchema(Schema schema) {
    List<Field> fields = new ArrayList<>(schema.numColumns());
    for(int i = 0; i < schema.numColumns(); i++) {
        fields.add(getFieldForColumn(schema.getName(i),schema.getType(i)));
    }

    return new org.apache.arrow.vector.types.pojo.Schema(fields);
}
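
Once assembled, the Arrow schema can be serialized for transport or persistence. A sketch, assuming an Arrow version that provides Schema.toJson() and Schema.fromJSON(...):

import java.io.IOException;
import java.util.Collections;

import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

public class SchemaJsonRoundTrip {
    public static void main(String[] args) throws IOException {
        Schema schema = new Schema(Collections.singletonList(
                Field.nullable("x", new ArrowType.Int(32, true))));

        String json = schema.toJson();           // serialize to JSON
        Schema restored = Schema.fromJSON(json); // parse it back
        System.out.println(schema.equals(restored)); // expected: true
    }
}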
 
Example #26
Source File: FieldIdUtil2.java    From dremio-oss with Apache License 2.0
public static TypedFieldId getFieldId(Schema schema, BasePath path, boolean isHyper){
  int i = 0;
  for (Field f : schema.getFields()) {
    TypedFieldId id = getFieldId(f, i, path, isHyper);
    if (id != null) {
      return id;
    }
    i++;
  }
  return null;
}
 
Example #27
Source File: TestOutputMutator.java    From dremio-oss with Apache License 2.0
@Override
public <T extends ValueVector> T addField(Field field, Class<T> clazz) throws SchemaChangeException {
  ValueVector v = TypeHelper.getNewVector(field, allocator);
  if (!clazz.isAssignableFrom(v.getClass())) {
    throw new SchemaChangeException(String.format("The class that was provided %s does not correspond to the expected vector type of %s.", clazz.getSimpleName(), v.getClass().getSimpleName()));
  }
  addField(v);
  return (T) v;
}
 
Example #28
Source File: FlattenPrel.java    From dremio-oss with Apache License 2.0
@Override
public Field visitGeneric(ArrowType type) {
  if(field.getName().equals(column.getAsUnescapedPath())){
    throw UserException.validationError().message("You're trying to flatten a field that is not a list. The offending field is %s.", Describer.describe(field)).build(logger);
  }
  return super.visitGeneric(type);
}
 
Example #29
Source File: GlueFieldLexer.java    From aws-athena-query-federation with Apache License 2.0
public static Field lex(String name, String input, BaseTypeMapper mapper)
{
    Field result = mapper.getField(name, input);
    if (result != null) {
        return result;
    }

    GlueTypeParser parser = new GlueTypeParser(input);
    return lexComplex(name, parser.next(), parser, mapper);
}
 
Example #30
Source File: Describer.java    From dremio-oss with Apache License 2.0
public static String describe(Iterable<Field> fields){
  StringBuilder sb = new StringBuilder();
  boolean first = true;
  for(Field f : fields){
    if(!first){
      sb.append(", ");
    }
    sb.append(describe(f));
    first = false;
  }
  return sb.toString();
}