Java Code Examples for org.apache.arrow.vector.types.pojo.Schema#getFields()

The following examples show how to use org.apache.arrow.vector.types.pojo.Schema#getFields(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: TPCDSRecordHandler.java From aws-athena-query-federation with Apache License 2.0

6 votes

/**
 * Builds the CellWriters that convert TPCDS generator data into Apache Arrow values.
 *
 * @param schemaForRead The schema to read/project.
 * @param table The TPCDS Table we are reading from.
 * @return A map keyed by each column's position in the TPCDS data set. The value is the {@code CellWriter}
 * produced by {@code makeWriter} for the schema field that shares the column's name.
 */
private Map<Integer, CellWriter> makeWriters(Schema schemaForRead, Table table)
{
    // Index the table's columns by name so every projected field resolves with one map lookup.
    Map<String, Column> columnsByName = new HashMap<>();
    for (Column tableColumn : table.getColumns()) {
        columnsByName.put(tableColumn.getName(), tableColumn);
    }

    //We use this approach to reduce the overhead of field lookups. This isn't as good as true columnar processing
    //using Arrow but it gets us ~80% of the way there from a rows/second per cpu-cycle perspective.
    Map<Integer, CellWriter> writersByPosition = new HashMap<>();
    for (Field projectedField : schemaForRead.getFields()) {
        Column sourceColumn = columnsByName.get(projectedField.getName());
        // Read the position before building the writer, matching the original evaluation order.
        int position = sourceColumn.getPosition();
        writersByPosition.put(position, makeWriter(projectedField, sourceColumn));
    }
    return writersByPosition;
}

Example 2

Source File: ParquetRecordMaterializer.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Stores the complex writer and builds the root {@code StructGroupConverter} from the supplied arguments.
 *
 * <p>The converter is created with an empty name and the writer's root struct. When {@code arrowSchema}
 * is {@code null}, {@code null} is passed in place of the Arrow field list.
 */
public ParquetRecordMaterializer(ParquetColumnResolver columnResolver, OutputMutator mutator, ComplexWriter complexWriter, MessageType schema,
                                 Collection<SchemaPath> columns, OptionManager options, Schema arrowSchema,
                                 SchemaDerivationHelper schemaHelper) {
  this.complexWriter = complexWriter;
  this.root = new StructGroupConverter(
      columnResolver,
      "",
      mutator,
      complexWriter.rootAsStruct(),
      schema,
      columns,
      options,
      // The Arrow field list is optional: forward it only when a schema was supplied.
      arrowSchema != null ? arrowSchema.getFields() : null,
      schemaHelper
  );
}

Example 3

Source File: UserDefinedFunctionHandler.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Resolves a Java class for every field of the schema, in field order.
 *
 * @param schema The schema whose fields are inspected.
 * @return An array with one entry per field, holding the class {@code BlockUtils.getJavaType} reports for
 * the field's Arrow minor type.
 */
private Class[] extractJavaTypes(Schema schema)
{
    List<Field> schemaFields = schema.getFields();
    Class[] javaTypes = new Class[schemaFields.size()];

    int slot = 0;
    for (Field schemaField : schemaFields) {
        Types.MinorType minorType = Types.getMinorTypeForArrowType(schemaField.getType());
        javaTypes[slot] = BlockUtils.getJavaType(minorType);
        slot++;
    }

    return javaTypes;
}

Example 4

Source File: ConstraintEvaluator.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Creates an evaluator for the given constraints.
 *
 * @param allocator Handed to the {@code MarkerFactory} this evaluator creates.
 * @param schema Every field's name and Arrow type is recorded in {@code typeMap}.
 * @param constraints The constraints retained by this evaluator.
 */
public ConstraintEvaluator(BlockAllocator allocator, Schema schema, Constraints constraints)
{
    this.constraints = constraints;
    // Remember each field's Arrow type under the field's name.
    schema.getFields().forEach(field -> typeMap.put(field.getName(), field.getType()));
    markerFactory = new MarkerFactory(allocator);
}

Example 5

Source File: DDBRecordMetadata.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Collects the names of the schema fields whose Arrow minor type name appears in
 * {@code DefaultGlueType.getNonComparableSet()}.
 *
 * @param schema The schema to inspect; may be {@code null}.
 * @return The matching field names. The set is empty when the schema or its field list is {@code null}.
 */
private Set<String> getNonComparableColumns(Schema schema)
{
    Set<String> matches = new HashSet<>();
    if (schema == null || schema.getFields() == null) {
        return matches;
    }

    for (Field candidate : schema.getFields()) {
        Types.MinorType minorType = Types.getMinorTypeForArrowType(candidate.getType());
        boolean nonComparable = DefaultGlueType.getNonComparableSet().contains(minorType.name());
        if (nonComparable) {
            matches.add(candidate.getName());
        }
    }
    return matches;
}

Example 6

Source File: DDBRecordMetadata.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Determines whether the schema contains any type that is treated as coercible.
 *
 * <p>A field counts as coercible when {@code isDateTimeFieldType} accepts its minor type or when its
 * minor type is anything other than {@code DECIMAL}.
 *
 * @param schema Schema to inspect; may be {@code null}.
 * @return {@code true} as soon as one coercible field is found; {@code false} otherwise, including when
 * the schema or its field list is {@code null}.
 */
private boolean isContainsCoercibleType(Schema schema)
{
    if (schema == null || schema.getFields() == null) {
        return false;
    }

    for (Field candidate : schema.getFields()) {
        Types.MinorType minorType = Types.getMinorTypeForArrowType(candidate.getType());
        // NOTE(review): as written, the date-time test only matters for DECIMAL, because every other
        // type already satisfies the "not DECIMAL" test - confirm this is the intended rule.
        boolean plainDecimal = !isDateTimeFieldType(minorType) && minorType.equals(Types.MinorType.DECIMAL);
        if (!plainDecimal) {
            return true;
        }
    }
    return false;
}

Example 7

Source File: QueryUtils.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Builds a projection document from a Schema so that only specific Document fields are requested
 * from DocumentDB.
 *
 * @param schema The schema containing the requested projection.
 * @return A Document with one entry per schema field, keyed by field name with the value {@code 1}.
 */
public static Document makeProjection(Schema schema)
{
    Document projection = new Document();
    schema.getFields().forEach(projected -> projection.append(projected.getName(), 1));
    return projection;
}

Example 8

Source File: ElasticsearchSchemaUtils.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Checks that two Schema objects are equal using the following criteria:
 * 1) The Schemas must have the same number of fields.
 * 2) The corresponding fields in the two Schema objects must also be the same irrespective of ordering within
 *    the Schema object using the following criteria:
 *    a) The fields' names must match.
 *    b) The fields' Arrow types must match (compared with {@code equals}, not by reference).
 *    c) The fields' children lists (used for complex fields, e.g. LIST and STRUCT) must match irrespective of
 *       field ordering within the lists.
 *    d) The fields' metadata maps must match. Currently that's only applicable for scaled_float data types that
 *       use the field's metadata map to store the scaling factor associated with the data type.
 * @param mapping1 is a mapping to be compared.
 * @param mapping2 is a mapping to be compared.
 * @return true if the schemas are equal, false otherwise (including when a field of mapping1 has no
 * same-named field in mapping2).
 */
@VisibleForTesting
protected static final boolean mappingsEqual(Schema mapping1, Schema mapping2)
{
    logger.info("mappingsEqual - Enter - Mapping1: {}, Mapping2: {}", mapping1, mapping2);

    // Schemas must have the same number of elements.
    if (mapping1.getFields().size() != mapping2.getFields().size()) {
        logger.warn("Mappings are different sizes - Mapping1: {}, Mapping2: {}",
                mapping1.getFields().size(), mapping2.getFields().size());
        return false;
    }

    // Mappings must have the same fields (irrespective of internal ordering).
    for (Field field1 : mapping1.getFields()) {
        // Look the counterpart up by scanning instead of calling Schema#findField: that method throws
        // IllegalArgumentException for an unknown name, so a missing field would abort the comparison
        // rather than make it return false.
        Field field2 = null;
        for (Field candidate : mapping2.getFields()) {
            if (candidate.getName().equals(field1.getName())) {
                field2 = candidate;
                break;
            }
        }
        // Corresponding fields must have the same Arrow types or the Schemas are deemed not equal.
        // Use equals(): two equal Arrow types are not guaranteed to be the same object instance.
        if (field2 == null || !field1.getType().equals(field2.getType())) {
            logger.warn("Fields' types do not match - Field1: {}, Field2: {}",
                    field1.getType(), field2 == null ? "null" : field2.getType());
            return false;
        }
        logger.info("Field1 Name: {}, Field1 Type: {}, Field1 Metadata: {}",
                field1.getName(), field1.getType(), field1.getMetadata());
        logger.info("Field2 Name: {}, Field2 Type: {}, Field2 Metadata: {}",
                field2.getName(), field2.getType(), field2.getMetadata());
        // The corresponding fields' children and metadata maps must also match or the Schemas are deemed not equal.
        if (!childrenEqual(field1.getChildren(), field2.getChildren()) ||
                !field1.getMetadata().equals(field2.getMetadata())) {
            return false;
        }
    }

    return true;
}

Example 9

Source File: ElasticsearchQueryUtils.java From aws-athena-query-federation with Apache License 2.0

5 votes

/**
 * Creates a projection (using the schema) on which fields should be included in the search index request.
 * Only top-level field names are added: for complex type STRUCT the name of the STRUCT field alone is
 * included, allowing Elasticsearch to return the entire object including all nested fields.
 * @param schema is the schema containing the requested projection.
 * @return a projection wrapped in a FetchSourceContext object.
 */
protected static FetchSourceContext getProjection(Schema schema)
{
    List<String> projectedNames = new ArrayList<>();
    schema.getFields().forEach(projected -> projectedNames.add(projected.getName()));

    logger.info("Included fields: " + projectedNames);

    String[] includes = Strings.toStringArray(projectedNames);
    return new FetchSourceContext(true, includes, Strings.EMPTY_ARRAY);
}

Example 10

Source File: FieldIdUtil2.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Searches the schema's fields, in order, for one that yields a field id for the given path.
 *
 * @param schema the schema whose fields are searched
 * @param path the path forwarded to the per-field lookup
 * @param isHyper forwarded unchanged to the per-field lookup
 * @return the first non-null id returned by the per-field {@code getFieldId} overload, or {@code null}
 *         if every field returns {@code null}
 */
public static TypedFieldId getFieldId(Schema schema, BasePath path, boolean isHyper){
  int ordinal = 0;
  for (Field candidate : schema.getFields()) {
    // Each field is probed together with its zero-based position in the schema.
    TypedFieldId match = getFieldId(candidate, ordinal++, path, isHyper);
    if (match != null) {
      return match;
    }
  }
  return null;
}

Example 11

Source File: VectorContainer.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates a container on the given allocator, calls {@code addOrGet} for every field of the schema and
 * then builds the container's schema with {@code SelectionVectorMode.NONE}.
 *
 * @param allocator the allocator passed to the new container
 * @param schema the schema whose fields are added, in order
 * @return the populated container
 */
public static VectorContainer create(BufferAllocator allocator, Schema schema){
  final VectorContainer result = new VectorContainer(allocator);
  for (Field schemaField : schema.getFields()) {
    result.addOrGet(schemaField);
  }
  result.buildSchema(SelectionVectorMode.NONE);
  return result;
}

Example 12

Source File: ExpandableHyperContainer.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates a hyper container that holds an empty hyper vector for every field of the schema.
 *
 * <p>{@code isKeyBits} is set to {@code null}, so no field is filtered out. The container schema is
 * built with {@code SelectionVectorMode.FOUR_BYTE}.
 *
 * @param allocator the allocator passed to the superclass constructor
 * @param schema the schema whose fields are all added
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema) {
  super(allocator);
  // Add all key fields for VECTORIZED_BIGINT mode
  this.isKeyBits = null;
  for (Field f : schema.getFields()) {
    this.addEmptyHyper(f);
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}

Example 13

Source File: ExpandableHyperContainer.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Creates a hyper container that holds an empty hyper vector for every schema field whose bit in
 * {@code isKeyBits} is clear.
 *
 * <p>The container schema is built with {@code SelectionVectorMode.FOUR_BYTE}.
 *
 * @param allocator the allocator passed to the superclass constructor
 * @param schema the schema whose fields are considered, in order
 * @param isKeyBits bit {@code n} set means the {@code n}-th schema field is left out of the container
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema, BitSet isKeyBits) {
  super(allocator);
  this.isKeyBits = isKeyBits;
  int fieldIndex = 0;
  for (Field schemaField : schema.getFields()) {
    // A set bit means the field is ignored; only fields with a clear bit are added.
    boolean skipped = this.isKeyBits.get(fieldIndex++);
    if (!skipped) {
      this.addEmptyHyper(schemaField);
    }
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}

Example 14

Source File: ArrowUtilsTest.java From flink with Apache License 2.0

5 votes

/**
 * Converts {@code rowType} to an Arrow schema and checks that the number of fields, and each field's
 * name and Arrow type, match the expectations held in {@code testFields}.
 */
@Test
public void testConvertBetweenLogicalTypeAndArrowType() {
	Schema arrowSchema = ArrowUtils.toArrowSchema(rowType);
	List<Field> arrowFields = arrowSchema.getFields();

	assertEquals(testFields.size(), arrowFields.size());
	for (int pos = 0; pos < arrowFields.size(); pos++) {
		Field actual = arrowFields.get(pos);
		// verify convert from RowType to ArrowType
		assertEquals(testFields.get(pos).f0, actual.getName());
		assertEquals(testFields.get(pos).f2, actual.getType());
	}
}

Example 15

Source File: SchemaConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Creates a Parquet Schema from an Arrow one and returns the mapping.
 * The Parquet message type is named {@code "root"}.
 * @param arrowSchema the provided Arrow Schema
 * @return the mapping between the 2
 */
public SchemaMapping fromArrow(Schema arrowSchema) {
  // Convert the Arrow fields first; the resulting mappings feed the Parquet message builder.
  List<TypeMapping> fieldMappings = fromArrow(arrowSchema.getFields());
  MessageType rootMessage = addToBuilder(fieldMappings, Types.buildMessage()).named("root");
  return new SchemaMapping(arrowSchema, rootMessage, fieldMappings);
}

Example 16

Source File: VectorContainer.java From dremio-oss with Apache License 2.0

4 votes

/**
 * Clears the current schema and then calls {@code addOrGet} for every field of the given schema,
 * in order.
 *
 * @param schema the schema whose fields are added
 */
public void addSchema(Schema schema){
  this.clearSchema();
  for (Field schemaField : schema.getFields()) {
    this.addOrGet(schemaField);
  }
}