Java Code Examples for org.apache.arrow.vector.types.pojo.Field#getChildren()

The following examples show how to use org.apache.arrow.vector.types.pojo.Field#getChildren(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetGroupConverter.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the converter for a parquet group, choosing the implementation from the type of the
 * arrow field that {@code Schema.findField} returns for {@code groupTypeName}.
 *
 * <p>A Union arrow field gets a {@code UnionGroupConverter} that receives the field's children
 * directly. For a List arrow field the parquet group must carry {@code OriginalType.LIST}; once
 * that is verified, the list is handled by {@code defaultGroupConverter}, like every other type.
 *
 * @param fieldName name of the field being converted; also used to derive the child name
 * @param groupTypeName name used to look up the arrow field in the arrow schema
 * @param groupType parquet group type to convert
 * @param c schema paths handed through to the created converter
 * @return the converter for the group
 */
Converter groupConverterFromArrowSchema(String fieldName, String groupTypeName, GroupType groupType, Collection<SchemaPath> c) {
  final String nameForChild = getNameForChild(fieldName);
  final Field matchingArrowField = Schema.findField(arrowSchema, groupTypeName);
  final ArrowTypeID typeId = matchingArrowField.getType().getTypeID();
  final List<Field> childFields = matchingArrowField.getChildren();

  // Union: the children are attached directly to the parent converter.
  if (ArrowTypeID.Union == typeId) {
    return new UnionGroupConverter(
        columnResolver, fieldName, mutator, getWriterProvider(), groupType, c, options, childFields, nameForChild, schemaHelper);
  }

  // List: the parquet schema has to agree with the arrow schema; the logical list itself is
  // then handled by defaultGroupConverter() below.
  if (ArrowTypeID.List == typeId) {
    final boolean parquetSideIsList = groupType.getOriginalType() == OriginalType.LIST;
    Preconditions.checkState(parquetSideIsList, "parquet schema doesn't match the arrow schema for LIST " + nameForChild);
  }

  return defaultGroupConverter(fieldName, mutator, groupType, c, childFields);
}
 
Example 2
Source File: HbaseRecordHandler.java    From aws-athena-query-federation with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the given Apache Arrow field to the Scan so that the requested projection is satisfied.
 * <p>
 * The special 'row' column is skipped. A STRUCT field contributes one column per child, using the
 * struct name as the family and the child name as the qualifier. Any other field must have a name
 * that splits into exactly two parts (family and column).
 *
 * @param scan The scan object that will be used to read data from HBase.
 * @param field The field to be added to the scan.
 * @throws RuntimeException if a non-struct field name does not split into exactly two parts.
 */
private void addToProjection(Scan scan, Field field)
{
    final String fieldName = field.getName();

    //the special 'row' column comes back by default, so there is nothing to project.
    if (HbaseSchemaUtils.ROW_COLUMN_NAME.equalsIgnoreCase(fieldName)) {
        return;
    }

    switch (Types.getMinorTypeForArrowType(field.getType())) {
        case STRUCT:
            //struct name is the column family, each child name is a qualifier within it.
            for (Field member : field.getChildren()) {
                byte[] family = fieldName.getBytes(UTF_8);
                byte[] qualifier = member.getName().getBytes(UTF_8);
                scan.addColumn(family, qualifier);
            }
            break;
        default:
            String[] familyAndColumn = HbaseSchemaUtils.extractColumnParts(fieldName);
            if (familyAndColumn.length != 2) {
                throw new RuntimeException("Column name " + fieldName + " does not meet family:column hbase convention.");
            }
            scan.addColumn(familyAndColumn[0].getBytes(UTF_8), familyAndColumn[1].getBytes(UTF_8));
            break;
    }
}
 
Example 3
Source File: ArrowRecordBatchLoader.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the buffers of {@code vector}, and then recursively of its child vectors, from the given
 * iterators.
 *
 * <p>One field node is consumed for the vector, followed by as many buffers as the type layout of
 * {@code field} declares. Children are then processed in schema order, drawing from the same two
 * iterators.
 *
 * @param vector the vector to load
 * @param field the schema field describing {@code vector}
 * @param buffers source of buffers; advanced by this call
 * @param nodes source of field nodes; advanced by this call
 * @throws IllegalArgumentException if the vector rejects its buffers; the {@code checkArgument}
 *         preconditions (no field node left, child count mismatch) are presumably reported the
 *         same way, depending on which {@code checkArgument} is statically imported
 */
private static void loadBuffers(FieldVector vector, Field field, Iterator<ArrowBuf> buffers, Iterator<ArrowFieldNode> nodes) {
  checkArgument(nodes.hasNext(),
      "no more field nodes for field " + field + " and vector " + vector);
  ArrowFieldNode fieldNode = nodes.next();

  // The type layout decides how many buffers belong to this vector itself.
  List<BufferLayout> bufferLayouts = TypeLayout.getTypeLayout(field.getType()).getBufferLayouts();
  List<ArrowBuf> ownBuffers = new ArrayList<>(bufferLayouts.size());
  for (int j = 0; j < bufferLayouts.size(); j++) {
    ownBuffers.add(buffers.next());
  }

  try {
    vector.loadFieldBuffers(fieldNode, ownBuffers);
  } catch (RuntimeException e) {
    // Re-throw with the field attached so the failing column can be identified; cause is kept.
    throw new IllegalArgumentException("Could not load buffers for field " +
        field + ". error message: " + e.getMessage(), e);
  }

  List<Field> children = field.getChildren();
  if (!children.isEmpty()) {
    List<FieldVector> childrenFromFields = vector.getChildrenFromFields();
    checkArgument(children.size() == childrenFromFields.size(), "should have as many children as in the schema: found " + childrenFromFields.size() + " expected " + children.size());
    for (int i = 0; i < childrenFromFields.size(); i++) {
      Field child = children.get(i);
      FieldVector fieldVector = childrenFromFields.get(i);
      loadBuffers(fieldVector, child, buffers, nodes);
    }
  }
}
 
Example 4
Source File: ArrowSchemaConverter.java    From spark-bigquery-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Converts an Arrow field into the corresponding {@code DataType}.
 *
 * <p>List fields become an {@code ArrayType} built from their first child; Struct fields become a
 * {@code StructType} with one {@code StructField} per child (empty metadata). Every other field is
 * delegated to {@code fromArrowType}.
 *
 * @param field the Arrow field to convert
 * @return the converted data type
 */
private static DataType fromArrowField(Field field)
{
  final ArrowTypeID typeId = field.getType().getTypeID();

  if (typeId == ArrowTypeID.List)
  {
    // The first child describes the list's elements.
    final Field element = field.getChildren().get(0);
    final DataType convertedElement = fromArrowField(element);
    return new ArrayType(convertedElement, element.isNullable());
  }

  if (typeId == ArrowTypeID.Struct)
  {
    final java.util.List<Field> members = field.getChildren();
    final StructField[] converted = new StructField[members.size()];

    for (int i = 0; i < converted.length; i++)
    {
      final Field member = members.get(i);
      final DataType memberType = fromArrowField(member);
      converted[i] = new StructField(member.getName(), memberType, member.isNullable(), Metadata.empty());
    }

    return new StructType(converted);
  }

  return fromArrowType(field.getType());
}
 
Example 5
Source File: GlueFieldLexerTest.java    From aws-athena-query-federation with Apache License 2.0 5 votes vote down vote up
/**
 * Lexes INPUT1 and checks the resulting field tree: a struct named "testField" with the members
 * street_address (struct of three leaves), country (varchar) and postal_code (list of varchar).
 */
@Test
public void lexTest()
{
    logger.info("lexTest: enter");

    final Field root = GlueFieldLexer.lex("testField", INPUT1);

    logger.info("lexTest: {}", root);

    // Root struct.
    assertEquals("testField", root.getName());
    assertEquals(Types.MinorType.STRUCT, Types.getMinorTypeForArrowType(root.getType()));
    assertEquals(3, root.getChildren().size());

    // First member: nested struct.
    final List<Field> rootMembers = root.getChildren();
    final Field streetAddress = rootMembers.get(0);
    assertEquals("street_address", streetAddress.getName());
    assertEquals(Types.MinorType.STRUCT, Types.getMinorTypeForArrowType(streetAddress.getType()));
    assertEquals(3, streetAddress.getChildren().size());

    // Leaves of the nested struct.
    final List<Field> addressMembers = streetAddress.getChildren();
    final Field streetNumber = addressMembers.get(0);
    assertEquals("street_number", streetNumber.getName());
    assertEquals(Types.MinorType.INT, Types.getMinorTypeForArrowType(streetNumber.getType()));
    assertEquals(0, streetNumber.getChildren().size());

    final Field streetName = addressMembers.get(1);
    assertEquals("street_name", streetName.getName());
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(streetName.getType()));
    assertEquals(0, streetName.getChildren().size());

    final Field streetType = addressMembers.get(2);
    assertEquals("street_type", streetType.getName());
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(streetType.getType()));
    assertEquals(0, streetType.getChildren().size());

    // Second member: plain varchar.
    final Field country = rootMembers.get(1);
    assertEquals("country", country.getName());
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(country.getType()));
    assertEquals(0, country.getChildren().size());

    // Third member: list whose single child is a varchar.
    final Field postalCode = rootMembers.get(2);
    assertEquals("postal_code", postalCode.getName());
    assertEquals(Types.MinorType.LIST, Types.getMinorTypeForArrowType(postalCode.getType()));
    assertEquals(1, postalCode.getChildren().size());
    assertEquals(Types.MinorType.VARCHAR, Types.getMinorTypeForArrowType(postalCode.getChildren().get(0).getType()));

    logger.info("lexTest: exit");
}
 
Example 6
Source File: SchemaUtils.java    From aws-athena-query-federation with Apache License 2.0 5 votes vote down vote up
/**
 * Merges two STRUCT Fields into one. Merging two identical STRUCTs yields a result that is
 * essentially the same as either input.
 * <p>
 * All children of the current field are taken first. Each child of the new field is then added if
 * its name is not present yet. If the name is present, it is replaced by a String field when the
 * two minor types differ, or merged recursively when both are LIST or both are STRUCT.
 *
 * @param fieldName The name of the merged Field.
 * @param curParentField The current field to use as the base for the merge.
 * @param newParentField The new field to merge into the base.
 * @return The merged field.
 */
private static Field mergeStructField(String fieldName, Field curParentField, Field newParentField)
{
    FieldBuilder merged = FieldBuilder.newBuilder(fieldName, Types.MinorType.STRUCT.getType());

    //Seed the result with everything the base already has.
    for (Field baseChild : curParentField.getChildren()) {
        merged.addField(baseChild);
    }

    for (Field incoming : newParentField.getChildren()) {
        Field existing = merged.getChild(incoming.getName());

        if (existing == null) {
            //Name not seen before, take the incoming child as-is.
            merged.addField(incoming);
        }
        else {
            Types.MinorType newType = Types.getMinorTypeForArrowType(incoming.getType());
            Types.MinorType curType = Types.getMinorTypeForArrowType(existing.getType());

            if (curType != newType) {
                //TODO: mixed-type fields are currently resolved by defaulting to VARCHAR. This is _not_
                //ideal for various reasons, among them predicate oddities if the field is used in a filter.
                logger.warn("mergeStructField: Encountered a mixed-type field[{}] {} vs {}, defaulting to String.",
                        incoming.getName(), newType, curType);

                merged.addStringField(incoming.getName());
            }
            else if (curType == Types.MinorType.LIST) {
                merged.addField(mergeListField(incoming.getName(), existing, incoming));
            }
            else if (curType == Types.MinorType.STRUCT) {
                merged.addField(mergeStructField(incoming.getName(), existing, incoming));
            }
            //Same non-nested type on both sides: the base child is kept untouched.
        }
    }

    return merged.build();
}
 
Example 7
Source File: SerializedFieldHelper.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a field, together with all of its descendants, into a {@code SerializedField}.
 *
 * @param field the field to serialize
 * @return the serialized form, carrying one serialized child per child field and the field's name
 */
public static SerializedField getSerializedField(Field field) {
  final SerializedField.Builder builder = getAsBuilder(getMajorTypeForField(field));

  // Children are serialized depth-first and attached in declaration order.
  if (field.getChildren() != null) {
    field.getChildren().forEach(child -> builder.addChild(getSerializedField(child)));
  }

  builder.setNamePart(NamePart.newBuilder().setName(field.getName()));
  return builder.build();
}
 
Example 8
Source File: FieldIdUtil2.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the child of {@code f} whose name equals {@code name} when both are lower-cased.
 *
 * <p>The children are scanned directly, so no lookup map is allocated per call. If several
 * children share the same lower-cased name, the last one wins.
 *
 * @param f the parent field whose children are searched
 * @param name the child name to look for
 * @return the matching child paired with its position among the children, or {@code null} if no
 *         child matches
 */
private static FieldWithOrdinal getChildField(Field f, String name) {
  final String wanted = name.toLowerCase();

  FieldWithOrdinal match = null;
  int ordinal = 0;
  for (Field child : f.getChildren()) {
    if (child.getName().toLowerCase().equals(wanted)) {
      // No early exit: a later child with the same lower-cased name must override this one.
      match = new FieldWithOrdinal(child, ordinal);
    }
    ordinal++;
  }

  return match;
}
 
Example 9
Source File: StructVectorHelper.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a vector for every child of {@code field}, lets the child's helper (when one exists)
 * materialize it recursively, and registers it on the struct vector under the child's name.
 *
 * @param field the field whose children are materialized
 */
public void materialize(Field field) {
  for (final Field childField : field.getChildren()) {
    final FieldVector childVector =
        TypeHelper.getNewVector(childField, structVector.allocator, structVector.callBack);

    // Nested types bring their own helper; it fills in the grandchildren.
    TypeHelper.getHelper(childVector).ifPresent(helper -> helper.materialize(childField));

    structVector.putChild(childField.getName(), childVector);
  }
}
 
Example 10
Source File: UnionVectorHelper.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a vector for every child of {@code field}, lets the child's helper (when one exists)
 * materialize it recursively, and adds it to the union vector.
 *
 * @param field the field whose children are materialized
 */
public void materialize(Field field) {
  for (final Field member : field.getChildren()) {
    final FieldVector memberVector = TypeHelper.getNewVector(member, unionVector.getAllocator());

    // Nested types bring their own helper; it fills in the grandchildren.
    TypeHelper.getHelper(memberVector).ifPresent(helper -> helper.materialize(member));

    unionVector.addVector(memberVector);
  }
}
 
Example 11
Source File: NonNullableStructVectorHelper.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Materializes the children of {@code field}: each child gets a new vector, which is recursively
 * materialized by its helper if one is available, and is then put on the struct vector under the
 * child's name.
 *
 * @param field the field whose children are materialized
 */
public void materialize(Field field) {
  field.getChildren().forEach(childField -> {
    final FieldVector created =
        TypeHelper.getNewVector(childField, structVector.allocator, structVector.callBack);
    TypeHelper.getHelper(created).ifPresent(helper -> helper.materialize(childField));
    structVector.putChild(childField.getName(), created);
  });
}
 
Example 12
Source File: FlattenPrel.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Handles a List-typed field. When the field's name equals the unescaped path of {@code column},
 * the list is replaced by a field with the same name but the nullability, type and children of
 * the list's first child. Otherwise the field is returned unchanged.
 */
@Override
public Field visit(ArrowType.List type) {
  final boolean nameMatchesColumn = field.getName().equals(column.getAsUnescapedPath());
  if (!nameMatchesColumn) {
    return field;
  }

  final Field element = field.getChildren().get(0);
  return new Field(field.getName(), element.isNullable(), element.getType(), element.getChildren());
}
 
Example 13
Source File: BatchSchema.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Appends a description of {@code field} and all of its descendants to {@code b}.
 *
 * <p>Every field starts on a new line, is indented by one space per nesting level, and is written
 * as {@code name;nullable;type}. Children follow their parent one level deeper.
 *
 * @param field the field to describe
 * @param depth nesting level of {@code field}, i.e. the number of indentation spaces
 * @param b the builder that receives the output
 */
public static void toString(Field field, int depth, StringBuilder b) {
  b.append("\n");
  int pending = depth;
  while (pending-- > 0) {
    b.append(" ");
  }

  b.append(field.getName())
      .append(";")
      .append(field.isNullable())
      .append(";")
      .append(Describer.describe(field.getType()));

  final int childDepth = depth + 1;
  for (Field nested : field.getChildren()) {
    toString(nested, childDepth, b);
  }
}
 
Example 14
Source File: BatchSchema.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the number of leaf fields (fields without children) in the tree rooted at {@code f} to
 * {@code count}.
 *
 * @param f root of the field tree to count
 * @param count running total before this field is visited
 * @return the running total after this field and all of its descendants were visited
 */
private static int countFields(Field f, int count){
  // A field without children is a leaf and counts once.
  if (f.getChildren().isEmpty()) {
    return count + 1;
  }

  int total = count;
  for (Field nested : f.getChildren()) {
    total = countFields(nested, total);
  }
  return total;
}
 
Example 15
Source File: FileSplitParquetRecordReader.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether a field read from a file is compatible with the matching table field.
 *
 * <p>A missing table field is always accepted. Otherwise the names must match ignoring case, the
 * change from the file type to the table type must pass
 * {@code TypeCastRules.isHiveCompatibleTypeChange}, and every child of the file field must in turn
 * be compatible with the table child of the same lower-cased name.
 *
 * @param tableField field from the table schema; may be {@code null}
 * @param fileField field from the file schema; must not be {@code null}
 * @return {@code true} if the field and all of its children are compatible
 */
private boolean areFieldsCompatible(Field tableField, Field fileField) {
  Preconditions.checkArgument(fileField != null, "Invalid argument");

  // nothing to compare against: accept
  if (tableField == null) {
    return true;
  }

  // names must agree (case-insensitively)
  if (!tableField.getName().equalsIgnoreCase(fileField.getName())) {
    return false;
  }

  // the type in the file must be convertible to the type in the table
  final TypeProtos.MinorType typeInTable = CompleteType.fromField(tableField).toMinorType();
  final TypeProtos.MinorType typeInFile = CompleteType.fromField(fileField).toMinorType();
  if (!TypeCastRules.isHiveCompatibleTypeChange(typeInFile, typeInTable)) {
    return false;
  }

  // leaf in the file: no children left to check
  Preconditions.checkState(fileField.getChildren() != null, "Invalid state");
  if (fileField.getChildren().isEmpty()) {
    return true;
  }

  // index the table children by lower-cased name, then check every file child against its peer
  Preconditions.checkState(tableField.getChildren() != null, "Invalid state");
  final Map<String, Field> tableChildrenByName = tableField.getChildren().stream().collect(
    Collectors.toMap(tableChild -> tableChild.getName().toLowerCase(), tableChild -> tableChild)
  );

  for (Field fileChild : fileField.getChildren()) {
    final Field tableChild = tableChildrenByName.get(fileChild.getName().toLowerCase());
    if (!areFieldsCompatible(tableChild, fileChild)) {
      return false;
    }
  }

  // the field itself and all of its children are compatible
  return true;
}
 
Example 16
Source File: CompleteType.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code CompleteType} from the type and the children of the given field.
 *
 * <p>The field's name and nullability are not passed on. The nullability precondition below is
 * intentionally left disabled.
 *
 * @param field the field whose type and children describe the complete type
 * @return a new {@code CompleteType} built from {@code field.getType()} and {@code field.getChildren()}
 */
public static CompleteType fromField(Field field){
    // IGNORE this until the NullableMapVector.getField() returns a nullable type.
//    Preconditions.checkArgument(field.isNullable(), "Dremio only supports nullable types.");
    return new CompleteType(field.getType(), field.getChildren());
  }
 
Example 17
Source File: BasicTypeHelper.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new, empty vector matching the type of {@code field}.
 *
 * <p>{@code ObjectType} fields get an {@code ObjectVector}. For every other field the minor type
 * decides the vector class. UNION, LIST and STRUCT vectors also have their children initialized
 * from the field's children when there are any.
 *
 * @param field the field describing the vector to create
 * @param allocator allocator handed to the new vector
 * @param callBack call back handed to UNION, LIST and STRUCT vectors
 * @return the newly created vector
 * @throws UnsupportedOperationException if the minor type is not handled here
 */
public static FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack callBack) {
  if (field.getType() instanceof ObjectType) {
    return new ObjectVector(field.getName(), allocator);
  }

  final MinorType minorType = org.apache.arrow.vector.types.Types.getMinorTypeForArrowType(field.getType());
  final List<Field> childFields = field.getChildren();

  switch (minorType) {

  // complex types: children are initialized from the field when present
  case UNION: {
    final UnionVector union = new UnionVector(field.getName(), allocator, callBack);
    if (!childFields.isEmpty()) {
      union.initializeChildrenFromFields(childFields);
    }
    return union;
  }
  case LIST: {
    final ListVector list = new ListVector(field.getName(), allocator, callBack);
    if (!childFields.isEmpty()) {
      list.initializeChildrenFromFields(childFields);
    }
    return list;
  }
  case STRUCT: {
    final StructVector struct = new StructVector(field.getName(), allocator, callBack);
    if (!childFields.isEmpty()) {
      struct.initializeChildrenFromFields(childFields);
    }
    return struct;
  }

  // simple types
  case NULL:
    return new ZeroVector();
  case TINYINT:
    return new TinyIntVector(field, allocator);
  case UINT1:
    return new UInt1Vector(field, allocator);
  case UINT2:
    return new UInt2Vector(field, allocator);
  case SMALLINT:
    return new SmallIntVector(field, allocator);
  case INT:
    return new IntVector(field, allocator);
  case UINT4:
    return new UInt4Vector(field, allocator);
  case FLOAT4:
    return new Float4Vector(field, allocator);
  case INTERVALYEAR:
    return new IntervalYearVector(field, allocator);
  case TIMEMILLI:
    return new TimeMilliVector(field, allocator);
  case BIGINT:
    return new BigIntVector(field, allocator);
  case UINT8:
    return new UInt8Vector(field, allocator);
  case FLOAT8:
    return new Float8Vector(field, allocator);
  case DATEMILLI:
    return new DateMilliVector(field, allocator);
  case TIMESTAMPMILLI:
    return new TimeStampMilliVector(field, allocator);
  case INTERVALDAY:
    return new IntervalDayVector(field, allocator);
  case DECIMAL:
    return new DecimalVector(field, allocator);
  case FIXEDSIZEBINARY:
    return new FixedSizeBinaryVector(field.getName(), allocator, WIDTH_ESTIMATE);
  case VARBINARY:
    return new VarBinaryVector(field, allocator);
  case VARCHAR:
    return new VarCharVector(field, allocator);
  case BIT:
    return new BitVector(field, allocator);

  default:
    // Every supported ValueVector type is handled above.
    throw new UnsupportedOperationException(buildErrorMessage("get new vector", minorType));
  }
}
 
Example 18
Source File: BlockUtils.java    From aws-athena-query-federation with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single Struct value.
 * <p>
 * A null value is ignored. Otherwise every child field of the struct is resolved against the value
 * and written: LIST and STRUCT children recurse into writeList / writeStruct, all other children
 * go through writeStructValue.
 *
 * @param allocator The BlockAllocator which can be used to generate Apache Arrow Buffers for types
 * which require conversion to an Arrow Buffer before they can be written using the FieldWriter.
 * @param writer The FieldWriter for the Struct field we'd like to write into.
 * @param field The Schema details of the Struct Field we are writing into.
 * @param pos The position (row) in the Apache Arrow batch we are writing to.
 * @param value The value we'd like to write as a struct.
 * @param resolver The field resolver that can be used to extract individual Struct fields from the value.
 */
@VisibleForTesting
protected static void writeStruct(BufferAllocator allocator,
        StructWriter writer,
        Field field,
        int pos,
        Object value,
        FieldResolver resolver)
{
    //Null writes are expected to have been dealt with earlier, so there is nothing to do here.
    if (value == null) {
        return;
    }

    //start()/end() bracket the struct value, which is how Apache Arrow copes with the variable length of Struct types.
    writer.start();
    for (Field member : field.getChildren()) {
        //Pull this member's value out of the struct value via the FieldResolver, then write it.
        Object memberValue = resolver.getFieldValue(member, value);
        switch (Types.getMinorTypeForArrowType(member.getType())) {
            case LIST:
                writeList(allocator, (FieldWriter) writer.list(member.getName()), member, pos, ((List) memberValue), resolver);
                break;
            case STRUCT:
                writeStruct(allocator, writer.struct(member.getName()), member, pos, memberValue, resolver);
                break;
            default:
                writeStructValue(writer, member, allocator, memberValue);
                break;
        }
    }
    writer.end();
}
 
Example 19
Source File: BlockUtils.java    From aws-athena-query-federation with Apache License 2.0 4 votes vote down vote up
/**
 * Used to write a List value.
 * <p>
 * A null value is ignored. Null elements inside the collection are skipped, so nothing is written
 * for them. Nested LIST and STRUCT elements recurse into writeList / writeStruct, all other
 * elements go through writeListValue.
 *
 * @param allocator The BlockAllocator which can be used to generate Apache Arrow Buffers for types
 * which require conversion to an Arrow Buffer before they can be written using the FieldWriter.
 * @param writer The FieldWriter for the List field we'd like to write into.
 * @param field The Schema details of the List Field we are writing into.
 * @param pos The position (row) in the Apache Arrow batch we are writing to.
 * @param value The collection of values we want to write into the row.
 * @param resolver The field resolver that can be used to extract individual values from nested structs.
 * @throws NullPointerException if the collection contains a non-null element but the List field
 * declares no child field to take the element type from.
 */
@VisibleForTesting
protected static void writeList(BufferAllocator allocator,
        FieldWriter writer,
        Field field,
        int pos,
        Iterable value,
        FieldResolver resolver)
{
    if (value == null) {
        return;
    }

    //Apache Arrow List types have a single 'special' child field which gives us the concrete type of the values
    //stored in the list.
    Field child = null;
    if (field.getChildren() != null && !field.getChildren().isEmpty()) {
        child = field.getChildren().get(0);
    }

    //Mark the beginning of the list, this is essentially how Apache Arrow handles the variable length nature
    //of lists.
    writer.startList();

    for (Object val : value) {
        //Null elements are skipped.
        if (val == null) {
            continue;
        }

        //Without a child field the element type is unknown. Fail with the same exception type as a bare
        //dereference would, but say which field is misconfigured.
        if (child == null) {
            throw new NullPointerException("List field '" + field.getName()
                    + "' declares no child field, so the type of its elements is unknown.");
        }

        switch (Types.getMinorTypeForArrowType(child.getType())) {
            case LIST:
                writeList(allocator, (FieldWriter) writer.list(), child, pos, ((List) val), resolver);
                break;
            case STRUCT:
                writeStruct(allocator, writer.struct(), child, pos, val, resolver);
                break;
            default:
                writeListValue(writer, child.getType(), allocator, val);
                break;
        }
    }
    writer.endList();
}