org.apache.arrow.vector.types.FloatingPointPrecision Java Examples

The following examples show how to use org.apache.arrow.vector.types.FloatingPointPrecision. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Arrow {@link Field} that represents a column of the given
 * {@link ColumnType}.
 *
 * <p>Categorical and String columns both map to UTF-8, and Bytes and NDArray
 * columns both map to binary.
 *
 * @param name the name of the field
 * @param columnType the column type to map to an Arrow type
 * @return the result of {@code field(name, arrowType)} for the mapped Arrow type
 * @throws IllegalArgumentException if the column type has no mapping
 */
public static Field getFieldForColumn(String name,ColumnType columnType) {
    final ArrowType arrowType;
    switch(columnType) {
        // NOTE(review): the second Int argument is presumably Arrow's isSigned flag,
        // so these integer types would be declared unsigned - confirm this is intended.
        case Long:
            arrowType = new ArrowType.Int(64,false);
            break;
        case Integer:
            arrowType = new ArrowType.Int(32,false);
            break;
        case Double:
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
            break;
        case Float:
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
            break;
        case Boolean:
            arrowType = new ArrowType.Bool();
            break;
        case Time:
            arrowType = new ArrowType.Date(DateUnit.MILLISECOND);
            break;
        case Categorical:
        case String:
            arrowType = new ArrowType.Utf8();
            break;
        case Bytes:
        case NDArray:
            arrowType = new ArrowType.Binary();
            break;
        default:
            throw new IllegalArgumentException("Column type invalid " + columnType);
    }
    return field(name,arrowType);
}
 
Example #2
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the Arrow {@link Field} used to store a column of the given
 * {@link ColumnType} under the given name.
 *
 * <p>String and Categorical columns share the UTF-8 type; NDArray and Bytes
 * columns share the binary type.
 *
 * @param name the name of the field
 * @param columnType the column type to translate
 * @return the result of {@code field(name, arrowType)} for the translated type
 * @throws IllegalArgumentException if no Arrow type is defined for the column type
 */
public static Field getFieldForColumn(String name,ColumnType columnType) {
    final ArrowType mappedType;
    switch(columnType) {
        case Integer:
            mappedType = new ArrowType.Int(32,false);
            break;
        case Long:
            mappedType = new ArrowType.Int(64,false);
            break;
        case Float:
            mappedType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
            break;
        case Double:
            mappedType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
            break;
        case Boolean:
            mappedType = new ArrowType.Bool();
            break;
        case Time:
            mappedType = new ArrowType.Date(DateUnit.MILLISECOND);
            break;
        case String:
        case Categorical:
            mappedType = new ArrowType.Utf8();
            break;
        case NDArray:
        case Bytes:
            mappedType = new ArrowType.Binary();
            break;
        default:
            throw new IllegalArgumentException("Column type invalid " + columnType);
    }
    return field(name,mappedType);
}
 
Example #3
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a {@link ColumnType} to the Arrow {@link Field} used to store it.
 *
 * @param name the name given to the resulting field
 * @param columnType the column type to map
 * @return the result of {@code field(name, arrowType)} for the mapped Arrow type
 * @throws IllegalArgumentException if the column type has no Arrow mapping
 */
public static Field getFieldForColumn(String name, ColumnType columnType) {
    final ArrowType arrowType;
    switch (columnType) {
        case Integer:
            arrowType = new ArrowType.Int(32, false);
            break;
        case Long:
            arrowType = new ArrowType.Int(64, false);
            break;
        case Float:
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
            break;
        case Double:
            arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
            break;
        case Boolean:
            arrowType = new ArrowType.Bool();
            break;
        case Time:
            arrowType = new ArrowType.Date(DateUnit.MILLISECOND);
            break;
        // Both textual column kinds are stored as UTF-8.
        case Categorical:
        case String:
            arrowType = new ArrowType.Utf8();
            break;
        // Both opaque payload kinds are stored as binary.
        case NDArray:
        case Bytes:
            arrowType = new ArrowType.Binary();
            break;
        default:
            throw new IllegalArgumentException("Column type invalid " + columnType);
    }
    return field(name, arrowType);
}
 
Example #4
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0 6 votes vote down vote up
/**
 * Derives the column metadata that corresponds to the Arrow type of a field.
 *
 * @param field the Arrow field to inspect
 * @return a metadata object named after the field and chosen by its Arrow type
 * @throws IllegalStateException if the field's Arrow type is not one of the handled types
 */
public static ColumnMetaData metaDataFromField(Field field) {
    final ArrowType type = field.getFieldType().getType();
    final String columnName = field.getName();

    if (type instanceof ArrowType.Int) {
        // Only a 32-bit width maps to an integer column; any other width is treated as long.
        if (((ArrowType.Int) type).getBitWidth() == 32) {
            return new IntegerMetaData(columnName);
        }
        return new LongMetaData(columnName);
    }
    if (type instanceof ArrowType.Bool) {
        return new BooleanMetaData(columnName);
    }
    if (type instanceof ArrowType.FloatingPoint) {
        // Only DOUBLE precision maps to a double column; any other precision is treated as float.
        if (((ArrowType.FloatingPoint) type).getPrecision() == FloatingPointPrecision.DOUBLE) {
            return new DoubleMetaData(columnName);
        }
        return new FloatMetaData(columnName);
    }
    if (type instanceof ArrowType.Binary) {
        return new BinaryMetaData(columnName);
    }
    if (type instanceof ArrowType.Utf8) {
        return new StringMetaData(columnName);
    }
    if (type instanceof ArrowType.Date) {
        return new TimeMetaData(columnName);
    }
    throw new IllegalStateException("Illegal type " + type);
}
 
Example #5
Source File: BatchSchemaFieldTest.java    From dremio-oss with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the string form produced for a struct field whose children cover
 * the primitive Arrow types.
 */
@Test
public void testFromFieldWithStructTypes() {
  // Children of the struct, in the order they are expected to be printed.
  final List<Field> children = new ArrayList<>();
  children.add(new Field("string_field", FieldType.nullable(ArrowType.Utf8.INSTANCE), null));
  children.add(new Field("int_field", FieldType.nullable(new ArrowType.Int(32, true)), null));
  children.add(new Field("bigint_field", FieldType.nullable(new ArrowType.Int(64, true)), null));
  children.add(new Field("float_field",
    FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null));
  children.add(new Field("double_field",
    FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null));
  children.add(new Field("decimal_field", FieldType.nullable(new ArrowType.Decimal(10,5)), null));

  final FieldType structType = FieldType.nullable(new ArrowType.Struct());
  final Field structField = new Field("struct_field", structType, children);

  final String expectedChildren = String.join(", ",
    "string_field: VARCHAR",
    "int_field: INTEGER",
    "bigint_field: BIGINT",
    "float_field: FLOAT",
    "double_field: DOUBLE",
    "decimal_field: DECIMAL");
  final String expected = "struct_field: STRUCT<" + expectedChildren + ">";

  Assert.assertEquals(expected, BatchSchemaField.fromField(structField).toString());
}
 
Example #6
Source File: BatchSchemaFieldTest.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the string form produced for each primitive Arrow field type.
 * Inputs and expected descriptions are kept in two parallel lists.
 */
@Test
public void testFromFieldWithPrimitiveTypes() {
  final List<Field> inputs = new ArrayList<>();
  final List<String> descriptions = new ArrayList<>();

  inputs.add(new Field("string_field", FieldType.nullable(ArrowType.Utf8.INSTANCE), null));
  descriptions.add("string_field: VARCHAR");

  final FieldType int32 = FieldType.nullable(new ArrowType.Int(32, true));
  inputs.add(new Field("int_field", int32, null));
  descriptions.add("int_field: INTEGER");

  final FieldType int64 = FieldType.nullable(new ArrowType.Int(64, true));
  inputs.add(new Field("bigint_field", int64, null));
  descriptions.add("bigint_field: BIGINT");

  final FieldType float32 = FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
  inputs.add(new Field("float_field", float32, null));
  descriptions.add("float_field: FLOAT");

  final FieldType float64 = FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
  inputs.add(new Field("double_field", float64, null));
  descriptions.add("double_field: DOUBLE");

  final FieldType decimal = FieldType.nullable(new ArrowType.Decimal(10,5));
  inputs.add(new Field("decimal_field", decimal, null));
  descriptions.add("decimal_field: DECIMAL");

  // Guard against the two lists drifting apart before comparing them pairwise.
  Assert.assertEquals(inputs.size(), descriptions.size());
  int index = 0;
  for (Field input : inputs) {
    Assert.assertEquals(descriptions.get(index++), BatchSchemaField.fromField(input).toString());
  }
}
 
Example #7
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a two-column (float, int) record batch to an in-memory Arrow file,
 * reads it back with {@code ArrowConverter.readFromBytes} and checks the
 * column and record counts, then converts the result back to Arrow columns.
 *
 * @throws Exception if writing or reading the Arrow data fails
 */
@Test
public void testReadSchemaAndRecordsFromByteArray() throws Exception {
    BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);

    int valueCount = 3;
    List<Field> fields = new ArrayList<>();
    fields.add(ArrowConverter.field("field1",new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
    fields.add(ArrowConverter.intField("field2"));

    List<FieldVector> fieldVectors = new ArrayList<>();
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field1",new float[] {1,2,3}));
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field2",new int[] {1,2,3}));


    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(fields);

    VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount);
    VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
    // The unloaded batch is not used afterwards; close it so it does not keep
    // holding the references it took on the vectors' buffers.
    vectorUnloader.getRecordBatch().close();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // No catch block: a failed write must fail the test instead of being logged
    // and letting the assertions run against partial output.
    try(ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
        arrowFileWriter.writeBatch();
    }

    byte[] arr = byteArrayOutputStream.toByteArray();
    val arr2 = ArrowConverter.readFromBytes(arr);
    assertEquals(2,arr2.getFirst().numColumns());
    assertEquals(3,arr2.getRight().size());

    val arrowCols = ArrowConverter.toArrowColumns(allocator,arr2.getFirst(),arr2.getRight());
    assertEquals(2,arrowCols.size());
    assertEquals(valueCount,arrowCols.get(0).getValueCount());
}
 
Example #8
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the column metadata implementation that matches a field's Arrow type.
 *
 * @param field the Arrow field to inspect
 * @return a metadata object named after the field
 * @throws IllegalStateException if the Arrow type is not one of the handled types
 */
private static ColumnMetaData metaDataFromField(Field field) {
    final ArrowType type = field.getFieldType().getType();
    final String columnName = field.getName();

    if(type instanceof ArrowType.Int) {
        // A width of exactly 32 bits is an integer column; anything else is a long column.
        final boolean is32Bit = ((ArrowType.Int) type).getBitWidth() == 32;
        return is32Bit ? new IntegerMetaData(columnName) : new LongMetaData(columnName);
    }
    if(type instanceof ArrowType.Bool) {
        return new BooleanMetaData(columnName);
    }
    if(type instanceof ArrowType.FloatingPoint) {
        // DOUBLE precision is a double column; any other precision is a float column.
        final boolean isDouble =
                ((ArrowType.FloatingPoint) type).getPrecision() == FloatingPointPrecision.DOUBLE;
        return isDouble ? new DoubleMetaData(columnName) : new FloatMetaData(columnName);
    }
    if(type instanceof ArrowType.Binary) {
        return new BinaryMetaData(columnName);
    }
    if(type instanceof ArrowType.Utf8) {
        return new StringMetaData(columnName);
    }
    if(type instanceof ArrowType.Date) {
        return new TimeMetaData(columnName);
    }
    throw new IllegalStateException("Illegal type " + type);
}
 
Example #9
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a two-column (float, int) record batch to an in-memory Arrow file,
 * reads it back with {@code ArrowConverter.readFromBytes} and checks the
 * column and record counts, then converts the result back to Arrow columns.
 *
 * @throws Exception if writing or reading the Arrow data fails
 */
@Test
public void testReadSchemaAndRecordsFromByteArray() throws Exception {
    BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);

    int valueCount = 3;
    List<Field> fields = new ArrayList<>();
    fields.add(ArrowConverter.field("field1",new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
    fields.add(ArrowConverter.intField("field2"));

    List<FieldVector> fieldVectors = new ArrayList<>();
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field1",new float[] {1,2,3}));
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field2",new int[] {1,2,3}));


    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(fields);

    VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount);
    VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
    // The unloaded batch is not used afterwards; close it so it does not keep
    // holding the references it took on the vectors' buffers.
    vectorUnloader.getRecordBatch().close();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    // No catch block: a failed write must fail the test instead of printing a
    // stack trace and letting the assertions run against partial output.
    try(ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
        arrowFileWriter.writeBatch();
    }

    byte[] arr = byteArrayOutputStream.toByteArray();
    val arr2 = ArrowConverter.readFromBytes(arr);
    assertEquals(2,arr2.getFirst().numColumns());
    assertEquals(3,arr2.getRight().size());

    val arrowCols = ArrowConverter.toArrowColumns(allocator,arr2.getFirst(),arr2.getRight());
    assertEquals(2,arrowCols.size());
    assertEquals(valueCount,arrowCols.get(0).getValueCount());
}
 
Example #10
Source File: ArrowConverter.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Translates the Arrow type of a field into the matching column metadata.
 *
 * @param field the Arrow field to inspect
 * @return a metadata object carrying the field's name
 * @throws IllegalStateException if the Arrow type is not one of the handled types
 */
private static ColumnMetaData metaDataFromField(Field field) {
    final ArrowType fieldType = field.getFieldType().getType();
    final String fieldName = field.getName();

    if(fieldType instanceof ArrowType.Int) {
        // Exactly 32 bits means an integer column; every other width becomes a long column.
        final int bitWidth = ((ArrowType.Int) fieldType).getBitWidth();
        return bitWidth == 32 ? new IntegerMetaData(fieldName) : new LongMetaData(fieldName);
    }
    if(fieldType instanceof ArrowType.Bool) {
        return new BooleanMetaData(fieldName);
    }
    if(fieldType instanceof ArrowType.FloatingPoint) {
        // DOUBLE precision becomes a double column; every other precision becomes a float column.
        final FloatingPointPrecision precision = ((ArrowType.FloatingPoint) fieldType).getPrecision();
        return precision == FloatingPointPrecision.DOUBLE
                ? new DoubleMetaData(fieldName)
                : new FloatMetaData(fieldName);
    }
    if(fieldType instanceof ArrowType.Binary) {
        return new BinaryMetaData(fieldName);
    }
    if(fieldType instanceof ArrowType.Utf8) {
        return new StringMetaData(fieldName);
    }
    if(fieldType instanceof ArrowType.Date) {
        return new TimeMetaData(fieldName);
    }
    throw new IllegalStateException("Illegal type " + fieldType);
}
 
Example #11
Source File: BatchSchemaFieldTest.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the string form produced for nested fields: a list of structs, and
 * a struct that contains that list.
 */
@Test
public void testFromFieldWithNestedTypes() {
  // Primitive children shared by the innermost struct.
  final List<Field> primitives = new ArrayList<>();
  primitives.add(new Field("string_field", FieldType.nullable(ArrowType.Utf8.INSTANCE), null));
  primitives.add(new Field("int_field", FieldType.nullable(new ArrowType.Int(32, true)), null));
  primitives.add(new Field("bigint_field", FieldType.nullable(new ArrowType.Int(64, true)), null));
  primitives.add(new Field("float_field",
    FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null));
  primitives.add(new Field("double_field",
    FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null));
  primitives.add(new Field("decimal_field", FieldType.nullable(new ArrowType.Decimal(10,5)), null));

  // Expected rendering of the innermost struct, reused by both assertions.
  final String expectedStructBody = "STRUCT<" + String.join(", ",
    "string_field: VARCHAR",
    "int_field: INTEGER",
    "bigint_field: BIGINT",
    "float_field: FLOAT",
    "double_field: DOUBLE",
    "decimal_field: DECIMAL") + ">";

  // LIST<STRUCT<...>>
  final List<Field> listChildren = new ArrayList<>();
  listChildren.add(new Field("$data$", FieldType.nullable(new ArrowType.Struct()), primitives));
  final Field listOfStructs =
    new Field("list_struct_field", FieldType.nullable(new ArrowType.List()), listChildren);

  final String expectedList = "list_struct_field: LIST<$data$: " + expectedStructBody + ">";
  Assert.assertEquals(expectedList, BatchSchemaField.fromField(listOfStructs).toString());

  // STRUCT<LIST<STRUCT<...>>>
  final List<Field> structChildren = new ArrayList<>();
  structChildren.add(listOfStructs);
  final Field structOfList =
    new Field("struct_list_field", FieldType.nullable(new ArrowType.Struct()), structChildren);

  final String expectedStruct = "struct_list_field: STRUCT<" + expectedList + ">";
  Assert.assertEquals(expectedStruct, BatchSchemaField.fromField(structOfList).toString());
}
 
Example #12
Source File: ArrowTypeSerDe.java    From aws-athena-query-federation with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a floating-point Arrow type from its precision field.
 *
 * @param jparser the parser passed to {@code getNextStringField} to obtain the precision name
 * @param ctxt the deserialization context (not used here)
 * @return a floating-point Arrow type with the parsed precision
 * @throws IOException declared for the override; presumably raised by
 *         {@code getNextStringField} on a read failure - confirm against that helper
 */
@Override
protected ArrowType doTypedDeserialize(JsonParser jparser, DeserializationContext ctxt)
        throws IOException
{
    final String precisionName = getNextStringField(jparser, PRECISION_FIELD);
    // valueOf rejects any name that is not a FloatingPointPrecision constant.
    return new ArrowType.FloatingPoint(FloatingPointPrecision.valueOf(precisionName));
}
 
Example #13
Source File: GlobalDictionaryBuilder.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a container holding a single float vector with the distinct, sorted
 * values decoded from the given dictionaries, merged with the values of an
 * existing dictionary container when one is supplied.
 *
 * @param dictionaries dictionaries whose ids 0..maxId are decoded as floats
 * @param existingDict previously built dictionary whose first vector is merged in, or {@code null}
 * @param columnDescriptor descriptor whose path names the resulting field
 * @param bufferAllocator allocator handed to the new container
 * @return the populated container with its schema built
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String columnPath = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(columnPath, true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final Float4Vector dictVector = container.addOrGet(field);
  dictVector.allocateNew();

  // A sorted set both removes duplicates and fixes the order of the output vector.
  final SortedSet<Float> sortedValues = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    for (int id = 0; id <= dictionary.getMaxId(); ++id) {
      sortedValues.add(dictionary.decodeToFloat(id));
    }
  }
  if (existingDict != null) {
    final Float4Vector previous = existingDict.getValueAccessorById(Float4Vector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      sortedValues.add(previous.get(row));
    }
  }

  int recordCount = 0;
  for (Float value : sortedValues) {
    dictVector.setSafe(recordCount++, value);
  }
  dictVector.setValueCount(recordCount);
  container.setRecordCount(recordCount);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
 
Example #14
Source File: SchemaSerializationTest.java    From aws-athena-query-federation with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a schema through the object mapper inside a {@code TestPojo}
 * and checks that fields and custom metadata are unchanged.
 *
 * @throws IOException if serialization or deserialization fails
 */
@Test
public void serializationTest()
        throws IOException
{
    logger.info("serializationTest - enter");

    // Schema under test: two metadata entries and three fields.
    SchemaBuilder builder = new SchemaBuilder();
    builder.addMetadata("meta1", "meta-value-1");
    builder.addMetadata("meta2", "meta-value-2");
    builder.addField("intfield1", new ArrowType.Int(32, true));
    builder.addField("doublefield2", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
    builder.addField("varcharfield3", new ArrowType.Utf8());
    Schema expectedSchema = builder.build();

    // Direct serialization; the bytes written here are not inspected afterwards.
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    ByteArrayOutputStream schemaBytes = new ByteArrayOutputStream();
    schemaSerDe.serialize(expectedSchema, schemaBytes);

    // Round trip through the object mapper.
    TestPojo expectedPojo = new TestPojo(expectedSchema);
    ByteArrayOutputStream pojoBytes = new ByteArrayOutputStream();
    objectMapper.writeValue(pojoBytes, expectedPojo);
    ByteArrayInputStream pojoInput = new ByteArrayInputStream(pojoBytes.toByteArray());
    TestPojo actualPojo = objectMapper.readValue(pojoInput, TestPojo.class);

    Schema actualSchema = actualPojo.getSchema();
    logger.info("serializationTest - fields[{}]", actualSchema.getFields());
    logger.info("serializationTest - meta[{}]", actualSchema.getCustomMetadata());

    assertEquals(expectedSchema.getFields(), actualSchema.getFields());
    assertEquals(expectedSchema.getCustomMetadata(), actualSchema.getCustomMetadata());

    logger.info("serializationTest - exit");
}
 
Example #15
Source File: GlobalDictionaryBuilder.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a container holding a single double vector with the distinct, sorted
 * values decoded from the given dictionaries, merged with the values of an
 * existing dictionary container when one is supplied.
 *
 * @param dictionaries dictionaries whose ids 0..maxId are decoded as doubles
 * @param existingDict previously built dictionary whose first vector is merged in, or {@code null}
 * @param columnDescriptor descriptor whose path names the resulting field
 * @param bufferAllocator allocator handed to the new container
 * @return the populated container with its schema built
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final String columnPath = SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath();
  final Field field = new Field(columnPath, true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final Float8Vector dictVector = container.addOrGet(field);
  dictVector.allocateNew();

  // A sorted set both removes duplicates and fixes the order of the output vector.
  final SortedSet<Double> sortedValues = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    for (int id = 0; id <= dictionary.getMaxId(); ++id) {
      sortedValues.add(dictionary.decodeToDouble(id));
    }
  }
  if (existingDict != null) {
    final Float8Vector previous = existingDict.getValueAccessorById(Float8Vector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      sortedValues.add(previous.get(row));
    }
  }

  int recordCount = 0;
  for (Double value : sortedValues) {
    dictVector.setSafe(recordCount++, value);
  }
  dictVector.setValueCount(recordCount);
  container.setRecordCount(recordCount);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
 
Example #16
Source File: ArrowMemoryAllocatorFactory.java    From yosegi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds (or fetches) the child vector of a list vector for the given column
 * type and wraps it in the matching memory allocator.
 *
 * <p>NULL, EMPTY_ARRAY, EMPTY_SPREAD and any type not listed below yield
 * {@code NullMemoryAllocator.INSTANCE} without touching the list vector.
 *
 * @param columnType the type of the list's elements
 * @param columnName the column name (not used by this method)
 * @param allocator the buffer allocator passed on to the nested (union, array, spread) allocators
 * @param vector the list vector whose child vector is created or fetched
 * @param rowCount the row count passed on to the created allocator
 * @return the allocator wrapping the child vector, or the null allocator
 */
public static IMemoryAllocator getFromListVector(
      final ColumnType columnType ,
      final String columnName ,
      final BufferAllocator allocator ,
      final ListVector vector ,
      final int rowCount ) {
  switch ( columnType ) {
    case UNION:
      AddOrGetResult<UnionVector> unionVector =  vector.addOrGetVector(
          new FieldType( true , MinorType.UNION.getType() , null , null ) );
      return new ArrowUnionMemoryAllocator( allocator , unionVector.getVector() , rowCount );
    case ARRAY:
      AddOrGetResult<ListVector> listVector =  vector.addOrGetVector(
          new FieldType( true , ArrowType.List.INSTANCE , null , null ) );
      return new ArrowArrayMemoryAllocator( allocator , listVector.getVector() , rowCount );
    case SPREAD:
      AddOrGetResult<StructVector> mapVector = vector.addOrGetVector(
          new FieldType( true , ArrowType.Struct.INSTANCE , null , null ) );
      return new ArrowMapMemoryAllocator( allocator , mapVector.getVector() , rowCount );

    case BOOLEAN:
      AddOrGetResult<BitVector> bitVector = vector.addOrGetVector(
          new FieldType( true , ArrowType.Bool.INSTANCE , null , null ) );
      return new ArrowBooleanMemoryAllocator( bitVector.getVector() , rowCount );
    case BYTE:
      AddOrGetResult<TinyIntVector> byteVector = vector.addOrGetVector(
          new FieldType( true , new ArrowType.Int( 8 , true ) , null , null ) );
      return new ArrowByteMemoryAllocator( byteVector.getVector() , rowCount );
    case SHORT:
      AddOrGetResult<SmallIntVector> shortVector = vector.addOrGetVector(
          new FieldType( true , new ArrowType.Int( 16 , true ) , null , null ) );
      return new ArrowShortMemoryAllocator( shortVector.getVector() , rowCount );
    case INTEGER:
      AddOrGetResult<IntVector> integerVector =  vector.addOrGetVector(
          new FieldType( true , new ArrowType.Int( 32 , true ) , null , null ) );
      return new ArrowIntegerMemoryAllocator( integerVector.getVector() , rowCount );
    case LONG:
      AddOrGetResult<BigIntVector> longVector =  vector.addOrGetVector(
          new FieldType( true , new ArrowType.Int( 64 , true ) , null , null ) );
      return new ArrowLongMemoryAllocator( longVector.getVector() , rowCount );
    case FLOAT:
      // Float4Vector stores 32-bit values, so the declared precision must be
      // SINGLE; HALF (16-bit) does not match the vector class requested here.
      AddOrGetResult<Float4Vector> floatVector = vector.addOrGetVector(
          new FieldType(
            true ,
            new ArrowType.FloatingPoint( FloatingPointPrecision.SINGLE ) ,
            null ,
            null ) );
      return new ArrowFloatMemoryAllocator( floatVector.getVector() , rowCount );
    case DOUBLE:
      AddOrGetResult<Float8Vector> doubleVector = vector.addOrGetVector(
          new FieldType(
            true ,
            new ArrowType.FloatingPoint( FloatingPointPrecision.DOUBLE ) ,
            null ,
            null ) );
      return new ArrowDoubleMemoryAllocator( doubleVector.getVector() , rowCount );
    case STRING:
      AddOrGetResult<VarCharVector> charVector = vector.addOrGetVector(
          new FieldType( true , ArrowType.Utf8.INSTANCE , null , null ) );
      return new ArrowStringMemoryAllocator( charVector.getVector() , rowCount );
    case BYTES:
      AddOrGetResult<VarBinaryVector> binaryVector = vector.addOrGetVector(
          new FieldType( true , ArrowType.Binary.INSTANCE , null , null ) );
      return new ArrowBytesMemoryAllocator( binaryVector.getVector() , rowCount );

    case NULL:
    case EMPTY_ARRAY:
    case EMPTY_SPREAD:
    default:
      return NullMemoryAllocator.INSTANCE;
  }
}
 
Example #17
Source File: BlockTest.java    From aws-athena-query-federation with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the schema used by the block tests: two metadata entries, scalar
 * fields, list fields, a flat struct and a struct that nests another struct.
 *
 * @return the assembled schema
 */
public static Schema generateTestSchema()
{
    final String flatStruct = "structField27";
    final String nestingStruct = "structFieldNested28";

    SchemaBuilder builder = new SchemaBuilder();

    // Schema-level metadata.
    builder.addMetadata("meta1", "meta-value-1");
    builder.addMetadata("meta2", "meta-value-2");

    // Scalar fields.
    builder.addField("intfield1", new ArrowType.Int(32, true));
    builder.addField("doublefield2", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
    builder.addField("varcharfield3", new ArrowType.Utf8());
    builder.addField("datemillifield4", new ArrowType.Date(DateUnit.MILLISECOND));
    builder.addField("tinyintfield5", new ArrowType.Int(8, true));
    builder.addField("uint1field6", new ArrowType.Int(8, false));
    builder.addField("smallintfield7", new ArrowType.Int(16, true));
    builder.addField("uint2field8", new ArrowType.Int(16, false));
    builder.addField("datedayfield9", new ArrowType.Date(DateUnit.DAY));
    builder.addField("uint4field10", new ArrowType.Int(32, false));
    builder.addField("bigintfield11", new ArrowType.Int(64, true));
    builder.addField("decimalfield12", new ArrowType.Decimal(10, 2));
    builder.addField("floatfield13", new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
    builder.addField("varbinaryfield14", new ArrowType.Binary());
    builder.addField("bitfield15", new ArrowType.Bool());

    // List fields.
    builder.addListField("varcharlist16", Types.MinorType.VARCHAR.getType());
    builder.addListField("intlist17", Types.MinorType.INT.getType());
    builder.addListField("bigintlist18", Types.MinorType.BIGINT.getType());
    builder.addListField("tinyintlist19", Types.MinorType.TINYINT.getType());
    builder.addListField("smallintlist20", Types.MinorType.SMALLINT.getType());
    builder.addListField("float4list21", Types.MinorType.FLOAT4.getType());
    builder.addListField("float8list22", Types.MinorType.FLOAT8.getType());
    builder.addListField("shortdeclist23", new ArrowType.Decimal(10, 2));
    builder.addListField("londdeclist24", new ArrowType.Decimal(21, 2));
    builder.addListField("varbinarylist25", Types.MinorType.VARBINARY.getType());
    builder.addListField("bitlist26", Types.MinorType.BIT.getType());

    // Flat struct with one child per scalar kind.
    builder.addStructField(flatStruct);
    builder.addChildField(flatStruct, "nestedBigInt", Types.MinorType.BIGINT.getType());
    builder.addChildField(flatStruct, "nestedString", Types.MinorType.VARCHAR.getType());
    builder.addChildField(flatStruct, "tinyintcol", Types.MinorType.TINYINT.getType());
    builder.addChildField(flatStruct, "smallintcol", Types.MinorType.SMALLINT.getType());
    builder.addChildField(flatStruct, "float4Col", Types.MinorType.FLOAT4.getType());
    builder.addChildField(flatStruct, "float8Col", Types.MinorType.FLOAT8.getType());
    builder.addChildField(flatStruct, "shortDecCol", new ArrowType.Decimal(10, 2));
    builder.addChildField(flatStruct, "longDecCol", new ArrowType.Decimal(21, 2));
    builder.addChildField(flatStruct, "binaryCol", Types.MinorType.VARBINARY.getType());
    builder.addChildField(flatStruct, "bitCol", Types.MinorType.BIT.getType());

    // Struct containing a scalar child and a nested struct with its own lists.
    builder.addStructField(nestingStruct);
    builder.addChildField(nestingStruct, "bitCol", Types.MinorType.BIT.getType());
    builder.addChildField(nestingStruct,
            FieldBuilder.newBuilder("nestedStruct", new ArrowType.Struct())
                    .addField("nestedString", Types.MinorType.VARCHAR.getType(), null)
                    .addField("nestedBigInt", Types.MinorType.BIGINT.getType(), null)
                    .addListField("nestedList", Types.MinorType.VARCHAR.getType())
                    .addListField("nestedListDec", new ArrowType.Decimal(10, 2))
                    .build());

    return builder.build();
}
 
Example #18
Source File: UserDefinedFunctionHandlerTest.java    From aws-athena-query-federation with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a nullable Arrow field for the given Java type.
 *
 * <p>{@code List} produces a list field with a single 32-bit signed integer
 * child named after the column. {@code Map} produces a struct field with the
 * children {@code intVal} (32-bit signed integer) and {@code doubleVal}
 * (double precision).
 *
 * @param type the Java class to map; compared by identity against the supported classes
 * @param columnName the name of the resulting field
 * @return the Arrow field describing the column
 * @throws IllegalArgumentException if the type is not one of the supported classes
 */
private Field getArrowField(Class<?> type, String columnName)
{
    if (type == Integer.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null);
    }

    if (type == Float.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null);
    }

    if (type == Double.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null);
    }

    if (type == String.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Utf8()), null);
    }

    if (type == Boolean.class) {
        return new Field(columnName, FieldType.nullable(new ArrowType.Bool()), null);
    }

    if (type == List.class) {
        Field childField = new Field(columnName, FieldType.nullable(new ArrowType.Int(32, true)), null);
        return new Field(columnName, FieldType.nullable(Types.MinorType.LIST.getType()),
                Collections.singletonList(childField));
    }

    if (type == Map.class) {
        FieldBuilder fieldBuilder = FieldBuilder.newBuilder(columnName, Types.MinorType.STRUCT.getType());

        Field childField1 = new Field("intVal", FieldType.nullable(new ArrowType.Int(32, true)), null);
        Field childField2 = new Field("doubleVal", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null);

        fieldBuilder.addField(childField1);
        fieldBuilder.addField(childField2);

        return fieldBuilder.build();
    }

    throw new IllegalArgumentException("Unsupported type " + type);
}
 
Example #19
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Populates the shared fixtures used by the tests in this class.
 *
 * <p>Each entry added to {@code testFields} is a 7-tuple: field name, logical type, the
 * matching {@link ArrowType}, and four classes (named after a row writer, a
 * {@code ...ForRow} writer, a field reader and a column vector). {@code rowType} is then
 * derived from the names and logical types of those entries in insertion order, and
 * {@code allocator} is created as a child of the root allocator.
 *
 * <p>NOTE(review): the field names skip "f12"; presumably the numbering is only a label
 * and nothing depends on it being contiguous — confirm.
 */
@BeforeClass
public static void init() {
	testFields = new ArrayList<>();
	// Integer types: the Arrow bit width is written as 8 * <number of bytes>.
	testFields.add(Tuple7.of(
		"f1", new TinyIntType(), new ArrowType.Int(8, true), RowTinyIntWriter.class,
		TinyIntWriter.TinyIntWriterForRow.class, TinyIntFieldReader.class, ArrowTinyIntColumnVector.class));

	testFields.add(Tuple7.of("f2", new SmallIntType(), new ArrowType.Int(8 * 2, true),
		RowSmallIntWriter.class, SmallIntWriter.SmallIntWriterForRow.class, SmallIntFieldReader.class, ArrowSmallIntColumnVector.class));

	testFields.add(Tuple7.of("f3", new IntType(), new ArrowType.Int(8 * 4, true),
		RowIntWriter.class, IntWriter.IntWriterForRow.class, IntFieldReader.class, ArrowIntColumnVector.class));

	testFields.add(Tuple7.of("f4", new BigIntType(), new ArrowType.Int(8 * 8, true),
		RowBigIntWriter.class, BigIntWriter.BigIntWriterForRow.class, BigIntFieldReader.class, ArrowBigIntColumnVector.class));

	testFields.add(Tuple7.of("f5", new BooleanType(), new ArrowType.Bool(),
		RowBooleanWriter.class, BooleanWriter.BooleanWriterForRow.class, BooleanFieldReader.class, ArrowBooleanColumnVector.class));

	// Floating point types: FloatType -> SINGLE precision, DoubleType -> DOUBLE precision.
	testFields.add(Tuple7.of("f6", new FloatType(), new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE),
		RowFloatWriter.class, FloatWriter.FloatWriterForRow.class, FloatFieldReader.class, ArrowFloatColumnVector.class));

	testFields.add(Tuple7.of("f7", new DoubleType(), new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE),
		RowDoubleWriter.class, DoubleWriter.DoubleWriterForRow.class, DoubleFieldReader.class, ArrowDoubleColumnVector.class));

	// Variable-length, decimal and date types.
	testFields.add(Tuple7.of("f8", new VarCharType(), ArrowType.Utf8.INSTANCE,
		RowVarCharWriter.class, VarCharWriter.VarCharWriterForRow.class, VarCharFieldReader.class, ArrowVarCharColumnVector.class));

	testFields.add(Tuple7.of("f9", new VarBinaryType(), ArrowType.Binary.INSTANCE,
		RowVarBinaryWriter.class, VarBinaryWriter.VarBinaryWriterForRow.class, VarBinaryFieldReader.class, ArrowVarBinaryColumnVector.class));

	testFields.add(Tuple7.of("f10", new DecimalType(10, 3), new ArrowType.Decimal(10, 3),
		RowDecimalWriter.class, DecimalWriter.DecimalWriterForRow.class, DecimalFieldReader.class, ArrowDecimalColumnVector.class));

	testFields.add(Tuple7.of("f11", new DateType(), new ArrowType.Date(DateUnit.DAY),
		RowDateWriter.class, DateWriter.DateWriterForRow.class, DateFieldReader.class, ArrowDateColumnVector.class));

	// Time types: TimeType arguments 0, 2, 4 and 8 are paired with SECOND/32-bit,
	// MILLISECOND/32-bit, MICROSECOND/64-bit and NANOSECOND/64-bit respectively.
	testFields.add(Tuple7.of("f13", new TimeType(0), new ArrowType.Time(TimeUnit.SECOND, 32),
		RowTimeWriter.class, TimeWriter.TimeWriterForRow.class, TimeFieldReader.class, ArrowTimeColumnVector.class));

	testFields.add(Tuple7.of("f14", new TimeType(2), new ArrowType.Time(TimeUnit.MILLISECOND, 32),
		RowTimeWriter.class, TimeWriter.TimeWriterForRow.class, TimeFieldReader.class, ArrowTimeColumnVector.class));

	testFields.add(Tuple7.of("f15", new TimeType(4), new ArrowType.Time(TimeUnit.MICROSECOND, 64),
		RowTimeWriter.class, TimeWriter.TimeWriterForRow.class, TimeFieldReader.class, ArrowTimeColumnVector.class));

	testFields.add(Tuple7.of("f16", new TimeType(8), new ArrowType.Time(TimeUnit.NANOSECOND, 64),
		RowTimeWriter.class, TimeWriter.TimeWriterForRow.class, TimeFieldReader.class, ArrowTimeColumnVector.class));

	// Timestamp types: both LocalZonedTimestampType and TimestampType are paired with an
	// Arrow Timestamp whose second constructor argument is null; arguments 0, 2, 4 and 8
	// select the SECOND, MILLISECOND, MICROSECOND and NANOSECOND unit respectively.
	testFields.add(Tuple7.of("f17", new LocalZonedTimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f18", new LocalZonedTimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f19", new LocalZonedTimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f20", new LocalZonedTimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f21", new TimestampType(0), new ArrowType.Timestamp(TimeUnit.SECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f22", new TimestampType(2), new ArrowType.Timestamp(TimeUnit.MILLISECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f23", new TimestampType(4), new ArrowType.Timestamp(TimeUnit.MICROSECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	testFields.add(Tuple7.of("f24", new TimestampType(8), new ArrowType.Timestamp(TimeUnit.NANOSECOND, null),
		RowTimestampWriter.class, TimestampWriter.TimestampWriterForRow.class, TimestampFieldReader.class, ArrowTimestampColumnVector.class));

	// Nested types: an array of VARCHAR and a row that itself contains an array and a nested row.
	testFields.add(Tuple7.of("f25", new ArrayType(new VarCharType()), ArrowType.List.INSTANCE,
		RowArrayWriter.class, ArrayWriter.ArrayWriterForRow.class, ArrayFieldReader.class, ArrowArrayColumnVector.class));

	RowType rowFieldType = new RowType(Arrays.asList(
		new RowType.RowField("a", new IntType()),
		new RowType.RowField("b", new VarCharType()),
		new RowType.RowField("c", new ArrayType(new VarCharType())),
		new RowType.RowField("d", new TimestampType(2)),
		new RowType.RowField("e", new RowType((Arrays.asList(
			new RowType.RowField("e1", new IntType()),
			new RowType.RowField("e2", new VarCharType())))))));
	testFields.add(Tuple7.of("f26", rowFieldType, ArrowType.Struct.INSTANCE,
		RowRowWriter.class, RowWriter.RowWriterForRow.class, RowFieldReader.class, ArrowRowColumnVector.class));

	// Derive the overall row type from the (name, logical type) pair of every registered entry,
	// preserving the insertion order above.
	List<RowType.RowField> rowFields = new ArrayList<>();
	for (Tuple7<String, LogicalType, ArrowType, Class<?>, Class<?>, Class<?>, Class<?>> field : testFields) {
		rowFields.add(new RowType.RowField(field.f0, field.f1));
	}
	rowType = new RowType(rowFields);

	// Child allocator named "stdout", created with arguments 0 and Long.MAX_VALUE.
	allocator = ArrowUtils.getRootAllocator().newChildAllocator("stdout", 0, Long.MAX_VALUE);
}
 
Example #20
Source File: ArrowUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a {@code DoubleType} to Arrow's double-precision floating point type.
 *
 * @param doubleType the logical type being visited (not inspected)
 * @return an Arrow {@code FloatingPoint} type with {@code DOUBLE} precision
 */
@Override
public ArrowType visit(DoubleType doubleType) {
	final FloatingPointPrecision precision = FloatingPointPrecision.DOUBLE;
	return new ArrowType.FloatingPoint(precision);
}
 
Example #21
Source File: ArrowUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Maps a {@code FloatType} to Arrow's single-precision floating point type.
 *
 * @param floatType the logical type being visited (not inspected)
 * @return an Arrow {@code FloatingPoint} type with {@code SINGLE} precision
 */
@Override
public ArrowType visit(FloatType floatType) {
	final FloatingPointPrecision precision = FloatingPointPrecision.SINGLE;
	return new ArrowType.FloatingPoint(precision);
}
 
Example #22
Source File: SqlTypeNameToArrowType.java    From dremio-flight-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Translates the SQL type name carried by result-column metadata into an Arrow type.
 *
 * <p>{@code DECIMAL} additionally reads precision and scale from the metadata; every other
 * mapping depends on the type name alone.
 *
 * @param type column metadata whose {@code getDataType()} yields the SQL type name
 * @return the Arrow type chosen for that name
 * @throws UnsupportedOperationException for {@code UNION}, which is not implemented
 * @throws IllegalStateException if the type name has no mapping
 */
public static ArrowType toArrowType(UserProtos.ResultColumnMetadata type) {
  final String sqlTypeName = type.getDataType();
  final ArrowType arrowType;
  switch (sqlTypeName) {
    // --- nested / special types ---
    case "NULL":
      arrowType = new Null();
      break;
    case "MAP":
      arrowType = new ArrowType.Map(false); //todo inner type?
      break;
    case "ARRAY":
      arrowType = new ArrowType.List(); //todo inner type?
      break;
    case "UNION":
      //todo inner type? (would return a Union once implemented)
      throw new UnsupportedOperationException("have not implemented unions");

    // --- signed integers ---
    case "TINYINT":
      arrowType = new Int(8, true);
      break;
    case "SMALLINT":
      arrowType = new Int(16, true);
      break;
    case "INTEGER":
      arrowType = new Int(32, true);
      break;
    case "BIGINT":
      arrowType = new Int(64, true);
      break;

    // --- floating point and decimal ---
    case "FLOAT":
      arrowType = new FloatingPoint(FloatingPointPrecision.SINGLE);
      break;
    case "DOUBLE":
      arrowType = new FloatingPoint(FloatingPointPrecision.DOUBLE);
      break;
    case "DECIMAL":
      arrowType = new Decimal(type.getPrecision(), type.getScale());
      break;

    // --- text, binary, boolean ---
    case "CHARACTER VARYING":
      arrowType = new Utf8();
      break;
    case "BINARY VARYING":
      arrowType = new Binary();
      break;
    case "BINARY":
      arrowType = new ArrowType.FixedSizeBinary(50);
      break;
    case "BOOLEAN":
      arrowType = new Bool();
      break;

    // --- date / time ---
    case "DATE":
      arrowType = new Date(DateUnit.MILLISECOND);
      break;
    case "TIME":
      arrowType = new Time(TimeUnit.MICROSECOND, 64);
      break;
    case "TIMESTAMP":
      arrowType = new Timestamp(TimeUnit.MICROSECOND, "UTC");
      break;
    case "INTERVAL DAY TO SECOND":
      arrowType = new Interval(IntervalUnit.DAY_TIME);
      break;
    case "INTERVAL YEAR TO MONTH":
      arrowType = new Interval(IntervalUnit.YEAR_MONTH);
      break;

    default:
      throw new IllegalStateException("unable to find arrow type for " + sqlTypeName);
  }
  return arrowType;
}
 
Example #23
Source File: ArrowMemoryAllocatorFactory.java    From yosegi with Apache License 2.0 4 votes vote down vote up
/**
 * Adds (or fetches) the child named {@code columnName} on a struct vector and wraps it in
 * the memory allocator that matches {@code columnType}.
 *
 * <p>Primitive children are requested as nullable fields. {@code NULL}, {@code EMPTY_ARRAY},
 * {@code EMPTY_SPREAD} and any other column type yield {@code NullMemoryAllocator.INSTANCE}
 * without touching the struct vector.
 *
 * @param columnType the logical type of the column
 * @param columnName the child name inside {@code vector}
 * @param allocator buffer allocator handed to the union, array and spread allocators
 * @param vector the parent struct vector
 * @param rowCount passed through to the created memory allocator
 * @return the memory allocator for the child column
 */
public static IMemoryAllocator getFromStructVector(
    final ColumnType columnType ,
    final String columnName ,
    final BufferAllocator allocator ,
    final StructVector vector ,
    final int rowCount ) {
  switch ( columnType ) {
    case UNION: {
      return new ArrowUnionMemoryAllocator(
          allocator , vector.addOrGetUnion( columnName ) , rowCount );
    }
    case ARRAY: {
      return new ArrowArrayMemoryAllocator(
          allocator , vector.addOrGetList( columnName ) , rowCount );
    }
    case SPREAD: {
      return new ArrowMapMemoryAllocator(
          allocator , vector.addOrGetStruct( columnName ) , rowCount );
    }

    case BOOLEAN: {
      FieldType boolType = new FieldType( true , ArrowType.Bool.INSTANCE , null , null );
      BitVector child = vector.addOrGet( columnName , boolType , BitVector.class );
      return new ArrowBooleanMemoryAllocator( child , rowCount );
    }
    case BYTE: {
      FieldType int8Type = new FieldType( true , new ArrowType.Int( 8 , true ) , null , null );
      TinyIntVector child = vector.addOrGet( columnName , int8Type , TinyIntVector.class );
      return new ArrowByteMemoryAllocator( child , rowCount );
    }
    case SHORT: {
      FieldType int16Type = new FieldType( true , new ArrowType.Int( 16 , true ) , null , null );
      SmallIntVector child = vector.addOrGet( columnName , int16Type , SmallIntVector.class );
      return new ArrowShortMemoryAllocator( child , rowCount );
    }
    case INTEGER: {
      FieldType int32Type = new FieldType( true , new ArrowType.Int( 32 , true ) , null , null );
      IntVector child = vector.addOrGet( columnName , int32Type , IntVector.class );
      return new ArrowIntegerMemoryAllocator( child , rowCount );
    }
    case LONG: {
      FieldType int64Type = new FieldType( true , new ArrowType.Int( 64 , true ) , null , null );
      BigIntVector child = vector.addOrGet( columnName , int64Type , BigIntVector.class );
      return new ArrowLongMemoryAllocator( child , rowCount );
    }
    case FLOAT: {
      FieldType float4Type = new FieldType(
          true , new ArrowType.FloatingPoint( FloatingPointPrecision.SINGLE ) , null , null );
      Float4Vector child = vector.addOrGet( columnName , float4Type , Float4Vector.class );
      return new ArrowFloatMemoryAllocator( child , rowCount );
    }
    case DOUBLE: {
      FieldType float8Type = new FieldType(
          true , new ArrowType.FloatingPoint( FloatingPointPrecision.DOUBLE ) , null , null );
      Float8Vector child = vector.addOrGet( columnName , float8Type , Float8Vector.class );
      return new ArrowDoubleMemoryAllocator( child , rowCount );
    }
    case STRING: {
      FieldType utf8Type = new FieldType( true , ArrowType.Utf8.INSTANCE , null , null );
      VarCharVector child = vector.addOrGet( columnName , utf8Type , VarCharVector.class );
      return new ArrowStringMemoryAllocator( child , rowCount );
    }
    case BYTES: {
      FieldType binaryType = new FieldType( true , ArrowType.Binary.INSTANCE , null , null );
      VarBinaryVector child = vector.addOrGet( columnName , binaryType , VarBinaryVector.class );
      return new ArrowBytesMemoryAllocator( child , rowCount );
    }

    case NULL:
    case EMPTY_ARRAY:
    case EMPTY_SPREAD:
    default:
      return NullMemoryAllocator.INSTANCE;
  }
}
 
Example #24
Source File: HiveSchemaConverter.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a nullable Arrow field for a Hive primitive type.
 *
 * <p>{@code BYTE}, {@code SHORT} and {@code INT} all map to a signed 32-bit integer;
 * {@code STRING}, {@code VARCHAR} and {@code CHAR} all map to UTF-8.
 *
 * @param name the name of the resulting field
 * @param typeInfo the Hive type description
 * @return the Arrow field, or {@code null} when {@code typeInfo} is not a primitive type or
 *         its primitive category has no mapping here
 */
public static Field getArrowFieldFromHivePrimitiveType(String name, TypeInfo typeInfo) {
  // Guard: only primitive categories are handled.
  switch (typeInfo.getCategory()) {
  case PRIMITIVE:
    break;
  default:
    return null;
  }

  final PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) typeInfo;
  switch (primitiveInfo.getPrimitiveCategory()) {
  case BOOLEAN:
    return new Field(name, true, new Bool(), null);

  case BYTE:
  case SHORT:
  case INT:
    return new Field(name, true, new Int(32, true), null);

  case LONG:
    return new Field(name, true, new Int(64, true), null);

  case FLOAT:
    return new Field(name, true, new FloatingPoint(FloatingPointPrecision.SINGLE), null);

  case DOUBLE:
    return new Field(name, true, new FloatingPoint(FloatingPointPrecision.DOUBLE), null);

  case DATE:
    return new Field(name, true, new Date(DateUnit.MILLISECOND), null);

  case TIMESTAMP:
    return new Field(name, true, new Timestamp(TimeUnit.MILLISECOND, null), null);

  case BINARY:
    return new Field(name, true, new Binary(), null);

  case DECIMAL: {
    final DecimalTypeInfo decimalInfo = (DecimalTypeInfo) primitiveInfo;
    final int precision = decimalInfo.getPrecision();
    final int scale = decimalInfo.getScale();
    return new Field(name, true, new Decimal(precision, scale), null);
  }

  case STRING:
  case VARCHAR:
  case CHAR:
    return new Field(name, true, new Utf8(), null);

  case UNKNOWN:
  case VOID:
  default:
    // No Arrow equivalent for these categories.
    return null;
  }
}
 
Example #25
Source File: TestDictionaryLookup.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that the dictionary lookup operator replaces dictionary ids with the values
 * stored in the per-column dictionaries.
 *
 * <p>Three five-entry dictionaries are built (long values for "c0", binary values for "c1",
 * double values for "c2"). A mocked operator registry hands out a spied
 * {@code DictionaryLookupOperator} whose {@code loadDictionary} returns those in-memory
 * dictionaries, and the operator is then validated against an input table of ids and the
 * expected table of decoded values.
 */
@Test
public void testDictionaryLookup() throws Throwable {


  // The containers are closed by try-with-resources once validation is done.
  try (final VectorContainer dict1 = new VectorContainer(getTestAllocator());
       final VectorContainer dict2 = new VectorContainer(getTestAllocator());
       final VectorContainer dict3 = new VectorContainer(getTestAllocator())) {

    final Map<String, GlobalDictionaryFieldInfo> dictionaryFieldInfoMap = Maps.newHashMap();
    // Dictionary for c0: signed 64-bit ints, ids 0..4 -> 10, 20, 30, 40, 50.
    final Field field1 = new Field(SchemaPath.getSimplePath("c0").getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
    final BigIntVector longVector = dict1.addOrGet(field1);
    longVector.allocateNew();
    longVector.setSafe(0, 10L);
    longVector.setSafe(1, 20L);
    longVector.setSafe(2, 30L);
    longVector.setSafe(3, 40L);
    longVector.setSafe(4, 50L);
    longVector.setValueCount(5);
    dict1.setRecordCount(5);
    dict1.buildSchema(BatchSchema.SelectionVectorMode.NONE);


    dictionaryFieldInfoMap.put("c0", new GlobalDictionaryFieldInfo(0, "c0", null, field1.getType(), "local"));

    // Dictionary for c1: binary values, ids 0..4 -> "abc", "bcd", "cde", "def", "efg".
    final Field field2 = new Field(SchemaPath.getSimplePath("c1").getAsUnescapedPath(), true, new ArrowType.Binary(), null);
    final VarBinaryVector binaryVector = dict2.addOrGet(field2);
    binaryVector.allocateNew();
    binaryVector.setSafe(0, "abc".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(1, "bcd".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(2, "cde".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(3, "def".getBytes(UTF8), 0, 3);
    binaryVector.setSafe(4, "efg".getBytes(UTF8), 0, 3);
    binaryVector.setValueCount(5);
    dict2.setRecordCount(5);
    dict2.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c1", new GlobalDictionaryFieldInfo(0, "c1", null, field2.getType(), "local"));

    // Dictionary for c2: doubles, ids 0..4 -> 100.1, 200.2, 300.3, 400.4, 500.5.
    final Field field3 = new Field(SchemaPath.getSimplePath("c2").getAsUnescapedPath(), true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
    final Float8Vector doubleVector = dict3.addOrGet(field3);
    doubleVector.allocateNew();
    doubleVector.setSafe(0, 100.1);
    doubleVector.setSafe(1, 200.2);
    doubleVector.setSafe(2, 300.3);
    doubleVector.setSafe(3, 400.4);
    doubleVector.setSafe(4, 500.5);
    doubleVector.setValueCount(5);
    dict3.setRecordCount(5);
    dict3.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    dictionaryFieldInfoMap.put("c2", new GlobalDictionaryFieldInfo(0, "c2", null, field3.getType(), "local"));

    // Mock the registry so the operator under test is a spy whose loadDictionary(...)
    // returns the containers built above instead of the real implementation's result.
    OperatorCreatorRegistry registry = Mockito.mock(OperatorCreatorRegistry.class);
    Mockito.when(registry.getSingleInputOperator(Matchers.any(OperatorContext.class), Matchers.any(PhysicalOperator.class)))
      .thenAnswer(new Answer<SingleInputOperator>() {
        public SingleInputOperator answer(InvocationOnMock invocation) throws Exception {
          Object[] args = invocation.getArguments();
          DictionaryLookupOperator dictionaryLookupOperator = Mockito.spy(new DictionaryLookupOperator(
            (OperatorContext)args[0], (DictionaryLookupPOP)args[1]));

          Mockito.doReturn(dict1).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c0"));
          Mockito.doReturn(dict2).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c1"));
          Mockito.doReturn(dict3).when(dictionaryLookupOperator).loadDictionary(Matchers.eq("c2"));
          return dictionaryLookupOperator;
        }
      });

    BaseTestOperator.testContext.setRegistry(registry);

    DictionaryLookupPOP lookup = new DictionaryLookupPOP(null, PROPS, null, dictionaryFieldInfoMap);
    // Input rows hold dictionary ids for each column.
    Table input = t(
      th("c0", "c1", "c2"),
      tr(0, 1, 2),
      tr(1, 2, 0),
      tr(2, 0, 1)
    );

    // Expected rows hold the dictionary value at each id, e.g. (0, 1, 2) -> (10, "bcd", 300.3).
    Table output = t(
      th("c0", "c1", "c2"),
      tr(10L, "bcd".getBytes(UTF8), 300.3),
      tr(20L, "cde".getBytes(UTF8), 100.1),
      tr(30L, "abc".getBytes(UTF8), 200.2)
    );

    validateSingle(lookup, DictionaryLookupOperator.class, input, output);
  }
}
 
Example #26
Source File: Fixtures.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the Arrow type of this fixture value.
 *
 * @return an Arrow {@code FloatingPoint} type with {@code DOUBLE} precision
 */
@Override
ArrowType getType() {
  final FloatingPointPrecision precision = FloatingPointPrecision.DOUBLE;
  return new ArrowType.FloatingPoint(precision);
}
 
Example #27
Source File: Fixtures.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Reports the Arrow type of this fixture value.
 *
 * @return an Arrow {@code FloatingPoint} type with {@code SINGLE} precision
 */
@Override
ArrowType getType() {
  final FloatingPointPrecision precision = FloatingPointPrecision.SINGLE;
  return new ArrowType.FloatingPoint(precision);
}
 
Example #28
Source File: FlightDataSourceReader.java    From flight-spark-source with Apache License 2.0 4 votes vote down vote up
/**
 * Converts an Arrow field type to the Spark SQL data type used to expose it.
 *
 * <p>Integers are mapped by bit width (8/16/32/64). Floating point values are mapped by
 * precision: {@code HALF} and {@code SINGLE} become {@code FloatType}, {@code DOUBLE}
 * becomes {@code DoubleType}. {@code Binary} and {@code FixedSizeBinary} both become
 * {@code BinaryType}; {@code Time} and {@code Timestamp} both become {@code TimestampType}.
 *
 * @param fieldType the Arrow field type to convert
 * @return the corresponding Spark data type
 * @throws UnsupportedOperationException for Struct, List, FixedSizeList, Union and Decimal,
 *         for an integer bit width other than 8, 16, 32 or 64, and for an unrecognised
 *         floating point precision
 * @throws IllegalStateException if the Arrow type id is not handled by any case
 */
private DataType sparkFromArrow(FieldType fieldType) {
  switch (fieldType.getType().getTypeID()) {
    case Null:
      return DataTypes.NullType;
    case Struct:
      throw new UnsupportedOperationException("have not implemented Struct type yet");
    case List:
      throw new UnsupportedOperationException("have not implemented List type yet");
    case FixedSizeList:
      throw new UnsupportedOperationException("have not implemented FixedSizeList type yet");
    case Union:
      throw new UnsupportedOperationException("have not implemented Union type yet");
    case Int:
      ArrowType.Int intType = (ArrowType.Int) fieldType.getType();
      int bitWidth = intType.getBitWidth();
      if (bitWidth == 8) {
        return DataTypes.ByteType;
      } else if (bitWidth == 16) {
        return DataTypes.ShortType;
      } else if (bitWidth == 32) {
        return DataTypes.IntegerType;
      } else if (bitWidth == 64) {
        return DataTypes.LongType;
      }
      throw new UnsupportedOperationException("unknown int type with bitwidth " + bitWidth);
    case FloatingPoint:
      ArrowType.FloatingPoint floatType = (ArrowType.FloatingPoint) fieldType.getType();
      FloatingPointPrecision precision = floatType.getPrecision();
      switch (precision) {
        case HALF:
        case SINGLE:
          return DataTypes.FloatType;
        case DOUBLE:
          return DataTypes.DoubleType;
        default:
          // Without this default an unmatched precision would leave the inner switch and
          // fall through into the Utf8 case below, silently reporting StringType.
          throw new UnsupportedOperationException("unknown float type with precision " + precision);
      }
    case Utf8:
      return DataTypes.StringType;
    case Binary:
    case FixedSizeBinary:
      return DataTypes.BinaryType;
    case Bool:
      return DataTypes.BooleanType;
    case Decimal:
      throw new UnsupportedOperationException("have not implemented Decimal type yet");
    case Date:
      return DataTypes.DateType;
    case Time:
      return DataTypes.TimestampType; //note i don't know what this will do!
    case Timestamp:
      return DataTypes.TimestampType;
    case Interval:
      return DataTypes.CalendarIntervalType;
    case NONE:
      return DataTypes.NullType;
  }
  throw new IllegalStateException("Unexpected value: " + fieldType);
}
 
Example #29
Source File: ArrowMemoryAllocatorFactory.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Adds (or fetches) the child named {@code columnName} on a struct vector and wraps it in
 * the memory allocator that matches {@code columnType}.
 *
 * <p>Primitive children are requested as nullable fields. {@code NULL}, {@code EMPTY_ARRAY},
 * {@code EMPTY_SPREAD} and any other column type yield {@code NullMemoryAllocator.INSTANCE}.
 *
 * @param columnType the logical type of the column
 * @param columnName the child name inside {@code vector}
 * @param allocator buffer allocator handed to the union, array and spread allocators
 * @param vector the parent struct vector
 * @param rowCount passed through to the created memory allocator
 * @return the memory allocator for the child column
 */
public static IMemoryAllocator getFromStructVector( final ColumnType columnType , final String columnName , final BufferAllocator allocator , final StructVector vector , final int rowCount ){
  switch( columnType ){
    case UNION: {
      return new ArrowUnionMemoryAllocator( allocator , vector.addOrGetUnion( columnName ) , rowCount );
    }
    case ARRAY: {
      return new ArrowArrayMemoryAllocator( allocator , vector.addOrGetList( columnName ) , rowCount );
    }
    case SPREAD: {
      return new ArrowMapMemoryAllocator( allocator , vector.addOrGetStruct( columnName ) , rowCount );
    }

    case BOOLEAN: {
      FieldType fieldType = new FieldType( true , ArrowType.Bool.INSTANCE , null , null );
      BitVector child = vector.addOrGet( columnName , fieldType , BitVector.class );
      return new ArrowBooleanMemoryAllocator( child , rowCount );
    }
    case BYTE: {
      FieldType fieldType = new FieldType( true , new ArrowType.Int( 8 , true ) , null , null );
      TinyIntVector child = vector.addOrGet( columnName , fieldType , TinyIntVector.class );
      return new ArrowByteMemoryAllocator( child , rowCount );
    }
    case SHORT: {
      FieldType fieldType = new FieldType( true , new ArrowType.Int( 16 , true ) , null , null );
      SmallIntVector child = vector.addOrGet( columnName , fieldType , SmallIntVector.class );
      return new ArrowShortMemoryAllocator( child , rowCount );
    }
    case INTEGER: {
      FieldType fieldType = new FieldType( true , new ArrowType.Int( 32 , true ) , null , null );
      IntVector child = vector.addOrGet( columnName , fieldType , IntVector.class );
      return new ArrowIntegerMemoryAllocator( child , rowCount );
    }
    case LONG: {
      FieldType fieldType = new FieldType( true , new ArrowType.Int( 64 , true ) , null , null );
      BigIntVector child = vector.addOrGet( columnName , fieldType , BigIntVector.class );
      return new ArrowLongMemoryAllocator( child , rowCount );
    }
    case FLOAT: {
      FieldType fieldType = new FieldType( true , new ArrowType.FloatingPoint( FloatingPointPrecision.SINGLE ) , null , null );
      Float4Vector child = vector.addOrGet( columnName , fieldType , Float4Vector.class );
      return new ArrowFloatMemoryAllocator( child , rowCount );
    }
    case DOUBLE: {
      FieldType fieldType = new FieldType( true , new ArrowType.FloatingPoint( FloatingPointPrecision.DOUBLE ) , null , null );
      Float8Vector child = vector.addOrGet( columnName , fieldType , Float8Vector.class );
      return new ArrowDoubleMemoryAllocator( child , rowCount );
    }
    case STRING: {
      FieldType fieldType = new FieldType( true , ArrowType.Utf8.INSTANCE , null , null );
      VarCharVector child = vector.addOrGet( columnName , fieldType , VarCharVector.class );
      return new ArrowStringMemoryAllocator( child , rowCount );
    }
    case BYTES: {
      FieldType fieldType = new FieldType( true , ArrowType.Binary.INSTANCE , null , null );
      VarBinaryVector child = vector.addOrGet( columnName , fieldType , VarBinaryVector.class );
      return new ArrowBytesMemoryAllocator( child , rowCount );
    }

    case NULL:
    case EMPTY_ARRAY:
    case EMPTY_SPREAD:
    default:
      return NullMemoryAllocator.INSTANCE;
  }
}
 
Example #30
Source File: ArrowMemoryAllocatorFactory.java    From multiple-dimension-spread with Apache License 2.0 4 votes vote down vote up
/**
 * Adds (or fetches) the element vector of a list vector and wraps it in the memory
 * allocator that matches {@code columnType}.
 *
 * <p>Element vectors are requested as nullable fields. {@code NULL}, {@code EMPTY_ARRAY},
 * {@code EMPTY_SPREAD} and any other column type yield {@code NullMemoryAllocator.INSTANCE}.
 *
 * @param columnType the logical type of the list elements
 * @param columnName the column name (not used when creating the element vector)
 * @param allocator buffer allocator handed to the union, array and spread allocators
 * @param vector the parent list vector
 * @param rowCount passed through to the created memory allocator
 * @return the memory allocator for the list's element vector
 */
public static IMemoryAllocator getFromListVector( final ColumnType columnType , final String columnName , final BufferAllocator allocator , final ListVector vector , final int rowCount ){
  switch( columnType ){
    case UNION:
      AddOrGetResult<UnionVector> unionVector =  vector.addOrGetVector( new FieldType( true , MinorType.UNION.getType() , null , null ) );
      return new ArrowUnionMemoryAllocator( allocator , unionVector.getVector() , rowCount );
    case ARRAY:
      AddOrGetResult<ListVector> listVector =  vector.addOrGetVector( new FieldType( true , ArrowType.List.INSTANCE , null , null ) );
      return new ArrowArrayMemoryAllocator( allocator , listVector.getVector() , rowCount );
    case SPREAD:
      AddOrGetResult<StructVector> mapVector =  vector.addOrGetVector( new FieldType( true , ArrowType.Struct.INSTANCE , null , null ) );
      return new ArrowMapMemoryAllocator( allocator , mapVector.getVector() , rowCount );

    case BOOLEAN:
      AddOrGetResult<BitVector> bitVector =  vector.addOrGetVector( new FieldType( true , ArrowType.Bool.INSTANCE , null , null ) );
      return new ArrowBooleanMemoryAllocator( bitVector.getVector() , rowCount );
    case BYTE:
      AddOrGetResult<TinyIntVector> byteVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.Int( 8 , true ) , null , null ) );
      return new ArrowByteMemoryAllocator( byteVector.getVector() , rowCount );
    case SHORT:
      AddOrGetResult<SmallIntVector> shortVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.Int( 16 , true ) , null , null ) );
      return new ArrowShortMemoryAllocator( shortVector.getVector() , rowCount );
    case INTEGER:
      AddOrGetResult<IntVector> integerVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.Int( 32 , true ) , null , null ) );
      return new ArrowIntegerMemoryAllocator( integerVector.getVector() , rowCount );
    case LONG:
      AddOrGetResult<BigIntVector> longVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.Int( 64 , true ) , null , null ) );
      return new ArrowLongMemoryAllocator( longVector.getVector() , rowCount );
    case FLOAT:
      // Float4Vector is the 32-bit float vector, so the element type must be SINGLE precision
      // (as in getFromStructVector); HALF does not describe a Float4Vector.
      AddOrGetResult<Float4Vector> floatVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.FloatingPoint( FloatingPointPrecision.SINGLE ) , null , null ) );
      return new ArrowFloatMemoryAllocator( floatVector.getVector() , rowCount );
    case DOUBLE:
      AddOrGetResult<Float8Vector> doubleVector =  vector.addOrGetVector( new FieldType( true , new ArrowType.FloatingPoint( FloatingPointPrecision.DOUBLE ) , null , null ) );
      return new ArrowDoubleMemoryAllocator( doubleVector.getVector() , rowCount );
    case STRING:
      AddOrGetResult<VarCharVector> charVector =  vector.addOrGetVector( new FieldType( true , ArrowType.Utf8.INSTANCE , null , null ) );
      return new ArrowStringMemoryAllocator( charVector.getVector() , rowCount );
    case BYTES:
      AddOrGetResult<VarBinaryVector> binaryVector =  vector.addOrGetVector( new FieldType( true , ArrowType.Binary.INSTANCE , null , null ) );
      return new ArrowBytesMemoryAllocator( binaryVector.getVector() , rowCount );

    case NULL:
    case EMPTY_ARRAY:
    case EMPTY_SPREAD:
    default:
      return NullMemoryAllocator.INSTANCE;
  }
}