org.apache.arrow.vector.ipc.ArrowFileWriter Java Examples

The following examples show how to use org.apache.arrow.vector.ipc.ArrowFileWriter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: YosegiArrowReader.java    From yosegi with Apache License 2.0 8 votes vote down vote up
/**
 * Read next.
 */
public byte[] nextToBytes() throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  VectorSchemaRoot schemaRoot = nextToSchemaRoot();
  ArrowFileWriter writer = new ArrowFileWriter( schemaRoot, null, Channels.newChannel( out ) );
  writer.start();
  writer.writeBatch();
  writer.end();
  writer.close();
  return out.toByteArray();
}
 
Example #2
Source File: ArrowWrite.java    From ArrowExample with Apache License 2.0 5 votes vote down vote up
public void setupWrite(String filename, boolean useCustom) throws Exception {
    File arrowFile = validateFile(filename, false);
    this.fileOutputStream = new FileOutputStream(arrowFile);
    Schema schema = makeSchema();
    this.root = VectorSchemaRoot.create(schema, this.ra);
    DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider();
    if (!useCustom) {
        /* default java implementation of the channel */
        this.arrowFileWriter = new ArrowFileWriter(root,
                provider,
                this.fileOutputStream.getChannel());
    } else {
        /* custom channel implementation in ArrowOutputStream */
        this.arrowFileWriter = new ArrowFileWriter(root,
                provider,
                new ArrowOutputStream(this.fileOutputStream));
    }

    if (false) {
        // show some stuff about the schema and layout
        for (Field field : root.getSchema().getFields()) {
            FieldVector vector = root.getVector(field.getName());
            showFieldLayout(field, vector);
        }
    }
    System.out.println("Generated " + this.entries + " data entries , batch size " + batchSize + " usingCustomWriter: " + useCustom + " useNullValues " + this.useNullValues);
}
 
Example #3
Source File: Twister2ArrowFileWriter.java    From twister2 with Apache License 2.0 5 votes vote down vote up
public boolean setUpTwister2ArrowWrite(int workerId) throws Exception {
  LOG.fine("%%%%%%%%% worker id details:" + workerId + "\t" + arrowFile);
  this.root = VectorSchemaRoot.create(Schema.fromJSON(arrowSchema), this.rootAllocator);
  Path path = new Path(arrowFile);
  this.fileSystem = FileSystemUtils.get(path);
  this.fsDataOutputStream = fileSystem.create(path);
  this.twister2ArrowOutputStream = new Twister2ArrowOutputStream(this.fsDataOutputStream);
  DictionaryProvider.MapDictionaryProvider provider
      = new DictionaryProvider.MapDictionaryProvider();
  if (!flag) {
    this.arrowFileWriter = new ArrowFileWriter(root, provider,
        this.fsDataOutputStream.getChannel());
  } else {
    this.arrowFileWriter = new ArrowFileWriter(root, provider, this.twister2ArrowOutputStream);
  }

  LOG.info("root schema fields:" + root.getSchema().getFields());
  for (Field field : root.getSchema().getFields()) {
    FieldVector vector = root.getVector(field.getName());
    if (vector.getMinorType().equals(Types.MinorType.INT)) {
      this.generatorMap.put(vector, new IntVectorGenerator());
    } else if (vector.getMinorType().equals(Types.MinorType.BIGINT)) {
      this.generatorMap.put(vector, new BigIntVectorGenerator());
    } else if (vector.getMinorType().equals(Types.MinorType.FLOAT4)) {
      this.generatorMap.put(vector, new FloatVectorGenerator());
    } else {
      throw new RuntimeException("unsupported arrow write type");
    }
  }
  return true;
}
 
Example #4
Source File: ArrowConverterTest.java    From DataVec with Apache License 2.0 5 votes vote down vote up
@Test
public void testReadSchemaAndRecordsFromByteArray() throws Exception {
    BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);

    int valueCount = 3;
    List<Field> fields = new ArrayList<>();
    fields.add(ArrowConverter.field("field1",new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
    fields.add(ArrowConverter.intField("field2"));

    List<FieldVector> fieldVectors = new ArrayList<>();
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field1",new float[] {1,2,3}));
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field2",new int[] {1,2,3}));


    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(fields);

    VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount);
    VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
    vectorUnloader.getRecordBatch();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    try(ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
        arrowFileWriter.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }

    byte[] arr = byteArrayOutputStream.toByteArray();
    val arr2 = ArrowConverter.readFromBytes(arr);
    assertEquals(2,arr2.getFirst().numColumns());
    assertEquals(3,arr2.getRight().size());

    val arrowCols = ArrowConverter.toArrowColumns(allocator,arr2.getFirst(),arr2.getRight());
    assertEquals(2,arrowCols.size());
    assertEquals(valueCount,arrowCols.get(0).getValueCount());
}
 
Example #5
Source File: ArrowConverterTest.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
@Test
public void testReadSchemaAndRecordsFromByteArray() throws Exception {
    BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);

    int valueCount = 3;
    List<Field> fields = new ArrayList<>();
    fields.add(ArrowConverter.field("field1",new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
    fields.add(ArrowConverter.intField("field2"));

    List<FieldVector> fieldVectors = new ArrayList<>();
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field1",new float[] {1,2,3}));
    fieldVectors.add(ArrowConverter.vectorFor(allocator,"field2",new int[] {1,2,3}));


    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(fields);

    VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount);
    VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
    vectorUnloader.getRecordBatch();
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    try(ArrowFileWriter arrowFileWriter = new ArrowFileWriter(schemaRoot1,null,newChannel(byteArrayOutputStream))) {
        arrowFileWriter.writeBatch();
    } catch (IOException e) {
        log.error("",e);
    }

    byte[] arr = byteArrayOutputStream.toByteArray();
    val arr2 = ArrowConverter.readFromBytes(arr);
    assertEquals(2,arr2.getFirst().numColumns());
    assertEquals(3,arr2.getRight().size());

    val arrowCols = ArrowConverter.toArrowColumns(allocator,arr2.getFirst(),arr2.getRight());
    assertEquals(2,arrowCols.size());
    assertEquals(valueCount,arrowCols.get(0).getValueCount());
}