Java Code Examples for org.apache.arrow.vector.VectorSchemaRoot#create()

The following examples show how to use org.apache.arrow.vector.VectorSchemaRoot#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: Stream.java    From dremio-flight-connector with Apache License 2.0 5 votes vote down vote up
/**
 * Starts the given listener with this stream's {@code root} after waiting on
 * {@code countDownLatch}.
 *
 * <p>If {@code root} is still {@code null} once the wait returns, a warning is logged and a
 * root with an empty field list is created so the listener can still be started.
 *
 * @param listener the listener that is started with {@code root} and then stored in
 *     {@code this.listener}
 * @throws InterruptedException propagated from {@code countDownLatch.await()}
 */
public void start(ServerStreamListener listener) throws InterruptedException {
  logger.debug("trying to start, waiting for schema for {}", descriptor);
  // Block until the latch has been counted down before inspecting root.
  countDownLatch.await();
  if (root == null) {
    logger.warn("root was not set for {}, not starting listener properly", descriptor);
    // Fall back to a root built from a schema with no fields.
    root = VectorSchemaRoot.create(new Schema(ImmutableList.of()), allocator);
  }
  listener.start(root);
  // The listener is only remembered after it has been started.
  this.listener = listener;
}
 
Example 2
Source File: Twister2ArrowFileWriter.java    From twister2 with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares this writer: builds the schema root from {@code arrowSchema}, opens the output
 * file named by {@code arrowFile}, creates the Arrow file writer and registers one value
 * generator per column.
 *
 * @param workerId id of the worker, used only in the log message
 * @return always {@code true}
 * @throws Exception if any setup call fails; a {@link RuntimeException} is thrown for a column
 *     whose minor type is not INT, BIGINT or FLOAT4
 */
public boolean setUpTwister2ArrowWrite(int workerId) throws Exception {
  LOG.fine("%%%%%%%%% worker id details:" + workerId + "\t" + arrowFile);
  this.root = VectorSchemaRoot.create(Schema.fromJSON(arrowSchema), this.rootAllocator);

  // Open the destination and wrap it in the Twister2 output stream.
  Path outputPath = new Path(arrowFile);
  this.fileSystem = FileSystemUtils.get(outputPath);
  this.fsDataOutputStream = fileSystem.create(outputPath);
  this.twister2ArrowOutputStream = new Twister2ArrowOutputStream(this.fsDataOutputStream);

  DictionaryProvider.MapDictionaryProvider dictionaries
      = new DictionaryProvider.MapDictionaryProvider();
  // flag selects the wrapped stream; otherwise the raw channel of the file stream is used.
  if (flag) {
    this.arrowFileWriter = new ArrowFileWriter(root, dictionaries, this.twister2ArrowOutputStream);
  } else {
    this.arrowFileWriter = new ArrowFileWriter(root, dictionaries,
        this.fsDataOutputStream.getChannel());
  }

  LOG.info("root schema fields:" + root.getSchema().getFields());

  // Pick a generator for every column based on its minor type.
  for (Field field : root.getSchema().getFields()) {
    FieldVector column = root.getVector(field.getName());
    switch (column.getMinorType()) {
      case INT:
        this.generatorMap.put(column, new IntVectorGenerator());
        break;
      case BIGINT:
        this.generatorMap.put(column, new BigIntVectorGenerator());
        break;
      case FLOAT4:
        this.generatorMap.put(column, new FloatVectorGenerator());
        break;
      default:
        throw new RuntimeException("unsupported arrow write type");
    }
  }
  return true;
}
 
Example 3
Source File: ArrowSourceFunctionTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the base test with a schema root built from {@code rowType}, the shared
 * {@code serializer}, an ordering on the string at position 0, and an equality checker that
 * compares two {@code RowData} values via {@code deepEqualsBaseRow}.
 */
public ArrowSourceFunctionTest() {
	super(
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator),
		serializer,
		// Order rows by their first (string) field.
		Comparator.comparing(row -> row.getString(0)),
		new DeeplyEqualsChecker().withCustomCheck(
			// The custom check applies only when both sides are RowData.
			(left, right) -> left instanceof RowData && right instanceof RowData,
			(left, right, checker) -> deepEqualsBaseRow(
				(RowData) left,
				(RowData) right,
				(RowDataSerializer) serializer.duplicate(),
				(RowDataSerializer) serializer.duplicate())));
}
 
Example 4
Source File: SFArrowResultSetIT.java    From snowflake-jdbc with Apache License 2.0 5 votes vote down vote up
/**
 * Writes column-major test data to a new Arrow stream file created in {@code resultFolder}.
 *
 * <p>The rows are split into record batches of at most {@code rowsPerRecordBatch} rows. Only
 * vectors whose minor type is INT are populated (via {@code writeIntToField}); vectors of any
 * other type are left untouched. The schema root, the file stream and the writer are all
 * released before this method returns, including when writing fails.
 *
 * @param fileName           name of the file to create in {@code resultFolder}
 * @param schema             Arrow schema of the file
 * @param data               column-major values: {@code data[j]} holds every row of column j;
 *                           an empty array produces a file without record batches
 * @param rowsPerRecordBatch maximum number of rows per record batch; must be positive when
 *                           there is at least one row to write
 * @return the file that was written
 * @throws IOException              if the file cannot be created or written
 * @throws IllegalArgumentException if rows exist but {@code rowsPerRecordBatch} is not positive
 */
private File createArrowFile(String fileName, Schema schema, Object[][] data,
                             int rowsPerRecordBatch)
throws IOException
{
  File file = resultFolder.newFile(fileName);

  // Every column has the same number of rows, so the first column (if any) gives the total.
  final int totalRows = data.length == 0 ? 0 : data[0].length;
  if (totalRows > 0 && rowsPerRecordBatch <= 0)
  {
    // A non-positive batch size would never advance the loop below.
    throw new IllegalArgumentException(
        "rowsPerRecordBatch must be positive, got " + rowsPerRecordBatch);
  }

  // Resources are closed in reverse order: writer, then the stream, then the root.
  try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator);
       FileOutputStream out = new FileOutputStream(file);
       ArrowWriter writer = new ArrowStreamWriter(
           root, new DictionaryProvider.MapDictionaryProvider(), out))
  {
    writer.start();

    for (int i = 0; i < totalRows; )
    {
      int rowsToAppend = Math.min(rowsPerRecordBatch, totalRows - i);
      root.setRowCount(rowsToAppend);

      for (int j = 0; j < data.length; j++)
      {
        FieldVector vector = root.getFieldVectors().get(j);

        switch (vector.getMinorType())
        {
          case INT:
            writeIntToField(vector, data[j], i, rowsToAppend);
            break;
          default:
            // Other types are not written by this helper.
            break;
        }
      }

      writer.writeBatch();
      i += rowsToAppend;
    }
  }

  return file;
}
 
Example 5
Source File: RowDataArrowReaderWriterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code RowData} Arrow writer and an already-started stream writer that share one
 * schema root built from {@code rowType}.
 *
 * @param outputStream destination of the Arrow stream
 * @return the row writer paired with the started stream writer
 * @throws IOException if starting the stream writer fails
 */
@Override
public Tuple2<ArrowWriter<RowData>, ArrowStreamWriter> createArrowWriter(OutputStream outputStream) throws IOException {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowWriter<RowData> rowWriter =
		ArrowUtils.createRowDataArrowWriter(schemaRoot, rowType);

	// No dictionary provider is supplied (null).
	final ArrowStreamWriter streamWriter =
		new ArrowStreamWriter(schemaRoot, null, outputStream);
	streamWriter.start();

	return Tuple2.of(rowWriter, streamWriter);
}
 
Example 6
Source File: RowArrowReaderWriterTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code Row} Arrow writer and an already-started stream writer that share one
 * schema root built from {@code rowType}.
 *
 * @param outputStream destination of the Arrow stream
 * @return the row writer paired with the started stream writer
 * @throws IOException if starting the stream writer fails
 */
@Override
public Tuple2<ArrowWriter<Row>, ArrowStreamWriter> createArrowWriter(OutputStream outputStream) throws IOException {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowWriter<Row> rowWriter =
		ArrowUtils.createRowArrowWriter(schemaRoot, rowType);

	// No dictionary provider is supplied (null).
	final ArrowStreamWriter streamWriter =
		new ArrowStreamWriter(schemaRoot, null, outputStream);
	streamWriter.start();

	return Tuple2.of(rowWriter, streamWriter);
}
 
Example 7
Source File: AbstractArrowPythonScalarFunctionRunner.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the runner: after opening the superclass, creates the Arrow allocator, the schema
 * root for the input type, the Arrow writer, and a started stream writer targeting
 * {@code baos}; finally resets the batch counter.
 *
 * @throws Exception if the superclass or any of the setup calls fails
 */
@Override
public void open() throws Exception {
	super.open();
	// Child allocator named "writer", created with arguments 0 and Long.MAX_VALUE.
	allocator = ArrowUtils.getRootAllocator().newChildAllocator("writer", 0, Long.MAX_VALUE);
	root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(getInputType()), allocator);
	// NOTE(review): createArrowWriter() presumably reads the root assigned above - confirm before reordering.
	arrowWriter = createArrowWriter();
	// No dictionary provider is supplied (null).
	arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
	arrowStreamWriter.start();
	currentBatchCount = 0;
}
 
Example 8
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the row reader created for {@code rowType} uses, for each field, the reader
 * class recorded in {@code f5} of the matching {@code testFields} entry.
 */
@Test
public void testCreateRowArrowReader() {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowFieldReader[] actualReaders =
		ArrowUtils.createRowArrowReader(schemaRoot, rowType).getFieldReaders();

	// Walk the readers in order; position tracks the matching expectation.
	int position = 0;
	for (ArrowFieldReader actualReader : actualReaders) {
		assertEquals(testFields.get(position).f5, actualReader.getClass());
		position++;
	}
}
 
Example 9
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the {@code RowData} reader created for {@code rowType} uses, for each column,
 * the vector class recorded in {@code f6} of the matching {@code testFields} entry.
 */
@Test
public void testCreateRowDataArrowReader() {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ColumnVector[] actualVectors =
		ArrowUtils.createRowDataArrowReader(schemaRoot, rowType).getColumnVectors();

	// Walk the vectors in order; position tracks the matching expectation.
	int position = 0;
	for (ColumnVector actualVector : actualVectors) {
		assertEquals(testFields.get(position).f6, actualVector.getClass());
		position++;
	}
}
 
Example 10
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the {@code Row} writer created for {@code rowType} uses, for each field, the
 * writer class recorded in {@code f3} of the matching {@code testFields} entry.
 */
@Test
public void testCreateRowArrowWriter() {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowFieldWriter<Row>[] actualWriters =
		ArrowUtils.createRowArrowWriter(schemaRoot, rowType).getFieldWriters();

	// Walk the writers in order; position tracks the matching expectation.
	int position = 0;
	for (ArrowFieldWriter<Row> actualWriter : actualWriters) {
		assertEquals(testFields.get(position).f3, actualWriter.getClass());
		position++;
	}
}
 
Example 11
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the {@code RowData} writer created for {@code rowType} uses, for each field,
 * the writer class recorded in {@code f4} of the matching {@code testFields} entry.
 */
@Test
public void testCreateRowDataArrowWriter() {
	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowFieldWriter<RowData>[] actualWriters =
		ArrowUtils.createRowDataArrowWriter(schemaRoot, rowType).getFieldWriters();

	// Walk the writers in order; position tracks the matching expectation.
	int position = 0;
	for (ArrowFieldWriter<RowData> actualWriter : actualWriters) {
		assertEquals(testFields.get(position).f4, actualWriter.getClass());
		position++;
	}
}
 
Example 12
Source File: ArrowUtilsTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Writes five empty rows as three Arrow record batches into an in-memory stream and checks
 * that {@code ArrowUtils.readArrowBatches} returns exactly three batches.
 */
@Test
public void testReadArrowBatches() throws IOException {
	final int batches = 3;

	final VectorSchemaRoot schemaRoot =
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	final ArrowWriter<RowData> rowWriter = ArrowUtils.createRowDataArrowWriter(schemaRoot, rowType);
	final ByteArrayOutputStream baos = new ByteArrayOutputStream();
	final ArrowStreamWriter streamWriter = new ArrowStreamWriter(schemaRoot, null, baos);
	streamWriter.start();

	final int fieldCount = rowType.getFieldCount();
	final List<RowData> rows = Arrays.asList(
		new GenericRowData(fieldCount),
		new GenericRowData(fieldCount),
		new GenericRowData(fieldCount),
		new GenericRowData(fieldCount),
		new GenericRowData(fieldCount));

	// Chunk size 5 / 3 + 1 = 2 splits the five rows into chunks of 2, 2 and 1.
	final int chunkSize = rows.size() / batches + 1;
	for (List<RowData> chunk : Lists.partition(rows, chunkSize)) {
		for (RowData row : chunk) {
			rowWriter.write(row);
		}
		// Each chunk becomes one record batch; the writer is reset for the next one.
		rowWriter.finish();
		streamWriter.writeBatch();
		rowWriter.reset();
	}

	final ByteArrayInputStream serialized = new ByteArrayInputStream(baos.toByteArray());
	assertEquals(batches,
		ArrowUtils.readArrowBatches(Channels.newChannel(serialized)).length);
}
 
Example 13
Source File: RowArrowSourceFunctionTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Configures the base test with a schema root built from {@code rowType}, a row serializer
 * over a single string field, and an ordering on the string in field 0.
 */
public RowArrowSourceFunctionTest() {
	super(
		VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator),
		new RowSerializer(new TypeSerializer[]{StringSerializer.INSTANCE}, true),
		// Order rows by their first field, read as a String.
		Comparator.comparing(row -> (String) row.getField(0)));
}
 
Example 14
Source File: ArrowUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Convert Flink table to Pandas DataFrame.
 *
 * <p>Executes the table and returns an iterator of serialized Arrow data. Each call to
 * {@code next()} writes up to {@code maxArrowBatchSize} rows as one record batch and returns
 * the bytes accumulated in the internal buffer, which is then reset. The buffer is not reset
 * between starting the stream writer and the first {@code next()}, so the first array also
 * contains whatever {@code start()} wrote.
 *
 * <p>The schema root and the allocator are closed at the end of the {@code next()} call after
 * which no more rows remain.
 *
 * @param table the table to execute and collect
 * @param maxArrowBatchSize maximum number of rows written per returned byte array
 * @return iterator over the serialized Arrow data
 * @throws Exception if the Arrow check, the setup or the table execution fails
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
	checkArrowUsable();
	BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
	RowType rowType = (RowType) table.getSchema().toRowDataType().getLogicalType();
	VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	ByteArrayOutputStream baos = new ByteArrayOutputStream();
	ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
	arrowStreamWriter.start();

	ArrowWriter arrowWriter;
	Iterator<Row> results = table.execute().collect();
	Iterator convertedResults;
	if (isBlinkPlanner(table)) {
		arrowWriter = createRowDataArrowWriter(root, rowType);
		convertedResults = new Iterator<RowData>() {
			// Built on the first call to next() and reused for every following row; the
			// table schema it is derived from does not change while iterating.
			private DataFormatConverters.DataFormatConverter converter;

			@Override
			public boolean hasNext() {
				return results.hasNext();
			}

			@Override
			public RowData next() {
				if (converter == null) {
					// The SelectTableSink of blink planner will convert the table schema and we
					// need to keep the table schema used here be consistent with the converted table schema
					TableSchema convertedTableSchema =
						SelectTableSinkSchemaConverter.changeDefaultConversionClass(table.getSchema());
					converter =
						DataFormatConverters.getConverterForDataType(convertedTableSchema.toRowDataType());
				}
				return (RowData) converter.toInternal(results.next());
			}
		};
	} else {
		arrowWriter = createRowArrowWriter(root, rowType);
		convertedResults = results;
	}

	return new CustomIterator<byte[]>() {
		@Override
		public boolean hasNext() {
			return convertedResults.hasNext();
		}

		@Override
		public byte[] next() {
			try {
				// Fill one batch with at most maxArrowBatchSize rows.
				int i = 0;
				while (convertedResults.hasNext() && i < maxArrowBatchSize) {
					i++;
					arrowWriter.write(convertedResults.next());
				}
				arrowWriter.finish();
				arrowStreamWriter.writeBatch();
				return baos.toByteArray();
			} catch (Throwable t) {
				String msg = "Failed to serialize the data of the table";
				LOG.error(msg, t);
				throw new RuntimeException(msg, t);
			} finally {
				// Runs after the return value has been captured, so resetting is safe here.
				arrowWriter.reset();
				baos.reset();

				// NOTE(review): resources are only released once the results are exhausted; after
				// a failure with rows remaining, or if next() is never called, they stay open.
				if (!hasNext()) {
					root.close();
					allocator.close();
				}
			}
		}
	};
}
 
Example 15
Source File: AbstractArrowSourceFunction.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the Arrow allocator and the schema root for this source's data type, then sets
 * {@code running} to {@code true}.
 *
 * @param parameters not read by this method
 * @throws Exception if creating the allocator or the schema root fails
 */
@Override
public void open(Configuration parameters) throws Exception {
	// Child allocator named "ArrowSourceFunction", created with arguments 0 and Long.MAX_VALUE.
	allocator = ArrowUtils.getRootAllocator().newChildAllocator("ArrowSourceFunction", 0, Long.MAX_VALUE);
	// The logical type of dataType is cast to RowType to derive the Arrow schema.
	root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema((RowType) dataType.getLogicalType()), allocator);
	running = true;
}
 
Example 16
Source File: JobsFlightProducer.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Streams the job data identified by {@code ticket} to the listener, one Arrow batch per
 * stored record batch.
 *
 * <p>Failures are reported through {@code serverStreamListener.error(...)} rather than thrown.
 *
 * @param callContext          not read by this method
 * @param ticket               ticket carrying the job id, offset and limit
 * @param serverStreamListener listener that receives the schema root, the batches and the
 *                             completion or error signal
 */
@Override
public void getStream(CallContext callContext, Ticket ticket, ServerStreamListener serverStreamListener) {
  // Batches coming from the Job Results Store are forwarded untrimmed, so a client may receive
  // records it did not ask for, or duplicates across sequential requests. Trimming could be
  // added if that turns out to be a problem.
  try {
    final JobsFlightTicket parsedTicket = JobsFlightTicket.from(ticket);
    final JobProtobuf.JobId requestedJob =
      JobProtobuf.JobId.newBuilder().setId(parsedTicket.getJobId()).build();
    final int firstRow = parsedTicket.getOffset();
    final int maxRows = parsedTicket.getLimit();

    try (final JobDataFragment fragment =
           jobsService.get().getJobData(JobsProtoUtil.toStuff(requestedJob), firstRow, maxRows)) {
      final Schema schema = fragment.getSchema();
      try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
        serverStreamListener.start(root);
        for (RecordBatchHolder batch : fragment.getRecordBatches()) {
          final int rowsInBatch = batch.size();
          // Copy the batch column by column into the outgoing root.
          for (int column = 0; column < schema.getFields().size(); column++) {
            ValueVector target = root.getVector(schema.getFields().get(column).getName());
            ValueVector source = batch.getData().getVectors().get(column);
            // Rows [start, end) of the source land at positions 0.. of the target.
            int writeIndex = 0;
            int readIndex = batch.getStart();
            while (readIndex < batch.getEnd()) {
              target.copyFromSafe(readIndex, writeIndex, source);
              readIndex++;
              writeIndex++;
            }
            target.setValueCount(rowsInBatch);
            root.setRowCount(rowsInBatch);
          }
          serverStreamListener.putNext();
          // Re-allocate the root's vectors before filling the next batch.
          root.allocateNew();
        }
      }
      serverStreamListener.completed();
    }
  } catch (UserException ue) {
    serverStreamListener.error(JobsRpcUtils.toStatusRuntimeException(ue));
  } catch (Exception e) {
    serverStreamListener.error(Status.UNKNOWN.withCause(e).withDescription(e.getMessage()).asException());
  }
}
 
Example 17
Source File: Stream.java    From dremio-flight-connector with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the schema of this stream: creates {@code root} for it, wraps the root in a
 * {@code VectorLoader} stored in {@code loader}, and counts down {@code countDownLatch}.
 *
 * @param schema the schema used to create the root
 */
public void setSchema(Schema schema) {
  root = VectorSchemaRoot.create(schema, allocator);
  loader = new VectorLoader(root);
  // Counted down last, after root and loader have been assigned.
  countDownLatch.countDown();
}