Java Code Examples for org.apache.arrow.memory.BufferAllocator#close()

The following examples show how to use org.apache.arrow.memory.BufferAllocator#close(). Each example is taken from an open source project; the source file, project, and license are noted above the code.
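Before the project-specific examples, here is a minimal, self-contained sketch of the allocate/release/close lifecycle that close() enforces. It assumes Arrow 1.x, where ArrowBuf lives in org.apache.arrow.memory and is AutoCloseable; the class and allocator names are illustrative only.

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;

public class AllocatorLifecycleSketch {
  public static void main(String[] args) {
    // Resources close in reverse order: buffer, then child allocator, then root.
    try (BufferAllocator root = new RootAllocator(Long.MAX_VALUE);
         BufferAllocator child = root.newChildAllocator("example-task", 0, Long.MAX_VALUE);
         ArrowBuf buf = child.buffer(256)) {
      buf.setLong(0, 42L);
    } // each close() verifies the allocator's accounting; an un-released buffer would raise IllegalStateException
  }
}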
Example 1
Source File: MemoryRun.java    From dremio-oss with Apache License 2.0
private boolean updateProtectedSize(long needed) {
  Preconditions.checkArgument(needed > 0);
  BufferAllocator oldAllocator = copyTargetAllocator;
  logger.debug("Memory Run: attempting to update resserved memory for spill copy with new size as {}", needed);
  try {
    copyTargetAllocator = allocator.newChildAllocator("sort-copy-target", needed, Long.MAX_VALUE);
    copyTargetSize = needed;
    if (oldAllocator != null) {
      oldAllocator.close();
    }
  } catch (OutOfMemoryException ex) {
    tracer.reserveMemoryForSpillOOMEvent(needed, Long.MAX_VALUE, oldAllocator);
    logger.debug("Memory Run: failed to reserve memory for spill copy");
    return false;
  }

  return true;
}
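The method above creates the replacement child allocator before closing the one it replaces, so a failed reservation leaves the old reservation intact. Stripped of the Dremio-specific tracer and logging, the same pattern might look like the following sketch; the class, field, and allocator names are invented for illustration.

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;

final class SpillReservation {
  private final BufferAllocator parent;
  private BufferAllocator reservation; // null until the first successful reserve

  SpillReservation(BufferAllocator parent) {
    this.parent = parent;
  }

  /** Grows the reservation to {@code needed} bytes, keeping the old one if the parent is out of memory. */
  boolean resize(long needed) {
    final BufferAllocator old = reservation;
    try {
      // Reserve the new amount first so the old reservation is never dropped on failure.
      reservation = parent.newChildAllocator("spill-copy-reservation", needed, Long.MAX_VALUE);
    } catch (OutOfMemoryException e) {
      return false; // the old reservation, if any, is still held
    }
    if (old != null) {
      old.close(); // release the previous reservation only after the new one exists
    }
    return true;
  }
}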
 
Example 2
Source File: TestParquetWriter.java    From dremio-oss with Apache License 2.0
@Test
public void testFileSize() throws Exception {
  final Path tmpSchemaPath = new Path(getDfsTestTmpSchemaLocation());
  final Path targetPath = new Path(tmpSchemaPath, "testFileSize");

  final Configuration hadoopConf = new Configuration();
  final FileSystem newFs = targetPath.getFileSystem(hadoopConf);
  assertTrue(newFs.mkdirs(targetPath));

  final BufferAllocator ALLOCATOR = allocatorRule.newAllocator("test-parquet-writer", 0, Long.MAX_VALUE);

  OptionManager optionManager = mock(OptionManager.class);
  when(optionManager.getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR)).thenReturn("none"); //compression shouldn't matter
  when(optionManager.getOption(ExecConstants.PARQUET_PAGE_SIZE_VALIDATOR)).thenReturn(256L);
  when(optionManager.getOption(ExecConstants.PARQUET_MAXIMUM_PARTITIONS_VALIDATOR)).thenReturn(1L);
  when(optionManager.getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR)).thenReturn(4096L);

  OperatorStats operatorStats = mock(OperatorStats.class);

  OperatorContext opContext = mock(OperatorContext.class);
  when(opContext.getFragmentHandle()).thenReturn(ExecProtos.FragmentHandle.newBuilder().setMajorFragmentId(2323).setMinorFragmentId(234234).build());
  when(opContext.getAllocator()).thenReturn(ALLOCATOR);
  when(opContext.getOptions()).thenReturn(optionManager);
  when(opContext.getStats()).thenReturn(operatorStats);

  ParquetWriter writerConf = mock(ParquetWriter.class);
  when(writerConf.getLocation()).thenReturn(targetPath.toUri().toString());
  OpProps props = mock(OpProps.class);
  when(writerConf.getProps()).thenReturn(props);
  when(writerConf.getProps().getUserName()).thenReturn("testuser");

  ParquetFormatPlugin formatPlugin = mock(ParquetFormatPlugin.class);
  FileSystemPlugin fsPlugin = mock(FileSystemPlugin.class);
  when(fsPlugin.createFS((String) notNull(), (OperatorContext) notNull())).thenReturn(HadoopFileSystem.getLocal(hadoopConf));
  when(writerConf.getFormatPlugin()).thenReturn(formatPlugin);
  when(formatPlugin.getFsPlugin()).thenReturn(fsPlugin);

  ParquetRecordWriter writer = new ParquetRecordWriter(opContext, writerConf, new ParquetFormatConfig());

  RecordWriter.OutputEntryListener outputEntryListener = mock(RecordWriter.OutputEntryListener.class);
  RecordWriter.WriteStatsListener writeStatsListener = mock(RecordWriter.WriteStatsListener.class);
  ArgumentCaptor<Long> recordWrittenCaptor = ArgumentCaptor.forClass(long.class);
  ArgumentCaptor<Long> fileSizeCaptor = ArgumentCaptor.forClass(long.class);
  ArgumentCaptor<String> pathCaptor = ArgumentCaptor.forClass(String.class);
  ArgumentCaptor<byte[]> metadataCaptor = ArgumentCaptor.forClass(byte[].class);
  ArgumentCaptor<Integer> partitionCaptor = ArgumentCaptor.forClass(Integer.class);
  ArgumentCaptor<byte[]> icebergMetadataCaptor = ArgumentCaptor.forClass(byte[].class);

  BigIntVector bigIntVector = new BigIntVector("key", ALLOCATOR);
  bigIntVector.allocateNew(2);
  bigIntVector.set(0, 52459253098448904L);
  bigIntVector.set(1, 1116675951L);

  VectorContainer container = new VectorContainer();
  container.add(bigIntVector);
  container.setRecordCount(2);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);

  writer.setup(container, outputEntryListener, writeStatsListener);
  writer.startPartition(WritePartition.NONE);
  writer.writeBatch(0, container.getRecordCount());

  container.clear();
  writer.close();

  verify(outputEntryListener, times(1)).recordsWritten(recordWrittenCaptor.capture(),
    fileSizeCaptor.capture(), pathCaptor.capture(), metadataCaptor.capture(),
    partitionCaptor.capture(), icebergMetadataCaptor.capture());

  for (FileStatus file : newFs.listStatus(targetPath)) {
    if (file.getPath().toString().endsWith(".parquet")) { //complex243_json is in here for some reason?
      assertEquals(Long.valueOf(fileSizeCaptor.getValue()), Long.valueOf(file.getLen()));
      break;
    }
  }

  container.close();
  ALLOCATOR.close();
}
 
Example 3
Source File: TestParquetWriter.java    From dremio-oss with Apache License 2.0
@Test
public void testOutOfMemory() throws Exception {
  final Path tmpSchemaPath = new Path(getDfsTestTmpSchemaLocation());
  final Path targetPath = new Path(tmpSchemaPath, "testOutOfMemory");

  final Configuration hadoopConf = new Configuration();
  final FileSystem newFs = targetPath.getFileSystem(hadoopConf);
  assertTrue(newFs.mkdirs(targetPath));

  final BufferAllocator ALLOCATOR = allocatorRule.newAllocator("test-parquet-writer", 0, 128);

  OptionManager optionManager = mock(OptionManager.class);
  when(optionManager.getOption(ExecConstants.PARQUET_WRITER_COMPRESSION_TYPE_VALIDATOR)).thenReturn("none"); //compression shouldn't matter
  when(optionManager.getOption(ExecConstants.PARQUET_PAGE_SIZE_VALIDATOR)).thenReturn(256L);
  when(optionManager.getOption(ExecConstants.PARQUET_MAXIMUM_PARTITIONS_VALIDATOR)).thenReturn(1L);
  when(optionManager.getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR)).thenReturn(4096L);

  OperatorStats operatorStats = mock(OperatorStats.class);

  OperatorContext opContext = mock(OperatorContext.class);
  when(opContext.getFragmentHandle()).thenReturn(ExecProtos.FragmentHandle.newBuilder().setMajorFragmentId(2323).setMinorFragmentId(234235).build());
  when(opContext.getAllocator()).thenReturn(ALLOCATOR);
  when(opContext.getOptions()).thenReturn(optionManager);
  when(opContext.getStats()).thenReturn(operatorStats);

  ParquetWriter writerConf = mock(ParquetWriter.class);
  when(writerConf.getLocation()).thenReturn(targetPath.toUri().toString());
  OpProps props = mock(OpProps.class);
  when(writerConf.getProps()).thenReturn(props);
  when(writerConf.getProps().getUserName()).thenReturn("testuser");

  ParquetFormatPlugin formatPlugin = mock(ParquetFormatPlugin.class);
  FileSystemPlugin fsPlugin = mock(FileSystemPlugin.class);
  when(writerConf.getFormatPlugin()).thenReturn(formatPlugin);
  when(fsPlugin.createFS((String) notNull(), (OperatorContext) notNull())).thenReturn(HadoopFileSystem.getLocal(hadoopConf));
  when(formatPlugin.getFsPlugin()).thenReturn(fsPlugin);

  ParquetRecordWriter writer = new ParquetRecordWriter(opContext, writerConf, new ParquetFormatConfig());

  RecordWriter.OutputEntryListener outputEntryListener = mock(RecordWriter.OutputEntryListener.class);
  RecordWriter.WriteStatsListener writeStatsListener = mock(RecordWriter.WriteStatsListener.class);

  BigIntVector bigIntVector = new BigIntVector("key", ALLOCATOR);
  bigIntVector.allocateNew(2);
  bigIntVector.set(0, 52459253098448904L);
  bigIntVector.set(1, 1116675951L);

  VectorContainer container = new VectorContainer();
  container.add(bigIntVector);
  container.setRecordCount(2);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);

  writer.setup(container, outputEntryListener, writeStatsListener);
  writer.startPartition(WritePartition.NONE);
  writer.writeBatch(0, container.getRecordCount());

  container.clear();
  try {
    writer.close();
  } catch (Exception e) {
    // ignore any exception in close(), but all the buffers should be released.
  }

  container.close();
  ALLOCATOR.close();
}
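The try/catch above tolerates a failure inside writer.close() but still depends on every buffer having been released before ALLOCATOR.close() runs. That expectation can be stated directly with the allocator's own accounting; a small sketch using only the stock Arrow and JUnit 4 APIs (the helper class and method names are invented for illustration):

import static org.junit.Assert.assertEquals;

import org.apache.arrow.memory.BufferAllocator;

final class AllocatorAssertions {
  private AllocatorAssertions() {}

  /**
   * Fails with a readable assertion instead of relying on the IllegalStateException
   * that BufferAllocator#close() throws when buffers are still outstanding.
   */
  static void closeVerifyingNoLeaks(BufferAllocator allocator) {
    assertEquals("allocator still holds direct memory", 0, allocator.getAllocatedMemory());
    allocator.close();
  }
}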
 
Example 4
Source File: TestAbstractDataCollector.java    From dremio-oss with Apache License 2.0
@Test
public void testReserveMemory() {
  SharedResourceGroup resourceGroup = mock(SharedResourceGroup.class);
  SabotConfig config = mock(SabotConfig.class);
  FragmentWorkQueue workQueue = mock(FragmentWorkQueue.class);
  TunnelProvider tunnelProvider = mock(TunnelProvider.class);

  EndpointsIndex endpointsIndex = new EndpointsIndex(
    Arrays.asList(
      NodeEndpoint.newBuilder().setAddress("localhost").setFabricPort(12345).build(),
      NodeEndpoint.newBuilder().setAddress("localhost").setFabricPort(12345).build()
    )
  );
  List<CoordExecRPC.MinorFragmentIndexEndpoint> list =
    Arrays.asList(
      MinorFragmentIndexEndpoint.newBuilder().setEndpointIndex(0).setMinorFragmentId(0).build(),
      MinorFragmentIndexEndpoint.newBuilder().setEndpointIndex(0).setMinorFragmentId(0).build()
    );

  CoordExecRPC.Collector collector = CoordExecRPC.Collector.newBuilder()
    .setIsSpooling(true)
    .setOppositeMajorFragmentId(3)
    .setSupportsOutOfOrder(true)
    .addAllIncomingMinorFragmentIndex(list)
    .build();
  ExecProtos.FragmentHandle handle = ExecProtos.FragmentHandle.newBuilder().setMajorFragmentId(2323).setMinorFragmentId(234234).build();
  BufferAllocator allocator = allocatorRule.newAllocator("test-abstract-data-collector", 0, 2000000);
  boolean outOfMemory = false;
  final SchedulerService schedulerService = Mockito.mock(SchedulerService.class);
  final SpillService spillService = new SpillServiceImpl(DremioConfig.create(null, config), new DefaultSpillServiceOptions(),
                                                         new Provider<SchedulerService>() {
                                                           @Override
                                                           public SchedulerService get() {
                                                             return schedulerService;
                                                           }
                                                         });
  try {
    AbstractDataCollector dataCollector = new AbstractDataCollector(resourceGroup, true,
      collector, 10240, allocator, config, handle, workQueue, tunnelProvider, spillService, endpointsIndex) {
      @Override
      protected RawBatchBuffer getBuffer(int minorFragmentId) {
        return null;
      }
    };
  } catch (OutOfMemoryException e) {
    /* Each minor fragment reserves an arrow buffer of 1024*1024 bytes, so 2*1024*1024 bytes are required
     * for the two minor fragments. The allocator is limited to 2000000 bytes, so an OutOfMemoryException
     * is expected when the second arrow buffer is allocated, but this must not cause a memory leak when
     * the allocator is closed.
     */
    // The first allocation should succeed
    assertEquals(allocator.getPeakMemoryAllocation(), 1024*1024);

    outOfMemory = true;
  }

  // Verify that it runs out of memory for second allocation.
  assertTrue(outOfMemory);
  /* Verify that the first allocated arrow buffer is released when the second allocation fails,
   * so no memory leak is reported when the allocator is closed.
   */
  allocator.close();
}
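As the catch-block comment notes, allocator.close() doubles as the leak check here: had the collector not released its first 1 MB buffer after the second allocation failed, closing the allocator would throw. The same behaviour can be reproduced with plain Arrow calls; the limit and sizes below mirror the test, but the class is a standalone sketch rather than Dremio code.

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.OutOfMemoryException;
import org.apache.arrow.memory.RootAllocator;

public class LeakCheckOnCloseSketch {
  public static void main(String[] args) {
    BufferAllocator allocator = new RootAllocator(2_000_000);
    ArrowBuf first = allocator.buffer(1024 * 1024); // succeeds; 1 MB now outstanding
    try {
      allocator.buffer(1024 * 1024);                // exceeds the 2,000,000-byte limit
    } catch (OutOfMemoryException e) {
      first.close();                                // release what was allocated, as the collector must
    }
    allocator.close(); // succeeds only because nothing is outstanding; a leak would raise IllegalStateException
  }
}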
 
Example 5
Source File: ArrowUtils.java    From flink with Apache License 2.0
/**
 * Convert Flink table to Pandas DataFrame.
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
	checkArrowUsable();
	BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
	RowType rowType = (RowType) table.getSchema().toRowDataType().getLogicalType();
	VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
	ByteArrayOutputStream baos = new ByteArrayOutputStream();
	ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
	arrowStreamWriter.start();

	ArrowWriter arrowWriter;
	Iterator<Row> results = table.execute().collect();
	Iterator convertedResults;
	if (isBlinkPlanner(table)) {
		arrowWriter = createRowDataArrowWriter(root, rowType);
		convertedResults = new Iterator<RowData>() {
			@Override
			public boolean hasNext() {
				return results.hasNext();
			}

			@Override
			public RowData next() {
				// The SelectTableSink of the blink planner converts the table schema, so the table
				// schema used here needs to stay consistent with the converted table schema
				TableSchema convertedTableSchema =
					SelectTableSinkSchemaConverter.changeDefaultConversionClass(table.getSchema());
				DataFormatConverters.DataFormatConverter converter =
					DataFormatConverters.getConverterForDataType(convertedTableSchema.toRowDataType());
				return (RowData) converter.toInternal(results.next());
			}
		};
	} else {
		arrowWriter = createRowArrowWriter(root, rowType);
		convertedResults = results;
	}

	return new CustomIterator<byte[]>() {
		@Override
		public boolean hasNext() {
			return convertedResults.hasNext();
		}

		@Override
		public byte[] next() {
			try {
				int i = 0;
				while (convertedResults.hasNext() && i < maxArrowBatchSize) {
					i++;
					arrowWriter.write(convertedResults.next());
				}
				arrowWriter.finish();
				arrowStreamWriter.writeBatch();
				return baos.toByteArray();
			} catch (Throwable t) {
				String msg = "Failed to serialize the data of the table";
				LOG.error(msg, t);
				throw new RuntimeException(msg, t);
			} finally {
				arrowWriter.reset();
				baos.reset();

				if (!hasNext()) {
					root.close();
					allocator.close();
				}
			}
		}
	};
}
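Example 5 must defer allocator.close() until the iterator is exhausted, which is why the call sits in the finally block of next(). When the whole lifetime fits inside one method, the same resources can instead be scoped with try-with-resources, since both VectorSchemaRoot and BufferAllocator are AutoCloseable. A minimal sketch of that variant follows; the schema, field, and allocator names are invented for illustration.

import java.util.Collections;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;

public class ScopedRootSketch {
  public static void main(String[] args) {
    Schema schema = new Schema(Collections.singletonList(
        Field.nullable("id", new ArrowType.Int(64, true))));
    try (BufferAllocator root = new RootAllocator(Long.MAX_VALUE);
         BufferAllocator child = root.newChildAllocator("scoped-example", 0, Long.MAX_VALUE);
         VectorSchemaRoot vsr = VectorSchemaRoot.create(schema, child)) {
      BigIntVector id = (BigIntVector) vsr.getVector("id");
      id.allocateNew(1);
      id.set(0, 1L);
      vsr.setRowCount(1);
    } // closes in reverse order: the vectors' buffers, then the child allocator, then the root
  }
}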