org.apache.arrow.vector.types.pojo.Schema Java Examples

The following examples show how to use org.apache.arrow.vector.types.pojo.Schema, drawn from several open source projects. The source file and project are noted above each example.
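Before the project examples, a minimal sketch of constructing a Schema directly with the Arrow POJO API may help orient readers; the field names here are purely illustrative.

import java.util.Arrays;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

// A nullable 32-bit signed int column and a nullable UTF-8 string column.
Field id = new Field("id", FieldType.nullable(new ArrowType.Int(32, true)), null);
Field name = new Field("name", FieldType.nullable(new ArrowType.Utf8()), null);
Schema schema = new Schema(Arrays.asList(id, name));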
Example #1
Source File: UserDefinedFunctionRequestSerDe.java    From aws-athena-query-federation with Apache License 2.0
@Override
protected FederationRequest doTypedDeserialize(JsonParser jparser, DeserializationContext ctxt)
        throws IOException
{
    assertFieldName(jparser, IDENTITY_FIELD);
    FederatedIdentity identity = identityDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, INPUT_RECORDS_FIELD);
    Block inputRecords = blockDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, OUTPUT_SCHEMA_FIELD);
    Schema outputSchema = schemaDeserializer.deserialize(jparser, ctxt);

    String methodName = getNextStringField(jparser, METHOD_NAME_FIELD);
    UserDefinedFunctionType functionType = UserDefinedFunctionType.valueOf(getNextStringField(jparser, FUNCTION_TYPE_FIELD));

    return new UserDefinedFunctionRequest(identity, inputRecords, outputSchema, methodName, functionType);
}
 
Example #2
Source File: LambdaMetadataProvider.java    From aws-athena-query-federation with Apache License 2.0
/**
 * This method builds and executes a GetTableLayoutRequest against the specified Lambda function.
 *
 * @param catalog the catalog name to be passed to Lambda
 * @param tableName the schema-qualified table name indicating the table whose layout should be retrieved
 * @param constraints the constraints to be applied to the request
 * @param schema the schema of the table in question
 * @param partitionCols the partition column names for the table in question
 * @param metadataFunction the name of the Lambda function to call
 * @param identity the identity of the caller
 * @return the response
 */
public static GetTableLayoutResponse getTableLayout(String catalog,
                                   TableName tableName,
                                   Constraints constraints,
                                   Schema schema,
                                   Set<String> partitionCols,
                                   String metadataFunction,
                                   FederatedIdentity identity)
{
  String queryId = generateQueryId();
  log.info("Submitting GetTableLayoutRequest with ID " + queryId);

  try (GetTableLayoutRequest request =
               new GetTableLayoutRequest(identity, queryId, catalog, tableName, constraints, schema, partitionCols)) {
    log.info("Submitting request: {}", request);
    GetTableLayoutResponse response = (GetTableLayoutResponse) getService(metadataFunction, identity, catalog).call(request);
    log.info("Received response: {}", response);
    return response;
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #3
Source File: HbaseRecordHandler.java    From aws-athena-query-federation with Apache License 2.0
private boolean scanFilterProject(ResultScanner scanner, ReadRecordsRequest request, BlockSpiller blockSpiller, QueryStatusChecker queryStatusChecker)
{
    Schema projection = request.getSchema();
    boolean isNative = projection.getCustomMetadata().get(HBASE_NATIVE_STORAGE_FLAG) != null;

    for (Result row : scanner) {
        if (!queryStatusChecker.isQueryRunning()) {
            return true;
        }
        blockSpiller.writeRows((Block block, int rowNum) -> {
            boolean match = true;
            for (Field field : projection.getFields()) {
                if (match) {
                    match &= writeField(block, field, isNative, row, rowNum);
                }
            }
            return match ? 1 : 0;
        });
    }
    return true;
}
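The HBASE_NATIVE_STORAGE_FLAG check above reads the Schema's custom metadata map. A minimal sketch of attaching such metadata, assuming a hypothetical key "hbase-native-storage-flag" (the real constant's value is not shown in this excerpt):

import java.util.Collections;
import java.util.Map;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

Field rowKey = new Field("row", FieldType.nullable(new ArrowType.Utf8()), null);
Map<String, String> metadata = Collections.singletonMap("hbase-native-storage-flag", "true");
Schema schema = new Schema(Collections.singletonList(rowKey), metadata);
// schema.getCustomMetadata().get("hbase-native-storage-flag") now returns "true".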
 
Example #4
Source File: BlockTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void constrainedBlockTest()
        throws Exception
{
    Schema schema = SchemaBuilder.newBuilder()
            .addIntField("col1")
            .addIntField("col2")
            .build();

    Block block = allocator.createBlock(schema);

    ValueSet col1Constraint = EquatableValueSet.newBuilder(allocator, Types.MinorType.INT.getType(), true, false)
            .add(10).build();
    Constraints constraints = new Constraints(Collections.singletonMap("col1", col1Constraint));
    try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(allocator, schema, constraints)) {
        block.constrain(constraintEvaluator);
        assertTrue(block.setValue("col1", 0, 10));
        assertTrue(block.offerValue("col1", 0, 10));
        assertFalse(block.setValue("col1", 0, 11));
        assertFalse(block.offerValue("col1", 0, 11));
        assertTrue(block.offerValue("unkown_col", 0, 10));
    }
}
 
Example #5
Source File: BlockUtils.java    From aws-athena-query-federation with Apache License 2.0
/**
 * Creates a new Block with a single column, populated with the provided values.
 *
 * @param allocator The BlockAllocator to use when creating the Block.
 * @param columnName The name of the single column in the Block's Schema.
 * @param type The Apache Arrow Type of the column.
 * @param values The values to write to the new Block. Each value will be its own row.
 * @return The newly created Block with a single-column Schema, populated with the provided values.
 */
public static Block newBlock(BlockAllocator allocator, String columnName, ArrowType type, Collection<Object> values)
{
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(columnName, type);
    Schema schema = schemaBuilder.build();
    Block block = allocator.createBlock(schema);
    int count = 0;
    for (Object next : values) {
        try {
            setValue(block.getFieldVector(columnName), count++, next);
        }
        catch (Exception ex) {
            throw new RuntimeException("Error for " + type + " " + columnName + " " + next, ex);
        }
    }
    block.setRowCount(count);
    return block;
}
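A hypothetical call against the method above, assuming a BlockAllocator named allocator and the Athena federation SDK types are already in scope:

import java.util.Arrays;
import org.apache.arrow.vector.types.pojo.ArrowType;

Block yearBlock = BlockUtils.newBlock(allocator, "year", new ArrowType.Int(32, true),
        Arrays.<Object>asList(2016, 2017, 2018));
// yearBlock now holds a single "year" column with three rows: 2016, 2017, 2018.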
 
Example #6
Source File: TPCDSRecordHandler.java    From aws-athena-query-federation with Apache License 2.0
/**
 * Generates the CellWriters used to convert the TPCDS generator's data to Apache Arrow.
 *
 * @param schemaForRead The schema to read/project.
 * @param table The TPCDS Table we are reading from.
 * @return Map<Integer, CellWriter> where the Integer is the column position in the TPCDS data set and the CellWriter
 * can be used to read, convert, and write the value at that position for any row into the correct position and type
 * in our Apache Arrow response.
 */
private Map<Integer, CellWriter> makeWriters(Schema schemaForRead, Table table)
{
    Map<String, Column> columnPositions = new HashMap<>();
    for (Column next : table.getColumns()) {
        columnPositions.put(next.getName(), next);
    }

    //We use this approach to reduce the overhead of field lookups. This isn't as good as true columnar processing
    //using Arrow but it gets us ~80% of the way there from a rows/second per cpu-cycle perspective.
    Map<Integer, CellWriter> writers = new HashMap<>();
    for (Field nextField : schemaForRead.getFields()) {
        Column column = columnPositions.get(nextField.getName());
        writers.put(column.getPosition(), makeWriter(nextField, column));
    }
    return writers;
}
 
Example #7
Source File: DremioArrowSchema.java    From dremio-oss with Apache License 2.0
/**
 * Parses an Arrow Schema from JSON stored as a property in the Parquet footer metadata.
 *
 * @param properties the Parquet footer key/value metadata
 * @return the parsed Schema, or null if no Dremio Arrow schema property is present
 * @throws IOException if the JSON schema cannot be parsed
 */
public static Schema fromMetaData(Map<String, String> properties) throws IOException {
  Preconditions.checkNotNull(properties);
  String jsonArrowSchema = properties.get(DREMIO_ARROW_SCHEMA);
  String jsonArrowSchema2_1 = properties.get(DREMIO_ARROW_SCHEMA_2_1);

  // Check in order:
  // DREMIO_ARROW_SCHEMA - if present, this is a pre-2.1.0 generated file - use it.
  // Otherwise, check DREMIO_ARROW_SCHEMA_2_1 - if present, this is a 2.1.0+ generated file - use it.

  if (jsonArrowSchema != null) {
    return fromJSON(jsonArrowSchema);
  }
  if (jsonArrowSchema2_1 != null) {
    return fromJSON(jsonArrowSchema2_1);
  }
  return null;
}
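DremioArrowSchema.fromJSON is not shown here; Arrow's own Schema class offers an analogous JSON round trip, sketched below with an illustrative column name.

import java.io.IOException;
import java.util.Collections;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.arrow.vector.types.pojo.Schema;

static void roundTrip() throws IOException
{
  Field a = new Field("a", FieldType.nullable(new ArrowType.Int(32, true)), null);
  Schema original = new Schema(Collections.singletonList(a));
  String json = original.toJson();         // the JSON form a footer property could store
  Schema restored = Schema.fromJSON(json); // parse it back
  assert original.equals(restored);
}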
 
Example #8
Source File: RedisMetadataHandlerTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void doGetTableLayout()
        throws Exception
{
    Schema schema = SchemaBuilder.newBuilder().build();

    GetTableLayoutRequest req = new GetTableLayoutRequest(IDENTITY, QUERY_ID, DEFAULT_CATALOG,
            TABLE_NAME,
            new Constraints(new HashMap<>()),
            schema,
            new HashSet<>());

    GetTableLayoutResponse res = handler.doGetTableLayout(allocator, req);

    logger.info("doGetTableLayout - {}", res);
    Block partitions = res.getPartitions();
    for (int row = 0; row < partitions.getRowCount() && row < 10; row++) {
        logger.info("doGetTableLayout:{} {}", row, BlockUtils.rowToString(partitions, row));
    }

    assertTrue(partitions.getRowCount() > 0);
    assertEquals(4, partitions.getFields().size());

    logger.info("doGetTableLayout: partitions[{}]", partitions.getRowCount());
}
 
Example #9
Source File: ExampleUserDefinedFunctionHandlerTest.java    From aws-athena-query-federation with Apache License 2.0
private UserDefinedFunctionResponse runAndAssertSerialization(Block inputRecords,
                                                              Schema outputSchema,
                                                              String methodName) throws IOException
{
    UserDefinedFunctionRequest request = new UserDefinedFunctionRequest(IdentityUtil.fakeIdentity(),
            inputRecords,
            outputSchema,
            methodName,
            UserDefinedFunctionType.SCALAR);
    ObjectMapperUtil.assertSerialization(request);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    mapper.writeValue(out, request);
    ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(out.toByteArray());
    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();

    exampleUserDefinedFunctionHandler.handleRequest(byteArrayInputStream, outputStream, null);

    UserDefinedFunctionResponse udfResponse = (UserDefinedFunctionResponse) mapper.readValue(outputStream.toByteArray(), FederationResponse.class);
    ObjectMapperUtil.assertSerialization(udfResponse);

    return udfResponse;
}
 
Example #10
Source File: GetTableLayoutRequestSerDe.java    From aws-athena-query-federation with Apache License 2.0
@Override
protected MetadataRequest doRequestDeserialize(JsonParser jparser, DeserializationContext ctxt, FederatedIdentity identity, String queryId, String catalogName)
        throws IOException
{
    assertFieldName(jparser, TABLE_NAME_FIELD);
    TableName tableName = tableNameDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, CONSTRAINTS_FIELD);
    Constraints constraints = constraintsDeserializer.deserialize(jparser, ctxt);

    assertFieldName(jparser, SCHEMA_FIELD);
    Schema schema = schemaDeserializer.deserialize(jparser, ctxt);

    ImmutableSet.Builder<String> partitionColsSet = ImmutableSet.builder();
    partitionColsSet.addAll(getNextStringArray(jparser, PARTITION_COLS_FIELD));

    return new GetTableLayoutRequest(identity, queryId, catalogName, tableName, constraints, schema, partitionColsSet.build());
}
 
Example #11
Source File: SampleSourceMetadata.java    From dremio-oss with Apache License 2.0
/**
 * Given a count n and n path names, generates that many datasets.
 *
 * @param numDatasets number of datasets to add
 * @param pathNames   list of path names, one per dataset
 */
public void addNDatasets(int numDatasets, List<List<String>> pathNames) {
  if (numDatasets != pathNames.size()) {
    throw new UnsupportedOperationException();
  }

  DatasetStats datasetStats = DatasetStats.of(0, 0);
  Schema schema = new Schema(new ArrayList<>());

  for (int i = 0; i < numDatasets; i++) {
    EntityPath entityPath = new EntityPath(pathNames.get(i));
    DatasetMetadata datasetMetadata = DatasetMetadata.of(datasetStats, schema);

    addDatasetHandle(SampleHandleImpl.of(datasetMetadata, entityPath));
  }
}
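A hypothetical invocation, assuming a SampleSourceMetadata instance named sourceMetadata:

import java.util.Arrays;
import java.util.List;

List<List<String>> paths = Arrays.asList(
        Arrays.asList("source", "db", "table1"),
        Arrays.asList("source", "db", "table2"));
sourceMetadata.addNDatasets(2, paths); // the count must match paths.size(), or the method throws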
 
Example #12
Source File: AbstractTableProviderTest.java    From aws-athena-query-federation with Apache License 2.0
protected void validateRead(Schema schema, S3BlockSpillReader reader, List<SpillLocation> locations, EncryptionKey encryptionKey)
{
    int blockNum = 0;
    int rowNum = 0;
    for (SpillLocation next : locations) {
        S3SpillLocation spillLocation = (S3SpillLocation) next;
        try (Block block = reader.read(spillLocation, encryptionKey, schema)) {
            logger.info("validateRead: blockNum[{}] and recordCount[{}]", blockNum++, block.getRowCount());

            for (int i = 0; i < block.getRowCount(); i++) {
                logger.info("validateRead: {}", BlockUtils.rowToString(block, i));
                rowNum++;
                validateRow(block, i);
            }
        }
        catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }

    assertEquals(getExpectedRows(), rowNum);
}
 
Example #13
Source File: AbstractTestNamespaceService.java    From dremio-oss with Apache License 2.0
@Test
public void testDataSetSchema() throws Exception {
  Field field1 = new Field("a", true, new Int(32, true), null);
  Field child1 = new Field("c", true, Utf8.INSTANCE, null);
  Field field2 = new Field("b", true, Struct.INSTANCE, ImmutableList.of(child1));
  Schema schema = new Schema(ImmutableList.of(field1, field2));
  FlatBufferBuilder builder = new FlatBufferBuilder();
  builder.finish(schema.getSchema(builder));
  NamespaceTestUtils.addSource(namespaceService, "s");
  NamespaceTestUtils.addPhysicalDS(namespaceService, "s.foo", builder.sizedByteArray());
  ByteBuffer bb = ByteBuffer.wrap(DatasetHelper.getSchemaBytes(namespaceService.getDataset(new NamespaceKey(PathUtils.parseFullPath("s.foo")))).toByteArray());
  Schema returnedSchema = Schema.convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(bb));
  assertEquals(schema, returnedSchema);
}
 
Example #14
Source File: BlockUtilsTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void isNullRow()
{
    Schema schema = SchemaBuilder.newBuilder()
            .addField("col1", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Int(32, true))
            .addField("col3", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
            .build();

    LocalDateTime ldt = LocalDateTime.now();

    //Make a block with 2 rows and no null rows
    Block block = allocator.createBlock(schema);
    BlockUtils.setValue(block.getFieldVector("col1"), 0, 10);
    BlockUtils.setValue(block.getFieldVector("col2"), 0, 20);
    BlockUtils.setValue(block.getFieldVector("col3"), 0, ldt);

    BlockUtils.setValue(block.getFieldVector("col1"), 1, 11);
    BlockUtils.setValue(block.getFieldVector("col2"), 1, 21);
    BlockUtils.setValue(block.getFieldVector("col3"), 1, ZonedDateTime.of(ldt, ZoneId.of("-05:00")));
    block.setRowCount(2);

    assertFalse(BlockUtils.isNullRow(block, 1));

    //now set a row to null
    BlockUtils.unsetRow(1, block);
    assertTrue(BlockUtils.isNullRow(block, 1));
}
 
Example #15
Source File: JdbcMetadataHandler.java    From aws-athena-query-federation with Apache License 2.0
private Schema getSchema(Connection jdbcConnection, TableName tableName, Schema partitionSchema)
        throws SQLException
{
    SchemaBuilder schemaBuilder = SchemaBuilder.newBuilder();

    try (ResultSet resultSet = getColumns(jdbcConnection.getCatalog(), tableName, jdbcConnection.getMetaData())) {
        boolean found = false;
        while (resultSet.next()) {
            ArrowType columnType = JdbcArrowTypeConverter.toArrowType(
                    resultSet.getInt("DATA_TYPE"),
                    resultSet.getInt("COLUMN_SIZE"),
                    resultSet.getInt("DECIMAL_DIGITS"));
            String columnName = resultSet.getString("COLUMN_NAME");
            if (columnType != null && SupportedTypes.isSupported(columnType)) {
                schemaBuilder.addField(FieldBuilder.newBuilder(columnName, columnType).build());
                found = true;
            }
            else {
                LOGGER.error("getSchema: Unable to map type for column[" + columnName + "] to a supported type, attempted " + columnType);
            }
        }

        if (!found) {
            throw new RuntimeException("Could not find table in " + tableName.getSchemaName());
        }

        // add partition columns
        partitionSchema.getFields().forEach(schemaBuilder::addField);

        return schemaBuilder.build();
    }
}
 
Example #16
Source File: ExampleUserDefinedFunctionHandlerTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void testToJsonMethod() throws Exception
{
    Schema inputSchema = SchemaBuilder.newBuilder()
            .addStructField("struct")
            .addChildField("struct", "int", Types.MinorType.INT.getType())
            .addChildField("struct", "double", Types.MinorType.FLOAT8.getType())
            .addChildField("struct", "string", Types.MinorType.VARCHAR.getType())
            .build();
    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("json", Types.MinorType.VARCHAR.getType())
            .build();

    Block inputRecords = allocator.createBlock(inputSchema);
    inputRecords.setRowCount(1);
    FieldVector fieldVector = inputRecords.getFieldVector("struct");
    Map<String, Object> struct = new HashMap<>();
    struct.put("int", 10);
    struct.put("double", 2.3);
    struct.put("string", "test_string");
    BlockUtils.setComplexValue(fieldVector, 0, FieldResolver.DEFAULT, struct);

    UserDefinedFunctionResponse response = runAndAssertSerialization(inputRecords, outputSchema, "to_json");

    Block outputRecords = response.getRecords();
    assertEquals(1, outputRecords.getRowCount());
    FieldReader fieldReader = outputRecords.getFieldReader("json");
    ArrowValueProjector arrowValueProjector = ProjectorUtils.createArrowValueProjector(fieldReader);
    assertEquals(exampleUserDefinedFunctionHandler.to_json(struct), arrowValueProjector.project(0));
}
 
Example #17
Source File: BlockUtilsTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void fieldToString()
{
    Schema schema = SchemaBuilder.newBuilder()
            .addField("col1", new ArrowType.Int(32, true))
            .addField("col2", new ArrowType.Timestamp(org.apache.arrow.vector.types.TimeUnit.MILLISECOND, "UTC"))
            .build();

    LocalDateTime ldt = LocalDateTime.of(2020, 3, 18, 12, 54, 29);

    //Make a block with 2 rows and no null rows
    Block block = allocator.createBlock(schema);
    BlockUtils.setValue(block.getFieldVector("col1"), 0, 10);
    BlockUtils.setValue(block.getFieldVector("col2"), 0, ldt);

    BlockUtils.setValue(block.getFieldVector("col1"), 1, 11);
    BlockUtils.setValue(block.getFieldVector("col2"), 1, ZonedDateTime.of(ldt, ZoneId.of("-05:00")));
    block.setRowCount(2);

    String expectedRows = "rows=2";
    String expectedCol1 = "[10, 11]";
    String expectedCol2 = "[2020-03-18T12:54:29Z[UTC], 2020-03-18T12:54:29-05:00]";
    String actual = block.toString();
    assertTrue(actual.contains(expectedRows));
    assertTrue(actual.contains(expectedCol1));
    assertTrue(actual.contains(expectedCol2));
}
 
Example #18
Source File: ReadRecordsResponseSerDeTest.java    From aws-athena-query-federation with Apache License 2.0
@Before
public void beforeTest()
        throws IOException
{
    String yearCol = "year";
    String monthCol = "month";
    String dayCol = "day";

    Schema schema = SchemaBuilder.newBuilder()
            .addField(yearCol, new ArrowType.Int(32, true))
            .addField(monthCol, new ArrowType.Int(32, true))
            .addField(dayCol, new ArrowType.Int(32, true))
            .build();

    Block records = allocator.createBlock(schema);
    int num_records = 10;
    for (int i = 0; i < num_records; i++) {
        BlockUtils.setValue(records.getFieldVector(yearCol), i, 2016 + i);
        BlockUtils.setValue(records.getFieldVector(monthCol), i, (i % 12) + 1);
        BlockUtils.setValue(records.getFieldVector(dayCol), i, (i % 28) + 1);
    }
    records.setRowCount(num_records);

    expected = new ReadRecordsResponse("test-catalog", records);

    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "ReadRecordsResponse.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
 
Example #19
Source File: PostGreSqlMetadataHandlerTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void doGetSplitsContinuation()
        throws Exception
{
    BlockAllocator blockAllocator = new BlockAllocatorImpl();
    Constraints constraints = Mockito.mock(Constraints.class);
    TableName tableName = new TableName("testSchema", "testTable");
    Schema partitionSchema = this.postGreSqlMetadataHandler.getPartitionSchema("testCatalogName");
    Set<String> partitionCols = partitionSchema.getFields().stream().map(Field::getName).collect(Collectors.toSet());
    GetTableLayoutRequest getTableLayoutRequest = new GetTableLayoutRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, constraints, partitionSchema, partitionCols);

    PreparedStatement preparedStatement = Mockito.mock(PreparedStatement.class);
    Mockito.when(this.connection.prepareStatement(PostGreSqlMetadataHandler.GET_PARTITIONS_QUERY)).thenReturn(preparedStatement);

    String[] columns = {"child_schema", "child"};
    int[] types = {Types.VARCHAR, Types.VARCHAR};
    Object[][] values = {{"s0", "p0"}, {"s1", "p1"}};
    ResultSet resultSet = mockResultSet(columns, types, values, new AtomicInteger(-1));
    final String expectedQuery = String.format(PostGreSqlMetadataHandler.GET_PARTITIONS_QUERY, tableName.getTableName(), tableName.getSchemaName());
    Mockito.when(preparedStatement.executeQuery()).thenReturn(resultSet);

    Mockito.when(this.connection.getMetaData().getSearchStringEscape()).thenReturn(null);

    GetTableLayoutResponse getTableLayoutResponse = this.postGreSqlMetadataHandler.doGetTableLayout(blockAllocator, getTableLayoutRequest);

    BlockAllocator splitBlockAllocator = new BlockAllocatorImpl();
    GetSplitsRequest getSplitsRequest = new GetSplitsRequest(this.federatedIdentity, "testQueryId", "testCatalogName", tableName, getTableLayoutResponse.getPartitions(), new ArrayList<>(partitionCols), constraints, "1");
    GetSplitsResponse getSplitsResponse = this.postGreSqlMetadataHandler.doGetSplits(splitBlockAllocator, getSplitsRequest);

    Set<Map<String, String>> expectedSplits = new HashSet<>();
    expectedSplits.add(ImmutableMap.of("partition_schema_name", "s1", "partition_name", "p1"));
    Assert.assertEquals(expectedSplits.size(), getSplitsResponse.getSplits().size());
    Set<Map<String, String>> actualSplits = getSplitsResponse.getSplits().stream().map(Split::getProperties).collect(Collectors.toSet());
    Assert.assertEquals(expectedSplits, actualSplits);
}
 
Example #20
Source File: HbaseSchemaUtilsTest.java    From aws-athena-query-federation with Apache License 2.0
@Test
public void inferSchema()
        throws IOException
{
    int numToScan = 4;
    TableName tableName = new TableName("schema", "table");
    List<Result> results = TestUtils.makeResults();

    HBaseConnection mockConnection = mock(HBaseConnection.class);
    ResultScanner mockScanner = mock(ResultScanner.class);
    when(mockScanner.iterator()).thenReturn(results.iterator());
    when(mockConnection.scanTable(anyObject(), any(Scan.class), anyObject())).thenAnswer((InvocationOnMock invocationOnMock) -> {
        ResultProcessor processor = (ResultProcessor) invocationOnMock.getArguments()[2];
        return processor.scan(mockScanner);
    });

    Schema schema = HbaseSchemaUtils.inferSchema(mockConnection, tableName, numToScan);

    Map<String, Types.MinorType> actualFields = new HashMap<>();
    schema.getFields().stream().forEach(next -> actualFields.put(next.getName(), Types.getMinorTypeForArrowType(next.getType())));

    Map<String, Types.MinorType> expectedFields = new HashMap<>();
    TestUtils.makeSchema().build().getFields().stream()
            .forEach(next -> expectedFields.put(next.getName(), Types.getMinorTypeForArrowType(next.getType())));

    for (Map.Entry<String, Types.MinorType> nextExpected : expectedFields.entrySet()) {
        assertNotNull(actualFields.get(nextExpected.getKey()));
        assertEquals(nextExpected.getKey(), nextExpected.getValue(), actualFields.get(nextExpected.getKey()));
    }
    assertEquals(expectedFields.size(), actualFields.size());

    verify(mockConnection, times(1)).scanTable(anyObject(), any(Scan.class), any(ResultProcessor.class));
    verify(mockScanner, times(1)).iterator();
}
 
Example #21
Source File: DatasetMetadataImpl.java    From dremio-oss with Apache License 2.0
DatasetMetadataImpl(
    DatasetStats stats,
    Schema schema,
    List<String> partitionColumns,
    List<String> sortColumns,
    BytesOutput extraInfo
) {
  this.stats = stats;
  this.schema = schema;
  this.partitionColumns = partitionColumns;
  this.sortColumns = sortColumns;
  this.extraInfo = extraInfo;
}
 
Example #22
Source File: NativeProjector.java    From dremio-oss with Apache License 2.0
NativeProjector(VectorAccessible incoming, Schema schema, FunctionContext functionContext) {
  this.incoming = incoming;
  this.schema = schema;
  this.functionContext = functionContext;
  // preserve order of insertion
  referencedFields = Sets.newLinkedHashSet();
}
 
Example #23
Source File: HiveDatasetMetadata.java    From dremio-oss with Apache License 2.0
private HiveDatasetMetadata(
  final Schema schema,
  final List<String> partitionColumns,
  final List<String> sortColumns,
  final BytesOutput extraInfo,
  final MetadataAccumulator metadataAccumulator
) {
  this.schema = schema;
  this.partitionColumns = partitionColumns;
  this.sortColumns = sortColumns;
  this.extraInfo = extraInfo;
  this.metadataAccumulator = metadataAccumulator;
}
 
Example #24
Source File: DDBRecordMetadata.java    From aws-athena-query-federation with Apache License 2.0
/**
 * Retrieves the mapping of Glue column names to DynamoDB column names from the table schema's custom metadata.
 * @param schema Schema to extract the info from
 * @return mapping of Glue column names to DynamoDB column names, or an empty map if none is defined
 */
private static Map<String, String> getColumnNameMapping(Schema schema)
{
    if (schema != null && schema.getCustomMetadata() != null) {
        String columnNameMappingParam = schema.getCustomMetadata().getOrDefault(
                COLUMN_NAME_MAPPING_PROPERTY, null);
        if (!Strings.isNullOrEmpty(columnNameMappingParam)) {
            return new HashMap<>(MAP_SPLITTER.split(columnNameMappingParam));
        }
    }
    return ImmutableMap.of();
}
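MAP_SPLITTER is not shown in this excerpt; a plausible definition using Guava, assuming the mapping is stored as comma-delimited key=value pairs (for example "col1=Col1,col2=Col2"):

import com.google.common.base.Splitter;

// Hypothetical: splits "col1=Col1,col2=Col2" into {col1=Col1, col2=Col2}.
private static final Splitter.MapSplitter MAP_SPLITTER =
        Splitter.on(",").trimResults().withKeyValueSeparator("=");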
 
Example #25
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testParquetFixedBinaryToArrowDecimal() {
  MessageType parquet = Types.buildMessage()
    .addField(Types.optional(FIXED_LEN_BYTE_ARRAY).length(5).as(DECIMAL).precision(8).scale(2).named("a")).named("root");
  Schema expected = new Schema(asList(
    field("a", new ArrowType.Decimal(8, 2))
  ));
  Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
}
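The field(...) helper used in these TestSchemaConverter excerpts (here and in Examples #27 and #30) is not shown; a minimal sketch consistent with its use, assuming nullable fields:

import static java.util.Arrays.asList;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;

private static Field field(String name, ArrowType type, Field... children)
{
  return new Field(name, FieldType.nullable(type), asList(children));
}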
 
Example #26
Source File: JdbcMetadataHandlerTest.java    From aws-athena-query-federation with Apache License 2.0
@Before
public void setup()
{
    this.jdbcConnectionFactory = Mockito.mock(JdbcConnectionFactory.class);
    this.connection = Mockito.mock(Connection.class, Mockito.RETURNS_DEEP_STUBS);
    Mockito.when(this.jdbcConnectionFactory.getConnection(Mockito.any(JdbcCredentialProvider.class))).thenReturn(this.connection);
    this.secretsManager = Mockito.mock(AWSSecretsManager.class);
    this.athena = Mockito.mock(AmazonAthena.class);
    Mockito.when(this.secretsManager.getSecretValue(Mockito.eq(new GetSecretValueRequest().withSecretId("testSecret")))).thenReturn(new GetSecretValueResult().withSecretString("{\"username\": \"testUser\", \"password\": \"testPassword\"}"));
    DatabaseConnectionConfig databaseConnectionConfig = new DatabaseConnectionConfig("testCatalog", JdbcConnectionFactory.DatabaseEngine.MYSQL,
            "mysql://jdbc:mysql://hostname/${testSecret}", "testSecret");
    this.jdbcMetadataHandler = new JdbcMetadataHandler(databaseConnectionConfig, this.secretsManager, this.athena, jdbcConnectionFactory)
    {
        @Override
        public Schema getPartitionSchema(final String catalogName)
        {
            return PARTITION_SCHEMA;
        }

        @Override
        public void getPartitions(final BlockWriter blockWriter, final GetTableLayoutRequest getTableLayoutRequest, QueryStatusChecker queryStatusChecker)
        {
        }

        @Override
        public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
        {
            return null;
        }
    };
    this.federatedIdentity = Mockito.mock(FederatedIdentity.class);
    this.blockAllocator = Mockito.mock(BlockAllocator.class);
}
 
Example #27
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testParquetInt64TimestampMillisToArrow() {
  MessageType parquet = Types.buildMessage()
    .addField(Types.optional(INT64).as(TIMESTAMP_MILLIS).named("a")).named("root");
  Schema expected = new Schema(asList(
    field("a", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"))
  ));
  Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
}
 
Example #28
Source File: FieldIdUtil2.java    From dremio-oss with Apache License 2.0
public static TypedFieldId getFieldId(Schema schema, BasePath path, boolean isHyper){
  int i = 0;
  for (Field f : schema.getFields()) {
    TypedFieldId id = getFieldId(f, i, path, isHyper);
    if (id != null) {
      return id;
    }
    i++;
  }
  return null;
}
 
Example #29
Source File: UserDefinedFunctionRequestSerDeTest.java    From aws-athena-query-federation with Apache License 2.0
@Before
public void beforeTest()
        throws IOException
{
    Schema inputSchema = SchemaBuilder.newBuilder()
            .addField("factor1", Types.MinorType.INT.getType())
            .addField("factor2", Types.MinorType.INT.getType())
            .build();
    Schema outputSchema = SchemaBuilder.newBuilder()
            .addField("product", Types.MinorType.INT.getType())
            .build();

    Block inputRecords = allocator.createBlock(inputSchema);
    inputRecords.setRowCount(1);
    IntVector inputVector1 = (IntVector) inputRecords.getFieldVector("factor1");
    IntVector inputVector2 = (IntVector) inputRecords.getFieldVector("factor2");
    inputVector1.setSafe(0, 2);
    inputVector2.setSafe(0, 3);

    expected = new UserDefinedFunctionRequest(federatedIdentity,
            inputRecords,
            outputSchema,
            "test-method",
            UserDefinedFunctionType.SCALAR);


    String expectedSerDeFile = utils.getResourceOrFail("serde/v2", "UserDefinedFunctionRequest.json");
    expectedSerDeText = utils.readAllAsString(expectedSerDeFile).trim();
}
 
Example #30
Source File: TestSchemaConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testParquetInt32TimeMillisToArrow() {
  MessageType parquet = Types.buildMessage()
    .addField(Types.optional(INT32).as(TIME_MILLIS).named("a")).named("root");
  Schema expected = new Schema(asList(
    field("a", new ArrowType.Time(TimeUnit.MILLISECOND, 32))
  ));
  Assert.assertEquals(expected, converter.fromParquet(parquet).getArrowSchema());
}