com.facebook.presto.spi.ConnectorPageSource Java Examples

The following examples show how to use com.facebook.presto.spi.ConnectorPageSource. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ParaflowPageSourceProvider.java    From paraflow with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the page source for a Paraflow split.
 *
 * <p>Every requested column is cast to {@link ParaflowColumnHandle}, the split is
 * type-checked as a {@link ParaflowSplit}, and the actual reader construction is
 * delegated to {@code createParaflowPageSource}.
 *
 * @throws RuntimeException if no page source could be created for the split
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session,
                                            ConnectorSplit split, List<ColumnHandle> columns)
{
    // Columns are converted first so that a foreign column handle fails before the split is inspected.
    List<ParaflowColumnHandle> paraflowColumns = columns.stream()
            .map(handle -> (ParaflowColumnHandle) handle)
            .collect(Collectors.toList());

    ParaflowSplit paraflowSplit = checkType(split, ParaflowSplit.class, "hdfs split");
    Path splitPath = new Path(paraflowSplit.getPath());

    Optional<ConnectorPageSource> candidate = createParaflowPageSource(
            splitPath,
            paraflowSplit.getStart(),
            paraflowSplit.getLen(),
            paraflowColumns);

    // An empty result means the reader could not be opened; surface that as a failure.
    return candidate.orElseThrow(
            () -> new RuntimeException("Could not find a file reader for split " + paraflowSplit));
}
 
Example #2
Source File: ElasticsearchPageSourceProvider.java    From presto-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an {@link ElasticsearchPageSource} for the given split.
 *
 * <p>The split is cast to {@link ElasticsearchSplit} and every column handle to
 * {@link ElasticsearchColumnHandle}; a {@code null} column list is rejected with
 * the message {@code "columns is null"}.
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<ColumnHandle> columns)
{
    // The split cast comes first so a wrong split type is reported before the column check.
    ElasticsearchSplit elasticsearchSplit = (ElasticsearchSplit) split;

    ImmutableList.Builder<ElasticsearchColumnHandle> columnHandles = ImmutableList.builder();
    requireNonNull(columns, "columns is null")
            .forEach(column -> columnHandles.add((ElasticsearchColumnHandle) column));

    return new ElasticsearchPageSource(elasticsearchClient, elasticsearchSplit, columnHandles.build());
}
 
Example #3
Source File: KuduPageSourceProvider.java    From presto-kudu with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a page source backed by the record set obtained from {@code recordSetProvider}.
 *
 * <p>When the requested columns include {@code KuduColumnHandle.ROW_ID_HANDLE} a
 * {@link KuduUpdatablePageSource} is returned; otherwise the record set is wrapped
 * in a plain {@link RecordPageSource}.
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle,
                                            ConnectorSession session, ConnectorSplit split, List<ColumnHandle> columns) {
    KuduRecordSet kuduRecords = (KuduRecordSet) recordSetProvider.getRecordSet(transactionHandle, session, split, columns);

    boolean rowIdRequested = columns.contains(KuduColumnHandle.ROW_ID_HANDLE);
    if (!rowIdRequested) {
        return new RecordPageSource(kuduRecords);
    }
    return new KuduUpdatablePageSource(kuduRecords);
}
 
Example #4
Source File: ParaflowPageSourceProvider.java    From paraflow with Apache License 2.0 4 votes vote down vote up
/**
 * Opens the Parquet file at {@code path} and builds a page source over it.
 *
 * <p>The requested schema contains one Parquet field per column whose type is not
 * {@code NOTVALID} and for which {@code getParquetType} returns a non-null type.
 * Row groups are selected by the offset of the first data page of their first
 * column: a row group is read when that offset lies in
 * {@code [start, start + dataSource.getSize())}.
 *
 * <p>If anything fails after the data source has been opened, the data source is
 * closed before this method returns or throws, so that a failed attempt does not
 * leak it.
 *
 * @param path    location of the Parquet file
 * @param start   offset used as the lower bound when selecting row groups
 * @param length  length passed to the data source builder; row-group selection
 *                uses the size reported by the data source instead
 * @param columns columns requested by the query
 * @return the page source, or {@link Optional#empty()} if an {@link IOException}
 *         occurred while opening or reading the file (the error is logged)
 * @throws RuntimeException if {@code fsFactory} provides no file system
 */
private Optional<ConnectorPageSource> createParaflowPageSource(
        Path path,
        long start,
        long length,
        List<ParaflowColumnHandle> columns)
{
    FileSystem fileSystem = fsFactory.getFileSystem()
            .orElseThrow(() -> new RuntimeException("Could not find filesystem for path " + path));

    ParquetDataSource dataSource = null;
    // Set only once the data source has been passed to the returned page source.
    boolean handedOff = false;
    try {
        dataSource = buildHdfsParquetDataSource(fileSystem, path, start, length);
        // default length is file size, which means whole file is a split
        long splitLength = dataSource.getSize();
        ParquetMetadata parquetMetadata = ParquetMetadataReader.readFooter(fileSystem, path);
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();

        List<Type> fields = columns.stream()
                .filter(column -> column.getColType() != ParaflowColumnHandle.ColumnType.NOTVALID)
                .map(column -> getParquetType(column, fileSchema))
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        MessageType requestedSchema = new MessageType(fileSchema.getName(), fields);

        List<BlockMetaData> blocks = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + splitLength) {
                blocks.add(block);
            }
        }

        ParquetReader parquetReader = new ParquetReader(
                fileSchema,
                requestedSchema,
                blocks,
                dataSource,
                typeManager);
        ConnectorPageSource pageSource = new ParaflowPageSource(
                parquetReader,
                dataSource,
                fileSchema,
                requestedSchema,
                splitLength,
                columns,
                typeManager);
        // NOTE(review): assumes ParaflowPageSource closes the data source when it is closed — confirm.
        handedOff = true;
        return Optional.of(pageSource);
    }
    catch (IOException e) {
        // Best-effort contract kept from the original: log and let the caller see "no reader".
        log.error(e);
        return Optional.empty();
    }
    finally {
        // Covers both the IOException path and any RuntimeException that propagates.
        if (!handedOff && dataSource != null) {
            try {
                dataSource.close();
            }
            catch (IOException ignored) {
                // Cleanup failure must not mask the original error.
            }
        }
    }
}