Java Code Examples for org.apache.kudu.client.KuduScanToken#KuduScanTokenBuilder

The following examples show how to use org.apache.kudu.client.KuduScanToken#KuduScanTokenBuilder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KuduReader.java    From bahir-flink with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Kudu scan tokens for {@code table}, applying the optional projection, filters and
 * row limit to the scan token builder before building.
 *
 * @param tableFilters filters converted to predicates against the table schema; only applied
 *                     when {@code CollectionUtils.isNotEmpty} accepts the list
 * @param tableProjections column names to project; no projection is set when {@code null}
 * @param rowLimit row limit for the scan; only applied when non-null and greater than zero
 * @return the scan tokens produced by the builder
 */
public List<KuduScanToken> scanTokens(List<KuduFilterInfo> tableFilters, List<String> tableProjections, Integer rowLimit) {
    KuduScanToken.KuduScanTokenBuilder builder = client.newScanTokenBuilder(table);

    // Projection is optional: a null list leaves the builder's default untouched.
    if (tableProjections != null) {
        builder.setProjectedColumnNames(tableProjections);
    }

    // Each filter is turned into a predicate and registered in list order.
    if (CollectionUtils.isNotEmpty(tableFilters)) {
        for (KuduFilterInfo filter : tableFilters) {
            builder.addPredicate(filter.toPredicate(table.getSchema()));
        }
    }

    // Non-positive or missing limits mean "no limit".
    if (rowLimit != null && rowLimit > 0) {
        builder.limit(rowLimit);
    }

    return builder.build();
}
 
Example 2
Source File: AbstractKuduInputPartitioner.java    From attic-apex-malhar with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a set of scan tokens. The list of scan tokens is generated as if the entire table is being scanned,
 * i.e. a SELECT * FROM TABLE equivalent expression. This list is used to assign the partition pie assignments
 * for all of the planned partitions of operators. Each operator gets a part of the pie as if all columns were
 * selected. Subsequently, when a query is to be processed, the query is used to generate the scan tokens
 * applicable for that query. Given that the partition pie represents the entire data set, the scan assignments
 * for the current query will be a subset.
 * <p>
 * The connection built for this call is always closed before the method returns, including when building
 * the scan tokens fails.
 * @return The list of scan tokens as if the entire table is getting scanned.
 * @throws Exception if the scan tokens cannot be built or the connection to the kudu cluster cannot be closed.
 */
public List<KuduScanToken> getKuduScanTokensForSelectAllColumns() throws Exception
{
  // We are not using the current query for deciding the partition strategy but a SELECT * as
  // we do not want to optimize on just the current query. This prevents rapid throttling of operator
  // instances when the scan patterns are erratic. On the other hand, this might result in under-utilized
  // operator resources in the DAG but will be consistent at a minimum.
  ApexKuduConnection apexKuduConnection = prototypeKuduInputOperator.getApexKuduConnectionInfo().build();
  try {
    KuduClient clientHandle = apexKuduConnection.getKuduClient();
    KuduTable table = apexKuduConnection.getKuduTable();
    KuduScanToken.KuduScanTokenBuilder builder = clientHandle.newScanTokenBuilder(table);
    // Project every column of the table schema so the tokens describe a full-table scan.
    List<ColumnSchema> columnList = table.getSchema().getColumns();
    List<String> allColumns = new ArrayList<>(columnList.size());
    for (ColumnSchema column : columnList) {
      allColumns.add(column.getName());
    }
    builder.setProjectedColumnNames(allColumns);
    LOG.debug("Building the partition pie assignments for the input operator");
    return builder.build();
  } finally {
    // Release the connection even when token generation throws; otherwise it would leak.
    apexKuduConnection.close();
  }
}
 
Example 3
Source File: KuduServiceImpl.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the scan tokens for the table named by {@code spec} and returns each token in its
 * serialized form. The Kudu client opened for the call is closed when the method exits.
 *
 * @param spec read specification supplying the master addresses and table name, and passed to
 *             {@code configureBuilder} together with the table schema
 * @return one serialized scan token per token produced by the builder
 * @throws KuduException declared by the signature for failures of the Kudu client calls
 */
@Override
public List<byte[]> createTabletScanners(KuduIO.Read spec) throws KuduException {
  try (KuduClient kuduClient = getKuduClient(spec.getMasterAddresses())) {
    KuduTable kuduTable = kuduClient.openTable(spec.getTable());
    KuduScanToken.KuduScanTokenBuilder tokenBuilder = kuduClient.newScanTokenBuilder(kuduTable);
    configureBuilder(spec, kuduTable.getSchema(), tokenBuilder);
    // serialize() is routed through uncheckCall so it can be used inside the stream lambda.
    return tokenBuilder.build().stream()
        .map(token -> uncheckCall(token::serialize))
        .collect(Collectors.toList());
  }
}
 
Example 4
Source File: KuduClientTestCommons.java    From attic-apex-malhar with Apache License 2.0 5 votes vote down vote up
/**
 * Creates PowerMockito mocks for the Kudu connection, its builder, the client, session, table and
 * scan token builder, and stubs them so that they return one another. The mocked scan token
 * builder returns a list of {@code numScanTokens} mocked scan tokens from {@code build()}.
 *
 * @param abstractKuduInputOperator operator whose {@code getApexKuduConnectionInfo()} is stubbed to
 *                                  return the mocked connection builder, and which
 *                                  {@code KryoCloneUtils.cloneObject} is stubbed to return as-is
 * @param numScanTokens number of mocked scan tokens the mocked scan token builder returns
 * @return the mocked connection handle
 * @throws Exception if setting up any of the mocks or stubs fails
 */
public ApexKuduConnection buildMockWiring(AbstractKuduInputOperator abstractKuduInputOperator,
    int numScanTokens) throws Exception
{
  // create the mocks
  ApexKuduConnection connectionMock = PowerMockito.mock(ApexKuduConnection.class);
  ApexKuduConnection.ApexKuduConnectionBuilder connectionBuilderMock =
      PowerMockito.mock(ApexKuduConnection.ApexKuduConnectionBuilder.class);
  KuduClient clientMock = PowerMockito.mock(KuduClient.class);
  KuduSession sessionMock = PowerMockito.mock(KuduSession.class);
  KuduTable tableMock = PowerMockito.mock(KuduTable.class);
  KuduScanToken.KuduScanTokenBuilder tokenBuilderMock =
      PowerMockito.mock(KuduScanToken.KuduScanTokenBuilder.class);
  List<KuduScanToken> tokenMocks = new ArrayList<>();
  while (tokenMocks.size() < numScanTokens) {
    tokenMocks.add(PowerMockito.mock(KuduScanToken.class));
  }
  // cloning the operator hands back the very same instance
  PowerMockito.mockStatic(KryoCloneUtils.class);
  when(KryoCloneUtils.cloneObject(abstractKuduInputOperator)).thenReturn(abstractKuduInputOperator);
  // wire the mocks
  when(abstractKuduInputOperator.getApexKuduConnectionInfo()).thenReturn(connectionBuilderMock);
  when(connectionMock.getKuduClient()).thenReturn(clientMock);
  when(clientMock.newSession()).thenReturn(sessionMock);
  when(connectionMock.getKuduTable()).thenReturn(tableMock);
  when(connectionMock.getKuduSession()).thenReturn(sessionMock);
  when(connectionMock.getBuilderForThisConnection()).thenReturn(connectionBuilderMock);
  when(clientMock.openTable(tableName)).thenReturn(tableMock);
  when(connectionBuilderMock.build()).thenReturn(connectionMock);
  when(tableMock.getSchema()).thenReturn(schemaForUnitTests);
  when(clientMock.newScanTokenBuilder(tableMock)).thenReturn(tokenBuilderMock);
  when(tokenBuilderMock.build()).thenReturn(tokenMocks);
  return connectionMock;
}
 
Example 5
Source File: AbstractKuduPartitionScanner.java    From attic-apex-malhar with Apache License 2.0 4 votes vote down vote up
/**
 * The main logic which takes the parsed query and builds the Kudu scan tokens specific to this query.
 * The scan tokens are sorted before the ones that are to be executed in the current physical instance of the
 * operator are shortlisted. Since the Kudu scan token builder gives the scan tokens for the query and does not
 * differentiate between a distributed system and a single instance system, this method takes the plan as
 * generated by the Kudu scan token builder and then chooses only those segments that were decided to be the
 * responsibility of this operator at partitioning time.
 * @param parsedQuery The parsed query instance
 * @return A list of partition scan metadata objects that are applicable for this instance of the physical
 * operator, i.e. the operator owning this instance of the scanner.
 * @throws IOException If the scan assignment cannot be serialized
 */
public List<KuduPartitionScanAssignmentMeta> preparePlanForScanners(SQLToKuduPredicatesTranslator parsedQuery)
  throws IOException
{
  List<KuduPredicate> predicates = parsedQuery.getKuduSQLParseTreeListener().getKuduPredicateList();
  ApexKuduConnection connection = verifyConnectionStaleness(0); // we will have at least one connection
  KuduScanToken.KuduScanTokenBuilder tokenBuilder = connection.getKuduClient().newScanTokenBuilder(
      connection.getKuduTable());

  // Restrict the scan to the columns and predicates used by the query.
  List<String> projectedColumns = new ArrayList<>(
      parsedQuery.getKuduSQLParseTreeListener().getListOfColumnsUsed());
  tokenBuilder = tokenBuilder.setProjectedColumnNames(projectedColumns);
  for (KuduPredicate predicate : predicates) {
    tokenBuilder = tokenBuilder.addPredicate(predicate);
  }
  tokenBuilder.setFaultTolerant(parentOperator.isFaultTolerantScanner());

  // An optional read snapshot time switches the scan to READ_AT_SNAPSHOT; a value that cannot be
  // parsed is logged and the scan proceeds without it.
  Map<String,String> queryOptions = parentOperator.getOptionsEnabledForCurrentQuery();
  if (queryOptions.containsKey(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME)) {
    try {
      long snapshotTime = Long.parseLong(queryOptions.get(KuduSQLParseTreeListener.READ_SNAPSHOT_TIME));
      tokenBuilder = tokenBuilder.readMode(AsyncKuduScanner.ReadMode.READ_AT_SNAPSHOT);
      tokenBuilder = tokenBuilder.snapshotTimestampMicros(snapshotTime);
      LOG.info("Using read snapshot for this query as " + snapshotTime);
    } catch (Exception ex) {
      LOG.error("Cannot parse the Read snaptshot time " + ex.getMessage(), ex);
    }
  }

  // Make sure we deal with a sorted list of scan tokens (natural ordering of KuduScanToken).
  List<KuduScanToken> sortedTokens = tokenBuilder.build();
  Collections.sort(sortedTokens);
  int tokenCount = sortedTokens.size();
  LOG.info(" Query will scan " + tokenCount + " tablets");
  if (LOG.isDebugEnabled()) {
    LOG.debug(" Predicates scheduled for this query are " + predicates.size());
    for (KuduScanToken token : sortedTokens) {
      LOG.debug("A tablet scheduled for all operators scanning is " + token.getTablet());
    }
  }

  List<KuduPartitionScanAssignmentMeta> assignments = new ArrayList<>();
  for (KuduPartitionScanAssignmentMeta pieSlice : parentOperator.getPartitionPieAssignment()) {
    int pieOrdinal = pieSlice.getOrdinal();
    if (pieOrdinal >= tokenCount) {
      continue; // a given query plan might have fewer scan tokens than the partition pie has slices
    }
    KuduPartitionScanAssignmentMeta assignment = new KuduPartitionScanAssignmentMeta();
    assignment.setTotalSize(tokenCount);
    // The ordinal is the position of this assignment among the ones picked so far.
    assignment.setOrdinal(assignments.size());
    assignment.setCurrentQuery(parsedQuery.getSqlExpresssion());
    // we pick up only those ordinals that are part of the original partition pie assignment
    KuduScanToken token = sortedTokens.get(pieOrdinal);
    assignment.setSerializedKuduScanToken(token.serialize());
    assignments.add(assignment);
    LOG.debug("Added query scan for this operator " + assignment + " with scan tablet as " +
        token.getTablet());
  }
  LOG.info(" A total of " + assignments.size() + " have been scheduled for this operator");
  return assignments;
}