Java Code Examples for org.pentaho.di.trans.step.StepMeta#getStepPartitioningMeta()

The following examples show how to use org.pentaho.di.trans.step.StepMeta#getStepPartitioningMeta(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: TransSplitter.java From pentaho-kettle with Apache License 2.0

6 votes

/**
 * Works out how many copies of a step run on a given slave server.<br>
 * The step's own getCopies() value is returned when the step is not clustered, when it is not partitioned, or when
 * the given slave server is the master.<br>
 * Otherwise (clustered, partitioned and not on the master) the result is the number of partitions that the slave
 * server partitions map holds for the step's partition schema on that slave server.<br>
 * That is because the number of copies can vary over the slaves. (5 partitions over 3 slaves for example)
 *
 * @param slaveServer
 *          the slave server
 * @param step
 *          the reference step
 * @return the number of step copies that we run.
 */
private int determineNrOfStepCopies( SlaveServer slaveServer, StepMeta step ) {
  // Short-circuit evaluation keeps the checks in the order: clustered, partitioned, master.
  boolean usesPlainCopies = !step.isClustered() || !step.isPartitioned() || slaveServer.isMaster();
  if ( usesPlainCopies ) {
    return step.getCopies();
  }

  // Clustered and partitioned on a real slave: count the partitions assigned to this slave for the step's schema.
  //
  PartitionSchema schema = step.getStepPartitioningMeta().getPartitionSchema();
  return slaveServerPartitionsMap.get( slaveServer ).get( schema ).size();
}

Example 2

Source File: TransSplitter.java From pentaho-kettle with Apache License 2.0

6 votes

/**
 * Clones a step of the original transformation and adds the clone to the given slave transformation.<br>
 * If the clone is partitioned, its partition schema is replaced by the slave variant of that schema (looked up in
 * the slave transformation under the name produced by createSlavePartitionSchemaName) when such a schema exists,
 * and the number of copies of the clone is set to 1.
 *
 * @param transMeta
 *          The slave transformation that receives the copy.
 * @param stepMeta
 *          The step to copy / clone.
 * @param slaveServer
 *          The slave server; not used by this method.
 * @return a copy of the specified step for use in a slave transformation.
 */
private StepMeta addSlaveCopy( TransMeta transMeta, StepMeta stepMeta, SlaveServer slaveServer ) {
  StepMeta slaveStep = (StepMeta) stepMeta.clone();

  if ( slaveStep.isPartitioned() ) {
    StepPartitioningMeta partitioning = slaveStep.getStepPartitioningMeta();
    String slaveSchemaName = createSlavePartitionSchemaName( partitioning.getPartitionSchema().getName() );

    // Only switch schemas when the slave transformation actually knows the slave variant.
    PartitionSchema slaveSchema = transMeta.findPartitionSchema( slaveSchemaName );
    if ( slaveSchema != null ) {
      partitioning.setPartitionSchema( slaveSchema );
    }

    // A partitioned step always starts as a single copy on the slave server,
    // to avoid confusion between partitioning and re-partitioning.
    //
    slaveStep.setCopies( 1 );
  }

  transMeta.addStep( slaveStep );
  return slaveStep;
}

Example 3

Source File: TransSplitter.java From pentaho-kettle with Apache License 2.0

5 votes

/**
 * Makes sure that the slave transformation contains the slave partition schema needed by a partitioned step.<br>
 * Each step is handled at most once per slave transformation; this is tracked in slaveStepPartitionFlag.<br>
 * When the step has a partitioning method other than "none" and a partition schema, and the slave server partitions
 * map holds a partition list for that schema on the given slave server, a partition schema with the slave schema
 * name and that partition list is added to the slave transformation unless one with that name already exists.
 *
 * @param slave
 *          the slave transformation to verify / complete
 * @param stepMeta
 *          the step whose partitioning is inspected
 * @param clusterSchema
 *          the cluster schema; not used by this method
 * @param slaveServer
 *          the slave server whose partition assignment is looked up
 */
private void verifySlavePartitioningConfiguration( TransMeta slave, StepMeta stepMeta,
  ClusterSchema clusterSchema, SlaveServer slaveServer ) {
  Map<StepMeta, String> handledSteps = slaveStepPartitionFlag.get( slave );
  if ( handledSteps == null ) {
    handledSteps = new Hashtable<StepMeta, String>();
    slaveStepPartitionFlag.put( slave, handledSteps );
  }
  if ( handledSteps.get( stepMeta ) != null ) {
    return; // this step was verified before for this slave transformation
  }

  StepPartitioningMeta meta = stepMeta.getStepPartitioningMeta();
  boolean partitioned = meta != null
    && meta.getMethodType() != StepPartitioningMeta.PARTITIONING_METHOD_NONE
    && meta.getPartitionSchema() != null;

  // The partition assignment of this slave server, only looked up for partitioned steps.
  Map<PartitionSchema, List<String>> partitionsPerSchema = null;
  if ( partitioned ) {
    partitionsPerSchema = slaveServerPartitionsMap.get( slaveServer );
  }

  // The partitions of the step's schema that were assigned to this slave server, if any.
  PartitionSchema sourceSchema = null;
  List<String> slavePartitions = null;
  if ( partitionsPerSchema != null ) {
    sourceSchema = meta.getPartitionSchema();
    slavePartitions = partitionsPerSchema.get( sourceSchema );
  }

  if ( slavePartitions != null ) {
    // Register the slave variant of the schema in the slave transformation if it isn't there yet.
    String slaveSchemaName = createSlavePartitionSchemaName( sourceSchema.getName() );
    if ( slave.findPartitionSchema( slaveSchemaName ) == null ) {
      PartitionSchema slaveSchema = new PartitionSchema( slaveSchemaName, slavePartitions );
      slave.getPartitionSchemas().add( slaveSchema );
    }
  }

  handledSteps.put( stepMeta, "Y" ); // remember that this step is done
}

Example 4

Source File: TransSplitter.java From pentaho-kettle with Apache License 2.0

4 votes

/**
 * Divides the available partitions over the slave servers and stores the result in slaveServerPartitionsMap
 * (slave server -> partition schema -> list of partition IDs). Since we can only use a single cluster, we can divide
 * them all over a single set of slave servers.<br>
 * Reference steps without partitioning, without a cluster schema, or whose cluster reports no slaves are ignored.
 * For every remaining step the partition IDs of (a clone of) its partition schema are handed out one by one to the
 * servers of the cluster schema, restarting at the first server when the end of the server list is reached.
 *
 * @throws KettleException
 *           when a partitioned, clustered step has no partition schema, or when it has fewer partitions than there
 *           are slave servers
 */
private void generateSlavePartitionSchemas() throws KettleException {
  slaveServerPartitionsMap = new Hashtable<SlaveServer, Map<PartitionSchema, List<String>>>();

  for ( StepMeta stepMeta : referenceSteps ) {
    StepPartitioningMeta stepPartitioningMeta = stepMeta.getStepPartitioningMeta();
    if ( stepPartitioningMeta == null
      || stepPartitioningMeta.getMethodType() == StepPartitioningMeta.PARTITIONING_METHOD_NONE ) {
      continue; // not partitioned
    }

    ClusterSchema clusterSchema = stepMeta.getClusterSchema();
    if ( clusterSchema == null ) {
      continue; // not clustered
    }

    // A partitioning method without a schema cannot be divided over the slaves.
    // Report it explicitly instead of failing with a NullPointerException on clone().
    //
    PartitionSchema originalSchema = stepPartitioningMeta.getPartitionSchema();
    if ( originalSchema == null ) {
      throw new KettleException(
        "The partitioned, clustered step [" + stepMeta + "] doesn't have a partition schema" );
    }

    // Make a copy of the partition schema because we might change the object.
    // Let's not alter the original transformation.
    // The match is done on name, and the name is preserved in this case, so it should be safe to do so.
    // Also, all cloned steps re-match with the cloned schema name afterwards...
    //
    PartitionSchema partitionSchema = (PartitionSchema) originalSchema.clone();

    int nrSlaves = clusterSchema.findNrSlaves();
    if ( nrSlaves == 0 ) {
      continue; // no slaves: ignore this situation too
    }

    // Change the partitioning layout dynamically if the user requested this...
    //
    if ( partitionSchema.isDynamicallyDefined() ) {
      partitionSchema.expandPartitionsDynamically( nrSlaves, originalTransformation );
    }

    int nrPartitions = partitionSchema.getPartitionIDs().size();
    if ( nrPartitions < nrSlaves ) {
      throw new KettleException(
        "It doesn't make sense to have a partitioned, clustered step with less partitions ("
          + nrPartitions + ") than that there are slave servers (" + nrSlaves + ")" );
    }

    List<SlaveServer> slaveServers = clusterSchema.getSlaveServers();
    int slaveServerNr = 0;

    for ( int p = 0; p < nrPartitions; p++ ) {
      String partitionId = partitionSchema.getPartitionIDs().get( p );

      SlaveServer slaveServer = slaveServers.get( slaveServerNr );

      // Skip the master...
      // NOTE(review): only a single master entry is stepped over here; this presumably relies on the master not
      // being followed directly by another master in the server list - confirm.
      //
      if ( slaveServer.isMaster() ) {
        slaveServerNr++;
        if ( slaveServerNr >= slaveServers.size() ) {
          slaveServerNr = 0; // re-start
        }
        slaveServer = slaveServers.get( slaveServerNr );
      }

      // Find, or create and register, the schema-partitions map of this slave server
      //
      Map<PartitionSchema, List<String>> schemaPartitionsMap = slaveServerPartitionsMap.get( slaveServer );
      if ( schemaPartitionsMap == null ) {
        schemaPartitionsMap = new HashMap<PartitionSchema, List<String>>();
        slaveServerPartitionsMap.put( slaveServer, schemaPartitionsMap );
      }

      // Find, or create and register, the list of partitions for this schema
      //
      List<String> partitions = schemaPartitionsMap.get( partitionSchema );
      if ( partitions == null ) {
        partitions = new ArrayList<String>();
        schemaPartitionsMap.put( partitionSchema, partitions );
      }

      // Add the partition ID to the appropriate list, once.
      //
      if ( !partitions.contains( partitionId ) ) {
        partitions.add( partitionId );
      }

      // Switch to next slave.
      slaveServerNr++;
      if ( slaveServerNr >= slaveServers.size() ) {
        slaveServerNr = 0; // re-start
      }
    }
  }
}