com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig Java Examples

The following examples show how to use com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 6 votes vote down vote up
private void addCustomBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions)
{
    // Nothing to add when no custom bootstrap scripts are defined.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionAll()))
    {
        return;
    }

    for (ScriptDefinition script : emrClusterDefinition.getCustomBootstrapActionAll())
    {
        // Build a bootstrap action from the script's display name and location.
        BootstrapActionConfig actionConfig = getBootstrapActionConfig(script.getScriptName(), script.getScriptLocation());

        // Collect the script arguments, trimmed of surrounding whitespace, if any were supplied.
        ArrayList<String> arguments = new ArrayList<>();
        if (!CollectionUtils.isEmpty(script.getScriptArguments()))
        {
            for (String rawArgument : script.getScriptArguments())
            {
                arguments.add(rawArgument.trim());
            }
        }

        // Attach the arguments and register the bootstrap action.
        actionConfig.getScriptBootstrapAction().setArgs(arguments);
        bootstrapActions.add(actionConfig);
    }
}
 
Example #2
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 6 votes vote down vote up
private void addDaemonBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions)
{
    // Nothing to add when no daemon configuration entries are present.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getDaemonConfigurations()))
    {
        return;
    }

    // Build the bootstrap action that points at the configured EMR "configure daemons" script.
    BootstrapActionConfig daemonActionConfig = getBootstrapActionConfig(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(),
        configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON));

    // Each daemon configuration entry becomes a "name=value" script argument.
    ArrayList<String> arguments = new ArrayList<>();
    for (Parameter parameter : emrClusterDefinition.getDaemonConfigurations())
    {
        arguments.add(parameter.getName() + "=" + parameter.getValue());
    }

    // Attach the arguments and register the bootstrap action.
    daemonActionConfig.getScriptBootstrapAction().setArgs(arguments);
    bootstrapActions.add(daemonActionConfig);
}
 
Example #3
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 5 votes vote down vote up
private void addCustomMasterBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions)
{
    // Nothing to add when no master-only bootstrap scripts are defined.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getCustomBootstrapActionMaster()))
    {
        return;
    }

    for (ScriptDefinition script : emrClusterDefinition.getCustomBootstrapActionMaster())
    {
        // The action runs the configured conditional-script wrapper rather than the script itself.
        BootstrapActionConfig actionConfig =
            getBootstrapActionConfig(script.getScriptName(), configurationHelper.getProperty(ConfigurationValue.EMR_CONDITIONAL_SCRIPT));

        ArrayList<String> arguments = new ArrayList<>();

        // The first two arguments restrict execution to the master node and name the real script.
        arguments.add(configurationHelper.getProperty(ConfigurationValue.EMR_NODE_CONDITION));
        arguments.add(script.getScriptLocation());

        // Append the user-supplied script arguments, trimmed of surrounding whitespace.
        if (!CollectionUtils.isEmpty(script.getScriptArguments()))
        {
            for (String rawArgument : script.getScriptArguments())
            {
                arguments.add(rawArgument.trim());
            }
        }

        // Attach the arguments and register the bootstrap action.
        actionConfig.getScriptBootstrapAction().setArgs(arguments);
        bootstrapActions.add(actionConfig);
    }
}
 
Example #4
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 5 votes vote down vote up
private void addHadoopBootstrapActionConfig(EmrClusterDefinition emrClusterDefinition, ArrayList<BootstrapActionConfig> bootstrapActions)
{
    // Nothing to add when no hadoop configurations are specified.
    if (CollectionUtils.isEmpty(emrClusterDefinition.getHadoopConfigurations()))
    {
        return;
    }

    // Build the bootstrap action that points at the configured EMR "configure hadoop" script.
    BootstrapActionConfig hadoopActionConfig = getBootstrapActionConfig(ConfigurationValue.EMR_CONFIGURE_HADOOP.getKey(),
        configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_HADOOP));

    // Translate every hadoop configuration entry into script arguments, preserving entry order.
    ArrayList<String> arguments = new ArrayList<>();
    for (Object configEntry : emrClusterDefinition.getHadoopConfigurations())
    {
        // Configuration files contribute a "<shortcut>, <location>" argument pair each.
        if (configEntry instanceof ConfigurationFiles)
        {
            for (ConfigurationFile configurationFile : ((ConfigurationFiles) configEntry).getConfigurationFiles())
            {
                arguments.add(configurationFile.getFileNameShortcut());
                arguments.add(configurationFile.getConfigFileLocation());
            }
        }

        // Key-value pairs contribute a "<shortcut>, <key>=<value>" argument pair each.
        if (configEntry instanceof KeyValuePairConfigurations)
        {
            for (KeyValuePairConfiguration keyValuePair : ((KeyValuePairConfigurations) configEntry).getKeyValuePairConfigurations())
            {
                arguments.add(keyValuePair.getKeyValueShortcut());
                arguments.add(keyValuePair.getAttribKey() + "=" + keyValuePair.getAttribVal());
            }
        }
    }

    // Attach the arguments and register the bootstrap action.
    hadoopActionConfig.getScriptBootstrapAction().setArgs(arguments);
    bootstrapActions.add(hadoopActionConfig);
}
 
Example #5
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link BootstrapActionConfig} for a single bootstrap script.
 *
 * @param scriptDescription bootstrap script name to be displayed.
 * @param bootstrapScript location of the bootstrap script.
 *
 * @return bootstrap action configuration that contains all the bootstrap actions for the given configuration.
 */
private BootstrapActionConfig getBootstrapActionConfig(String scriptDescription, String bootstrapScript)
{
    // Assemble the action and its script wrapper in one fluent expression.
    return new BootstrapActionConfig()
        .withName(scriptDescription)
        .withScriptBootstrapAction(new ScriptBootstrapActionConfig().withPath(bootstrapScript));
}
 
Example #6
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the full list of bootstrap action configurations from all the bootstrapping scripts specified.
 *
 * @param emrClusterDefinition the EMR definition name value.
 *
 * @return list of bootstrap action configurations that contains all the bootstrap actions for the given configuration.
 */
private ArrayList<BootstrapActionConfig> getBootstrapActionConfigList(EmrClusterDefinition emrClusterDefinition)
{
    ArrayList<BootstrapActionConfig> bootstrapActions = new ArrayList<>();

    // Encryption script, when requested (Boolean.TRUE.equals handles a null flag).
    // Whenever the user requests encryption, we run a herd-provided script that encrypts all
    // volumes of all instances. Amazon plans to support encryption in EMR soon; once that
    // support is enabled, this script can be replaced with the AWS-provided one.
    if (Boolean.TRUE.equals(emrClusterDefinition.isEncryptionEnabled()))
    {
        bootstrapActions.add(getBootstrapActionConfig(ConfigurationValue.EMR_ENCRYPTION_SCRIPT.getKey(),
            getBootstrapScriptLocation(configurationHelper.getProperty(ConfigurationValue.EMR_ENCRYPTION_SCRIPT))));
    }

    // NSCD script, when a location is configured. Upon launch, all EMR clusters should have
    // NSCD running to cache DNS host lookups so EMR does not overwhelm DNS servers.
    String nscdScriptLocation = configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    if (StringUtils.isNotEmpty(nscdScriptLocation))
    {
        bootstrapActions.add(getBootstrapActionConfig(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), getBootstrapScriptLocation(nscdScriptLocation)));
    }

    // Daemon, hadoop, custom, and master-only custom bootstrap actions, in that order.
    addDaemonBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
    addHadoopBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
    addCustomBootstrapActionConfig(emrClusterDefinition, bootstrapActions);
    addCustomMasterBootstrapActionConfig(emrClusterDefinition, bootstrapActions);

    return bootstrapActions;
}
 
Example #7
Source File: EmrDaoImplTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that creating an EMR cluster with no NSCD script configured (the configuration
 * property stub is left unset, so it returns null) produces a job flow request with an
 * empty bootstrap-actions list.
 */
@Test
public void testCreateEmrClusterNoNscdBootstrapScript()
{
    // Create an AWS parameters DTO.
    final AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    // Minimal cluster definition: instance definitions and empty node tags only.
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    final InstanceDefinitions instanceDefinitions =
        new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition());
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Collections.emptyList());

    // Stub the EMR client factory and capture the run-job-flow arguments for later inspection.
    AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName())
        .build();
    when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce);
    when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture()))
        .thenReturn(EMR_CLUSTER_ID);

    // Create the cluster without NSCD script configuration
    String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto);

    // Verifications
    assertEquals(clusterId, EMR_CLUSTER_ID);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    verify(awsClientFactory).getEmrClient(awsParamsDto);
    verify(emrOperations).runEmrJobFlow(any(), any());
    RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue();
    List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions();

    // There should be no bootstrap action
    assertTrue(bootstrapActionConfigs.isEmpty());
}
 
Example #8
Source File: EmrDaoImplTest.java    From herd with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that creating an EMR cluster with an NSCD script and one daemon configuration
 * produces exactly two bootstrap actions: the NSCD script (with its S3 staging location)
 * followed by the EMR configure-daemon action carrying a single "name=value" argument.
 */
@Test
public void testCreateEmrClusterWithNscdBootstrapScript()
{
    // Create an AWS parameters DTO.
    final AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    final InstanceDefinitions instanceDefinitions =
        new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition());
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Collections.emptyList());

    // Stub the configuration lookups used to build the NSCD script's S3 staging location
    // and the configure-daemon script path.
    when(configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT)).thenReturn(EMR_NSCD_SCRIPT);
    when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL)).thenReturn(S3_URL_PROTOCOL);
    when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME)).thenReturn(S3_BUCKET_NAME);
    when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE)).thenReturn(S3_STAGING_RESOURCE_BASE);
    when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)).thenReturn(S3_URL_PATH_DELIMITER);
    when(configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON)).thenReturn(EMR_CONFIGURE_DAEMON);

    // Configure a single daemon configuration entry.
    List<Parameter> daemonConfigs = new ArrayList<>();
    Parameter daemonConfig = new Parameter();
    daemonConfig.setName(EMR_CLUSTER_DAEMON_CONFIG_NAME);
    daemonConfig.setValue(EMR_CLUSTER_DAEMON_CONFIG_VALUE);
    daemonConfigs.add(daemonConfig);
    emrClusterDefinition.setDaemonConfigurations(daemonConfigs);

    AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName())
        .build();
    // Fixed: this stubbing was duplicated on two consecutive lines; stubbing once is sufficient.
    when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce);
    when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture()))
        .thenReturn(EMR_CLUSTER_ID);

    // Create the cluster
    String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto);

    // Verifications
    RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue();
    assertEquals(clusterId, EMR_CLUSTER_ID);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_URL_PROTOCOL);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON);
    verify(awsClientFactory).getEmrClient(awsParamsDto);
    verify(emrOperations).runEmrJobFlow((AmazonElasticMapReduceClient) amazonElasticMapReduce, runJobFlowRequest);
    List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions();

    // There should be two bootstrap actions: NSCD script, and emr daemon config
    assertEquals(2, bootstrapActionConfigs.size());

    // Verify NSCD bootstrap action
    assertEquals(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), bootstrapActionConfigs.get(0).getName());
    assertEquals(String
            .format("%s%s%s%s%s%s", S3_URL_PROTOCOL, S3_BUCKET_NAME, S3_URL_PATH_DELIMITER, S3_STAGING_RESOURCE_BASE, S3_URL_PATH_DELIMITER, EMR_NSCD_SCRIPT),
        bootstrapActionConfigs.get(0).getScriptBootstrapAction().getPath());

    // Verify EMR configure daemon bootstrap action
    assertEquals(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(), bootstrapActionConfigs.get(1).getName());
    assertEquals(EMR_CONFIGURE_DAEMON, bootstrapActionConfigs.get(1).getScriptBootstrapAction().getPath());
    assertEquals(String.format("%s=%s", EMR_CLUSTER_DAEMON_CONFIG_NAME, EMR_CLUSTER_DAEMON_CONFIG_VALUE),
        bootstrapActionConfigs.get(1).getScriptBootstrapAction().getArgs().get(0));
}
 
Example #9
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and submits a RunJobFlowRequest for a brand-new EMR cluster and returns a handle
 * to the resulting job flow.
 *
 * @param emr EMR client used to submit the job flow.
 * @param tag identifier embedded in the cluster name, S3 staging paths, and the DIGDAG_CLUSTER_ID tag.
 * @param stepCompiler compiles the operator's steps into EMR StepConfigs.
 * @param cluster user-supplied cluster configuration (ec2, applications, bootstrap, etc.).
 * @param filer stages local/bootstrap files to S3 before submission.
 * @param parameterCompiler resolves parameters referenced by bootstrap actions.
 * @return a NewCluster holding the new job flow id and its step count.
 * @throws IOException if staging files or preparing the runner fails.
 */
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler,
        Config cluster, Filer filer, ParameterCompiler parameterCompiler)
        throws IOException
{
    // Stage the shared command runner script; steps and bootstrap actions reference it.
    RemoteFile runner = prepareRunner(filer, tag);

    // Compile steps
    stepCompiler.compile(runner);

    List<StepConfig> stepConfigs = stepCompiler.stepConfigs();

    // EC2 topology: one master config, optional core group, zero or more task groups.
    Config ec2 = cluster.getNested("ec2");
    Config master = ec2.getNestedOrGetEmpty("master");
    List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of());
    List<Config> task = ec2.getListOrEmpty("task", Config.class);

    // Default application set when the user specifies none.
    List<String> applications = cluster.getListOrEmpty("applications", String.class);
    if (applications.isEmpty()) {
        applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink");
    }

    // TODO: allow configuring additional application parameters
    List<Application> applicationConfigs = applications.stream()
            .map(application -> new Application().withName(application))
            .collect(toList());

    // TODO: merge configurations with the same classification?
    List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream()
            .map(this::configurations)
            .flatMap(Collection::stream)
            .collect(toList());

    // Bootstrap actions are numbered from 1 (the index is used in staging paths).
    List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class);
    List<BootstrapActionConfig> bootstrapActions = new ArrayList<>();
    for (int i = 0; i < bootstrap.size(); i++) {
        bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler));
    }

    // Stage files to S3
    filer.stageFiles();

    Optional<String> subnetId = ec2.getOptional("subnet_id", String.class);

    String defaultMasterInstanceType;
    String defaultCoreInstanceType;
    String defaultTaskInstanceType;

    if (subnetId.isPresent()) {
        // m4 requires VPC (subnet id)
        defaultMasterInstanceType = "m4.2xlarge";
        defaultCoreInstanceType = "m4.xlarge";
        defaultTaskInstanceType = "m4.xlarge";
    }
    else {
        defaultMasterInstanceType = "m3.2xlarge";
        defaultCoreInstanceType = "m3.xlarge";
        defaultTaskInstanceType = "m3.xlarge";
    }

    // Assemble the full request; unspecified settings fall back to the defaults given here.
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")")
            .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0"))
            .withSteps(stepConfigs)
            .withBootstrapActions(bootstrapActions)
            .withApplications(applicationConfigs)
            .withLogUri(cluster.get("logs", String.class, null))
            .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole"))
            .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole"))
            .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag))
            .withVisibleToAllUsers(cluster.get("visible", boolean.class, true))
            .withConfigurations(configurations)
            .withInstances(new JobFlowInstancesConfig()
                    .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder()
                            // Master Node
                            .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1))
                            // Core Group
                            .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType))
                            // Task Groups
                            .addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType))
                            .build()
                    )
                    .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class))
                    .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class))
                    .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null))
                    .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null))
                    .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null))
                    .withTerminationProtected(cluster.get("termination_protected", boolean.class, false))
                    .withPlacement(cluster.getOptional("availability_zone", String.class)
                            .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull())
                    .withEc2SubnetId(subnetId.orNull())
                    .withEc2KeyName(ec2.get("key", String.class))
                    .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true)));

    logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size());
    RunJobFlowResult result = emr.runJobFlow(request);
    logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result);

    return NewCluster.of(result.getJobFlowId(), request.getSteps().size());
}
 
Example #10
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a single EMR bootstrap action from a user-supplied config node.
 *
 * The action may be a plain string (the script path) or an object with "path" and optional
 * "name", "env", "files", and "args" keys. The script and any auxiliary files are staged via
 * the filer; the resulting BootstrapActionConfig invokes the shared runner script with a
 * staged JSON configuration file as its argument.
 *
 * @param index 1-based position of this action, used in staging paths and the working directory.
 * @param action textual or object JSON node describing the bootstrap action.
 * @param tag cluster tag used to namespace staged files.
 * @param filer stages files to the remote store.
 * @param runner the shared command runner script that actually executes the action.
 * @param parameterCompiler resolves env/args parameter references.
 * @return the configured bootstrap action.
 * @throws IOException if serializing or staging the configuration fails.
 * @throws ConfigException if the action node is neither a string nor an object.
 */
private BootstrapActionConfig bootstrapAction(int index, JsonNode action, String tag, Filer filer, RemoteFile runner, ParameterCompiler parameterCompiler)
        throws IOException
{
    String script;
    String name;
    FileReference reference;

    Config config;
    if (action.isTextual()) {
        // Shorthand form: the node is just the script path; derive the name from the filename.
        script = action.asText();
        reference = fileReference("bootstrap", script);
        name = reference.filename();
        config = request.getConfig().getFactory().create();
    }
    else if (action.isObject()) {
        // Object form: "path" is required, "name" defaults to the script filename.
        config = request.getConfig().getFactory().create(action);
        script = config.get("path", String.class);
        reference = fileReference("bootstrap", script);
        name = config.get("name", String.class, reference.filename());
    }
    else {
        throw new ConfigException("Invalid bootstrap action: " + action);
    }

    // Stage the bootstrap script itself.
    RemoteFile file = filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), reference, false);

    // Describe how the runner should execute the script: working dir, env, downloads
    // (the script with mode 0777 plus any auxiliary "files"), and the command line.
    CommandRunnerConfiguration configuration = CommandRunnerConfiguration.builder()
            .workingDirectory(bootstrapWorkingDirectory(index))
            .env(parameterCompiler.parameters(config.getNestedOrGetEmpty("env"), (key, value) -> value))
            .addDownload(DownloadConfig.of(file, 0777))
            .addAllDownload(config.getListOrEmpty("files", String.class).stream()
                    .map(r -> fileReference("file", r))
                    .map(r -> filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), r, false, bootstrapWorkingDirectory(index)))
                    .collect(toList()))
            .addCommand(file.localPath())
            .addAllCommand(parameterCompiler.parameters(config, "args"))
            .build();

    // Serialize the runner configuration and stage it alongside the script.
    FileReference configurationFileReference = ImmutableFileReference.builder()
            .type(FileReference.Type.DIRECT)
            .contents(objectMapper.writeValueAsBytes(configuration))
            .filename("config.json")
            .build();
    RemoteFile remoteConfigurationFile = filer.prepareRemoteFile(tag, "bootstrap", Integer.toString(index), configurationFileReference, false);

    // The EMR action runs the shared runner script, passing the staged config as its argument.
    return new BootstrapActionConfig()
            .withName(name)
            .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
                    .withPath(runner.s3Uri().toString())
                    .withArgs(remoteConfigurationFile.s3Uri().toString()));
}
 
Example #11
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * This method uses the AWS Java SDK to launch an Apache HBase cluster on Amazon EMR, or
 * returns the identifier of an existing cluster when one is already available.
 *
 * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
 * @param clusterIdentifier - identifier of an existing cluster
 * @param amiVersion - AMI to use for launching this cluster
 * @param keypair - A keypair for SSHing into the Amazon EMR master node
 * @param masterInstanceType - Master node Amazon EC2 instance type
 * @param coreInstanceType - core nodes Amazon EC2 instance type
 * @param logUri - An Amazon S3 bucket for your logs
 * @param numberOfNodes - total number of nodes in this cluster including master node
 * @return the job flow identifier of the available cluster
 */
public static String createCluster(AmazonElasticMapReduce client,
		String clusterIdentifier,
		String amiVersion,
		String keypair,
		String masterInstanceType,
		String coreInstanceType,
		String logUri,
		int numberOfNodes) {

	// Reuse an existing cluster when one with this identifier is already available.
	if (clusterExists(client, clusterIdentifier)) {
		LOG.info("Cluster " + clusterIdentifier + " is available");
		return clusterIdentifier;
	}

	// Error checking before making any AWS calls.
	if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version");
	if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair");
	if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type");
	if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs.");
	// Fixed: the original check was "numberOfNodes < 0", which accepted a zero-node cluster
	// even though the error message requires at least one node.
	if (numberOfNodes < 1) throw new RuntimeException("ERROR: Please specify at least 1 node");

	RunJobFlowRequest request = new RunJobFlowRequest()
	    .withAmiVersion(amiVersion)
		.withBootstrapActions(new BootstrapActionConfig()
		             .withName("Install HBase")
		             .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
		             .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
		.withName("Job Flow With HBAse Actions")
		.withSteps(new StepConfig() //enable debugging step
					.withName("Enable debugging")
					.withActionOnFailure("TERMINATE_CLUSTER")
					.withHadoopJarStep(new StepFactory().newEnableDebuggingStep()),
					//Start HBase step - after installing it with a bootstrap action
					createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()),
					//add HBase backup step
					createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
		.withLogUri(logUri)
		.withInstances(new JobFlowInstancesConfig()
		.withEc2KeyName(keypair)
		.withInstanceCount(numberOfNodes)
		.withKeepJobFlowAliveWhenNoSteps(true)
		.withMasterInstanceType(masterInstanceType)
		.withSlaveInstanceType(coreInstanceType));

	RunJobFlowResult result = client.runJobFlow(request);

	// Poll every 10 seconds until the cluster reaches the "waiting" state (ready for work).
	String state = null;
	while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) {
		try {
			Thread.sleep(10 * 1000);
			LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available.");
		} catch (InterruptedException e) {
			// Fixed: the interrupt was silently swallowed, which made this loop spin
			// (an interrupted Thread.sleep returns immediately). Restore the interrupt
			// flag and abort the wait.
			Thread.currentThread().interrupt();
			throw new RuntimeException("Interrupted while waiting for cluster " + result.getJobFlowId() + " to become available", e);
		}

		if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){
			LOG.error("Could not create EMR Cluster");
			System.exit(-1);
		}
	}
	LOG.info("Created cluster " + result.getJobFlowId());
	LOG.info("Cluster " + clusterIdentifier + " is available");
	return result.getJobFlowId();
}