com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig Java Examples

The following examples show how to use com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EmrClusterJob.java    From datacollector with Apache License 2.0 6 votes vote down vote up
@Override
public Properties submitJob(Properties jobProps) throws IOException {
  // Submits a single Hadoop-jar step to an existing EMR cluster and records
  // the resulting step id back into the supplied job properties.
  EMRJobConfig emrJobConfig = new EMRJobConfig(jobProps);
  Utils.checkNotNull(emrJobConfig.getClusterId(), "EMR Cluster Id");
  StepConfig stepConfig = new StepConfig()
      .withName(emrJobConfig.getJobName())
      .withActionOnFailure(ActionOnFailure.CONTINUE) // check if action on failure needs to be configurable
      .withHadoopJarStep(new HadoopJarStepConfig()
          .withJar(emrJobConfig.getDriverJarPath())
          .withMainClass(emrJobConfig.getDriverMainClass()).withArgs(
              emrJobConfig.getArchives(),
              emrJobConfig.getLibjars(),
              emrJobConfig.getUniquePrefix(),
              emrJobConfig.getJavaOpts(),
              emrJobConfig.getLogLevel()
          ));
  // Pass the object itself: SLF4J only calls toString() when DEBUG is enabled,
  // so the explicit toString() would have forced the conversion unconditionally.
  LOG.debug("Step config is {}", stepConfig);
  AddJobFlowStepsResult addJobFlowStepsResult = getEmrClient(emrClusterConfig).addJobFlowSteps(
      new AddJobFlowStepsRequest()
          .withJobFlowId(emrJobConfig.getClusterId())
          .withSteps(stepConfig));
  // AddJobFlowSteps returns step ids in request order; we submitted exactly one step.
  String stepId = addJobFlowStepsResult.getStepIds().get(0);
  jobProps.setProperty("stepId", stepId);
  return jobProps;
}
 
Example #2
Source File: EmrShellStepHelper.java    From herd with Apache License 2.0 5 votes vote down vote up
@Override
public StepConfig getEmrStepConfig(Object step)
{
    EmrShellStep shellStep = (EmrShellStep) step;

    // Jar supplied by Amazon for executing shell scripts on EMR.
    String shellScriptJar = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR_PATH);

    // Cancel the execution and wait by default; continue only when the user
    // explicitly opted in to continuing on error.
    ActionOnFailure failureAction =
        (shellStep.isContinueOnError() != null && shellStep.isContinueOnError())
            ? ActionOnFailure.CONTINUE
            : ActionOnFailure.CANCEL_AND_WAIT;

    // First argument is the script location, followed by any script arguments
    // (all values trimmed).
    List<String> arguments = new ArrayList<>();
    arguments.add(shellStep.getScriptLocation().trim());
    if (!CollectionUtils.isEmpty(shellStep.getScriptArguments()))
    {
        for (String scriptArgument : shellStep.getScriptArguments())
        {
            arguments.add(scriptArgument.trim());
        }
    }

    // Assemble and return the step configuration.
    HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig(shellScriptJar).withArgs(arguments);
    return new StepConfig()
        .withName(shellStep.getStepName().trim())
        .withActionOnFailure(failureAction)
        .withHadoopJarStep(hadoopJarStep);
}
 
Example #3
Source File: EmrHelper.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the StepConfig for the Hadoop jar step.
 *
 * @param stepName the step name.
 * @param jarLocation the location of jar.
 * @param mainClass the main class.
 * @param scriptArguments the arguments.
 * @param isContinueOnError indicate what to do on error.
 *
 * @return the stepConfig.
 */
public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass, List<String> scriptArguments, Boolean isContinueOnError)
{
    // Default ActionOnFailure is to cancel the execution and wait
    ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT;

    if (isContinueOnError != null && isContinueOnError)
    {
        // Override based on user input
        actionOnFailure = ActionOnFailure.CONTINUE;
    }

    // Build the Hadoop jar step once; only attach arguments when present.
    // (Previously the whole StepConfig chain was duplicated in both branches.)
    HadoopJarStepConfig hadoopJarStepConfig = new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass);
    if (!CollectionUtils.isEmpty(scriptArguments))
    {
        // new String[0] is the idiomatic (and on modern JVMs faster) toArray form.
        hadoopJarStepConfig.withArgs(scriptArguments.toArray(new String[0]));
    }

    // Build the StepConfig object and return
    return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep(hadoopJarStepConfig);
}
 
Example #4
Source File: EmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
@Override
public String createCluster(String clusterName) {
  // Build the cluster request from the configured EMR settings; the cluster is
  // kept alive after steps finish (withKeepJobFlowAliveWhenNoSteps) so jobs can
  // be submitted to it later.
  RunJobFlowRequest request = new RunJobFlowRequest()
      .withName(clusterName)
      .withReleaseLabel(EmrInfo.getVersion())
      .withServiceRole(emrClusterConfig.getServiceRole())
      .withJobFlowRole(emrClusterConfig.getJobFlowRole())
      .withVisibleToAllUsers(emrClusterConfig.isVisibleToAllUsers())
      .withInstances(new JobFlowInstancesConfig()
          .withEc2SubnetId(emrClusterConfig.getEc2SubnetId())
          .withEmrManagedMasterSecurityGroup(emrClusterConfig.getMasterSecurityGroup())
          .withEmrManagedSlaveSecurityGroup(emrClusterConfig.getSlaveSecurityGroup())
          .withInstanceCount(emrClusterConfig.getInstanceCount())
          .withKeepJobFlowAliveWhenNoSteps(true)
          .withMasterInstanceType(emrClusterConfig.getMasterInstanceType())
          .withSlaveInstanceType(emrClusterConfig.getSlaveInstanceType()));

  if (emrClusterConfig.isLoggingEnabled()) {
    request.withLogUri(emrClusterConfig.getS3LogUri());
    if (emrClusterConfig.isEnableEmrDebugging()) {
      // EMR debugging is enabled by running the "state-pusher-script" command
      // through command-runner.jar as a first step on the cluster.
      // (Literals inlined: they were locals mis-named in UPPER_SNAKE_CASE.)
      StepConfig enableDebugging = new StepConfig()
          .withName("Setup Hadoop Debugging")
          .withActionOnFailure(ActionOnFailure.CONTINUE)
          .withHadoopJarStep(new HadoopJarStepConfig()
              .withJar("command-runner.jar")
              .withArgs("state-pusher-script"));
      request.withSteps(enableDebugging);
    }
  }
  RunJobFlowResult result = getEmrClient(emrClusterConfig).runJobFlow(request);
  return result.getJobFlowId();
}
 
Example #5
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Helper for assembling the configuration of a single EMR step.
 *
 * @param stepName a custom name to label this step
 * @param actionOnFailure what to do if the step fails - options are terminate cluster, terminate job flow, continue
 * @param jarPath path to the jar file - could be on S3 or local file system
 * @param args list of Java args to configure the custom step
 * @return the assembled step configuration
 */
private static StepConfig createStepConfig(String stepName, String actionOnFailure, String jarPath, List<String> args ) {
	// Build the Hadoop jar step first, then wrap it in the StepConfig.
	HadoopJarStepConfig jarStep = new HadoopJarStepConfig()
			.withJar(jarPath)
			.withArgs(args);
	return new StepConfig()
			.withName(stepName)
			.withActionOnFailure(actionOnFailure)
			.withHadoopJarStep(jarStep);
}
 
Example #6
Source File: LambdaContainer.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
protected String fireEMRJob(String paramsStr,String clusterId){
	// Submits a Spark step (via command-runner.jar) to an existing EMR cluster
	// and returns the new step's id. Region is taken from the Lambda runtime env.
	AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient();
	emr.setRegion(Region.getRegion(Regions.fromName(System.getenv().get("AWS_REGION"))));

	// NOTE(review): the original built an "Enable debugging" StepConfig (via
	// StepFactory) and a Spark Application object but never attached either to
	// the request; those unused locals were removed. If debugging was meant to
	// be enabled, that step must also be passed to withSteps below.

	// The comma-separated parameter string becomes the command-runner arg list.
	String[] params = paramsStr.split(",");
	HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
			.withJar("command-runner.jar")
			.withArgs(params);

	StepConfig sparkStep = new StepConfig()
			.withName("Spark Step")
			.withActionOnFailure("CONTINUE")
			.withHadoopJarStep(sparkStepConf);

	// withSteps is varargs - no need for the double-brace ArrayList anti-pattern,
	// which creates an anonymous class capturing the enclosing instance.
	AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId)
			.withSteps(sparkStep);

	// Step ids come back in request order; exactly one step was submitted.
	AddJobFlowStepsResult result = emr.addJobFlowSteps(request);
	return result.getStepIds().get(0);
}