com.amazonaws.services.elasticmapreduce.model.StepConfig Java Examples
The following examples show how to use
com.amazonaws.services.elasticmapreduce.model.StepConfig.
You can vote up the ones you like or vote down the ones you don't like,
You can also go to the original project or source file by following the links above each example, or check out the related API usage on the sidebar.
Example #1
Source File: EmrClusterJob.java From datacollector with Apache License 2.0 | 6 votes |
@Override public Properties submitJob(Properties jobProps) throws IOException { EMRJobConfig emrJobConfig = new EMRJobConfig(jobProps); Utils.checkNotNull(emrJobConfig.getClusterId(), "EMR Cluster Id"); StepConfig stepConfig = new StepConfig() .withName(emrJobConfig.getJobName()) .withActionOnFailure(ActionOnFailure.CONTINUE) // check if action on failure needs to be configurable .withHadoopJarStep(new HadoopJarStepConfig() .withJar(emrJobConfig.getDriverJarPath()) .withMainClass(emrJobConfig.getDriverMainClass()).withArgs( emrJobConfig.getArchives(), emrJobConfig.getLibjars(), emrJobConfig.getUniquePrefix(), emrJobConfig.getJavaOpts(), emrJobConfig.getLogLevel() )); LOG.debug("Step config is {}", stepConfig.toString()); AddJobFlowStepsResult addJobFlowStepsResult = getEmrClient(emrClusterConfig).addJobFlowSteps( new AddJobFlowStepsRequest() .withJobFlowId(emrJobConfig.getClusterId()) .withSteps(stepConfig)); String stepId = addJobFlowStepsResult.getStepIds().get(0); jobProps.setProperty("stepId", stepId); return jobProps; }
Example #2
Source File: MockEmrOperationsImpl.java From herd with Apache License 2.0 | 6 votes |
private MockEmrJobFlow createNewCluster(RunJobFlowRequest jobFlowRequest, String status, StatusChangeReason reason, StatusTimeline timeline) { MockEmrJobFlow cluster = new MockEmrJobFlow(); cluster.setJobFlowId(getNewJobFlowId()); cluster.setJobFlowName(jobFlowRequest.getName()); cluster.setStatus(status); cluster.setStatusTimeline(timeline); cluster.setStatusChangeReason(reason); emrClusters.put(cluster.getJobFlowId(), cluster); // Add the steps for (StepConfig stepConfig : jobFlowRequest.getSteps()) { addClusterStep(cluster.getJobFlowId(), stepConfig); } return cluster; }
Example #3
Source File: EmrHelperTest.java From herd with Apache License 2.0 | 5 votes |
/** Verifies that a hadoop jar step config built with script arguments carries them through. */
@Test
public void testEmrHadoopJarStepConfigWithArguments() {
    // Build a step config carrying a single script argument.
    List<String> scriptArguments = new ArrayList<>();
    scriptArguments.add("arg1");

    StepConfig hadoopJarStepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, scriptArguments, false);

    assertNotNull("step not returned", hadoopJarStepConfig);
    assertEquals("name not found", "step_name", hadoopJarStepConfig.getName());
    assertEquals("jar not found", "jar_location", hadoopJarStepConfig.getHadoopJarStep().getJar());
    assertNotNull("arguments not found", hadoopJarStepConfig.getHadoopJarStep().getArgs());
}
Example #4
Source File: LambdaContainer.java From aws-big-data-blog with Apache License 2.0 | 5 votes |
/**
 * Adds a Spark step (run via command-runner.jar) to an existing EMR cluster.
 *
 * @param paramsStr comma-separated arguments passed verbatim to command-runner.jar
 * @param clusterId id of the target EMR cluster
 * @return the id of the submitted step
 */
protected String fireEMRJob(String paramsStr, String clusterId) {
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient();
    // Region is taken from the Lambda runtime environment.
    emr.setRegion(Region.getRegion(Regions.fromName(System.getenv().get("AWS_REGION"))));

    // NOTE(review): the original also built an "Enable debugging" StepConfig (via
    // StepFactory) and a Spark Application object here, but never attached either to
    // the request; that dead code is removed. If the debugging step was intended,
    // it must be added to the request explicitly.
    String[] params = paramsStr.split(",");
    HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
        .withJar("command-runner.jar")
        .withArgs(params);
    final StepConfig sparkStep = new StepConfig()
        .withName("Spark Step")
        .withActionOnFailure("CONTINUE")
        .withHadoopJarStep(sparkStepConf);

    // withSteps(StepConfig...) replaces the double-brace ArrayList anti-pattern
    // (which creates an anonymous class holding a reference to the enclosing instance).
    AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId).withSteps(sparkStep);
    AddJobFlowStepsResult result = emr.addJobFlowSteps(request);
    return result.getStepIds().get(0);
}
Example #5
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 5 votes |
/** * This is a helper method for creating step configuration information * @param stepName - a custom name to label this step * @param actionOnFailure - options are terminate cluster, terminate job flow, contiunue * @param jarPath - path to jar file - could be on S3 or local file system * @param args list of Java args to configure custom step * @return */ private static StepConfig createStepConfig(String stepName, String actionOnFailure, String jarPath, List<String> args ) { //Start HBase step - after installing it with a bootstrap action StepConfig stepConfig = new StepConfig() .withName(stepName) .withActionOnFailure(actionOnFailure) .withHadoopJarStep(new HadoopJarStepConfig() .withJar(jarPath) .withArgs(args)); return stepConfig; }
Example #6
Source File: EmrClusterJob.java From datacollector with Apache License 2.0 | 5 votes |
/**
 * Launches a new EMR cluster configured from {@code emrClusterConfig}.
 *
 * @param clusterName name for the new cluster
 * @return the job flow id of the created cluster
 */
@Override
public String createCluster(String clusterName) {
    // Describe the EC2 instances backing the cluster; it stays alive with no steps queued.
    JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig()
        .withEc2SubnetId(emrClusterConfig.getEc2SubnetId())
        .withEmrManagedMasterSecurityGroup(emrClusterConfig.getMasterSecurityGroup())
        .withEmrManagedSlaveSecurityGroup(emrClusterConfig.getSlaveSecurityGroup())
        .withInstanceCount(emrClusterConfig.getInstanceCount())
        .withKeepJobFlowAliveWhenNoSteps(true)
        .withMasterInstanceType(emrClusterConfig.getMasterInstanceType())
        .withSlaveInstanceType(emrClusterConfig.getSlaveInstanceType());

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName(clusterName)
        .withReleaseLabel(EmrInfo.getVersion())
        .withServiceRole(emrClusterConfig.getServiceRole())
        .withJobFlowRole(emrClusterConfig.getJobFlowRole())
        .withVisibleToAllUsers(emrClusterConfig.isVisibleToAllUsers())
        .withInstances(instancesConfig);

    if (emrClusterConfig.isLoggingEnabled()) {
        request.withLogUri(emrClusterConfig.getS3LogUri());
        if (emrClusterConfig.isEnableEmrDebugging()) {
            // EMR debugging is enabled by running the state-pusher script through command-runner.
            StepConfig debuggingStep = new StepConfig()
                .withName("Setup Hadoop Debugging")
                .withActionOnFailure(ActionOnFailure.CONTINUE)
                .withHadoopJarStep(new HadoopJarStepConfig()
                    .withJar("command-runner.jar")
                    .withArgs("state-pusher-script"));
            request.withSteps(debuggingStep);
        }
    }

    RunJobFlowResult result = getEmrClient(emrClusterConfig).runJobFlow(request);
    return result.getJobFlowId();
}
Example #7
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
private StepConfig stepConfig(String defaultName, String tag, Config step) { String name = step.get("name", String.class, defaultName); return new StepConfig() .withName(name + " (" + tag + ")") // TERMINATE_JOB_FLOW | TERMINATE_CLUSTER | CANCEL_AND_WAIT | CONTINUE .withActionOnFailure(step.get("action_on_failure", String.class, defaultActionOnFailure)); }
Example #8
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 5 votes |
/**
 * Serializes the runner configuration, stages it remotely, and appends a
 * script-runner step that consumes it.
 *
 * @param name the step name
 * @param configuration the command-runner configuration to serialize
 * @throws IOException if serialization or staging fails
 */
private void addStep(String name, CommandRunnerConfiguration configuration) throws IOException {
    // Serialize the configuration to JSON and stage it as a remote file.
    byte[] configurationJson = objectMapper.writeValueAsBytes(configuration);
    FileReference configurationFileReference = ImmutableFileReference.builder()
        .type(FileReference.Type.DIRECT)
        .contents(configurationJson)
        .filename("config.json")
        .build();
    RemoteFile remoteConfigurationFile = prepareRemoteFile(configurationFileReference, false);

    // The step runs the script runner with the runner jar and its staged configuration.
    StepConfig runStep = stepConfig(name, tag, step)
        .withHadoopJarStep(stepFactory().newScriptRunnerStep(
            runner.s3Uri().toString(), remoteConfigurationFile.s3Uri().toString()));
    configs.add(runStep);
}
Example #9
Source File: MockEmrOperationsImpl.java From herd with Apache License 2.0 | 5 votes |
/**
 * Adds a mock step to the identified mock cluster.
 *
 * @param jobFlowId id of the cluster the step is added to
 * @param step the step config to mock
 * @return the newly created mock step
 */
private MockEmrJobFlow addClusterStep(String jobFlowId, StepConfig step) {
    List<MockEmrJobFlow> existingSteps = getStepsByClusterId(jobFlowId);
    if (existingSteps == null) {
        existingSteps = new ArrayList<>();
    }

    MockEmrJobFlow mockStep = new MockEmrJobFlow();
    String stepName = step.getName();
    // Steps mocked as "running without id" deliberately receive no job flow id.
    if (!stepName.equalsIgnoreCase(MOCK_STEP_RUNNING_WITHOUT_ID_NAME)) {
        mockStep.setJobFlowId(getNewJobFlowId());
    }
    mockStep.setJobFlowName(stepName);
    // The "running" mock names start in RUNNING; everything else starts PENDING.
    if (stepName.equalsIgnoreCase(MOCK_STEP_RUNNING_NAME) || stepName.equalsIgnoreCase(MOCK_STEP_RUNNING_WITHOUT_ID_NAME)) {
        mockStep.setStatus(StepState.RUNNING.toString());
    } else {
        mockStep.setStatus(StepState.PENDING.toString());
    }
    mockStep.setJarLocation(step.getHadoopJarStep().getJar());

    existingSteps.add(mockStep);
    setStepsByClusterId(jobFlowId, existingSteps);
    return mockStep;
}
Example #10
Source File: EmrPigStepHelper.java From herd with Apache License 2.0 | 5 votes |
@Override public StepConfig getEmrStepConfig(Object step) { EmrPigStep pigStep = (EmrPigStep) step; // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (pigStep.isContinueOnError() != null && pigStep.isContinueOnError()) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // If there are no arguments to hive script if (CollectionUtils.isEmpty(pigStep.getScriptArguments())) { // Just build the StepConfig object and return return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure) .withHadoopJarStep(new StepFactory().newRunPigScriptStep(pigStep.getScriptLocation().trim())); } // If there are arguments specified else { return new StepConfig().withName(pigStep.getStepName().trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep(new StepFactory() .newRunPigScriptStep(pigStep.getScriptLocation().trim(), pigStep.getScriptArguments().toArray(new String[pigStep.getScriptArguments().size()]))); } }
Example #11
Source File: EmrHelperTest.java From herd with Apache License 2.0 | 5 votes |
/** Verifies that continueOnError=true maps to the CONTINUE action on failure. */
@Test
public void testEmrHadoopJarStepConfigContinueOnError() {
    StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, null, true);

    // "retuned" typo in the failure message fixed.
    assertNotNull("step not returned", stepConfig);
    assertEquals("name not found", "step_name", stepConfig.getName());
    assertEquals("jar not found", "jar_location", stepConfig.getHadoopJarStep().getJar());
    // The test is named for continueOnError but never asserted it; pin the behavior.
    assertEquals("action on failure not found", "CONTINUE", stepConfig.getActionOnFailure());
}
Example #12
Source File: EmrHelperTest.java From herd with Apache License 2.0 | 5 votes |
/** Verifies that continueOnError=null falls back to the CANCEL_AND_WAIT action. */
@Test
public void testEmrHadoopJarStepConfigNoContinueOnError() {
    StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, null, null);

    // "retuned" typo in the failure message fixed.
    assertNotNull("step not returned", stepConfig);
    assertEquals("name not found", "step_name", stepConfig.getName());
    assertEquals("jar not found", "jar_location", stepConfig.getHadoopJarStep().getJar());
    // The test is named for the no-continue case but never asserted it; pin the default.
    assertEquals("action on failure not found", "CANCEL_AND_WAIT", stepConfig.getActionOnFailure());
}
Example #13
Source File: EmrHelperTest.java From herd with Apache License 2.0 | 5 votes |
/** Verifies the basic hadoop jar step config (continueOnError=false). */
@Test
public void testEmrHadoopJarStepConfig() {
    StepConfig stepConfig = emrHelper.getEmrHadoopJarStepConfig("step_name", "jar_location", null, null, false);

    // "retuned" typo in the failure message fixed.
    assertNotNull("step not returned", stepConfig);
    assertEquals("name not found", "step_name", stepConfig.getName());
    assertEquals("jar not found", "jar_location", stepConfig.getHadoopJarStep().getJar());
    // continueOnError=false must keep the CANCEL_AND_WAIT default.
    assertEquals("action on failure not found", "CANCEL_AND_WAIT", stepConfig.getActionOnFailure());
}
Example #14
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
/** * Create the step config list of objects for hive/pig installation. * * @param emrClusterDefinition the EMR definition name value. * * @return list of step configuration that contains all the steps for the given configuration. */ private List<StepConfig> getStepConfig(EmrClusterDefinition emrClusterDefinition) { StepFactory stepFactory = new StepFactory(); List<StepConfig> appSteps = new ArrayList<>(); // Create install hive step and add to the StepConfig list if (StringUtils.isNotBlank(emrClusterDefinition.getHiveVersion())) { StepConfig installHive = new StepConfig().withName("Hive " + emrClusterDefinition.getHiveVersion()).withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newInstallHiveStep(emrClusterDefinition.getHiveVersion())); appSteps.add(installHive); } // Create install Pig step and add to the StepConfig List if (StringUtils.isNotBlank(emrClusterDefinition.getPigVersion())) { StepConfig installPig = new StepConfig().withName("Pig " + emrClusterDefinition.getPigVersion()).withActionOnFailure(ActionOnFailure.TERMINATE_JOB_FLOW) .withHadoopJarStep(stepFactory.newInstallPigStep(emrClusterDefinition.getPigVersion())); appSteps.add(installPig); } // Add the hadoop jar steps that need to be added. if (!CollectionUtils.isEmpty(emrClusterDefinition.getHadoopJarSteps())) { for (HadoopJarStep hadoopJarStep : emrClusterDefinition.getHadoopJarSteps()) { StepConfig stepConfig = emrHelper .getEmrHadoopJarStepConfig(hadoopJarStep.getStepName(), hadoopJarStep.getJarLocation(), hadoopJarStep.getMainClass(), hadoopJarStep.getScriptArguments(), hadoopJarStep.isContinueOnError()); appSteps.add(stepConfig); } } return appSteps; }
Example #15
Source File: EmrDaoImpl.java From herd with Apache License 2.0 | 5 votes |
@Override public String addEmrStep(String clusterId, StepConfig emrStepConfig, AwsParamsDto awsParamsDto) throws Exception { List<StepConfig> steps = new ArrayList<>(); steps.add(emrStepConfig); // Add the job flow request AddJobFlowStepsRequest jobFlowStepRequest = new AddJobFlowStepsRequest(clusterId, steps); List<String> emrStepIds = emrOperations.addJobFlowStepsRequest(getEmrClient(awsParamsDto), jobFlowStepRequest); return emrStepIds.get(0); }
Example #16
Source File: EmrHelper.java From herd with Apache License 2.0 | 5 votes |
/** * Builds the StepConfig for the Hadoop jar step. * * @param stepName the step name. * @param jarLocation the location of jar. * @param mainClass the main class. * @param scriptArguments the arguments. * @param isContinueOnError indicate what to do on error. * * @return the stepConfig. */ public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass, List<String> scriptArguments, Boolean isContinueOnError) { // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (isContinueOnError != null && isContinueOnError) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // If there are no arguments if (CollectionUtils.isEmpty(scriptArguments)) { // Build the StepConfig object and return return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure) .withHadoopJarStep(new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass)); } else { // If there are arguments, include the arguments in the StepConfig object return new StepConfig().withName(stepName.trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep( new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass) .withArgs(scriptArguments.toArray(new String[scriptArguments.size()]))); } }
Example #17
Source File: EmrShellStepHelper.java From herd with Apache License 2.0 | 5 votes |
@Override public StepConfig getEmrStepConfig(Object step) { EmrShellStep emrShellStep = (EmrShellStep) step; // Hadoop Jar provided by Amazon for running Shell Scripts String hadoopJarForShellScript = configurationHelper.getProperty(ConfigurationValue.EMR_SHELL_SCRIPT_JAR_PATH); // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (emrShellStep.isContinueOnError() != null && emrShellStep.isContinueOnError()) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // Add the script location List<String> argsList = new ArrayList<>(); argsList.add(emrShellStep.getScriptLocation().trim()); // Add the script arguments if (!CollectionUtils.isEmpty(emrShellStep.getScriptArguments())) { for (String argument : emrShellStep.getScriptArguments()) { argsList.add(argument.trim()); } } // Return the StepConfig object HadoopJarStepConfig jarConfig = new HadoopJarStepConfig(hadoopJarForShellScript).withArgs(argsList); return new StepConfig().withName(emrShellStep.getStepName().trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep(jarConfig); }
Example #18
Source File: EmrHadoopJarStepHelper.java From herd with Apache License 2.0 | 5 votes |
/**
 * Builds the StepConfig for a custom hadoop jar step by delegating to the shared EMR helper.
 *
 * @param step the step object; must be an {@code EmrHadoopJarStep}
 * @return the step config for the hadoop jar step
 */
@Override
public StepConfig getEmrStepConfig(Object step) {
    EmrHadoopJarStep emrHadoopJarStep = (EmrHadoopJarStep) step;
    return emrHelper.getEmrHadoopJarStepConfig(
        emrHadoopJarStep.getStepName(),
        emrHadoopJarStep.getJarLocation(),
        emrHadoopJarStep.getMainClass(),
        emrHadoopJarStep.getScriptArguments(),
        emrHadoopJarStep.isContinueOnError());
}
Example #19
Source File: EmrHiveStepHelper.java From herd with Apache License 2.0 | 5 votes |
@Override public StepConfig getEmrStepConfig(Object step) { EmrHiveStep emrHiveStep = (EmrHiveStep) step; // Default ActionOnFailure is to cancel the execution and wait ActionOnFailure actionOnFailure = ActionOnFailure.CANCEL_AND_WAIT; if (emrHiveStep.isContinueOnError() != null && emrHiveStep.isContinueOnError()) { // Override based on user input actionOnFailure = ActionOnFailure.CONTINUE; } // If there are no arguments to hive script if (CollectionUtils.isEmpty(emrHiveStep.getScriptArguments())) { // Just build the StepConfig object and return return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure) .withHadoopJarStep(new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim())); } // If there are arguments specified else { // For each argument, add "-d" option List<String> hiveArgs = new ArrayList<>(); for (String hiveArg : emrHiveStep.getScriptArguments()) { hiveArgs.add("-d"); hiveArgs.add(hiveArg); } // Return the StepConfig object return new StepConfig().withName(emrHiveStep.getStepName().trim()).withActionOnFailure(actionOnFailure).withHadoopJarStep( new StepFactory().newRunHiveScriptStep(emrHiveStep.getScriptLocation().trim(), hiveArgs.toArray(new String[hiveArgs.size()]))); } }
Example #20
Source File: EmrDaoTest.java From herd with Apache License 2.0 | 4 votes |
/**
 * Verifies that an EMR cluster definition carrying an instance fleet and multiple EC2
 * subnet ids produces the expected RunJobFlowRequest (instance fleets populated, no
 * instance groups, both subnets present, default roles applied).
 */
@Test
public void createEmrClusterAssertCallRunEmrJobFlowWithInstanceFleetAndMultipleSubnets() {
    // Create objects required for testing.
    final String clusterName = "clusterName";
    final String clusterId = "clusterId";
    final String name = STRING_VALUE;
    final String instanceFleetType = STRING_VALUE_2;
    final Integer targetOnDemandCapacity = INTEGER_VALUE;
    final Integer targetSpotCapacity = INTEGER_VALUE_2;
    final List<EmrClusterDefinitionInstanceTypeConfig> emrClusterDefinitionInstanceTypeConfigs = null;
    final EmrClusterDefinitionLaunchSpecifications emrClusterDefinitionLaunchSpecifications = null;
    final EmrClusterDefinitionInstanceFleet emrClusterDefinitionInstanceFleet =
        new EmrClusterDefinitionInstanceFleet(name, instanceFleetType, targetOnDemandCapacity, targetSpotCapacity,
            emrClusterDefinitionInstanceTypeConfigs, emrClusterDefinitionLaunchSpecifications);

    // Create an EMR cluster definition with instance fleet configuration and multiple EC2 subnet IDs.
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    emrClusterDefinition.setInstanceFleets(Lists.newArrayList(emrClusterDefinitionInstanceFleet));
    // NOTE(review): subnet ids are deliberately padded with whitespace — presumably to
    // exercise trimming during parsing, since the assertions below expect the bare ids.
    emrClusterDefinition.setSubnetId(String.format("%s , %s ", EC2_SUBNET, EC2_SUBNET_2));
    emrClusterDefinition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue")));

    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(new Answer<String>()
    {
        @Override
        public String answer(InvocationOnMock invocation)
        {
            // Assert that the given EMR cluster definition produced the correct RunJobFlowRequest.
            RunJobFlowRequest runJobFlowRequest = invocation.getArgument(1);
            JobFlowInstancesConfig jobFlowInstancesConfig = runJobFlowRequest.getInstances();
            // With an instance fleet configured, no instance groups should be present.
            assertEquals(0, CollectionUtils.size(jobFlowInstancesConfig.getInstanceGroups()));
            final List<InstanceTypeConfig> expectedInstanceTypeConfigs = null;
            assertEquals(Lists.newArrayList(
                new InstanceFleetConfig().withName(name).withInstanceFleetType(instanceFleetType).withTargetOnDemandCapacity(targetOnDemandCapacity)
                    .withTargetSpotCapacity(targetSpotCapacity).withInstanceTypeConfigs(expectedInstanceTypeConfigs).withLaunchSpecifications(null)),
                jobFlowInstancesConfig.getInstanceFleets());
            // With multiple subnets, the single-subnet field must be unset.
            assertNull(jobFlowInstancesConfig.getEc2SubnetId());
            assertEquals(2, CollectionUtils.size(jobFlowInstancesConfig.getEc2SubnetIds()));
            assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET));
            assertTrue(jobFlowInstancesConfig.getEc2SubnetIds().contains(EC2_SUBNET_2));
            // Default IAM roles come from configuration.
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME),
                runJobFlowRequest.getJobFlowRole());
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME),
                runJobFlowRequest.getServiceRole());
            List<StepConfig> stepConfigs = runJobFlowRequest.getSteps();
            assertEquals(0, stepConfigs.size());
            // The single node tag from the definition must be carried over.
            List<Tag> tags = runJobFlowRequest.getTags();
            assertEquals(1, tags.size());
            {
                Tag tag = tags.get(0);
                assertEquals("tagName", tag.getKey());
                assertEquals("tagValue", tag.getValue());
            }
            return clusterId;
        }
    });

    assertEquals(clusterId, emrDao.createEmrCluster(clusterName, emrClusterDefinition, getAwsParamsDto()));
}
Example #21
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
/**
 * Builds and submits a RunJobFlowRequest for a brand-new EMR cluster from the
 * operator configuration: compiles the steps, resolves applications, EMR
 * configurations and bootstrap actions, stages files to S3, then calls
 * {@code emr.runJobFlow}.
 *
 * @param emr the EMR client used to submit the request
 * @param tag unique tag identifying this digdag run; appended to names and set as a cluster tag
 * @param stepCompiler compiles the operator steps into StepConfig objects
 * @param cluster the "cluster" section of the operator config
 * @param filer stages local files to S3
 * @param parameterCompiler compiles parameters for bootstrap actions
 * @return the new cluster's id and its step count
 * @throws IOException if staging or compilation fails
 */
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config cluster, Filer filer,
    ParameterCompiler parameterCompiler)
    throws IOException
{
    RemoteFile runner = prepareRunner(filer, tag);

    // Compile steps
    stepCompiler.compile(runner);
    List<StepConfig> stepConfigs = stepCompiler.stepConfigs();

    Config ec2 = cluster.getNested("ec2");
    Config master = ec2.getNestedOrGetEmpty("master");
    // "core" is a single optional config; "task" may list several groups.
    List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of());
    List<Config> task = ec2.getListOrEmpty("task", Config.class);

    // Default application set when none is configured explicitly.
    List<String> applications = cluster.getListOrEmpty("applications", String.class);
    if (applications.isEmpty()) {
        applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink");
    }
    // TODO: allow configuring additional application parameters
    List<Application> applicationConfigs = applications.stream()
        .map(application -> new Application().withName(application))
        .collect(toList());

    // TODO: merge configurations with the same classification?
    List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream()
        .map(this::configurations)
        .flatMap(Collection::stream)
        .collect(toList());

    // Bootstrap actions are numbered starting at 1.
    List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class);
    List<BootstrapActionConfig> bootstrapActions = new ArrayList<>();
    for (int i = 0; i < bootstrap.size(); i++) {
        bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler));
    }

    // Stage files to S3
    filer.stageFiles();

    Optional<String> subnetId = ec2.getOptional("subnet_id", String.class);

    // Instance-type defaults depend on VPC availability.
    String defaultMasterInstanceType;
    String defaultCoreInstanceType;
    String defaultTaskInstanceType;
    if (subnetId.isPresent()) {
        // m4 requires VPC (subnet id)
        defaultMasterInstanceType = "m4.2xlarge";
        defaultCoreInstanceType = "m4.xlarge";
        defaultTaskInstanceType = "m4.xlarge";
    }
    else {
        defaultMasterInstanceType = "m3.2xlarge";
        defaultCoreInstanceType = "m3.xlarge";
        defaultTaskInstanceType = "m3.xlarge";
    }

    RunJobFlowRequest request = new RunJobFlowRequest()
        .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")")
        .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0"))
        .withSteps(stepConfigs)
        .withBootstrapActions(bootstrapActions)
        .withApplications(applicationConfigs)
        .withLogUri(cluster.get("logs", String.class, null))
        .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole"))
        .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole"))
        .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag))
        .withVisibleToAllUsers(cluster.get("visible", boolean.class, true))
        .withConfigurations(configurations)
        .withInstances(new JobFlowInstancesConfig()
            .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder()
                // Master Node
                .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1))
                // Core Group
                .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType))
                // Task Groups
                .addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType))
                .build()
            )
            .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class))
            .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class))
            .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null))
            .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null))
            .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null))
            .withTerminationProtected(cluster.get("termination_protected", boolean.class, false))
            .withPlacement(cluster.getOptional("availability_zone", String.class)
                .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull())
            .withEc2SubnetId(subnetId.orNull())
            .withEc2KeyName(ec2.get("key", String.class))
            // auto_terminate defaults to true, which maps to keep-alive = false.
            .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true)));

    logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size());
    RunJobFlowResult result = emr.runJobFlow(request);
    logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result);

    return NewCluster.of(result.getJobFlowId(), request.getSteps().size());
}
Example #22
Source File: EmrOperatorFactory.java From digdag with Apache License 2.0 | 4 votes |
/**
 * Returns the compiled step configurations.
 *
 * @return the compiled steps
 * @throws IllegalStateException if steps have not been compiled yet
 */
List<StepConfig> stepConfigs()
{
    // Same contract as Preconditions.checkState: IllegalStateException when not compiled.
    if (configs == null) {
        throw new IllegalStateException();
    }
    return configs;
}
Example #23
Source File: EMRUtils.java From aws-big-data-blog with Apache License 2.0 | 4 votes |
/** * This method uses method the AWS Java to launch an Apache HBase cluster on Amazon EMR. * * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service * @param clusterIdentifier - identifier of an existing cluster * @param amiVersion - AMI to use for launching this cluster * @param keypair - A keypair for SSHing into the Amazon EMR master node * @param masterInstanceType - Master node Amazon EC2 instance type * @param coreInstanceType - core nodes Amazon EC2 instance type * @param logUri - An Amazon S3 bucket for your * @param numberOfNodes - total number of nodes in this cluster including master node * @return */ public static String createCluster(AmazonElasticMapReduce client, String clusterIdentifier, String amiVersion, String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) { if (clusterExists(client, clusterIdentifier)) { LOG.info("Cluster " + clusterIdentifier + " is available"); return clusterIdentifier; } //Error checking if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version"); if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair"); if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type"); if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs."); if (numberOfNodes < 0) throw new RuntimeException("ERROR: Please specify at least 1 node"); RunJobFlowRequest request = new RunJobFlowRequest() .withAmiVersion(amiVersion) .withBootstrapActions(new BootstrapActionConfig() .withName("Install HBase") .withScriptBootstrapAction(new ScriptBootstrapActionConfig() .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase"))) .withName("Job Flow With HBAse Actions") .withSteps(new StepConfig() 
//enable debugging step .withName("Enable debugging") .withActionOnFailure("TERMINATE_CLUSTER") .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()), //Start HBase step - after installing it with a bootstrap action createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()), //add HBase backup step createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs())) .withLogUri(logUri) .withInstances(new JobFlowInstancesConfig() .withEc2KeyName(keypair) .withInstanceCount(numberOfNodes) .withKeepJobFlowAliveWhenNoSteps(true) .withMasterInstanceType(masterInstanceType) .withSlaveInstanceType(coreInstanceType)); RunJobFlowResult result = client.runJobFlow(request); String state = null; while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) { try { Thread.sleep(10 * 1000); LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available."); } catch (InterruptedException e) { } if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){ LOG.error("Could not create EMR Cluster"); System.exit(-1); } } LOG.info("Created cluster " + result.getJobFlowId()); LOG.info("Cluster " + clusterIdentifier + " is available"); return result.getJobFlowId(); }
Example #24
Source File: EmrDao.java From herd with Apache License 2.0 | 2 votes |
/**
 * Adds a step to an existing EMR cluster based on the input.
 * <p/>
 * There are four serializable step types supported currently: 1: ShellStep - for shell scripts,
 * 2: HiveStep - for hive scripts, 3: HadoopJarStep - for custom Map Reduce jar files, and
 * 4: PigStep - for Pig scripts.
 *
 * @param clusterId EMR cluster ID.
 * @param emrStepConfig the EMR step config to be added.
 * @param awsParamsDto the proxy details.
 *
 * @return the id of the newly added step
 * @throws Exception if the step could not be added to the cluster
 */
public String addEmrStep(String clusterId, StepConfig emrStepConfig, AwsParamsDto awsParamsDto) throws Exception;
Example #25
Source File: EmrStepHelper.java From herd with Apache License 2.0 | 2 votes |
/**
 * Gets the StepConfig object for the given step.
 *
 * @param step the step object (the expected concrete type depends on the implementing helper)
 *
 * @return the step config object
 */
public abstract StepConfig getEmrStepConfig(Object step);