com.amazonaws.services.elasticmapreduce.model.Application Java Examples

The following examples show how to use com.amazonaws.services.elasticmapreduce.model.Application, drawn from open-source projects; the source file and license are noted above each example.
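As a primer, here is a minimal, self-contained sketch of the class itself. An Application names a product (Hadoop, Hive, Spark, ...) to install on an EMR cluster; the names and the version below are illustrative:

import com.amazonaws.services.elasticmapreduce.model.Application;

// With release-label clusters, applications are requested by name and EMR
// supplies the matching version; the version field is also populated when
// a running cluster is described.
Application hadoop = new Application().withName("Hadoop");
Application spark = new Application().withName("Spark").withVersion("2.0.2"); // version shown for illustration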
Example #1
Source File: EmrClusterTableProviderTest.java    From aws-athena-query-federation with Apache License 2.0
private Cluster makeCluster(String id)
{
    return new Cluster()
            .withId(id)
            .withName("name")
            .withAutoScalingRole("autoscaling_role")
            .withCustomAmiId("custom_ami")
            .withInstanceCollectionType("instance_collection_type")
            .withLogUri("log_uri")
            .withMasterPublicDnsName("master_public_dns")
            .withReleaseLabel("release_label")
            .withRunningAmiVersion("running_ami")
            .withScaleDownBehavior("scale_down_behavior")
            .withServiceRole("service_role")
            .withApplications(new Application().withName("name").withVersion("version"))
            .withTags(new Tag("key", "value"));
}
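A hypothetical follow-up (not part of the original test) showing how the attached Application round-trips through the Cluster getters; assertEquals is assumed to be in scope from JUnit:

// Hypothetical usage in a test:
Cluster cluster = makeCluster("test-cluster-id");
Application app = cluster.getApplications().get(0);
assertEquals("name", app.getName());
assertEquals("version", app.getVersion());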
 
Example #2
Source File: EmrDaoImpl.java    From herd with Apache License 2.0
/**
 * Converts the given list of {@link EmrClusterDefinitionApplication} into a list of {@link Application}.
 *
 * @param emrClusterDefinitionApplications the list of {@link EmrClusterDefinitionApplication}
 *
 * @return the list of {@link Application}
 */
public List<Application> getApplications(List<EmrClusterDefinitionApplication> emrClusterDefinitionApplications)
{
    List<Application> applications = new ArrayList<>();
    for (EmrClusterDefinitionApplication emrClusterDefinitionApplication : emrClusterDefinitionApplications)
    {
        Application application = new Application();
        application.setName(emrClusterDefinitionApplication.getName());
        application.setVersion(emrClusterDefinitionApplication.getVersion());
        application.setArgs(emrClusterDefinitionApplication.getArgs());

        List<Parameter> additionalInfoList = emrClusterDefinitionApplication.getAdditionalInfoList();
        if (!CollectionUtils.isEmpty(additionalInfoList))
        {
            application.setAdditionalInfo(getMap(additionalInfoList));
        }

        applications.add(application);
    }
    return applications;
}
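For comparison, the same Application can be assembled directly with the SDK's fluent setters; the additional-info key and value here are invented for illustration (java.util.Map and java.util.HashMap assumed imported):

Map<String, String> additionalInfo = new HashMap<>();
additionalInfo.put("exampleKey", "exampleValue"); // hypothetical entry
Application hive = new Application()
        .withName("Hive")
        .withVersion("2.1.0")
        .withArgs("arg1", "arg2")
        .withAdditionalInfo(additionalInfo);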
 
Example #3
Source File: EmrIT.java    From digdag with Apache License 2.0
@Test
public void test()
        throws Exception
{
    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName("Digdag Test")
            .withReleaseLabel("emr-5.2.0")
            .withApplications(Stream.of("Hadoop", "Hive", "Spark", "Flink")
                    .map(s -> new Application().withName(s))
                    .collect(toList()))
            .withJobFlowRole("EMR_EC2_DefaultRole")
            .withServiceRole("EMR_DefaultRole")
            .withVisibleToAllUsers(true)
            .withLogUri(tmpS3FolderUri + "/logs/")
            .withInstances(new JobFlowInstancesConfig()
                    .withEc2KeyName("digdag-test")
                    .withInstanceCount(1)
                    .withKeepJobFlowAliveWhenNoSteps(true)
                    .withMasterInstanceType("m3.xlarge")
                    .withSlaveInstanceType("m3.xlarge"));

    RunJobFlowResult result = emr.runJobFlow(request);

    String clusterId = result.getJobFlowId();

    clusterIds.add(clusterId);

    Id attemptId = pushAndStart(server.endpoint(), projectDir, "emr", ImmutableMap.of(
            "test_s3_folder", tmpS3FolderUri.toString(),
            "test_cluster", clusterId,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(30), attemptSuccess(server.endpoint(), attemptId));

    validateTdSparkQueryOutput();

    assertThat(Files.exists(outfile), is(true));
}
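Once the cluster is up, the installed applications can be read back through DescribeCluster (same model package); a short sketch reusing the emr client and clusterId from the test above:

DescribeClusterResult describeResult = emr.describeCluster(
        new DescribeClusterRequest().withClusterId(clusterId));
for (Application app : describeResult.getCluster().getApplications()) {
    // Prints e.g. "Hadoop 2.7.3" once the cluster reports its versions.
    System.out.println(app.getName() + " " + app.getVersion());
}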
 
Example #4
Source File: LambdaContainer.java    From aws-big-data-blog with Apache License 2.0
protected String fireEMRJob(String paramsStr, String clusterId) {
    StepFactory stepFactory = new StepFactory();
    AmazonElasticMapReduceClient emr = new AmazonElasticMapReduceClient();
    emr.setRegion(Region.getRegion(Regions.fromName(System.getenv().get("AWS_REGION"))));

    // Describes the Spark application; relevant when provisioning a cluster,
    // but unused when adding steps to an existing cluster as below.
    Application sparkConfig = new Application().withName("Spark");

    String[] params = paramsStr.split(",");

    // Debugging step (constructed here but not submitted with this request).
    StepConfig enableDebugging = new StepConfig()
            .withName("Enable debugging")
            .withActionOnFailure("TERMINATE_JOB_FLOW")
            .withHadoopJarStep(stepFactory.newEnableDebuggingStep());

    // Run Spark through command-runner.jar with the comma-separated arguments.
    HadoopJarStepConfig sparkStepConf = new HadoopJarStepConfig()
            .withJar("command-runner.jar")
            .withArgs(params);

    final StepConfig sparkStep = new StepConfig()
            .withName("Spark Step")
            .withActionOnFailure("CONTINUE")
            .withHadoopJarStep(sparkStepConf);

    AddJobFlowStepsRequest request = new AddJobFlowStepsRequest(clusterId)
            .withSteps(Collections.singletonList(sparkStep));

    AddJobFlowStepsResult result = emr.addJobFlowSteps(request);
    return result.getStepIds().get(0);
}
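The sparkConfig object above only takes effect when a cluster is created, not when steps are added; a hedged sketch of where it would plug in, reusing sparkConfig and emr from the snippet above (instance settings are illustrative):

RunJobFlowRequest createRequest = new RunJobFlowRequest()
        .withName("Spark cluster")
        .withReleaseLabel("emr-5.2.0")
        .withApplications(sparkConfig) // installs Spark at provision time
        .withJobFlowRole("EMR_EC2_DefaultRole")
        .withServiceRole("EMR_DefaultRole")
        .withInstances(new JobFlowInstancesConfig()
                .withInstanceCount(1)
                .withMasterInstanceType("m4.large")
                .withKeepJobFlowAliveWhenNoSteps(true));
String newClusterId = emr.runJobFlow(createRequest).getJobFlowId();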
 
Example #5
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler,
        Config cluster, Filer filer, ParameterCompiler parameterCompiler)
        throws IOException
{
    RemoteFile runner = prepareRunner(filer, tag);

    // Compile steps
    stepCompiler.compile(runner);

    List<StepConfig> stepConfigs = stepCompiler.stepConfigs();

    Config ec2 = cluster.getNested("ec2");
    Config master = ec2.getNestedOrGetEmpty("master");
    List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of());
    List<Config> task = ec2.getListOrEmpty("task", Config.class);

    List<String> applications = cluster.getListOrEmpty("applications", String.class);
    if (applications.isEmpty()) {
        applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink");
    }

    // TODO: allow configuring additional application parameters
    List<Application> applicationConfigs = applications.stream()
            .map(application -> new Application().withName(application))
            .collect(toList());

    // TODO: merge configurations with the same classification?
    List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream()
            .map(this::configurations)
            .flatMap(Collection::stream)
            .collect(toList());

    List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class);
    List<BootstrapActionConfig> bootstrapActions = new ArrayList<>();
    for (int i = 0; i < bootstrap.size(); i++) {
        bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler));
    }

    // Stage files to S3
    filer.stageFiles();

    Optional<String> subnetId = ec2.getOptional("subnet_id", String.class);

    String defaultMasterInstanceType;
    String defaultCoreInstanceType;
    String defaultTaskInstanceType;

    if (subnetId.isPresent()) {
        // m4 requires VPC (subnet id)
        defaultMasterInstanceType = "m4.2xlarge";
        defaultCoreInstanceType = "m4.xlarge";
        defaultTaskInstanceType = "m4.xlarge";
    }
    else {
        defaultMasterInstanceType = "m3.2xlarge";
        defaultCoreInstanceType = "m3.xlarge";
        defaultTaskInstanceType = "m3.xlarge";
    }

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")")
            .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0"))
            .withSteps(stepConfigs)
            .withBootstrapActions(bootstrapActions)
            .withApplications(applicationConfigs)
            .withLogUri(cluster.get("logs", String.class, null))
            .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole"))
            .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole"))
            .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag))
            .withVisibleToAllUsers(cluster.get("visible", boolean.class, true))
            .withConfigurations(configurations)
            .withInstances(new JobFlowInstancesConfig()
                    .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder()
                            // Master Node
                            .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1))
                            // Core Group
                            .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType))
                            // Task Groups
                            .addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType))
                            .build()
                    )
                    .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class))
                    .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class))
                    .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null))
                    .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null))
                    .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null))
                    .withTerminationProtected(cluster.get("termination_protected", boolean.class, false))
                    .withPlacement(cluster.getOptional("availability_zone", String.class)
                            .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull())
                    .withEc2SubnetId(subnetId.orNull())
                    .withEc2KeyName(ec2.get("key", String.class))
                    .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true)));

    logger.info("Submitting EMR job with {} steps(s)", request.getSteps().size());
    RunJobFlowResult result = emr.runJobFlow(request);
    logger.info("Submitted EMR job with {} step(s): {}", request.getSteps().size(), result.getJobFlowId(), result);

    return NewCluster.of(result.getJobFlowId(), request.getSteps().size());
}
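One speculative way to address the TODO about additional application parameters: accept optional "Name:Version" entries in the applications list and split them (this format is invented here for illustration, not part of digdag):

List<Application> applicationConfigs = applications.stream()
        .map(s -> {
            // "Spark:2.0.2" -> name "Spark", version "2.0.2"; bare names pass through.
            String[] parts = s.split(":", 2);
            Application app = new Application().withName(parts[0]);
            return parts.length > 1 ? app.withVersion(parts[1]) : app;
        })
        .collect(toList());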