com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest Java Examples

The following examples show how to use com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EmrDaoImpl.java    From herd with Apache License 2.0 6 votes vote down vote up
/**
 * Starts a new EMR cluster built from the given definition and records the new cluster in the EMR cluster cache of the definition's account.
 *
 * @param clusterName the name of the cluster to create; it is upper-cased when used as part of the cache key
 * @param emrClusterDefinition the definition the run job flow request is built from
 * @param awsParams the AWS parameters used to obtain the EMR client
 *
 * @return the ID of the started cluster as returned by the EMR operations
 */
@Override
public String createEmrCluster(String clusterName, EmrClusterDefinition emrClusterDefinition, AwsParamsDto awsParams)
{
    // Build the request and log a sanitized JSON rendering of it before submitting.
    final RunJobFlowRequest request = getRunJobFlowRequest(clusterName, emrClusterDefinition);
    LOGGER.info("runJobFlowRequest={}", HerdStringUtils.sanitizeLogText(jsonHelper.objectToJson(request)));

    // Submit the request.
    final String newClusterId = emrOperations.runEmrJobFlow(getEmrClient(awsParams), request);
    LOGGER.info("EMR cluster started. emrClusterId=\"{}\"", newClusterId);

    // The cache is keyed by the upper-cased cluster name together with the account id.
    final String cacheClusterName = clusterName.toUpperCase();
    LOGGER.info("Adding EMR cluster to the EMR Cluster Cache. emrClusterName=\"{}\" emrClusterId=\"{}\" accountId=\"{}\"", cacheClusterName, newClusterId,
        emrClusterDefinition.getAccountId());
    final EmrClusterCacheKey cacheKey = new EmrClusterCacheKey(cacheClusterName, emrClusterDefinition.getAccountId());

    // Register the new cluster in the cache that belongs to the definition's account.
    final Map<EmrClusterCacheKey, String> accountCache = getEmrClusterCacheByAccountId(emrClusterDefinition.getAccountId());
    accountCache.put(cacheKey, newClusterId);

    LOGGER.debug("EMR cluster cache after creating a cluster and adding it to the existing cache. emrClusterCache=\"{}\" emrClusterCacheContents=\"{}\"",
        System.identityHashCode(accountCache), accountCache.toString());

    return newClusterId;
}
 
Example #2
Source File: MockEmrOperationsImpl.java    From herd with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a mock job flow for the given request, stores it in the map of mock clusters under its job flow id, and adds every step of the request to it.
 *
 * @param jobFlowRequest the request supplying the job flow name and the steps
 * @param status the status to set on the new mock job flow
 * @param reason the status change reason to set on the new mock job flow
 * @param timeline the status timeline to set on the new mock job flow
 *
 * @return the newly created mock job flow
 */
private MockEmrJobFlow createNewCluster(RunJobFlowRequest jobFlowRequest, String status, StatusChangeReason reason, StatusTimeline timeline)
{
    final MockEmrJobFlow mockJobFlow = new MockEmrJobFlow();

    // Identity of the job flow.
    mockJobFlow.setJobFlowId(getNewJobFlowId());
    mockJobFlow.setJobFlowName(jobFlowRequest.getName());

    // Status information.
    mockJobFlow.setStatus(status);
    mockJobFlow.setStatusChangeReason(reason);
    mockJobFlow.setStatusTimeline(timeline);

    emrClusters.put(mockJobFlow.getJobFlowId(), mockJobFlow);

    // Register each requested step against the new job flow.
    jobFlowRequest.getSteps().forEach(requestedStep -> addClusterStep(mockJobFlow.getJobFlowId(), requestedStep));

    return mockJobFlow;
}
 
Example #3
Source File: EmrDaoTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a cluster from a minimal definition with encryption disabled submits a run job flow request without any bootstrap actions.
 */
@Test
public void createEmrClusterAssertEncryptionDisabled()
{
    final String emrClusterName = "clusterName";
    final String expectedClusterId = "clusterId";

    // Populate only the minimum required options: master and core instances plus one node tag.
    final MasterInstanceDefinition masterInstances =
        new MasterInstanceDefinition(10, "masterInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE,
            NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD);
    final InstanceDefinition coreInstances =
        new InstanceDefinition(20, "coreInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE,
            NO_INSTANCE_ON_DEMAND_THRESHOLD);
    final InstanceDefinitions instances = new InstanceDefinitions();
    instances.setMasterInstances(masterInstances);
    instances.setCoreInstances(coreInstances);

    final EmrClusterDefinition definition = new EmrClusterDefinition();
    definition.setInstanceDefinitions(instances);
    definition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue")));
    definition.setEncryptionEnabled(false);

    // Inspect the submitted request from inside the stubbed call.
    final Answer<String> requestVerifier = new Answer<String>()
    {
        @Override
        public String answer(InvocationOnMock invocation)
        {
            final RunJobFlowRequest submittedRequest = invocation.getArgument(1);
            // No bootstrap action should be added
            assertEquals(0, submittedRequest.getBootstrapActions().size());
            return expectedClusterId;
        }
    };
    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(requestVerifier);

    assertEquals(expectedClusterId, emrDao.createEmrCluster(emrClusterName, definition, getAwsParamsDto()));
}
 
Example #4
Source File: EmrDaoTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a cluster from a minimal definition with Oozie installation disabled submits a run job flow request without any steps.
 */
@Test
public void createEmrClusterAssertInstallOozieDisabled()
{
    final String emrClusterName = "clusterName";
    final String expectedClusterId = "clusterId";

    // Populate only the minimum required options: master and core instances plus one node tag.
    final MasterInstanceDefinition masterInstances =
        new MasterInstanceDefinition(10, "masterInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE,
            NO_INSTANCE_MAX_SEARCH_PRICE, NO_INSTANCE_ON_DEMAND_THRESHOLD);
    final InstanceDefinition coreInstances =
        new InstanceDefinition(20, "coreInstanceType", NO_EMR_CLUSTER_DEFINITION_EBS_CONFIGURATION, NO_INSTANCE_SPOT_PRICE, NO_INSTANCE_MAX_SEARCH_PRICE,
            NO_INSTANCE_ON_DEMAND_THRESHOLD);
    final InstanceDefinitions instances = new InstanceDefinitions();
    instances.setMasterInstances(masterInstances);
    instances.setCoreInstances(coreInstances);

    final EmrClusterDefinition definition = new EmrClusterDefinition();
    definition.setInstanceDefinitions(instances);
    definition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue")));
    definition.setInstallOozie(false);

    // Inspect the submitted request from inside the stubbed call.
    final Answer<String> requestVerifier = new Answer<String>()
    {
        @Override
        public String answer(InvocationOnMock invocation)
        {
            final RunJobFlowRequest submittedRequest = invocation.getArgument(1);
            // The oozie step should be skipped.
            assertEquals(0, submittedRequest.getSteps().size());
            return expectedClusterId;
        }
    };
    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(requestVerifier);

    assertEquals(expectedClusterId, emrDao.createEmrCluster(emrClusterName, definition, getAwsParamsDto()));
}
 
Example #5
Source File: EmrDaoImplTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a cluster when no NSCD script value is stubbed on the configuration helper submits a run job flow request without any
 * bootstrap actions.
 */
@Test
public void testCreateEmrClusterNoNscdBootstrapScript()
{
    // Create an AWS parameters DTO.
    final AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);

    // Create a cluster definition with empty instance definitions and no node tags.
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    final InstanceDefinitions instanceDefinitions =
        new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition());
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Collections.emptyList());

    // Stub the client factory and the EMR operations; the captors record what gets passed to runEmrJobFlow.
    AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName())
        .build();
    when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce);
    when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture()))
        .thenReturn(EMR_CLUSTER_ID);

    // Create the cluster without NSCD script configuration
    String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto);

    // Verifications (expected value first, so that a failure message reports the values the right way round)
    assertEquals(EMR_CLUSTER_ID, clusterId);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    verify(awsClientFactory).getEmrClient(awsParamsDto);
    verify(emrOperations).runEmrJobFlow(any(), any());
    RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue();
    List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions();

    // There should be no bootstrap action
    assertTrue(bootstrapActionConfigs.isEmpty());
}
 
Example #6
Source File: EmrIT.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Starts an EMR cluster, runs the "emr" project against it, and checks that the attempt succeeds and the expected output file exists.
 */
@Test
public void test()
        throws Exception
{
    // Single-instance cluster that stays alive while there are no steps.
    JobFlowInstancesConfig instances = new JobFlowInstancesConfig()
            .withEc2KeyName("digdag-test")
            .withInstanceCount(1)
            .withKeepJobFlowAliveWhenNoSteps(true)
            .withMasterInstanceType("m3.xlarge")
            .withSlaveInstanceType("m3.xlarge");

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName("Digdag Test")
            .withReleaseLabel("emr-5.2.0")
            .withApplications(Stream.of("Hadoop", "Hive", "Spark", "Flink")
                    .map(appName -> new Application().withName(appName))
                    .collect(toList()))
            .withJobFlowRole("EMR_EC2_DefaultRole")
            .withServiceRole("EMR_DefaultRole")
            .withVisibleToAllUsers(true)
            .withLogUri(tmpS3FolderUri + "/logs/")
            .withInstances(instances);

    // Launch the cluster and record its id in the list of created cluster ids.
    String clusterId = emr.runJobFlow(request).getJobFlowId();
    clusterIds.add(clusterId);

    // Run the workflow against the new cluster and wait for the attempt to succeed.
    Id attemptId = pushAndStart(server.endpoint(), projectDir, "emr", ImmutableMap.of(
            "test_s3_folder", tmpS3FolderUri.toString(),
            "test_cluster", clusterId,
            "outfile", outfile.toString()));
    expect(Duration.ofMinutes(30), attemptSuccess(server.endpoint(), attemptId));

    validateTdSparkQueryOutput();

    assertThat(Files.exists(outfile), is(true));
}
 
Example #7
Source File: EmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Launches an EMR cluster with the given name using the values of the EMR cluster configuration.
 *
 * <p>When logging is enabled the S3 log URI is set on the request; when EMR debugging is enabled as well, a "Setup Hadoop Debugging" step is added.
 *
 * @param clusterName the name to give the cluster
 * @return the job flow id reported by the run job flow result
 */
@Override
public String createCluster(String clusterName) {
  JobFlowInstancesConfig instancesConfig = new JobFlowInstancesConfig()
      .withEc2SubnetId(emrClusterConfig.getEc2SubnetId())
      .withEmrManagedMasterSecurityGroup(emrClusterConfig.getMasterSecurityGroup())
      .withEmrManagedSlaveSecurityGroup(emrClusterConfig.getSlaveSecurityGroup())
      .withInstanceCount(emrClusterConfig.getInstanceCount())
      .withKeepJobFlowAliveWhenNoSteps(true)
      .withMasterInstanceType(emrClusterConfig.getMasterInstanceType())
      .withSlaveInstanceType(emrClusterConfig.getSlaveInstanceType());

  RunJobFlowRequest request = new RunJobFlowRequest()
      .withName(clusterName)
      .withReleaseLabel(EmrInfo.getVersion())
      .withServiceRole(emrClusterConfig.getServiceRole())
      .withJobFlowRole(emrClusterConfig.getJobFlowRole())
      .withVisibleToAllUsers(emrClusterConfig.isVisibleToAllUsers())
      .withInstances(instancesConfig);

  if (emrClusterConfig.isLoggingEnabled()) {
    request.withLogUri(emrClusterConfig.getS3LogUri());

    // The debugging step is only considered when logging is enabled.
    if (emrClusterConfig.isEnableEmrDebugging()) {
      HadoopJarStepConfig debuggingJarStep = new HadoopJarStepConfig()
          .withJar("command-runner.jar")
          .withArgs("state-pusher-script");
      StepConfig debuggingStep = new StepConfig()
          .withName("Setup Hadoop Debugging")
          .withActionOnFailure(ActionOnFailure.CONTINUE)
          .withHadoopJarStep(debuggingJarStep);
      request.withSteps(debuggingStep);
    }
  }

  return getEmrClient(emrClusterConfig).runJobFlow(request).getJobFlowId();
}
 
Example #8
Source File: TestEmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that creating a cluster obtains the EMR client once and calls runJobFlow on it once.
 */
@Test
public void testCreateCluster() {
  Properties jobProperties = new Properties();
  jobProperties.setProperty("instanceCount", "1");

  // Spy on the job client so that the EMR client it hands out can be replaced with a mock.
  EmrClusterJob.Client clientSpy = Mockito.spy(new EmrClusterJob().getClient(jobProperties));
  AmazonElasticMapReduce emrMock = Mockito.mock(AmazonElasticMapReduce.class);
  RunJobFlowResult resultMock = Mockito.mock(RunJobFlowResult.class);
  Mockito.doReturn(resultMock).when(emrMock).runJobFlow(Mockito.any(RunJobFlowRequest.class));
  Mockito.doReturn(emrMock).when(clientSpy).getEmrClient(Mockito.any(EmrClusterConfig.class));

  clientSpy.createCluster("foo");

  // A plain verify() checks for exactly one invocation.
  Mockito.verify(emrMock).runJobFlow(Mockito.any(RunJobFlowRequest.class));
  Mockito.verify(clientSpy).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
 
Example #9
Source File: EmrOperationsImpl.java    From herd with Apache License 2.0 4 votes vote down vote up
/**
 * Submits the given run job flow request through the given EMR client.
 *
 * @param emrClient the EMR client the request is submitted with
 * @param jobFlowRequest the run job flow request to submit
 *
 * @return the job flow id taken from the result of the call
 */
@Override
public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest)
{
    // Delegate to the client and unwrap the job flow id from its result.
    return emrClient
        .runJobFlow(jobFlowRequest)
        .getJobFlowId();
}
 
Example #10
Source File: EmrDaoTest.java    From herd with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the run job flow request produced for a cluster definition that uses an instance fleet and a comma-separated list of two EC2 subnet IDs:
 * no instance groups, one matching instance fleet, both subnet IDs (and no single subnet ID), the default IAM roles, no steps, and the node tag.
 */
@Test
public void createEmrClusterAssertCallRunEmrJobFlowWithInstanceFleetAndMultipleSubnets()
{
    final String emrClusterName = "clusterName";
    final String expectedClusterId = "clusterId";

    // Attributes of the single instance fleet; instance type configs and launch specifications are left null.
    final String fleetName = STRING_VALUE;
    final String fleetType = STRING_VALUE_2;
    final Integer onDemandCapacity = INTEGER_VALUE;
    final Integer spotCapacity = INTEGER_VALUE_2;
    final List<EmrClusterDefinitionInstanceTypeConfig> noInstanceTypeConfigs = null;
    final EmrClusterDefinitionLaunchSpecifications noLaunchSpecifications = null;
    final EmrClusterDefinitionInstanceFleet instanceFleet =
        new EmrClusterDefinitionInstanceFleet(fleetName, fleetType, onDemandCapacity, spotCapacity, noInstanceTypeConfigs, noLaunchSpecifications);

    // Cluster definition with the instance fleet and two subnet IDs separated by a comma with surrounding whitespace.
    final EmrClusterDefinition definition = new EmrClusterDefinition();
    definition.setInstanceFleets(Lists.newArrayList(instanceFleet));
    definition.setSubnetId(String.format("%s , %s  ", EC2_SUBNET, EC2_SUBNET_2));
    definition.setNodeTags(Lists.newArrayList(new NodeTag("tagName", "tagValue")));

    // Inspect the submitted request from inside the stubbed call.
    final Answer<String> requestVerifier = new Answer<String>()
    {
        @Override
        public String answer(InvocationOnMock invocation)
        {
            final RunJobFlowRequest submittedRequest = invocation.getArgument(1);
            final JobFlowInstancesConfig instancesConfig = submittedRequest.getInstances();

            // Instance groups must be absent and the fleet must mirror the definition.
            assertEquals(0, CollectionUtils.size(instancesConfig.getInstanceGroups()));
            final List<InstanceTypeConfig> expectedInstanceTypeConfigs = null;
            final InstanceFleetConfig expectedFleet =
                new InstanceFleetConfig().withName(fleetName).withInstanceFleetType(fleetType).withTargetOnDemandCapacity(onDemandCapacity)
                    .withTargetSpotCapacity(spotCapacity).withInstanceTypeConfigs(expectedInstanceTypeConfigs).withLaunchSpecifications(null);
            assertEquals(Lists.newArrayList(expectedFleet), instancesConfig.getInstanceFleets());

            // Both subnets are passed as a list; the single subnet ID stays unset.
            assertNull(instancesConfig.getEc2SubnetId());
            assertEquals(2, CollectionUtils.size(instancesConfig.getEc2SubnetIds()));
            assertTrue(instancesConfig.getEc2SubnetIds().contains(EC2_SUBNET));
            assertTrue(instancesConfig.getEc2SubnetIds().contains(EC2_SUBNET_2));

            // IAM roles come from the configured defaults.
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_EC2_NODE_IAM_PROFILE_NAME),
                submittedRequest.getJobFlowRole());
            assertEquals(herdStringHelper.getRequiredConfigurationValue(ConfigurationValue.EMR_DEFAULT_SERVICE_IAM_ROLE_NAME),
                submittedRequest.getServiceRole());

            // No steps, and exactly the one node tag.
            final List<StepConfig> submittedSteps = submittedRequest.getSteps();
            assertEquals(0, submittedSteps.size());
            final List<Tag> submittedTags = submittedRequest.getTags();
            assertEquals(1, submittedTags.size());
            final Tag onlyTag = submittedTags.get(0);
            assertEquals("tagName", onlyTag.getKey());
            assertEquals("tagValue", onlyTag.getValue());

            return expectedClusterId;
        }
    };
    when(mockEmrOperations.runEmrJobFlow(any(), any())).then(requestVerifier);

    assertEquals(expectedClusterId, emrDao.createEmrCluster(emrClusterName, definition, getAwsParamsDto()));
}
 
Example #11
Source File: MockEmrOperationsImpl.java    From herd with Apache License 2.0 4 votes vote down vote up
/**
 * Mock implementation that creates a mock cluster for the request, or throws, depending on the AMI version carried by the request.
 *
 * <p>A non-blank AMI version equal to one of the {@code MockAwsOperationsHelper} hints either makes this method throw an
 * {@link AmazonServiceException} (throttling, bad request, not found, generic service exception) or overrides the initial cluster status
 * (waiting, running). Any other value leaves the cluster in the bootstrapping status.
 *
 * @param emrClient the EMR client; not used by this mock
 * @param jobFlowRequest the run job flow request
 *
 * @return the job flow id of the newly created mock cluster
 */
@Override
public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest)
{
    String clusterStatus = ClusterState.BOOTSTRAPPING.toString();

    // The reason text is built from the initial status and is not refreshed if the status is overridden below.
    final StatusChangeReason reason = new StatusChangeReason(ClusterStateChangeReasonCode.USER_REQUEST.toString(), "Started " + clusterStatus);
    final StatusTimeline timeline = new StatusTimeline();
    timeline.setCreationTime(HerdDateUtils.getXMLGregorianCalendarValue(new Date()));

    final String amiVersion = jobFlowRequest.getAmiVersion();
    if (StringUtils.isNotBlank(amiVersion))
    {
        // Hints that make the mock fail.
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_THROTTLING_EXCEPTION))
        {
            final AmazonServiceException throttlingException = new AmazonServiceException("test throttling exception");
            throttlingException.setErrorCode("ThrottlingException");
            throw throttlingException;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_BAD_REQUEST))
        {
            final AmazonServiceException badRequestException = new AmazonServiceException(MockAwsOperationsHelper.AMAZON_BAD_REQUEST);
            badRequestException.setStatusCode(HttpStatus.SC_BAD_REQUEST);
            throw badRequestException;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_NOT_FOUND))
        {
            final AmazonServiceException notFoundException = new AmazonServiceException(MockAwsOperationsHelper.AMAZON_NOT_FOUND);
            notFoundException.setStatusCode(HttpStatus.SC_NOT_FOUND);
            throw notFoundException;
        }
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_SERVICE_EXCEPTION))
        {
            throw new AmazonServiceException(MockAwsOperationsHelper.AMAZON_SERVICE_EXCEPTION);
        }

        // Hints that override the status of the created cluster.
        if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_CLUSTER_STATUS_WAITING))
        {
            clusterStatus = ClusterState.WAITING.toString();
        }
        else if (amiVersion.equals(MockAwsOperationsHelper.AMAZON_CLUSTER_STATUS_RUNNING))
        {
            clusterStatus = ClusterState.RUNNING.toString();
        }
    }

    final MockEmrJobFlow createdCluster = createNewCluster(jobFlowRequest, clusterStatus, reason, timeline);
    return createdCluster.getJobFlowId();
}
 
Example #12
Source File: EmrDaoImplTest.java    From herd with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that creating a cluster with an NSCD script configured and one daemon configuration submits a run job flow request with two bootstrap
 * actions: the NSCD script followed by the EMR configure daemon action.
 */
@Test
public void testCreateEmrClusterWithNscdBootstrapScript()
{
    // Create an AWS parameters DTO.
    final AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);

    // Create a cluster definition with empty instance definitions and no node tags.
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    final InstanceDefinitions instanceDefinitions =
        new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition());
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Collections.emptyList());

    // Stub the configuration values used to build the bootstrap actions.
    when(configurationHelper.getProperty(ConfigurationValue.EMR_NSCD_SCRIPT)).thenReturn(EMR_NSCD_SCRIPT);
    when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL)).thenReturn(S3_URL_PROTOCOL);
    when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME)).thenReturn(S3_BUCKET_NAME);
    when(configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE)).thenReturn(S3_STAGING_RESOURCE_BASE);
    when(configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER)).thenReturn(S3_URL_PATH_DELIMITER);
    when(configurationHelper.getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON)).thenReturn(EMR_CONFIGURE_DAEMON);

    // Add a single daemon configuration to the cluster definition.
    List<Parameter> daemonConfigs = new ArrayList<>();
    Parameter daemonConfig = new Parameter();
    daemonConfig.setName(EMR_CLUSTER_DAEMON_CONFIG_NAME);
    daemonConfig.setValue(EMR_CLUSTER_DAEMON_CONFIG_VALUE);
    daemonConfigs.add(daemonConfig);
    emrClusterDefinition.setDaemonConfigurations(daemonConfigs);

    // Stub the client factory (once) and the EMR operations; the captors record what gets passed to runEmrJobFlow.
    AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName())
        .build();
    when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce);
    when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture()))
        .thenReturn(EMR_CLUSTER_ID);

    // Create the cluster
    String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto);

    // Verifications (expected value first, so that a failure message reports the values the right way round)
    RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue();
    assertEquals(EMR_CLUSTER_ID, clusterId);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_URL_PROTOCOL);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME);
    verify(configurationHelper).getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_CONFIGURE_DAEMON);
    verify(awsClientFactory).getEmrClient(awsParamsDto);
    verify(emrOperations).runEmrJobFlow((AmazonElasticMapReduceClient) amazonElasticMapReduce, runJobFlowRequest);
    List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions();

    // There should be two bootstrap actions: NSCD script, and emr daemon config
    assertEquals(2, bootstrapActionConfigs.size());

    // Verify NSCD bootstrap action
    assertEquals(ConfigurationValue.EMR_NSCD_SCRIPT.getKey(), bootstrapActionConfigs.get(0).getName());
    assertEquals(String
            .format("%s%s%s%s%s%s", S3_URL_PROTOCOL, S3_BUCKET_NAME, S3_URL_PATH_DELIMITER, S3_STAGING_RESOURCE_BASE, S3_URL_PATH_DELIMITER, EMR_NSCD_SCRIPT),
        bootstrapActionConfigs.get(0).getScriptBootstrapAction().getPath());

    // Verify EMR configure daemon bootstrap action
    assertEquals(ConfigurationValue.EMR_CONFIGURE_DAEMON.getKey(), bootstrapActionConfigs.get(1).getName());
    assertEquals(EMR_CONFIGURE_DAEMON, bootstrapActionConfigs.get(1).getScriptBootstrapAction().getPath());
    assertEquals(String.format("%s=%s", EMR_CLUSTER_DAEMON_CONFIG_NAME, EMR_CLUSTER_DAEMON_CONFIG_VALUE),
        bootstrapActionConfigs.get(1).getScriptBootstrapAction().getArgs().get(0));
}
 
Example #13
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Compiles the steps, stages the required files, and submits a run job flow request that launches a new EMR cluster running those steps.
 *
 * <p>Default instance types are m4 when an EC2 subnet id is configured and m3 otherwise. When no applications are configured, Hadoop, Hive,
 * Spark and Flink are requested.
 *
 * @param emr the EMR client the request is submitted with
 * @param tag the tag appended to the cluster name and stored as the DIGDAG_CLUSTER_ID tag value
 * @param stepCompiler the compiler producing the step configurations
 * @param cluster the cluster configuration
 * @param filer the filer used to prepare the runner and stage files
 * @param parameterCompiler the parameter compiler passed on to the bootstrap action construction
 * @return the new cluster, made of the job flow id and the number of submitted steps
 * @throws IOException declared by this method; presumably raised by file staging or step compilation (the throwing calls are not visible here)
 */
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler,
        Config cluster, Filer filer, ParameterCompiler parameterCompiler)
        throws IOException
{
    RemoteFile runner = prepareRunner(filer, tag);

    // Compile steps
    stepCompiler.compile(runner);

    List<StepConfig> stepConfigs = stepCompiler.stepConfigs();

    Config ec2 = cluster.getNested("ec2");
    Config master = ec2.getNestedOrGetEmpty("master");
    List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of());
    List<Config> task = ec2.getListOrEmpty("task", Config.class);

    List<String> applications = cluster.getListOrEmpty("applications", String.class);
    if (applications.isEmpty()) {
        applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink");
    }

    // TODO: allow configuring additional application parameters
    List<Application> applicationConfigs = applications.stream()
            .map(application -> new Application().withName(application))
            .collect(toList());

    // TODO: merge configurations with the same classification?
    List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream()
            .map(this::configurations)
            .flatMap(Collection::stream)
            .collect(toList());

    // Bootstrap actions are numbered starting at 1.
    List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class);
    List<BootstrapActionConfig> bootstrapActions = new ArrayList<>();
    for (int i = 0; i < bootstrap.size(); i++) {
        bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler));
    }

    // Stage files to S3
    filer.stageFiles();

    Optional<String> subnetId = ec2.getOptional("subnet_id", String.class);

    String defaultMasterInstanceType;
    String defaultCoreInstanceType;
    String defaultTaskInstanceType;

    if (subnetId.isPresent()) {
        // m4 requires VPC (subnet id)
        defaultMasterInstanceType = "m4.2xlarge";
        defaultCoreInstanceType = "m4.xlarge";
        defaultTaskInstanceType = "m4.xlarge";
    }
    else {
        defaultMasterInstanceType = "m3.2xlarge";
        defaultCoreInstanceType = "m3.xlarge";
        defaultTaskInstanceType = "m3.xlarge";
    }

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")")
            .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0"))
            .withSteps(stepConfigs)
            .withBootstrapActions(bootstrapActions)
            .withApplications(applicationConfigs)
            .withLogUri(cluster.get("logs", String.class, null))
            .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole"))
            .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole"))
            .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag))
            .withVisibleToAllUsers(cluster.get("visible", boolean.class, true))
            .withConfigurations(configurations)
            .withInstances(new JobFlowInstancesConfig()
                    .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder()
                            // Master Node
                            .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1))
                            // Core Group
                            .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType))
                            // Task Groups
                            .addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType))
                            .build()
                    )
                    .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class))
                    .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class))
                    .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null))
                    .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null))
                    .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null))
                    .withTerminationProtected(cluster.get("termination_protected", boolean.class, false))
                    .withPlacement(cluster.getOptional("availability_zone", String.class)
                            .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull())
                    .withEc2SubnetId(subnetId.orNull())
                    .withEc2KeyName(ec2.get("key", String.class))
                    // The cluster stays alive without steps only when auto-termination is switched off.
                    .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true)));

    // Read the step count once; it is used for both log messages and for the result.
    int stepCount = request.getSteps().size();

    logger.info("Submitting EMR job with {} step(s)", stepCount);
    RunJobFlowResult result = emr.runJobFlow(request);
    // One argument per placeholder: a surplus argument would be dropped from the message silently.
    logger.info("Submitted EMR job with {} step(s): {}", stepCount, result.getJobFlowId());

    return NewCluster.of(result.getJobFlowId(), stepCount);
}
 
Example #14
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * Launches an Apache HBase cluster on Amazon EMR using the AWS SDK for Java and waits until the cluster reaches the "waiting" state.
 * If a cluster with the given identifier already exists, nothing is launched and that identifier is returned.
 *
 * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
 * @param clusterIdentifier - identifier of an existing cluster; returned as is when that cluster exists
 * @param amiVersion - AMI to use for launching this cluster
 * @param keypair - A keypair for SSHing into the Amazon EMR master node
 * @param masterInstanceType - Master node Amazon EC2 instance type 
 * @param coreInstanceType - core nodes Amazon EC2 instance type 
 * @param logUri - An Amazon S3 location for the cluster logs
 * @param numberOfNodes - total number of nodes in this cluster including master node; must be at least 1
 * @return the identifier of the existing cluster, or the job flow id of the newly created cluster
 * @throws RuntimeException if a required argument is missing or invalid, or if the thread is interrupted while waiting for the cluster
 */
public static String createCluster(AmazonElasticMapReduce client,
		String clusterIdentifier,
		String amiVersion,
		String keypair,
		String masterInstanceType,
		String coreInstanceType,
		String logUri,
		int numberOfNodes) {

	if (clusterExists(client, clusterIdentifier)) {
		LOG.info("Cluster " + clusterIdentifier + " is available");
		return clusterIdentifier;
	}
	
	//Error checking
	if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version");
	if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair");
	if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type");
	if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs.");
	//A cluster needs at least the master node, so zero nodes is rejected as well
	if (numberOfNodes < 1) throw new RuntimeException("ERROR: Please specify at least 1 node");
	  		
	  RunJobFlowRequest request = new RunJobFlowRequest()
	    .withAmiVersion(amiVersion)
		.withBootstrapActions(new BootstrapActionConfig()
		             .withName("Install HBase")
		             .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
		             .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
		.withName("Job Flow With HBAse Actions")	 
		.withSteps(new StepConfig() //enable debugging step
					.withName("Enable debugging")
					.withActionOnFailure("TERMINATE_CLUSTER")
					.withHadoopJarStep(new StepFactory().newEnableDebuggingStep()), 
					//Start HBase step - after installing it with a bootstrap action
					createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()), 
					//add HBase backup step
					createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
		.withLogUri(logUri)
		.withInstances(new JobFlowInstancesConfig()
		.withEc2KeyName(keypair)
		.withInstanceCount(numberOfNodes)
		.withKeepJobFlowAliveWhenNoSteps(true)
		.withMasterInstanceType(masterInstanceType)
		.withSlaveInstanceType(coreInstanceType));

	RunJobFlowResult result = client.runJobFlow(request);
	
	//Poll every 10 seconds until the cluster reports the "waiting" state
	String state = null;
	while (!(state = clusterState(client, result.getJobFlowId())).equalsIgnoreCase("waiting")) {
		//Check for failure before sleeping so a failed launch is reported without a further delay
		if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS")){
			LOG.error("Could not create EMR Cluster");
			System.exit(-1);	
		}
		
		LOG.info(result.getJobFlowId() + " is " + state + ". Waiting for cluster to become available.");
		try {
			Thread.sleep(10 * 1000);
		} catch (InterruptedException e) {
			//Restore the interrupt flag and stop waiting instead of silently ignoring the interruption
			Thread.currentThread().interrupt();
			throw new RuntimeException("Interrupted while waiting for cluster " + result.getJobFlowId() + " to become available", e);
		}
	}
	LOG.info("Created cluster " + result.getJobFlowId());
	LOG.info("Cluster " + clusterIdentifier + " is available");	
	return result.getJobFlowId();
}
 
Example #15
Source File: EmrOperations.java    From herd with Apache License 2.0 votes vote down vote up
/**
 * Runs an EMR job flow described by the given request using the given EMR client.
 *
 * @param emrClient the EMR client to run the job flow with
 * @param jobFlowRequest the run job flow request
 *
 * @return a string identifying the result; presumably the ID of the started job flow (cluster) - confirm against the implementations
 */
public String runEmrJobFlow(AmazonElasticMapReduceClient emrClient, RunJobFlowRequest jobFlowRequest);