com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce Java Examples

The following examples show how to use com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestEmrClusterJob.java    From datacollector with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code getClusterStatus} obtains the EMR client once and issues exactly one
 * {@code describeCluster} request against it.
 */
@Test
public void testGetClusterStatus() {
  // Collaborators: a spied job client plus mocked EMR responses.
  EmrClusterJob.Client spiedClient = Mockito.spy(new EmrClusterJob().getClient(new Properties()));
  AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
  DescribeClusterResult describeResult = Mockito.mock(DescribeClusterResult.class);
  Cluster cluster = Mockito.mock(Cluster.class);
  ClusterStatus status = Mockito.mock(ClusterStatus.class);

  // Wire the chain: client -> emr -> describe result -> cluster -> status.
  Mockito.doReturn(emr).when(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
  Mockito.doReturn(describeResult).when(emr).describeCluster(Mockito.any(DescribeClusterRequest.class));
  Mockito.doReturn(cluster).when(describeResult).getCluster();
  Mockito.doReturn(status).when(cluster).getStatus();

  spiedClient.getClusterStatus("foo");

  // verify(mock) uses the default mode, which is times(1).
  Mockito.verify(emr).describeCluster(Mockito.any(DescribeClusterRequest.class));
  Mockito.verify(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
 
Example #2
Source File: CloudFormationClient.java    From herd-mdl with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the summaries of every EMR cluster whose id appears in {@code stackClusterIds},
 * walking all result pages of the cluster listing.
 *
 * @param amazonElasticMapReduce the EMR client used for listing
 * @param stackClusterIds ids of the clusters to keep
 * @param cftStackInfo supplies the creation time used as the lower bound of the listing
 *
 * @return the matching cluster summaries, in listing order
 */
public List<ClusterSummary> getStackClustersSummary(AmazonElasticMapReduce amazonElasticMapReduce,
        List<String> stackClusterIds, CFTStackInfo cftStackInfo) {
    // Only clusters created after the stack was set up are of interest.
    ListClustersRequest request = new ListClustersRequest();
    request.setCreatedAfter(cftStackInfo.creationTime());

    List<ClusterSummary> matches = new ArrayList<>();
    ListClustersResult page = amazonElasticMapReduce.listClusters(request);
    for (;;) {
        for (ClusterSummary candidate : page.getClusters()) {
            if (stackClusterIds.contains(candidate.getId())) {
                matches.add(candidate);
            }
        }

        // A null marker means the page just processed was the last one.
        String nextMarker = page.getMarker();
        if (nextMarker == null) {
            return matches;
        }
        request.setMarker(nextMarker);
        page = amazonElasticMapReduce.listClusters(request);
    }
}
 
Example #3
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 6 votes vote down vote up
/**
 * Pages through the steps of {@code cluster} and keeps those whose name contains {@code tag}.
 * Paging stops once at least {@code cluster.steps()} matches were collected or the listing
 * has no further marker. The result is returned in submission order.
 */
private List<StepSummary> listSubmittedSteps(AmazonElasticMapReduce emr, String tag, NewCluster cluster)
{
    List<StepSummary> tagged = new ArrayList<>();
    ListStepsRequest request = new ListStepsRequest().withClusterId(cluster.id());

    boolean morePages = true;
    while (morePages && tagged.size() < cluster.steps()) {
        ListStepsResult page = emr.listSteps(request);
        for (StepSummary candidate : page.getSteps()) {
            if (!candidate.getName().contains(tag)) {
                continue;
            }
            tagged.add(candidate);
        }

        String nextMarker = page.getMarker();
        if (nextMarker == null) {
            morePages = false;
        }
        else {
            request.setMarker(nextMarker);
        }
    }

    // The ListSteps api returns steps in reverse order. So reverse them to submission order.
    Collections.reverse(tagged);
    return tagged;
}
 
Example #4
Source File: AwsClientFactory.java    From herd with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an Amazon EMR client for the region named in the given parameters. The result is cached
 * under {@code DaoSpringModuleConfig.HERD_CACHE_NAME}.
 *
 * @param awsParamsDto the AWS related parameters DTO that includes optional AWS credentials and proxy information
 *
 * @return the Amazon EMR client
 */
@Cacheable(DaoSpringModuleConfig.HERD_CACHE_NAME)
public AmazonElasticMapReduce getEmrClient(AwsParamsDto awsParamsDto)
{
    // Client configuration is derived from the DTO by the helper.
    ClientConfiguration clientConfiguration = awsHelper.getClientConfiguration(awsParamsDto);

    // Configuration and region are common to both credential modes.
    AmazonElasticMapReduceClientBuilder emrClientBuilder =
        AmazonElasticMapReduceClientBuilder.standard().withClientConfiguration(clientConfiguration).withRegion(awsParamsDto.getAwsRegionName());

    // Explicit session credentials are applied only when an access key id was supplied;
    // otherwise the builder falls back to the default AWS credentials provider chain.
    if (StringUtils.isNotBlank(awsParamsDto.getAwsAccessKeyId()))
    {
        BasicSessionCredentials sessionCredentials =
            new BasicSessionCredentials(awsParamsDto.getAwsAccessKeyId(), awsParamsDto.getAwsSecretKey(), awsParamsDto.getSessionToken());
        emrClientBuilder.withCredentials(new AWSStaticCredentialsProvider(sessionCredentials));
    }

    return emrClientBuilder.build();
}
 
Example #5
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the configured steps, submits them either to a newly created cluster or to an
 * existing one, waits for completion when there is at least one step, and builds the task result.
 */
private TaskResult run(String tag, AmazonElasticMapReduce emr, AWSKMSClient kms, Filer filer)
        throws IOException
{
    ParameterCompiler parameterCompiler = new ParameterCompiler(kms, context);

    // Step compiler over the "steps" parameter (empty list when absent)
    List<Config> steps = params.getListOrEmpty("steps", Config.class);
    StepCompiler stepCompiler = new StepCompiler(tag, steps, filer, parameterCompiler, objectMapper, defaultActionOnFailure);

    // "cluster" is either a nested config (new cluster) or something else (existing cluster id);
    // a ConfigException while parsing it as nested config selects the second interpretation.
    Config clusterConfig;
    try {
        clusterConfig = params.parseNestedOrGetEmpty("cluster");
    }
    catch (ConfigException ignore) {
        clusterConfig = null;
    }

    Submitter submitter = (clusterConfig != null)
            ? newClusterSubmitter(emr, tag, stepCompiler, clusterConfig, filer, parameterCompiler)
            : existingClusterSubmitter(emr, tag, stepCompiler, params.get("cluster", String.class), filer);

    // Submit EMR job
    SubmissionResult submission = submitter.submit();

    // Nothing to wait for when no steps were configured
    if (!steps.isEmpty()) {
        waitForSteps(emr, submission);
    }

    return result(submission);
}
 
Example #6
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the cluster's bootstrap actions for one named "Install HBase" (case-insensitive).
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return true when such a bootstrap action is listed; never returns false
 * @throws RuntimeException when no such bootstrap action is listed
 */
private static boolean isHBaseInstalled(AmazonElasticMapReduce client, String clusterId) {
	ListBootstrapActionsRequest request = new ListBootstrapActionsRequest().withClusterId(clusterId);
	for (Command action : client.listBootstrapActions(request).getBootstrapActions()) {
		if (action.getName().equalsIgnoreCase("Install HBase")) {
			return true;
		}
	}
	throw new RuntimeException("ERROR: Apache HBase is not installed on this cluster!!");
}
 
Example #7
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Helper method to determine the master node public DNS of an Amazon EMR cluster
 * 
 * @param client - The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterId - unique identifier for this cluster
 * @return public dns url
 * @throws IndexOutOfBoundsException if the service returns no job flow for {@code clusterId}
 */
public static String getPublicDns(AmazonElasticMapReduce client, String clusterId) {
	DescribeJobFlowsResult describeJobFlows = client.describeJobFlows(new DescribeJobFlowsRequest().withJobFlowIds(clusterId));
	List<JobFlowDetail> jobFlows = describeJobFlows.getJobFlows();
	// Only one id was requested, so the first entry is the cluster of interest.
	JobFlowDetail jobflow = jobFlows.get(0);
	JobFlowInstancesDetail instancesDetail = jobflow.getInstances();
	// Read the DNS name once so the logged value and the returned value cannot differ.
	String masterPublicDns = instancesDetail.getMasterPublicDnsName();
	LOG.info("EMR cluster public DNS is " + masterPublicDns);
	return masterPublicDns;
}
 
Example #8
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Helper method to determine if a usable Amazon EMR cluster exists.
 * <p>
 * The cluster must appear in the result of {@code listClusters()}, pass the HBase check and be
 * in state RUNNING or WAITING.
 * 
 * @param client
 *        The {@link AmazonElasticMapReduceClient} with read permissions
 * @param clusterIdentifier
 *        The Amazon EMR cluster to check; {@code null} or empty yields {@code false}
 * @return true if the Amazon EMR cluster exists with HBase installed and is RUNNING or WAITING,
 *         otherwise false
 */
public static boolean clusterExists(AmazonElasticMapReduce client, String clusterIdentifier) {
	if (clusterIdentifier != null && !clusterIdentifier.isEmpty()) {
		// NOTE(review): listClusters() without a request presumably returns only the first page
		// of results - confirm whether paging via the marker is needed here.
		ListClustersResult clustersList = client.listClusters();
		// A for-each loop visits every summary (including the last one) and copes with an empty
		// listing; the previous hand-rolled iterator loop did neither.
		for (ClusterSummary summary : clustersList.getClusters()) {
			if (!clusterIdentifier.equals(summary.getId())) {
				continue;
			}
			DescribeClusterRequest describeClusterRequest = new DescribeClusterRequest().withClusterId(clusterIdentifier);
			DescribeClusterResult result = client.describeCluster(describeClusterRequest);
			if (result == null) {
				continue;
			}
			Cluster cluster = result.getCluster();
			// A cluster without HBase is not usable. The check was previously inverted, which
			// made it impossible for this method to ever return true.
			if (!isHBaseInstalled(client, cluster.getId())) return false;
			String state = cluster.getStatus().getState();
			LOG.info(clusterIdentifier + " is " + state + ". ");
			if (state.equalsIgnoreCase("RUNNING") || state.equalsIgnoreCase("WAITING")) {
				LOG.info("The cluster with id " + clusterIdentifier + " exists and is " + state);
				return true;
			}
		}
	}
	LOG.info("The cluster with id " + clusterIdentifier + " does not exist");
	return false;
}
 
Example #9
Source File: TestEmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code getActiveCluster} obtains the EMR client once and issues exactly one
 * {@code listClusters} request against it.
 */
@Test
public void testGetActiveCluster() {
  EmrClusterJob.Client spiedClient = Mockito.spy(new EmrClusterJob().getClient(new Properties()));
  AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
  ListClustersResult listing = Mockito.mock(ListClustersResult.class);

  Mockito.doReturn(emr).when(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
  Mockito.doReturn(listing).when(emr).listClusters(Mockito.any(ListClustersRequest.class));

  spiedClient.getActiveCluster("foo");

  // verify(mock) uses the default mode, which is times(1).
  Mockito.verify(emr).listClusters(Mockito.any(ListClustersRequest.class));
  Mockito.verify(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
 
Example #10
Source File: TestEmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code terminateCluster} obtains the EMR client once and issues exactly one
 * {@code terminateJobFlows} request against it.
 */
@Test
public void testTerminateCluster() {
  EmrClusterJob.Client spiedClient = Mockito.spy(new EmrClusterJob().getClient(new Properties()));
  AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
  TerminateJobFlowsResult terminated = Mockito.mock(TerminateJobFlowsResult.class);

  Mockito.doReturn(emr).when(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
  Mockito.doReturn(terminated).when(emr).terminateJobFlows(Mockito.any(TerminateJobFlowsRequest.class));

  spiedClient.terminateCluster("foo");

  // verify(mock) uses the default mode, which is times(1).
  Mockito.verify(emr).terminateJobFlows(Mockito.any(TerminateJobFlowsRequest.class));
  Mockito.verify(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
 
Example #11
Source File: TestEmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code createCluster} obtains the EMR client once and issues exactly one
 * {@code runJobFlow} request against it.
 */
@Test
public void testCreateCluster() {
  Properties clusterProperties = new Properties();
  clusterProperties.setProperty("instanceCount", "1");

  EmrClusterJob.Client spiedClient = Mockito.spy(new EmrClusterJob().getClient(clusterProperties));
  AmazonElasticMapReduce emr = Mockito.mock(AmazonElasticMapReduce.class);
  RunJobFlowResult launched = Mockito.mock(RunJobFlowResult.class);

  Mockito.doReturn(launched).when(emr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
  Mockito.doReturn(emr).when(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));

  spiedClient.createCluster("foo");

  // verify(mock) uses the default mode, which is times(1).
  Mockito.verify(emr).runJobFlow(Mockito.any(RunJobFlowRequest.class));
  Mockito.verify(spiedClient).getEmrClient(Mockito.any(EmrClusterConfig.class));
}
 
Example #12
Source File: EmrClusterJob.java    From datacollector with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the EMR client held in {@code emrClient}, building it on first use from the access
 * key, secret key and user region of the supplied configuration.
 */
@VisibleForTesting
AmazonElasticMapReduce getEmrClient(EmrClusterConfig emrClusterConfig) {
  // Reuse the client built by an earlier call.
  if (emrClient != null) {
    return emrClient;
  }

  BasicAWSCredentials credentials = new BasicAWSCredentials(
      emrClusterConfig.getAccessKey(),
      emrClusterConfig.getSecretKey()
  );
  emrClient = AmazonElasticMapReduceClientBuilder.standard()
      .withCredentials(new AWSStaticCredentialsProvider(credentials))
      .withRegion(Regions.fromName(emrClusterConfig.getUserRegion()))
      .build();
  return emrClient;
}
 
Example #13
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code Submitter} that, when invoked, submits a new-cluster request, collects the ids
 * of the steps submitted with it, waits while the cluster boot status is polled, and returns a
 * {@code SubmissionResult} for the new cluster.
 * <p>
 * Each phase runs through a polling helper keyed by its own name ("submission", "steps",
 * "bootstrap") on {@code state}. The {@code runOnce}/{@code awaitOnce} names suggest the outcome
 * of a phase is remembered under that key - NOTE(review): confirm against the polling helpers.
 */
private Submitter newClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, Config clusterConfig, Filer filer, ParameterCompiler parameterCompiler)
{

    return () -> {
        // Start cluster
        NewCluster cluster = pollingRetryExecutor(state, "submission")
                // presumably a 30 second to 5 minute retry interval range - TODO confirm DurationInterval semantics
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                // TODO: EMR requests are not idempotent, thus retrying might produce duplicate cluster submissions.
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .runOnce(NewCluster.class, s -> submitNewClusterRequest(emr, tag, stepCompiler, clusterConfig, filer, parameterCompiler));

        // Get submitted step IDs
        List<String> stepIds = pollingRetryExecutor(this.state, "steps")
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .runOnce(new TypeReference<List<String>>() {}, s -> {
                    // Steps are looked up by tag on the freshly created cluster, logged, and reduced to their ids.
                    List<StepSummary> steps = listSubmittedSteps(emr, tag, cluster);
                    logSubmittedSteps(cluster.id(), cluster.steps(), i -> steps.get(i).getName(), i -> steps.get(i).getId());
                    return steps.stream().map(StepSummary::getId).collect(toList());
                });

        // Log cluster status while waiting for it to come up
        pollingWaiter(state, "bootstrap")
                .withWaitMessage("EMR cluster still booting")
                .withPollInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .awaitOnce(String.class, pollState -> checkClusterBootStatus(emr, cluster, pollState));

        return SubmissionResult.ofNewCluster(cluster.id(), stepIds);
    };
}
 
Example #14
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code Submitter} that, when invoked, compiles the steps, stages their files and adds
 * the steps to the already existing cluster {@code clusterId}, returning the ids of the added steps.
 */
private Submitter existingClusterSubmitter(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler, String clusterId, Filer filer)
{
    return () -> {
        List<String> submittedStepIds = pollingRetryExecutor(state, "submission")
                .retryUnless(AmazonServiceException.class, Aws::isDeterministicException)
                .withRetryInterval(DurationInterval.of(Duration.ofSeconds(30), Duration.ofMinutes(5)))
                .runOnce(new TypeReference<List<String>>() {}, retryState -> {

                    // The runner must be prepared before the steps can be compiled against it
                    RemoteFile runnerFile = prepareRunner(filer, tag);
                    stepCompiler.compile(runnerFile);

                    // Everything the steps reference has to be on S3 before submission
                    filer.stageFiles();

                    AddJobFlowStepsRequest addStepsRequest = new AddJobFlowStepsRequest()
                            .withJobFlowId(clusterId)
                            .withSteps(stepCompiler.stepConfigs());
                    int stepCount = addStepsRequest.getSteps().size();

                    logger.info("Submitting {} EMR step(s) to {}", stepCount, clusterId);
                    AddJobFlowStepsResult addStepsResult = emr.addJobFlowSteps(addStepsRequest);
                    logSubmittedSteps(
                            clusterId,
                            stepCount,
                            index -> addStepsRequest.getSteps().get(index).getName(),
                            index -> addStepsResult.getStepIds().get(index));
                    return ImmutableList.copyOf(addStepsResult.getStepIds());
                });

        return SubmissionResult.ofExistingCluster(clusterId, submittedStepIds);
    };
}
 
Example #15
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 5 votes vote down vote up
/**
 * Polls until the step-completion check for the last submitted step yields a result.
 * {@code Iterables.getLast} requires {@code submission.stepIds()} to be non-empty.
 */
private void waitForSteps(AmazonElasticMapReduce emr, SubmissionResult submission)
{
    // Only the final step id is handed to the completion check.
    String finalStepId = Iterables.getLast(submission.stepIds());

    pollingWaiter(state, "result")
            .withWaitMessage("EMR job still running: %s", submission.clusterId())
            .withPollInterval(DurationInterval.of(Duration.ofSeconds(15), Duration.ofMinutes(5)))
            .awaitOnce(Step.class, waiterState -> checkStepCompletion(emr, submission, finalStepId, waiterState));
}
 
Example #16
Source File: TableProviderFactory.java    From aws-athena-query-federation with Apache License 2.0 5 votes vote down vote up
/**
 * Registers one table provider per supported resource type, each backed by the supplied AWS client.
 * Exposed to tests so that the clients can be substituted.
 *
 * @param ec2 client handed to the EC2, EBS, VPC, security group, route table, subnet and image providers
 * @param emr client handed to the EMR cluster provider
 * @param rds client handed to the RDS provider
 * @param amazonS3 client handed to the S3 object and S3 bucket providers
 */
@VisibleForTesting
protected TableProviderFactory(AmazonEC2 ec2, AmazonElasticMapReduce emr, AmazonRDS rds, AmazonS3 amazonS3)
{
    // EC2-backed providers
    addProvider(new Ec2TableProvider(ec2));
    addProvider(new EbsTableProvider(ec2));
    addProvider(new VpcTableProvider(ec2));
    addProvider(new SecurityGroupsTableProvider(ec2));
    addProvider(new RouteTableProvider(ec2));
    addProvider(new SubnetTableProvider(ec2));
    addProvider(new ImagesTableProvider(ec2));
    // EMR-backed provider
    addProvider(new EmrClusterTableProvider(emr));
    // RDS-backed provider
    addProvider(new RdsTableProvider(rds));
    // S3-backed providers
    addProvider(new S3ObjectsTableProvider(amazonS3));
    addProvider(new S3BucketsTableProvider(amazonS3));
}
 
Example #17
Source File: AwsClientFactoryTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the caching of {@code getEmrClient}: equal parameters hit the cache, while different
 * credentials, different proxy settings or a cleared cache each produce a different client.
 */
@Test
public void testGetEmrClientCacheHitMiss()
{
    // Baseline parameters carrying both AWS credentials and proxy information.
    AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    AmazonElasticMapReduce firstClient = awsClientFactory.getEmrClient(awsParamsDto);

    // Cache hit: a distinct DTO instance holding the same values.
    AwsParamsDto sameValues =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    assertEquals(firstClient, awsClientFactory.getEmrClient(sameValues));

    // Cache miss: different AWS credentials.
    AwsParamsDto otherCredentials =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY_2, AWS_ASSUMED_ROLE_SECRET_KEY_2, AWS_ASSUMED_ROLE_SESSION_TOKEN_2, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    assertNotEquals(firstClient, awsClientFactory.getEmrClient(otherCredentials));

    // Cache miss: different http proxy information.
    AwsParamsDto otherProxy =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST_2, HTTP_PROXY_PORT_2,
            AWS_REGION_NAME_US_EAST_1);
    assertNotEquals(firstClient, awsClientFactory.getEmrClient(otherProxy));

    // Cache miss: the baseline parameters again, after the cache was cleared.
    cacheManager.getCache(DaoSpringModuleConfig.HERD_CACHE_NAME).clear();
    assertNotEquals(firstClient, awsClientFactory.getEmrClient(awsParamsDto));
}
 
Example #18
Source File: emr-add-steps.java    From aws-doc-sdk-examples with Apache License 2.0 5 votes vote down vote up
/**
 * Adds two steps (a script-runner step and a custom jar step) to an existing EMR cluster and
 * prints the ids of the added steps.
 */
public static void main(String[] args) {
	// Credentials come from the "default" profile of the local credentials file.
	AWSCredentials profileCredentials;
	try {
		profileCredentials = new ProfileCredentialsProvider("default").getCredentials();
	} catch (Exception e) {
		throw new AmazonClientException(
				"Cannot load credentials from .aws/credentials file. " +
				"Make sure that the credentials file exists and the profile name is specified within it.",
				e);
	}

	AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
		.withCredentials(new AWSStaticCredentialsProvider(profileCredentials))
		.withRegion(Regions.US_WEST_1)
		.build();

	// Step 1: run a bash script using a predefined step in the StepFactory helper class
	StepFactory stepFactory = new StepFactory();
	StepConfig bashScriptStep = new StepConfig()
		.withName("Run a bash script")
		.withHadoopJarStep(stepFactory.newScriptRunnerStep("s3://jeffgoll/emr-scripts/create_users.sh"))
		.withActionOnFailure("CONTINUE");

	// Step 2: run a custom jar file
	HadoopJarStepConfig customJarConfig = new HadoopJarStepConfig()
		.withJar("s3://path/to/my/jarfolder") // replace with the location of the jar to run as a step
		.withMainClass("com.my.Main1") // optional main class, this can be omitted if jar above has a manifest
		.withArgs("--verbose"); // optional list of arguments to pass to the jar
	StepConfig customJarStep = new StepConfig("RunHadoopJar", customJarConfig);

	AddJobFlowStepsRequest addStepsRequest = new AddJobFlowStepsRequest()
		.withJobFlowId("j-xxxxxxxxxxxx") // replace with cluster id to run the steps
		.withSteps(bashScriptStep, customJarStep);
	AddJobFlowStepsResult addStepsResult = emr.addJobFlowSteps(addStepsRequest);

	System.out.println(addStepsResult.getStepIds());
}
 
Example #19
Source File: InventoryUtil.java    From pacbot with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the EMR clusters of every region that is not listed in {@code skipRegions}.
 * A failure in one region is reported and does not stop the remaining regions.
 *
 * @param temporaryCredentials the temporary credentials
 * @param skipRegions the skip regions
 * @param accountId the accountId
 * @param accountName the account name
 * @return map from "accountId, accountName, region" keys (joined by the delimiter) to the
 *         clusters found there; regions without clusters are omitted
 */
public static Map<String,List<Cluster>> fetchEMRInfo(BasicSessionCredentials temporaryCredentials, String skipRegions,String accountId,String accountName){

	Map<String,List<Cluster>> clusterList = new LinkedHashMap<>();
	String expPrefix = InventoryConstants.ERROR_PREFIX_CODE+accountId + "\",\"Message\": \"Exception in fetching info for resource in specific region\" ,\"type\": \"EMR\" , \"region\":\"" ;
	for(Region region : RegionUtils.getRegions()){
		try{
			if(skipRegions.contains(region.getName())){
				continue;
			}
			AmazonElasticMapReduce emrClient = AmazonElasticMapReduceClientBuilder.standard()
				.withCredentials(new AWSStaticCredentialsProvider(temporaryCredentials))
				.withRegion(region.getName())
				.build();

			// Gather the summaries from all result pages first.
			List<ClusterSummary> summaries = new ArrayList<>();
			String marker = null;
			do{
				ListClustersResult page = emrClient.listClusters(new ListClustersRequest().withMarker(marker));
				summaries.addAll(page.getClusters());
				marker = page.getMarker();
			}while(marker!=null);

			// Then resolve each summary into its full cluster description.
			List<Cluster> describedClusters = new ArrayList<>();
			for(ClusterSummary summary : summaries){
				DescribeClusterRequest describeRequest = new DescribeClusterRequest().withClusterId(summary.getId());
				describedClusters.add(emrClient.describeCluster(describeRequest).getCluster());
			}

			if(describedClusters.isEmpty()){
				continue;
			}
			log.debug(InventoryConstants.ACCOUNT + accountId +" Type : EMR "+region.getName() + " >> "+describedClusters.size());
			clusterList.put(accountId+delimiter+accountName+delimiter+region.getName(),describedClusters);
		}catch(Exception e){
			// Errors are reported only for regions where EMR is a supported service.
			if(region.isServiceSupported(AmazonElasticMapReduce.ENDPOINT_PREFIX)){
				log.warn(expPrefix+ region.getName()+InventoryConstants.ERROR_CAUSE +e.getMessage()+"\"}");
				ErrorManageUtil.uploadError(accountId,region.getName(),"emr",e.getMessage());
			}
		}
	}
	return clusterList;
}
 
Example #20
Source File: InventoryUtilTest.java    From pacbot with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code fetchEMRInfo} against a mocked EMR client that lists one cluster and expects a
 * result map of size one.
 *
 * @throws Exception the exception
 */
@SuppressWarnings("static-access")
@Test
public void fetchEMRInfoTest() throws Exception {

    // Route every client-builder call to a mock that finally yields the mocked EMR client.
    mockStatic(AmazonElasticMapReduceClientBuilder.class);
    AmazonElasticMapReduce emrClient = PowerMockito.mock(AmazonElasticMapReduce.class);
    AmazonElasticMapReduceClientBuilder emrClientBuilder = PowerMockito.mock(AmazonElasticMapReduceClientBuilder.class);
    AWSStaticCredentialsProvider credentialsProvider = PowerMockito.mock(AWSStaticCredentialsProvider.class);
    PowerMockito.whenNew(AWSStaticCredentialsProvider.class).withAnyArguments().thenReturn(credentialsProvider);
    when(emrClientBuilder.standard()).thenReturn(emrClientBuilder);
    when(emrClientBuilder.withCredentials(anyObject())).thenReturn(emrClientBuilder);
    when(emrClientBuilder.withRegion(anyString())).thenReturn(emrClientBuilder);
    when(emrClientBuilder.build()).thenReturn(emrClient);

    // Listing: a single cluster summary with id "id" and no further marker.
    ClusterSummary onlySummary = new ClusterSummary();
    onlySummary.setId("id");
    List<ClusterSummary> summaries = new ArrayList<>();
    summaries.add(onlySummary);
    ListClustersResult listing = new ListClustersResult();
    listing.setClusters(summaries);
    when(emrClient.listClusters(anyObject())).thenReturn(listing);

    // Description: an empty cluster object for whatever id is requested.
    DescribeClusterResult description = new DescribeClusterResult();
    description.setCluster(new Cluster());
    when(emrClient.describeCluster(anyObject())).thenReturn(description);

    BasicSessionCredentials credentials = new BasicSessionCredentials("awsAccessKey", "awsSecretKey", "sessionToken");
    Map<String, List<Cluster>> fetched = inventoryUtil.fetchEMRInfo(credentials, "skipRegions", "account", "accountName");
    assertThat(fetched.size(), is(1));
}
 
Example #21
Source File: EmrDaoImplTest.java    From herd with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an EMR cluster from a definition without an NSCD script and checks that the captured
 * run-job-flow request carries no bootstrap action.
 */
@Test
public void testCreateEmrClusterNoNscdBootstrapScript()
{
    // Create an AWS parameters DTO.
    final AwsParamsDto awsParamsDto =
        new AwsParamsDto(AWS_ASSUMED_ROLE_ACCESS_KEY, AWS_ASSUMED_ROLE_SECRET_KEY, AWS_ASSUMED_ROLE_SESSION_TOKEN, HTTP_PROXY_HOST, HTTP_PROXY_PORT,
            AWS_REGION_NAME_US_EAST_1);
    EmrClusterDefinition emrClusterDefinition = new EmrClusterDefinition();
    final InstanceDefinitions instanceDefinitions =
        new InstanceDefinitions(new MasterInstanceDefinition(), new InstanceDefinition(), new InstanceDefinition());
    emrClusterDefinition.setInstanceDefinitions(instanceDefinitions);
    emrClusterDefinition.setNodeTags(Collections.emptyList());

    AmazonElasticMapReduce amazonElasticMapReduce = AmazonElasticMapReduceClientBuilder.standard().withRegion(awsParamsDto.getAwsRegionName())
        .build();
    when(awsClientFactory.getEmrClient(awsParamsDto)).thenReturn(amazonElasticMapReduce);
    when(emrOperations.runEmrJobFlow(amazonElasticMapReduceClientArgumentCaptor.capture(), runJobFlowRequestArgumentCaptor.capture()))
        .thenReturn(EMR_CLUSTER_ID);

    // Create the cluster without NSCD script configuration
    String clusterId = emrDaoImpl.createEmrCluster(EMR_CLUSTER_NAME, emrClusterDefinition, awsParamsDto);

    // Verifications
    // Expected value goes first so that a failure message labels the two values correctly.
    assertEquals(EMR_CLUSTER_ID, clusterId);
    verify(configurationHelper).getProperty(ConfigurationValue.EMR_NSCD_SCRIPT);
    verify(awsClientFactory).getEmrClient(awsParamsDto);
    verify(emrOperations).runEmrJobFlow(any(), any());
    RunJobFlowRequest runJobFlowRequest = runJobFlowRequestArgumentCaptor.getValue();
    List<BootstrapActionConfig> bootstrapActionConfigs = runJobFlowRequest.getBootstrapActions();

    // There should be no bootstrap action
    assertTrue(bootstrapActionConfigs.isEmpty());
}
 
Example #22
Source File: ClusterManager.java    From herd-mdl with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an EMR client from the credentials currently resolved by the default AWS credentials
 * provider chain, stores it in {@code emrClient} and returns it.
 */
AmazonElasticMapReduce createEmrClient() {
    // The credentials are resolved once, here, and then handed to the client as a static provider.
    AWSCredentials resolvedCredentials = new DefaultAWSCredentialsProviderChain().getCredentials();
    AWSStaticCredentialsProvider staticProvider = new AWSStaticCredentialsProvider(resolvedCredentials);

    emrClient = AmazonElasticMapReduceClientBuilder.standard()
        .withCredentials(staticProvider)
        .build();
    return emrClient;
}
 
Example #23
Source File: emr-flink-cluster-transient-step.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Launches a transient EMR cluster with Flink installed, submits a single word-count step and
 * prints the id of the new cluster. {@code withKeepJobFlowAliveWhenNoSteps(false)} is set on the
 * instances configuration.
 */
public static void main(String[] args) {
	AWSCredentials credentials_profile = null;
	try {
		credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
	} catch (Exception e) {
		throw new AmazonClientException(
				"Cannot load credentials from .aws/credentials file. " +
				"Make sure that the credentials file exists and the profile name is specified within it.",
				e);
	}

	AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
		.withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
		.withRegion(Regions.US_WEST_1)
		.build();

	List<StepConfig> stepConfigs = new ArrayList<StepConfig>();
	// NOTE(review): "bash -c" normally takes the command as ONE string; passed as separate
	// arguments, only "flink" would be the command and the rest positional parameters - confirm
	// how command-runner.jar handles this argument list.
	HadoopJarStepConfig flinkWordCountConf = new HadoopJarStepConfig()
		.withJar("command-runner.jar")
		.withArgs("bash","-c", "flink", "run", "-m", "yarn-cluster", "-yn", "2", "/usr/lib/flink/examples/streaming/WordCount.jar", "--input", "s3://path/to/input-file.txt", "--output", "s3://path/to/output/");

	StepConfig flinkRunWordCountStep = new StepConfig()
		.withName("Flink add a wordcount step and terminate")
		.withActionOnFailure("CONTINUE")
		.withHadoopJarStep(flinkWordCountConf);

	stepConfigs.add(flinkRunWordCountStep);

	Application flink = new Application().withName("Flink");

	RunJobFlowRequest request = new RunJobFlowRequest()
		.withName("flink-transient")
		.withReleaseLabel("emr-5.20.0")
		.withApplications(flink)
		.withServiceRole("EMR_DefaultRole")
		.withJobFlowRole("EMR_EC2_DefaultRole")
		.withLogUri("s3://path/to/my/logfiles")
		.withInstances(new JobFlowInstancesConfig()
			.withEc2KeyName("myEc2Key")
			.withEc2SubnetId("subnet-12ab3c45")
			.withInstanceCount(3)
			.withKeepJobFlowAliveWhenNoSteps(false)
			.withMasterInstanceType("m4.large")
			.withSlaveInstanceType("m4.large"))
		.withSteps(stepConfigs);

	RunJobFlowResult result = emr.runJobFlow(request);
	// Print the id itself; result.toString() renders the whole result object instead.
	System.out.println("The cluster ID is " + result.getJobFlowId());

}
 
Example #24
Source File: EmrClusterTableProvider.java    From aws-athena-query-federation with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a table provider that keeps the given EMR client in its {@code emr} field.
 *
 * @param emr the Amazon EMR client stored by this provider
 */
public EmrClusterTableProvider(AmazonElasticMapReduce emr)
{
    this.emr = emr;
}
 
Example #25
Source File: EMRUtils.java    From aws-big-data-blog with Apache License 2.0 4 votes vote down vote up
/**
 * Launches an Apache HBase cluster on Amazon EMR using the AWS SDK for Java and blocks until
 * the cluster reports the WAITING state.
 *
 * <p>If {@code clusterExists} reports that {@code clusterIdentifier} already exists, nothing is
 * launched and that identifier is returned unchanged. Otherwise a job flow is submitted with an
 * HBase bootstrap action plus three steps (enable debugging, start HBase, modify the backup
 * schedule), and the cluster state is polled every 10 seconds.
 *
 * <p>If the cluster ends up TERMINATED or TERMINATED_WITH_ERRORS the JVM is terminated via
 * {@code System.exit(-1)}.
 *
 * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
 * @param clusterIdentifier - identifier of an existing cluster
 * @param amiVersion - AMI to use for launching this cluster; must not be null or empty
 * @param keypair - A keypair for SSHing into the Amazon EMR master node; must not be null or empty
 * @param masterInstanceType - Master node Amazon EC2 instance type; must not be null or empty
 * @param coreInstanceType - core nodes Amazon EC2 instance type (not validated here)
 * @param logUri - An Amazon S3 location for the cluster's log files; must not be null or empty
 * @param numberOfNodes - total number of nodes in this cluster including master node; must be at least 1
 * @return {@code clusterIdentifier} when that cluster already exists, otherwise the job flow ID
 *         of the newly created cluster
 * @throws RuntimeException if a required argument is missing or invalid, or if the calling
 *         thread is interrupted while waiting for the cluster to become available
 */
public static String createCluster(AmazonElasticMapReduce client,
		String clusterIdentifier,
		String amiVersion,
		String keypair,
		String masterInstanceType,
		String coreInstanceType,
		String logUri,
		int numberOfNodes) {

	if (clusterExists(client, clusterIdentifier)) {
		LOG.info("Cluster " + clusterIdentifier + " is available");
		return clusterIdentifier;
	}

	//Error checking
	if (amiVersion == null || amiVersion.isEmpty()) throw new RuntimeException("ERROR: Please specify an AMI Version");
	if (keypair == null || keypair.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon Key Pair");
	if (masterInstanceType == null || masterInstanceType.isEmpty()) throw new RuntimeException("ERROR: Please specify a Master Instance Type");
	if (logUri == null || logUri.isEmpty()) throw new RuntimeException("ERROR: Please specify a valid Amazon S3 bucket for your logs.");
	// The message demands at least one node, so zero must be rejected as well as negatives.
	if (numberOfNodes < 1) throw new RuntimeException("ERROR: Please specify at least 1 node");

	RunJobFlowRequest request = new RunJobFlowRequest()
		.withAmiVersion(amiVersion)
		.withBootstrapActions(new BootstrapActionConfig()
			.withName("Install HBase")
			.withScriptBootstrapAction(new ScriptBootstrapActionConfig()
				.withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
		.withName("Job Flow With HBAse Actions")
		.withSteps(new StepConfig() //enable debugging step
				.withName("Enable debugging")
				.withActionOnFailure("TERMINATE_CLUSTER")
				.withHadoopJarStep(new StepFactory().newEnableDebuggingStep()),
			//Start HBase step - after installing it with a bootstrap action
			createStepConfig("Start HBase","TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()),
			//add HBase backup step
			createStepConfig("Modify backup schedule","TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
		.withLogUri(logUri)
		.withInstances(new JobFlowInstancesConfig()
			.withEc2KeyName(keypair)
			.withInstanceCount(numberOfNodes)
			.withKeepJobFlowAliveWhenNoSteps(true)
			.withMasterInstanceType(masterInstanceType)
			.withSlaveInstanceType(coreInstanceType));

	RunJobFlowResult result = client.runJobFlow(request);
	String jobFlowId = result.getJobFlowId();

	String state;
	while (!(state = clusterState(client, jobFlowId)).equalsIgnoreCase("waiting")) {
		// Check for terminal states before sleeping: a terminated cluster never reaches
		// WAITING, so without this the loop would either spin forever or react late.
		if (state.equalsIgnoreCase("TERMINATED_WITH_ERRORS") || state.equalsIgnoreCase("TERMINATED")) {
			LOG.error("Could not create EMR Cluster");
			// NOTE(review): exiting the JVM from a utility method is harsh; kept so that
			// existing callers see the same failure mode as before.
			System.exit(-1);
		}

		LOG.info(jobFlowId + " is " + state + ". Waiting for cluster to become available.");
		try {
			Thread.sleep(10 * 1000);
		} catch (InterruptedException e) {
			// Restore the interrupt flag and stop waiting; swallowing the interrupt would
			// make this loop impossible to cancel.
			Thread.currentThread().interrupt();
			throw new RuntimeException("Interrupted while waiting for cluster " + jobFlowId + " to become available", e);
		}
	}
	LOG.info("Created cluster " + jobFlowId);
	LOG.info("Cluster " + clusterIdentifier + " is available");
	return jobFlowId;
}
 
Example #26
Source File: ClusterManager.java    From herd-mdl with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the value of {@code emrClient}.
 *
 * @return the Amazon EMR client referenced by {@code emrClient}
 */
AmazonElasticMapReduce getEmrClient() {
    return emrClient;
}
 
Example #27
Source File: custom-emrfs-materials.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Launches an EMR cluster whose {@code emrfs-site} classification is configured with a custom
 * encryption materials provider, then prints the ID of the new cluster.
 *
 * <p>Credentials are read from the {@code default} profile and the request is sent to the
 * US_WEST_1 region. The bucket, key pair and class names are placeholders to be replaced.
 *
 * @param args unused
 * @throws AmazonClientException if the credentials profile cannot be loaded
 */
public static void main(String[] args) {
	AWSCredentials credentials_profile = null;
	try {
		credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
	} catch (Exception e) {
		throw new AmazonClientException(
				"Cannot load credentials from .aws/credentials file. " +
				"Make sure that the credentials file exists and the profile name is specified within it.",
				e);
	}

	AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
		.withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
		.withRegion(Regions.US_WEST_1)
		.build();

	// Properties applied to the emrfs-site classification below.
	Map<String,String> emrfsProperties = new HashMap<>();
	emrfsProperties.put("fs.s3.cse.encryptionMaterialsProvider.uri","s3://mybucket/MyCustomEncryptionMaterialsProvider.jar");
	emrfsProperties.put("fs.s3.cse.enabled","true");
	emrfsProperties.put("fs.s3.consistent","true");
	emrfsProperties.put("fs.s3.cse.encryptionMaterialsProvider","full.class.name.of.EncryptionMaterialsProvider");

	Configuration myEmrfsConfig = new Configuration()
		.withClassification("emrfs-site")
		.withProperties(emrfsProperties);

	Application hive = new Application().withName("Hive");
	Application spark = new Application().withName("Spark");
	Application ganglia = new Application().withName("Ganglia");
	Application zeppelin = new Application().withName("Zeppelin");

	RunJobFlowRequest request = new RunJobFlowRequest()
		.withName("ClusterWithCustomEMRFSEncryptionMaterialsProvider")
		.withReleaseLabel("emr-5.20.0")
		.withApplications(hive,spark,ganglia,zeppelin)
		.withConfigurations(myEmrfsConfig)
		.withServiceRole("EMR_DefaultRole")
		.withJobFlowRole("EMR_EC2_DefaultRole")
		.withLogUri("s3://path/to/emr/logs")
		.withInstances(new JobFlowInstancesConfig()
			.withEc2KeyName("myEc2Key")
			.withInstanceCount(3)
			.withKeepJobFlowAliveWhenNoSteps(true)
			.withMasterInstanceType("m4.large")
			.withSlaveInstanceType("m4.large"));

	RunJobFlowResult result = emr.runJobFlow(request);
	// Print the job flow (cluster) ID itself rather than the whole result object's toString().
	System.out.println("The cluster ID is " + result.getJobFlowId());
}
 
Example #28
Source File: create_cluster.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an EMR cluster with Hive, Spark, Ganglia and Zeppelin plus an "Enable debugging"
 * step, then prints the ID of the new cluster.
 *
 * <p>Credentials are read from the {@code default} profile and the request is sent to the
 * US_WEST_1 region. The subnet, key pair and S3 log location are placeholders to be replaced.
 *
 * @param args unused
 * @throws AmazonClientException if the credentials profile cannot be loaded
 */
public static void main(String[] args) {
	AWSCredentials credentials_profile = null;
	try {
		credentials_profile = new ProfileCredentialsProvider("default").getCredentials(); // specifies any named profile in .aws/credentials as the credentials provider
	} catch (Exception e) {
		throw new AmazonClientException(
				"Cannot load credentials from .aws/credentials file. " +
				"Make sure that the credentials file exists and that the profile name is defined within it.",
				e);
	}

	// create an EMR client using the credentials and region specified in order to create the cluster
	AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
		.withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
		.withRegion(Regions.US_WEST_1)
		.build();

	// create a step to enable debugging in the AWS Management Console
	StepFactory stepFactory = new StepFactory();
	StepConfig enabledebugging = new StepConfig()
		.withName("Enable debugging")
		.withActionOnFailure("TERMINATE_JOB_FLOW")
		.withHadoopJarStep(stepFactory.newEnableDebuggingStep());

	// specify applications to be installed and configured when EMR creates the cluster
	Application hive = new Application().withName("Hive");
	Application spark = new Application().withName("Spark");
	Application ganglia = new Application().withName("Ganglia");
	Application zeppelin = new Application().withName("Zeppelin");

	// create the cluster
	RunJobFlowRequest request = new RunJobFlowRequest()
		.withName("MyClusterCreatedFromJava")
		.withReleaseLabel("emr-5.20.0") // specifies the EMR release version label, we recommend the latest release
		.withSteps(enabledebugging)
		.withApplications(hive,spark,ganglia,zeppelin)
		.withLogUri("s3://path/to/my/emr/logs") // a URI in S3 for log files is required when debugging is enabled
		.withServiceRole("EMR_DefaultRole") // replace the default with a custom IAM service role if one is used
		.withJobFlowRole("EMR_EC2_DefaultRole") // replace the default with a custom EMR role for the EC2 instance profile if one is used
		.withInstances(new JobFlowInstancesConfig()
			.withEc2SubnetId("subnet-12ab34c56")
			.withEc2KeyName("myEc2Key")
			.withInstanceCount(3)
			.withKeepJobFlowAliveWhenNoSteps(true)
			.withMasterInstanceType("m4.large")
			.withSlaveInstanceType("m4.large"));

	RunJobFlowResult result = emr.runJobFlow(request);
	// Print the job flow (cluster) ID itself rather than the whole result object's toString().
	System.out.println("The cluster ID is " + result.getJobFlowId());
}
 
Example #29
Source File: create-spark-cluster.java    From aws-doc-sdk-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an EMR cluster with the Spark application and an "Enable debugging" step, then
 * prints the ID of the new cluster.
 *
 * <p>Credentials are read from the {@code default} profile and the request is sent to the
 * US_WEST_1 region. The subnet, key pair and S3 log location are placeholders to be replaced.
 *
 * @param args unused
 * @throws AmazonClientException if the credentials profile cannot be loaded
 */
public static void main(String[] args) {
	AWSCredentials credentials_profile = null;
	try {
		credentials_profile = new ProfileCredentialsProvider("default").getCredentials();
	} catch (Exception e) {
		throw new AmazonClientException(
				"Cannot load credentials from .aws/credentials file. " +
				"Make sure that the credentials file exists and the profile name is specified within it.",
				e);
	}

	AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.standard()
		.withCredentials(new AWSStaticCredentialsProvider(credentials_profile))
		.withRegion(Regions.US_WEST_1)
		.build();

	// create a step to enable debugging in the AWS Management Console
	StepFactory stepFactory = new StepFactory();
	StepConfig enabledebugging = new StepConfig()
		.withName("Enable debugging")
		.withActionOnFailure("TERMINATE_JOB_FLOW")
		.withHadoopJarStep(stepFactory.newEnableDebuggingStep());

	Application spark = new Application().withName("Spark");

	RunJobFlowRequest request = new RunJobFlowRequest()
		.withName("Spark Cluster")
		.withReleaseLabel("emr-5.20.0")
		.withSteps(enabledebugging)
		.withApplications(spark)
		.withLogUri("s3://path/to/my/logs/")
		.withServiceRole("EMR_DefaultRole")
		.withJobFlowRole("EMR_EC2_DefaultRole")
		.withInstances(new JobFlowInstancesConfig()
			.withEc2SubnetId("subnet-12ab3c45")
			.withEc2KeyName("myEc2Key")
			.withInstanceCount(3)
			.withKeepJobFlowAliveWhenNoSteps(true)
			.withMasterInstanceType("m4.large")
			.withSlaveInstanceType("m4.large"));

	RunJobFlowResult result = emr.runJobFlow(request);
	// Print the job flow (cluster) ID itself rather than the whole result object's toString().
	System.out.println("The cluster ID is " + result.getJobFlowId());
}
 
Example #30
Source File: EmrOperatorFactory.java    From digdag with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link RunJobFlowRequest} from the {@code cluster} configuration, submits it to EMR
 * and returns a handle for the new cluster.
 *
 * <p>In order: prepares the runner file, compiles the steps, derives the application,
 * configuration and bootstrap-action lists, stages files through {@code filer}, and finally
 * submits the request. When no applications are configured, Hadoop, Hive, Spark and Flink are
 * requested. Default instance types are m4 when {@code ec2.subnet_id} is set and m3 otherwise.
 *
 * @param emr EMR client used to submit the job flow
 * @param tag value appended to the cluster name and stored in the {@code DIGDAG_CLUSTER_ID} tag
 * @param stepCompiler compiles and supplies the step configurations for the request
 * @param cluster cluster configuration; must contain a nested {@code ec2} section
 * @param filer used to prepare the runner and to stage files before submission
 * @param parameterCompiler passed through to bootstrap action construction
 * @return a {@code NewCluster} built from the job flow ID and the number of submitted steps
 * @throws IOException propagated from the helper calls made here
 */
private NewCluster submitNewClusterRequest(AmazonElasticMapReduce emr, String tag, StepCompiler stepCompiler,
        Config cluster, Filer filer, ParameterCompiler parameterCompiler)
        throws IOException
{
    RemoteFile runner = prepareRunner(filer, tag);

    // Compile steps
    stepCompiler.compile(runner);

    List<StepConfig> stepConfigs = stepCompiler.stepConfigs();

    Config ec2 = cluster.getNested("ec2");
    Config master = ec2.getNestedOrGetEmpty("master");
    List<Config> core = ec2.getOptional("core", Config.class).transform(ImmutableList::of).or(ImmutableList.of());
    List<Config> task = ec2.getListOrEmpty("task", Config.class);

    List<String> applications = cluster.getListOrEmpty("applications", String.class);
    if (applications.isEmpty()) {
        applications = ImmutableList.of("Hadoop", "Hive", "Spark", "Flink");
    }

    // TODO: allow configuring additional application parameters
    List<Application> applicationConfigs = applications.stream()
            .map(application -> new Application().withName(application))
            .collect(toList());

    // TODO: merge configurations with the same classification?
    List<Configuration> configurations = cluster.getListOrEmpty("configurations", JsonNode.class).stream()
            .map(this::configurations)
            .flatMap(Collection::stream)
            .collect(toList());

    List<JsonNode> bootstrap = cluster.getListOrEmpty("bootstrap", JsonNode.class);
    List<BootstrapActionConfig> bootstrapActions = new ArrayList<>();
    for (int i = 0; i < bootstrap.size(); i++) {
        // Bootstrap actions are numbered starting from 1.
        bootstrapActions.add(bootstrapAction(i + 1, bootstrap.get(i), tag, filer, runner, parameterCompiler));
    }

    // Stage files to S3
    filer.stageFiles();

    Optional<String> subnetId = ec2.getOptional("subnet_id", String.class);

    String defaultMasterInstanceType;
    String defaultCoreInstanceType;
    String defaultTaskInstanceType;

    if (subnetId.isPresent()) {
        // m4 requires VPC (subnet id)
        defaultMasterInstanceType = "m4.2xlarge";
        defaultCoreInstanceType = "m4.xlarge";
        defaultTaskInstanceType = "m4.xlarge";
    }
    else {
        defaultMasterInstanceType = "m3.2xlarge";
        defaultCoreInstanceType = "m3.xlarge";
        defaultTaskInstanceType = "m3.xlarge";
    }

    RunJobFlowRequest request = new RunJobFlowRequest()
            .withName(cluster.get("name", String.class, "Digdag") + " (" + tag + ")")
            .withReleaseLabel(cluster.get("release", String.class, "emr-5.2.0"))
            .withSteps(stepConfigs)
            .withBootstrapActions(bootstrapActions)
            .withApplications(applicationConfigs)
            .withLogUri(cluster.get("logs", String.class, null))
            .withJobFlowRole(cluster.get("cluster_role", String.class, "EMR_EC2_DefaultRole"))
            .withServiceRole(cluster.get("service_role", String.class, "EMR_DefaultRole"))
            .withTags(new Tag().withKey("DIGDAG_CLUSTER_ID").withValue(tag))
            .withVisibleToAllUsers(cluster.get("visible", boolean.class, true))
            .withConfigurations(configurations)
            .withInstances(new JobFlowInstancesConfig()
                    .withInstanceGroups(ImmutableList.<InstanceGroupConfig>builder()
                            // Master Node
                            .add(instanceGroupConfig("Master", master, "MASTER", defaultMasterInstanceType, 1))
                            // Core Group
                            .addAll(instanceGroupConfigs("Core", core, "CORE", defaultCoreInstanceType))
                            // Task Groups
                            .addAll(instanceGroupConfigs("Task %d", task, "TASK", defaultTaskInstanceType))
                            .build()
                    )
                    .withAdditionalMasterSecurityGroups(ec2.getListOrEmpty("additional_master_security_groups", String.class))
                    .withAdditionalSlaveSecurityGroups(ec2.getListOrEmpty("additional_slave_security_groups", String.class))
                    .withEmrManagedMasterSecurityGroup(ec2.get("emr_managed_master_security_group", String.class, null))
                    .withEmrManagedSlaveSecurityGroup(ec2.get("emr_managed_slave_security_group", String.class, null))
                    .withServiceAccessSecurityGroup(ec2.get("service_access_security_group", String.class, null))
                    .withTerminationProtected(cluster.get("termination_protected", boolean.class, false))
                    .withPlacement(cluster.getOptional("availability_zone", String.class)
                            .transform(zone -> new PlacementType().withAvailabilityZone(zone)).orNull())
                    .withEc2SubnetId(subnetId.orNull())
                    .withEc2KeyName(ec2.get("key", String.class))
                    // auto_terminate defaults to true, i.e. the cluster is not kept alive once its steps finish
                    .withKeepJobFlowAliveWhenNoSteps(!cluster.get("auto_terminate", boolean.class, true)));

    int stepCount = request.getSteps().size();

    logger.info("Submitting EMR job with {} step(s)", stepCount);
    RunJobFlowResult result = emr.runJobFlow(request);
    // Two placeholders, two arguments: the former trailing `result` argument had no placeholder
    // and was never rendered.
    logger.info("Submitted EMR job with {} step(s): {}", stepCount, result.getJobFlowId());

    return NewCluster.of(result.getJobFlowId(), stepCount);
}