/*-
 * -\-\-
 * Spydra
 * --
 * Copyright (C) 2016 - 2018 Spotify AB
 * --
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -/-/-
 */

package com.spotify.spydra;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import com.google.api.client.googleapis.auth.oauth2.GoogleCredential;
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport;
import com.google.api.client.http.HttpTransport;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.json.jackson2.JacksonFactory;
import com.google.api.services.dataproc.Dataproc;
import com.google.api.services.dataproc.model.Cluster;
import com.google.api.services.dataproc.model.ListClustersResponse;
import com.google.auth.oauth2.GceHelper;
import com.spotify.spydra.model.SpydraArgument;
import com.spotify.spydra.submitter.api.Submitter;
import com.spotify.spydra.util.GcpUtils;
import com.spotify.spydra.util.SpydraArgumentUtil;
import java.io.IOException;
import java.net.URI;
import java.security.GeneralSecurityException;
import java.util.Arrays;
import java.util.Collections;
import java.util.UUID;
import org.apache.hadoop.examples.WordCount;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Assume;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LifecycleIT {

  private static final Logger LOGGER = LoggerFactory.getLogger(LifecycleIT.class);

  private static final String CLIENT_ID = UUID.randomUUID().toString();
  private static final GcpUtils gcpUtils = new GcpUtils();

  @Test
  public void testLifecycle() throws Exception {
    // The Hadoop credentials provider is buggy: it tries to contact the GCP metadata
    // service if we use the default account.
    Assume.assumeTrue("Skipping lifecycle test, not running on gce and "
            + "GOOGLE_APPLICATION_CREDENTIALS not set",
        hasApplicationJsonOrRunningOnGce());

    SpydraArgument testArgs = SpydraArgumentUtil.loadArguments("integration-test-config.json");
    SpydraArgument arguments = SpydraArgumentUtil
        .dataprocConfiguration(CLIENT_ID, testArgs.getLogBucket(), testArgs.getRegion());
    arguments.getCluster().numWorkers(3);
    arguments.getSubmit().jar(getExamplesJarPath());
    arguments.getSubmit().setJobArgs(Arrays.asList("pi", "1", "1"));
    // TODO We should test the init action as well, but uploading it before running the test
    // is tricky. We could upload it manually to a test bucket here and set the right things.
    arguments.getCluster().getOptions().remove(SpydraArgument.OPTION_INIT_ACTIONS);
    // Merge to get all other custom test arguments
    arguments = SpydraArgument.merge(arguments, testArgs);

    LOGGER.info("Using the following service account to run gcloud commands locally: "
        + arguments.getCluster().getOptions().get(SpydraArgument.OPTION_ACCOUNT));

    Submitter submitter = Submitter.getSubmitter(arguments);
    assertTrue("job wasn't successful", submitter.executeJob(arguments));
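
    // After a successful run the submitter should already be tearing the cluster down,
    // and the job history files should have been moved out of the intermediate directory
    // into the configured done directory.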
    assertTrue("cluster was not collected", isClusterCollected(arguments));

    URI doneUri = URI.create(arguments.clusterProperties().getProperty(
        "mapred:mapreduce.jobhistory.done-dir"));
    LOGGER.info("Checking that we have two files in: " + doneUri);
    assertEquals(2, getFileCount(doneUri));

    URI intermediateUri = URI.create(arguments.clusterProperties().getProperty(
        "mapred:mapreduce.jobhistory.intermediate-done-dir"));
    LOGGER.info("Checking that we do not have any files in: " + intermediateUri);
    assertEquals(0, getFileCount(intermediateUri));
  }

  private boolean hasApplicationJsonOrRunningOnGce() {
    return new GcpUtils().getJsonCredentialsPath().isPresent()
        || GceHelper.runningOnComputeEngine();
  }

  private boolean isClusterCollected(SpydraArgument arguments)
      throws IOException, GeneralSecurityException {
    GoogleCredential credential = new GcpUtils().getCredential();
    if (credential.createScopedRequired()) {
      credential = credential.createScoped(
          Collections.singletonList("https://www.googleapis.com/auth/cloud-platform"));
    }

    HttpTransport httpTransport = GoogleNetHttpTransport.newTrustedTransport();
    JsonFactory jsonFactory = JacksonFactory.getDefaultInstance();
    Dataproc dataprocService = new Dataproc.Builder(httpTransport, jsonFactory, credential)
        .setApplicationName("Google Cloud Platform Sample")
        .build();

    Dataproc.Projects.Regions.Clusters.List request =
        dataprocService.projects().regions().clusters().list(
            arguments.getCluster().getOptions().get(SpydraArgument.OPTION_PROJECT),
            arguments.getRegion());

    ListClustersResponse response;
    do {
      response = request.execute();
      if (response.getClusters() != null) {
        String clusterName = arguments.getCluster().getName();
        for (Cluster cluster : response.getClusters()) {
          if (cluster.getClusterName().equals(clusterName)) {
            String status = cluster.getStatus().getState();
            LOGGER.info("Cluster state is " + status);
            return status.equals("DELETING");
          }
        }
      }
      request.setPageToken(response.getNextPageToken());
    } while (response.getNextPageToken() != null);
    return true;
  }

  private int getFileCount(URI uri) throws IOException {
    FileSystem fs = gcpUtils.fileSystemForUri(uri);
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
    int count = 0;
    while (it.hasNext()) {
      it.next();
      count++;
    }
    return count;
  }

  private String getExamplesJarPath() {
    Class<?> clazz = WordCount.class;
    return clazz.getProtectionDomain().getCodeSource().getLocation().getPath();
  }
}