/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred.gridmix;

import static org.junit.Assert.*;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.mapreduce.MapReduceTestUtil;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.security.UserGroupInformation;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Validate emulation of distributed cache load in gridmix simulated jobs.
 * 
 */
public class TestDistCacheEmulation {

  // Emulator under test. It is (re)assigned by runSetupGenerateDistCacheData()
  // and testDistCacheEmulationConfigurability(), and read by the validation
  // helpers through getDistributedCacheDir().
  private DistributedCacheEmulator dce = null;

  /**
   * Initializes the test cluster through
   * <code>GridmixTestUtils.initCluster</code> and makes sure that the
   * per-class working directory below <code>target</code> exists.
   *
   * @throws IOException
   *           propagated from the cluster initialization
   */
  @BeforeClass
  public static void init() throws IOException {
    GridmixTestUtils.initCluster(TestDistCacheEmulation.class);

    String workDirName = "target" + File.separator
        + TestDistCacheEmulation.class.getName();
    File workDir = new File(workDirName);
    if (workDir.exists()) {
      // nothing to create
      return;
    }
    assertTrue(workDir.mkdirs());
  }

  /**
   * Shuts the test cluster down by delegating to
   * <code>GridmixTestUtils.shutdownCluster()</code>.
   *
   * @throws IOException
   *           propagated from the cluster shutdown
   */
  @AfterClass
  public static void shutDown() throws IOException {
    GridmixTestUtils.shutdownCluster();
  }

  /**
   * Checks the output of the GenerateDistCacheData job: the files list file
   * has to sit directly under the distributed cache directory and, once that
   * list file has been deleted, the remaining files have to match the expected
   * sizes (see {@link #validateDistCacheFiles(List, Path)}).
   * 
   * @param jobConf
   *          configuration of GenerateDistCacheData job.
   * @param sortedFileSizes
   *          array of sorted distributed cache file sizes
   * @throws IOException
   *           on file system errors
   * @throws FileNotFoundException
   *           propagated from the listing of the distributed cache directory
   */
  private void validateDistCacheData(Configuration jobConf,
      long[] sortedFileSizes) throws FileNotFoundException, IOException {
    Path cacheDir = dce.getDistributedCacheDir();
    String listFileName = jobConf
        .get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST);
    FileSystem fileSystem = FileSystem.get(jobConf);

    // The files list file must be a direct child of the dist cache directory.
    Path listFile = new Path(listFileName);
    Path qualifiedParent = listFile.getParent().makeQualified(
        fileSystem.getUri(), fileSystem.getWorkingDirectory());
    boolean listFileInCacheDir = cacheDir.equals(qualifiedParent);
    assertTrue("Path of Distributed Cache files list file is wrong.",
        listFileInCacheDir);

    // Remove the list file so that only the generated dist cache files remain
    // in the directory.
    String deleteFailure =
        "Failed to delete distributed Cache files list file " + listFile;
    boolean deleted = fileSystem.delete(listFile, true);
    assertTrue(deleteFailure, deleted);

    // Box the expected sizes into a list for the per-file validation.
    List<Long> expectedSizes = new ArrayList<Long>();
    for (int i = 0; i < sortedFileSizes.length; i++) {
      expectedSizes.add(sortedFileSizes[i]);
    }
    validateDistCacheFiles(expectedSizes, cacheDir);
  }

  /**
   * Validate the files found directly under the given distributed cache
   * directory: there must be exactly one file per expected size, and every
   * file must carry the permission
   * <code>GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_PERM</code>.
   * The passed-in list is not modified.
   * 
   * @param filesSizesExpected
   *          list of sizes of expected dist cache files
   * @param distCacheDir
   *          the distributed cache dir to be validated
   * @throws IOException
   *           on file system errors
   * @throws FileNotFoundException
   *           propagated from the directory listing
   */
  private void validateDistCacheFiles(List<Long> filesSizesExpected, Path distCacheDir)
      throws FileNotFoundException, IOException {
    FileStatus[] statuses = GridmixTestUtils.dfs.listStatus(distCacheDir);
    assertEquals("Number of files under distributed cache dir is wrong.",
        filesSizesExpected.size(), statuses.length);

    // Tick sizes off a private copy so that the caller's list is left intact.
    List<Long> remainingSizes = new ArrayList<Long>(filesSizesExpected);
    FsPermission expectedPerm = new FsPermission(
        GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_PERM);

    for (FileStatus stat : statuses) {
      String file = stat.getPath().toUri().getPath();

      // Box explicitly so that it is obvious that List.remove(Object) (remove
      // by value) is meant and not List.remove(int) (remove by index).
      assertTrue("File size of distributed cache file " + file + " is wrong.",
          remainingSizes.remove(Long.valueOf(stat.getLen())));

      assertEquals("Wrong permissions for distributed cache file " + file,
          expectedPerm, stat.getPermission());
    }
  }

  /**
   * Sets six dummy distributed cache files (paths, sizes, visibilities and
   * timestamps) in the given Configuration object <code>conf</code>, together
   * with the <code>user.name</code> of the current user. Five of the files are
   * expected to be HDFS-based; the one below
   * <code>&lt;user&gt;/.staging</code> is treated as local-FS-based.
   * 
   * @param conf
   *          configuration where dist cache config properties are to be set
   * @return array of sorted HDFS-based distributed cache file sizes
   * @throws IOException
   *           if the current user cannot be determined
   */
  private long[] configureDummyDistCacheFiles(Configuration conf)
      throws IOException {
    final String currentUser = UserGroupInformation.getCurrentUser()
        .getShortUserName();
    conf.set("user.name", currentUser);

    // These dummy entries are put into the gridmix configuration so that they
    // end up in the configuration of the JobStory objects.
    final String stagingFile =
        "/tmp/" + currentUser + "/.staging/job_1/file2.txt";
    conf.setStrings(MRJobConfig.CACHE_FILES,
        "hdfs:///tmp/file1.txt", stagingFile, "hdfs:///user/user1/file3.txt",
        "/home/user2/file4.txt", "subdir1/file5.txt", "subdir2/file6.gz");
    conf.setStrings(MRJobConfig.CACHE_FILES_SIZES,
        "400", "2500", "700", "1200", "1500", "500");
    conf.setStrings(JobContext.CACHE_FILE_VISIBILITIES,
        "true", "false", "false", "true", "true", "false");
    conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS,
        "1234", "2345", "34567", "5434", "125", "134");

    // The local FS based dist cache file whose path contains <user>/.staging
    // is not created on HDFS, hence its size (2500) is missing from the
    // expected sizes.
    return new long[] { 1500, 1200, 700, 500, 400 };
  }

  /**
   * Runs setupGenerateDistCacheData() on a new DistributedCacheEmulator (which
   * is stored in the field <code>dce</code>) and returns the jobConf. Fills the
   * array <code>sortedFileSizes</code> that can be used for validation. The
   * exit code of setupGenerateDistCacheData() is validated here: 0 is expected
   * with <code>generate</code>, otherwise
   * <code>Gridmix.MISSING_DIST_CACHE_FILES_ERROR</code>.
   * 
   * @param generate
   *          true if -generate option is specified
   * @param sortedFileSizes
   *          output array receiving the sorted HDFS-based distributed cache
   *          file sizes; must have room for all of them
   * @return the job configuration the emulator was set up with, after its
   *         distributed cache properties have been reset
   * @throws IOException
   * @throws InterruptedException
   */
  private Configuration runSetupGenerateDistCacheData(boolean generate,
      long[] sortedFileSizes) throws IOException, InterruptedException {
    Configuration storyConf = new Configuration();
    long[] expectedSizes = configureDummyDistCacheFiles(storyConf);
    System.arraycopy(expectedSizes, 0, sortedFileSizes, 0,
        expectedSizes.length);

    // The job stories of all 3 jobs carry the same dist cache files in their
    // configurations.
    final int jobCount = 3;
    DebugJobProducer producer = new DebugJobProducer(jobCount, storyConf);

    Configuration jobConf = GridmixTestUtils.mrvl.getConfig();
    Path ioPath = new Path("testSetupGenerateDistCacheData");
    ioPath = ioPath.makeQualified(GridmixTestUtils.dfs.getUri(),
        GridmixTestUtils.dfs.getWorkingDirectory());

    // Start from an empty, world-accessible io directory.
    FileSystem fileSystem = FileSystem.get(jobConf);
    if (fileSystem.exists(ioPath)) {
      fileSystem.delete(ioPath, true);
    }
    FileSystem.mkdirs(fileSystem, ioPath, new FsPermission((short) 0777));

    dce = createDistributedCacheEmulator(jobConf, ioPath, generate);
    int actualExitCode = dce.setupGenerateDistCacheData(producer);
    int expectedExitCode;
    if (generate) {
      expectedExitCode = 0;
    } else {
      expectedExitCode = Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
    }
    assertEquals("setupGenerateDistCacheData failed.", expectedExitCode,
        actualExitCode);

    // reset back
    resetDistCacheConfigProperties(jobConf);
    return jobConf;
  }

  /**
   * Blanks out every Distributed Cache related property (current as well as
   * old property names) in the given job configuration <code>jobConf</code> by
   * setting it to the empty string.
   * 
   * @param jobConf
   *          job configuration
   */
  private void resetDistCacheConfigProperties(Configuration jobConf) {
    final String[] distCacheKeys = {
        // current/latest property names
        MRJobConfig.CACHE_FILES,
        MRJobConfig.CACHE_FILES_SIZES,
        MRJobConfig.CACHE_FILE_TIMESTAMPS,
        JobContext.CACHE_FILE_VISIBILITIES,
        // old property names
        "mapred.cache.files",
        "mapred.cache.files.filesizes",
        "mapred.cache.files.visibilities",
        "mapred.cache.files.timestamps" };

    for (String key : distCacheKeys) {
      jobConf.setStrings(key, "");
    }
  }

  /**
   * Runs the GenerateDistCacheData job end to end and checks that it is a
   * map-only job, that it completes successfully and that it creates the dist
   * cache files properly.
   * 
   * @throws Exception
   */
  @Test (timeout=200000)
  public void testGenerateDistCacheData() throws Exception {
    final long[] expectedSizes = new long[5];
    final Configuration jobConf =
        runSetupGenerateDistCacheData(true, expectedSizes);

    Job job = new GenerateDistCacheData(jobConf).call();

    int reduceTasks = job.getNumReduceTasks();
    assertEquals("Number of reduce tasks in GenerateDistCacheData is not 0.",
        0, reduceTasks);

    boolean succeeded = job.waitForCompletion(false);
    assertTrue("GenerateDistCacheData job failed.", succeeded);

    validateDistCacheData(jobConf, expectedSizes);
  }

  /**
   * Validate setupGenerateDistCacheData by validating
   * <ul>
   * <li>the file count and total byte count recorded in the job
   * configuration,</li>
   * <li>the permissions (0644) of the dist cache files list file and</li>
   * <li>the content of the generated sequence file, read through the record
   * reader of <code>GenerateDistCacheData.GenDCDataFormat</code>. This includes
   * validation of dist cache file paths and their file sizes.</li>
   * </ul>
   * The record reader is closed once the validation is done, whether it
   * succeeded or not.
   *
   * @param jobConf
   *          job configuration produced by setupGenerateDistCacheData
   * @param sortedFileSizes
   *          expected dist cache file sizes, in the order in which they are
   *          expected in the files list file
   * @throws IOException
   * @throws InterruptedException
   */
  private void validateSetupGenDC(Configuration jobConf, long[] sortedFileSizes)
      throws IOException, InterruptedException {
    // build things needed for validation
    long sumOfFileSizes = 0;
    for (long size : sortedFileSizes) {
      sumOfFileSizes += size;
    }

    FileSystem fs = FileSystem.get(jobConf);
    assertEquals("Number of distributed cache files to be generated is wrong.",
        sortedFileSizes.length,
        jobConf.getInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, -1));
    assertEquals("Total size of dist cache files to be generated is wrong.",
        sumOfFileSizes,
        jobConf.getLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, -1));
    Path filesListFile = new Path(
        jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST));
    FileStatus stat = fs.getFileStatus(filesListFile);
    assertEquals("Wrong permissions of dist Cache files list file "
        + filesListFile, new FsPermission((short) 0644), stat.getPermission());

    // A single split that covers the whole files list file.
    InputSplit split = new FileSplit(filesListFile, 0, stat.getLen(),
        (String[]) null);
    TaskAttemptContext taskContext = MapReduceTestUtil
        .createDummyMapTaskAttemptContext(jobConf);
    RecordReader<LongWritable, BytesWritable> reader = new GenerateDistCacheData.GenDCDataFormat()
        .createRecordReader(split, taskContext);
    MapContext<LongWritable, BytesWritable, NullWritable, BytesWritable> mapContext = new MapContextImpl<LongWritable, BytesWritable, NullWritable, BytesWritable>(
        jobConf, taskContext.getTaskAttemptID(), reader, null, null,
        MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mapContext);

    try {
      // start validating setupGenerateDistCacheData
      doValidateSetupGenDC(reader, fs, sortedFileSizes);
    } finally {
      // Release the file opened by the reader even if an assertion failed.
      reader.close();
    }
  }

  /**
   * Validate setupGenerateDistCacheData by validating
   * <ul>
   * <li>that "others" have execute permission on the distributed cache
   * directory and</li>
   * <li>the content of the generated sequence file: record <code>i</code> must
   * have <code>sortedFileSizes[i]</code> as key and, as value, a file path
   * whose parent is the distributed cache directory.</li>
   * </ul>
   *
   * @param reader
   *          initialized record reader over the dist cache files list file
   * @param fs
   *          file system used to look up the directory and to qualify paths
   * @param sortedFileSizes
   *          expected file sizes in the order the reader returns them
   * @throws IOException
   * @throws InterruptedException
   */
  private void doValidateSetupGenDC(
      RecordReader<LongWritable, BytesWritable> reader, FileSystem fs,
      long[] sortedFileSizes) throws IOException, InterruptedException {

    // Validate permissions of dist cache directory (expected value first, so
    // that a failure message reports expected/actual the right way round).
    Path distCacheDir = dce.getDistributedCacheDir();
    assertEquals(
        "Wrong permissions for distributed cache dir " + distCacheDir,
        FsAction.EXECUTE,
        fs.getFileStatus(distCacheDir).getPermission().getOtherAction()
            .and(FsAction.EXECUTE));

    // Validate the content of the sequence file generated by
    // dce.setupGenerateDistCacheData().
    for (int i = 0; i < sortedFileSizes.length; i++) {
      assertTrue("Number of files written to the sequence file by "
          + "setupGenerateDistCacheData is less than the expected.",
          reader.nextKeyValue());
      LongWritable key = reader.getCurrentKey();
      BytesWritable val = reader.getCurrentValue();
      long fileSize = key.get();
      // NOTE(review): decodes with the platform default charset; confirm that
      // this matches the charset the emulator uses when writing the paths.
      String file = new String(val.getBytes(), 0, val.getLength());

      // Dist Cache files should be sorted based on file size.
      assertEquals("Dist cache file size is wrong.", sortedFileSizes[i],
          fileSize);

      // Validate dist cache file path: the parent dir of the dist cache file
      // should be the dist cache dir.
      Path parent = new Path(file).getParent().makeQualified(fs.getUri(),
          fs.getWorkingDirectory());
      assertEquals("Public dist cache file path is wrong.", distCacheDir,
          parent);
    }
  }

  /**
   * Checks DistributedCacheEmulator's setup of GenerateDistCacheData, first
   * with and then without the -generate option.
   * 
   * @throws IOException
   * @throws InterruptedException
   */
  @Test  (timeout=20000)
  public void testSetupGenerateDistCacheData() throws IOException,
      InterruptedException {
    final long[] expectedSizes = new long[5];

    // With -generate: the setup must succeed and produce a valid files list.
    Configuration generatedConf =
        runSetupGenerateDistCacheData(true, expectedSizes);
    validateSetupGenDC(generatedConf, expectedSizes);

    // Without -generate, while the distributed cache files are missing in the
    // expected path: only the exit code is of interest, and it is asserted
    // inside the helper.
    runSetupGenerateDistCacheData(false, expectedSizes);
  }

  /**
   * Builds a DistributedCacheEmulator for the given configuration and io path,
   * registers it with the LOADJOB job creator policy and initializes it by
   * calling init() on it with a dummy trace.
   *
   * @param conf
   *          configuration handed to the emulator and used to pick the policy
   * @param ioPath
   *          io path handed to the emulator
   * @param generate
   *          value of the generate flag passed to init()
   * @return the initialized emulator
   * @throws IOException
   *           propagated from the emulator
   */
  private DistributedCacheEmulator createDistributedCacheEmulator(
      Configuration conf, Path ioPath, boolean generate) throws IOException {
    DistributedCacheEmulator emulator =
        new DistributedCacheEmulator(conf, ioPath);

    JobCreator policy = JobCreator.getPolicy(conf, JobCreator.LOADJOB);
    policy.setDistCacheEmulator(emulator);

    emulator.init("dummytrace", policy, generate);
    return emulator;
  }

  /**
   * Test the configuration property for disabling/enabling emulation of
   * distributed cache load: emulation must be on by default and off once
   * <code>DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE</code> is
   * set to false. The property is switched back on before the test returns.
   *
   * @throws IOException
   */
  @Test  (timeout=2000)
  public void testDistCacheEmulationConfigurability() throws IOException {
    Configuration jobConf = GridmixTestUtils.mrvl.getConfig();
    Path ioPath = new Path("testDistCacheEmulationConfigurability")
        .makeQualified(GridmixTestUtils.dfs.getUri(),GridmixTestUtils.dfs.getWorkingDirectory());
    FileSystem fs = FileSystem.get(jobConf);
    FileSystem.mkdirs(fs, ioPath, new FsPermission((short) 0777));

    // default config
    dce = createDistributedCacheEmulator(jobConf, ioPath, false);
    assertTrue("Default configuration of "
        + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
        + " is wrong.", dce.shouldEmulateDistCacheLoad());

    // config property set to false
    jobConf.setBoolean(
        DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE, false);
    try {
      dce = createDistributedCacheEmulator(jobConf, ioPath, false);
      assertFalse("Disabling of emulation of distributed cache load by setting "
          + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
          + " to false is not working.", dce.shouldEmulateDistCacheLoad());
    } finally {
      // The configuration comes from the test cluster and is presumably shared
      // with the other tests of this class. Switch emulation back on (the
      // default asserted above) so that their outcome does not depend on the
      // order in which the tests run.
      jobConf.setBoolean(
          DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE, true);
    }
  }
  /**
   * Test the method configureDistCacheFiles of DistributedCacheEmulator.
   */
  @Test  (timeout=2000)
  public void testDistCacheEmulator() throws Exception {
    // Job-story style configuration carrying the six dummy dist cache files.
    Configuration conf = new Configuration();
    configureDummyDistCacheFiles(conf);

    File workDir = new File(
        "target" + File.separator + this.getClass().getName());
    Path ioPath = new Path(workDir.getAbsolutePath());
    DistributedCacheEmulator emulator =
        new DistributedCacheEmulator(conf, ioPath);

    JobConf jobConf = new JobConf(conf);
    jobConf.setUser(UserGroupInformation.getCurrentUser().getShortUserName());

    File trace = new File("src" + File.separator + "test" + File.separator
        + "resources" + File.separator + "data" + File.separator
        + "wordcount.json");
    emulator.init(trace.getAbsolutePath(), JobCreator.LOADJOB, true);
    emulator.configureDistCacheFiles(conf, jobConf);

    // configureDistCacheFiles should fill caches AND tmpfiles from the
    // MRJobConfig.CACHE_FILES property; together they must account for all
    // six configured files.
    String[] cacheEntries = conf.getStrings(MRJobConfig.CACHE_FILES);
    String[] tmpEntries = conf.getStrings("tmpfiles");
    int cacheCount = 0;
    if (cacheEntries != null) {
      cacheCount = cacheEntries.length;
    }
    int tmpCount = 0;
    if (tmpEntries != null) {
      tmpCount = tmpEntries.length;
    }
    assertEquals(6, cacheCount + tmpCount);
  }
}