/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.mapred.gridmix;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.DeprecationDelta;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.tools.rumen.JobStory;
import org.apache.hadoop.tools.rumen.JobStoryProducer;
import org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Emulation of Distributed Cache Usage in gridmix.
 * <br> Emulating distributed cache load in gridmix puts load on TaskTrackers
 * and affects the execution time of tasks, because TaskTrackers have to
 * localize the distributed cache files of the simulated jobs.
 * <br> Gridmix creates the distributed cache files for simulated jobs by
 * launching a MapReduce job {@link GenerateDistCacheData} in advance, i.e.
 * before launching the simulated jobs.
 * <br> The distributed cache file paths used in the original cluster are mapped
 * to unique file names in the simulated cluster.
 * <br> All HDFS-based distributed cache files generated by gridmix are
 * public distributed cache files. But Gridmix makes sure that load incurred due
 * to localization of private distributed cache files on the original cluster
 * is also faithfully simulated. Gridmix emulates the load due to private
 * distributed cache files by mapping private distributed cache files of
 * different users in the original cluster to different public distributed cache
 * files in the simulated cluster.
 *
 * <br> The configuration properties like
 * {@link MRJobConfig#CACHE_FILES}, {@link MRJobConfig#CACHE_FILE_VISIBILITIES},
 * {@link MRJobConfig#CACHE_FILES_SIZES} and
 * {@link MRJobConfig#CACHE_FILE_TIMESTAMPS} obtained from the trace are used
 * to decide
 * <li> the size of each distributed cache file to be generated,
 * <li> whether a distributed cache file has already been seen in this trace
 * file, and
 * <li> whether a distributed cache file was considered public or private.
 * <br>
 * <br> Gridmix configures these generated files as distributed cache files for
 * the simulated jobs.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
class DistributedCacheEmulator {
  private static final Log LOG =
      LogFactory.getLog(DistributedCacheEmulator.class);

  static final long AVG_BYTES_PER_MAP = 128 * 1024 * 1024L;// 128MB

  private Path distCachePath;

  /**
   * Map from the simulated cluster's distributed cache file paths to their
   * file sizes. Only unique distributed cache files are entered into this map.
   * Two distributed cache files are considered the same if and only if their
   * file paths, visibilities and timestamps are the same.
   */
  private Map<String, Long> distCacheFiles = new HashMap<String, Long>();

  /**
   * Configuration property for whether gridmix should emulate
   * distributed cache usage or not. Default value is true.
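   * <br> Distributed cache emulation can be turned off by setting this
   * property to false, e.g. on the gridmix command line (assuming the
   * standard generic options are available):
   * <pre>{@code
   * -Dgridmix.distributed-cache-emulation.enable=false
   * }</pre>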
   */
  static final String GRIDMIX_EMULATE_DISTRIBUTEDCACHE =
      "gridmix.distributed-cache-emulation.enable";

  // Whether to emulate distributed cache usage or not
  boolean emulateDistributedCache = true;

  // Whether to generate distributed cache data or not
  boolean generateDistCacheData = false;

  Configuration conf; // gridmix configuration

  private static final Charset charsetUTF8 = Charset.forName("UTF-8");

  // Pseudo local file system where local FS based distributed cache files are
  // created by gridmix.
  FileSystem pseudoLocalFs = null;

  {
    // Need to handle deprecation of these MapReduce-internal configuration
    // properties as MapReduce doesn't handle their deprecation.
    Configuration.addDeprecations(new DeprecationDelta[] {
      new DeprecationDelta("mapred.cache.files.filesizes",
          MRJobConfig.CACHE_FILES_SIZES),
      new DeprecationDelta("mapred.cache.files.visibilities",
          MRJobConfig.CACHE_FILE_VISIBILITIES)
    });
  }

  /**
   * @param conf gridmix configuration
   * @param ioPath &lt;ioPath&gt;/distributedCache/ is the gridmix Distributed
   *               Cache directory
   */
  public DistributedCacheEmulator(Configuration conf, Path ioPath) {
    this.conf = conf;
    distCachePath = new Path(ioPath, "distributedCache");
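    // Register PseudoLocalFs as the file system implementation behind the
    // "pseudo" URI scheme, so that init() can obtain the pseudo local file
    // system through FileSystem.get(new URI("pseudo:///"), conf).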
    this.conf.setClass("fs.pseudo.impl", PseudoLocalFs.class, FileSystem.class);
  }

  /**
   * This is to be called before any other method of DistributedCacheEmulator.
   * <br> Checks if emulation of distributed cache load is needed and is feasible.
   *  Sets the flags generateDistCacheData and emulateDistributedCache to the
   *  appropriate values.
   * <br> Gridmix does not emulate distributed cache load if
   * <ol><li> the specific gridmix job type doesn't need emulation of
   * distributed cache load OR
   * <li> the trace comes from a stream instead of a file OR
   * <li> the distributed cache dir where distributed cache data is to be
   * generated by gridmix is on local file system OR
   * <li> execute permission is missing for any of the ancestor directories
   * of &lt;ioPath&gt; up to the root. This is because, for emulation of
   * distributed cache load, the distributed cache files created under
   * &lt;ioPath&gt;/distributedCache/ should be considered by Hadoop
   * as public distributed cache files.
   * <li> creation of pseudo local file system fails.</ol>
   * <br> For (2), (3), (4) and (5), generation of distributed cache data
   * is also disabled.
   * 
   * @param traceIn trace file path. If this is '-', then the trace is read
   *                from the standard input stream (stdin).
   * @param jobCreator job creator of gridmix jobs of a specific type
   * @param generate  true if -generate option was specified
   * @throws IOException
   */
  void init(String traceIn, JobCreator jobCreator, boolean generate)
      throws IOException {
    emulateDistributedCache = jobCreator.canEmulateDistCacheLoad()
        && conf.getBoolean(GRIDMIX_EMULATE_DISTRIBUTEDCACHE, true);
    generateDistCacheData = generate;

    if (generateDistCacheData || emulateDistributedCache) {
      if ("-".equals(traceIn)) {// trace is from stdin
        LOG.warn("Gridmix will not emulate Distributed Cache load because "
            + "the input trace source is a stream instead of file.");
        emulateDistributedCache = generateDistCacheData = false;
      } else if (FileSystem.getLocal(conf).getUri().getScheme().equals(
          distCachePath.toUri().getScheme())) {// local FS
        LOG.warn("Gridmix will not emulate Distributed Cache load because "
            + "<iopath> provided is on local file system.");
        emulateDistributedCache = generateDistCacheData = false;
      } else {
        // Check if execute permission is available on all the ancestor
        // directories of distCachePath, up to the root.
        FileSystem fs = FileSystem.get(conf);
        Path cur = distCachePath.getParent();
        while (cur != null) {
          if (cur.toString().length() > 0) {
            FsPermission perm = fs.getFileStatus(cur).getPermission();
            if (!perm.getOtherAction().and(FsAction.EXECUTE).equals(
                FsAction.EXECUTE)) {
              LOG.warn("Gridmix will not emulate Distributed Cache load "
                  + "because the ascendant directory (of distributed cache "
                  + "directory) " + cur + " doesn't have execute permission "
                  + "for others.");
              emulateDistributedCache = generateDistCacheData = false;
              break;
            }
          }
          cur = cur.getParent();
        }
      }
    }

    // Check if the pseudo local file system can be created
    try {
      pseudoLocalFs = FileSystem.get(new URI("pseudo:///"), conf);
    } catch (URISyntaxException e) {
      LOG.warn("Gridmix will not emulate Distributed Cache load because "
          + "creation of pseudo local file system failed.", e);
      emulateDistributedCache = generateDistCacheData = false;
    }
  }

  /**
   * @return true if gridmix should emulate distributed cache load
   */
  boolean shouldEmulateDistCacheLoad() {
    return emulateDistributedCache;
  }

  /**
   * @return true if gridmix should generate distributed cache data
   */
  boolean shouldGenerateDistCacheData() {
    return generateDistCacheData;
  }

  /**
   * @return the distributed cache directory path
   */
  Path getDistributedCacheDir() {
    return distCachePath;
  }

  /**
   * Create distributed cache directories.
   * Also create a file containing the list of distributed cache files that
   * will be used by all the simulated jobs.
   * @param jsp job story producer for the trace
   * @return exit code
   * @throws IOException
   */
  int setupGenerateDistCacheData(JobStoryProducer jsp)
      throws IOException {

    createDistCacheDirectory();
    return buildDistCacheFilesList(jsp);
  }

  /**
   * Create distributed cache directory where distributed cache files will be
   * created by the MapReduce job {@link GenerateDistCacheData#JOB_NAME}.
   * @throws IOException
   */
  private void createDistCacheDirectory() throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileSystem.mkdirs(fs, distCachePath, new FsPermission((short) 0777));
  }

  /**
   * Create the list of unique distributed cache files needed for all the
   * simulated jobs and write the list to a special file.
   * @param jsp job story producer for the trace
   * @return exit code
   * @throws IOException
   */
  private int buildDistCacheFilesList(JobStoryProducer jsp) throws IOException {
    // Read all the jobs from the trace file and build the list of unique
    // distributed cache files.
    JobStory jobStory;
    while ((jobStory = jsp.getNextJob()) != null) {
      if (jobStory.getOutcome() == Pre21JobHistoryConstants.Values.SUCCESS && 
         jobStory.getSubmissionTime() >= 0) {
        updateHDFSDistCacheFilesList(jobStory);
      }
    }
    jsp.close();

    return writeDistCacheFilesList();
  }

  /**
   * For the job to be simulated, identify the needed distributed cache files
   * by mapping the original cluster's distributed cache file paths to the
   * simulated cluster's paths, and add these paths to the map
   * {@code distCacheFiles}.
   *<br>
   * JobStory should contain distributed cache related properties like
   * <li> {@link MRJobConfig#CACHE_FILES}
   * <li> {@link MRJobConfig#CACHE_FILE_VISIBILITIES}
   * <li> {@link MRJobConfig#CACHE_FILES_SIZES}
   * <li> {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}
   * <li> {@link MRJobConfig#CLASSPATH_FILES}
   *
   * <li> {@link MRJobConfig#CACHE_ARCHIVES}
   * <li> {@link MRJobConfig#CACHE_ARCHIVES_VISIBILITIES}
   * <li> {@link MRJobConfig#CACHE_ARCHIVES_SIZES}
   * <li> {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}
   * <li> {@link MRJobConfig#CLASSPATH_ARCHIVES}
   *
   * <li> {@link MRJobConfig#CACHE_SYMLINK}
   *
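   * <br> These per-file properties are parallel lists: index <code>i</code>
   * of {@link MRJobConfig#CACHE_FILES} corresponds to index <code>i</code> of
   * the sizes, visibilities and timestamps lists. A minimal sketch with
   * hypothetical values:
   * <pre>{@code
   * jobConf.getStrings(MRJobConfig.CACHE_FILES);
   *     // ["hdfs://nn:8020/user/alice/lib/dep.jar",
   *     //  "hdfs://nn:8020/shared/dict.txt"]
   * jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
   *     // ["4096", "1048576"]
   * jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
   *     // ["false", "true"]
   * jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
   *     // ["1240335960362", "1240335961270"]
   * }</pre>
   *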
   * @param jobdesc JobStory of original job obtained from trace
   * @throws IOException
   */
  void updateHDFSDistCacheFilesList(JobStory jobdesc) throws IOException {

    // Map original job's distributed cache file paths to simulated cluster's
    // paths, to be used by this simulated job.
    JobConf jobConf = jobdesc.getJobConf();

    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
    if (files != null) {

      String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
      String[] visibilities =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
      String[] timeStamps =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);

      FileSystem fs = FileSystem.get(conf);
      String user = jobConf.getUser();
      for (int i = 0; i < files.length; i++) {
        // Check if visibilities are available, because older Hadoop versions
        // didn't distinguish between public and private distributed cache
        // files.
        boolean visibility =
            (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
        if (isLocalDistCacheFile(files[i], user, visibility)) {
          // local FS based distributed cache file.
          // Create this file on the pseudo local FS on the fly (i.e. when the
          // simulated job is submitted).
          continue;
        }
        // distributed cache file on hdfs
        String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                                 visibility, user);

        // No need to add a distributed cache file path to the list if
        // (1) the mapped path is already there in the list OR
        // (2) the file with the mapped path already exists.
        // In any of the above 2 cases, file paths, timestamps, file sizes and
        // visibilities match. File sizes should match if file paths and
        // timestamps match because single file path with single timestamp
        // should correspond to a single file size.
        if (distCacheFiles.containsKey(mappedPath) ||
            fs.exists(new Path(mappedPath))) {
          continue;
        }
        distCacheFiles.put(mappedPath, Long.valueOf(fileSizes[i]));
      }
    }
  }

  /**
   * Check if the file path provided was constructed by MapReduce for a
   * distributed cache file on local file system.
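   * <br> The check relies on the observation that paths of local FS based
   * distributed cache files constructed by MapReduce pass through the job
   * submitter's staging directory on the shared file system. A minimal
   * sketch with hypothetical values:
   * <pre>{@code
   * // private file under alice's .staging directory => local FS based
   * isLocalDistCacheFile(
   *     "hdfs://nn:8020/user/alice/.staging/job_01/files/input.txt",
   *     "alice", false);   // returns true
   * // public file not under .staging => not local FS based
   * isLocalDistCacheFile("hdfs://nn:8020/shared/dict.txt", "alice", true);
   *                        // returns false
   * }</pre>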
   * @param filePath path of the distributed cache file
   * @param user job submitter of the job for which &lt;filePath&gt; is a
   *             distributed cache file
   * @param visibility <code>true</code> for public distributed cache file
   * @return true if the path provided is of a local file system based
   *              distributed cache file
   */
  static boolean isLocalDistCacheFile(String filePath, String user,
                                       boolean visibility) {
    return (!visibility && filePath.contains(user + "/.staging"));
  }

  /**
   * Map the HDFS based distributed cache file path from original cluster to
   * a unique file name on the simulated cluster.
   * <br> Unique distributed cache file names on the simulated cluster are
   * generated using the original cluster's <li> file path, <li> timestamp
   * and <li> the job-submitter, for private distributed cache files.
   * <br> This implies that if, on the original cluster, a single HDFS file is
   * considered as two private distributed cache files for two jobs of
   * different users, then the corresponding simulated jobs will have two
   * different files of the same size in public distributed cache, one for each
   * user. Both these simulated jobs will not share these distributed cache
   * files, thus leading to the same load as seen in the original cluster.
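   * <br> A minimal sketch of the name generation, with hypothetical trace
   * values:
   * <pre>{@code
   * // public file: the name is derived from the original path and timestamp
   * String publicName = MD5Hash.digest(
   *     "hdfs://nn:8020/shared/dict.txt" + "1240335961270").toString();
   * // private file: the job-submitter is appended, so the same original file
   * // used privately by different users maps to different generated files
   * String aliceName = MD5Hash.digest(
   *     "hdfs://nn:8020/shared/dict.txt" + "1240335961270" + "alice").toString();
   * // the returned path is <distCachePath>/<name> on the simulated cluster
   * }</pre>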
   * @param file distributed cache file path
   * @param timeStamp time stamp of the distributed cache file
   * @param isPublic true if this distributed cache file is a public
   *                 distributed cache file
   * @param user job submitter on original cluster
   * @return the mapped path on simulated cluster
   */
  private String mapDistCacheFilePath(String file, String timeStamp,
      boolean isPublic, String user) {
    String id = file + timeStamp;
    if (!isPublic) {
      // consider job-submitter for private distributed cache file
      id = id.concat(user);
    }
    return new Path(distCachePath, MD5Hash.digest(id).toString()).toUri()
               .getPath();
  }

  /**
   * Write the list of distributed cache files in the decreasing order of
   * file sizes into the sequence file. This file will be input to the job
   * {@link GenerateDistCacheData}.
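   * Each record of the sequence file is a (file size, file path) pair, with
   * the file size as a {@link LongWritable} key and the UTF-8 encoded file
   * path as a {@link BytesWritable} value.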
   * Also reports an error if distributed cache files are needed but the
   * -generate option was not specified.
   * @return exit code
   * @throws IOException
   */
  private int writeDistCacheFilesList()
      throws IOException {
    // Sort the distributed cache files in the decreasing order of file sizes.
    List<Map.Entry<String, Long>> dcFiles =
        new ArrayList<Map.Entry<String, Long>>(distCacheFiles.entrySet());
    Collections.sort(dcFiles, new Comparator<Map.Entry<String, Long>>() {
      public int compare(Map.Entry<String, Long> dc1,
                         Map.Entry<String, Long> dc2) {
        return dc2.getValue().compareTo(dc1.getValue());
      }
    });

    // write the sorted distributed cache files to the sequence file
    FileSystem fs = FileSystem.get(conf);
    Path distCacheFilesList = new Path(distCachePath, "_distCacheFiles.txt");
    conf.set(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST,
        distCacheFilesList.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(fs, conf,
        distCacheFilesList, LongWritable.class, BytesWritable.class,
        SequenceFile.CompressionType.NONE);

    // Total number of unique distributed cache files
    int fileCount = dcFiles.size();
    long byteCount = 0;// Total size of all distributed cache files
    long bytesSync = 0;// Bytes after previous sync;used to add sync marker
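    // Sync markers are added after roughly AVG_BYTES_PER_MAP bytes of
    // distributed cache data so that the list file can be split such that
    // each map of the data-generation job creates about AVG_BYTES_PER_MAP
    // bytes of distributed cache data.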

    for (Map.Entry<String, Long> entry : dcFiles) {
      LongWritable fileSize = new LongWritable(entry.getValue());
      BytesWritable filePath =
          new BytesWritable(entry.getKey().getBytes(charsetUTF8));

      byteCount += fileSize.get();
      bytesSync += fileSize.get();
      if (bytesSync > AVG_BYTES_PER_MAP) {
        src_writer.sync();
        bytesSync = fileSize.get();
      }
      src_writer.append(fileSize, filePath);
    }
    src_writer.close();
    // Set delete on exit for 'dist cache files list' as it is not needed later.
    fs.deleteOnExit(distCacheFilesList);

    conf.setInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, fileCount);
    conf.setLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, byteCount);
    LOG.info("Number of HDFS based distributed cache files to be generated is "
        + fileCount + ". Total size of HDFS based distributed cache files "
        + "to be generated is " + byteCount);

    if (!shouldGenerateDistCacheData() && fileCount > 0) {
      LOG.error("Missing " + fileCount + " distributed cache files under the "
          + " directory\n" + distCachePath + "\nthat are needed for gridmix"
          + " to emulate distributed cache load. Either use -generate\noption"
          + " to generate distributed cache data along with input data OR "
          + "disable\ndistributed cache emulation by configuring '"
          + DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
          + "' to false.");
      return Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
    }
    return 0;
  }

  /**
   * If gridmix needs to emulate distributed cache load, then configure
   * distributed cache files of a simulated job by mapping the original
   * cluster's distributed cache file paths to the simulated cluster's paths and
   * setting these mapped paths in the job configuration of the simulated job.
   * <br>
   * Configure local FS based distributed cache files through the property
   * "tmpfiles" and hdfs based distributed cache files through the property
   * {@link MRJobConfig#CACHE_FILES}.
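   * <br> A rough sketch of the outcome for one simulated job (values are
   * hypothetical; HDFS based names come from the path mapping described above
   * and local FS based names from {@link PseudoLocalFs}):
   * <pre>{@code
   * conf.get(MRJobConfig.CACHE_FILES); // "<distCachePath>/<md5-based-name>"
   * conf.get("tmpfiles");              // "pseudo:///<generated-file-name>"
   * }</pre>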
   * @param conf configuration for the simulated job to be run
   * @param jobConf job configuration of original cluster's job, obtained from
   *                trace
   * @throws IOException
   */
  void configureDistCacheFiles(Configuration conf, JobConf jobConf)
      throws IOException {
    if (shouldEmulateDistCacheLoad()) {

      String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
      if (files != null) {
        // hdfs based distributed cache files to be configured for simulated job
        List<String> cacheFiles = new ArrayList<String>();
        // local FS based distributed cache files to be configured for
        // simulated job
        List<String> localCacheFiles = new ArrayList<String>();

        String[] visibilities =
          jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
        String[] timeStamps =
          jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
        String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);

        String user = jobConf.getUser();
        for (int i = 0; i < files.length; i++) {
          // Check if visibilities are available, because older Hadoop versions
          // didn't distinguish between public and private distributed cache
          // files.
          boolean visibility =
            (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
          if (isLocalDistCacheFile(files[i], user, visibility)) {
            // local FS based distributed cache file.
            // Create this file on the pseudo local FS.
            String fileId = MD5Hash.digest(files[i] + timeStamps[i]).toString();
            long fileSize = Long.parseLong(fileSizes[i]);
            Path mappedLocalFilePath =
                PseudoLocalFs.generateFilePath(fileId, fileSize)
                    .makeQualified(pseudoLocalFs.getUri(),
                                   pseudoLocalFs.getWorkingDirectory());
            pseudoLocalFs.create(mappedLocalFilePath);
            localCacheFiles.add(mappedLocalFilePath.toUri().toString());
          } else {
            // hdfs based distributed cache file.
            // Get the mapped HDFS path on simulated cluster
            String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                                     visibility, user);
            cacheFiles.add(mappedPath);
          }
        }
        if (cacheFiles.size() > 0) {
          // configure hdfs based distributed cache files for simulated job
          conf.setStrings(MRJobConfig.CACHE_FILES,
                          cacheFiles.toArray(new String[cacheFiles.size()]));
        }
        if (localCacheFiles.size() > 0) {
          // configure local FS based distributed cache files for simulated job
          conf.setStrings("tmpfiles", localCacheFiles.toArray(
                                        new String[localCacheFiles.size()]));
        }
      }
    }
  }
}