/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tez.mapreduce.hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.split.JobSplit;
import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.tez.dag.api.DataSourceDescriptor;
import org.apache.tez.dag.api.TaskLocationHint;
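import org.junit.AfterClass;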
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

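/**
 * Tests split generation and local-resource registration in
 * {@link MRInputHelpers}, run against a {@link MiniDFSCluster} with both the
 * old (mapred) and new (mapreduce) MapReduce APIs.
 */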
public class TestMRInputHelpers {

  protected static MiniDFSCluster dfsCluster;

  private static Configuration conf = new Configuration();
  private static FileSystem remoteFs;
  private static Path testFilePath;
  private static Path oldSplitsDir;
  private static Path newSplitsDir;

  private static String TEST_ROOT_DIR = "target"
      + Path.SEPARATOR + TestMRHelpers.class.getName() + "-tmpDir";

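  /**
   * Starts a two-datanode {@link MiniDFSCluster}, writes a test input file to
   * it, and creates the directories used for old- and new-API split
   * generation.
   */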
  @BeforeClass
  public static void setup() throws IOException {
    try {
      conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
      dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
          .format(true).racks(null).build();
      remoteFs = dfsCluster.getFileSystem();
    } catch (IOException io) {
      throw new RuntimeException("problem starting mini dfs cluster", io);
    }

    Configuration testConf = new YarnConfiguration(remoteFs.getConf());

    remoteFs.mkdirs(new Path("/tmp/input/"));
    remoteFs.mkdirs(new Path("/tmp/splitsDirNew/"));
    remoteFs.mkdirs(new Path("/tmp/splitsDirOld/"));

    // Write the mini cluster's configuration out as XML to serve as the test
    // input file.
    try (FSDataOutputStream dataOutputStream =
        remoteFs.create(new Path("/tmp/input/test.xml"), true)) {
      testConf.writeXml(dataOutputStream);
      dataOutputStream.hsync();
    } catch (IOException e) {
      throw new RuntimeException("failed to write test input file", e);
    }

    testFilePath = remoteFs.makeQualified(new Path("/tmp/input/test.xml"));
    FileStatus fsStatus = remoteFs.getFileStatus(testFilePath);
    Assert.assertTrue(fsStatus.getLen() > 0);

    oldSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirOld/"));
    newSplitsDir = remoteFs.makeQualified(new Path("/tmp/splitsDirNew/"));
  }


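  /** Shuts down the mini DFS cluster once all tests have finished. */
  @AfterClass
  public static void tearDown() {
    if (dfsCluster != null) {
      dfsCluster.shutdown();
      dfsCluster = null;
    }
  }

  /**
   * Generates splits with the new (mapreduce) API and verifies the generated
   * split and split-meta-info files as well as the task location hints.
   */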
  @Test(timeout = 5000)
  public void testNewSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapReduce(newSplitsDir);

    Assert.assertTrue(dataSource.getAdditionalLocalFiles()
        .containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(dataSource.getAdditionalLocalFiles()
        .containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));

    RemoteIterator<LocatedFileStatus> files =
        remoteFs.listFiles(newSplitsDir, false);

    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;

    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      String fName = status.getPath().getName();
      totalFilesFound++;
      if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
        foundSplitsFile = true;
      } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
        foundMetaFile = true;
      } else {
        Assert.fail("Found invalid file in splits dir, filename=" + fName);
      }
      Assert.assertTrue(status.getLen() > 0);
    }

    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);

    verifyLocationHints(newSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
  }

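  /**
   * Generates splits with the old (mapred) API and verifies the generated
   * split and split-meta-info files as well as the task location hints.
   */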
  @Test(timeout = 5000)
  public void testOldSplitsGen() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);
    Assert.assertTrue(
        dataSource.getAdditionalLocalFiles().containsKey(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(dataSource.getAdditionalLocalFiles()
        .containsKey(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));

    RemoteIterator<LocatedFileStatus> files =
        remoteFs.listFiles(oldSplitsDir, false);

    boolean foundSplitsFile = false;
    boolean foundMetaFile = false;
    int totalFilesFound = 0;

    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      String fName = status.getPath().getName();
      totalFilesFound++;
      if (fName.equals(MRInputHelpers.JOB_SPLIT_RESOURCE_NAME)) {
        foundSplitsFile = true;
      } else if (fName.equals(MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME)) {
        foundMetaFile = true;
      } else {
        Assert.fail("Found invalid file in splits dir, filename=" + fName);
      }
      Assert.assertTrue(status.getLen() > 0);
    }

    Assert.assertEquals(2, totalFilesFound);
    Assert.assertTrue(foundSplitsFile);
    Assert.assertTrue(foundMetaFile);

    verifyLocationHints(oldSplitsDir, dataSource.getLocationHint().getTaskLocationHints());
  }

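  /**
   * Verifies that split generation registers exactly two additional local
   * resources: the split file and the split-meta-info file.
   */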
  @Test(timeout = 5000)
  public void testInputSplitLocalResourceCreation() throws Exception {
    DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(oldSplitsDir);

    Map<String, LocalResource> localResources = dataSource.getAdditionalLocalFiles();

    Assert.assertEquals(2, localResources.size());
    Assert.assertTrue(localResources.containsKey(
        MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
    Assert.assertTrue(localResources.containsKey(
        MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));
  }

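  /**
   * Re-reads the split meta info from the given splits directory and asserts
   * that the data source's task location hints match the recorded split
   * locations.
   */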
  private void verifyLocationHints(Path inputSplitsDir,
                                   List<TaskLocationHint> actual) throws Exception {
    JobID jobId = new JobID("dummy", 1);
    JobSplit.TaskSplitMetaInfo[] splitsInfo =
        SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs,
            conf, inputSplitsDir);
    int splitsCount = splitsInfo.length;
    List<TaskLocationHint> locationHints =
        new ArrayList<TaskLocationHint>(splitsCount);
    for (int i = 0; i < splitsCount; ++i) {
      locationHints.add(
          TaskLocationHint.createTaskLocationHint(new HashSet<String>(
              Arrays.asList(splitsInfo[i].getLocations())), null)
      );
    }

    Assert.assertEquals(locationHints, actual);
  }

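  /**
   * Builds a data source descriptor with the new (mapreduce) API, using
   * legacy (client-side) split generation into the given directory.
   */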
  private DataSourceDescriptor generateDataSourceDescriptorMapReduce(Path inputSplitsDir)
      throws Exception {
    JobConf jobConf = new JobConf(remoteFs.getConf());
    jobConf.setUseNewMapper(true);
    jobConf.setClass(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
        TextInputFormat.class, InputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());

    return MRInputHelpers.configureMRInputWithLegacySplitGeneration(jobConf, inputSplitsDir, true);
  }


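  /**
   * Builds a data source descriptor with the old (mapred) API, using
   * legacy (client-side) split generation into the given directory.
   */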
  private DataSourceDescriptor generateDataSourceDescriptorMapRed(Path inputSplitsDir)
      throws Exception {
    JobConf jobConf = new JobConf(remoteFs.getConf());
    jobConf.setUseNewMapper(false);
    jobConf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    jobConf.set(TextInputFormat.INPUT_DIR, testFilePath.toString());

    return MRInputHelpers.configureMRInputWithLegacySplitGeneration(jobConf, inputSplitsDir, true);
  }

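  /**
   * Verifies that when splits are generated on the local filesystem, none of
   * the registered local resources point back at the remote filesystem.
   */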
  @Test(timeout = 5000)
  public void testInputSplitLocalResourceCreationWithDifferentFS() throws Exception {
    FileSystem localFs = FileSystem.getLocal(conf);
    Path LOCAL_TEST_ROOT_DIR = new Path("target"
        + Path.SEPARATOR + TestMRHelpers.class.getName() + "-localtmpDir");

    try {
      localFs.mkdirs(LOCAL_TEST_ROOT_DIR);

      Path splitsDir = localFs.resolvePath(LOCAL_TEST_ROOT_DIR);

      DataSourceDescriptor dataSource = generateDataSourceDescriptorMapRed(splitsDir);

      Map<String, LocalResource> localResources = dataSource.getAdditionalLocalFiles();

      Assert.assertEquals(2, localResources.size());
      Assert.assertTrue(localResources.containsKey(
          MRInputHelpers.JOB_SPLIT_RESOURCE_NAME));
      Assert.assertTrue(localResources.containsKey(
          MRInputHelpers.JOB_SPLIT_METAINFO_RESOURCE_NAME));

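      // The splits were generated on the local filesystem, so none of the
      // resource URLs should carry the remote filesystem's scheme.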
      for (LocalResource lr : localResources.values()) {
        Assert.assertFalse(lr.getResource().getScheme().contains(remoteFs.getScheme()));
      }
    } finally {
      localFs.delete(localTestRootDir, true);
    }
  }

}