/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.morphline.hadoop.core;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.base.AbstractCommand;

import com.typesafe.config.Config;

/**
 * Command for transferring HDFS files, for example to help with centralized configuration file
 * management. On startup, the command downloads zero or more files or directory trees from HDFS to
 * the local file system.
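 *
 * <p>A minimal illustrative configuration fragment (the HDFS URI and local output directory
 * below are placeholders; the parameter names match those parsed by this command):
 *
 * <pre>
 * downloadHdfsFile {
 *   inputFiles : ["hdfs://namenode:8020/user/foo/schema.xml"]
 *   outputDir : "/tmp/downloads"
 * }
 * </pre>
 *
 * The optional {@code fs} parameter overrides the default file system URI, and the optional
 * {@code conf} parameter lists Hadoop configuration files to load, analogous to Hadoop's
 * GenericOptionsParser.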
 */
public final class DownloadHdfsFileBuilder implements CommandBuilder {

  @Override
  public Collection<String> getNames() {
    return Collections.singletonList("downloadHdfsFile");
  }

  @Override
  public Command build(Config config, Command parent, Command child, MorphlineContext context) {
    try {
      return new DownloadHdfsFile(this, config, parent, child, context);
    } catch (IOException e) {
      throw new MorphlineCompilationException("Cannot compile", config, e);
    }
  }
  
  
  ///////////////////////////////////////////////////////////////////////////////
  // Nested classes:
  ///////////////////////////////////////////////////////////////////////////////
  private static final class DownloadHdfsFile extends AbstractCommand {
    
    // global lock; contains successfully copied file paths
    private static final Set<String> DONE = new HashSet<String>();

    public DownloadHdfsFile(CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) 
        throws IOException {
      
      super(builder, config, parent, child, context);
      List<String> uris = getConfigs().getStringList(config, "inputFiles", Collections.<String>emptyList()); 
      File dstRootDir = new File(getConfigs().getString(config, "outputDir", "."));
      Configuration conf = new Configuration();
      String defaultFileSystemUri = getConfigs().getString(config, "fs", null);
      if (defaultFileSystemUri != null) {
        FileSystem.setDefaultUri(conf, defaultFileSystemUri); // see Hadoop's GenericOptionsParser
      }
      for (String value : getConfigs().getStringList(config, "conf", Collections.<String>emptyList())) {
        conf.addResource(new Path(value)); // see Hadoop's GenericOptionsParser
      }
      validateArguments();
      download(uris, conf, dstRootDir);
    }
    
    /*
     * To prevent races, we lock out other commands that delete and write the same local files,
     * and we delete and write any given file only once. This ensures that local file reads
     * occur only after local file writes have completed. For example, this handles N parallel
     * SolrSink clones running in the same JVM.
     *
     * TODO: consider extending this scheme with file system based (advisory) locking in order
     * to also lock out clones running in other JVM processes on the same file system.
     */
    private void download(List<String> uris, Configuration conf, File dstRootDir) throws IOException {
      synchronized (DONE) { 
        for (String uri : uris) {
          Path path = new Path(uri);
          File dst = new File(dstRootDir, path.getName()).getCanonicalFile();
          if (!DONE.contains(dst.getPath())) {
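            // Remove any pre-existing local directory tree so the fresh download
            // fully replaces it rather than merging into stale contents.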
            if (dst.isDirectory()) {
              LOG.debug("Deleting dir {}", dst);
              FileUtils.deleteDirectory(dst);
            }
            FileSystem fs = path.getFileSystem(conf);
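            // When downloading a single file (rather than a directory tree), ensure
            // the destination's parent directory exists before copying into it.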
            if (fs.isFile(path)) {
              dst.getParentFile().mkdirs();
            }
            LOG.debug("Downloading {} to {}", uri, dst);
            if (!FileUtil.copy(fs, path, dst, false, conf)) {
              throw new IOException("Cannot download URI " + uri + " to " + dst);
            }
            DONE.add(dst.getPath());
            LOG.debug("Succeeded downloading {} to {}", uri, dst);
          }
        }
      }
    }
  }  
}