package nl.sidnlabs.entrada.file;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.security.UserGroupInformation;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import lombok.extern.log4j.Log4j2;
import nl.sidnlabs.entrada.exception.ApplicationException;
import nl.sidnlabs.entrada.util.FileUtil;

/**
 * {@link FileManager} implementation that stores and retrieves files on HDFS through the Hadoop
 * {@link FileSystem} client.
 *
 * <p>A single {@link FileSystem} handle is created on first use and reused by all operations
 * until {@code close()} is called.
 *
 * <p>NOTE(review): the Spring {@code @Component} and {@code @Value} imports are present in this
 * file but no such annotations are visible on the class or its fields. If this class is meant to
 * be a Spring bean with injected settings, restore those annotations with the correct bean name
 * and property keys - they cannot be derived from this file.
 */
@Log4j2
public class HDFSFileManagerImpl implements FileManager {

  private static final String HDFS_SCHEME = "hdfs://";

  // directory that must contain core-site.xml and hdfs-site.xml
  private String confDir;

  // used as fs.defaultFS and as the filesystem URI for the secure filesystem
  private String hdfsNameservice;

  // HADOOP_USER_NAME for non-secure access, principal for the keytab login otherwise
  private String hdfsUsername;

  // path to a Kerberos keytab; when blank a non-secure filesystem is created
  private String krbKeyTab;

  // owner applied to uploaded files when they are not archive uploads
  private String owner;

  // group applied to uploaded files when they are not archive uploads
  private String group;

  // lazily created filesystem handle, reset to null when closed
  private FileSystem fs;

  public String schema() {
    return HDFS_SCHEME;

  public void close() {
    // close filesystem
    if (fs != null) {
      try {
        fs = null;
      } catch (Exception e) {
        log.error("Error while closing filesystem", e);

    // close filesystem internal cache
    try {
    } catch (Exception e) {
      log.error("Error while closing filesystem cache entries", e);

  public boolean exists(String path) {
    FileSystem fs = null;
    try {
      fs = createFS();
      return exists(fs, new Path(path));
    } catch (Exception e) {
      log.error("Error checking if file exists", e);
      return false;

  private boolean exists(FileSystem fs, Path path) {
    try {
      return fs.exists(path);
    } catch (Exception e) {
      log.error("Error while checking existence of path: {}", path, e);
      return false;

  public List<String> files(String dir, String... filter) {
    if (!exists(dir)) {
      log.error("Location {} does not exist, cannot continue", dir);
      return Collections.emptyList();
    FileSystem fs = null;
    try {
      fs = createFS();
      return Arrays
          .stream(fs.listStatus(new Path(dir)))
          .map(s -> s.getPath().toString())
          .filter(p -> checkFilter(p, Arrays.asList(filter)))
    } catch (Exception e) {
      log.error("Error while checking for files in dir: " + dir, e);

    return Collections.emptyList();

  private boolean checkFilter(String file, List<String> filters) {
    if (filters.isEmpty()) {
      return true;
    return -> StringUtils.endsWith(file, f));

  public Optional<InputStream> open(String location) {"Open HDFS file: " + location);

    if (!exists(location)) {
      log.error("Location {} does not exist, cannot continue");
      return Optional.empty();
    FileSystem fs = null;
    try {
      fs = createFS();
      return Optional.of( Path(location)));
    } catch (Exception e) {
      log.error("Cannot open: " + location, e);

    return Optional.empty();

  public boolean upload(String src, String dst, boolean archive) {"Upload src location: {} to dst location: {}", src, dst);

    File f = new File(src);

    if (!f.exists()) {
      log.error("Location {} does not exist, cannot continue with upload");
      return false;
    FileSystem fs = null;
    try {
      fs = createFS();
      Path pathSrc = new Path(src);
      Path pathDst = new Path(dst);

      if (!exists(fs, pathDst)) {
        mkdir(fs, pathDst);

      if (f.isDirectory()) {
        uploadDir(fs, src, pathDst, archive);
      } else {
        upload(fs, pathSrc, pathDst, archive);
      return true;
    } catch (Exception e) {
      log.error("Cannot upload, src: " + src + " dst: " + dst, e);
      return false;

  private boolean uploadDir(FileSystem fs, String src, Path dst, boolean archive) {
    // uploading a (sub)directory will fail if the dir already exists
    // at the destination, therefore upload each file individual and make
    // sure the directories exist.

    if (log.isDebugEnabled()) {
      log.debug("Upload dir {} to {}", src, dst);

    Set<Path> dirs = new HashSet<>();
    try (Stream<java.nio.file.Path> walk =
        Files.walk(Paths.get(src)).filter(p -> p.toFile().isFile())) {

      walk.forEach(p -> {
        if (log.isDebugEnabled()) {
          log.debug("Check if {} needs to be uploaded {}", p);

        Path srcPath = new Path(p.toString());
        Path dir = new Path(FileUtil
                StringUtils.substringAfter(srcPath.getParent().toString(), src)));
        if (!dirs.contains(dir)) {
          // new dir, try op create
          if (log.isDebugEnabled()) {
            log.debug("Create HDFS directory {}", dir);

          mkdir(fs, dir);

        if (log.isDebugEnabled()) {
          log.debug("Upload file {}", srcPath);

        upload(fs, srcPath, new Path(FileUtil.appendPath(dir.toString(), srcPath.getName())),

        if (log.isDebugEnabled()) {
          log.debug("Completed uploading file {}", srcPath);

      if (log.isDebugEnabled()) {
        log.debug("Completed upload");

      return true;
    } catch (Exception e) {
      log.error("Error while uploading {} to {}", src, dst, e);

    return false;

  private boolean upload(FileSystem fs, Path src, Path dst, boolean archive) {
    if (log.isDebugEnabled()) {
      log.debug("Upload file {} to {}", src, dst);

    try {
      fs.copyFromLocalFile(false, true, src, dst);

      if (!archive) {
        // when uploading non-pcap data files set the correct hdfs permissions
        if (log.isDebugEnabled()) {
          log.debug("Setting correct file permissions to uploaded files");
        chown(fs, dst.toString(), owner, group);

    } catch (IOException e) {
      log.error("Error while uploading {} to {}", src, dst, e);
      return false;

    return true;

  public boolean supported(String location) {
    try {
      URI uri = new URI(location);
      return StringUtils.equalsIgnoreCase(uri.getScheme(), "hdfs");
    } catch (URISyntaxException e) {
      log.error("Invalid location URI: " + location);
    return false;

  public boolean delete(String location) {"Delete HDFS file: " + location);

    FileSystem fs = null;
    try {
      fs = createFS();
      Path path = new Path(location);
      // do not try to delete non-existing path, just return true
      if (fs.exists(path)) {
        return fs.delete(path, false);

      return true;
    } catch (IllegalArgumentException | IOException e) {
      log.error("Cannot delete location: " + location, e);
      return false;

  public boolean rmdir(String location) {"Delete HDFS directory: " + location);

    FileSystem fs = null;
    try {
      fs = createFS();
      Path path = new Path(location);
      // do not try to delete non-existing path, just return true
      if (fs.exists(path)) {
        return fs.delete(path, true);

      return true;
    } catch (IllegalArgumentException | IOException e) {
      log.error("Cannot delete location: " + location, e);
      return false;

  public boolean move(String src, String dst, boolean archive) {"Move HDFS file: {} to: {} ", src, dst);

    FileSystem fs = null;
    try {
      fs = createFS();
      Path dstPath = new Path(dst);
      if (!fs.exists(dstPath.getParent())) {
      return fs.rename(new Path(src), dstPath);
    } catch (Exception e) {
      log.error("Cannot rename, src: " + src + " dst: " + dst, e);
      return false;

  public boolean isLocal() {
    return false;

  private Configuration conf() {
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfsNameservice);

    String coreSiteXml = confDir + "/core-site.xml";
    if (!new File(coreSiteXml).exists()) {
      throw new ApplicationException("Missing core-site.xml, add this to the conf directory");

    String hdfsSiteXml = confDir + "/hdfs-site.xml";
    if (!new File(hdfsSiteXml).exists()) {
      throw new ApplicationException("Missing hdfs-site.xml, add this to the conf directory");

    conf.addResource(new Path("file://" + hdfsSiteXml));
    conf.addResource(new Path("file://" + coreSiteXml));

    return conf;

  private FileSystem createNonSecureFS() {
    Configuration conf = conf();
    System.setProperty("HADOOP_USER_NAME", hdfsUsername);

    try {
      return FileSystem.get(conf);
    } catch (IOException e) {
      throw new ApplicationException("Cannot create non-secure HDFS filesystem", e);

  private FileSystem createSecureFS() {
    Configuration conf = conf();
    conf.set("", "kerberos");


    try {
      if (StringUtils.isNotBlank(krbKeyTab)) {
        UserGroupInformation.loginUserFromKeytab(hdfsUsername, krbKeyTab);

      return FileSystem.get(new URI(hdfsNameservice), conf);
    } catch (Exception e) {
      throw new ApplicationException("Cannot create secure HDFS filesystem", e);

  private FileSystem createFS() {

    if (fs != null) {
      return fs;

    if (StringUtils.isNotBlank(krbKeyTab)) {
      // user using krb user/pass
      fs = createSecureFS();
    } else {
      // use on-secure
      fs = createNonSecureFS();

    return fs;

  public boolean mkdir(String path) {
    FileSystem fs = null;
    try {
      fs = createFS();
      return mkdir(fs, new Path(path));
    } catch (Exception e) {
      return false;

  private boolean mkdir(FileSystem fs, Path path) {"Create directory: {}", path);

    try {
      return fs.mkdirs(path);
    } catch (Exception e) {
      log.error("Cannot create directory: {}", path, e);
    return false;

  public boolean chown(String path, String owner, String group) {
    FileSystem fs = null;
    try {
      fs = createFS();
      return chown(fs, path, owner, group);
    } catch (Exception e) {
      log.error("Cannot chown, path: " + path, e);
      return false;

  private boolean chown(FileSystem fs, String path, String owner, String group) {

    if (log.isDebugEnabled()) {
      log.debug("Chown permissions for path: {}", path);

    Path p = new Path(path);
    try {
      if (owner != null && group != null) {
        FileStatus fStatus = fs.getFileStatus(p);

        if (fStatus.isDirectory()) {
          for (FileStatus child : fs.listStatus(p)) {
            chown(fs, child.getPath().toString(), owner, group);

        // check to see if the owner/group needs to be corrected
        if (!StringUtils.equals(owner, fStatus.getOwner())
            || !StringUtils.equals(group, fStatus.getGroup())) {

"Chown: {} owner: {} group: {}", path, owner, group);
          fs.setOwner(p, owner, group);
    } catch (Exception e) {
      log.error("Error while doing chown for {}", path, e);
      return false;
    return true;

  public List<String> expired(String location, int maxAge, String... filter) {
    if (!exists(location)) {
      log.error("Location {} does not exist, cannot continue", location);
      return Collections.emptyList();
    List<String> files = new ArrayList<>();
    FileSystem fs = null;

    try {
      fs = createFS();
      RemoteIterator<LocatedFileStatus> fileStatusListIterator =
          fs.listFiles(new Path(location), true);

      while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus =;
    } catch (Exception e) {
      log.error("Error while getting files", e);
      return Collections.emptyList();

    // return found files, can be partial list in case of an exception
    return files
        .filter(p -> checkFilter(p, Arrays.asList(filter)))
