/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.raid;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.protocol.LocatedBlocksWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.LocatedBlockWithMetaInfo;
import org.apache.hadoop.hdfs.protocol.VersionedLocatedBlocks;
import org.apache.hadoop.metrics.util.MetricsLongValue;
import org.apache.hadoop.util.StringUtils;

/**
 * Monitors and potentially fixes placement of blocks in RAIDed files.
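 *
 * <p>A minimal usage sketch (illustrative only; in practice the RaidNode
 * parity traversal drives these calls):
 * <pre>
 *   PlacementMonitor monitor = new PlacementMonitor(conf);
 *   monitor.start();               // start the block mover threads
 *   monitor.startCheckingFiles();  // mark the start of a traversal
 *   // for each (source, parity) pair found by the traversal:
 *   //   monitor.checkFile(srcFs, srcStat, parityFs, parityStat, codec);
 *   monitor.clearAndReport();      // publish histograms and reset
 *   monitor.stop();
 * </pre>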
 */
public class PlacementMonitor {
  public static final Log LOG = LogFactory.getLog(PlacementMonitor.class);

  /**
   * Maps codec id to a histogram from the number of co-located neighbor
   * blocks to the number of blocks with that many neighbors.
   */
  Map<String, Map<Integer, Long>> blockHistograms;
  Configuration conf;
  private volatile Map<String, Map<Integer, Long>> lastBlockHistograms;
  private volatile long lastUpdateStartTime = 0L;
  private volatile long lastUpdateFinishTime = 0L;
  private volatile long lastUpdateUsedTime = 0L;
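  // Per-thread cache from absolute file path to its LocatedFileStatus.
  // getLocatedFileStatus() fills it by listing a whole parent directory at
  // once, so lookups of sibling files avoid repeated listLocatedStatus RPCs.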
  public static ThreadLocal<HashMap<String, LocatedFileStatus>> 
      locatedFileStatusCache = new ThreadLocal<HashMap<String, LocatedFileStatus>>() {
        @Override
        protected HashMap<String, LocatedFileStatus> initialValue() {
          return new HashMap<String, LocatedFileStatus>();
        }
      };
  
  RaidNodeMetrics metrics;
  BlockMover blockMover;
  int blockMoveMinRepl = DEFAULT_BLOCK_MOVE_MIN_REPLICATION;

  final static String NUM_MOVING_THREADS_KEY = "hdfs.raid.block.move.threads";
  final static String SIMULATE_KEY = "hdfs.raid.block.move.simulate";
  final static String BLOCK_MOVE_QUEUE_LENGTH_KEY = "hdfs.raid.block.move.queue.length";
  final static String BLOCK_MOVE_MIN_REPLICATION_KEY =
      "hdfs.raid.block.move.min.replication";
  final static int DEFAULT_NUM_MOVING_THREADS = 10;
  final static int DEFAULT_BLOCK_MOVE_QUEUE_LENGTH = 30000;
  final static int ALWAYS_SUBMIT_PRIORITY = 3;
  final static int DEFAULT_BLOCK_MOVE_MIN_REPLICATION = 1;

  public PlacementMonitor(Configuration conf) throws IOException {
    this.conf = conf;
    this.blockHistograms = createEmptyHistograms();
    int numMovingThreads = conf.getInt(
        NUM_MOVING_THREADS_KEY, DEFAULT_NUM_MOVING_THREADS);
    int maxMovingQueueSize = conf.getInt(
        BLOCK_MOVE_QUEUE_LENGTH_KEY, DEFAULT_BLOCK_MOVE_QUEUE_LENGTH);
    this.blockMoveMinRepl = conf.getInt(BLOCK_MOVE_MIN_REPLICATION_KEY,
        DEFAULT_BLOCK_MOVE_MIN_REPLICATION);

    boolean simulate = conf.getBoolean(SIMULATE_KEY, false);
    blockMover = new BlockMover(
        numMovingThreads, maxMovingQueueSize, simulate,
        ALWAYS_SUBMIT_PRIORITY, conf);
    this.metrics = RaidNodeMetrics.getInstance(RaidNodeMetrics.DEFAULT_NAMESPACE_ID);
  }

  private Map<String, Map<Integer, Long>> createEmptyHistograms() {
    Map<String, Map<Integer, Long>> histo =
        new HashMap<String, Map<Integer, Long>>();
    for (Codec codec : Codec.getCodecs()) {
      histo.put(codec.id, new HashMap<Integer, Long>());
    }
    return histo;
  }

  public void start() {
    blockMover.start();
  }

  public void stop() {
    blockMover.stop();
  }

  public void startCheckingFiles() {
    lastUpdateStartTime = RaidNode.now();
  }

  public int getMovingQueueSize() {
    return blockMover.getQueueSize();
  }

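  /**
   * Checks the placement of a source file whose parity blocks live inside a
   * HAR part file; the HAR index entry locates them within the part file.
   */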
  public void checkFile(FileSystem srcFs, FileStatus srcFile,
            FileSystem parityFs, Path partFile, HarIndex.IndexEntry entry,
            Codec codec) throws IOException {
    if (srcFile.getReplication() > blockMoveMinRepl) {
      // Only check placement for files with at most blockMoveMinRepl replicas.
      return;
    }
    if (srcFs.getUri().equals(parityFs.getUri())) {
      BlockAndDatanodeResolver resolver = new BlockAndDatanodeResolver(
          srcFile.getPath(), srcFs, partFile, parityFs);
      checkBlockLocations(
          getBlockInfos(srcFs, srcFile),
          getBlockInfos(parityFs, partFile, entry.startOffset, entry.length),
          codec, srcFile, resolver);
    } else {
      // TODO: Move blocks in the two clusters separately
      LOG.warn("Source and parity are in different file systems." +
          " source:" + srcFs.getUri() + " parity:" + parityFs.getUri() +
          ". Skip.");
    }
  }

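  /**
   * Checks the placement of a source file against a standalone parity file.
   * For directory-RAID codecs the replication test uses the first block's
   * replica count, since a directory stripe spans multiple files.
   */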
  public void checkFile(FileSystem srcFs, FileStatus srcFile,
                        FileSystem parityFs, FileStatus parityFile,
                        Codec codec)
      throws IOException {
    
    if (!codec.isDirRaid) {
      if (srcFile.getReplication() > blockMoveMinRepl) {
        // Only check placement for files with at most blockMoveMinRepl replicas.
        return;
      }
    }
    List<BlockInfo> srcLstBI = getBlockInfos(srcFs, srcFile);
    if (srcLstBI.isEmpty()) {
      return;
    }
    if (codec.isDirRaid) {
      if (srcLstBI.get(0).blockLocation.getHosts().length > blockMoveMinRepl) {
        return;
      }
    }
    if (srcFs.equals(parityFs)) {
      BlockAndDatanodeResolver resolver = new BlockAndDatanodeResolver(
          srcFile.getPath(), srcFs, parityFile.getPath(), parityFs);
      checkBlockLocations(
          srcLstBI,
          getBlockInfos(parityFs, parityFile),
          codec, srcFile, resolver);
    } else {
      // TODO: Move blocks in the two clusters separately
      LOG.warn("Source and parity are in different file systems. Skip.");
    }
  }

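  /**
   * Looks up the LocatedFileStatus of a path via the per-thread cache. On a
   * miss, the whole parent directory is listed and cached so later lookups
   * of sibling files hit the cache. Returns null if p does not exist.
   */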
  LocatedFileStatus getLocatedFileStatus(
      FileSystem fs, Path p) throws IOException {
    HashMap<String, LocatedFileStatus> cache = 
        locatedFileStatusCache.get();
    LocatedFileStatus result = cache.get(p.toUri().getPath());
    if (result != null) {
      return result;
    }
    Path parent = p.getParent();
    String parentPath = parent.toUri().getPath();
    // If the parent directory was already listed (marked below by a null
    // cache entry for the parent path), a miss for p means p does not exist;
    // there is no need to list again.
    if (cache.containsKey(parentPath) &&
        cache.get(parentPath) == null) {
      return null;
    }
    
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(parent);
    while (iter.hasNext()) {
      LocatedFileStatus stat = iter.next();
      cache.put(stat.getPath().toUri().getPath(), stat);
    }
    // Mark the parent as fully listed by caching a null entry for it.
    cache.put(parentPath, null);
    result = cache.get(p.toUri().getPath());
    // Still null if p does not exist.
    return result;
  }

  static class BlockInfo {
    final BlockLocation blockLocation;
    final Path file;
    BlockInfo(BlockLocation blockLocation, Path file) {
      this.blockLocation = blockLocation;
      this.file = file;
    }
    String[] getNames() {
      try {
        return blockLocation.getNames();
      } catch (IOException e) {
        // Treat an unreadable location as having no replicas.
        return new String[]{};
      }
    }
  }

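  /**
   * Returns the block locations of a file, or of every file under a
   * directory for directory-RAID sources.
   */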
  List<BlockInfo> getBlockInfos(
    FileSystem fs, FileStatus stat) throws IOException {
    if (stat.isDir()) {
      return getDirBlockInfos(fs, stat.getPath());
    } else {
      return getBlockInfos(
        fs, stat.getPath(), 0, stat.getLen());
    }
  }
  
  List<BlockInfo> getDirBlockInfos(FileSystem fs, Path dirPath)
      throws IOException {
    List<LocatedFileStatus> lfs = RaidNode.listDirectoryRaidLocatedFileStatus(conf,
        fs, dirPath);
    List<BlockInfo> result = new ArrayList<BlockInfo>();
    for (LocatedFileStatus stat: lfs) {
      for (BlockLocation loc : stat.getBlockLocations()) {
        result.add(new BlockInfo(loc, stat.getPath()));
      }
    }
    return result;
  }

  List<BlockInfo> getBlockInfos(
    FileSystem fs, Path path, long start, long length)
      throws IOException {
    LocatedFileStatus stat = getLocatedFileStatus(fs, path);
    List<BlockInfo> result = new ArrayList<BlockInfo>();
    long end = start + length;
    if (stat != null) {
      for (BlockLocation loc : stat.getBlockLocations()) {
        if (loc.getOffset() >= start && loc.getOffset() < end) {
          result.add(new BlockInfo(loc, path));
        }
      }
    }
    return result;
  }

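  /**
   * Walks the file stripe by stripe: for each stripe it gathers the source
   * and parity blocks, counts blocks per datanode, logs stripes whose
   * placement could defeat the parity, updates the per-codec histogram, and
   * submits moves for excess co-located blocks.
   */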
  void checkBlockLocations(List<BlockInfo> srcBlocks,
      List<BlockInfo> parityBlocks, Codec codec,
      FileStatus srcFile, BlockAndDatanodeResolver resolver) throws IOException {
    if (srcBlocks == null || parityBlocks == null) {
      return;
    }
    int stripeLength = codec.stripeLength;
    int parityLength = codec.parityLength;
    int numBlocks = srcBlocks.size();
    int numStripes = (int) RaidNode.numStripes(numBlocks, stripeLength);
    
    Map<String, Integer> nodeToNumBlocks = new HashMap<String, Integer>();
    Set<String> nodesInThisStripe = new HashSet<String>();

    for (int stripeIndex = 0; stripeIndex < numStripes; ++stripeIndex) {

      List<BlockInfo> stripeBlocks = getStripeBlocks(
          stripeIndex, srcBlocks, stripeLength, parityBlocks, parityLength);

      countBlocksOnEachNode(stripeBlocks, nodeToNumBlocks, nodesInThisStripe);

      logBadFile(nodeToNumBlocks, stripeIndex, parityLength, srcFile);

      updateBlockPlacementHistogram(nodeToNumBlocks, blockHistograms.get(codec.id));

      submitBlockMoves(
          nodeToNumBlocks, stripeBlocks, nodesInThisStripe, resolver);

    }
  }

  private static void logBadFile(
        Map<String, Integer> nodeToNumBlocks, int stripeIndex, int parityLength,
        FileStatus srcFile) {
    int max = 0;
    for (Integer n : nodeToNumBlocks.values()) {
      if (max < n) {
        max = n;
      }
    }
    int maxNeighborBlocks = max - 1;
    if (maxNeighborBlocks >= parityLength) {
      LOG.warn("Bad placement found. file:" + srcFile.getPath() +
          " stripeIndex " + stripeIndex +
          " neighborBlocks:" + maxNeighborBlocks +
          " parityLength:" + parityLength);
    }
  }

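  /**
   * Collects the blocks of one stripe: source blocks
   * [stripeLength * i, stripeLength * (i + 1)) followed by parity blocks
   * [parityLength * i, parityLength * (i + 1)), both clipped to list size.
   */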
  private static List<BlockInfo> getStripeBlocks(int stripeIndex,
      List<BlockInfo> srcBlocks, int stripeLength,
      List<BlockInfo> parityBlocks, int parityLength) {
    List<BlockInfo> stripeBlocks = new ArrayList<BlockInfo>();
    // Adding source blocks
    int stripeStart = stripeLength * stripeIndex;
    int stripeEnd = Math.min(
        stripeStart + stripeLength, srcBlocks.size());
    if (stripeStart < stripeEnd) {
      stripeBlocks.addAll(
          srcBlocks.subList(stripeStart, stripeEnd));
    }
    // Adding parity blocks
    stripeStart = parityLength * stripeIndex;
    stripeEnd = Math.min(
        stripeStart + parityLength, parityBlocks.size());
    if (stripeStart < stripeEnd) {
      stripeBlocks.addAll(parityBlocks.subList(stripeStart, stripeEnd));
    }
    return stripeBlocks;
  }

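  /**
   * Counts how many blocks of the given stripe each datanode holds. Both
   * output collections are cleared first so callers can reuse them.
   */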
  static void countBlocksOnEachNode(List<BlockInfo> stripeBlocks,
      Map<String, Integer> nodeToNumBlocks,
      Set<String> nodesInThisStripe) throws IOException {
    nodeToNumBlocks.clear();
    nodesInThisStripe.clear();
    for (BlockInfo block : stripeBlocks) {
      for (String node : block.getNames()) {
        Integer n = nodeToNumBlocks.get(node);
        if (n == null) {
          n = 0;
        }
        nodeToNumBlocks.put(node, n + 1);
        nodesInThisStripe.add(node);
      }
    }
  }

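  /**
   * Folds the per-stripe counts into the histogram: a node holding n blocks
   * of a stripe adds one to bucket (n - 1), its number of neighbor blocks.
   */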
  private static void updateBlockPlacementHistogram(
      Map<String, Integer> nodeToNumBlocks,
      Map<Integer, Long> blockHistogram) {
    for (Integer numBlocks : nodeToNumBlocks.values()) {
      Long n = blockHistogram.get(numBlocks - 1);
      if (n == null) {
        n = 0L;
      }
      // Number of neighbor blocks to number of blocks
      blockHistogram.put(numBlocks - 1, n + 1);
    }
  }

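  /**
   * Schedules moves for the excess blocks of a stripe. On each overloaded
   * node the first block stays put; every further block is handed to the
   * BlockMover with a priority equal to the neighbor count, and all nodes
   * of this stripe are excluded as move targets.
   */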
  private void submitBlockMoves(Map<String, Integer> nodeToNumBlocks,
      List<BlockInfo> stripeBlocks, Set<String> excludedNodes,
      BlockAndDatanodeResolver resolver) throws IOException {
    // Initialize resolver
    for (BlockInfo block: stripeBlocks) {
      resolver.initialize(block.file, resolver.srcFs);
    }
   
    // For every node that holds more than one block of this stripe, move all
    // but one of its blocks so that a single block remains on the node.
    for (Entry<String, Integer> e : nodeToNumBlocks.entrySet()) {
      String node = e.getKey();
      int numberOfNeighborBlocks = e.getValue() - 1;
      if (numberOfNeighborBlocks == 0) {
        // Common case: the node holds only one block of this stripe.
        continue;
      }
      boolean skip = true;
      for (BlockInfo block : stripeBlocks) {
        for (String otherNode : block.getNames()) {
          if (node.equals(otherNode)) {
            if (skip) {
              // leave the first block where it is
              skip = false;
              break;
            }
            int priority = numberOfNeighborBlocks;
            LocatedBlockWithMetaInfo lb = resolver.getLocatedBlock(block);
            DatanodeInfo datanode = resolver.getDatanodeInfo(node);
            Set<DatanodeInfo> excludedDatanodes = new HashSet<DatanodeInfo>();
            for (String name : excludedNodes) {
              excludedDatanodes.add(resolver.getDatanodeInfo(name));
            }
            if (lb != null) {
              blockMover.move(lb, datanode, excludedDatanodes, priority,
                  lb.getDataProtocolVersion(), lb.getNamespaceID());
            }
            break;
          }
        }
      }
    }
  }

  /**
   * Reports the placement histograms to {@link RaidNodeMetrics} and resets
   * them. This should only be called right after a complete traversal of the
   * parity files has finished.
   */
  public void clearAndReport() {
    synchronized (metrics) {
      for (Codec codec : Codec.getCodecs()) {
        String id = codec.id;
        int extra = 0;
        Map<Integer, MetricsLongValue> codecStatsMap =
          metrics.codecToMisplacedBlocks.get(id);
        // Reset the values.
        for (Entry<Integer, MetricsLongValue> e: codecStatsMap.entrySet()) {
          e.getValue().set(0);
        }
        for (Entry<Integer, Long> e : blockHistograms.get(id).entrySet()) {
          if (e.getKey() < RaidNodeMetrics.MAX_MONITORED_MISPLACED_BLOCKS - 1) {
            MetricsLongValue v = codecStatsMap.get(e.getKey());
            v.set(e.getValue());
          } else {
            extra += e.getValue();
          }
        }
        MetricsLongValue v = codecStatsMap.get(
            RaidNodeMetrics.MAX_MONITORED_MISPLACED_BLOCKS - 1);
        v.set(extra);
      }
    }
    lastBlockHistograms = blockHistograms;
    lastUpdateFinishTime = RaidNode.now();
    lastUpdateUsedTime = lastUpdateFinishTime - lastUpdateStartTime;
    LOG.info("Reporting metrices:\n" + toString());
    blockHistograms = createEmptyHistograms();
  }

  @Override
  public String toString() {
    if (lastBlockHistograms == null) {
      return "Not available";
    }
    String result = "";
    for (Codec codec : Codec.getCodecs()) {
      String code = codec.id;
      Map<Integer, Long> histo = lastBlockHistograms.get(code);
      result += code + " Blocks\n";
      List<Integer> neighbors = new ArrayList<Integer>();
      neighbors.addAll(histo.keySet());
      Collections.sort(neighbors);
      for (Integer i : neighbors) {
        Long numBlocks = histo.get(i);
        result += i + " co-localted blocks:" + numBlocks + "\n";
      }
    }
    return result;
  }

  public String htmlTable() {
    if (lastBlockHistograms == null) {
      return "Not available";
    }
    int max = computeMaxColocatedBlocks();
    String head = "";
    for (int i = 0; i <= max; ++i) {
      head += JspUtils.td(i + "");
    }
    head = JspUtils.tr(JspUtils.td("CODE") + head);
    String result = head;
    for (Codec codec : Codec.getCodecs()) {
      String code = codec.id;
      String row = JspUtils.td(code);
      Map<Integer, Long> histo = lastBlockHistograms.get(code);
      for (int i = 0; i <= max; ++i) {
        Long numBlocks = histo.get(i);
        numBlocks = numBlocks == null ? 0 : numBlocks;
        row += JspUtils.td(StringUtils.humanReadableInt(numBlocks));
      }
      row = JspUtils.tr(row);
      result += row;
    }
    return JspUtils.table(result);
  }

  public long lastUpdateTime() {
    return lastUpdateFinishTime;
  }

  public long lastUpdateUsedTime() {
    return lastUpdateUsedTime;
  }

  private int computeMaxColocatedBlocks() {
    int max = 0;
    for (Codec codec : Codec.getCodecs()) {
      String code = codec.id;
      Map<Integer, Long> histo = lastBlockHistograms.get(code);
      for (Integer i : histo.keySet()) {
        max = Math.max(i, max);
      }
    }
    return max;
  }

  /**
   * Translates {@link BlockLocation} to {@link LocatedBlockWithMetaInfo} and
   * a datanode host:port name to {@link DatanodeInfo}.
   */
  static class BlockAndDatanodeResolver {
    final Path src;
    final FileSystem srcFs;
    final Path parity;
    final FileSystem parityFs;

    private boolean inited = false;
    private Map<String, DatanodeInfo> nameToDatanodeInfo = 
        new HashMap<String, DatanodeInfo>();
    private Map<Path, Map<Long, LocatedBlockWithMetaInfo>>
      pathAndOffsetToLocatedBlock =
        new HashMap<Path, Map<Long, LocatedBlockWithMetaInfo>>();
    // Test-only constructor.
    BlockAndDatanodeResolver() {
      this.src = null;
      this.srcFs = null;
      this.parity = null;
      this.parityFs = null;
    }

    BlockAndDatanodeResolver(
        Path src, FileSystem srcFs, Path parity, FileSystem parityFs) {
      this.src = src;
      this.srcFs = srcFs;
      this.parity = parity;
      this.parityFs = parityFs;
    }

    public LocatedBlockWithMetaInfo getLocatedBlock(BlockInfo blk) throws IOException {
      checkParityInitialized();
      initialize(blk.file, srcFs);
      Map<Long, LocatedBlockWithMetaInfo> offsetToLocatedBlock =
          pathAndOffsetToLocatedBlock.get(blk.file);
      if (offsetToLocatedBlock != null) {
        LocatedBlockWithMetaInfo lb = offsetToLocatedBlock.get(
            blk.blockLocation.getOffset());
        if (lb != null) {
          return lb;
        }
      }
      // This should not happen
      throw new IOException("Cannot find the " + LocatedBlock.class +
          " for the block in file:" + blk.file +
          " offset:" + blk.blockLocation.getOffset());
    }

    public DatanodeInfo getDatanodeInfo(String name) throws IOException {
      checkParityInitialized();
      return nameToDatanodeInfo.get(name);
    }

    private void checkParityInitialized() throws IOException {
      if (inited) {
        return;
      }
      initialize(parity, parityFs);
      inited = true;
    }
    
    public void initialize(Path path, FileSystem fs) throws IOException {
      if (pathAndOffsetToLocatedBlock.containsKey(path)) {
        return;
      }
      VersionedLocatedBlocks pathLbs = getLocatedBlocks(path, fs);
      pathAndOffsetToLocatedBlock.put(
          path, createOffsetToLocatedBlockMap(pathLbs));

      // Record every datanode that holds a replica of a block of this path.
      for (LocatedBlock lb : pathLbs.getLocatedBlocks()) {
        for (DatanodeInfo dn : lb.getLocations()) {
          nameToDatanodeInfo.put(dn.getName(), dn);
        }
      }
    }

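    /**
     * Indexes the located blocks by start offset, carrying the data protocol
     * version and namespace id through when the meta-info variant is present.
     */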
    private Map<Long, LocatedBlockWithMetaInfo> createOffsetToLocatedBlockMap(
        VersionedLocatedBlocks lbs) {
      Map<Long, LocatedBlockWithMetaInfo> result =
          new HashMap<Long, LocatedBlockWithMetaInfo>();
      if (lbs instanceof LocatedBlocksWithMetaInfo) {
        LocatedBlocksWithMetaInfo lbsm = (LocatedBlocksWithMetaInfo)lbs;
        for (LocatedBlock lb : lbs.getLocatedBlocks()) {
          result.put(lb.getStartOffset(), new LocatedBlockWithMetaInfo(
              lb.getBlock(), lb.getLocations(), lb.getStartOffset(),
              lbsm.getDataProtocolVersion(), lbsm.getNamespaceID(),
              lbsm.getMethodFingerPrint()));
        }
      } else {
        for (LocatedBlock lb : lbs.getLocatedBlocks()) {
          result.put(lb.getStartOffset(), new LocatedBlockWithMetaInfo(
              lb.getBlock(), lb.getLocations(), lb.getStartOffset(),
              lbs.getDataProtocolVersion(), 0, 0));
        }
      }
      return result;
    }

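    /**
     * Fetches located blocks directly from the namenode. When the namenode
     * supports the meta-info RPC, the richer LocatedBlocksWithMetaInfo is
     * returned; otherwise the plain versioned form is used.
     */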
    private VersionedLocatedBlocks getLocatedBlocks(Path file, FileSystem fs)
        throws IOException {
      if (!(fs instanceof DistributedFileSystem)) {
        throw new IOException("Cannot obtain " + LocatedBlocks.class +
            " from " + fs.getClass().getSimpleName());
      }
      DistributedFileSystem dfs = (DistributedFileSystem) fs;
      if (DFSClient.isMetaInfoSuppoted(dfs.getClient().namenodeProtocolProxy)) {
        LocatedBlocksWithMetaInfo lbwmi =
            dfs.getClient().namenode.openAndFetchMetaInfo(
                file.toUri().getPath(), 0, Long.MAX_VALUE);
        dfs.getClient().getNewNameNodeIfNeeded(lbwmi.getMethodFingerPrint());
        return lbwmi;
      }
      return dfs.getClient().namenode.open(
          file.toUri().getPath(), 0, Long.MAX_VALUE);
    }
  }
}