Java Code Examples for org.apache.commons.io.FileUtils#ONE_MB

The following examples show how to use org.apache.commons.io.FileUtils#ONE_MB. They are drawn from open source projects; the source file, project, and license are noted above each example.
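For context: ONE_MB is a public static final long in FileUtils equal to 1024 * 1024 bytes, defined alongside the sibling constants ONE_KB and ONE_GB. A minimal, self-contained sketch of the constant family and the companion byteCountToDisplaySize helper (the class name here is ours, for illustration only):

import org.apache.commons.io.FileUtils;

public class OneMbDemo {
    public static void main(String[] args) {
        // Binary size constants defined by FileUtils
        System.out.println(FileUtils.ONE_KB); // 1024
        System.out.println(FileUtils.ONE_MB); // 1048576  (1024 * 1024)
        System.out.println(FileUtils.ONE_GB); // 1073741824  (1024 * 1024 * 1024)

        // The typical pattern in the examples below: a megabyte-denominated
        // config value converted to bytes
        long limitBytes = 50 * FileUtils.ONE_MB;

        // The companion helper for human-readable output
        System.out.println(FileUtils.byteCountToDisplaySize(limitBytes)); // 50 MB
    }
}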
Example 1
Source File: TaskMemoryHeuristic.java    From dr-elephant with Apache License 2.0
/**
 * Constructor for {@link TaskMemoryHeuristic}.
 * @param heuristicConfData  the configuration for this heuristic
 */
public TaskMemoryHeuristic(HeuristicConfigurationData heuristicConfData) {
  this._heuristicConfData = heuristicConfData;

  Map<String, String> params = heuristicConfData.getParamMap();
  // read default container size
  if (params.containsKey(CONTAINER_MEMORY_DEFAULT_MB_CONF)) {
    defaultContainerMemoryBytes = Long.parseLong(params.get(CONTAINER_MEMORY_DEFAULT_MB_CONF)) * FileUtils.ONE_MB;
  }
  // read max memory thresholds
  if (params.containsKey(TASK_MEMORY_THRESHOLDS_CONF)) {
    maxMemoryLimits = Utils.getParam(params.get(TASK_MEMORY_THRESHOLDS_CONF), maxMemoryLimits.length);
  }

  Configuration yarnConf = new YarnConfiguration();
  int minimumMBAllocation = yarnConf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
  graceMemoryHeadroomBytes = 2 * minimumMBAllocation * FileUtils.ONE_MB;
}
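For scale: stock Hadoop defines YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB as 1024, so with the scheduler minimum left at its default the grace headroom works out to 2 * 1024 * FileUtils.ONE_MB = 2 GiB; a cluster configured with a 512 MB minimum allocation would get 1 GiB instead.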
 
Example 2
Source File: ProfileImageLogicImpl.java    From sakai with Educational Community License v2.0
/**
 * Read a file to a byte array.
 * 
 * @param file the file to read
 * @return byte[] with the file contents, or null if the file is too large
 * @throws IOException
 */
private byte[] getBytesFromFile(File file) throws IOException {
   
	// Get the size of the file
	long length = file.length();
	
	if (length > (ProfileConstants.MAX_IMAGE_UPLOAD_SIZE * FileUtils.ONE_MB)) {
		log.error("File too large: " + file.getCanonicalPath());  
		return null;
	}
	
	// return file contents
	return FileUtils.readFileToByteArray(file);
  }
 
Example 3
Source File: ThirdEyeUtils.java    From incubator-pinot with Apache License 2.0
private static long getApproximateMaxBucketNumber(int percentage) {
  long jvmMaxMemoryInBytes = Runtime.getRuntime().maxMemory();
  if (jvmMaxMemoryInBytes == Long.MAX_VALUE) { // Check upper bound
    jvmMaxMemoryInBytes = DEFAULT_UPPER_BOUND_OF_RESULTSETGROUP_CACHE_SIZE_IN_MB * FileUtils.ONE_MB; // MB to Bytes
  } else { // Check lower bound
    long lowerBoundInBytes = DEFAULT_LOWER_BOUND_OF_RESULTSETGROUP_CACHE_SIZE_IN_MB * FileUtils.ONE_MB; // MB to Bytes
    if (jvmMaxMemoryInBytes < lowerBoundInBytes) {
      jvmMaxMemoryInBytes = lowerBoundInBytes;
    }
  }
  // 102400 = 1024 * 100: convert bytes to kilobytes, then take the given percentage (out of 100)
  return (jvmMaxMemoryInBytes / 102400) * percentage;
}
 
Example 4
Source File: SolrConfig.java    From lucene-solr with Apache License 2.0
/**
 * Converts a Java heap option-like config string to bytes. Valid suffixes are: 'k', 'm', 'g'
 * (case insensitive). If there is no suffix, the default unit is bytes.
 * For example, 50k = 50KB, 20m = 20MB, 4g = 4GB, 300 = 300 bytes
 * @param configStr the config setting to parse
 * @return the size, in bytes. -1 if the given config string is empty
 */
protected static long convertHeapOptionStyleConfigStringToBytes(String configStr) {
  if (configStr.isEmpty()) {
    return -1;
  }
  long multiplier = 1;
  String numericValueStr = configStr;
  char suffix = Character.toLowerCase(configStr.charAt(configStr.length() - 1));
  if (Character.isLetter(suffix)) {
    if (suffix == 'k') {
      multiplier = FileUtils.ONE_KB;
    }
    else if (suffix == 'm') {
      multiplier = FileUtils.ONE_MB;
    }
    else if (suffix == 'g') {
      multiplier = FileUtils.ONE_GB;
    } else {
      throw new RuntimeException("Invalid suffix. Valid suffixes are 'k' (KB), 'm' (MB), 'g' (G). "
          + "No suffix means the amount is in bytes. ");
    }
    numericValueStr = configStr.substring(0, configStr.length() - 1);
  }
  try {
    return Long.parseLong(numericValueStr) * multiplier;
  } catch (NumberFormatException e) {
    throw new RuntimeException("Invalid format. The config setting should be a long with an "
        + "optional letter suffix. Valid suffixes are 'k' (KB), 'm' (MB), 'g' (G). "
        + "No suffix means the amount is in bytes.");
  }
}
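To make the contract concrete, a usage sketch (illustrative values derived from the Javadoc above; since the method is protected static, a real call site would sit in SolrConfig itself or a subclass):

long kb   = convertHeapOptionStyleConfigStringToBytes("50k"); // 50 * 1024 = 51200
long mb   = convertHeapOptionStyleConfigStringToBytes("20m"); // 20 * 1048576 = 20971520
long gb   = convertHeapOptionStyleConfigStringToBytes("4g");  // 4 * 1073741824 = 4294967296
long raw  = convertHeapOptionStyleConfigStringToBytes("300"); // no suffix: 300 bytes
long none = convertHeapOptionStyleConfigStringToBytes("");    // empty string: -1
// Suffixes are case-insensitive, so "20M" behaves like "20m";
// an unknown suffix such as "20x" throws a RuntimeException.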
 
Example 5
Source File: MapReduceTaskNumProcessor.java    From eagle with Apache License 2.0
private String analyzeMapTaskNum(List<String> optSettings) {
    StringBuilder sb = new StringBuilder();

    long numMaps = context.getNumMaps();
    long avgMapTime = context.getAvgMapTimeInSec();
    long avgMapInput = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_READ)
            / numMaps;
    String avgMapInputDisplaySize = bytesToHumanReadable(avgMapInput);

    if (avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1) {
        sb.append("Best practice: average map input bytes only have ").append(avgMapInputDisplaySize);
        sb.append(". Please reduce the number of mappers by merging input files.\n");
    } else if (avgMapInput > FileUtils.ONE_GB) {
        sb.append("Best practice: average map input bytes have ").append(avgMapInputDisplaySize);
        sb.append(". Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }

    if (avgMapTime < 10 && numMaps > 1) {
        sb.append("Best practice: average map time is only ").append(avgMapTime);
        sb.append(" seconds. Please reduce the number of mappers by merging input files or by using a larger block size.\n");
    } else if (avgMapTime > 600 && avgMapInput < FileUtils.ONE_GB) {
        sb.append("Best practice: average map time is ").append(avgMapTime);
        sb.append(" seconds. Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
    }

    return sb.toString();
}
 
Example 6
Source File: DefaultFSSinkProvider.java    From ambari-metrics with Apache License 2.0
public DefaultFSSinkProvider() {
  try {
    FIXED_FILE_SIZE = conf.getMetricsConf().getLong("timeline.metrics.service.external.fs.sink.filesize", FileUtils.ONE_MB * 100);
  } catch (Exception ignored) {
    FIXED_FILE_SIZE = FileUtils.ONE_MB * 100;
  }
}
 
Example 7
Source File: ParamGenerator.java    From dr-elephant with Apache License 2.0
/**
 * Check if the suggested parameter values violate any constraints
 * Constraint 1: sort.mb > 60% of map.memory: To avoid heap memory failure
 * Constraint 2: map.memory - sort.mb < 768: To avoid heap memory failure
 * Constraint 3: pig.maxCombinedSplitSize > 1.8*mapreduce.map.memory.mb
 * @param jobSuggestedParamValueList List of suggested param values
 * @param jobType Job type
 * @return true if the constraint is violated, false otherwise
 */
private boolean isParamConstraintViolated(List<JobSuggestedParamValue> jobSuggestedParamValueList,
    TuningAlgorithm.JobType jobType) {

  logger.info("Checking whether parameter values are within constraints");
  int violations = 0;

  if (jobType.equals(TuningAlgorithm.JobType.PIG)) {
    Double mrSortMemory = null;
    Double mrMapMemory = null;
    Double pigMaxCombinedSplitSize = null;

    for (JobSuggestedParamValue jobSuggestedParamValue : jobSuggestedParamValueList) {
      if (jobSuggestedParamValue.tuningParameter.paramName.equals("mapreduce.task.io.sort.mb")) {
        mrSortMemory = jobSuggestedParamValue.paramValue;
      } else if (jobSuggestedParamValue.tuningParameter.paramName.equals("mapreduce.map.memory.mb")) {
        mrMapMemory = jobSuggestedParamValue.paramValue;
      } else if (jobSuggestedParamValue.tuningParameter.paramName.equals("pig.maxCombinedSplitSize")) {
        pigMaxCombinedSplitSize = jobSuggestedParamValue.paramValue / FileUtils.ONE_MB;
      }
    }

    if (mrSortMemory != null && mrMapMemory != null) {
      if (mrSortMemory > 0.6 * mrMapMemory) {
        logger.info("Constraint violated: Sort memory > 60% of map memory");
        violations++;
      }
      if (mrMapMemory - mrSortMemory < 768) {
        logger.info("Constraint violated: Map memory - sort memory < 768 mb");
        violations++;
      }
    }

    if (pigMaxCombinedSplitSize != null && mrMapMemory != null && (pigMaxCombinedSplitSize > 1.8 * mrMapMemory)) {
      logger.info("Constraint violated: Pig max combined split size > 1.8 * map memory");
      violations++;
    }
  }
  if (violations == 0) {
    return false;
  } else {
    logger.info("Number of constraint(s) violated: " + violations);
    return true;
  }
}
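As a worked check of the constraints (illustrative numbers, not from the source): with mapreduce.task.io.sort.mb = 1000 and mapreduce.map.memory.mb = 1536, constraint 1 trips because 1000 > 0.6 * 1536 = 921.6, and constraint 2 trips because 1536 - 1000 = 536 < 768; a pig.maxCombinedSplitSize above 1.8 * 1536 ≈ 2765 MB (note the byte-to-MB division by FileUtils.ONE_MB before the comparison) would additionally trip constraint 3.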
 
Example 8
Source File: ImageFromDockerfile.java    From testcontainers-java with MIT License
@Override
protected final String resolve() {
    Logger logger = DockerLoggerFactory.getLogger(dockerImageName);

    DockerClient dockerClient = DockerClientFactory.instance().client();

    try {
        if (deleteOnExit) {
            ResourceReaper.instance().registerImageForCleanup(dockerImageName);
        }

        BuildImageResultCallback resultCallback = new BuildImageResultCallback() {
            @Override
            public void onNext(BuildResponseItem item) {
                super.onNext(item);

                if (item.isErrorIndicated()) {
                    logger.error(item.getErrorDetail().getMessage());
                } else {
                    logger.debug(StringUtils.chomp(item.getStream(), "\n"));
                }
            }
        };

        // We have to use pipes to avoid high memory consumption since users might want to build really big images
        @Cleanup PipedInputStream in = new PipedInputStream();
        @Cleanup PipedOutputStream out = new PipedOutputStream(in);

        BuildImageCmd buildImageCmd = dockerClient.buildImageCmd(in);
        configure(buildImageCmd);
        Map<String, String> labels = new HashMap<>();
        if (buildImageCmd.getLabels() != null) {
            labels.putAll(buildImageCmd.getLabels());
        }
        labels.putAll(DockerClientFactory.DEFAULT_LABELS);
        buildImageCmd.withLabels(labels);

        prePullDependencyImages(dependencyImageNames);

        BuildImageResultCallback exec = buildImageCmd.exec(resultCallback);

        long bytesToDockerDaemon = 0;

        // To build an image, we have to send the context to Docker in TAR archive format
        try (TarArchiveOutputStream tarArchive = new TarArchiveOutputStream(new GZIPOutputStream(out))) {
            tarArchive.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);

            for (Map.Entry<String, Transferable> entry : transferables.entrySet()) {
                Transferable transferable = entry.getValue();
                final String destination = entry.getKey();
                transferable.transferTo(tarArchive, destination);
                bytesToDockerDaemon += transferable.getSize();
            }
            tarArchive.finish();
        }

        log.info("Transferred {} to Docker daemon", FileUtils.byteCountToDisplaySize(bytesToDockerDaemon));
        if (bytesToDockerDaemon > FileUtils.ONE_MB * 50) { // warn if more than 50 MB was sent to the Docker daemon
            log.warn("A large amount of data was sent to the Docker daemon ({}). Consider using a .dockerignore file for better performance.",
                    FileUtils.byteCountToDisplaySize(bytesToDockerDaemon));
        }

        exec.awaitImageId();

        return dockerImageName;
    } catch(IOException e) {
        throw new RuntimeException("Can't close DockerClient", e);
    }
}
 
Example 9
Source File: TezTaskLevelAggregatedMetrics.java    From dr-elephant with Apache License 2.0
/**
 * Computes the aggregated metrics: peak memory, delay, total task duration, wasted resources and memory usage.
 * @param taskDatas the tasks to aggregate over
 * @param containerSize the container size in MB
 * @param idealStartTime the ideal start time for the tasks
 */
private void compute(TezTaskData[] taskDatas, long containerSize, long idealStartTime) {

  long peakMemoryNeed = 0;
  long taskFinishTimeMax = 0;
  long taskDurationMax = 0;

  // if there are zero tasks, then nothing to compute.
  if(taskDatas == null || taskDatas.length == 0) {
    return;
  }

  for (TezTaskData taskData: taskDatas) {
    if (!taskData.isSampled()) {
      continue;
    }
    long taskMemory = taskData.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES)/ FileUtils.ONE_MB; // MB
    long taskVM = taskData.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES)/ FileUtils.ONE_MB; // MB
    long taskDuration = taskData.getFinishTime() - taskData.getStartTime(); // Milliseconds
    long taskCost =  (containerSize) * (taskDuration / Statistics.SECOND_IN_MS); // MB Seconds

    durations.add(taskDuration);
    finishTimes.add(taskData.getFinishTime());

    //peak Memory usage
    long memoryRequiredForVM = (long) (taskVM/CLUSTER_MEMORY_FACTOR);
    long biggerMemoryRequirement = memoryRequiredForVM > taskMemory ? memoryRequiredForVM : taskMemory;
    peakMemoryNeed = biggerMemoryRequirement > peakMemoryNeed ? biggerMemoryRequirement : peakMemoryNeed;

    if(taskFinishTimeMax < taskData.getFinishTime()) {
      taskFinishTimeMax = taskData.getFinishTime();
    }

    if(taskDurationMax < taskDuration) {
      taskDurationMax = taskDuration;
    }
    _resourceUsed += taskCost;
  }

  // Compute the delay in starting the task.
  _delay = taskFinishTimeMax - (idealStartTime + taskDurationMax);

  // invalid delay
  if(_delay < 0) {
    _delay = 0;
  }

  // wastedResources
  long wastedMemory = containerSize -  (long) (peakMemoryNeed * MEMORY_BUFFER);
  if(wastedMemory > 0) {
    for (long duration : durations) {
      _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds
    }
  }
}
 
Example 10
Source File: GenericMemoryHeuristic.java    From dr-elephant with Apache License 2.0
public HeuristicResult apply(TezApplicationData data) {
  if(!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = getTasks(data);

  List<Long> totalPhysicalMemory = new LinkedList<Long>();
  List<Long> totalVirtualMemory = new LinkedList<Long>();
  List<Long> runTime = new LinkedList<Long>();

  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      totalPhysicalMemory.add(task.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES));
      totalVirtualMemory.add(task.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES));
      runTime.add(task.getTotalRunTimeMs());
    }
  }

  long averagePMem = Statistics.average(totalPhysicalMemory);
  long averageVMem = Statistics.average(totalVirtualMemory);
  long maxPMem;
  long minPMem;
  try {
    maxPMem = Collections.max(totalPhysicalMemory);
    minPMem = Collections.min(totalPhysicalMemory);
  } catch (Exception exception) {
    // thrown when no tasks were sampled and the lists are empty
    maxPMem = 0;
    minPMem = 0;
  }
  long averageRunTime = Statistics.average(runTime);

  String containerSizeStr;

  if (!Strings.isNullOrEmpty(data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF))
      && Long.parseLong(data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF)) > 0) {
    containerSizeStr = data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF);
  } else if (!Strings.isNullOrEmpty(data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF))
      && Long.parseLong(data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF)) > 0) {
    containerSizeStr = data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF);
  } else if (!Strings.isNullOrEmpty(data.getConf().getProperty(_mapredContainerMemConf))
      && Long.parseLong(data.getConf().getProperty(_mapredContainerMemConf)) > 0) {
    containerSizeStr = data.getConf().getProperty(_mapredContainerMemConf);
  } else {
    containerSizeStr = getContainerMemDefaultMBytes();
  }

  long containerSize = Long.parseLong(containerSizeStr) * FileUtils.ONE_MB;

  double averageMemMb = (double) (averagePMem / FileUtils.ONE_MB);
  double ratio = averageMemMb / ((double) (containerSize / FileUtils.ONE_MB));

  Severity severity;
  if (tasks.length == 0) {
    severity = Severity.NONE;
  } else {
    severity = getMemoryRatioSeverity(ratio);
  }

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
          _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Maximum Physical Memory (MB)",
          tasks.length == 0 ? "0" : Long.toString(maxPMem/FileUtils.ONE_MB));
  result.addResultDetail("Minimum Physical memory (MB)",
          tasks.length == 0 ? "0" : Long.toString(minPMem/FileUtils.ONE_MB));
  result.addResultDetail("Average Physical Memory (MB)",
          tasks.length == 0 ? "0" : Long.toString(averagePMem/FileUtils.ONE_MB));
  result.addResultDetail("Average Virtual Memory (MB)",
          tasks.length == 0 ? "0" : Long.toString(averageVMem/FileUtils.ONE_MB));
  result.addResultDetail("Average Task RunTime",
          tasks.length == 0 ? "0" : Statistics.readableTimespan(averageRunTime));
  result.addResultDetail("Requested Container Memory (MB)",
          (tasks.length == 0 || containerSize == 0 || containerSize == -1) ? "0" : String.valueOf(containerSize / FileUtils.ONE_MB));

  return result;
}
 
Example 11
Source File: TaskLevelAggregatedMetrics.java    From dr-elephant with Apache License 2.0
/**
 * Computes the aggregated metrics: peak memory, delay, total task duration, wasted resources and memory usage.
 * Aggregated metrics are expected to be approximations when sampling is enabled.
 * @param taskDatas the tasks to aggregate over
 * @param containerSize the container size in MB
 * @param idealStartTime the ideal start time for the tasks
 */
private void compute(MapReduceTaskData[] taskDatas, long containerSize, long idealStartTime) {

  long peakMemoryNeed = 0;
  long taskFinishTimeMax = 0;
  long taskDurationMax = 0;

  // if there are zero tasks, then nothing to compute.
  if(taskDatas == null || taskDatas.length == 0) {
    return;
  }

  for (MapReduceTaskData taskData: taskDatas) {
    if (!taskData.isTimeAndCounterDataPresent()) {
      continue;
    }
    long taskMemory = taskData.getCounters().get(MapReduceCounterData.CounterName.PHYSICAL_MEMORY_BYTES)/ FileUtils.ONE_MB; // MB
    long taskVM = taskData.getCounters().get(MapReduceCounterData.CounterName.VIRTUAL_MEMORY_BYTES)/ FileUtils.ONE_MB; // MB
    long taskDuration = taskData.getFinishTimeMs() - taskData.getStartTimeMs(); // Milliseconds
    long taskCost =  (containerSize) * (taskDuration / Statistics.SECOND_IN_MS); // MB Seconds

    durations.add(taskDuration);
    finishTimes.add(taskData.getFinishTimeMs());

    //peak Memory usage
    long memoryRequiredForVM = (long) (taskVM/CLUSTER_MEMORY_FACTOR);
    long biggerMemoryRequirement = memoryRequiredForVM > taskMemory ? memoryRequiredForVM : taskMemory;
    peakMemoryNeed = biggerMemoryRequirement > peakMemoryNeed ? biggerMemoryRequirement : peakMemoryNeed;

    if(taskFinishTimeMax < taskData.getFinishTimeMs()) {
      taskFinishTimeMax = taskData.getFinishTimeMs();
    }

    if(taskDurationMax < taskDuration) {
      taskDurationMax = taskDuration;
    }
    _resourceUsed += taskCost;
  }

  // Compute the delay in starting the task.
  _delay = taskFinishTimeMax - (idealStartTime + taskDurationMax);

  // invalid delay
  if(_delay < 0) {
    _delay = 0;
  }

  // wastedResources
  long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER); // give a 50% buffer
  if(wastedMemory > 0) {
    for (long duration : durations) {
      _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds
    }
  }
}
 
Example 12
Source File: GenericMemoryHeuristic.java    From dr-elephant with Apache License 2.0
@Override
public HeuristicResult apply(MapReduceApplicationData data) {

  if(!data.getSucceeded()) {
    return null;
  }

  String containerSizeStr = data.getConf().getProperty(_containerMemConf);
  long containerMem = -1L;

  if (containerSizeStr != null) {
    try {
      containerMem = Long.parseLong(containerSizeStr);
    } catch (NumberFormatException e0) {
      // Some jobs use a string variable like "${VAR}" for this config.
      if(containerSizeStr.startsWith("$")) {
        String realContainerConf = containerSizeStr.substring(containerSizeStr.indexOf("{")+1,
            containerSizeStr.indexOf("}"));
        String realContainerSizeStr = data.getConf().getProperty(realContainerConf);
        try {
          containerMem = Long.parseLong(realContainerSizeStr);
        }
        catch (NumberFormatException e1) {
          logger.warn(realContainerConf + ": expected number [" + realContainerSizeStr + "]");
        }
      } else {
        logger.warn(_containerMemConf + ": expected number [" + containerSizeStr + "]");
      }
    }
  }
  if (containerMem < 0) {
    containerMem = getContainerMemDefaultMBytes();
  }
  containerMem *= FileUtils.ONE_MB;

  MapReduceTaskData[] tasks = getTasks(data);
  List<Long> taskPMems = new ArrayList<Long>();
  List<Long> taskVMems = new ArrayList<Long>();
  List<Long> runtimesMs = new ArrayList<Long>();
  long taskPMin = Long.MAX_VALUE;
  long taskPMax = 0;
  for (MapReduceTaskData task : tasks) {
    if (task.isTimeAndCounterDataPresent()) {
      runtimesMs.add(task.getTotalRunTimeMs());
      long taskPMem = task.getCounters().get(MapReduceCounterData.CounterName.PHYSICAL_MEMORY_BYTES);
      long taskVMem = task.getCounters().get(MapReduceCounterData.CounterName.VIRTUAL_MEMORY_BYTES);
      taskPMems.add(taskPMem);
      taskPMin = Math.min(taskPMin, taskPMem);
      taskPMax = Math.max(taskPMax, taskPMem);
      taskVMems.add(taskVMem);
    }
  }

  if(taskPMin == Long.MAX_VALUE) {
    taskPMin = 0;
  }

  long taskPMemAvg = Statistics.average(taskPMems);
  long taskVMemAvg = Statistics.average(taskVMems);
  long averageTimeMs = Statistics.average(runtimesMs);

  Severity severity;
  if (tasks.length == 0) {
    severity = Severity.NONE;
  } else {
    severity = getTaskMemoryUtilSeverity(taskPMemAvg, containerMem);
  }

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg task runtime", Statistics.readableTimespan(averageTimeMs));
  result.addResultDetail("Avg Physical Memory (MB)", Long.toString(taskPMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Max Physical Memory (MB)", Long.toString(taskPMax / FileUtils.ONE_MB));
  result.addResultDetail("Min Physical Memory (MB)", Long.toString(taskPMin / FileUtils.ONE_MB));
  result.addResultDetail("Avg Virtual Memory (MB)", Long.toString(taskVMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Requested Container Memory", FileUtils.byteCountToDisplaySize(containerMem));

  return result;
}
 
Example 13
Source File: MapReduceTaskNumProcessor.java    From eagle with Apache License 2.0
private String analyzeReduceTaskNum(List<String> optSettings) {
    StringBuilder sb = new StringBuilder();

    long numReduces = context.getNumReduces();
    if (numReduces > 0) {
        long avgReduceTime = context.getAvgReduceTimeInSec();
        long avgShuffleTime = context.getAvgShuffleTimeInSec();
        long avgShuffleBytes = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_SHUFFLE_BYTES)
                / numReduces;
        long avgReduceOutput = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_WRITTEN)
                / numReduces;
        long avgReduceTotalTime = avgShuffleTime + avgReduceTime;

        long suggestReduces = 0;
        StringBuilder tmpsb = new StringBuilder();

        String avgShuffleDisplaySize = bytesToHumanReadable(avgShuffleBytes);
        if (avgShuffleBytes < 256 * FileUtils.ONE_MB && avgReduceTotalTime < 300
                && avgReduceOutput < 256 * FileUtils.ONE_MB && numReduces > 1) {
            tmpsb.append("average reduce input bytes is: ").append(avgShuffleDisplaySize).append(", ");
            suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
        } else if (avgShuffleBytes > 10 * FileUtils.ONE_GB && avgReduceTotalTime > 1800) {
            tmpsb.append("average reduce input bytes is: ").append(avgShuffleDisplaySize).append(", ");
            suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
        }

        if (avgReduceTotalTime < 60 && numReduces > 1) {
            tmpsb.append("average reduce time is only ").append(avgReduceTotalTime).append(" seconds, ");
            if (suggestReduces == 0) {
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            }
        } else if (avgReduceTotalTime > 3600 && avgReduceTime > 1800) {
            tmpsb.append("average reduce time is ").append(avgReduceTotalTime).append(" seconds, ");
            if (suggestReduces == 0) {
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            }
        }

        String avgReduceOutputDisplaySize = bytesToHumanReadable(avgReduceOutput);
        if (avgReduceOutput < 10 * FileUtils.ONE_MB && avgReduceTime < 300
                && avgShuffleBytes < 2 * FileUtils.ONE_GB && numReduces > 1) {
            tmpsb.append(" average reduce output is only ").append(avgReduceOutputDisplaySize).append(", ");
            if (suggestReduces == 0) {
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            }
        } else if (avgReduceOutput > 10 * FileUtils.ONE_GB && avgReduceTime > 1800) {
            tmpsb.append(" average reduce output is ").append(avgReduceOutputDisplaySize).append(", ");
            if (suggestReduces == 0) {
                suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
            }
        }

        if (suggestReduces > 0) {
            sb.append("Best practice: ").append(tmpsb.toString()).append("please consider ");
            if (suggestReduces > numReduces) {
                sb.append("increasing the ");
            } else {
                sb.append("decreasing the ");
            }
            String setting = String.format("-D%s=%s", NUM_REDUCES, suggestReduces);
            sb.append("reducer number. You could try ").append(setting).append("\n");
            optSettings.add(setting);
        }
    }
    return sb.toString();
}
 
Example 14
Source File: MapReduceFSFetcherHadoop2.java    From dr-elephant with Apache License 2.0
@Override
public MapReduceApplicationData fetchData(AnalyticJob job) throws IOException {
  DataFiles files = getHistoryFiles(job);
  String confFile = files.getJobConfPath();
  String histFile = files.getJobHistPath();
  String appId = job.getAppId();
  String jobId = Utils.getJobIdFromApplicationId(appId);

  MapReduceApplicationData jobData = new MapReduceApplicationData();
  jobData.setAppId(appId).setJobId(jobId);

  // Fetch job config
  Configuration jobConf = new Configuration(false);
  jobConf.addResource(_fs.open(new Path(confFile)), confFile);
  Properties jobConfProperties = new Properties();
  for (Map.Entry<String, String> entry : jobConf) {
    jobConfProperties.put(entry.getKey(), entry.getValue());
  }
  jobData.setJobConf(jobConfProperties);

  // Check if job history file is too large and should be throttled
  if (_fs.getFileStatus(new Path(histFile)).getLen() > _maxLogSizeInMB * FileUtils.ONE_MB) {
    String errMsg =
        "The history log of MapReduce application " + appId + " exceeds the size limit of " + _maxLogSizeInMB + " MB; parsing is throttled.";
    logger.warn(errMsg);
    jobData.setDiagnosticInfo(errMsg);
    jobData.setSucceeded(false);  // set succeeded to false to avoid heuristic analysis
    return jobData;
  }

  // Analyze job history file
  JobHistoryParser parser = new JobHistoryParser(_fs, histFile);
  JobHistoryParser.JobInfo jobInfo = parser.parse();
  IOException parseException = parser.getParseException();
  if (parseException != null) {
    throw new RuntimeException("Could not parse history file " + histFile, parseException);
  }
  // Populate missing fields from parsed job info. This info will be missing for backfilled jobs.
  populateJobFromJobInfo(job, jobInfo);

  jobData.setSubmitTime(jobInfo.getSubmitTime());
  jobData.setStartTime(jobInfo.getLaunchTime());
  jobData.setFinishTime(jobInfo.getFinishTime());

  String state = jobInfo.getJobStatus();
  if (state.equals("SUCCEEDED")) {
    jobData.setSucceeded(true);
  } else if (state.equals("FAILED")) {
    jobData.setSucceeded(false);
    jobData.setDiagnosticInfo(jobInfo.getErrorInfo());
  } else {
    throw new RuntimeException("job neither succeeded or failed. can not process it ");
  }

  // Fetch job counter
  MapReduceCounterData jobCounter = getCounterData(jobInfo.getTotalCounters());

  // Fetch task data
  Map<TaskID, JobHistoryParser.TaskInfo> allTasks = jobInfo.getAllTasks();
  List<JobHistoryParser.TaskInfo> mapperInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  List<JobHistoryParser.TaskInfo> reducerInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  for (JobHistoryParser.TaskInfo taskInfo : allTasks.values()) {
    if (taskInfo.getTaskType() == TaskType.MAP) {
      mapperInfoList.add(taskInfo);
    } else {
      reducerInfoList.add(taskInfo);
    }
  }
  if (jobInfo.getTotalMaps() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total mappers: " + mapperInfoList.size());
  }
  if (jobInfo.getTotalReduces() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total reducers: " + reducerInfoList.size());
  }
  MapReduceTaskData[] mapperList = getTaskData(jobId, mapperInfoList);
  MapReduceTaskData[] reducerList = getTaskData(jobId, reducerInfoList);

  jobData.setCounters(jobCounter).setMapperData(mapperList).setReducerData(reducerList);

  return jobData;
}
 
Example 15
Source File: TonyMetricsAggregator.java    From dr-elephant with Apache License 2.0
@Override
public void aggregate(HadoopApplicationData data) {
  _hadoopAggregatedData = new HadoopAggregatedData();

  TonyApplicationData tonyData = (TonyApplicationData) data;
  Configuration tonyConf = tonyData.getConfiguration();

  long mbSecUsed = 0;
  long mbSecWasted = 0;

  Map<String, Map<Integer, TonyTaskData>> taskMap = tonyData.getTaskMap();
  for (Map.Entry<String, Map<Integer, TonyTaskData>> entry : taskMap.entrySet()) {
    String taskType = entry.getKey();

    String memoryString = tonyConf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY));
    String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString);
    long mbRequested = Long.parseLong(memoryStringMB);
    double maxMemoryMBUsed = TonyUtils.getMaxMetricForTaskTypeAndMetricName(taskMap, taskType,
        Constants.MAX_MEMORY_BYTES) / FileUtils.ONE_MB;

    for (TonyTaskData taskData : entry.getValue().values()) {
      long taskDurationSec = (taskData.getTaskEndTime() - taskData.getTaskStartTime()) / Statistics.SECOND_IN_MS;
      if (taskDurationSec < 0) {
        // Most likely TASK_FINISHED and APPLICATION_FINISHED events are missing for the task.
        continue;
      }
      mbSecUsed += mbRequested * taskDurationSec;

      if (maxMemoryMBUsed == 0) {
        // If we don't have max memory metrics, don't calculate wasted memory.
        continue;
      }
      long wastedMemory = (long) (mbRequested - maxMemoryMBUsed * MEMORY_BUFFER);
      if (wastedMemory > 0) {
        mbSecWasted += wastedMemory * taskDurationSec;
      }
    }
  }

  _hadoopAggregatedData.setResourceUsed(mbSecUsed);
  _hadoopAggregatedData.setResourceWasted(mbSecWasted);
  // TODO: Calculate and set delay
}
 
Example 16
Source File: StandardQuotaStrategy.java    From alfresco-repository with GNU Lesser General Public License v3.0
protected long getMaxFileSizeBytes()
{
    return maxFileSizeMB * FileUtils.ONE_MB;
}
 
Example 17
Source File: StandardQuotaStrategy.java    From alfresco-repository with GNU Lesser General Public License v3.0
public long getMaxUsageMB()
{
    return maxUsageBytes / FileUtils.ONE_MB;
}
 
Example 18
Source File: StandardQuotaStrategy.java    From alfresco-repository with GNU Lesser General Public License v3.0
public double getCurrentUsageMB()
{
    return (double) getCurrentUsageBytes() / FileUtils.ONE_MB;
}
 
Example 19
Source File: CachedContentCleaner.java    From alfresco-repository with GNU Lesser General Public License v3.0
public double getSizeFilesDeletedMB()
{
    return (double) getSizeFilesDeleted() / FileUtils.ONE_MB;
}
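A final note on the division pattern in the last three examples: bytes / FileUtils.ONE_MB is long division and truncates toward zero, which is why getCurrentUsageMB and getSizeFilesDeletedMB cast to double before dividing. A minimal sketch of the difference (class name is ours, for illustration only):

import org.apache.commons.io.FileUtils;

public class MbDivisionDemo {
    public static void main(String[] args) {
        long bytes = 1536 * FileUtils.ONE_KB; // 1572864 bytes, i.e. 1.5 MB

        long truncatedMb = bytes / FileUtils.ONE_MB;        // long division: 1
        double exactMb = (double) bytes / FileUtils.ONE_MB; // cast first: 1.5

        System.out.println(truncatedMb); // 1
        System.out.println(exactMb);     // 1.5
    }
}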