Java Code Examples for org.apache.commons.io.FileUtils#ONE_MB
The following examples show how to use org.apache.commons.io.FileUtils#ONE_MB, a long constant from Apache Commons IO equal to 1,048,576 bytes (1024 * 1024). Each example notes its source file, the project it comes from, and that project's license.
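Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: multiplying a megabyte-denominated setting by FileUtils.ONE_MB to get bytes, and dividing by FileUtils.ONE_MB to report bytes in megabytes. The property name and the 100 MB default below are illustrative assumptions, not taken from any of the projects listed here.

import org.apache.commons.io.FileUtils;

public class OneMbDemo {
  public static void main(String[] args) {
    // Hypothetical setting expressed in megabytes (name and default are made up for this sketch).
    long maxUploadSizeMb = Long.parseLong(System.getProperty("demo.max.upload.mb", "100"));

    // MB -> bytes: multiply by FileUtils.ONE_MB (1,048,576).
    long maxUploadSizeBytes = maxUploadSizeMb * FileUtils.ONE_MB;

    // bytes -> MB: divide by FileUtils.ONE_MB when reporting.
    System.out.println("Limit in bytes: " + maxUploadSizeBytes);
    System.out.println("Limit in MB:    " + (maxUploadSizeBytes / FileUtils.ONE_MB));

    // Commons IO can also render a human-readable size from a byte count.
    System.out.println("Display size:   " + FileUtils.byteCountToDisplaySize(maxUploadSizeBytes));
  }
}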
Example 1
Source File: TaskMemoryHeuristic.java, from dr-elephant (Apache License 2.0)

/**
 * Constructor for {@link TaskMemoryHeuristic}.
 * @param heuristicConfData the configuration for this heuristic
 */
public TaskMemoryHeuristic(HeuristicConfigurationData heuristicConfData) {
  this._heuristicConfData = heuristicConfData;
  Map<String, String> params = heuristicConfData.getParamMap();

  // read default container size
  if (params.containsKey(CONTAINER_MEMORY_DEFAULT_MB_CONF)) {
    defaultContainerMemoryBytes = Long.parseLong(params.get(CONTAINER_MEMORY_DEFAULT_MB_CONF)) * FileUtils.ONE_MB;
  }

  // read max memory thresholds
  if (params.containsKey(TASK_MEMORY_THRESHOLDS_CONF)) {
    maxMemoryLimits = Utils.getParam(params.get(TASK_MEMORY_THRESHOLDS_CONF), maxMemoryLimits.length);
  }

  Configuration yarnConf = new YarnConfiguration();
  int minimumMBAllocation = yarnConf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
      YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
  graceMemoryHeadroomBytes = 2 * minimumMBAllocation * FileUtils.ONE_MB;
}
Example 2
Source File: ProfileImageLogicImpl.java, from sakai (Educational Community License v2.0)

/**
 * Read a file to a byte array.
 *
 * @param file
 * @return byte[] if file ok, or null if its too big
 * @throws IOException
 */
private byte[] getBytesFromFile(File file) throws IOException {
  // Get the size of the file
  long length = file.length();

  if (length > (ProfileConstants.MAX_IMAGE_UPLOAD_SIZE * FileUtils.ONE_MB)) {
    log.error("File too large: " + file.getCanonicalPath());
    return null;
  }

  // return file contents
  return FileUtils.readFileToByteArray(file);
}
Example 3
Source File: ThirdEyeUtils.java, from incubator-pinot (Apache License 2.0)

private static long getApproximateMaxBucketNumber(int percentage) {
  long jvmMaxMemoryInBytes = Runtime.getRuntime().maxMemory();
  if (jvmMaxMemoryInBytes == Long.MAX_VALUE) { // Check upper bound
    jvmMaxMemoryInBytes = DEFAULT_UPPER_BOUND_OF_RESULTSETGROUP_CACHE_SIZE_IN_MB * FileUtils.ONE_MB; // MB to Bytes
  } else { // Check lower bound
    long lowerBoundInBytes = DEFAULT_LOWER_BOUND_OF_RESULTSETGROUP_CACHE_SIZE_IN_MB * FileUtils.ONE_MB; // MB to Bytes
    if (jvmMaxMemoryInBytes < lowerBoundInBytes) {
      jvmMaxMemoryInBytes = lowerBoundInBytes;
    }
  }
  return (jvmMaxMemoryInBytes / 102400) * percentage;
}
Example 4
Source File: SolrConfig.java, from lucene-solr (Apache License 2.0)

/**
 * Converts a Java heap option-like config string to bytes. Valid suffixes are: 'k', 'm', 'g'
 * (case insensitive). If there is no suffix, the default unit is bytes.
 * For example, 50k = 50KB, 20m = 20MB, 4g = 4GB, 300 = 300 bytes
 * @param configStr the config setting to parse
 * @return the size, in bytes. -1 if the given config string is empty
 */
protected static long convertHeapOptionStyleConfigStringToBytes(String configStr) {
  if (configStr.isEmpty()) {
    return -1;
  }
  long multiplier = 1;
  String numericValueStr = configStr;
  char suffix = Character.toLowerCase(configStr.charAt(configStr.length() - 1));
  if (Character.isLetter(suffix)) {
    if (suffix == 'k') {
      multiplier = FileUtils.ONE_KB;
    } else if (suffix == 'm') {
      multiplier = FileUtils.ONE_MB;
    } else if (suffix == 'g') {
      multiplier = FileUtils.ONE_GB;
    } else {
      throw new RuntimeException("Invalid suffix. Valid suffixes are 'k' (KB), 'm' (MB), 'g' (G). "
          + "No suffix means the amount is in bytes. ");
    }
    numericValueStr = configStr.substring(0, configStr.length() - 1);
  }
  try {
    return Long.parseLong(numericValueStr) * multiplier;
  } catch (NumberFormatException e) {
    throw new RuntimeException("Invalid format. The config setting should be a long with an "
        + "optional letter suffix. Valid suffixes are 'k' (KB), 'm' (MB), 'g' (G). "
        + "No suffix means the amount is in bytes.");
  }
}
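Given those multipliers, the conversion is plain arithmetic. A few illustrative inputs and the byte counts a parser like the one above would produce, using the standard Commons IO values (ONE_KB = 1,024, ONE_MB = 1,048,576, ONE_GB = 1,073,741,824):

"50k" -> 50 * 1,024 = 51,200 bytes
"20m" -> 20 * 1,048,576 = 20,971,520 bytes
"4g"  -> 4 * 1,073,741,824 = 4,294,967,296 bytes
"300" -> 300 bytes (no suffix, so the unit is bytes)
""    -> -1 (empty string)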
Example 5
Source File: MapReduceTaskNumProcessor.java, from eagle (Apache License 2.0)

private String analyzeMapTaskNum(List<String> optSettings) {
  StringBuilder sb = new StringBuilder();

  long numMaps = context.getNumMaps();
  long avgMapTime = context.getAvgMapTimeInSec();
  long avgMapInput = context.getJob().getMapCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_READ)
      / numMaps;
  String avgMapInputDisplaySize = bytesToHumanReadable(avgMapInput);

  if (avgMapInput < 5 * FileUtils.ONE_MB && avgMapTime < 30 && numMaps > 1) {
    sb.append("Best practice: average map input bytes only have ").append(avgMapInputDisplaySize);
    sb.append(". Please reduce the number of mappers by merging input files.\n");
  } else if (avgMapInput > FileUtils.ONE_GB) {
    sb.append("Best practice: average map input bytes have ").append(avgMapInputDisplaySize);
    sb.append(". Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
  }

  if (avgMapTime < 10 && numMaps > 1) {
    sb.append("Best practice: average map time only have ").append(avgMapTime);
    sb.append(" seconds. Please reduce the number of mappers by merging input files or by using a larger block size.\n");
  } else if (avgMapTime > 600 && avgMapInput < FileUtils.ONE_GB) {
    sb.append("Best practice: average map time is ").append(avgMapInput);
    sb.append(" seconds. Please increase the number of mappers by using splittable compression, a container file format or a smaller block size.\n");
  }

  return sb.toString();
}
Example 6
Source File: DefaultFSSinkProvider.java, from ambari-metrics (Apache License 2.0)

public DefaultFSSinkProvider() {
  try {
    FIXED_FILE_SIZE = conf.getMetricsConf().getLong("timeline.metrics.service.external.fs.sink.filesize",
        FileUtils.ONE_MB * 100);
  } catch (Exception ignored) {
    FIXED_FILE_SIZE = FileUtils.ONE_MB * 100;
  }
}
Example 7
Source File: ParamGenerator.java, from dr-elephant (Apache License 2.0)

/**
 * Check if the parameters violated constraints
 * Constraint 1: sort.mb > 60% of map.memory: To avoid heap memory failure
 * Constraint 2: map.memory - sort.mb < 768: To avoid heap memory failure
 * Constraint 3: pig.maxCombinedSplitSize > 1.8*mapreduce.map.memory.mb
 * @param jobSuggestedParamValueList List of suggested param values
 * @param jobType Job type
 * @return true if the constraint is violated, false otherwise
 */
private boolean isParamConstraintViolated(List<JobSuggestedParamValue> jobSuggestedParamValueList,
    TuningAlgorithm.JobType jobType) {
  logger.info("Checking whether parameter values are within constraints");
  Integer violations = 0;

  if (jobType.equals(TuningAlgorithm.JobType.PIG)) {
    Double mrSortMemory = null;
    Double mrMapMemory = null;
    Double pigMaxCombinedSplitSize = null;

    for (JobSuggestedParamValue jobSuggestedParamValue : jobSuggestedParamValueList) {
      if (jobSuggestedParamValue.tuningParameter.paramName.equals("mapreduce.task.io.sort.mb")) {
        mrSortMemory = jobSuggestedParamValue.paramValue;
      } else if (jobSuggestedParamValue.tuningParameter.paramName.equals("mapreduce.map.memory.mb")) {
        mrMapMemory = jobSuggestedParamValue.paramValue;
      } else if (jobSuggestedParamValue.tuningParameter.paramName.equals("pig.maxCombinedSplitSize")) {
        pigMaxCombinedSplitSize = jobSuggestedParamValue.paramValue / FileUtils.ONE_MB;
      }
    }

    if (mrSortMemory != null && mrMapMemory != null) {
      if (mrSortMemory > 0.6 * mrMapMemory) {
        logger.info("Constraint violated: Sort memory > 60% of map memory");
        violations++;
      }
      if (mrMapMemory - mrSortMemory < 768) {
        logger.info("Constraint violated: Map memory - sort memory < 768 mb");
        violations++;
      }
    }

    if (pigMaxCombinedSplitSize != null && mrMapMemory != null && (pigMaxCombinedSplitSize > 1.8 * mrMapMemory)) {
      logger.info("Constraint violated: Pig max combined split size > 1.8 * map memory");
      violations++;
    }
  }

  if (violations == 0) {
    return false;
  } else {
    logger.info("Number of constraint(s) violated: " + violations);
    return true;
  }
}
Example 8
Source File: ImageFromDockerfile.java, from testcontainers-java (MIT License)

@Override
protected final String resolve() {
  Logger logger = DockerLoggerFactory.getLogger(dockerImageName);

  DockerClient dockerClient = DockerClientFactory.instance().client();

  try {
    if (deleteOnExit) {
      ResourceReaper.instance().registerImageForCleanup(dockerImageName);
    }

    BuildImageResultCallback resultCallback = new BuildImageResultCallback() {
      @Override
      public void onNext(BuildResponseItem item) {
        super.onNext(item);

        if (item.isErrorIndicated()) {
          logger.error(item.getErrorDetail().getMessage());
        } else {
          logger.debug(StringUtils.chomp(item.getStream(), "\n"));
        }
      }
    };

    // We have to use pipes to avoid high memory consumption since users might want to build really big images
    @Cleanup PipedInputStream in = new PipedInputStream();
    @Cleanup PipedOutputStream out = new PipedOutputStream(in);

    BuildImageCmd buildImageCmd = dockerClient.buildImageCmd(in);
    configure(buildImageCmd);
    Map<String, String> labels = new HashMap<>();
    if (buildImageCmd.getLabels() != null) {
      labels.putAll(buildImageCmd.getLabels());
    }
    labels.putAll(DockerClientFactory.DEFAULT_LABELS);
    buildImageCmd.withLabels(labels);

    prePullDependencyImages(dependencyImageNames);

    BuildImageResultCallback exec = buildImageCmd.exec(resultCallback);

    long bytesToDockerDaemon = 0;

    // To build an image, we have to send the context to Docker in TAR archive format
    try (TarArchiveOutputStream tarArchive = new TarArchiveOutputStream(new GZIPOutputStream(out))) {
      tarArchive.setLongFileMode(TarArchiveOutputStream.LONGFILE_POSIX);

      for (Map.Entry<String, Transferable> entry : transferables.entrySet()) {
        Transferable transferable = entry.getValue();
        final String destination = entry.getKey();
        transferable.transferTo(tarArchive, destination);
        bytesToDockerDaemon += transferable.getSize();
      }
      tarArchive.finish();
    }

    log.info("Transferred {} to Docker daemon", FileUtils.byteCountToDisplaySize(bytesToDockerDaemon));
    if (bytesToDockerDaemon > FileUtils.ONE_MB * 50) // warn if >50MB sent to docker daemon
      log.warn("A large amount of data was sent to the Docker daemon ({}). Consider using a .dockerignore file for better performance.",
          FileUtils.byteCountToDisplaySize(bytesToDockerDaemon));

    exec.awaitImageId();

    return dockerImageName;
  } catch (IOException e) {
    throw new RuntimeException("Can't close DockerClient", e);
  }
}
Example 9
Source File: TezTaskLevelAggregatedMetrics.java, from dr-elephant (Apache License 2.0)

/**
 * Computes the aggregated metrics -> peakMemory, delay, total task duration, wasted resources and memory usage.
 * @param taskDatas
 * @param containerSize
 * @param idealStartTime
 */
private void compute(TezTaskData[] taskDatas, long containerSize, long idealStartTime) {
  long peakMemoryNeed = 0;
  long taskFinishTimeMax = 0;
  long taskDurationMax = 0;

  // if there are zero tasks, then nothing to compute.
  if (taskDatas == null || taskDatas.length == 0) {
    return;
  }

  for (TezTaskData taskData : taskDatas) {
    if (!taskData.isSampled()) {
      continue;
    }
    long taskMemory = taskData.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskVM = taskData.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskDuration = taskData.getFinishTime() - taskData.getStartTime(); // Milliseconds
    long taskCost = (containerSize) * (taskDuration / Statistics.SECOND_IN_MS); // MB Seconds

    durations.add(taskDuration);
    finishTimes.add(taskData.getFinishTime());

    // peak Memory usage
    long memoryRequiredForVM = (long) (taskVM / CLUSTER_MEMORY_FACTOR);
    long biggerMemoryRequirement = memoryRequiredForVM > taskMemory ? memoryRequiredForVM : taskMemory;
    peakMemoryNeed = biggerMemoryRequirement > peakMemoryNeed ? biggerMemoryRequirement : peakMemoryNeed;

    if (taskFinishTimeMax < taskData.getFinishTime()) {
      taskFinishTimeMax = taskData.getFinishTime();
    }
    if (taskDurationMax < taskDuration) {
      taskDurationMax = taskDuration;
    }
    _resourceUsed += taskCost;
  }

  // Compute the delay in starting the task.
  _delay = taskFinishTimeMax - (idealStartTime + taskDurationMax);

  // invalid delay
  if (_delay < 0) {
    _delay = 0;
  }

  // wastedResources
  long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER);
  if (wastedMemory > 0) {
    for (long duration : durations) {
      _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds
    }
  }
}
Example 10
Source File: GenericMemoryHeuristic.java, from dr-elephant (Apache License 2.0)

public HeuristicResult apply(TezApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  TezTaskData[] tasks = getTasks(data);

  List<Long> totalPhysicalMemory = new LinkedList<Long>();
  List<Long> totalVirtualMemory = new LinkedList<Long>();
  List<Long> runTime = new LinkedList<Long>();

  for (TezTaskData task : tasks) {
    if (task.isSampled()) {
      totalPhysicalMemory.add(task.getCounters().get(TezCounterData.CounterName.PHYSICAL_MEMORY_BYTES));
      totalVirtualMemory.add(task.getCounters().get(TezCounterData.CounterName.VIRTUAL_MEMORY_BYTES));
      runTime.add(task.getTotalRunTimeMs());
    }
  }

  long averagePMem = Statistics.average(totalPhysicalMemory);
  long averageVMem = Statistics.average(totalVirtualMemory);
  long maxPMem;
  long minPMem;
  try {
    maxPMem = Collections.max(totalPhysicalMemory);
    minPMem = Collections.min(totalPhysicalMemory);
  } catch (Exception exception) {
    maxPMem = 0;
    minPMem = 0;
  }
  long averageRunTime = Statistics.average(runTime);

  String containerSizeStr;
  if (!Strings.isNullOrEmpty(data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF))
      && Long.parseLong(data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF)) > 0) {
    containerSizeStr = data.getConf().getProperty(TEZ_MAPPER_MEMORY_CONF);
  } else if (!Strings.isNullOrEmpty(data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF))
      && Long.parseLong(data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF)) > 0) {
    containerSizeStr = data.getConf().getProperty(HIVE_MAPPER_MEMORY_CONF);
  } else if (!Strings.isNullOrEmpty(data.getConf().getProperty(_mapredContainerMemConf))
      && Long.parseLong(data.getConf().getProperty(_mapredContainerMemConf)) > 0) {
    containerSizeStr = data.getConf().getProperty(_mapredContainerMemConf);
  } else {
    containerSizeStr = getContainerMemDefaultMBytes();
  }

  long containerSize = Long.parseLong(containerSizeStr) * FileUtils.ONE_MB;
  double averageMemMb = (double) ((averagePMem) / FileUtils.ONE_MB);
  double ratio = averageMemMb / ((double) (containerSize / FileUtils.ONE_MB));

  Severity severity;
  if (tasks.length == 0) {
    severity = Severity.NONE;
  } else {
    severity = getMemoryRatioSeverity(ratio);
  }

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Maximum Physical Memory (MB)",
      tasks.length == 0 ? "0" : Long.toString(maxPMem / FileUtils.ONE_MB));
  result.addResultDetail("Minimum Physical memory (MB)",
      tasks.length == 0 ? "0" : Long.toString(minPMem / FileUtils.ONE_MB));
  result.addResultDetail("Average Physical Memory (MB)",
      tasks.length == 0 ? "0" : Long.toString(averagePMem / FileUtils.ONE_MB));
  result.addResultDetail("Average Virtual Memory (MB)",
      tasks.length == 0 ? "0" : Long.toString(averageVMem / FileUtils.ONE_MB));
  result.addResultDetail("Average Task RunTime",
      tasks.length == 0 ? "0" : Statistics.readableTimespan(averageRunTime));
  result.addResultDetail("Requested Container Memory (MB)",
      (tasks.length == 0 || containerSize == 0 || containerSize == -1) ? "0"
          : String.valueOf(containerSize / FileUtils.ONE_MB));

  return result;
}
Example 11
Source File: TaskLevelAggregatedMetrics.java, from dr-elephant (Apache License 2.0)

/**
 * Computes the aggregated metrics -> peakMemory, delay, total task duration, wasted resources and memory usage.
 * Aggregated metrics are expected to be approximation when sampling is enabled.
 * @param taskDatas
 * @param containerSize
 * @param idealStartTime
 */
private void compute(MapReduceTaskData[] taskDatas, long containerSize, long idealStartTime) {
  long peakMemoryNeed = 0;
  long taskFinishTimeMax = 0;
  long taskDurationMax = 0;

  // if there are zero tasks, then nothing to compute.
  if (taskDatas == null || taskDatas.length == 0) {
    return;
  }

  for (MapReduceTaskData taskData : taskDatas) {
    if (!taskData.isTimeAndCounterDataPresent()) {
      continue;
    }
    long taskMemory = taskData.getCounters().get(MapReduceCounterData.CounterName.PHYSICAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskVM = taskData.getCounters().get(MapReduceCounterData.CounterName.VIRTUAL_MEMORY_BYTES) / FileUtils.ONE_MB; // MB
    long taskDuration = taskData.getFinishTimeMs() - taskData.getStartTimeMs(); // Milliseconds
    long taskCost = (containerSize) * (taskDuration / Statistics.SECOND_IN_MS); // MB Seconds

    durations.add(taskDuration);
    finishTimes.add(taskData.getFinishTimeMs());

    // peak Memory usage
    long memoryRequiredForVM = (long) (taskVM / CLUSTER_MEMORY_FACTOR);
    long biggerMemoryRequirement = memoryRequiredForVM > taskMemory ? memoryRequiredForVM : taskMemory;
    peakMemoryNeed = biggerMemoryRequirement > peakMemoryNeed ? biggerMemoryRequirement : peakMemoryNeed;

    if (taskFinishTimeMax < taskData.getFinishTimeMs()) {
      taskFinishTimeMax = taskData.getFinishTimeMs();
    }
    if (taskDurationMax < taskDuration) {
      taskDurationMax = taskDuration;
    }
    _resourceUsed += taskCost;
  }

  // Compute the delay in starting the task.
  _delay = taskFinishTimeMax - (idealStartTime + taskDurationMax);

  // invalid delay
  if (_delay < 0) {
    _delay = 0;
  }

  // wastedResources
  long wastedMemory = containerSize - (long) (peakMemoryNeed * MEMORY_BUFFER); // give a 50% buffer
  if (wastedMemory > 0) {
    for (long duration : durations) {
      _resourceWasted += (wastedMemory) * (duration / Statistics.SECOND_IN_MS); // MB Seconds
    }
  }
}
Example 12
Source File: GenericMemoryHeuristic.java, from dr-elephant (Apache License 2.0)

@Override
public HeuristicResult apply(MapReduceApplicationData data) {
  if (!data.getSucceeded()) {
    return null;
  }

  String containerSizeStr = data.getConf().getProperty(_containerMemConf);
  long containerMem = -1L;

  if (containerSizeStr != null) {
    try {
      containerMem = Long.parseLong(containerSizeStr);
    } catch (NumberFormatException e0) {
      // Some job has a string var like "${VAR}" for this config.
      if (containerSizeStr.startsWith("$")) {
        String realContainerConf = containerSizeStr.substring(containerSizeStr.indexOf("{") + 1,
            containerSizeStr.indexOf("}"));
        String realContainerSizeStr = data.getConf().getProperty(realContainerConf);
        try {
          containerMem = Long.parseLong(realContainerSizeStr);
        } catch (NumberFormatException e1) {
          logger.warn(realContainerConf + ": expected number [" + realContainerSizeStr + "]");
        }
      } else {
        logger.warn(_containerMemConf + ": expected number [" + containerSizeStr + "]");
      }
    }
  }
  if (containerMem < 0) {
    containerMem = getContainerMemDefaultMBytes();
  }
  containerMem *= FileUtils.ONE_MB;

  MapReduceTaskData[] tasks = getTasks(data);
  List<Long> taskPMems = new ArrayList<Long>();
  List<Long> taskVMems = new ArrayList<Long>();
  List<Long> runtimesMs = new ArrayList<Long>();
  long taskPMin = Long.MAX_VALUE;
  long taskPMax = 0;
  for (MapReduceTaskData task : tasks) {
    if (task.isTimeAndCounterDataPresent()) {
      runtimesMs.add(task.getTotalRunTimeMs());
      long taskPMem = task.getCounters().get(MapReduceCounterData.CounterName.PHYSICAL_MEMORY_BYTES);
      long taskVMem = task.getCounters().get(MapReduceCounterData.CounterName.VIRTUAL_MEMORY_BYTES);
      taskPMems.add(taskPMem);
      taskPMin = Math.min(taskPMin, taskPMem);
      taskPMax = Math.max(taskPMax, taskPMem);
      taskVMems.add(taskVMem);
    }
  }

  if (taskPMin == Long.MAX_VALUE) {
    taskPMin = 0;
  }

  long taskPMemAvg = Statistics.average(taskPMems);
  long taskVMemAvg = Statistics.average(taskVMems);
  long averageTimeMs = Statistics.average(runtimesMs);

  Severity severity;
  if (tasks.length == 0) {
    severity = Severity.NONE;
  } else {
    severity = getTaskMemoryUtilSeverity(taskPMemAvg, containerMem);
  }

  HeuristicResult result = new HeuristicResult(_heuristicConfData.getClassName(),
      _heuristicConfData.getHeuristicName(), severity, Utils.getHeuristicScore(severity, tasks.length));

  result.addResultDetail("Number of tasks", Integer.toString(tasks.length));
  result.addResultDetail("Avg task runtime", Statistics.readableTimespan(averageTimeMs));
  result.addResultDetail("Avg Physical Memory (MB)", Long.toString(taskPMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Max Physical Memory (MB)", Long.toString(taskPMax / FileUtils.ONE_MB));
  result.addResultDetail("Min Physical Memory (MB)", Long.toString(taskPMin / FileUtils.ONE_MB));
  result.addResultDetail("Avg Virtual Memory (MB)", Long.toString(taskVMemAvg / FileUtils.ONE_MB));
  result.addResultDetail("Requested Container Memory", FileUtils.byteCountToDisplaySize(containerMem));

  return result;
}
Example 13
Source File: MapReduceTaskNumProcessor.java, from eagle (Apache License 2.0)

private String analyzeReduceTaskNum(List<String> optSettings) {
  StringBuilder sb = new StringBuilder();

  long numReduces = context.getNumReduces();
  if (numReduces > 0) {
    long avgReduceTime = context.getAvgReduceTimeInSec();
    long avgShuffleTime = context.getAvgShuffleTimeInSec();
    long avgShuffleBytes = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_SHUFFLE_BYTES)
        / numReduces;
    long avgReduceOutput = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.HDFS_BYTES_WRITTEN)
        / numReduces;
    long avgReduceTotalTime = avgShuffleTime + avgReduceTime;

    long suggestReduces = 0;
    StringBuilder tmpsb = new StringBuilder();

    String avgShuffleDisplaySize = bytesToHumanReadable(avgShuffleBytes);
    if (avgShuffleBytes < 256 * FileUtils.ONE_MB && avgReduceTotalTime < 300
        && avgReduceOutput < 256 * FileUtils.ONE_MB && numReduces > 1) {
      tmpsb.append("average reduce input bytes is: ").append(avgShuffleDisplaySize).append(", ");
      suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
    } else if (avgShuffleBytes > 10 * FileUtils.ONE_GB && avgReduceTotalTime > 1800) {
      tmpsb.append("average reduce input bytes is: ").append(avgShuffleDisplaySize).append(", ");
      suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
    }

    if (avgReduceTotalTime < 60 && numReduces > 1) {
      tmpsb.append("average reduce time is only ").append(avgReduceTotalTime).append(" seconds, ");
      if (suggestReduces == 0) {
        suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
      }
    } else if (avgReduceTotalTime > 3600 && avgReduceTime > 1800) {
      tmpsb.append("average reduce time is ").append(avgReduceTotalTime).append(" seconds, ");
      if (suggestReduces == 0) {
        suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
      }
    }

    String avgReduceOutputDisplaySize = bytesToHumanReadable(avgReduceOutput);
    if (avgReduceOutput < 10 * FileUtils.ONE_MB && avgReduceTime < 300
        && avgShuffleBytes < 2 * FileUtils.ONE_GB && numReduces > 1) {
      tmpsb.append(" average reduce output is only ").append(avgReduceOutputDisplaySize).append(", ");
      if (suggestReduces == 0) {
        suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
      }
    } else if (avgReduceOutput > 10 * FileUtils.ONE_GB && avgReduceTime > 1800) {
      tmpsb.append(" average reduce output is ").append(avgReduceOutputDisplaySize).append(", ");
      if (suggestReduces == 0) {
        suggestReduces = getReduceNum(avgShuffleBytes, avgReduceOutput, avgReduceTime);
      }
    }

    if (suggestReduces > 0) {
      sb.append("Best practice: ").append(tmpsb.toString()).append("please consider ");
      if (suggestReduces > numReduces) {
        sb.append("increasing the ");
      } else {
        sb.append("decreasing the ");
      }
      String setting = String.format("-D%s=%s", NUM_REDUCES, suggestReduces);
      sb.append("reducer number. You could try ").append(setting).append("\n");
      optSettings.add(setting);
    }
  }
  return sb.toString();
}
Example 14
Source File: MapReduceFSFetcherHadoop2.java, from dr-elephant (Apache License 2.0)

@Override
public MapReduceApplicationData fetchData(AnalyticJob job) throws IOException {
  DataFiles files = getHistoryFiles(job);
  String confFile = files.getJobConfPath();
  String histFile = files.getJobHistPath();
  String appId = job.getAppId();
  String jobId = Utils.getJobIdFromApplicationId(appId);

  MapReduceApplicationData jobData = new MapReduceApplicationData();
  jobData.setAppId(appId).setJobId(jobId);

  // Fetch job config
  Configuration jobConf = new Configuration(false);
  jobConf.addResource(_fs.open(new Path(confFile)), confFile);
  Properties jobConfProperties = new Properties();
  for (Map.Entry<String, String> entry : jobConf) {
    jobConfProperties.put(entry.getKey(), entry.getValue());
  }
  jobData.setJobConf(jobConfProperties);

  // Check if job history file is too large and should be throttled
  if (_fs.getFileStatus(new Path(histFile)).getLen() > _maxLogSizeInMB * FileUtils.ONE_MB) {
    String errMsg = "The history log of MapReduce application: " + appId + " is over the limit size of "
        + _maxLogSizeInMB + " MB, the parsing process gets throttled.";
    logger.warn(errMsg);
    jobData.setDiagnosticInfo(errMsg);
    jobData.setSucceeded(false);  // set succeeded to false to avoid heuristic analysis
    return jobData;
  }

  // Analyze job history file
  JobHistoryParser parser = new JobHistoryParser(_fs, histFile);
  JobHistoryParser.JobInfo jobInfo = parser.parse();
  IOException parseException = parser.getParseException();
  if (parseException != null) {
    throw new RuntimeException("Could not parse history file " + histFile, parseException);
  }

  // Populate missing fields from parsed job info. This info will be missing for backfilled jobs.
  populateJobFromJobInfo(job, jobInfo);

  jobData.setSubmitTime(jobInfo.getSubmitTime());
  jobData.setStartTime(jobInfo.getLaunchTime());
  jobData.setFinishTime(jobInfo.getFinishTime());

  String state = jobInfo.getJobStatus();
  if (state.equals("SUCCEEDED")) {
    jobData.setSucceeded(true);
  } else if (state.equals("FAILED")) {
    jobData.setSucceeded(false);
    jobData.setDiagnosticInfo(jobInfo.getErrorInfo());
  } else {
    throw new RuntimeException("job neither succeeded or failed. can not process it ");
  }

  // Fetch job counter
  MapReduceCounterData jobCounter = getCounterData(jobInfo.getTotalCounters());

  // Fetch task data
  Map<TaskID, JobHistoryParser.TaskInfo> allTasks = jobInfo.getAllTasks();
  List<JobHistoryParser.TaskInfo> mapperInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  List<JobHistoryParser.TaskInfo> reducerInfoList = new ArrayList<JobHistoryParser.TaskInfo>();
  for (JobHistoryParser.TaskInfo taskInfo : allTasks.values()) {
    if (taskInfo.getTaskType() == TaskType.MAP) {
      mapperInfoList.add(taskInfo);
    } else {
      reducerInfoList.add(taskInfo);
    }
  }
  if (jobInfo.getTotalMaps() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total mappers: " + mapperInfoList.size());
  }
  if (jobInfo.getTotalReduces() > MAX_SAMPLE_SIZE) {
    logger.debug(jobId + " total reducers: " + reducerInfoList.size());
  }
  MapReduceTaskData[] mapperList = getTaskData(jobId, mapperInfoList);
  MapReduceTaskData[] reducerList = getTaskData(jobId, reducerInfoList);

  jobData.setCounters(jobCounter).setMapperData(mapperList).setReducerData(reducerList);

  return jobData;
}
Example 15
Source File: TonyMetricsAggregator.java, from dr-elephant (Apache License 2.0)

@Override
public void aggregate(HadoopApplicationData data) {
  _hadoopAggregatedData = new HadoopAggregatedData();

  TonyApplicationData tonyData = (TonyApplicationData) data;
  Configuration tonyConf = tonyData.getConfiguration();

  long mbSecUsed = 0;
  long mbSecWasted = 0;

  Map<String, Map<Integer, TonyTaskData>> taskMap = tonyData.getTaskMap();
  for (Map.Entry<String, Map<Integer, TonyTaskData>> entry : taskMap.entrySet()) {
    String taskType = entry.getKey();

    String memoryString = tonyConf.get(TonyConfigurationKeys.getResourceKey(taskType, Constants.MEMORY));
    String memoryStringMB = com.linkedin.tony.util.Utils.parseMemoryString(memoryString);
    long mbRequested = Long.parseLong(memoryStringMB);
    double maxMemoryMBUsed = TonyUtils.getMaxMetricForTaskTypeAndMetricName(taskMap, taskType,
        Constants.MAX_MEMORY_BYTES) / FileUtils.ONE_MB;

    for (TonyTaskData taskData : entry.getValue().values()) {
      long taskDurationSec = (taskData.getTaskEndTime() - taskData.getTaskStartTime()) / Statistics.SECOND_IN_MS;
      if (taskDurationSec < 0) {
        // Most likely TASK_FINISHED and APPLICATION_FINISHED events are missing for the task.
        continue;
      }

      mbSecUsed += mbRequested * taskDurationSec;

      if (maxMemoryMBUsed == 0) {
        // If we don't have max memory metrics, don't calculate wasted memory.
        continue;
      }
      long wastedMemory = (long) (mbRequested - maxMemoryMBUsed * MEMORY_BUFFER);
      if (wastedMemory > 0) {
        mbSecWasted += wastedMemory * taskDurationSec;
      }
    }
  }

  _hadoopAggregatedData.setResourceUsed(mbSecUsed);
  _hadoopAggregatedData.setResourceWasted(mbSecWasted);
  // TODO: Calculate and set delay
}
Example 16
Source File: StandardQuotaStrategy.java, from alfresco-repository (GNU Lesser General Public License v3.0)

protected long getMaxFileSizeBytes() {
  return maxFileSizeMB * FileUtils.ONE_MB;
}
Example 17
Source File: StandardQuotaStrategy.java, from alfresco-repository (GNU Lesser General Public License v3.0)

public long getMaxUsageMB() {
  return maxUsageBytes / FileUtils.ONE_MB;
}
Example 18
Source File: StandardQuotaStrategy.java, from alfresco-repository (GNU Lesser General Public License v3.0)

public double getCurrentUsageMB() {
  return (double) getCurrentUsageBytes() / FileUtils.ONE_MB;
}
Example 19
Source File: CachedContentCleaner.java, from alfresco-repository (GNU Lesser General Public License v3.0)

public double getSizeFilesDeletedMB() {
  return (double) getSizeFilesDeleted() / FileUtils.ONE_MB;
}
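The last four examples show the two directions of the same conversion on long fields: multiply an MB-denominated limit by FileUtils.ONE_MB to get a byte threshold, and cast to double before dividing by FileUtils.ONE_MB when a fractional megabyte figure is wanted for reporting. A minimal standalone sketch of that pattern follows; the field names and values are illustrative, not taken from the Alfresco sources above.

import org.apache.commons.io.FileUtils;

class QuotaExample {
  private long maxFileSizeMB = 20;            // limit configured in MB (illustrative value)
  private long currentUsageBytes = 5_242_880; // 5 MB of usage tracked in bytes (illustrative value)

  // MB -> bytes, for an exact threshold comparison.
  long getMaxFileSizeBytes() {
    return maxFileSizeMB * FileUtils.ONE_MB;
  }

  // bytes -> MB as a double, so sub-megabyte usage is not truncated to zero.
  double getCurrentUsageMB() {
    return (double) currentUsageBytes / FileUtils.ONE_MB;
  }
}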