Java Code Examples for org.apache.flink.api.common.JobExecutionResult#getAllAccumulatorResults()

The following examples show how to use org.apache.flink.api.common.JobExecutionResult#getAllAccumulatorResults(). They are taken from open source projects; the source file and originating project are noted above each example.
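Before the project excerpts, here is a minimal, self-contained sketch (not taken from any of the projects below; the class name, the accumulator name "line-counter", and the job name are illustrative) of registering a LongCounter in a RichMapFunction and reading it back through getAllAccumulatorResults() after env.execute() returns:

import java.util.Map;

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;

public class AccumulatorSketch {

	public static void main(String[] args) throws Exception {
		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		env.fromElements("a", "b", "c")
			.map(new RichMapFunction<String, String>() {
				private final LongCounter counter = new LongCounter();

				@Override
				public void open(Configuration parameters) {
					// Register the accumulator under a user-chosen name.
					getRuntimeContext().addAccumulator("line-counter", counter);
				}

				@Override
				public String map(String value) {
					counter.add(1L);
					return value;
				}
			})
			.output(new DiscardingOutputFormat<String>());

		// execute() blocks until the job finishes and returns a JobExecutionResult.
		JobExecutionResult result = env.execute("accumulator-sketch");

		// getAllAccumulatorResults() returns the final value of every registered accumulator.
		Map<String, Object> accumulators = result.getAllAccumulatorResults();
		for (Map.Entry<String, Object> entry : accumulators.entrySet()) {
			System.out.println(entry.getKey() + " = " + entry.getValue());
		}
	}
}

The same call works on any JobExecutionResult, regardless of which API produced the job, as the examples below illustrate.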
Example 1
Source File: CliFrontend.java    From Flink-CEPplus with Apache License 2.0
protected void executeProgram(PackagedProgram program, ClusterClient<?> client, int parallelism) throws ProgramMissingJobException, ProgramInvocationException {
	logAndSysout("Starting execution of program");

	final JobSubmissionResult result = client.run(program, parallelism);

	if (null == result) {
		throw new ProgramMissingJobException("No JobSubmissionResult returned, please make sure you called " +
			"ExecutionEnvironment.execute()");
	}

	if (result.isJobExecutionResult()) {
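		// The client attached to the job and waited for completion, so the full
		// JobExecutionResult (including accumulator values) is available.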
		logAndSysout("Program execution finished");
		JobExecutionResult execResult = result.getJobExecutionResult();
		System.out.println("Job with JobID " + execResult.getJobID() + " has finished.");
		System.out.println("Job Runtime: " + execResult.getNetRuntime() + " ms");
		Map<String, Object> accumulatorsResult = execResult.getAllAccumulatorResults();
		if (accumulatorsResult.size() > 0) {
			System.out.println("Accumulator Results: ");
			System.out.println(AccumulatorHelper.getResultsFormatted(accumulatorsResult));
		}
	} else {
		logAndSysout("Job has been submitted with JobID " + result.getJobID());
	}
}
 
Example 2
Source File: CliFrontend.java    From flink with Apache License 2.0
protected void executeProgram(PackagedProgram program, ClusterClient<?> client, int parallelism) throws ProgramMissingJobException, ProgramInvocationException {
	logAndSysout("Starting execution of program");

	final JobSubmissionResult result = client.run(program, parallelism);

	if (null == result) {
		throw new ProgramMissingJobException("No JobSubmissionResult returned, please make sure you called " +
			"ExecutionEnvironment.execute()");
	}

	if (result.isJobExecutionResult()) {
		logAndSysout("Program execution finished");
		JobExecutionResult execResult = result.getJobExecutionResult();
		System.out.println("Job with JobID " + execResult.getJobID() + " has finished.");
		System.out.println("Job Runtime: " + execResult.getNetRuntime() + " ms");
		Map<String, Object> accumulatorsResult = execResult.getAllAccumulatorResults();
		if (accumulatorsResult.size() > 0) {
			System.out.println("Accumulator Results: ");
			System.out.println(AccumulatorHelper.getResultsFormatted(accumulatorsResult));
		}
	} else {
		logAndSysout("Job has been submitted with JobID " + result.getJobID());
	}
}
 
Example 3
Source File: AccumulatorErrorITCase.java    From Flink-CEPplus with Apache License 2.0
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
	try {
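		// Fetching the accumulator results should surface the exception thrown by the faulty accumulator.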
		result.getAllAccumulatorResults();
		fail("Should have failed");
	}
	catch (Exception ex) {
		assertTrue(ExceptionUtils.findThrowable(ex, CustomException.class).isPresent());
	}
}
 
Example 4
Source File: AccumulatorErrorITCase.java    From flink with Apache License 2.0
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
	try {
		result.getAllAccumulatorResults();
		fail("Should have failed");
	}
	catch (Exception ex) {
		assertTrue(ExceptionUtils.findThrowable(ex, CustomException.class).isPresent());
	}
}
 
Example 5
Source File: FlinkPipelineRunner.java    From beam with Apache License 2.0
private PortablePipelineResult createPortablePipelineResult(
    JobExecutionResult result, PipelineOptions options) {
  // The package of DetachedJobExecutionResult has been changed in 1.10.
  // Refer to https://github.com/apache/flink/commit/c36b35e6876ecdc717dade653e8554f9d8b543c9 for
  // details.
  String resultClassName = result.getClass().getCanonicalName();
  if (resultClassName.equals(
          "org.apache.flink.client.program.DetachedEnvironment.DetachedJobExecutionResult")
      || resultClassName.equals("org.apache.flink.core.execution.DetachedJobExecutionResult")) {
    LOG.info("Pipeline submitted in Detached mode");
    // no metricsPusher because metrics are not supported in detached mode
    return new FlinkPortableRunnerResult.Detached();
  } else {
    LOG.info("Execution finished in {} msecs", result.getNetRuntime());
    Map<String, Object> accumulators = result.getAllAccumulatorResults();
    if (accumulators != null && !accumulators.isEmpty()) {
      LOG.info("Final accumulator values:");
      for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
        LOG.info("{} : {}", entry.getKey(), entry.getValue());
      }
    }
    FlinkPortableRunnerResult flinkRunnerResult =
        new FlinkPortableRunnerResult(accumulators, result.getNetRuntime());
    MetricsPusher metricsPusher =
        new MetricsPusher(
            flinkRunnerResult.getMetricsContainerStepMap(),
            options.as(MetricsOptions.class),
            flinkRunnerResult);
    metricsPusher.start();
    return flinkRunnerResult;
  }
}
 
Example 6
Source File: FlinkRunner.java    From beam with Apache License 2.0
static PipelineResult createPipelineResult(JobExecutionResult result, PipelineOptions options) {
  // The package of DetachedJobExecutionResult has been changed in 1.10.
  // Refer to https://github.com/apache/flink/commit/c36b35e6876ecdc717dade653e8554f9d8b543c9 for
  // more details.
  String resultClassName = result.getClass().getCanonicalName();
  if (resultClassName.equals(
          "org.apache.flink.client.program.DetachedEnvironment.DetachedJobExecutionResult")
      || resultClassName.equals("org.apache.flink.core.execution.DetachedJobExecutionResult")) {
    LOG.info("Pipeline submitted in Detached mode");
    // no metricsPusher because metrics are not supported in detached mode
    return new FlinkDetachedRunnerResult();
  } else {
    LOG.info("Execution finished in {} msecs", result.getNetRuntime());
    Map<String, Object> accumulators = result.getAllAccumulatorResults();
    if (accumulators != null && !accumulators.isEmpty()) {
      LOG.info("Final accumulator values:");
      for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
        LOG.info("{} : {}", entry.getKey(), entry.getValue());
      }
    }
    FlinkRunnerResult flinkRunnerResult =
        new FlinkRunnerResult(accumulators, result.getNetRuntime());
    MetricsPusher metricsPusher =
        new MetricsPusher(
            flinkRunnerResult.getMetricsContainerStepMap(),
            options.as(MetricsOptions.class),
            flinkRunnerResult);
    metricsPusher.start();
    return flinkRunnerResult;
  }
}
 
Example 7
Source File: FlinkPipelineRunner.java    From flink-dataflow with Apache License 2.0
@Override
public FlinkRunnerResult run(Pipeline pipeline) {
	LOG.info("Executing pipeline using FlinkPipelineRunner.");

	LOG.info("Translating pipeline to Flink program.");

	this.flinkJobEnv.translate(pipeline);

	LOG.info("Starting execution of Flink program.");
	
	JobExecutionResult result;
	try {
		result = this.flinkJobEnv.executePipeline();
	} catch (Exception e) {
		LOG.error("Pipeline execution failed", e);
		throw new RuntimeException("Pipeline execution failed", e);
	}

	LOG.info("Execution finished in {} msecs", result.getNetRuntime());

	Map<String, Object> accumulators = result.getAllAccumulatorResults();
	if (accumulators != null && !accumulators.isEmpty()) {
		LOG.info("Final aggregator values:");

		for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
			LOG.info("{} : {}", entry.getKey(), entry.getValue());
		}
	}

	return new FlinkRunnerResult(accumulators, result.getNetRuntime());
}
 
Example 8
Source File: AccumulatorErrorITCase.java    From flink with Apache License 2.0
private static void assertAccumulatorsShouldFail(JobExecutionResult result) {
	try {
		result.getAllAccumulatorResults();
		fail("Should have failed");
	}
	catch (Exception ex) {
		assertTrue(findThrowable(ex, CustomException.class).isPresent());
	}
}
 
Example 9
Source File: FlinkFactDistinctColumns.java    From kylin with Apache License 2.0
@Override
protected void execute(OptionsHelper optionsHelper) throws Exception {
    String cubeName = optionsHelper.getOptionValue(OPTION_CUBE_NAME);
    String metaUrl = optionsHelper.getOptionValue(OPTION_META_URL);
    String segmentId = optionsHelper.getOptionValue(OPTION_SEGMENT_ID);
    String hiveTable = optionsHelper.getOptionValue(OPTION_INPUT_TABLE);
    String inputPath = optionsHelper.getOptionValue(OPTION_INPUT_PATH);
    String outputPath = optionsHelper.getOptionValue(OPTION_OUTPUT_PATH);
    String counterPath = optionsHelper.getOptionValue(OPTION_COUNTER_PATH);
    int samplingPercent = Integer.parseInt(optionsHelper.getOptionValue(OPTION_STATS_SAMPLING_PERCENT));
    String enableObjectReuseOptValue = optionsHelper.getOptionValue(OPTION_ENABLE_OBJECT_REUSE);

    Job job = Job.getInstance();
    FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
    HadoopUtil.deletePath(job.getConfiguration(), new Path(outputPath));

    final SerializableConfiguration sConf = new SerializableConfiguration(job.getConfiguration());
    KylinConfig envConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(sConf, metaUrl);

    final CubeInstance cubeInstance = CubeManager.getInstance(envConfig).getCube(cubeName);

    final FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
    final int totalReducer = reducerMapping.getTotalReducerNum();

    logger.info("getTotalReducerNum: {}", totalReducer);
    logger.info("getCuboidRowCounterReducerNum: {}", reducerMapping.getCuboidRowCounterReducerNum());
    logger.info("counter path {}", counterPath);

    boolean isSequenceFile = JoinedFlatTable.SEQUENCEFILE.equalsIgnoreCase(envConfig.getFlatTableStorageFormat());

    // calculate source record bytes size
    final String bytesWrittenName = "byte-writer-counter";
    final String recordCounterName = "record-counter";

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (!StringUtil.isEmpty(enableObjectReuseOptValue) &&
            enableObjectReuseOptValue.equalsIgnoreCase("true")) {
        env.getConfig().enableObjectReuse();
    }

    DataSet<String[]> recordDataSet = FlinkUtil.readHiveRecords(isSequenceFile, env, inputPath, hiveTable, job);

    // read record from flat table
    // output:
    //   1, statistic
    //   2, field value of dict col
    //   3, min/max field value of not dict col
    DataSet<Tuple2<SelfDefineSortableKey, Text>> flatOutputDataSet = recordDataSet.mapPartition(
            new FlatOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent,
                    bytesWrittenName, recordCounterName));

    // repartition data, make each reducer handle only one col data or the statistic data
    DataSet<Tuple2<SelfDefineSortableKey, Text>> partitionDataSet = flatOutputDataSet
            .partitionCustom(new FactDistinctColumnPartitioner(cubeName, metaUrl, sConf), 0)
            .setParallelism(totalReducer);

    // multiple output result
    // 1, CFG_OUTPUT_COLUMN: field values of dict col, which will not be built in reducer, like globalDictCol
    // 2, CFG_OUTPUT_DICT: dictionary object built in reducer
    // 3, CFG_OUTPUT_STATISTICS: cube statistic: hll of cuboids ...
    // 4, CFG_OUTPUT_PARTITION: dimension value range(min,max)
    DataSet<Tuple2<String, Tuple3<Writable, Writable, String>>> outputDataSet = partitionDataSet
            .mapPartition(new MultiOutputMapPartitionFunction(sConf, cubeName, segmentId, metaUrl, samplingPercent))
            .setParallelism(totalReducer);

    // make each reducer output to respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    // prevent to create zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    outputDataSet.output(new HadoopMultipleOutputFormat(new LazyOutputFormat(), job));

    JobExecutionResult jobExecutionResult =
            env.execute("Fact distinct columns for:" + cubeName + " segment " + segmentId);
    Map<String, Object> accumulatorResults = jobExecutionResult.getAllAccumulatorResults();
    Long recordCount = (Long) accumulatorResults.get(recordCounterName);
    Long bytesWritten = (Long) accumulatorResults.get(bytesWrittenName);
    logger.info("Map input records={}", recordCount);
    logger.info("HDFS Read: {} HDFS Write", bytesWritten);
    logger.info("HDFS: Number of bytes written=" + FlinkBatchCubingJobBuilder2.getFileSize(outputPath, fs));

    Map<String, String> counterMap = Maps.newHashMap();
    counterMap.put(ExecutableConstants.SOURCE_RECORDS_COUNT, String.valueOf(recordCount));
    counterMap.put(ExecutableConstants.SOURCE_RECORDS_SIZE, String.valueOf(bytesWritten));

    // save counter to hdfs
    HadoopUtil.writeToSequenceFile(job.getConfiguration(), counterPath, counterMap);
}
 
Example 10
Source File: AdaptivePageRank.java    From flink-perf with Apache License 2.0
public static void main(String[] args) throws Exception {

		long numVertices = 41652230;

		double threshold = 0.005 / numVertices;
		double dampeningFactor = 0.85;

		String adjacencyPath = args.length > 1 ? args[0] : "/data/demodata/pagerank/edges/edges.csv";
		String outpath = args.length > 2 ? args[1] : "/data/demodata/pagerank/adacency_comp";
		int numIterations = args.length > 3 ? Integer.valueOf(args[2]) : 100;

		ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	//	env.setDegreeOfParallelism(4);

		DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());
		DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder());


		DataSet<Tuple2<Long, Double>> initialRanks = adjacency
				.flatMap(new InitialMessageBuilder(numVertices, dampeningFactor))
				.groupBy(0)
				.reduceGroup(new Agg());

		DataSet<Tuple2<Long, Double>> initialDeltas = initialRanks.map(new InitialDeltaBuilder(numVertices));


		// ---------- iterative part ---------

		DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> adaptiveIteration = initialRanks.iterateDelta(initialDeltas, numIterations, 0);

		DataSet<Tuple2<Long, Double>> deltas = adaptiveIteration.getWorkset()
				.join(adjacency2).where(0).equalTo(0).with(new DeltaDistributor(0.85))
				.groupBy(0)
				.reduceGroup(new AggAndFilter(threshold));

		DataSet<Tuple2<Long, Double>> rankUpdates = adaptiveIteration.getSolutionSet()
				.join(deltas).where(0).equalTo(0).with(new SolutionJoin());

		adaptiveIteration.closeWith(rankUpdates, deltas)
				.writeAsCsv(outpath + "_adapt", WriteMode.OVERWRITE);


//		System.out.println(env.getExecutionPlan());
		JobExecutionResult result = env.execute("Adaptive Page Rank");

		Map<String, Object> accumulators = result.getAllAccumulatorResults();
		List<String> keys = new ArrayList<String>(accumulators.keySet());
		Collections.sort(keys);
		for (String key : keys) {
			System.out.println(key + " : " + accumulators.get(key));
		}
	}