Java Code Examples for org.apache.flink.api.common.JobExecutionResult#getAccumulatorResult()

The following examples show how to use org.apache.flink.api.common.JobExecutionResult#getAccumulatorResult(). They are drawn from several open-source projects; the source file, project, and license are noted above each example.
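Before the examples, here is a minimal end-to-end sketch of the pattern they all share: register an accumulator inside a rich function, run the job, and read the final value from the JobExecutionResult. This sketch is illustrative rather than taken from any of the projects below; the accumulator name "line-count" and the sample data are made up, and it assumes the classic DataSet API used throughout these examples.

import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.accumulators.IntCounter;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.configuration.Configuration;

public class AccumulatorSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        env.fromElements("a", "b", "c")
                .map(new RichMapFunction<String, String>() {
                    private final IntCounter counter = new IntCounter();

                    @Override
                    public void open(Configuration parameters) {
                        // register the accumulator under a job-wide unique name
                        getRuntimeContext().addAccumulator("line-count", counter);
                    }

                    @Override
                    public String map(String value) {
                        counter.add(1);
                        return value;
                    }
                })
                .output(new DiscardingOutputFormat<>());

        // execute() blocks until the job finishes and returns its JobExecutionResult
        JobExecutionResult result = env.execute("accumulator sketch");

        // the generic return type is inferred from the assignment target
        Integer count = result.getAccumulatorResult("line-count");
        System.out.println("count = " + count); // 3
    }
}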
Example 1
Source File: UnalignedCheckpointITCase.java    From flink with Apache License 2.0
private void execute(int parallelism, int slotsPerTaskManager, boolean slotSharing) throws Exception {
	StreamExecutionEnvironment env = createEnv(parallelism, slotsPerTaskManager, slotSharing);

	long minCheckpoints = 10;
	createDAG(env, minCheckpoints, slotSharing);
	final JobExecutionResult result = env.execute();

	collector.checkThat(result.<Long>getAccumulatorResult(NUM_OUT_OF_ORDER), equalTo(0L));
	collector.checkThat(result.<Long>getAccumulatorResult(NUM_DUPLICATES), equalTo(0L));
	collector.checkThat(result.<Long>getAccumulatorResult(NUM_LOST), equalTo(0L));

	// at this point #inputs must equal #outputs; still, perform these sanity checks
	Long inputs = result.<Long>getAccumulatorResult(NUM_INPUTS);
	collector.checkThat(inputs, greaterThan(0L));
	collector.checkThat(result.<Long>getAccumulatorResult(NUM_OUTPUTS), equalTo(inputs));
}
 
Example 2
Source File: MaterializedCollectBatchResult.java    From flink with Apache License 2.0
@Override
public void accept(JobExecutionResult jobExecutionResult) {
	try {
		final ArrayList<byte[]> accResult = jobExecutionResult.getAccumulatorResult(accumulatorName);
		if (accResult == null) {
			throw new SqlExecutionException("The accumulator could not retrieve the result.");
		}
		final List<Row> resultTable = SerializedListAccumulator.deserializeList(accResult, tableSink.getSerializer());
		// sets the result table all at once
		synchronized (resultLock) {
			MaterializedCollectBatchResult.this.resultTable = resultTable;
		}
	} catch (ClassNotFoundException | IOException e) {
		throw new SqlExecutionException("Serialization error while deserializing collected data.", e);
	}
}
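This is the collect pattern used by Flink's SQL client: the job gathers every row into a SerializedListAccumulator, whose result is an ArrayList of serialized byte arrays, and after execution the driver deserializes them with the table sink's serializer. Note the null check: getAccumulatorResult returns null when no accumulator was registered under the given name.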
 
Example 3
Source File: JavaCounterApp.java    From 163-bigdate-note with GNU General Public License v3.0
public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> data = env.fromElements("hadoop", "spark", "flink", "storm", "pyspark");

    data.map(new RichMapFunction<String, String>() {
        LongCounter counter = new LongCounter();

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            getRuntimeContext().addAccumulator("ele_counter_java", counter);
        }

        @Override
        public String map(String value) throws Exception {
            counter.add(1);
            return value;
        }
    }).writeAsText("file:\\D:\\imooc\\新一代大数据计算引擎 Flink从入门到实战-v\\input\\sinkout\\sink-java-counter.txt",
            FileSystem.WriteMode.OVERWRITE).setParallelism(3);

    JobExecutionResult counterApp = env.execute("JavaCounterApp");
    Long num = counterApp.getAccumulatorResult("ele_counter_java");
    System.out.println("num:" + num);
}
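Because getAccumulatorResult is generic, the return value is cast to the assignment target's type, so Long num = ... compiles without an explicit type argument. Example 1 shows the explicit form result.<Long>getAccumulatorResult(...), which helps when the result is passed straight into another call; if the declared type does not match the accumulator's actual result type, the mismatch surfaces as a ClassCastException at runtime.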
 
Example 4
Source File: ParallelMaximumLikelihood.java    From toolbox with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        Configuration config = new Configuration();
        config.setString(BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();
        this.sumSS = dataset.map(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();

        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; // initial counts

    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
 
Example 5
Source File: DataSetUtils.java    From Flink-CEPplus with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet
 * as well as the checksum (sum over element hashes).
 *
 * @return A ChecksumHashCode that represents the count and checksum of elements in the data set.
 * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly
 */
@Deprecated
public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception {
	final String id = new AbstractID().toString();

	input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode");

	JobExecutionResult res = input.getExecutionEnvironment().execute();
	return res.<Utils.ChecksumHashCode> getAccumulatorResult(id);
}
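A brief usage sketch of the method above, with made-up data; getCount() and getChecksum() are the accessors on Utils.ChecksumHashCode:

Utils.ChecksumHashCode checksum = DataSetUtils.checksumHashCode(env.fromElements(1, 2, 3));
System.out.println(checksum.getCount());    // 3
System.out.println(checksum.getChecksum()); // sum of the elements' hashes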
 
Example 6
Source File: ParallelMaximumLikelihood2.java    From toolbox with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {

    try {

        this.initLearning();

        Configuration config = new Configuration();
        config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.mapPartition(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();

        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood2.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; // initial counts

    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
 
Example 7
Source File: EmptyFieldsCountAccumulator.java    From flink with Apache License 2.0
public static void main(final String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get the data set
		final DataSet<StringTriple> file = getDataSet(env, params);

		// filter lines with empty fields
		final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

		// Here, we could do further processing with the filtered lines...
		JobExecutionResult result;
		// output the filtered lines
		if (params.has("output")) {
			filteredLines.writeAsCsv(params.get("output"));
			// execute program
			result = env.execute("Accumulator example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			filteredLines.print();
			result = env.getLastJobExecutionResult();
		}

		// get the accumulator result via its registration key
		final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
		System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
	}
 
Example 8
Source File: DataSet.java    From flink with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet.
 *
 * @return A long integer that represents the number of elements in the data set.
 */
public long count() throws Exception {
	final String id = new AbstractID().toString();

	output(new Utils.CountHelper<T>(id)).name("count()");

	JobExecutionResult res = getExecutionEnvironment().execute();
	return res.<Long> getAccumulatorResult(id);
}
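A brief usage sketch with made-up data. Note that count() triggers a full job execution on every call, so cache the value if you need it more than once:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
long n = env.fromElements("a", "b", "c").count(); // runs the job and returns 3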
 
Example 9
Source File: DataSetUtils.java    From flink with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet
 * as well as the checksum (sum over element hashes).
 *
 * @return A ChecksumHashCode that represents the count and checksum of elements in the data set.
 * @deprecated replaced with {@code org.apache.flink.graph.asm.dataset.ChecksumHashCode} in Gelly
 */
@Deprecated
public static <T> Utils.ChecksumHashCode checksumHashCode(DataSet<T> input) throws Exception {
	final String id = new AbstractID().toString();

	input.output(new Utils.ChecksumHashCodeHelper<T>(id)).name("ChecksumHashCode");

	JobExecutionResult res = input.getExecutionEnvironment().execute();
	return res.<Utils.ChecksumHashCode> getAccumulatorResult(id);
}
 
Example 10
Source File: EmptyFieldsCountAccumulator.java    From Flink-CEPplus with Apache License 2.0
public static void main(final String[] args) throws Exception {

		final ParameterTool params = ParameterTool.fromArgs(args);

		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(params);

		// get the data set
		final DataSet<StringTriple> file = getDataSet(env, params);

		// filter lines with empty fields
		final DataSet<StringTriple> filteredLines = file.filter(new EmptyFieldFilter());

		// Here, we could do further processing with the filtered lines...
		JobExecutionResult result;
		// output the filtered lines
		if (params.has("output")) {
			filteredLines.writeAsCsv(params.get("output"));
			// execute program
			result = env.execute("Accumulator example");
		} else {
			System.out.println("Printing result to stdout. Use --output to specify output path.");
			filteredLines.print();
			result = env.getLastJobExecutionResult();
		}

		// get the accumulator result via its registration key
		final List<Integer> emptyFields = result.getAccumulatorResult(EMPTY_FIELD_ACCUMULATOR);
		System.out.format("Number of detected empty fields per column: %s\n", emptyFields);
	}
 
Example 11
Source File: DataSet.java    From Flink-CEPplus with Apache License 2.0
/**
 * Convenience method to get the count (number of elements) of a DataSet.
 *
 * @return A long integer that represents the number of elements in the data set.
 */
public long count() throws Exception {
	final String id = new AbstractID().toString();

	output(new Utils.CountHelper<T>(id)).name("count()");

	JobExecutionResult res = getExecutionEnvironment().execute();
	return res.<Long> getAccumulatorResult(id);
}
 
Example 12
Source File: AnalyticHelper.java    From flink with Apache License 2.0
/**
 * Gets the accumulator with the given name. Returns {@code null} if no accumulator with
 * that name was produced.
 *
 * @param accumulatorName The name of the accumulator
 * @param <A> The generic type of the accumulator value
 * @return The value of the accumulator with the given name
 */
public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) {
	JobExecutionResult result = env.getLastJobExecutionResult();

	Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?");

	return result.getAccumulatorResult(id + SEPARATOR + accumulatorName);
}
 
Example 13
Source File: AnalyticHelper.java    From Flink-CEPplus with Apache License 2.0
/**
 * Gets the accumulator with the given name. Returns {@code null} if no accumulator with
 * that name was produced.
 *
 * @param accumulatorName The name of the accumulator
 * @param <A> The generic type of the accumulator value
 * @return The value of the accumulator with the given name
 */
public <A> A getAccumulator(ExecutionEnvironment env, String accumulatorName) {
	JobExecutionResult result = env.getLastJobExecutionResult();

	Preconditions.checkNotNull(result, "No result found for job, was execute() called before getting the result?");

	return result.getAccumulatorResult(id + SEPARATOR + accumulatorName);
}