org.apache.flink.runtime.operators.shipping.OutputEmitter Java Examples

The following examples show how to use org.apache.flink.runtime.operators.shipping.OutputEmitter. They are taken from open-source projects; the source file and the originating project are noted above each example.
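
Before the project snippets, here is a minimal hedged sketch of the class in isolation: an OutputEmitter is a ChannelSelector over SerializationDelegate-wrapped records, so you pick a ShipStrategyType (plus a TypeComparator for key-based strategies), call setup() with the channel count, and then ask it for a channel per record. The single-argument selectChannel(...) call and the Record/IntValue plumbing below are assumptions based on the test code on this page, not verified against a particular Flink release.

// Minimal sketch (assumptions noted above): round-robin Record values over 8 channels.
final ChannelSelector<SerializationDelegate<Record>> selector =
	new OutputEmitter<>(ShipStrategyType.PARTITION_FORCED_REBALANCE, 0);
selector.setup(8);                                  // number of outgoing channels
assertFalse(selector.isBroadcast());                // only BROADCAST reports true here

final SerializationDelegate<Record> delegate = new SerializationDelegate<>(
	new RecordSerializerFactory().getSerializer());
delegate.setInstance(new Record(new IntValue(42)));

int channel = selector.selectChannel(delegate);     // cycles over channels 0..7 on successive calls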
Example #1
Source File: OutputEmitterTest.java    From Flink-CEPplus with Apache License 2.0
private ChannelSelector createChannelSelector(
		ShipStrategyType shipStrategyType,
		TypeComparator comparator,
		int numberOfChannels) {
	final ChannelSelector selector = new OutputEmitter<>(shipStrategyType, comparator);
	selector.setup(numberOfChannels);
	assertEquals(shipStrategyType == ShipStrategyType.BROADCAST, selector.isBroadcast());
	return selector;
}
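
A hedged usage sketch for the helper above (not taken from the test file itself): intComparator stands in for a TypeComparator over the record's key field, and only the BROADCAST strategy makes isBroadcast() return true in the helper's assertion.

// Hypothetical call sites for createChannelSelector(...).
ChannelSelector hashSelector =
	createChannelSelector(ShipStrategyType.PARTITION_HASH, intComparator, 100);
ChannelSelector broadcastSelector =
	createChannelSelector(ShipStrategyType.BROADCAST, intComparator, 100);
// Both calls return only after the helper's assertEquals on isBroadcast() has passed.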
 
Example #2
Source File: OutputEmitterTest.java    From flink with Apache License 2.0
private ChannelSelector createChannelSelector(
		ShipStrategyType shipStrategyType,
		TypeComparator comparator,
		int numberOfChannels) {
	final ChannelSelector selector = new OutputEmitter<>(shipStrategyType, comparator);
	selector.setup(numberOfChannels);
	assertEquals(shipStrategyType == ShipStrategyType.BROADCAST, selector.isBroadcast());
	return selector;
}
 
Example #3
Source File: RecordWriterTest.java    From flink with Apache License 2.0
/**
 * Creates the {@link RecordWriter} instance based on whether it is a broadcast writer.
 */
private RecordWriter createRecordWriter(ResultPartitionWriter writer) {
	if (isBroadcastWriter) {
		return new RecordWriterBuilder()
			.setChannelSelector(new OutputEmitter(ShipStrategyType.BROADCAST, 0))
			.build(writer);
	} else {
		return new RecordWriterBuilder().build(writer);
	}
}
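
A follow-up sketch (assumed, not part of the test file): the same RecordWriterBuilder can be pointed at any OutputEmitter strategy; the broadcast branch above merely pre-wires a broadcasting channel selector, and a round-robin variant would look like this.

// Hypothetical variant of the helper above using a forced-rebalance selector.
private RecordWriter createRoundRobinRecordWriter(ResultPartitionWriter writer) {
	return new RecordWriterBuilder()
		.setChannelSelector(new OutputEmitter(ShipStrategyType.PARTITION_FORCED_REBALANCE, 0))
		.build(writer);
}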
 
Example #4
Source File: BatchTask.java    From Flink-CEPplus with Apache License 2.0
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration. The
 * output collector contains the writers that forward the data to the different tasks that the given task
 * is connected to. Each writer applies the partitioning as described in the configuration.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each output.
 * @param outputOffset The offset at which to start getting the writers for the outputs.
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl,
		List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception
{
	if (numOutputs == 0) {
		return null;
	}

	// get the factory for the serializer
	final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
	final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

	// create a writer for each output
	for (int i = 0; i < numOutputs; i++)
	{
		// create the OutputEmitter from output ship strategy
		final ShipStrategyType strategy = config.getOutputShipStrategy(i);
		final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
		final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

		final ChannelSelector<SerializationDelegate<T>> oe;
		if (compFactory == null) {
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup);
		}
		else {
			final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
			final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);

			final TypeComparator<T> comparator = compFactory.createComparator();
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
		}

		final RecordWriter<SerializationDelegate<T>> recordWriter = RecordWriter.createRecordWriter(
			task.getEnvironment().getWriter(outputOffset + i),
			oe,
			task.getEnvironment().getTaskInfo().getTaskName());

		recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

		writers.add(recordWriter);
	}
	if (eventualOutputs != null) {
		eventualOutputs.addAll(writers);
	}
	return new OutputCollector<T>(writers, serializerFactory.getSerializer());
}
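
A hedged call-site sketch for getOutputCollector: the task, config and classLoader variables are placeholders for what a BatchTask-style driver would pass in, and config.getNumOutputs() is assumed to report the number of configured outputs.

// Hypothetical driver-side usage of the factory method above.
final List<RecordWriter<?>> eventualOutputs = new ArrayList<>();
final Collector<Record> output = BatchTask.getOutputCollector(
	task, config, classLoader, eventualOutputs, 0, config.getNumOutputs());

output.collect(new Record(new IntValue(1)));   // routed by each writer's OutputEmitter
output.close();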
 
Example #5
Source File: OutputEmitterTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testForcedRebalance() {
	final int numberOfChannels = 100;
	int toTaskIndex = numberOfChannels * 6 / 7;
	int fromTaskIndex = toTaskIndex + numberOfChannels;
	int extraRecords = numberOfChannels / 3;
	int numRecords = 50000 + extraRecords;
	final SerializationDelegate<Record> delegate = new SerializationDelegate<>(
		new RecordSerializerFactory().getSerializer());
	final ChannelSelector<SerializationDelegate<Record>> selector = new OutputEmitter<>(
		ShipStrategyType.PARTITION_FORCED_REBALANCE, fromTaskIndex);
	selector.setup(numberOfChannels);

	// Test for IntValue
	int[] hits = getSelectedChannelsHitCount(selector, delegate, RecordType.INTEGER, numRecords, numberOfChannels);
	int totalHitCount = 0;
	for (int i = 0; i < hits.length; i++) {
		if (toTaskIndex <= i || i < toTaskIndex + extraRecords - numberOfChannels) {
			assertTrue(hits[i] == (numRecords / numberOfChannels) + 1);
		} else {
			assertTrue(hits[i] == numRecords / numberOfChannels);
		}
		totalHitCount += hits[i];
	}
	assertTrue(totalHitCount == numRecords);

	toTaskIndex = numberOfChannels / 5;
	fromTaskIndex = toTaskIndex + 2 * numberOfChannels;
	extraRecords = numberOfChannels * 2 / 9;
	numRecords = 10000 + extraRecords;

	// Test for StringValue
	final ChannelSelector<SerializationDelegate<Record>> selector2 = new OutputEmitter<>(
		ShipStrategyType.PARTITION_FORCED_REBALANCE, fromTaskIndex);
	selector2.setup(numberOfChannels);
	hits = getSelectedChannelsHitCount(selector2, delegate, RecordType.STRING, numRecords, numberOfChannels);
	totalHitCount = 0;
	for (int i = 0; i < hits.length; i++) {
		if (toTaskIndex <= i && i < toTaskIndex + extraRecords) {
			assertTrue(hits[i] == (numRecords / numberOfChannels) + 1);
		} else {
			assertTrue(hits[i] == numRecords / numberOfChannels);
		}
		totalHitCount += hits[i];
	}
	assertTrue(totalHitCount == numRecords);
}
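
To make the first loop's two branches concrete: with numberOfChannels = 100, extraRecords = 33 and numRecords = 50033, the forced rebalance deals records out round-robin, so every channel receives at least 50033 / 100 = 500 records and exactly 33 channels get one extra. The test picks fromTaskIndex = toTaskIndex + numberOfChannels so that, modulo the channel count, the dealing evidently starts at toTaskIndex = 85; the 33 "extra" channels therefore wrap around from 85..99 to 0..17, which is why that condition uses ||, whereas the second loop's range (channels 20..41) does not wrap and uses &&.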
 
Example #6
Source File: RecordWriterTest.java    From Flink-CEPplus with Apache License 2.0
/**
 * Emitting records via a BroadcastPartitioner and broadcasting records directly produce the same result:
 * every target channel receives the complete output.
 *
 * @param isBroadcastEmit whether using {@link RecordWriter#broadcastEmit(IOReadableWritable)} or not
 */
private void emitRecordWithBroadcastPartitionerOrBroadcastEmitRecord(boolean isBroadcastEmit) throws Exception {
	final int numberOfChannels = 4;
	final int bufferSize = 32;
	final int numValues = 8;
	final int serializationLength = 4;

	@SuppressWarnings("unchecked")
	final Queue<BufferConsumer>[] queues = new Queue[numberOfChannels];
	for (int i = 0; i < numberOfChannels; i++) {
		queues[i] = new ArrayDeque<>();
	}

	final TestPooledBufferProvider bufferProvider = new TestPooledBufferProvider(Integer.MAX_VALUE, bufferSize);
	final ResultPartitionWriter partitionWriter = new CollectingPartitionWriter(queues, bufferProvider);
	final ChannelSelector selector = new OutputEmitter(ShipStrategyType.BROADCAST, 0);
	final RecordWriter<SerializationTestType> writer = RecordWriter.createRecordWriter(partitionWriter, selector, 0, "test");
	final RecordDeserializer<SerializationTestType> deserializer = new SpillingAdaptiveSpanningRecordDeserializer<>(
		new String[]{ tempFolder.getRoot().getAbsolutePath() });

	final ArrayDeque<SerializationTestType> serializedRecords = new ArrayDeque<>();
	final Iterable<SerializationTestType> records = Util.randomRecords(numValues, SerializationTestTypeFactory.INT);
	for (SerializationTestType record : records) {
		serializedRecords.add(record);

		if (isBroadcastEmit) {
			writer.broadcastEmit(record);
		} else {
			writer.emit(record);
		}
	}

	final int requiredBuffers = numValues / (bufferSize / (4 + serializationLength));
	for (int i = 0; i < numberOfChannels; i++) {
		assertEquals(requiredBuffers, queues[i].size());

		final ArrayDeque<SerializationTestType> expectedRecords = serializedRecords.clone();
		int assertRecords = 0;
		for (int j = 0; j < requiredBuffers; j++) {
			Buffer buffer = buildSingleBuffer(queues[i].remove());
			deserializer.setNextBuffer(buffer);

			assertRecords += DeserializationUtils.deserializeRecords(expectedRecords, deserializer);
		}
		Assert.assertEquals(numValues, assertRecords);
	}
}
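
On the buffer arithmetic in the final loop: each test value serializes to serializationLength = 4 payload bytes plus a 4-byte length header, so a 32-byte buffer holds 4 records and the 8 emitted values need requiredBuffers = 2 buffers per channel. Because the selector is a broadcast OutputEmitter (or broadcastEmit is used directly), each of the 4 channel queues must hold those same 2 buffers and deserialize back into all 8 records, which is exactly what the assertions check.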
 
Example #7
Source File: BatchTask.java    From flink with Apache License 2.0
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration. The
 * output collector contains the writers that forward the data to the different tasks that the given task
 * is connected to. Each writer applies the partitioning as described in the configuration.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each output.
 * @param outputOffset The offset at which to start getting the writers for the outputs.
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl,
		List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception
{
	if (numOutputs == 0) {
		return null;
	}

	// get the factory for the serializer
	final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
	final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

	// create a writer for each output
	for (int i = 0; i < numOutputs; i++)
	{
		// create the OutputEmitter from output ship strategy
		final ShipStrategyType strategy = config.getOutputShipStrategy(i);
		final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
		final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

		final ChannelSelector<SerializationDelegate<T>> oe;
		if (compFactory == null) {
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup);
		}
		else {
			final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
			final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);

			final TypeComparator<T> comparator = compFactory.createComparator();
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
		}

		final RecordWriter<SerializationDelegate<T>> recordWriter = new RecordWriterBuilder()
			.setChannelSelector(oe)
			.setTaskName(task.getEnvironment().getTaskInfo().getTaskName())
			.build(task.getEnvironment().getWriter(outputOffset + i));

		recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

		writers.add(recordWriter);
	}
	if (eventualOutputs != null) {
		eventualOutputs.addAll(writers);
	}
	return new OutputCollector<T>(writers, serializerFactory.getSerializer());
}
 
Example #8
Source File: OutputEmitterTest.java    From flink with Apache License 2.0
@Test
public void testForcedRebalance() {
	final int numberOfChannels = 100;
	int toTaskIndex = numberOfChannels * 6 / 7;
	int fromTaskIndex = toTaskIndex + numberOfChannels;
	int extraRecords = numberOfChannels / 3;
	int numRecords = 50000 + extraRecords;
	final SerializationDelegate<Record> delegate = new SerializationDelegate<>(
		new RecordSerializerFactory().getSerializer());
	final ChannelSelector<SerializationDelegate<Record>> selector = new OutputEmitter<>(
		ShipStrategyType.PARTITION_FORCED_REBALANCE, fromTaskIndex);
	selector.setup(numberOfChannels);

	// Test for IntValue
	int[] hits = getSelectedChannelsHitCount(selector, delegate, RecordType.INTEGER, numRecords, numberOfChannels);
	int totalHitCount = 0;
	for (int i = 0; i < hits.length; i++) {
		if (toTaskIndex <= i || i < toTaskIndex + extraRecords - numberOfChannels) {
			assertTrue(hits[i] == (numRecords / numberOfChannels) + 1);
		} else {
			assertTrue(hits[i] == numRecords / numberOfChannels);
		}
		totalHitCount += hits[i];
	}
	assertTrue(totalHitCount == numRecords);

	toTaskIndex = numberOfChannels / 5;
	fromTaskIndex = toTaskIndex + 2 * numberOfChannels;
	extraRecords = numberOfChannels * 2 / 9;
	numRecords = 10000 + extraRecords;

	// Test for StringValue
	final ChannelSelector<SerializationDelegate<Record>> selector2 = new OutputEmitter<>(
		ShipStrategyType.PARTITION_FORCED_REBALANCE, fromTaskIndex);
	selector2.setup(numberOfChannels);
	hits = getSelectedChannelsHitCount(selector2, delegate, RecordType.STRING, numRecords, numberOfChannels);
	totalHitCount = 0;
	for (int i = 0; i < hits.length; i++) {
		if (toTaskIndex <= i && i < toTaskIndex + extraRecords) {
			assertTrue(hits[i] == (numRecords / numberOfChannels) + 1);
		} else {
			assertTrue(hits[i] == numRecords / numberOfChannels);
		}
		totalHitCount += hits[i];
	}
	assertTrue(totalHitCount == numRecords);
}
 
Example #9
Source File: RecordWriterTest.java    From flink with Apache License 2.0
/**
 * Emitting records via a BroadcastPartitioner and broadcasting records directly produce the same result:
 * every target channel receives the complete output.
 *
 * @param isBroadcastEmit whether using {@link RecordWriter#broadcastEmit(IOReadableWritable)} or not
 */
private void emitRecordWithBroadcastPartitionerOrBroadcastEmitRecord(boolean isBroadcastEmit) throws Exception {
	final int numberOfChannels = 4;
	final int bufferSize = 32;
	final int numValues = 8;
	final int serializationLength = 4;

	@SuppressWarnings("unchecked")
	final Queue<BufferConsumer>[] queues = new Queue[numberOfChannels];
	for (int i = 0; i < numberOfChannels; i++) {
		queues[i] = new ArrayDeque<>();
	}

	final TestPooledBufferProvider bufferProvider = new TestPooledBufferProvider(Integer.MAX_VALUE, bufferSize);
	final ResultPartitionWriter partitionWriter = new CollectingPartitionWriter(queues, bufferProvider);
	final ChannelSelector selector = new OutputEmitter(ShipStrategyType.BROADCAST, 0);
	final RecordWriter<SerializationTestType> writer = new RecordWriterBuilder()
		.setChannelSelector(selector)
		.setTimeout(0)
		.build(partitionWriter);
	final RecordDeserializer<SerializationTestType> deserializer = new SpillingAdaptiveSpanningRecordDeserializer<>(
		new String[]{ tempFolder.getRoot().getAbsolutePath() });

	final ArrayDeque<SerializationTestType> serializedRecords = new ArrayDeque<>();
	final Iterable<SerializationTestType> records = Util.randomRecords(numValues, SerializationTestTypeFactory.INT);
	for (SerializationTestType record : records) {
		serializedRecords.add(record);

		if (isBroadcastEmit) {
			writer.broadcastEmit(record);
		} else {
			writer.emit(record);
		}
	}

	final int requiredBuffers = numValues / (bufferSize / (4 + serializationLength));
	for (int i = 0; i < numberOfChannels; i++) {
		assertEquals(requiredBuffers, queues[i].size());

		final ArrayDeque<SerializationTestType> expectedRecords = serializedRecords.clone();
		int assertRecords = 0;
		for (int j = 0; j < requiredBuffers; j++) {
			Buffer buffer = buildSingleBuffer(queues[i].remove());
			deserializer.setNextBuffer(buffer);

			assertRecords += DeserializationUtils.deserializeRecords(expectedRecords, deserializer);
		}
		Assert.assertEquals(numValues, assertRecords);
	}
}
 