org.apache.flink.api.common.distributions.DataDistribution Java Examples

The following examples show how to use org.apache.flink.api.common.distributions.DataDistribution. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PartitionOperator.java    From Flink-CEPplus with Apache License 2.0 6 votes vote down vote up
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
		TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
	super(input, input.getType());

	Preconditions.checkNotNull(pMethod);
	Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
	Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partioning requires a partitioner.");
	Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only neccessary for range partition.");

	if (distribution != null) {
		Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
		Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
				"The types of the flat key fields must be equal to the types of the fields of the distribution.");
	}

	if (customPartitioner != null) {
		pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
	}

	this.pMethod = pMethod;
	this.pKeys = pKeys;
	this.partitionLocationName = partitionLocationName;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
}
 
Example #2
Source File: PartitionOperator.java    From flink with Apache License 2.0 6 votes vote down vote up
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
		TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
	super(input, input.getType());

	Preconditions.checkNotNull(pMethod);
	Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
	Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partioning requires a partitioner.");
	Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only neccessary for range partition.");

	if (distribution != null) {
		Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
		Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
				"The types of the flat key fields must be equal to the types of the fields of the distribution.");
	}

	if (customPartitioner != null) {
		pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
	}

	this.pMethod = pMethod;
	this.pKeys = pKeys;
	this.partitionLocationName = partitionLocationName;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
}
 
Example #3
Source File: PartitionOperator.java    From flink with Apache License 2.0 6 votes vote down vote up
private <P> PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, Partitioner<P> customPartitioner,
		TypeInformation<P> partitionerTypeInfo, DataDistribution distribution, String partitionLocationName) {
	super(input, input.getType());

	Preconditions.checkNotNull(pMethod);
	Preconditions.checkArgument(pKeys != null || pMethod == PartitionMethod.REBALANCE, "Partitioning requires keys");
	Preconditions.checkArgument(pMethod != PartitionMethod.CUSTOM || customPartitioner != null, "Custom partioning requires a partitioner.");
	Preconditions.checkArgument(distribution == null || pMethod == PartitionMethod.RANGE, "Customized data distribution is only neccessary for range partition.");

	if (distribution != null) {
		Preconditions.checkArgument(pKeys.getNumberOfKeyFields() <= distribution.getNumberOfFields(), "The distribution must provide at least as many fields as flat key fields are specified.");
		Preconditions.checkArgument(Arrays.equals(pKeys.getKeyFieldTypes(), Arrays.copyOfRange(distribution.getKeyTypes(), 0, pKeys.getNumberOfKeyFields())),
				"The types of the flat key fields must be equal to the types of the fields of the distribution.");
	}

	if (customPartitioner != null) {
		pKeys.validateCustomPartitioner(customPartitioner, partitionerTypeInfo);
	}

	this.pMethod = pMethod;
	this.pKeys = pKeys;
	this.partitionLocationName = partitionLocationName;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
}
 
Example #4
Source File: TaskConfig.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given data distribution in the configuration for one output.
 *
 * <p>The distribution's class name is written under {@code OUTPUT_DATA_DISTRIBUTION_CLASS}; its
 * serialized bytes are written under {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param distribution The distribution to store; it is dereferenced, so null causes a NullPointerException.
 * @param outputNum The output number appended to the key of the serialized bytes.
 * @throws RuntimeException If writing the distribution raises an {@link IOException}.
 */
public void setOutputDataDistribution(DataDistribution distribution, int outputNum) {
	final String distributionClassName = distribution.getClass().getName();
	this.config.setString(OUTPUT_DATA_DISTRIBUTION_CLASS, distributionClassName);

	try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
			DataOutputViewStreamWrapper view = new DataOutputViewStreamWrapper(buffer)) {
		distribution.write(view);

		final byte[] serialized = buffer.toByteArray();
		config.setBytes(OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum, serialized);
	} catch (IOException ioe) {
		throw new RuntimeException("Error serializing the DataDistribution: " + ioe.getMessage(), ioe);
	}
}
 
Example #5
Source File: RequestedGlobalPropertiesFilteringTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Requests range partitioning on fields 3, 1 and 6 with a mock distribution and checks that,
 * after filtering by the semantic properties, the partitioning is still a range partitioning
 * on fields 7, 1 and 2 with unchanged types, orders and distribution.
 */
@Test
public void testRangePartitioningPreserved3() {

	SingleInputSemanticProperties semanticProps = new SingleInputSemanticProperties();
	SemanticPropUtil.getSemanticPropsSingleFromString(
			semanticProps, new String[]{"7->3;1->1;2->6"}, null, null, tupleInfo, tupleInfo);

	DataDistribution distribution = new MockDistribution();
	Ordering requestedOrdering = new Ordering();
	requestedOrdering.appendOrdering(3, LongValue.class, Order.DESCENDING);
	requestedOrdering.appendOrdering(1, IntValue.class, Order.ASCENDING);
	requestedOrdering.appendOrdering(6, ByteValue.class, Order.DESCENDING);

	RequestedGlobalProperties requested = new RequestedGlobalProperties();
	requested.setRangePartitioned(requestedOrdering, distribution);

	RequestedGlobalProperties result = requested.filterBySemanticProperties(semanticProps, 0);

	// partitioning kind and ordering survive the filter
	assertNotNull(result);
	assertEquals(PartitioningProperty.RANGE_PARTITIONED, result.getPartitioning());
	assertNotNull(result.getOrdering());
	assertEquals(3, result.getOrdering().getNumberOfFields());

	// field numbers are expected as 7, 1, 2
	assertEquals(7, result.getOrdering().getFieldNumber(0).intValue());
	assertEquals(1, result.getOrdering().getFieldNumber(1).intValue());
	assertEquals(2, result.getOrdering().getFieldNumber(2).intValue());

	// types and sort orders stay as requested
	assertEquals(LongValue.class, result.getOrdering().getType(0));
	assertEquals(IntValue.class, result.getOrdering().getType(1));
	assertEquals(ByteValue.class, result.getOrdering().getType(2));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(0));
	assertEquals(Order.ASCENDING, result.getOrdering().getOrder(1));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(2));

	// the distribution is carried over; no partitioned fields or custom partitioner are set
	assertNotNull(result.getDataDistribution());
	assertEquals(distribution, result.getDataDistribution());
	assertNull(result.getPartitionedFields());
	assertNull(result.getCustomPartitioner());
}
 
Example #6
Source File: PartitionNode.java    From flink with Apache License 2.0 5 votes vote down vote up
public PartitionDescriptor(PartitionMethod pMethod, FieldSet pKeys, Ordering ordering, Partitioner<?>
		customPartitioner, DataDistribution distribution) {
	super(pKeys);

	Preconditions.checkArgument(pMethod != PartitionMethod.RANGE
			|| pKeys.equals(new FieldSet(ordering.getFieldPositions())),
			"Partition keys must match the given ordering.");

	this.pMethod = pMethod;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
	this.ordering = ordering;
}
 
Example #7
Source File: RequestedGlobalProperties.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Switches the requested partitioning to range partitioning.
 *
 * <p>Stores the ordering and the data distribution and resets the partitioning fields to null.
 *
 * @param ordering The ordering for the range partitioning; must not be null.
 * @param dataDistribution The data distribution to store alongside the ordering.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution dataDistribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.ordering = ordering;
	this.dataDistribution = dataDistribution;
	this.partitioningFields = null;
	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
}
 
Example #8
Source File: GlobalProperties.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Marks these properties as range partitioned.
 *
 * <p>The partitioning fields are taken from the involved indexes of the given ordering.
 *
 * @param ordering The ordering of the partitioned fields; must not be null.
 * @param distribution The data distribution for the range partition. A customized distribution
 *                     may be supplied; null is stored as-is.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution distribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
	this.ordering = ordering;
	// derived from the ordering rather than passed in separately
	this.partitioningFields = ordering.getInvolvedIndexes();
	this.distribution = distribution;
}
 
Example #9
Source File: TaskConfig.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Restores the data distribution stored in the configuration for one output.
 *
 * <p>The class name is read from {@code OUTPUT_DATA_DISTRIBUTION_CLASS}, the class is loaded and
 * instantiated, and its state is read from the bytes stored under
 * {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param outputNum The output number appended to the key of the serialized state.
 * @param cl The class loader used to load the distribution class.
 * @return The restored distribution, or null if no distribution class name is configured.
 * @throws ClassNotFoundException If the configured class cannot be loaded.
 * @throws CorruptConfigurationException If the configured class is not a DataDistribution, or if
 *         a class name is configured without serialized state.
 * @throws RuntimeException If reading the serialized state fails.
 */
public DataDistribution getOutputDataDistribution(int outputNum, final ClassLoader cl) throws ClassNotFoundException {
	final String distributionClassName = this.config.getString(OUTPUT_DATA_DISTRIBUTION_CLASS, null);
	if (distributionClassName == null) {
		return null;
	}

	final Class<? extends DataDistribution> distributionClass;
	try {
		final Class<?> loadedClass = Class.forName(distributionClassName, true, cl);
		distributionClass = loadedClass.asSubclass(DataDistribution.class);
	} catch (ClassCastException ccex) {
		throw new CorruptConfigurationException("The class noted in the configuration as the data distribution " +
				"is no subclass of DataDistribution.");
	}

	final DataDistribution restored = InstantiationUtil.instantiate(distributionClass, DataDistribution.class);

	final String stateKey = OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum;
	final byte[] serializedState = this.config.getBytes(stateKey, null);
	if (serializedState == null) {
		throw new CorruptConfigurationException(
					"The configuration contained the data distribution type, but no serialized state.");
	}

	final DataInputViewStreamWrapper view =
			new DataInputViewStreamWrapper(new ByteArrayInputStream(serializedState));

	try {
		restored.read(view);
		return restored;
	} catch (Exception ex) {
		// append the cause's message only when there is one
		final String detail = ex.getMessage() == null ? "." : ": " + ex.getMessage();
		throw new RuntimeException(
				"The deserialization of the encoded data distribution state caused an error" + detail, ex);
	}
}
 
Example #10
Source File: TaskConfig.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given data distribution in the configuration for one output.
 *
 * <p>The distribution's class name is written under {@code OUTPUT_DATA_DISTRIBUTION_CLASS}; its
 * serialized bytes are written under {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param distribution The distribution to store; it is dereferenced, so null causes a NullPointerException.
 * @param outputNum The output number appended to the key of the serialized bytes.
 * @throws RuntimeException If writing the distribution raises an {@link IOException}.
 */
public void setOutputDataDistribution(DataDistribution distribution, int outputNum) {
	final String distributionClassName = distribution.getClass().getName();
	this.config.setString(OUTPUT_DATA_DISTRIBUTION_CLASS, distributionClassName);

	try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
			DataOutputViewStreamWrapper view = new DataOutputViewStreamWrapper(buffer)) {
		distribution.write(view);

		final byte[] serialized = buffer.toByteArray();
		config.setBytes(OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum, serialized);
	} catch (IOException ioe) {
		throw new RuntimeException("Error serializing the DataDistribution: " + ioe.getMessage(), ioe);
	}
}
 
Example #11
Source File: RequestedGlobalPropertiesFilteringTest.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Requests range partitioning on fields 3, 1 and 6 with a mock distribution and checks that,
 * after filtering by the semantic properties, the partitioning is still a range partitioning
 * on fields 7, 1 and 2 with unchanged types, orders and distribution.
 */
@Test
public void testRangePartitioningPreserved3() {

	SingleInputSemanticProperties semanticProps = new SingleInputSemanticProperties();
	SemanticPropUtil.getSemanticPropsSingleFromString(
			semanticProps, new String[]{"7->3;1->1;2->6"}, null, null, tupleInfo, tupleInfo);

	DataDistribution distribution = new MockDistribution();
	Ordering requestedOrdering = new Ordering();
	requestedOrdering.appendOrdering(3, LongValue.class, Order.DESCENDING);
	requestedOrdering.appendOrdering(1, IntValue.class, Order.ASCENDING);
	requestedOrdering.appendOrdering(6, ByteValue.class, Order.DESCENDING);

	RequestedGlobalProperties requested = new RequestedGlobalProperties();
	requested.setRangePartitioned(requestedOrdering, distribution);

	RequestedGlobalProperties result = requested.filterBySemanticProperties(semanticProps, 0);

	// partitioning kind and ordering survive the filter
	assertNotNull(result);
	assertEquals(PartitioningProperty.RANGE_PARTITIONED, result.getPartitioning());
	assertNotNull(result.getOrdering());
	assertEquals(3, result.getOrdering().getNumberOfFields());

	// field numbers are expected as 7, 1, 2
	assertEquals(7, result.getOrdering().getFieldNumber(0).intValue());
	assertEquals(1, result.getOrdering().getFieldNumber(1).intValue());
	assertEquals(2, result.getOrdering().getFieldNumber(2).intValue());

	// types and sort orders stay as requested
	assertEquals(LongValue.class, result.getOrdering().getType(0));
	assertEquals(IntValue.class, result.getOrdering().getType(1));
	assertEquals(ByteValue.class, result.getOrdering().getType(2));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(0));
	assertEquals(Order.ASCENDING, result.getOrdering().getOrder(1));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(2));

	// the distribution is carried over; no partitioned fields or custom partitioner are set
	assertNotNull(result.getDataDistribution());
	assertEquals(distribution, result.getDataDistribution());
	assertNull(result.getPartitionedFields());
	assertNull(result.getCustomPartitioner());
}
 
Example #12
Source File: PartitionNode.java    From flink with Apache License 2.0 5 votes vote down vote up
public PartitionDescriptor(PartitionMethod pMethod, FieldSet pKeys, Ordering ordering, Partitioner<?>
		customPartitioner, DataDistribution distribution) {
	super(pKeys);

	Preconditions.checkArgument(pMethod != PartitionMethod.RANGE
			|| pKeys.equals(new FieldSet(ordering.getFieldPositions())),
			"Partition keys must match the given ordering.");

	this.pMethod = pMethod;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
	this.ordering = ordering;
}
 
Example #13
Source File: RequestedGlobalProperties.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Switches the requested partitioning to range partitioning.
 *
 * <p>Stores the ordering and the data distribution and resets the partitioning fields to null.
 *
 * @param ordering The ordering for the range partitioning; must not be null.
 * @param dataDistribution The data distribution to store alongside the ordering.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution dataDistribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.ordering = ordering;
	this.dataDistribution = dataDistribution;
	this.partitioningFields = null;
	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
}
 
Example #14
Source File: GlobalProperties.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Marks these properties as range partitioned.
 *
 * <p>The partitioning fields are taken from the involved indexes of the given ordering.
 *
 * @param ordering The ordering of the partitioned fields; must not be null.
 * @param distribution The data distribution for the range partition. A customized distribution
 *                     may be supplied; null is stored as-is.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution distribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
	this.ordering = ordering;
	// derived from the ordering rather than passed in separately
	this.partitioningFields = ordering.getInvolvedIndexes();
	this.distribution = distribution;
}
 
Example #15
Source File: TaskConfig.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Restores the data distribution stored in the configuration for one output.
 *
 * <p>The class name is read from {@code OUTPUT_DATA_DISTRIBUTION_CLASS}, the class is loaded and
 * instantiated, and its state is read from the bytes stored under
 * {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param outputNum The output number appended to the key of the serialized state.
 * @param cl The class loader used to load the distribution class.
 * @return The restored distribution, or null if no distribution class name is configured.
 * @throws ClassNotFoundException If the configured class cannot be loaded.
 * @throws CorruptConfigurationException If the configured class is not a DataDistribution, or if
 *         a class name is configured without serialized state.
 * @throws RuntimeException If reading the serialized state fails.
 */
public DataDistribution getOutputDataDistribution(int outputNum, final ClassLoader cl) throws ClassNotFoundException {
	final String distributionClassName = this.config.getString(OUTPUT_DATA_DISTRIBUTION_CLASS, null);
	if (distributionClassName == null) {
		return null;
	}

	final Class<? extends DataDistribution> distributionClass;
	try {
		final Class<?> loadedClass = Class.forName(distributionClassName, true, cl);
		distributionClass = loadedClass.asSubclass(DataDistribution.class);
	} catch (ClassCastException ccex) {
		throw new CorruptConfigurationException("The class noted in the configuration as the data distribution " +
				"is no subclass of DataDistribution.");
	}

	final DataDistribution restored = InstantiationUtil.instantiate(distributionClass, DataDistribution.class);

	final String stateKey = OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum;
	final byte[] serializedState = this.config.getBytes(stateKey, null);
	if (serializedState == null) {
		throw new CorruptConfigurationException(
					"The configuration contained the data distribution type, but no serialized state.");
	}

	final DataInputViewStreamWrapper view =
			new DataInputViewStreamWrapper(new ByteArrayInputStream(serializedState));

	try {
		restored.read(view);
		return restored;
	} catch (Exception ex) {
		// append the cause's message only when there is one
		final String detail = ex.getMessage() == null ? "." : ": " + ex.getMessage();
		throw new RuntimeException(
				"The deserialization of the encoded data distribution state caused an error" + detail, ex);
	}
}
 
Example #16
Source File: RequestedGlobalPropertiesFilteringTest.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Requests range partitioning on fields 3, 1 and 6 with a mock distribution and checks that,
 * after filtering by the semantic properties, the partitioning is still a range partitioning
 * on fields 7, 1 and 2 with unchanged types, orders and distribution.
 */
@Test
public void testRangePartitioningPreserved3() {

	SingleInputSemanticProperties semanticProps = new SingleInputSemanticProperties();
	SemanticPropUtil.getSemanticPropsSingleFromString(
			semanticProps, new String[]{"7->3;1->1;2->6"}, null, null, tupleInfo, tupleInfo);

	DataDistribution distribution = new MockDistribution();
	Ordering requestedOrdering = new Ordering();
	requestedOrdering.appendOrdering(3, LongValue.class, Order.DESCENDING);
	requestedOrdering.appendOrdering(1, IntValue.class, Order.ASCENDING);
	requestedOrdering.appendOrdering(6, ByteValue.class, Order.DESCENDING);

	RequestedGlobalProperties requested = new RequestedGlobalProperties();
	requested.setRangePartitioned(requestedOrdering, distribution);

	RequestedGlobalProperties result = requested.filterBySemanticProperties(semanticProps, 0);

	// partitioning kind and ordering survive the filter
	assertNotNull(result);
	assertEquals(PartitioningProperty.RANGE_PARTITIONED, result.getPartitioning());
	assertNotNull(result.getOrdering());
	assertEquals(3, result.getOrdering().getNumberOfFields());

	// field numbers are expected as 7, 1, 2
	assertEquals(7, result.getOrdering().getFieldNumber(0).intValue());
	assertEquals(1, result.getOrdering().getFieldNumber(1).intValue());
	assertEquals(2, result.getOrdering().getFieldNumber(2).intValue());

	// types and sort orders stay as requested
	assertEquals(LongValue.class, result.getOrdering().getType(0));
	assertEquals(IntValue.class, result.getOrdering().getType(1));
	assertEquals(ByteValue.class, result.getOrdering().getType(2));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(0));
	assertEquals(Order.ASCENDING, result.getOrdering().getOrder(1));
	assertEquals(Order.DESCENDING, result.getOrdering().getOrder(2));

	// the distribution is carried over; no partitioned fields or custom partitioner are set
	assertNotNull(result.getDataDistribution());
	assertEquals(distribution, result.getDataDistribution());
	assertNull(result.getPartitionedFields());
	assertNull(result.getCustomPartitioner());
}
 
Example #17
Source File: PartitionNode.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
public PartitionDescriptor(PartitionMethod pMethod, FieldSet pKeys, Ordering ordering, Partitioner<?>
		customPartitioner, DataDistribution distribution) {
	super(pKeys);

	Preconditions.checkArgument(pMethod != PartitionMethod.RANGE
			|| pKeys.equals(new FieldSet(ordering.getFieldPositions())),
			"Partition keys must match the given ordering.");

	this.pMethod = pMethod;
	this.customPartitioner = customPartitioner;
	this.distribution = distribution;
	this.ordering = ordering;
}
 
Example #18
Source File: TaskConfig.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the given data distribution in the configuration for one output.
 *
 * <p>The distribution's class name is written under {@code OUTPUT_DATA_DISTRIBUTION_CLASS}; its
 * serialized bytes are written under {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param distribution The distribution to store; it is dereferenced, so null causes a NullPointerException.
 * @param outputNum The output number appended to the key of the serialized bytes.
 * @throws RuntimeException If writing the distribution raises an {@link IOException}.
 */
public void setOutputDataDistribution(DataDistribution distribution, int outputNum) {
	final String distributionClassName = distribution.getClass().getName();
	this.config.setString(OUTPUT_DATA_DISTRIBUTION_CLASS, distributionClassName);

	try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
			DataOutputViewStreamWrapper view = new DataOutputViewStreamWrapper(buffer)) {
		distribution.write(view);

		final byte[] serialized = buffer.toByteArray();
		config.setBytes(OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum, serialized);
	} catch (IOException ioe) {
		throw new RuntimeException("Error serializing the DataDistribution: " + ioe.getMessage(), ioe);
	}
}
 
Example #19
Source File: TaskConfig.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Restores the data distribution stored in the configuration for one output.
 *
 * <p>The class name is read from {@code OUTPUT_DATA_DISTRIBUTION_CLASS}, the class is loaded and
 * instantiated, and its state is read from the bytes stored under
 * {@code OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum}.
 *
 * @param outputNum The output number appended to the key of the serialized state.
 * @param cl The class loader used to load the distribution class.
 * @return The restored distribution, or null if no distribution class name is configured.
 * @throws ClassNotFoundException If the configured class cannot be loaded.
 * @throws CorruptConfigurationException If the configured class is not a DataDistribution, or if
 *         a class name is configured without serialized state.
 * @throws RuntimeException If reading the serialized state fails.
 */
public DataDistribution getOutputDataDistribution(int outputNum, final ClassLoader cl) throws ClassNotFoundException {
	final String distributionClassName = this.config.getString(OUTPUT_DATA_DISTRIBUTION_CLASS, null);
	if (distributionClassName == null) {
		return null;
	}

	final Class<? extends DataDistribution> distributionClass;
	try {
		final Class<?> loadedClass = Class.forName(distributionClassName, true, cl);
		distributionClass = loadedClass.asSubclass(DataDistribution.class);
	} catch (ClassCastException ccex) {
		throw new CorruptConfigurationException("The class noted in the configuration as the data distribution " +
				"is no subclass of DataDistribution.");
	}

	final DataDistribution restored = InstantiationUtil.instantiate(distributionClass, DataDistribution.class);

	final String stateKey = OUTPUT_DATA_DISTRIBUTION_PREFIX + outputNum;
	final byte[] serializedState = this.config.getBytes(stateKey, null);
	if (serializedState == null) {
		throw new CorruptConfigurationException(
					"The configuration contained the data distribution type, but no serialized state.");
	}

	final DataInputViewStreamWrapper view =
			new DataInputViewStreamWrapper(new ByteArrayInputStream(serializedState));

	try {
		restored.read(view);
		return restored;
	} catch (Exception ex) {
		// append the cause's message only when there is one
		final String detail = ex.getMessage() == null ? "." : ": " + ex.getMessage();
		throw new RuntimeException(
				"The deserialization of the encoded data distribution state caused an error" + detail, ex);
	}
}
 
Example #20
Source File: GlobalProperties.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Marks these properties as range partitioned.
 *
 * <p>The partitioning fields are taken from the involved indexes of the given ordering.
 *
 * @param ordering The ordering of the partitioned fields; must not be null.
 * @param distribution The data distribution for the range partition. A customized distribution
 *                     may be supplied; null is stored as-is.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution distribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
	this.ordering = ordering;
	// derived from the ordering rather than passed in separately
	this.partitioningFields = ordering.getInvolvedIndexes();
	this.distribution = distribution;
}
 
Example #21
Source File: RequestedGlobalProperties.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Switches the requested partitioning to range partitioning.
 *
 * <p>Stores the ordering and the data distribution and resets the partitioning fields to null.
 *
 * @param ordering The ordering for the range partitioning; must not be null.
 * @param dataDistribution The data distribution to store alongside the ordering.
 * @throws NullPointerException If {@code ordering} is null.
 */
public void setRangePartitioned(Ordering ordering, DataDistribution dataDistribution) {
	if (null == ordering) {
		throw new NullPointerException();
	}

	this.ordering = ordering;
	this.dataDistribution = dataDistribution;
	this.partitioningFields = null;
	this.partitioning = PartitioningProperty.RANGE_PARTITIONED;
}
 
Example #22
Source File: BatchTask.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration. The
 * output collector contains the writers that forward the data to the different tasks that the given task
 * is connected to. Each writer applies the partitioning as described in the configuration.
 *
 * <p>One record writer is created per output. Its channel selector is an {@code OutputEmitter} built
 * from the output's ship strategy; the comparator, partitioner and data distribution are only looked
 * up when a comparator factory is configured for that output.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each output.
 *                        May be null, in which case the created writers are not added to any list.
 * @param outputOffset The offset to start to get the writers for the outputs
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to, or {@code null} if
 *         {@code numOutputs} is zero.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl,
		List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception
{
	// no outputs configured: there is nothing to collect into
	if (numOutputs == 0) {
		return null;
	}

	// get the factory for the serializer
	final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
	final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

	// create a writer for each output
	for (int i = 0; i < numOutputs; i++)
	{
		// create the OutputEmitter from output ship strategy
		final ShipStrategyType strategy = config.getOutputShipStrategy(i);
		final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
		final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

		final ChannelSelector<SerializationDelegate<T>> oe;
		if (compFactory == null) {
			// without a comparator factory only the strategy and subtask index are used
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup);
		}
		else {
			// with a comparator factory, also load the distribution and partitioner of this output
			final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
			final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);

			final TypeComparator<T> comparator = compFactory.createComparator();
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
		}

		// the underlying writer is taken from the environment at position outputOffset + i
		final RecordWriter<SerializationDelegate<T>> recordWriter = new RecordWriterBuilder()
			.setChannelSelector(oe)
			.setTaskName(task.getEnvironment().getTaskInfo().getTaskName())
			.build(task.getEnvironment().getWriter(outputOffset + i));

		recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

		writers.add(recordWriter);
	}
	// hand the created writers back to the caller if a list was supplied
	if (eventualOutputs != null) {
		eventualOutputs.addAll(writers);
	}
	return new OutputCollector<T>(writers, serializerFactory.getSerializer());
}
 
Example #23
Source File: DataSetUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Range-partitions a DataSet on the specified fields, using the given data distribution.
 *
 * @param input The DataSet to partition.
 * @param distribution The data distribution passed to the partition operator.
 * @param fields The fields that are wrapped into {@code Keys.ExpressionKeys} as partitioning keys.
 * @return The partition operator created for the input with the {@code RANGE} method.
 */
public static <T> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, String... fields) {
	final Keys<T> rangeKeys = new Keys.ExpressionKeys<>(fields, input.getType());
	final String callLocation = Utils.getCallLocationName();

	return new PartitionOperator<>(
			input,
			PartitionOperatorBase.PartitionMethod.RANGE,
			rangeKeys,
			distribution,
			callLocation);
}
 
Example #24
Source File: DataSetUtils.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Range-partitions a DataSet using the specified key selector function and the given data
 * distribution.
 *
 * @param input The DataSet to partition.
 * @param distribution The data distribution passed to the partition operator.
 * @param keyExtractor The key selector; it is cleaned via {@code input.clean(...)} before use.
 * @return The partition operator created for the input with the {@code RANGE} method.
 */
public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) {
	final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType());

	final Keys<T> selectorKeys =
			new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType);
	final String callLocation = Utils.getCallLocationName();

	return new PartitionOperator<>(
			input,
			PartitionOperatorBase.PartitionMethod.RANGE,
			selectorKeys,
			distribution,
			callLocation);
}
 
Example #25
Source File: PartitionOperator.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a partition operator with keys and a data distribution but without a custom partitioner.
 *
 * <p>Delegates to the seven-argument constructor, passing null for both the custom partitioner
 * and its type information.
 *
 * @param input The DataSet to partition.
 * @param pMethod The partition method.
 * @param pKeys The partitioning keys.
 * @param distribution The data distribution to use.
 * @param partitionLocationName The name forwarded as the partition location.
 */
public PartitionOperator(DataSet<T> input, PartitionMethod pMethod, Keys<T> pKeys, DataDistribution distribution, String partitionLocationName) {
	this(input, pMethod, pKeys, null, null, distribution, partitionLocationName);
}
 
Example #26
Source File: BatchTask.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@link Collector} for the given task, as described by the given configuration. The
 * output collector contains the writers that forward the data to the different tasks that the given task
 * is connected to. Each writer applies the partitioning as described in the configuration.
 *
 * <p>One record writer is created per output. Its channel selector is an {@code OutputEmitter} built
 * from the output's ship strategy; the comparator, partitioner and data distribution are only looked
 * up when a comparator factory is configured for that output.
 *
 * @param task The task that the output collector is created for.
 * @param config The configuration describing the output shipping strategies.
 * @param cl The classloader used to load user defined types.
 * @param eventualOutputs The output writers that this task forwards to the next task for each output.
 *                        May be null, in which case the created writers are not added to any list.
 * @param outputOffset The offset to start to get the writers for the outputs
 * @param numOutputs The number of outputs described in the configuration.
 *
 * @return The OutputCollector that data produced in this task is submitted to, or {@code null} if
 *         {@code numOutputs} is zero.
 */
public static <T> Collector<T> getOutputCollector(AbstractInvokable task, TaskConfig config, ClassLoader cl,
		List<RecordWriter<?>> eventualOutputs, int outputOffset, int numOutputs) throws Exception
{
	// no outputs configured: there is nothing to collect into
	if (numOutputs == 0) {
		return null;
	}

	// get the factory for the serializer
	final TypeSerializerFactory<T> serializerFactory = config.getOutputSerializer(cl);
	final List<RecordWriter<SerializationDelegate<T>>> writers = new ArrayList<>(numOutputs);

	// create a writer for each output
	for (int i = 0; i < numOutputs; i++)
	{
		// create the OutputEmitter from output ship strategy
		final ShipStrategyType strategy = config.getOutputShipStrategy(i);
		final int indexInSubtaskGroup = task.getIndexInSubtaskGroup();
		final TypeComparatorFactory<T> compFactory = config.getOutputComparator(i, cl);

		final ChannelSelector<SerializationDelegate<T>> oe;
		if (compFactory == null) {
			// without a comparator factory only the strategy and subtask index are used
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup);
		}
		else {
			// with a comparator factory, also load the distribution and partitioner of this output
			final DataDistribution dataDist = config.getOutputDataDistribution(i, cl);
			final Partitioner<?> partitioner = config.getOutputPartitioner(i, cl);

			final TypeComparator<T> comparator = compFactory.createComparator();
			oe = new OutputEmitter<T>(strategy, indexInSubtaskGroup, comparator, partitioner, dataDist);
		}

		// the underlying writer is taken from the environment at position outputOffset + i
		final RecordWriter<SerializationDelegate<T>> recordWriter = RecordWriter.createRecordWriter(
			task.getEnvironment().getWriter(outputOffset + i),
			oe,
			task.getEnvironment().getTaskInfo().getTaskName());

		recordWriter.setMetricGroup(task.getEnvironment().getMetricGroup().getIOMetricGroup());

		writers.add(recordWriter);
	}
	// hand the created writers back to the caller if a list was supplied
	if (eventualOutputs != null) {
		eventualOutputs.addAll(writers);
	}
	return new OutputCollector<T>(writers, serializerFactory.getSerializer());
}
 
Example #27
Source File: PartitionOperatorBase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the data distribution held by this operator.
 *
 * @return The current value of the distribution field.
 */
public DataDistribution getDistribution() {
	return distribution;
}
 
Example #28
Source File: PartitionOperatorBase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the data distribution held by this operator.
 *
 * @param distribution The distribution to store; it is assigned without any validation.
 */
public void setDistribution(DataDistribution distribution) {
	this.distribution = distribution;
}
 
Example #29
Source File: OutputEmitter.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an output emitter for the given ship strategy.
 *
 * <p>Depending on the strategy, extra state is prepared: a one-element key array for
 * {@code PARTITION_CUSTOM}, and the flat comparators plus a matching key array for
 * {@code PARTITION_RANGE} when a comparator is given.
 *
 * @param strategy The ship strategy; must not be null and must be one of the strategies handled here.
 * @param indexInSubtaskGroup The value used to initialise the next channel to send to.
 * @param comparator The comparator to store; may be null.
 * @param partitioner The partitioner to store; required for {@code PARTITION_CUSTOM}.
 * @param distribution The data distribution to store.
 * @throws NullPointerException If the strategy is null, or if the strategy is
 *         {@code PARTITION_CUSTOM} and no partitioner is given.
 * @throws IllegalArgumentException If the strategy is not supported by this emitter.
 */
@SuppressWarnings("unchecked")
public OutputEmitter(
		ShipStrategyType strategy,
		int indexInSubtaskGroup,
		TypeComparator<T> comparator,
		Partitioner<?> partitioner,
		DataDistribution distribution) {
	if (strategy == null) {
		throw new NullPointerException();
	}

	this.strategy = strategy;
	this.nextChannelToSendTo = indexInSubtaskGroup;
	this.comparator = comparator;
	this.partitioner = (Partitioner<Object>) partitioner;
	this.distribution = distribution;

	switch (strategy) {
	case FORWARD:
	case PARTITION_HASH:
	case PARTITION_RANDOM:
	case PARTITION_FORCED_REBALANCE:
	case BROADCAST:
		// these strategies need no additional state
		break;
	case PARTITION_CUSTOM:
		extractedKeys = new Object[1];
		break;
	case PARTITION_RANGE:
		if (comparator != null) {
			this.flatComparators = comparator.getFlatComparators();
			this.keys = new Object[flatComparators.length];
		}
		break;
	default:
		throw new IllegalArgumentException("Invalid shipping strategy for OutputEmitter: " + strategy.name());
	}

	// custom partitioning cannot work without a partitioner
	if (partitioner == null && strategy == ShipStrategyType.PARTITION_CUSTOM) {
		throw new NullPointerException("Partitioner must not be null when the ship strategy is set to custom partitioning.");
	}
}
 
Example #30
Source File: Channel.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the data distribution held by this channel.
 *
 * @return The current value of the dataDistribution field.
 */
public DataDistribution getDataDistribution() {
	return dataDistribution;
}