cascading.pipe.GroupBy Java Examples

The following examples show how to use cascading.pipe.GroupBy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BoundaryBeforeGroupByRemovalTransformer.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the matcher from an expression graph with a single arc: a
 * {@link Boundary} (captured as {@code Primary}, topology {@code LinearOut})
 * connected via {@code ScopeExpression.ALL} to a {@link GroupBy} (captured as
 * {@code Secondary}, topology {@code LinearIn}).
 */
public BoundaryGroupByMatcher() {
	super(
		new ExpressionGraph().arc(
			// left-hand side of the arc: the Boundary
			new TypeExpression(ElementCapture.Primary, Boundary.class, TypeExpression.Topo.LinearOut),
			ScopeExpression.ALL,
			// right-hand side of the arc: the GroupBy
			new TypeExpression(ElementCapture.Secondary, GroupBy.class, TypeExpression.Topo.LinearIn)
		)
	);
}
 
Example #2
Source File: GroupByReducer.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares this reducer: creates the flow process, validates that the flow
 * node has exactly one source and that it is a {@link GroupBy}, builds the
 * stream graph for it and logs the graph's heads and tails.
 *
 * @param config configuration that is converted to a Hadoop configuration
 *               for the flow process
 * @throws CascadingException if one is raised during setup (rethrown as is)
 * @throws FlowException wrapping any other failure raised during setup,
 *                       including the validation errors thrown below
 */
@Override
public void open(Configuration config) {

	this.calledPrepare = false;

	try {
		currentProcess = new FlinkFlowProcess(
				FlinkConfigConverter.toHadoopConfig(config),
				getRuntimeContext(),
				flowNode.getID());

		// The node must be fed by exactly one element ...
		final Set<FlowElement> sourceElements = flowNode.getSourceElements();
		if (sourceElements.size() != 1) {
			throw new RuntimeException("FlowNode for GroupByReducer may only have a single source");
		}

		// ... and that element must be a GroupBy.
		final FlowElement onlySource = sourceElements.iterator().next();
		if (!(onlySource instanceof GroupBy)) {
			throw new RuntimeException("Source of GroupByReducer must be a GroupBy");
		}

		streamGraph = new GroupByStreamGraph(currentProcess, flowNode, (GroupBy) onlySource);
		groupSource = streamGraph.getGroupSource();

		for (Duct headDuct : streamGraph.getHeads()) {
			LOG.info("sourcing from: " + ((ElementDuct) headDuct).getFlowElement());
		}

		for (Duct tailDuct : streamGraph.getTails()) {
			LOG.info("sinking to: " + ((ElementDuct) tailDuct).getFlowElement());
		}
	}
	catch (Throwable failure) {
		// Cascading's own exceptions pass through untouched; everything else is wrapped.
		if (failure instanceof CascadingException) {
			throw (CascadingException) failure;
		}

		throw new FlowException("internal error during GroupByReducer configuration", failure);
	}
}
 
Example #3
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Translates a flow node whose source element is a {@link GroupBy} into a
 * group-reduce on the given input data set.
 *
 * <p>Three cases are distinguished: a reversed sort order is delegated to
 * {@code translateInverseSortedGroupBy}, a GroupBy without grouping keys is
 * delegated to {@code translateGlobalGroupBy}, and everything else is handled
 * here by grouping on the key fields, optionally sorting each group on the
 * sort fields, and applying a {@code GroupByReducer}.
 *
 * @param input the data set to group
 * @param node  the flow node; its first source element is cast to {@link GroupBy}
 * @param dop   parallelism set on the resulting reduce operator
 * @return the data set produced by the group-reduce
 */
private DataSet<Tuple> translateGroupBy(DataSet<Tuple> input, FlowNode node, int dop) {

	GroupBy groupBy = (GroupBy) node.getSourceElements().iterator().next();

	Scope outScope = getOutScope(node);
	List<Scope> inScopes = getInputScopes(node, groupBy);

	// output fields depend on whether the outgoing scope is an Every
	Fields outFields;
	if(outScope.isEvery()) {
		outFields = outScope.getOutGroupingFields();
	}
	else {
		outFields = outScope.getOutValuesFields();
	}
	registerKryoTypes(outFields);

	// get input scope
	Scope inScope = inScopes.get(0);

	// get grouping keys
	Fields groupKeyFields = groupBy.getKeySelectors().get(inScope.getName());
	// get group sorting keys
	Fields sortKeyFields = groupBy.getSortingSelectors().get(inScope.getName());

	String[] groupKeys = registerKeyFields(input, groupKeyFields);
	String[] sortKeys = null;
	if (sortKeyFields != null) {
		sortKeys = registerKeyFields(input, sortKeyFields);
	}
	Order sortOrder = groupBy.isSortReversed() ? Order.DESCENDING : Order.ASCENDING;

	if(sortOrder == Order.DESCENDING) {
		// translate groupBy with inverse sort order
		return translateInverseSortedGroupBy(input, node, dop, groupKeys, sortKeys, outFields);
	}
	else if(groupKeys == null || groupKeys.length == 0) {
		// translate key-less (global) groupBy
		return translateGlobalGroupBy(input, node, dop, sortKeys, sortOrder, outFields);
	}
	else {

		UnsortedGrouping<Tuple> grouping = input
				.groupBy(groupKeys);

		if(sortKeys != null && sortKeys.length > 0) {
			// translate groupBy with group sorting

			// sortOrder is ASCENDING on this path (DESCENDING returned above).
			// Every sort key must use the same order; previously the secondary
			// keys were sorted DESCENDING, yielding a mixed ordering.
			SortedGrouping<Tuple> sortedGrouping = grouping
					.sortGroup(sortKeys[0], sortOrder);
			for(int i=1; i<sortKeys.length; i++) {
				sortedGrouping = sortedGrouping
						.sortGroup(sortKeys[i], sortOrder);
			}

			return sortedGrouping
					.reduceGroup(new GroupByReducer(node))
					.returns(new TupleTypeInfo(outFields))
					.withParameters(this.getFlinkNodeConfig(node))
					.setParallelism(dop)
					.name("reduce-" + node.getID());
		}
		else {
			// translate groupBy without group sorting

			return grouping
					.reduceGroup(new GroupByReducer(node))
					.returns(new TupleTypeInfo(outFields))
					.withParameters(this.getFlinkNodeConfig(node))
					.setParallelism(dop)
					.name("reduce-" + node.getID());
		}
	}

}
 
Example #4
Source File: HashJoinMapperStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a HashJoinMapperStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #5
Source File: HashJoinStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a HashJoinStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #6
Source File: GroupByStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the graph starting at the given GroupBy: creates a
 * {@code GroupByInGate} in the {@code source} role, registers it as a head
 * and continues graph construction from it via {@code handleDuct}.
 *
 * @param groupBy     the GroupBy the source gate is created for
 * @param flowProcess the flow process handed to the source gate
 */
private void buildGraph( GroupBy groupBy, FlowProcess flowProcess ) {
	this.sourceStage = new GroupByInGate( flowProcess, groupBy, IORole.source );

	// the source gate is both a head of the graph and the starting duct
	addHead( this.sourceStage );
	handleDuct( groupBy, this.sourceStage );
}
 
Example #7
Source File: GroupByStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a GroupByStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #8
Source File: GroupByInGate.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
public GroupByInGate(FlowProcess flowProcess, GroupBy splice, IORole ioRole) {
	super(flowProcess, splice, ioRole);

	this.isBufferJoin = splice.getJoiner() instanceof BufferJoin;
}
 
Example #9
Source File: SinkStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a SinkStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #10
Source File: EachStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	// Message previously named "MapStreamGraph", which does not match this
	// class (EachStreamGraph) and misleads anyone reading the stack trace.
	throw new UnsupportedOperationException("Cannot create a GroupBy gate in an EachStreamGraph");
}
 
Example #11
Source File: SourceStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy element, IORole role) {
	final String reason = "SourceStreamGraph may not have a GroupByGate";
	throw new UnsupportedOperationException(reason);
}
 
Example #12
Source File: CoGroupBufferReduceStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a CoGroupBufferReduceStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #13
Source File: CoGroupReduceStreamGraph.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Not supported by this stream graph.
 *
 * @throws UnsupportedOperationException always
 */
@Override
protected Gate createGroupByGate(GroupBy groupBy, IORole ioRole) {
	final String reason = "Cannot create a GroupBy gate in a CoGroupReduceStreamGraph";
	throw new UnsupportedOperationException(reason);
}
 
Example #14
Source File: GroupByStreamGraph.java    From cascading-flink with Apache License 2.0 3 votes vote down vote up
/**
 * Creates the stream graph for a flow node that is sourced by the given GroupBy.
 * The steps below run in a fixed sequence: build the graph, set traps and
 * scopes, print the graph, then bind it.
 *
 * @param flowProcess the flow process; also supplies the current slice number
 *                    used when printing the graph
 * @param node        the flow node this graph is built for
 * @param groupBy     the GroupBy the graph is built from
 */
public GroupByStreamGraph(FlinkFlowProcess flowProcess, FlowNode node, GroupBy groupBy) {

		super(flowProcess, node);

		// construct the ducts starting at the GroupBy
		buildGraph(groupBy, flowProcess);

		setTraps();
		setScopes();

		// NOTE(review): presumably a debugging aid; the output is labelled with
		// the node id, "groupby" and the current slice number
		printGraph( node.getID(), "groupby", flowProcess.getCurrentSliceNum() );
		bind();
	}
 
Example #15
Source File: BoundaryBeforeGroupByTransformer.java    From cascading-flink with Apache License 2.0 2 votes vote down vote up
/**
 * Creates an expression graph that searches in reverse topological order
 * for a {@link GroupBy} flow element, captured as {@code Primary}.
 */
public GroupByGraph() {
	super(
		SearchOrder.ReverseTopological,
		new FlowElementExpression(ElementCapture.Primary, GroupBy.class)
	);
}