cascading.pipe.Pipe Java Examples

The following examples show how to use cascading.pipe.Pipe. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PipeFactoryTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code newInstance()} registers a source on the flow definition: the captured pipe must be named
 * {@code "name"}, and the captured tap must expose the {@code fields} fixture and exactly one tuple holding
 * {@code "value"}.
 */
@Test
public void typical() {
  when(flow.getFlowDef()).thenReturn(flowDef);

  new PipeFactory(data, "name", flow).newInstance();

  verify(flowDef).addSource(pipeCaptor.capture(), tapCaptor.capture());

  // The pipe handed to addSource must carry the name given to the factory.
  Pipe sourcePipe = pipeCaptor.getValue();
  assertThat(sourcePipe.getName(), is("name"));

  TupleListTap sourceTap = tapCaptor.getValue();
  assertThat(sourceTap.getSourceFields(), is(fields));

  // Drain the tap's input so its contents can be inspected as a list.
  List<Tuple> drained = new ArrayList<Tuple>();
  for (Iterator<Tuple> remaining = sourceTap.getInput(); remaining.hasNext();) {
    drained.add(remaining.next());
  }
  assertThat(drained.size(), is(1));
  assertThat(drained.get(0), is(new Tuple("value")));
}
 
Example #2
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0 6 votes vote down vote up
/**
 * Collects, for every pipe feeding into {@code splice}, the scope preceding the splice in {@code node} whose name
 * equals that pipe's name. The result follows the order of {@code splice.getPrevious()}.
 *
 * @param node the flow node queried for the scopes preceding the splice
 * @param splice the splice whose input pipes are matched against those scopes
 * @return one scope per input pipe, in input order
 * @throws RuntimeException if an input pipe has no preceding scope with a matching name
 */
private List<Scope> getInputScopes(FlowNode node, Splice splice) {

		Pipe[] inputs = splice.getPrevious();
		// The candidate scopes do not depend on the input being matched, so fetch them once.
		Iterable<Scope> previousScopes = node.getPreviousScopes(splice);
		List<Scope> inScopes = new ArrayList<>(inputs.length);
		for(Pipe input : inputs) {
			Scope match = null;
			for (Scope inScope : previousScopes) {
				if(inScope.getName().equals(input.getName())) {
					match = inScope;
					break;
				}
			}
			if(match == null) {
				// Name the offending input so the failure can be diagnosed.
				throw new RuntimeException("Input scope was not found for pipe '" + input.getName() + "'");
			}
			inScopes.add(match);
		}

		return inScopes;
	}
 
Example #3
Source File: DumpTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a {@code Dump} built with the prefix {@code "prefix"} delivers {@code INPUT_DATA} unchanged to the
 * bucket and appends each expected line to the print stream as: prefix, first value, tab, second value, newline.
 */
@Test
public void prefix() {
  Pipe dump = new Dump("prefix", plunger.newPipe(INPUT_DATA), printStreamSupplier, FIELDS);

  Data result = plunger.newBucket(FIELDS, dump).result();
  assertThat(result, is(INPUT_DATA));

  // Expected output lines, in the order they must reach the stream.
  String[][] expectedLines = { { "A", "B" }, { "one", "1" }, { "two", "2" } };

  InOrder ordered = inOrder(mockPrintStream);
  for (String[] line : expectedLines) {
    ordered.verify(mockPrintStream).append("prefix");
    ordered.verify(mockPrintStream).append(line[0]);
    ordered.verify(mockPrintStream).append('\t');
    ordered.verify(mockPrintStream).append(line[1]);
    ordered.verify(mockPrintStream).append('\n');
  }
}
 
Example #4
Source File: DumpTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a {@code Dump} built without an explicit prefix delivers {@code INPUT_DATA} unchanged to the bucket
 * and appends each expected line to the print stream as: empty string, first value, tab, second value, newline.
 */
@Test
public void typical() {
  Pipe dump = new Dump(plunger.newPipe(INPUT_DATA), printStreamSupplier);

  Data result = plunger.newBucket(FIELDS, dump).result();
  assertThat(result, is(INPUT_DATA));

  // Expected output lines, in the order they must reach the stream.
  String[][] expectedLines = { { "A", "B" }, { "one", "1" }, { "two", "2" } };

  InOrder ordered = inOrder(mockPrintStream);
  for (String[] line : expectedLines) {
    ordered.verify(mockPrintStream).append("");
    ordered.verify(mockPrintStream).append(line[0]);
    ordered.verify(mockPrintStream).append('\t');
    ordered.verify(mockPrintStream).append(line[1]);
    ordered.verify(mockPrintStream).append('\n');
  }
}
 
Example #5
Source File: PipeFactory.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link Pipe} carrying this factory's name and registers it on the current {@link PlungerFlow} as a
 * source backed by the declared fields and {@link Tuple Tuples} of the configured {@link Data}.
 *
 * @return the newly created source pipe
 * @throws IllegalStateException if the flow is already complete
 */
Pipe newInstance() {
  boolean flowAlreadyRan = flow.isComplete();
  if (flowAlreadyRan) {
    throw new IllegalStateException(
        "You've already wielded your plunger! Create all of your pipes before calling result() on any of your buckets.");
  }

  Pipe source = new Pipe(name);
  TupleListTap sourceTap = new TupleListTap(data.getDeclaredFields(), data.getTuples());
  flow.getFlowDef().addSource(source, sourceTap);
  return source;
}
 
Example #6
Source File: Bucket.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a tuple sink attached to the given {@link PlungerFlow}. The sink captures the {@link Tuple Tuples}
 * emitted by {@code pipe}, whose values must be consistent with the declared {@link Fields}.
 *
 * @throws IllegalStateException if the flow is already complete
 */
Bucket(Fields fields, Pipe pipe, PlungerFlow flow) {
  super(new TupleScheme(fields));

  if (flow.isComplete()) {
    throw new IllegalStateException(
        "You've already wielded your plunger! Create all of your buckets before calling result() on any one of them.");
  }

  // Initialise all state before handing `this` to the flow definition.
  this.id = getClass().getSimpleName() + ":" + UUID.randomUUID();
  this.output = new ArrayList<Tuple>();
  this.flow = flow;

  flow.getFlowDef().addTailSink(pipe, this);
  modified();
}
 
Example #7
Source File: WordCount.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds and runs a word-count flow through a {@code FlinkConnector}: the "text" field read from the input is split
 * on whitespace into tokens, the tokens are counted, and the result is written to the output path using
 * {@code SinkMode.REPLACE}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) {

		if (args.length < 2) {
			throw new IllegalArgumentException("Please specify input and output paths as arguments.");
		}

		Fields token = new Fields( "token", String.class );
		Fields text = new Fields( "text" );
		RegexSplitGenerator splitter = new RegexSplitGenerator( token, "\\s+" );
		// only returns "token"
		Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );

		// Count occurrences per token.
		Pipe wcPipe = new Pipe( "wc", docPipe );
		wcPipe = new AggregateBy( wcPipe, token, new CountBy(new Fields("count")));

		Tap inTap = new Hfs(new TextDelimited(text, "\n" ), args[0]);
		Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

		// Wire the source into the head pipe and the sink onto the tail pipe.
		FlowDef flowDef = FlowDef.flowDef().setName( "wc" )
				.addSource( docPipe, inTap )
				.addTailSink( wcPipe, outTap );

		FlowConnector flowConnector = new FlinkConnector();

		Flow wcFlow = flowConnector.connect( flowDef );

		wcFlow.complete();
	}
 
Example #8
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to the four-argument constructor with an empty prefix and
 * {@code SystemPrintStreams.SYSOUT} as the stream supplier.
 *
 * @param pipe the pipe passed on to the delegate constructor
 * @param fieldsOfInterest the fields passed on to the delegate constructor
 */
public Dump(Pipe pipe, Fields... fieldsOfInterest) {
  this("", pipe, SystemPrintStreams.SYSOUT, fieldsOfInterest);
}
 
Example #9
Source File: DumpTest.java    From plunger with Apache License 2.0 4 votes vote down vote up
/** Asserts that a {@code Dump} wrapping a pipe named {@code "name"} satisfies the {@code serializable()} matcher. */
@Test
public void serializes() {
  Dump dump = new Dump(new Pipe("name"));
  assertThat(dump, is(serializable()));
}
 
Example #10
Source File: SortTest.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Defines a flow around a {@code SortAssembly} wrapping a pipe named {@code "pipe"}: the assembly is registered as
 * the source pipe for {@code in} and as the tail pipe for {@code out}.
 */
private static FlowDef defineFlow(Tap in, Tap out) {
  Pipe sorted = new SortAssembly(new Pipe("pipe"));
  FlowDef withSource = FlowDef.flowDef().addSource(sorted, in);
  return withSource.addTailSink(sorted, out);
}
 
Example #11
Source File: IntegrationTest.java    From plunger with Apache License 2.0 4 votes vote down vote up
/** Expects an {@link IllegalStateException} when a bucket is requested after {@code sink.result()} has been called. */
@Test(expected = IllegalStateException.class)
public void failWhenAddingBucketAfterFlowExecution() {
  sink.result();

  Pipe latePipe = new Pipe("too late for bucket");
  plunger.newBucket(FIELDS, latePipe);
}
 
Example #12
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to the four-argument constructor with an empty prefix.
 *
 * @param pipe the pipe passed on to the delegate constructor
 * @param streamSupplier the stream supplier passed on to the delegate constructor
 * @param fieldsOfInterest the fields passed on to the delegate constructor
 */
public Dump(Pipe pipe, PrintStreamSupplier streamSupplier, Fields... fieldsOfInterest) {
  this("", pipe, streamSupplier, fieldsOfInterest);
}
 
Example #13
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to the four-argument constructor with {@code SystemPrintStreams.SYSOUT} as
 * the stream supplier.
 *
 * @param prefix the prefix passed on to the delegate constructor
 * @param pipe the pipe passed on to the delegate constructor
 * @param fieldsOfInterest the fields passed on to the delegate constructor
 */
public Dump(String prefix, Pipe pipe, Fields... fieldsOfInterest) {
  this(prefix, pipe, SystemPrintStreams.SYSOUT, fieldsOfInterest);
}
 
Example #14
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to another constructor of this class, passing an empty prefix together with
 * the given pipe and stream supplier.
 *
 * @param pipe the pipe passed on to the delegate constructor
 * @param streamSupplier the stream supplier passed on to the delegate constructor
 */
public Dump(Pipe pipe, PrintStreamSupplier streamSupplier) {
  this("", pipe, streamSupplier);
}
 
Example #15
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to another constructor of this class, passing the given prefix and pipe
 * together with {@code SystemPrintStreams.SYSOUT} as the stream supplier.
 *
 * @param prefix the prefix passed on to the delegate constructor
 * @param pipe the pipe passed on to the delegate constructor
 */
public Dump(String prefix, Pipe pipe) {
  this(prefix, pipe, SystemPrintStreams.SYSOUT);
}
 
Example #16
Source File: Dump.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor: delegates to another constructor of this class, passing an empty prefix, the given pipe
 * and {@code SystemPrintStreams.SYSOUT} as the stream supplier.
 *
 * @param pipe the pipe passed on to the delegate constructor
 */
public Dump(Pipe pipe) {
  this("", pipe, SystemPrintStreams.SYSOUT);
}
 
Example #17
Source File: Plunger.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Pipe} that delivers the provided {@link Data}, naming it {@code "plunger-test-pipe:"} followed
 * by a random UUID.
 */
public Pipe newPipe(Data data) {
  String generatedName = "plunger-test-pipe:" + UUID.randomUUID();
  return newNamedPipe(generatedName, data);
}
 
Example #18
Source File: Plunger.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Pipe} with the given name that delivers the provided {@link Data}, by way of a
 * {@code PipeFactory} bound to this instance's flow.
 */
public Pipe newNamedPipe(String name, Data data) {
  PipeFactory factory = new PipeFactory(data, name, flow);
  return factory.newInstance();
}
 
Example #19
Source File: JoinFilterExampleCascading.java    From hadoop-arch-book with Apache License 2.0 4 votes vote down vote up
/**
 * Builds and runs a Cascading flow on Hadoop that filters the "foo" input, hash-joins it with the "bar" input on
 * {@code foobarId = barId}, filters the joined records and writes them, pipe-delimited, to the output path. The
 * flow's DOT graph is written to {@code dot/wc.dot} before the flow runs.
 *
 * @param args six arguments, in order: foo input path, bar input path, output path, the integer handed to
 *     {@code FooFilter}, the integer handed to {@code JoinedFilter}, and the number of reducers
 * @throws IllegalArgumentException if fewer than six arguments are supplied
 * @throws NumberFormatException if one of the three numeric arguments is not a valid integer
 */
public static void main(String[] args) {
  // Fail with a usage message instead of an ArrayIndexOutOfBoundsException on missing arguments.
  if (args.length < 6) {
    throw new IllegalArgumentException(
        "Usage: JoinFilterExampleCascading <fooInputPath> <barInputPath> <outputPath> "
            + "<fooValMax> <joinValMax> <numberOfReducers>");
  }

  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties,
      JoinFilterExampleCascading.class);
  // The reducer count is set under both property names.
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));

  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill( true )
      .setMapSpillThreshold( 50 * 1000 );

  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);

  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  // Filter foo records before the join.
  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));

  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe,
      joinBarFields);
  // Apply the spill settings to the join pipe's own configuration.
  props.setProperties( joinedPipe.getConfigDef(), Mode.REPLACE );

  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields,
      new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap).addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.writeDOT("dot/wc.dot");
  wcFlow.complete();
}
 
Example #20
Source File: BoundaryAfterSplitNodeTransformer.java    From cascading-flink with Apache License 2.0 3 votes vote down vote up
/**
 * Configures this expression graph with the {@code ReverseTopological} search order and a single OR expression,
 * captured as {@code ElementCapture.Primary}, that accepts a {@code Tap}, {@code Group} or {@code Pipe} type
 * expression with {@code TypeExpression.Topo.Split}.
 */
public SplitElementGraph() {

			super(SearchOrder.ReverseTopological,

					// Any one of the three element types qualifies, each only with the Split topology.
					OrElementExpression.or (ElementCapture.Primary,
							new TypeExpression(Tap.class, TypeExpression.Topo.Split),
							new TypeExpression(Group.class, TypeExpression.Topo.Split),
							new TypeExpression(Pipe.class, TypeExpression.Topo.Split)
					)

					);

		}
 
Example #21
Source File: Plunger.java    From plunger with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a {@link Bucket}, bound to this instance's flow, that captures the data emitted by the given tail
 * {@link Pipe}. The captured {@link Tuple Tuples} are expected to conform to the supplied {@link Fields}
 * declaration.
 */
public Bucket newBucket(Fields fields, Pipe tail) {
  Bucket bucket = new Bucket(fields, tail, flow);
  return bucket;
}