cascading.flow.FlowDef Java Examples

The following examples show how to use cascading.flow.FlowDef. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CommonCrawlIndexTest.java    From aws-big-data-blog with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the flow definition returned by
 * {@code CommonCrawlIndex.createCommonCrawlFlowDef}, runs it to completion with a
 * {@code LocalFlowConnector}, and then checks the sink path against the
 * validation path through {@code Assert.sameContent}.
 *
 * <p>All three paths are read from the properties rendered for this test class.
 */
@Test
public void testCreateCommonCrawlFlowDef() throws Exception {
    final Properties config = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);

    final String inputLocation = config.getProperty("inPath");
    final String outputLocation = config.getProperty("testCreateCommonCrawlFlowDefOutput");
    final String expectedLocation = config.getProperty("testCreateCommonCrawlFlowDefOutputValidation");

    // Input tap: text lines exposed under the single field "line".
    final Tap input = new FileTap(new TextLine(new Fields("line")), inputLocation);

    // Output tap: results are written as text lines under the same field name.
    final Tap output = new FileTap(new TextLine(new Fields("line")), outputLocation);

    // Assemble the flow definition and execute it with the local connector.
    final FlowDef definition = CommonCrawlIndex.createCommonCrawlFlowDef(input, output);
    final LocalFlowConnector connector = new LocalFlowConnector(config);
    connector.connect(definition).complete();

    Assert.sameContent(outputLocation, expectedLocation);
}
 
Example #2
Source File: WordCount.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Word-count entry point: splits each input line on whitespace into tokens,
 * counts the occurrences of every token, and writes the result to the output path.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) {

	if (args.length < 2) {
		throw new IllegalArgumentException("Please specify input and output paths as arguments.");
	}

	Fields token = new Fields( "token", String.class );
	Fields text = new Fields( "text" );
	RegexSplitGenerator splitter = new RegexSplitGenerator( token, "\\s+" );
	// Fields.RESULTS keeps only the splitter's output, i.e. the "token" field
	Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );

	Pipe wcPipe = new Pipe( "wc", docPipe );
	// group by token and emit the per-token count in a field named "count"
	wcPipe = new AggregateBy( wcPipe, token, new CountBy(new Fields("count")));

	Tap inTap = new Hfs(new TextDelimited(text, "\n" ), args[0]);
	// SinkMode.REPLACE is passed so the sink is opened in replace mode
	Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

	FlowDef flowDef = FlowDef.flowDef().setName( "wc" )
			.addSource( docPipe, inTap )
			.addTailSink( wcPipe, outTap );

	FlowConnector flowConnector = new FlinkConnector();

	Flow wcFlow = flowConnector.connect( flowDef );

	wcFlow.complete();
}
 
Example #3
Source File: Main.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the configuration for {@code Main}, optionally overrides the
 * {@code inPath} property with the first command-line argument, then builds the
 * flow definition and runs it to completion with a {@code HadoopFlowConnector}.
 *
 * @param args optional; when {@code args[0]} is present and non-empty it replaces
 *             the configured {@code inPath}
 * @throws IllegalStateException if the configuration cannot be read; the original
 *             {@link IOException} is attached as the cause
 */
public static void main(String args[]) {
    Properties properties;
    try {
        properties = new ConfigReader().renderProperties(Main.class);
    } catch (IOException e) {
        System.out.println("Could not read your config.properties file");
        // properties was never assigned at this point; fail fast with the cause
        // instead of continuing with a null configuration.
        throw new IllegalStateException("Could not read your config.properties file", e);
    }

    // Check the array length first: running with no arguments is valid and must
    // not raise ArrayIndexOutOfBoundsException.
    if (args.length > 0 && args[0] != null && args[0].length() > 0) {
        properties.put("inPath", args[0]);
    }

    FlowDef flowDef = buildFlowDef(properties);
    new HadoopFlowConnector(properties).connect(flowDef).complete();
}
 
Example #4
Source File: CommonCrawlIndexTest.java    From aws-big-data-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the flow definition from the properties rendered for this test class and
 * runs it to completion. The connector is chosen by the {@code platform} property:
 * the value {@code LOCAL} selects the local connector, anything else the Hadoop one.
 */
@Test
public void testMain() throws IOException {
    final Properties config = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
    final FlowDef definition = CommonCrawlIndex.buildFlowDef(config);

    final boolean runLocally = config.getProperty("platform").equals("LOCAL");
    if (!runLocally) {
        new HadoopFlowConnector(config).connect(definition).complete();
        return;
    }

    // The Cascading local connector leaves Hadoop out, so only the flow logic is tested.
    new LocalFlowConnector(config).connect(definition).complete();
}
 
Example #5
Source File: SortTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the flow produced by {@code defineFlow} over four in-memory tuples with the
 * local connector and checks the captured output rows in the order
 * "A", "AA", "B", "BB" (by {@code FIELD_S}).
 */
@Test
public void testComplete() throws Exception {
  Bucket output = new Bucket();

  Fields sourceFields = Fields.join(FIELD_S, FIELD_X, FIELD_Y);

  // Input rows are deliberately not in the order the assertions expect.
  TupleListTap input = new DataBuilder(sourceFields)
      .addTuple("A", "a", "za")
      .addTuple("B", "b", "zb")
      .addTuple("AA", "aa", "zaa")
      .addTuple("BB", "bb", "zbb")
      .toTap();

  FlowDef definition = defineFlow(input, output);
  new LocalFlowConnector().connect(definition).complete();

  List<TupleEntry> rows = output.result().asTupleEntryList();

  // Each row is looked up right before its own assertions.
  TupleEntry first = rows.get(0);
  assertThat(first.getString(FIELD_S), is("A"));
  assertThat(first.getString(FIELD_Y), is("za"));
  assertThat(first.getString(FIELD_V), is("a"));

  TupleEntry second = rows.get(1);
  assertThat(second.getString(FIELD_S), is("AA"));
  assertThat(second.getString(FIELD_Y), is("zaa"));
  assertThat(second.getString(FIELD_V), is("aa"));

  TupleEntry third = rows.get(2);
  assertThat(third.getString(FIELD_S), is("B"));

  TupleEntry fourth = rows.get(3);
  assertThat(fourth.getString(FIELD_S), is("BB"));
  assertThat(fourth.getString(FIELD_Y), is("zbb"));
  assertThat(fourth.getString(FIELD_V), is("bb"));
}
 
Example #6
Source File: FlinkConnector.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Adds the flow definition's class path entries to this connector's
 * {@code classPath} and then delegates to the superclass to connect the flow.
 *
 * @param flowDef the flow definition to connect
 * @return the flow returned by the superclass
 */
@Override
public Flow connect(FlowDef flowDef) {
	// Collect the class path entries before the superclass plans the flow.
	classPath.addAll(flowDef.getClassPath());

	final Flow connected = super.connect(flowDef);
	return connected;
}
 
Example #7
Source File: FlinkPlanner.java    From cascading-flink with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@code FlinkFlow} for the given definition, passing along this
 * planner's platform info, default properties and default configuration.
 *
 * @param flowDef the flow definition to create a flow for
 * @return a new {@code FlinkFlow}
 */
@Override
protected FlinkFlow createFlow( FlowDef flowDef ) {
	return new FlinkFlow(
			getPlatformInfo(),
			flowDef,
			getDefaultProperties(),
			getDefaultConfig() );
}
 
Example #8
Source File: JoinFilterExampleCascading.java    From hadoop-arch-book with Apache License 2.0 4 votes vote down vote up
/**
 * Filters the "foo" input, hash-joins it with the "bar" input on
 * {@code foobarId = barId}, filters the joined rows, and writes the result as
 * pipe-delimited text. The flow's DOT graph is written to {@code dot/wc.dot}
 * before the flow is run.
 *
 * @param args six positional arguments: foo input path, bar input path, output path,
 *             fooValMax, joinValMax, number of reducers
 * @throws IllegalArgumentException if fewer than six arguments are supplied
 * @throws NumberFormatException if any of the three numeric arguments is not an integer
 */
public static void main(String[] args) {
  // Fail with a usage message rather than an ArrayIndexOutOfBoundsException.
  if (args.length < 6) {
    throw new IllegalArgumentException(
        "Usage: JoinFilterExampleCascading <fooInputPath> <barInputPath> <outputPath>"
            + " <fooValMax> <joinValMax> <numberOfReducers>");
  }

  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties,
      JoinFilterExampleCascading.class);
  // The reducer count is set under two property names; presumably the legacy and
  // the current Hadoop key — confirm against the Hadoop version in use.
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));

  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill( true )
      .setMapSpillThreshold( 50 * 1000 );

  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);

  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  // filter the foo side before the join
  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));

  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe,
      joinBarFields);
  // apply the spill settings to the join pipe's ConfigDef in REPLACE mode
  props.setProperties( joinedPipe.getConfigDef(), Mode.REPLACE );

  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields,
      new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap).addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow flow = flowConnector.connect(flowDef);
  flow.writeDOT("dot/wc.dot");
  flow.complete();
}
 
Example #9
Source File: PlungerFlow.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Constructs a new plunger flow backed by a newly created {@code FlowDef}.
 */
PlungerFlow() {
  this.flowDef = new FlowDef();
}
 
Example #10
Source File: PlungerFlow.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Gets the underlying {@code FlowDef}.
 *
 * @return the flow definition held by this object
 */
FlowDef getFlowDef() {
  return this.flowDef;
}
 
Example #11
Source File: SortTest.java    From plunger with Apache License 2.0 4 votes vote down vote up
/**
 * Defines a flow that wraps a pipe named "pipe" in a {@code SortAssembly}, using
 * {@code in} as the source and {@code out} as the tail sink.
 *
 * @param in the source tap
 * @param out the sink tap
 * @return the resulting flow definition
 */
private static FlowDef defineFlow(Tap in, Tap out) {
  Pipe head = new Pipe("pipe");
  Pipe sorted = new SortAssembly(head);

  // Source and tail sink are both registered against the assembly.
  return FlowDef.flowDef()
      .addSource(sorted, in)
      .addTailSink(sorted, out);
}
 
Example #12
Source File: FlinkFlow.java    From cascading-flink with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a Flink flow by forwarding all arguments to the superclass constructor.
 *
 * @param platformInfo the platform info passed to the superclass
 * @param flowDef the flow definition this flow is created from
 * @param properties the properties passed to the superclass
 * @param defaultConfig the default configuration passed to the superclass
 */
public FlinkFlow(
		PlatformInfo platformInfo,
		FlowDef flowDef,
		Map<Object, Object> properties,
		Configuration defaultConfig) {
	// The superclass takes the flow definition last.
	super(platformInfo, properties, defaultConfig, flowDef);
}