cascading.flow.hadoop.HadoopFlowConnector Java Examples

The following examples show how to use cascading.flow.hadoop.HadoopFlowConnector. The source project and license for each example are noted above its code.
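All of the examples share the same basic pattern: describe a pipe assembly with a FlowDef, let HadoopFlowConnector plan it into MapReduce jobs, and run it with complete(). The sketch below is a minimal, hypothetical illustration of that pattern; the CopyFlow class, paths, and pass-through pipe are invented for this sketch and are not taken from the projects below.

import java.util.Properties;

import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.pipe.Pipe;
import cascading.property.AppProps;
import cascading.scheme.hadoop.TextLine;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;

public class CopyFlow {
    public static void main(String[] args) {
        String inPath = args[0];   // hypothetical input path on HDFS
        String outPath = args[1];  // hypothetical output path on HDFS

        // Tell Cascading which jar to ship to the cluster
        Properties properties = new Properties();
        AppProps.setApplicationJarClass(properties, CopyFlow.class);

        // Source and sink taps reading and writing plain text lines
        Tap inTap = new Hfs(new TextLine(), inPath);
        Tap outTap = new Hfs(new TextLine(), outPath);

        // A pass-through pipe: every input tuple is copied to the sink
        Pipe copyPipe = new Pipe("copy");

        FlowDef flowDef = FlowDef.flowDef()
            .setName("copy")
            .addSource(copyPipe, inTap)
            .addTailSink(copyPipe, outTap);

        // connect() plans the flow into MapReduce jobs;
        // complete() runs it and blocks until the flow finishes
        new HadoopFlowConnector(properties).connect(flowDef).complete();
    }
}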
Example #1
Source File: Main.java    From aws-big-data-blog with Apache License 2.0
public static void main(String[] args) {
    Properties properties = null;
    try {
        properties = new ConfigReader().renderProperties(Main.class);
        // Allow the input path to be overridden from the command line
        if (args.length > 0 && args[0] != null && args[0].length() > 0) {
            properties.put("inPath", args[0]);
        }
    } catch (IOException e) {
        System.out.println("Could not read your config.properties file");
        e.printStackTrace();
    }

    FlowDef flowDef = buildFlowDef(properties);
    new HadoopFlowConnector(properties).connect(flowDef).complete();
}
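This is the recurring pattern from the sketch above: buildFlowDef (defined elsewhere in Main.java) supplies the pipe assembly, HadoopFlowConnector plans it, and complete() runs the flow and blocks until it finishes.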
 
Example #2
Source File: CommonCrawlIndexTest.java    From aws-big-data-blog with Apache License 2.0
@Test
public void testMain() throws IOException {
    Properties properties = new ConfigReader().renderProperties(CommonCrawlIndexTest.class);
    FlowDef flowDef = CommonCrawlIndex.buildFlowDef(properties);

    if ("LOCAL".equals(properties.getProperty("platform"))) {
        // Use Cascading's local connector to exclude Hadoop and just test the logic
        new LocalFlowConnector(properties).connect(flowDef).complete();
    } else {
        new HadoopFlowConnector(properties).connect(flowDef).complete();
    }
}
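When the platform property is LOCAL, the test swaps in cascading.flow.local.LocalFlowConnector, which runs the same FlowDef in-memory with no Hadoop dependency. Only the connector changes, so the pipe-assembly logic under test stays identical between the local and cluster runs.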
 
Example #3
Source File: JoinFilterExampleCascading.java    From hadoop-arch-book with Apache License 2.0
public static void main(String[] args) {
  String fooInputPath = args[0];
  String barInputPath = args[1];
  String outputPath = args[2];
  int fooValMax = Integer.parseInt(args[3]);
  int joinValMax = Integer.parseInt(args[4]);
  int numberOfReducers = Integer.parseInt(args[5]);

  Properties properties = new Properties();
  AppProps.setApplicationJarClass(properties,
      JoinFilterExampleCascading.class);
  properties.setProperty("mapred.reduce.tasks", Integer.toString(numberOfReducers));
  properties.setProperty("mapreduce.job.reduces", Integer.toString(numberOfReducers));
  
  SpillableProps props = SpillableProps.spillableProps()
      .setCompressSpill( true )
      .setMapSpillThreshold( 50 * 1000 );
      

  
  HadoopFlowConnector flowConnector = new HadoopFlowConnector(properties);

  // create source and sink taps
  Fields fooFields = new Fields("fooId", "fooVal", "foobarId");
  Tap fooTap = new Hfs(new TextDelimited(fooFields, "|"), fooInputPath);
  Fields barFields = new Fields("barId", "barVal");
  Tap barTap = new Hfs(new TextDelimited(barFields, "|"), barInputPath);

  Tap outputTap = new Hfs(new TextDelimited(false, "|"), outputPath);

  Fields joinFooFields = new Fields("foobarId");
  Fields joinBarFields = new Fields("barId");

  Pipe fooPipe = new Pipe("fooPipe");
  Pipe barPipe = new Pipe("barPipe");

  Pipe fooFiltered = new Each(fooPipe, fooFields, new FooFilter(fooValMax));

  Pipe joinedPipe = new HashJoin(fooFiltered, joinFooFields, barPipe,
      joinBarFields);
  // Apply the spill settings to this join's configuration
  props.setProperties( joinedPipe.getConfigDef(), Mode.REPLACE );

  Fields joinFields = new Fields("fooId", "fooVal", "foobarId", "barVal");
  Pipe joinedFilteredPipe = new Each(joinedPipe, joinFields,
      new JoinedFilter(joinValMax));

  FlowDef flowDef = FlowDef.flowDef().setName("wc")
      .addSource(fooPipe, fooTap).addSource(barPipe, barTap)
      .addTailSink(joinedFilteredPipe, outputTap);

  Flow wcFlow = flowConnector.connect(flowDef);
  wcFlow.writeDOT("dot/wc.dot"); // write the planned flow graph for inspection
  wcFlow.complete();
}
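Note that HashJoin performs a map-side join in which the right-hand stream (barPipe here) is accumulated into memory, so it should be the smaller input. The SpillableProps applied to the join's ConfigDef govern when those accumulated tuples are compressed and spilled to disk.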