cascading.flow.FlowProcess Java Examples

The following examples show how to use cascading.flow.FlowProcess. They are taken from open source projects; the source file, project, and license are noted above each example.
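
For orientation: a FlowProcess is the handle an operation or scheme uses to talk to the underlying platform at runtime, for example to read configuration properties, increment counters, or signal liveness. The sketch below shows that kind of usage inside a custom Function. It is a minimal, illustrative example; the class name, property key, and counter group are assumptions and do not come from any of the projects listed here.

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;

// Minimal sketch of FlowProcess usage inside a custom Function.
// The property key and counter names are illustrative assumptions.
public class UppercaseFunction extends BaseOperation<Void> implements Function<Void> {

  public UppercaseFunction() {
    super(1, new Fields("upper"));
  }

  @Override
  public void operate(FlowProcess flowProcess, FunctionCall<Void> functionCall) {
    // Read a job-level property through the FlowProcess (may be null if unset).
    Object flag = flowProcess.getProperty("example.uppercase");
    boolean uppercase = flag == null || !"false".equals(flag.toString());

    String value = functionCall.getArguments().getString(0);
    functionCall.getOutputCollector().add(new Tuple(uppercase ? value.toUpperCase() : value));

    // Report progress through a custom counter.
    flowProcess.increment("example", "tuples.processed", 1);
  }
}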
Example #1
Source File: JDBCScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sourceConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf conf ) {
    int concurrentReads = ( (JDBCTap) tap ).concurrentReads;

    if( selectQuery != null )
        DBInputFormat.setInput( conf, TupleRecord.class, selectQuery, countQuery, limit, concurrentReads );
    else {
        String tableName = ( (JDBCTap) tap ).getTableName();
        String joinedOrderBy = orderBy != null ? Util.join( orderBy, ", " ) : null;
        DBInputFormat.setInput( conf, TupleRecord.class, tableName, conditions, joinedOrderBy, limit, concurrentReads, columns );
    }

    if( inputFormatClass != null )
        conf.setInputFormat( inputFormatClass );
}
 
Example #2
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {

	Tap tap = this.getSingle(node.getSourceTaps());
	JobConf tapConfig = new JobConf(this.getNodeConfig(node));
	tap.sourceConfInit(flowProcess, tapConfig);
	tapConfig.set( "cascading.step.source", Tap.id( tap ) );

	Fields outFields = tap.getSourceFields();
	registerKryoTypes(outFields);

	JobConf sourceConfig = new JobConf(this.getNodeConfig(node));
	MultiInputFormat.addInputFormat(sourceConfig, tapConfig);

	DataSet<Tuple> src = env
			.createInput(new TapInputFormat(node), new TupleTypeInfo(outFields))
			.name(tap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(sourceConfig)));

	return src;
}
 
Example #3
Source File: FunctionCallStubTest.java    From plunger with Apache License 2.0
@Test
public void completeDifferentOutputFields() {
  @SuppressWarnings("unchecked")
  List<TupleEntry> actual = new FunctionCallStub.Builder<Void>(FIELDS)
      .outputFields(OUTPUT)
      .addTuple("a")
      .addTuple("b")
      .build()
      .complete(mock(FlowProcess.class), new Insert(OUTPUT, 1))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(1), tupleEntry(OUTPUT, 1));
}
 
Example #4
Source File: BufferCallStubTest.java    From plunger with Apache License 2.0
@Test
public void completeDifferentOutputFields() {
  List<TupleEntry> actual = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .outputFields(OUTPUT)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new CountBuffer())
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(4));
  assertThat(actual.get(0), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(1), tupleEntry(OUTPUT, 2));
  assertThat(actual.get(2), tupleEntry(OUTPUT, 1));
  assertThat(actual.get(3), tupleEntry(OUTPUT, 2));
}
 
Example #5
Source File: AggregatorCallStubTest.java    From plunger with Apache License 2.0
@Test
public void complete() {
  List<TupleEntry> actual = new AggregatorCallStub.Builder<Tuple[]>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new First(NON_GROUP_FIELDS))
      .result()
      .asTupleEntryList();

  assertThat(actual.size(), is(2));
  assertThat(actual.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
  assertThat(actual.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}
 
Example #6
Source File: HBaseRawScheme.java    From SpyGlass with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall)
		throws IOException {
	Tuple result = new Tuple();

	Object key = sourceCall.getContext()[0];
	Object value = sourceCall.getContext()[1];
	boolean hasNext = sourceCall.getInput().next(key, value);
	if (!hasNext) {
		return false;
	}

	// Skip nulls
	if (key == null || value == null) {
		return true;
	}

	ImmutableBytesWritable keyWritable = (ImmutableBytesWritable) key;
	Result row = (Result) value;
	result.add(keyWritable);
	result.add(row);
	sourceCall.getIncomingEntry().setTuple(result);
	return true;
}
 
Example #7
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("rawtypes")
 @Override
 public void sourceConfInit(FlowProcess<JobConf> fp,
     Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {

   if (filterPredicate != null) {
     ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
   }

   jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
   ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
   TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
 
Example #8
Source File: TestParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Tuple name = new Tuple();
  name.addString(arguments.getString(0));
  name.addString(arguments.getString(1));

  result.add(name);
  functionCall.getOutputCollector().add(result);
}
 
Example #9
Source File: HBaseRawTap.java    From SpyGlass with Apache License 2.0
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector)
		throws IOException {
	HBaseTapCollector hBaseCollector = new HBaseTapCollector(jobConfFlowProcess, this);
	hBaseCollector.prepare();
	return hBaseCollector;
}
 
Example #10
Source File: BufferCallStub.java    From plunger with Apache License 2.0
/** Processes the groups with the provided {@link Buffer}. */
public BufferCallStub<C> complete(FlowProcess<?> flowProcess, Buffer<C> buffer) {
  while (groupsIterator.hasNext()) {
    buffer.prepare(flowProcess, this);
    buffer.operate(flowProcess, nextOperateCall());
  }
  buffer.flush(flowProcess, this);
  buffer.cleanup(flowProcess, this);
  return this;
}
 
Example #11
Source File: TapDataReader.java    From plunger with Apache License 2.0
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) source;
  JobConf conf = new JobConf();
  FlowProcess<JobConf> flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sourceConfInit(flowProcess, conf);
  return hadoopTap.openForRead(flowProcess);
}
 
Example #12
Source File: TapDataReader.java    From plunger with Apache License 2.0
private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) source;
  Properties properties = new Properties();
  FlowProcess<Properties> flowProcess = new LocalFlowProcess(properties);
  localTap.sourceConfInit(flowProcess, properties);
  return localTap.openForRead(flowProcess);
}
 
Example #13
Source File: FunctionCallStub.java    From plunger with Apache License 2.0
/** Processes the groups with the provided {@link Function}. */
public FunctionCallStub<C> complete(FlowProcess<?> flowProcess, Function<C> function) {
  function.prepare(flowProcess, this);
  while (arguments.hasNext()) {
    function.operate(flowProcess, nextOperateCall());
  }
  function.flush(flowProcess, this);
  function.cleanup(flowProcess, this);
  return this;
}
 
Example #14
Source File: HBaseScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<JobConf> process,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(HBaseOutputFormat.class);

  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);
  
  String tableName = conf.get(HBaseOutputFormat.OUTPUT_TABLE);
  useSalt = conf.getBoolean(String.format(HBaseConstants.USE_SALT, tableName), false);
}
 
Example #15
Source File: TestParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Name name = (Name) arguments.getObject(0);
  result.add(name.getFirst_name());
  result.add(name.getLast_name());
  functionCall.getOutputCollector().add(result);
}
 
Example #16
Source File: ParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
 
Example #17
Source File: JDBCTap.java    From SpyGlass with Apache License 2.0
@Override
public void sourceConfInit( FlowProcess<JobConf> process, JobConf conf )
{
    // a hack for MultiInputFormat to see that there is a child format
    FileInputFormat.setInputPaths( conf, getPath() );

    if( username == null )
        DBConfiguration.configureDB(conf, driverClassName, connectionUrl);
    else
        DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password );

    super.sourceConfInit( process, conf );
}
 
Example #18
Source File: AggregatorCallStub.java    From plunger with Apache License 2.0
/** Processes the groups with the provided {@link Aggregator}. */
public AggregatorCallStub<C> complete(FlowProcess<?> flowProcess, Aggregator<C> aggregator) {
  while (groupsIterator.hasNext()) {
    aggregator.prepare(flowProcess, this);
    aggregator.start(flowProcess, nextGroup());
    while (valuesIterator.hasNext()) {
      aggregator.aggregate(flowProcess, nextAggregateCall());
    }
    aggregator.complete(flowProcess, this);
  }
  aggregator.flush(flowProcess, this);
  aggregator.cleanup(flowProcess, this);
  return this;
}
 
Example #19
Source File: ParquetScroogeSchemeTest.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Name name = Name$.MODULE$.apply(arguments.getString(0), Option.apply(arguments.getString(1)));

  result.add(name);
  functionCall.getOutputCollector().add(result);
}
 
Example #20
Source File: ParquetScroogeSchemeTest.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  Object record = functionCall.getArguments().getObject(0);
  Tuple result = new Tuple();
  result.add(record.toString());
  functionCall.getOutputCollector().add(result);
}
 
Example #21
Source File: ParquetScroogeScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ScroogeReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, ScroogeRecordConverter.class);
}
 
Example #22
Source File: ParquetScroogeScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  ParquetOutputFormat.setWriteSupportClass(jobConf, ScroogeWriteSupport.class);
  ScroogeWriteSupport.setScroogeClass(jobConf, this.config.getKlass());
}
 
Example #23
Source File: TestParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Name name = (Name) arguments.get(0);
  result.add(name.getFirst_name());
  result.add(name.getLast_name());
  functionCall.getOutputCollector().add(result);
}
 
Example #24
Source File: TestParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Name name = new Name();
  name.setFirst_name(arguments.getString(0));
  name.setLast_name(arguments.getString(1));

  result.add(name);
  functionCall.getOutputCollector().add(result);
}
 
Example #25
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(FlowProcess<JobConf> fp,
        Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
 
Example #26
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc)
    throws IOException {
  Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue();
  boolean hasNext = sc.getInput().next(null, value);
  if (!hasNext) { return false; }

  // Skip nulls
  if (value == null) { return true; }

  sc.getIncomingEntry().setTuple(value.get());
  return true;
}
 
Example #27
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0
@Override
public Fields retrieveSourceFields(FlowProcess<JobConf> flowProcess, Tap tap) {
  MessageType schema = readSchema(flowProcess, tap);
  SchemaIntersection intersection = new SchemaIntersection(schema, getSourceFields());

  setSourceFields(intersection.getSourceFields());

  return getSourceFields();
}
 
Example #28
Source File: HBaseRawScheme.java    From SpyGlass with Apache License 2.0
@Override
public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
		JobConf conf) {

	DeprecatedInputFormatWrapper.setInputFormat(org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf,
			ValueCopier.class);
	if (null != familyNames) {
		String columns = Util.join(this.familyNames, " ");
		LOG.debug("sourcing from column families: {}", columns);
		conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, columns);
	}
}
 
Example #29
Source File: ParquetValueScheme.java    From parquet-mr with Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc)
    throws IOException {
  TupleEntry tuple = sc.getOutgoingEntry();

  if (tuple.size() != 1) {
    throw new RuntimeException("ParquetValueScheme expects tuples with an arity of exactly 1, but found " + tuple.getFields());
  }

  T value = (T) tuple.getObject(0);
  OutputCollector output = sc.getOutput();
  output.collect(null, value);
}
 
Example #30
Source File: ParquetTBaseScheme.java    From parquet-mr with Apache License 2.0
@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {

  if (this.config.getKlass() == null) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }

  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}