cascading.flow.FlowProcess Java Examples

Source File: JDBCScheme.java From SpyGlass with Apache License 2.0

6 votes

/**
 * Registers the database input on the job configuration.
 *
 * <p>When a select query is set, the input is described by that query and its count query;
 * otherwise it is described by the tap's table name together with the conditions, the
 * comma-joined order-by columns and the column list. If an input format class is set on
 * this scheme it is applied to the configuration last.
 *
 * @param process the current flow process (not used here)
 * @param tap     the tap being read; cast to {@code JDBCTap}
 * @param conf    the job configuration to populate
 */
@Override
public void sourceConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf conf ) {
    JDBCTap jdbcTap = (JDBCTap) tap;
    int concurrentReads = jdbcTap.concurrentReads;

    if( selectQuery == null ) {
        String tableName = jdbcTap.getTableName();

        String joinedOrderBy = null;
        if( orderBy != null ) {
            joinedOrderBy = Util.join( orderBy, ", " );
        }

        DBInputFormat.setInput( conf, TupleRecord.class, tableName, conditions, joinedOrderBy, limit, concurrentReads, columns );
    } else {
        DBInputFormat.setInput( conf, TupleRecord.class, selectQuery, countQuery, limit, concurrentReads );
    }

    if( inputFormatClass != null ) {
        conf.setInputFormat( inputFormatClass );
    }
}

Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0

6 votes

/**
 * Builds the input data set for the single source tap of the given node.
 *
 * <p>The tap is initialised against its own copy of the node configuration, which is then
 * registered with {@code MultiInputFormat} on a second copy; that second copy is converted
 * and handed to the created input as parameters.
 *
 * @param flowProcess the flow process passed to the tap's {@code sourceConfInit}
 * @param env         the execution environment used to create the input
 * @param node        the flow node whose source tap is read
 * @param dop         the parallelism set on the created input
 * @return the data set produced by the created input
 */
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
	Tap sourceTap = this.getSingle(node.getSourceTaps());

	// Let the tap write its settings into a dedicated copy of the node configuration.
	JobConf tapConf = new JobConf(this.getNodeConfig(node));
	sourceTap.sourceConfInit(flowProcess, tapConf);
	tapConf.set( "cascading.step.source", Tap.id( sourceTap ) );

	Fields sourceFields = sourceTap.getSourceFields();
	registerKryoTypes(sourceFields);

	// A second copy of the node configuration carries the tap configuration as an input format.
	JobConf inputConf = new JobConf(this.getNodeConfig(node));
	MultiInputFormat.addInputFormat(inputConf, tapConf);

	return env
			.createInput(new TapInputFormat(node), new TupleTypeInfo(sourceFields))
			.name(sourceTap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(inputConf)));
}

Source File: FunctionCallStubTest.java From plunger with Apache License 2.0

6 votes

/**
 * Completes a two-tuple function stub with an {@code Insert} declared on {@code OUTPUT} and
 * expects one {@code OUTPUT} entry with value 1 per input tuple.
 */
@Test
public void completeDifferentOutputFields() {
  @SuppressWarnings("unchecked")
  List<TupleEntry> entries = new FunctionCallStub.Builder<Void>(FIELDS)
      .outputFields(OUTPUT)
      .addTuple("a")
      .addTuple("b")
      .build()
      .complete(mock(FlowProcess.class), new Insert(OUTPUT, 1))
      .result()
      .asTupleEntryList();

  // One output entry per input tuple, each carrying the inserted value.
  assertThat(entries.size(), is(2));
  assertThat(entries.get(0), tupleEntry(OUTPUT, 1));
  assertThat(entries.get(1), tupleEntry(OUTPUT, 1));
}

Source File: BufferCallStubTest.java From plunger with Apache License 2.0

6 votes

/**
 * Completes a buffer stub holding two groups of two tuples with a {@code CountBuffer} and
 * expects four {@code OUTPUT} entries valued 1, 2, 1, 2.
 */
@Test
public void completeDifferentOutputFields() {
  List<TupleEntry> entries = new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .outputFields(OUTPUT)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new CountBuffer())
      .result()
      .asTupleEntryList();

  assertThat(entries.size(), is(4));

  // First group.
  assertThat(entries.get(0), tupleEntry(OUTPUT, 1));
  assertThat(entries.get(1), tupleEntry(OUTPUT, 2));

  // Second group.
  assertThat(entries.get(2), tupleEntry(OUTPUT, 1));
  assertThat(entries.get(3), tupleEntry(OUTPUT, 2));
}

Source File: AggregatorCallStubTest.java From plunger with Apache License 2.0

6 votes

/**
 * Completes an aggregator stub holding two groups of two tuples with {@code First} and
 * expects one entry per group: "a" for the first group and "c" for the second.
 */
@Test
public void complete() {
  List<TupleEntry> entries = new AggregatorCallStub.Builder<Tuple[]>(GROUP_FIELDS, NON_GROUP_FIELDS)
      .newGroup(1)
      .addTuple("a")
      .addTuple("b")
      .newGroup(2)
      .addTuple("c")
      .addTuple("d")
      .build()
      .complete(mock(FlowProcess.class), new First(NON_GROUP_FIELDS))
      .result()
      .asTupleEntryList();

  // One result per group.
  assertThat(entries.size(), is(2));
  assertThat(entries.get(0), tupleEntry(NON_GROUP_FIELDS, "a"));
  assertThat(entries.get(1), tupleEntry(NON_GROUP_FIELDS, "c"));
}

Source File: HBaseRawScheme.java From SpyGlass with Apache License 2.0

6 votes

/**
 * Reads the next key/value pair from the record reader into the incoming tuple entry.
 *
 * <p>The key and value holders are taken from slots 0 and 1 of the source call context.
 * When either holder is {@code null} the record is skipped: {@code true} is returned
 * without touching the incoming entry.
 *
 * @param flowProcess the current flow process (not used here)
 * @param sourceCall  supplies the context, the record reader and the incoming entry
 * @return {@code false} when the reader has no further record, {@code true} otherwise
 * @throws IOException if the record reader fails
 */
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall)
		throws IOException {
	Object[] context = sourceCall.getContext();
	Object key = context[0];
	Object value = context[1];

	if (!sourceCall.getInput().next(key, value)) {
		return false;
	}

	// Skip nulls
	if (key != null && value != null) {
		Tuple result = new Tuple();
		result.add((ImmutableBytesWritable) key);
		result.add((Result) value);
		sourceCall.getIncomingEntry().setTuple(result);
	}

	return true;
}

Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Configures the job to read Parquet data as tuples.
 *
 * <p>Applies the filter predicate when one is set, selects
 * {@code DeprecatedParquetInputFormat} as the input format, selects
 * {@code TupleReadSupport} as the read support class and registers this scheme's
 * source fields as the requested fields.
 *
 * @param fp      the current flow process (not used here)
 * @param tap     the tap being read (not used here)
 * @param jobConf the job configuration to populate
 */
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  if (filterPredicate != null) {
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }

  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}

Source File: TestParquetTupleScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Wraps the first two string arguments in a nested tuple and emits a one-element tuple
 * containing that nested tuple.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry args = functionCall.getArguments();

  // Inner tuple: the two string arguments, in order.
  Tuple nameTuple = new Tuple();
  nameTuple.addString(args.getString(0));
  nameTuple.addString(args.getString(1));

  // Outer tuple: a single element holding the inner tuple.
  Tuple output = new Tuple();
  output.add(nameTuple);

  functionCall.getOutputCollector().add(output);
}

Source File: HBaseRawTap.java From SpyGlass with Apache License 2.0

5 votes

/**
 * Creates an {@code HBaseTapCollector} for this tap, prepares it and returns it.
 *
 * @param jobConfFlowProcess the flow process handed to the collector
 * @param outputCollector    not used here
 * @return the prepared collector
 * @throws IOException declared by the overridden method
 */
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector)
		throws IOException {
	final HBaseTapCollector collector = new HBaseTapCollector(jobConfFlowProcess, this);
	collector.prepare();
	return collector;
}

Source File: BufferCallStub.java From plunger with Apache License 2.0

5 votes

/**
 * Processes the groups with the provided {@link Buffer}.
 *
 * <p>Drives the buffer through one full lifecycle: {@code prepare} once, {@code operate}
 * once per remaining group, then {@code flush} and {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every lifecycle call
 * @param buffer      the buffer under test
 * @return this stub, so that the collected result can be read from it
 */
public BufferCallStub<C> complete(FlowProcess<?> flowProcess, Buffer<C> buffer) {
  // prepare() is a once-per-lifecycle hook; calling it before every group would
  // re-initialise state that the buffer is entitled to keep across groups.
  buffer.prepare(flowProcess, this);
  while (groupsIterator.hasNext()) {
    buffer.operate(flowProcess, nextOperateCall());
  }
  buffer.flush(flowProcess, this);
  buffer.cleanup(flowProcess, this);
  return this;
}

Source File: TapDataReader.java From plunger with Apache License 2.0

5 votes

/**
 * Opens {@code source} for reading as a Hadoop tap, using a fresh {@code JobConf} wrapped
 * in a {@code HadoopFlowProcess}.
 *
 * @return the iterator returned by the tap's {@code openForRead}
 * @throws IOException if the tap cannot be opened
 */
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  JobConf jobConf = new JobConf();
  FlowProcess<JobConf> hadoopProcess = new HadoopFlowProcess(jobConf);

  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> tap = (Tap<JobConf, ?, ?>) source;

  // The tap must see the configuration before it is opened.
  tap.sourceConfInit(hadoopProcess, jobConf);
  return tap.openForRead(hadoopProcess);
}

Source File: TapDataReader.java From plunger with Apache License 2.0

5 votes

/**
 * Opens {@code source} for reading as a local-mode tap, using fresh {@code Properties}
 * wrapped in a {@code LocalFlowProcess}.
 *
 * @return the iterator returned by the tap's {@code openForRead}
 * @throws IOException if the tap cannot be opened
 */
private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  Properties props = new Properties();
  FlowProcess<Properties> localProcess = new LocalFlowProcess(props);

  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> tap = (Tap<Properties, ?, ?>) source;

  // The tap must see the configuration before it is opened.
  tap.sourceConfInit(localProcess, props);
  return tap.openForRead(localProcess);
}

Source File: FunctionCallStub.java From plunger with Apache License 2.0

5 votes

/**
 * Feeds every remaining argument tuple to the provided {@link Function}.
 *
 * <p>Lifecycle: {@code prepare} once, {@code operate} once per remaining argument, then
 * {@code flush} and {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every lifecycle call
 * @param function    the function under test
 * @return this stub
 */
public FunctionCallStub<C> complete(FlowProcess<?> flowProcess, Function<C> function) {
  function.prepare(flowProcess, this);

  for (; arguments.hasNext(); ) {
    function.operate(flowProcess, nextOperateCall());
  }

  function.flush(flowProcess, this);
  function.cleanup(flowProcess, this);
  return this;
}

Source File: HBaseScheme.java From SpyGlass with Apache License 2.0

5 votes

/**
 * Configures the job to write through {@code HBaseOutputFormat} with
 * {@code ImmutableBytesWritable} keys and {@code Put} values, then reads the per-table
 * salt flag from the configuration into {@code useSalt} (default {@code false}).
 *
 * @param process the current flow process (not used here)
 * @param tap     the tap being written (not used here)
 * @param conf    the job configuration to populate and read from
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> process,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(HBaseOutputFormat.class);
  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);

  // The salt property name is a format pattern parameterised by the output table name.
  String table = conf.get(HBaseOutputFormat.OUTPUT_TABLE);
  String saltProperty = String.format(HBaseConstants.USE_SALT, table);
  useSalt = conf.getBoolean(saltProperty, false);
}

Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Unpacks the {@code Name} found in argument 0 and emits a tuple of its first and last name.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry args = functionCall.getArguments();
  Name person = (Name) args.getObject(0);

  Tuple output = new Tuple();
  output.add(person.getFirst_name());
  output.add(person.getLast_name());

  functionCall.getOutputCollector().add(output);
}

Source File: ParquetTBaseScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Extends the parent source configuration: selects {@code DeprecatedParquetInputFormat}
 * as the input format, {@code ThriftReadSupport} as the read support class and
 * {@code TBaseRecordConverter} as the record converter class.
 *
 * @param fp      the current flow process, forwarded to the parent
 * @param tap     the tap being read, forwarded to the parent
 * @param jobConf the job configuration to populate
 */
@Override
public void sourceConfInit(
    FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  // Parent configuration first, then the settings specific to this scheme.
  super.sourceConfInit(fp, tap, jobConf);

  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}

Source File: JDBCTap.java From SpyGlass with Apache License 2.0

5 votes

/**
 * Prepares the job configuration for reading from the database.
 *
 * <p>Sets this tap's path as the file input path, stores the driver class and connection
 * URL (plus username and password when a username is set) and then delegates to the
 * parent implementation.
 *
 * @param process the current flow process, forwarded to the parent
 * @param conf    the job configuration to populate
 */
@Override
public void sourceConfInit( FlowProcess<JobConf> process, JobConf conf )
{
    // a hack for MultiInputFormat to see that there is a child format
    FileInputFormat.setInputPaths( conf, getPath() );

    if( username != null ) {
        DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password );
    } else {
        DBConfiguration.configureDB( conf, driverClassName, connectionUrl );
    }

    super.sourceConfInit( process, conf );
}

Source File: AggregatorCallStub.java From plunger with Apache License 2.0

5 votes

/**
 * Processes the groups with the provided {@link Aggregator}.
 *
 * <p>Drives the aggregator through one full lifecycle: {@code prepare} once; then for each
 * remaining group {@code start}, {@code aggregate} once per value and {@code complete};
 * finally {@code flush} and {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every lifecycle call
 * @param aggregator  the aggregator under test
 * @return this stub, so that the collected result can be read from it
 */
public AggregatorCallStub<C> complete(FlowProcess<?> flowProcess, Aggregator<C> aggregator) {
  // prepare() is a once-per-lifecycle hook; per-group initialisation belongs to start().
  aggregator.prepare(flowProcess, this);
  while (groupsIterator.hasNext()) {
    aggregator.start(flowProcess, nextGroup());
    while (valuesIterator.hasNext()) {
      aggregator.aggregate(flowProcess, nextAggregateCall());
    }
    aggregator.complete(flowProcess, this);
  }
  aggregator.flush(flowProcess, this);
  aggregator.cleanup(flowProcess, this);
  return this;
}

Source File: ParquetScroogeSchemeTest.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Builds a {@code Name} from string arguments 0 and 1 (the second wrapped with
 * {@code Option.apply}) and emits it as a one-element tuple.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry args = functionCall.getArguments();

  Name person = Name$.MODULE$.apply(args.getString(0), Option.apply(args.getString(1)));

  Tuple output = new Tuple();
  output.add(person);
  functionCall.getOutputCollector().add(output);
}

Source File: ParquetScroogeSchemeTest.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Emits a one-element tuple holding the {@code toString()} of argument 0.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  Object first = functionCall.getArguments().getObject(0);

  Tuple output = new Tuple();
  output.add(first.toString());

  functionCall.getOutputCollector().add(output);
}

Source File: ParquetScroogeScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Extends the parent source configuration: selects {@code DeprecatedParquetInputFormat}
 * as the input format, {@code ScroogeReadSupport} as the read support class and
 * {@code ScroogeRecordConverter} as the record converter class.
 *
 * @param fp      the current flow process, forwarded to the parent
 * @param tap     the tap being read, forwarded to the parent
 * @param jobConf the job configuration to populate
 */
@Override
public void sourceConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  // Parent configuration first, then the settings specific to this scheme.
  super.sourceConfInit(fp, tap, jobConf);

  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ScroogeReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, ScroogeRecordConverter.class);
}

Source File: ParquetScroogeScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Configures the job to write Parquet output: installs
 * {@code DeprecatedParquetOutputFormat}, selects {@code ScroogeWriteSupport} as the write
 * support class and registers the class held by this scheme's config.
 *
 * @param fp      the current flow process (not used here)
 * @param tap     the tap being written (not used here)
 * @param jobConf the job configuration to populate
 */
@Override
public void sinkConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);

  ParquetOutputFormat.setWriteSupportClass(jobConf, ScroogeWriteSupport.class);
  ScroogeWriteSupport.setScroogeClass(jobConf, this.config.getKlass());
}

Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Unpacks the {@code Name} found at position 0 of the arguments and emits a tuple of its
 * first and last name.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry args = functionCall.getArguments();
  Name person = (Name) args.get(0);

  Tuple output = new Tuple();
  output.add(person.getFirst_name());
  output.add(person.getLast_name());

  functionCall.getOutputCollector().add(output);
}

Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Builds a {@code Name} from string arguments 0 (first name) and 1 (last name) and emits
 * it as a one-element tuple.
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry args = functionCall.getArguments();

  Name person = new Name();
  person.setFirst_name(args.getString(0));
  person.setLast_name(args.getString(1));

  Tuple output = new Tuple();
  output.add(person);
  functionCall.getOutputCollector().add(output);
}

Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Configures the job to write tuples as Parquet: installs
 * {@code DeprecatedParquetOutputFormat}, stores {@code parquetSchema} under
 * {@code TupleWriteSupport.PARQUET_CASCADING_SCHEMA} and selects {@code TupleWriteSupport}
 * as the write support class.
 *
 * @param fp      the current flow process (not used here)
 * @param tap     the tap being written (not used here)
 * @param jobConf the job configuration to populate
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);

  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}

Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Reads the next record from the record reader into the incoming tuple entry.
 *
 * <p>A value container is created by the reader for each call and filled by
 * {@code next}; the key passed to {@code next} is {@code null}. If the container itself
 * is {@code null}, {@code true} is returned without touching the incoming entry.
 *
 * @param fp the current flow process (not used here)
 * @param sc supplies the record reader and the incoming entry
 * @return {@code false} when the reader has no further record, {@code true} otherwise
 * @throws IOException if the record reader fails
 */
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc)
    throws IOException {
  Container<Tuple> holder = (Container<Tuple>) sc.getInput().createValue();

  if (!sc.getInput().next(null, holder)) {
    return false;
  }

  // Skip nulls
  if (holder != null) {
    sc.getIncomingEntry().setTuple(holder.get());
  }
  return true;
}

Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Replaces this scheme's source fields with the fields produced by a
 * {@code SchemaIntersection} of the schema read for the tap and the current source
 * fields, then returns the updated source fields.
 *
 * @param flowProcess the flow process used to read the schema
 * @param tap         the tap whose schema is read
 * @return the source fields after the update
 */
@Override
public Fields retrieveSourceFields(FlowProcess<JobConf> flowProcess, Tap tap) {
  MessageType fileSchema = readSchema(flowProcess, tap);
  SchemaIntersection common = new SchemaIntersection(fileSchema, getSourceFields());

  setSourceFields(common.getSourceFields());
  return getSourceFields();
}

Source File: HBaseRawScheme.java From SpyGlass with Apache License 2.0

5 votes

/**
 * Configures the job to read through the HBase {@code TableInputFormat}, wrapped by
 * {@code DeprecatedInputFormatWrapper} with {@code ValueCopier}.
 *
 * <p>When family names are set they are joined with single spaces, logged at debug level
 * and stored under {@code TableInputFormat.SCAN_COLUMNS}.
 *
 * @param process the current flow process (not used here)
 * @param tap     the tap being read (not used here)
 * @param conf    the job configuration to populate
 */
@Override
public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap,
		JobConf conf) {
	DeprecatedInputFormatWrapper.setInputFormat(
			org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf, ValueCopier.class);

	if (familyNames == null) {
		return;
	}

	String columns = Util.join(this.familyNames, " ");
	LOG.debug("sourcing from column families: {}", columns);
	conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, columns);
}

Source File: ParquetValueScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Writes the single value of the outgoing tuple entry to the output collector with a
 * {@code null} key.
 *
 * @param fp the current flow process (not used here)
 * @param sc supplies the outgoing entry and the output collector
 * @throws RuntimeException if the outgoing entry does not have exactly one element
 * @throws IOException      if the output collector fails
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc)
    throws IOException {
  TupleEntry outgoing = sc.getOutgoingEntry();

  boolean singleValue = outgoing.size() == 1;
  if (!singleValue) {
    throw new RuntimeException("ParquetValueScheme expects tuples with an arity of exactly 1, but found " + outgoing.getFields());
  }

  T value = (T) outgoing.getObject(0);
  sc.getOutput().collect(null, value);
}

Source File: ParquetTBaseScheme.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Configures the job to write Thrift objects as Parquet: installs
 * {@code DeprecatedParquetOutputFormat}, selects {@code TBaseWriteSupport} as the write
 * support class and registers the Thrift class held by this scheme's config.
 *
 * @param fp      the current flow process (not used here)
 * @param tap     the tap being written (not used here)
 * @param jobConf the job configuration to populate
 * @throws IllegalArgumentException if no Thrift class was supplied to this scheme
 */
@Override
public void sinkConfInit(
    FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  // Fail before touching the configuration when the class is missing.
  boolean missingClass = this.config.getKlass() == null;
  if (missingClass) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }

  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}

cascading.flow.FlowProcess Java Examples