cascading.flow.FlowProcess Java Examples
The following examples show how to use
cascading.flow.FlowProcess.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JDBCScheme.java From SpyGlass with Apache License 2.0 | 6 votes |
/**
 * Registers this scheme's JDBC input with {@link DBInputFormat} on the given job configuration.
 *
 * <p>When {@code selectQuery} is set it is passed together with {@code countQuery}. Otherwise
 * the input is described by the tap's table name plus {@code conditions}, the comma-joined
 * {@code orderBy} entries ({@code null} when {@code orderBy} is unset) and {@code columns}.
 * Finally, a non-null {@code inputFormatClass} is installed as the job's input format.
 *
 * @param process the current flow process (not read by this method)
 * @param tap the tap being sourced; it is cast to {@link JDBCTap}
 * @param conf the job configuration to populate
 */
@Override
public void sourceConfInit( FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf ) {
  JDBCTap jdbcTap = (JDBCTap) tap;
  int readers = jdbcTap.concurrentReads;

  if( selectQuery == null ) {
    // Table-driven input: build the optional ORDER BY list before handing off.
    String table = jdbcTap.getTableName();
    String orderClause = null;
    if( orderBy != null ) {
      orderClause = Util.join( orderBy, ", " );
    }
    DBInputFormat.setInput( conf, TupleRecord.class, table, conditions, orderClause, limit, readers, columns );
  } else {
    // Query-driven input.
    DBInputFormat.setInput( conf, TupleRecord.class, selectQuery, countQuery, limit, readers );
  }

  if( inputFormatClass != null ) {
    conf.setInputFormat( inputFormatClass );
  }
}
Example #2
Source File: FlinkFlowStep.java From cascading-flink with Apache License 2.0 | 6 votes |
/**
 * Builds the Flink {@code DataSet} that reads the single source tap of the given node.
 *
 * <p>The tap is initialised against its own copy of the node configuration, which is then
 * registered on a second copy via {@link MultiInputFormat#addInputFormat}. That second copy is
 * converted to a Flink configuration and attached to the created input.
 *
 * @param flowProcess the flow process handed to the tap's {@code sourceConfInit}
 * @param env the Flink execution environment used to create the input
 * @param node the flow node whose source tap is translated
 * @param dop the parallelism to set on the resulting data set
 * @return the data set named after the tap's identifier
 */
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
  Tap sourceTap = this.getSingle(node.getSourceTaps());

  // Per-tap configuration, tagged with the tap id.
  JobConf perTapConf = new JobConf(this.getNodeConfig(node));
  sourceTap.sourceConfInit(flowProcess, perTapConf);
  perTapConf.set("cascading.step.source", Tap.id(sourceTap));

  Fields sourceFields = sourceTap.getSourceFields();
  registerKryoTypes(sourceFields);

  // Node-level configuration that carries the per-tap configuration as a child input format.
  JobConf nodeConf = new JobConf(this.getNodeConfig(node));
  MultiInputFormat.addInputFormat(nodeConf, perTapConf);

  return env
      .createInput(new TapInputFormat(node), new TupleTypeInfo(sourceFields))
      .name(sourceTap.getIdentifier())
      .setParallelism(dop)
      .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(nodeConf)));
}
Example #3
Source File: FunctionCallStubTest.java From plunger with Apache License 2.0 | 6 votes |
/**
 * Feeds two tuples through an {@code Insert} function with explicit output fields and checks
 * that each of the two results is an {@code OUTPUT} entry holding {@code 1}.
 */
@Test
public void completeDifferentOutputFields() {
  @SuppressWarnings("unchecked")
  List<TupleEntry> actual =
      new FunctionCallStub.Builder<Void>(FIELDS)
          .outputFields(OUTPUT)
          .addTuple("a")
          .addTuple("b")
          .build()
          .complete(mock(FlowProcess.class), new Insert(OUTPUT, 1))
          .result()
          .asTupleEntryList();

  assertThat(actual.size(), is(2));
  // Both inputs yield the same inserted value.
  for (int index = 0; index < 2; index++) {
    assertThat(actual.get(index), tupleEntry(OUTPUT, 1));
  }
}
Example #4
Source File: BufferCallStubTest.java From plunger with Apache License 2.0 | 6 votes |
/**
 * Runs a {@code CountBuffer} over two groups of two tuples each, with explicit output fields,
 * and checks the four emitted {@code OUTPUT} values in order: 1, 2, 1, 2.
 */
@Test
public void completeDifferentOutputFields() {
  List<TupleEntry> actual =
      new BufferCallStub.Builder<Void>(GROUP_FIELDS, NON_GROUP_FIELDS)
          .outputFields(OUTPUT)
          .newGroup(1)
          .addTuple("a")
          .addTuple("b")
          .newGroup(2)
          .addTuple("c")
          .addTuple("d")
          .build()
          .complete(mock(FlowProcess.class), new CountBuffer())
          .result()
          .asTupleEntryList();

  int[] expectedValues = {1, 2, 1, 2};
  assertThat(actual.size(), is(expectedValues.length));
  for (int index = 0; index < expectedValues.length; index++) {
    assertThat(actual.get(index), tupleEntry(OUTPUT, expectedValues[index]));
  }
}
Example #5
Source File: AggregatorCallStubTest.java From plunger with Apache License 2.0 | 6 votes |
/**
 * Runs a {@code First} aggregator over two groups of two tuples each and checks that one entry
 * per group is produced, holding {@code "a"} and {@code "c"} respectively.
 */
@Test
public void complete() {
  List<TupleEntry> actual =
      new AggregatorCallStub.Builder<Tuple[]>(GROUP_FIELDS, NON_GROUP_FIELDS)
          .newGroup(1)
          .addTuple("a")
          .addTuple("b")
          .newGroup(2)
          .addTuple("c")
          .addTuple("d")
          .build()
          .complete(mock(FlowProcess.class), new First(NON_GROUP_FIELDS))
          .result()
          .asTupleEntryList();

  String[] expectedValues = {"a", "c"};
  assertThat(actual.size(), is(expectedValues.length));
  for (int index = 0; index < expectedValues.length; index++) {
    assertThat(actual.get(index), tupleEntry(NON_GROUP_FIELDS, expectedValues[index]));
  }
}
Example #6
Source File: HBaseRawScheme.java From SpyGlass with Apache License 2.0 | 6 votes |
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException { Tuple result = new Tuple(); Object key = sourceCall.getContext()[0]; Object value = sourceCall.getContext()[1]; boolean hasNext = sourceCall.getInput().next(key, value); if (!hasNext) { return false; } // Skip nulls if (key == null || value == null) { return true; } ImmutableBytesWritable keyWritable = (ImmutableBytesWritable) key; Result row = (Result) value; result.add(keyWritable); result.add(row); sourceCall.getIncomingEntry().setTuple(result); return true; }
Example #7
Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to read Parquet data as Cascading tuples.
 *
 * <p>Applies the optional filter predicate, selects {@link DeprecatedParquetInputFormat} as the
 * input format, uses {@link TupleReadSupport} as the read support class, and records this
 * scheme's source fields as the requested fields.
 *
 * @param fp the current flow process (not read by this method)
 * @param tap the tap being sourced (not read by this method)
 * @param jobConf the job configuration to populate
 */
@SuppressWarnings("rawtypes")
@Override
public void sourceConfInit(FlowProcess<JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf jobConf) {
  // The predicate is optional; only push it down when one was supplied.
  boolean hasFilter = filterPredicate != null;
  if (hasFilter) {
    ParquetInputFormat.setFilterPredicate(jobConf, filterPredicate);
  }

  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, TupleReadSupport.class);
  TupleReadSupport.setRequestedFields(jobConf, getSourceFields());
}
Example #8
Source File: TestParquetTupleScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Emits a single-element tuple whose only value is a nested tuple holding the first two
 * argument values as strings.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry input = functionCall.getArguments();

  // Inner tuple: the two string arguments, in order.
  Tuple nested = new Tuple();
  nested.addString(input.getString(0));
  nested.addString(input.getString(1));

  Tuple output = new Tuple();
  output.add(nested);
  functionCall.getOutputCollector().add(output);
}
Example #9
Source File: HBaseRawTap.java From SpyGlass with Apache License 2.0 | 5 votes |
/**
 * Creates and prepares an {@link HBaseTapCollector} bound to this tap.
 *
 * @param jobConfFlowProcess the flow process passed to the collector
 * @param outputCollector not used by this implementation
 * @return the prepared collector
 * @throws IOException declared by the overridden method
 */
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector) throws IOException {
  HBaseTapCollector collector = new HBaseTapCollector(jobConfFlowProcess, this);
  collector.prepare();
  return collector;
}
Example #10
Source File: BufferCallStub.java From plunger with Apache License 2.0 | 5 votes |
/**
 * Processes every remaining group with the provided {@link Buffer}.
 *
 * <p>The buffer is prepared once, {@code operate} is invoked once per group, and the buffer is
 * then flushed and cleaned up. Preparing once (rather than once per group) follows the
 * Cascading operation lifecycle, where {@code prepare} is not invoked again without an
 * intervening {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every buffer callback
 * @param buffer the buffer under test
 * @return this stub, so the collected result can be inspected
 */
public BufferCallStub<C> complete(FlowProcess<?> flowProcess, Buffer<C> buffer) {
  // One prepare per lifecycle: calling it per group would re-initialise the operation context.
  buffer.prepare(flowProcess, this);
  while (groupsIterator.hasNext()) {
    buffer.operate(flowProcess, nextOperateCall());
  }
  buffer.flush(flowProcess, this);
  buffer.cleanup(flowProcess, this);
  return this;
}
Example #11
Source File: TapDataReader.java From plunger with Apache License 2.0 | 5 votes |
/**
 * Opens {@code source} for reading as a Hadoop tap.
 *
 * <p>A fresh {@link JobConf} and a {@link HadoopFlowProcess} wrapping it are created, the tap
 * is initialised against them, and the tap is then opened for read.
 *
 * @return an iterator over the tap's tuple entries
 * @throws IOException if the tap cannot be opened
 */
private TupleEntryIterator getHadoopTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> tap = (Tap<JobConf, ?, ?>) source;

  JobConf jobConf = new JobConf();
  FlowProcess<JobConf> process = new HadoopFlowProcess(jobConf);
  tap.sourceConfInit(process, jobConf);

  return tap.openForRead(process);
}
Example #12
Source File: TapDataReader.java From plunger with Apache License 2.0 | 5 votes |
/**
 * Opens {@code source} for reading as a local-mode tap.
 *
 * <p>A fresh {@link Properties} and a {@link LocalFlowProcess} wrapping it are created, the tap
 * is initialised against them, and the tap is then opened for read.
 *
 * @return an iterator over the tap's tuple entries
 * @throws IOException if the tap cannot be opened
 */
private TupleEntryIterator getLocalTupleEntryIterator() throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> tap = (Tap<Properties, ?, ?>) source;

  Properties config = new Properties();
  FlowProcess<Properties> process = new LocalFlowProcess(config);
  tap.sourceConfInit(process, config);

  return tap.openForRead(process);
}
Example #13
Source File: FunctionCallStub.java From plunger with Apache License 2.0 | 5 votes |
/**
 * Drives the provided {@link Function} through a full lifecycle: {@code prepare}, one
 * {@code operate} call for each remaining argument, then {@code flush} and {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every function callback
 * @param function the function under test
 * @return this stub, so the collected result can be inspected
 */
public FunctionCallStub<C> complete(FlowProcess<?> flowProcess, Function<C> function) {
  function.prepare(flowProcess, this);

  for (; arguments.hasNext(); ) {
    function.operate(flowProcess, nextOperateCall());
  }

  function.flush(flowProcess, this);
  function.cleanup(flowProcess, this);
  return this;
}
Example #14
Source File: HBaseScheme.java From SpyGlass with Apache License 2.0 | 5 votes |
/**
 * Configures the job to write through {@link HBaseOutputFormat}, with
 * {@link ImmutableBytesWritable} keys and {@link Put} values.
 *
 * <p>Also refreshes {@code useSalt} from the configuration, using the key built by formatting
 * {@code HBaseConstants.USE_SALT} with the configured output table name. It defaults to
 * {@code false}.
 *
 * @param process the current flow process (not read by this method)
 * @param tap the tap being sunk to (not read by this method)
 * @param conf the job configuration to populate and read
 */
@Override
public void sinkConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(HBaseOutputFormat.class);
  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);

  // The salt flag is stored under a per-table key.
  String outputTable = conf.get(HBaseOutputFormat.OUTPUT_TABLE);
  String saltKey = String.format(HBaseConstants.USE_SALT, outputTable);
  useSalt = conf.getBoolean(saltKey, false);
}
Example #15
Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Unpacks the {@code Name} held in argument 0 into a two-value tuple of first name followed by
 * last name, and emits it.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry input = functionCall.getArguments();
  Tuple output = new Tuple();

  Name person = (Name) input.getObject(0);
  output.add(person.getFirst_name());
  output.add(person.getLast_name());

  functionCall.getOutputCollector().add(output);
}
Example #16
Source File: ParquetTBaseScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to read Parquet data as Thrift {@code TBase} records.
 *
 * <p>After the superclass initialisation, selects {@link DeprecatedParquetInputFormat} as the
 * input format, {@link ThriftReadSupport} as the read support class and
 * {@link TBaseRecordConverter} as the record converter.
 *
 * @param fp the current flow process, forwarded to the superclass
 * @param tap the tap being sourced, forwarded to the superclass
 * @param jobConf the job configuration to populate
 */
@Override
public void sourceConfInit(
    FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);

  // Input format first, then the Thrift-specific read support and converter.
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ThriftReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, TBaseRecordConverter.class);
}
Example #17
Source File: JDBCTap.java From SpyGlass with Apache License 2.0 | 5 votes |
@Override public void sourceConfInit( FlowProcess<JobConf> process, JobConf conf ) { // a hack for MultiInputFormat to see that there is a child format FileInputFormat.setInputPaths( conf, getPath() ); if( username == null ) DBConfiguration.configureDB(conf, driverClassName, connectionUrl); else DBConfiguration.configureDB( conf, driverClassName, connectionUrl, username, password ); super.sourceConfInit( process, conf ); }
Example #18
Source File: AggregatorCallStub.java From plunger with Apache License 2.0 | 5 votes |
/**
 * Processes every remaining group with the provided {@link Aggregator}.
 *
 * <p>The aggregator is prepared once. For each group it then receives {@code start}, one
 * {@code aggregate} call per value, and {@code complete}. Finally it is flushed and cleaned up.
 * Preparing once (rather than once per group) follows the Cascading operation lifecycle, where
 * {@code prepare} is not invoked again without an intervening {@code cleanup}.
 *
 * @param flowProcess the flow process passed to every aggregator callback
 * @param aggregator the aggregator under test
 * @return this stub, so the collected result can be inspected
 */
public AggregatorCallStub<C> complete(FlowProcess<?> flowProcess, Aggregator<C> aggregator) {
  // One prepare per lifecycle: per-group state belongs in start(), not prepare().
  aggregator.prepare(flowProcess, this);
  while (groupsIterator.hasNext()) {
    aggregator.start(flowProcess, nextGroup());
    while (valuesIterator.hasNext()) {
      aggregator.aggregate(flowProcess, nextAggregateCall());
    }
    aggregator.complete(flowProcess, this);
  }
  aggregator.flush(flowProcess, this);
  aggregator.cleanup(flowProcess, this);
  return this;
}
Example #19
Source File: ParquetScroogeSchemeTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Builds a Scrooge {@code Name} from the first two string arguments (the second wrapped via
 * {@code Option.apply}) and emits it as a single-value tuple.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry input = functionCall.getArguments();
  Tuple output = new Tuple();

  String first = input.getString(0);
  String second = input.getString(1);
  Name person = Name$.MODULE$.apply(first, Option.apply(second));

  output.add(person);
  functionCall.getOutputCollector().add(output);
}
Example #20
Source File: ParquetScroogeSchemeTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Emits a single-value tuple holding the {@code toString()} form of argument 0.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  Object firstArgument = functionCall.getArguments().getObject(0);

  Tuple output = new Tuple();
  String rendered = firstArgument.toString();
  output.add(rendered);

  functionCall.getOutputCollector().add(output);
}
Example #21
Source File: ParquetScroogeScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to read Parquet data as Scrooge records.
 *
 * <p>After the superclass initialisation, selects {@link DeprecatedParquetInputFormat} as the
 * input format, {@link ScroogeReadSupport} as the read support class and
 * {@link ScroogeRecordConverter} as the record converter.
 *
 * @param fp the current flow process, forwarded to the superclass
 * @param tap the tap being sourced, forwarded to the superclass
 * @param jobConf the job configuration to populate
 */
@Override
public void sourceConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  super.sourceConfInit(fp, tap, jobConf);

  // Input format first, then the Scrooge-specific read support and converter.
  jobConf.setInputFormat(DeprecatedParquetInputFormat.class);
  ParquetInputFormat.setReadSupportClass(jobConf, ScroogeReadSupport.class);
  ThriftReadSupport.setRecordConverterClass(jobConf, ScroogeRecordConverter.class);
}
Example #22
Source File: ParquetScroogeScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to write Scrooge records as Parquet.
 *
 * <p>Installs the deprecated-API Parquet output format, selects {@link ScroogeWriteSupport} as
 * the write support class, and registers the class returned by {@code config.getKlass()} as the
 * Scrooge class.
 *
 * @param fp the current flow process (not read by this method)
 * @param tap the tap being sunk to (not read by this method)
 * @param jobConf the job configuration to populate
 */
@Override
public void sinkConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  ParquetOutputFormat.setWriteSupportClass(jobConf, ScroogeWriteSupport.class);
  ScroogeWriteSupport.setScroogeClass(jobConf, this.config.getKlass());
}
Example #23
Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Unpacks the {@code Name} obtained from {@code arguments.get(0)} into a two-value tuple of
 * first name followed by last name, and emits it.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry input = functionCall.getArguments();
  Tuple output = new Tuple();

  Name person = (Name) input.get(0);
  output.add(person.getFirst_name());
  output.add(person.getLast_name());

  functionCall.getOutputCollector().add(output);
}
Example #24
Source File: TestParquetTBaseScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Packs the first two string arguments into a new {@code Name} (first name, then last name) and
 * emits it as a single-value tuple.
 *
 * @param flowProcess the current flow process (not read by this method)
 * @param functionCall supplies the arguments and the output collector
 */
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry input = functionCall.getArguments();
  Tuple output = new Tuple();

  Name person = new Name();
  person.setFirst_name(input.getString(0));
  person.setLast_name(input.getString(1));

  output.add(person);
  functionCall.getOutputCollector().add(output);
}
Example #25
Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to write Cascading tuples as Parquet.
 *
 * <p>Installs the deprecated-API Parquet output format, stores {@code parquetSchema} under
 * {@code TupleWriteSupport.PARQUET_CASCADING_SCHEMA}, and selects {@link TupleWriteSupport} as
 * the write support class.
 *
 * @param fp the current flow process (not read by this method)
 * @param tap the tap being sunk to (not read by this method)
 * @param jobConf the job configuration to populate
 */
@SuppressWarnings("rawtypes")
@Override
public void sinkConfInit(
    FlowProcess<JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  jobConf.set(TupleWriteSupport.PARQUET_CASCADING_SCHEMA, parquetSchema);
  ParquetOutputFormat.setWriteSupportClass(jobConf, TupleWriteSupport.class);
}
Example #26
Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Override public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc) throws IOException { Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue(); boolean hasNext = sc.getInput().next(null, value); if (!hasNext) { return false; } // Skip nulls if (value == null) { return true; } sc.getIncomingEntry().setTuple(value.get()); return true; }
Example #27
Source File: ParquetTupleScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Narrows this scheme's source fields to their intersection with the schema read from the tap.
 *
 * <p>The intersected fields are stored via {@code setSourceFields} and then returned through
 * {@code getSourceFields}.
 *
 * @param flowProcess the flow process used to read the schema
 * @param tap the tap whose schema is read
 * @return the source fields after the update
 */
@Override
public Fields retrieveSourceFields(FlowProcess<JobConf> flowProcess, Tap tap) {
  SchemaIntersection overlap =
      new SchemaIntersection(readSchema(flowProcess, tap), getSourceFields());
  Fields narrowed = overlap.getSourceFields();
  setSourceFields(narrowed);
  return getSourceFields();
}
Example #28
Source File: HBaseRawScheme.java From SpyGlass with Apache License 2.0 | 5 votes |
/**
 * Configures the job to read from HBase through the new-API {@code TableInputFormat}, wrapped
 * for the deprecated API with {@link ValueCopier}.
 *
 * <p>When {@code familyNames} is set, the names are joined with single spaces and stored as the
 * scan columns.
 *
 * @param process the current flow process (not read by this method)
 * @param tap the tap being sourced (not read by this method)
 * @param conf the job configuration to populate
 */
@Override
public void sourceConfInit(FlowProcess<JobConf> process, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  DeprecatedInputFormatWrapper.setInputFormat(
      org.apache.hadoop.hbase.mapreduce.TableInputFormat.class, conf, ValueCopier.class);

  // No column families configured: leave the scan columns untouched.
  if (familyNames == null) {
    return;
  }

  String scanColumns = Util.join(this.familyNames, " ");
  LOG.debug("sourcing from column families: {}", scanColumns);
  conf.set(org.apache.hadoop.hbase.mapreduce.TableInputFormat.SCAN_COLUMNS, scanColumns);
}
Example #29
Source File: ParquetValueScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes the single value of the outgoing tuple to the output collector with a {@code null}
 * key.
 *
 * @param fp the current flow process (not read by this method)
 * @param sc supplies the outgoing entry and the output collector
 * @throws RuntimeException if the outgoing tuple does not hold exactly one value
 * @throws IOException if the output collector fails
 */
@SuppressWarnings("unchecked")
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sc) throws IOException {
  TupleEntry outgoing = sc.getOutgoingEntry();

  if (outgoing.size() == 1) {
    T payload = (T) outgoing.getObject(0);
    sc.getOutput().collect(null, payload);
    return;
  }

  throw new RuntimeException("ParquetValueScheme expects tuples with an arity of exactly 1, but found " + outgoing.getFields());
}
Example #30
Source File: ParquetTBaseScheme.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Configures the job to write Thrift {@code TBase} records as Parquet.
 *
 * <p>Installs the deprecated-API Parquet output format, selects {@link TBaseWriteSupport} as the
 * write support class, and registers the configured Thrift class.
 *
 * @param fp the current flow process (not read by this method)
 * @param tap the tap being sunk to (not read by this method)
 * @param jobConf the job configuration to populate
 * @throws IllegalArgumentException if no Thrift class was configured
 */
@Override
public void sinkConfInit(
    FlowProcess<? extends JobConf> fp,
    Tap<JobConf, RecordReader, OutputCollector> tap,
    JobConf jobConf) {
  // A sink cannot work without knowing which Thrift class it writes.
  boolean thriftClassMissing = this.config.getKlass() == null;
  if (thriftClassMissing) {
    throw new IllegalArgumentException("To use ParquetTBaseScheme as a sink, you must specify a thrift class in the constructor");
  }

  DeprecatedParquetOutputFormat.setAsOutputFormat(jobConf);
  DeprecatedParquetOutputFormat.setWriteSupportClass(jobConf, TBaseWriteSupport.class);
  TBaseWriteSupport.<T>setThriftClass(jobConf, this.config.getKlass());
}