cascading.tuple.TupleEntryCollector Java Examples

The following examples show how to use cascading.tuple.TupleEntryCollector. Each example is taken from an open-source project; the source file, project, and license are noted above it.
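The common pattern across these examples is to obtain a TupleEntryCollector from a Tap via openForWrite(...), add Tuple or TupleEntry instances, and close the collector to flush the underlying resource. Below is a minimal sketch of that pattern using a local FileTap; the field names and output path are placeholders, and the scheme and tap types you actually need depend on your platform (local, Hadoop, Flink, etc.).

import java.util.Properties;

import cascading.flow.local.LocalFlowProcess;
import cascading.scheme.local.TextDelimited;
import cascading.tap.SinkMode;
import cascading.tap.local.FileTap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntryCollector;

public class TupleEntryCollectorSketch {

  public static void main(String[] args) throws Exception {
    Fields fields = new Fields("name", "age");

    // Placeholder sink: a comma-delimited text file on the local file system.
    FileTap sink = new FileTap(new TextDelimited(fields, ","), "/tmp/people.csv", SinkMode.REPLACE);

    // Open a collector against the tap, emit tuples, and close to flush and release the resource.
    TupleEntryCollector collector = sink.openForWrite(new LocalFlowProcess(new Properties()));
    try {
      collector.add(new Tuple("alice", 30));
      collector.add(new Tuple("bob", 25));
    } finally {
      collector.close();
    }
  }
}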
Example #1
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {

	if (trap instanceof Hfs) {

		JobConf jobConf = new JobConf(this.getConfigCopy());

		int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 );
		int nodeNum = jobConf.getInt( "cascading.flow.node.num", 0 );

		String partname = String.format( "-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum() );
		jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname );

		String value = String.format( "attempt_%012d_0000_m_%06d_0", (int) Math.rint( System.currentTimeMillis() ), this.getCurrentSliceNum() );
		jobConf.set( "mapred.task.id", value );
		jobConf.set( "mapreduce.task.id", value );

		return trap.openForWrite( new FlinkFlowProcess( jobConf ), null);
	}
	else {
		throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
	}
}
 
Example #2
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToHadoopPartitionTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  BasePartitionTap<JobConf, ?, ?> hadoopTap = (BasePartitionTap<JobConf, ?, ?>) tap;
  JobConf conf = new JobConf();

  // Avoids deletion of results when using a partition tap (close() will delete the _temporary folder before the
  // copy has been done if not run in a flow)
  HadoopUtil.setIsInflow(conf);

  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();

  // We need to clean up the '_temporary' folder
  @SuppressWarnings("unchecked")
  String basePath = hadoopTap.getParent().getFullIdentifier(flowProcess);
  deleteTemporaryPath(new Path(basePath), FileSystem.get(conf));
}
 
Example #3
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToLocalTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  Tap<Properties, ?, ?> localTap = (Tap<Properties, ?, ?>) tap;
  Properties conf = new Properties();
  LocalFlowProcess flowProcess = new LocalFlowProcess(conf);

  flowProcess.setStepStats(new LocalStepStats(new NullFlowStep(), NullClientState.INSTANCE));

  localTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = localTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();
  localTap.commitResource(conf);
}
 
Example #4
Source File: TapDataWriter.java    From plunger with Apache License 2.0
private void writeToHadoopTap(Tap<?, ?, ?> tap) throws IOException {
  @SuppressWarnings("unchecked")
  Tap<JobConf, ?, ?> hadoopTap = (Tap<JobConf, ?, ?>) tap;
  JobConf conf = new JobConf();

  HadoopFlowProcess flowProcess = new HadoopFlowProcess(conf);
  hadoopTap.sinkConfInit(flowProcess, conf);
  TupleEntryCollector collector = hadoopTap.openForWrite(flowProcess);
  for (TupleEntry tuple : data.asTupleEntryList()) {
    collector.add(tuple);
  }
  collector.close();
}
 
Example #5
Source File: BucketTest.java    From plunger with Apache License 2.0
@Test
public void asTupleEntryList() throws IOException {
  Bucket sink = new Bucket(FIELDS, pipe, flow);
  TupleEntryCollector collector = sink.openForWrite(null, null);
  collector.add(TUPLE_1);
  collector.add(TUPLE_2);
  List<TupleEntry> tupleEntryList = sink.result().asTupleEntryList();
  assertThat(tupleEntryList.size(), is(2));
  assertThat(tupleEntryList.get(0).getFields(), is(FIELDS));
  assertThat(tupleEntryList.get(0).getTuple(), is(TUPLE_1));
  assertThat(tupleEntryList.get(1).getFields(), is(FIELDS));
  assertThat(tupleEntryList.get(1).getTuple(), is(TUPLE_2));
}
 
Example #6
Source File: BucketTest.java    From plunger with Apache License 2.0
@Test
public void asTupleList() throws IOException {
  Bucket sink = new Bucket(FIELDS, pipe, flow);
  TupleEntryCollector collector = sink.openForWrite(null, null);
  collector.add(TUPLE_1);
  collector.add(TUPLE_2);
  List<Tuple> tupleList = sink.result().asTupleList();
  assertThat(tupleList.size(), is(2));
  assertThat(tupleList.get(0), is(TUPLE_1));
  assertThat(tupleList.get(1), is(TUPLE_2));
}
 
Example #7
Source File: JDBCTap.java    From SpyGlass with Apache License 2.0
@Override
public TupleEntryCollector openForWrite( FlowProcess<JobConf> flowProcess, OutputCollector output ) throws IOException {
    if( !isSink() )
        throw new TapException( "this tap may not be used as a sink, no TableDesc defined" );

    LOG.info("Creating JDBCTapCollector output instance");
    JDBCTapCollector jdbcCollector = new JDBCTapCollector( flowProcess, this );

    jdbcCollector.prepare();

    return jdbcCollector;
}
 
Example #8
Source File: HBaseRawTap.java    From SpyGlass with Apache License 2.0
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector)
		throws IOException {
	HBaseTapCollector hBaseCollector = new HBaseTapCollector(jobConfFlowProcess, this);
	hBaseCollector.prepare();
	return hBaseCollector;
}
 
Example #9
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public TupleEntryCollector openTapForWrite(Tap tap) throws IOException {
	return tap.openForWrite( this, null ); // do not honor sinkmode as this may be opened across tasks
}
 
Example #10
Source File: FlinkFlowProcess.java    From cascading-flink with Apache License 2.0
@Override
public TupleEntryCollector openSystemIntermediateForWrite() throws IOException {
	return null; // Not required for Flink
}
 
Example #11
Source File: AbstractOperationCallStub.java    From plunger with Apache License 2.0
public TupleEntryCollector getOutputCollector() {
  return collector;
}
 
Example #12
Source File: TupleListTap.java    From plunger with Apache License 2.0
/**
 * Always throws {@link UnsupportedOperationException} - this tap is a source, not a sink.
 * 
 * @throws UnsupportedOperationException always.
 */
@Override
public TupleEntryCollector openForWrite(FlowProcess<? extends Properties> flowProcess, List<Tuple> output)
  throws IOException {
  throw new UnsupportedOperationException("cannot write to a " + getClass().getSimpleName());
}
 
Example #13
Source File: Bucket.java    From plunger with Apache License 2.0
/**
 * {@inheritDoc}
 * <p/>
 * The returned type is a {@link ListTupleEntryCollector}.
 */
@Override
public TupleEntryCollector openForWrite(FlowProcess<? extends Properties> flowProcess, List<Tuple> output)
    throws IOException {
  return new ListTupleEntryCollector(this.output, this);
}
 
Example #14
Source File: UnsupportedTap.java    From plunger with Apache License 2.0
@Override
public TupleEntryCollector openForWrite(FlowProcess<? extends String> flowProcess, Integer output) throws IOException {
  return null;
}
 
Example #15
Source File: HBaseTap.java    From SpyGlass with Apache License 2.0
@Override
public TupleEntryCollector openForWrite(FlowProcess<JobConf> jobConfFlowProcess, OutputCollector outputCollector) throws IOException {
  HBaseTapCollector hBaseCollector = new HBaseTapCollector( jobConfFlowProcess, this );
  hBaseCollector.prepare();
  return hBaseCollector;
}