Java Code Examples for cascading.tuple.Tuple

The following examples show how to use cascading.tuple.Tuple. They are extracted from open source projects; the source project, author, file, and license are listed above each example so you can trace each snippet back to its original source.
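
Before the project-specific examples, here is a minimal, self-contained sketch of the basic Tuple and TupleEntry API (positional access on a Tuple, named access through Fields). It is not taken from any of the listed projects and only uses calls that also appear in the examples below.

import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

public class TupleBasics {

  public static void main(String[] args) {
    // a Tuple is an ordered collection of values, accessed by position
    Tuple tuple = new Tuple(1, "first");
    tuple.add("appended");                        // append a third value

    System.out.println(tuple.size());             // 3
    System.out.println(tuple.getObject(0));       // 1

    // a TupleEntry pairs a Tuple with Fields, enabling access by field name
    Fields fields = new Fields("id", "name", "note");
    TupleEntry entry = new TupleEntry(fields, tuple);

    System.out.println(entry.getInteger("id"));   // 1
    System.out.println(entry.getString("name"));  // first
  }
}

The TupleEntry view is what most of the Scheme and Function implementations below read from and write to.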
Example #1
Source Project: plunger   Author: HotelsDotCom   File: DataBuilderTest.java    License: Apache License 2.0
@Test
public void addMultipleTupleEntriesIterable() {
  Fields fields = new Fields("A", "B");
  DataBuilder builder = new DataBuilder(fields);

  List<TupleEntry> tupleEntries = Arrays.asList(new TupleEntry(fields, new Tuple(1, 2)), new TupleEntry(fields,
      new Tuple(3, 4)));

  builder.addTupleEntries(tupleEntries);
  Data source = builder.build();

  List<Tuple> tuples = source.getTuples();

  assertThat(tuples.size(), is(2));
  assertThat(tuples.get(0), is(new Tuple(1, 2)));
  assertThat(tuples.get(1), is(new Tuple(3, 4)));
}
 
Example #2
Source Project: cascading-flink   Author: dataArtisans   File: FlinkFlowStep.java    License: Apache License 2.0
private DataSet<Tuple> translateMerge(List<DataSet<Tuple>> inputs, FlowNode node) {

	DataSet<Tuple> unioned = null;
	TypeInformation<Tuple> type = null;

	int maxDop = -1;

	for(DataSet<Tuple> input : inputs) {
		maxDop = Math.max(maxDop, ((Operator)input).getParallelism());
		if(unioned == null) {
			unioned = input;
			type = input.getType();
		}
		else {
			unioned = unioned.union(input);
		}
	}
	return unioned.map(new IdMapper())
			.returns(type)
			.setParallelism(maxDop);
}
 
Example #3
Source Project: cascading-flink   Author: dataArtisans   File: CoGroupBufferClosure.java    License: Apache License 2.0
private TupleBuilder makeJoinedBuilder( final Fields[] joinFields )
{
	final Fields[] fields = isSelfJoin() ? new Fields[ size() ] : joinFields;

	if( isSelfJoin() ) {
		Arrays.fill(fields, 0, fields.length, joinFields[0]);
	}

	return new TupleBuilder()
	{
		Tuple result = TupleViews.createComposite(fields);

		@Override
		public Tuple makeResult( Tuple[] tuples )
		{
			return TupleViews.reset( result, tuples );
		}
	};
}
 
Example #4
Source Project: cascading-flink   Author: dataArtisans   File: TupleArraySerializer.java    License: Apache License 2.0
@Override
public Tuple[] deserialize(Tuple[] reuse, DataInputView source) throws IOException {

	// read null mask
	NullMaskSerDeUtils.readNullMask(this.nullFields, this.fillLength, source);

	// read non-null fields
	for (int i = 0; i < this.fillLength; i++) {

		if(!this.nullFields[i]) {
			reuse[i] = tupleSerializers[i].deserialize(source);
		}
		else {
			reuse[i] = null;
		}
	}
	return reuse;
}
 
Example #5
Source Project: cascading-flink   Author: dataArtisans   File: NullMaskSerDeUtils.java    License: Apache License 2.0
public static void writeNullMask(
		Tuple t, DataOutputView target) throws IOException {

	final int length = t.size();
	int b;
	int bytePos;

	for(int fieldPos = 0; fieldPos < length; ) {
		b = 0x00;
		// set bits in byte
		for(bytePos = 0; bytePos < 8 && fieldPos < length; bytePos++, fieldPos++) {
			b = b << 1;
			// set bit if field is null
			if(t.getObject(fieldPos) == null) {
				b |= 0x01;
			}
		}
		// shift bits if last byte is not completely filled
		for(; bytePos < 8; bytePos++) {
			b = b << 1;
		}
		// write byte
		target.writeByte(b);
	}
}
 
Example #6
Source Project: cascading-flink   Author: dataArtisans   File: DefinedTupleSerializer.java    License: Apache License 2.0
@Override
public Tuple deserialize(Tuple reuse, DataInputView source) throws IOException {

	Tuple tuple = getReuseOrNew(reuse);

	// read null mask
	NullMaskSerDeUtils.readNullMask(nullFields, this.length, source);

	for (int i = 0; i < this.length; i++) {
		Object field;
		if(!this.nullFields[i]) {
			field = fieldSers[i].deserialize(source);
		}
		else {
			field = null;
		}
		tuple.set(i, field);
	}

	return tuple;
}
 
Example #7
Source Project: SpyGlass   Author: ParallelAI   File: HBaseRawScheme.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall)
		throws IOException {
	Tuple result = new Tuple();

	Object key = sourceCall.getContext()[0];
	Object value = sourceCall.getContext()[1];
	boolean hasNext = sourceCall.getInput().next(key, value);
	if (!hasNext) {
		return false;
	}

	// Skip nulls
	if (key == null || value == null) {
		return true;
	}

	ImmutableBytesWritable keyWritable = (ImmutableBytesWritable) key;
	Result row = (Result) value;
	result.add(keyWritable);
	result.add(row);
	sourceCall.getIncomingEntry().setTuple(result);
	return true;
}
 
Example #8
Source Project: cascading-flink   Author: dataArtisans   File: CoGroupInGate.java    License: Apache License 2.0
@Override
public void run(Object input) {

	Iterator<Tuple2<Tuple, Tuple[]>> iterator;
	try {
		iterator = (Iterator<Tuple2<Tuple, Tuple[]>>) input;
	}
	catch(ClassCastException cce) {
		throw new RuntimeException("CoGroupInGate requires Iterator<Tuple2<Tuple, Tuple[]>>", cce);
	}

	resultIterator.reset(iterator);
	resultIterator.hasNext(); // load first element into closure

	tupleEntryIterator.reset(resultIterator);
	keyEntry.setTuple( this.closure.getGroupTuple(null) );

	next.receive( this, grouping );
}
 
Example #9
Source Project: plunger   Author: HotelsDotCom   File: DataTest.java    License: Apache License 2.0
@Test
public void asTupleEntryListWithFields() throws Exception {
  Fields fields = new Fields("A", "B");
  List<Tuple> tuples = new ArrayList<Tuple>();
  tuples.add(new Tuple(1, 100));
  tuples.add(new Tuple(2, 200));

  List<TupleEntry> entryList = new Data(fields, tuples).withFields(new Fields("B")).asTupleEntryList();
  assertThat(entryList.size(), is(2));
  assertThat(entryList.get(0).size(), is(1));
  assertThat(entryList.get(0).getInteger("B"), is(100));
  assertThat(entryList.get(1).size(), is(1));
  assertThat(entryList.get(1).getInteger("B"), is(200));
}
 
Example #10
Source Project: cascading-flink   Author: dataArtisans   File: FlinkFlowStep.java    License: Apache License 2.0
private void translateSink(FlowProcess flowProcess, DataSet<Tuple> input, FlowNode node) {

	Tap tap = this.getSingle(node.getSinkTaps());
	Configuration sinkConfig = this.getNodeConfig(node);
	tap.sinkConfInit(flowProcess, sinkConfig);

	int desiredDop = tap.getScheme().getNumSinkParts();
	int inputDop = ((Operator)input).getParallelism();
	int dop;

	if (inputDop == 1) {
		// input operators have dop 1. Probably because they perform a non-keyed reduce or coGroup
		dop = 1;
	}
	else {
		if (desiredDop > 0) {
			// output dop explicitly set.
			if (input instanceof GroupReduceOperator) {
				// input is a reduce and we must preserve its sorting.
				// we must set the desired dop also for reduce and related operators
				adjustDopOfReduceOrCoGroup((GroupReduceOperator) input, desiredDop);
			}
			dop = desiredDop;
		}
		else {
			dop = inputDop;
		}
	}

	input
			.output(new TapOutputFormat(node))
			.name(tap.getIdentifier())
			.setParallelism(dop)
			.withParameters(FlinkConfigConverter.toFlinkConfig(sinkConfig));
}
 
Example #11
Source Project: plunger   Author: HotelsDotCom   File: TupleComparatorTest.java    License: Apache License 2.0
@Test
public void compareSingleSortField() {
  TupleComparator tupleComparator = new TupleComparator(new Fields("A"), new Fields("A"));
  assertThat(tupleComparator.compare(new Tuple(2), new Tuple(1)), is(1));
  assertThat(tupleComparator.compare(new Tuple(1), new Tuple(2)), is(-1));
  assertThat(tupleComparator.compare(new Tuple(1), new Tuple(1)), is(0));
}
 
Example #12
Source Project: plunger   Author: HotelsDotCom   File: DataTest.java    License: Apache License 2.0
@Test
public void asTupleListOrderBy() throws Exception {
  Fields fields = new Fields("A", "B");
  List<Tuple> tuples = new ArrayList<Tuple>();
  tuples.add(new Tuple(1, 200));
  tuples.add(new Tuple(2, 100));

  List<Tuple> entryList = new Data(fields, tuples).orderBy(new Fields("B")).asTupleList();
  assertThat(entryList.size(), is(2));
  assertThat(entryList.get(0).getInteger(0), is(2));
  assertThat(entryList.get(0).getInteger(1), is(100));
  assertThat(entryList.get(1).getInteger(0), is(1));
  assertThat(entryList.get(1).getInteger(1), is(200));
}
 
Example #13
Source Project: plunger   Author: HotelsDotCom   File: DataTest.java    License: Apache License 2.0
@Test
public void init() {
  Fields fields = new Fields("x");
  List<Tuple> tuples = new ArrayList<Tuple>();
  Data tupleSource = new Data(fields, tuples);

  assertThat(tupleSource.getDeclaredFields(), is(fields));
  assertThat(tupleSource.getTuples(), is(tuples));
}
 
Example #14
Source Project: cascading-flink   Author: dataArtisans   File: FlinkFlowStep.java    License: Apache License 2.0
private DataSet<Tuple2<Tuple, Tuple[]>> prepareInnerCrossInput(List<DataSet<Tuple>> inputs, FlowNode node, Fields[] inputFields, int dop) {

	int numJoinInputs = inputs.size();

	TypeInformation<Tuple2<Tuple, Tuple[]>> tupleJoinListsTypeInfo =
			new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
					new TupleTypeInfo(Fields.UNKNOWN),
					new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, 1))
			);

	int mapDop = ((Operator)inputs.get(0)).getParallelism();

	// prepare tuple list for join
	DataSet<Tuple2<Tuple, Tuple[]>> tupleJoinLists = inputs.get(0)
			.map(new JoinPrepareMapper(numJoinInputs, null, null))
			.returns(tupleJoinListsTypeInfo)
			.setParallelism(mapDop)
			.name("coGroup-" + node.getID());

	for (int i = 1; i < inputs.size(); i++) {

		tupleJoinListsTypeInfo =
				new org.apache.flink.api.java.typeutils.TupleTypeInfo<>(
						new TupleTypeInfo(Fields.UNKNOWN),
						new TupleArrayTypeInfo(numJoinInputs, Arrays.copyOf(inputFields, i+1))
				);

		tupleJoinLists = tupleJoinLists.crossWithTiny(inputs.get(i))
				.with(new TupleAppendCrosser(i))
				.returns(tupleJoinListsTypeInfo)
				.setParallelism(dop)
				.name("coGroup-" + node.getID());
	}

	return tupleJoinLists;
}
 
Example #15
Source Project: SpyGlass   Author: ParallelAI   File: JDBCScheme.java    License: Apache License 2.0
@Override
public boolean source( FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall ) throws IOException
{
    Object key = sourceCall.getContext()[ 0 ];
    Object value = sourceCall.getContext()[ 1 ];
    boolean result = sourceCall.getInput().next( key, value );

    if( !result )
        return false;

    Tuple newTuple = ( (TupleRecord) value ).getTuple();
    sourceCall.getIncomingEntry().setTuple( newTuple );

    return true;
}
 
Example #16
Source Project: plunger   Author: HotelsDotCom   File: AbstractOperationCallStubTest.java    License: Apache License 2.0
@Test
public void collectTupleEntryCopiesCollectedTupleEntries() throws Exception {
  MockOperation operation = new MockOperation();
  TupleEntry tupleEntry = new TupleEntry(declaredField, new Tuple("value1"));
  operation.getOutputCollector().add(tupleEntry);
  tupleEntry.setString(declaredField, "value2");
  operation.getOutputCollector().add(tupleEntry);
  List<TupleEntry> tupleEntries = operation.result().asTupleEntryList();
  assertThat(tupleEntries.size(), is(2));
  assertThat(tupleEntries.get(0).getString(declaredField), is("value1"));
  assertThat(tupleEntries.get(1).getString(declaredField), is("value2"));
}
 
Example #17
Source Project: parquet-mr   Author: apache   File: TestParquetTupleScheme.java    License: Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Tuple name = new Tuple();
  name.addString(arguments.getString(0));
  name.addString(arguments.getString(1));

  result.add(name);
  functionCall.getOutputCollector().add(result);
}
 
Example #18
Source Project: plunger   Author: HotelsDotCom   File: FunctionCallStubTest.java    License: Apache License 2.0
@Test
public void next() {
  stub = new FunctionCallStub.Builder<String>(FIELDS).addTuple(1).addTuple(2).build();
  assertThat(stub.getArguments(), is(nullValue()));

  stub.nextOperateCall();
  assertThat(stub.getArguments(), is(new TupleEntry(FIELDS, new Tuple(1))));

  stub.nextOperateCall();
  assertThat(stub.getArguments(), is(new TupleEntry(FIELDS, new Tuple(2))));
}
 
Example #19
Source Project: plunger   Author: HotelsDotCom   File: AggregatorCallStubTest.java    License: Apache License 2.0
@Test
public void collectTupleEntry() {
  stub = new AggregatorCallStub.Builder<String>(GROUP_FIELDS, NON_GROUP_FIELDS).build();
  assertThat(stub.result().asTupleEntryList().isEmpty(), is(true));

  stub.getOutputCollector().add(new TupleEntry(NON_GROUP_FIELDS, new Tuple(1)));
  assertThat(stub.result().asTupleEntryList().size(), is(1));
  assertThat(stub.result().asTupleEntryList().get(0), is(new TupleEntry(NON_GROUP_FIELDS, new Tuple(1))));
}
 
Example #20
Source Project: cascading-flink   Author: dataArtisans   File: TupleArraySerializer.java    License: Apache License 2.0
@Override
public TypeSerializer<Tuple[]> duplicate() {

	TypeSerializer<Tuple>[] serializerCopies = new TypeSerializer[this.fillLength];
	for(int i=0; i<this.fillLength; i++) {
		serializerCopies[i] = this.tupleSerializers[i].duplicate();
	}
	return new TupleArraySerializer(this.length, serializerCopies);
}
 
Example #21
Source Project: parquet-mr   Author: apache   File: ParquetTupleScheme.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<JobConf> fp, SourceCall<Object[], RecordReader> sc)
    throws IOException {
  Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue();
  boolean hasNext = sc.getInput().next(null, value);
  if (!hasNext) { return false; }

  // Skip nulls
  if (value == null) { return true; }

  sc.getIncomingEntry().setTuple(value.get());
  return true;
}
 
Example #22
Source Project: cascading-flink   Author: dataArtisans   File: UnknownTupleComparator.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public void setReference(Tuple toCompare) {

	if(!areKeysAbs) {
		makeKeysAbs(keyPositions, toCompare.size());
		areKeysAbs = true;
	}

	for (int i = 0; i < this.keyPositions.length; i++) {
		this.comparators[i].setReference(toCompare.getObject(keyPositions[i]));
	}
}
 
Example #23
Source Project: plunger   Author: HotelsDotCom   File: DataBuilderTest.java    License: Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void addMultipleTupleEntriesVarArgsInvalidLength() {
  Fields fields = new Fields("A", "B");
  DataBuilder builder = new DataBuilder(fields);
  builder.addTupleEntries(new TupleEntry(fields, new Tuple(1, 2)), new TupleEntry(new Fields("A", "B", "C"),
      new Tuple(1, 2, 3)));
}
 
Example #24
Source Project: parquet-mr   Author: apache   File: TupleReadSupport.java    License: Apache License 2.0
@Override
public RecordMaterializer<Tuple> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext) {
  MessageType requestedSchema = readContext.getRequestedSchema();
  return new TupleRecordMaterializer(requestedSchema);
}
 
Example #25
Source Project: parquet-mr   Author: apache   File: TestParquetTBaseScheme.java    License: Apache License 2.0
@Override
public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
  TupleEntry arguments = functionCall.getArguments();
  Tuple result = new Tuple();

  Name name = (Name) arguments.getObject(0);
  result.add(name.getFirst_name());
  result.add(name.getLast_name());
  functionCall.getOutputCollector().add(result);
}
 
Example #26
Source Project: plunger   Author: HotelsDotCom   File: AggregatorCallStubTest.java    License: Apache License 2.0
@Test(expected = IllegalArgumentException.class)
public void collectIrregularTupleEntry() {
  stub = new AggregatorCallStub.Builder<String>(GROUP_FIELDS, NON_GROUP_FIELDS).build();
  assertThat(stub.result().asTupleEntryList().isEmpty(), is(true));

  stub.getOutputCollector().add(new TupleEntry(new Fields("X", String.class), new Tuple(1)));
}
 
Example #27
Source Project: cascading-flink   Author: dataArtisans   File: GroupByReducer.java    License: Apache License 2.0
@Override
public void reduce(Iterable<Tuple> input, Collector<Tuple> output) throws Exception {

	this.streamGraph.setTupleCollector(output);

	if(! this.calledPrepare) {
		this.streamGraph.prepare();
		this.calledPrepare = true;

		this.groupSource.start(this.groupSource);

		processBeginTime = System.currentTimeMillis();
		currentProcess.increment( SliceCounters.Process_Begin_Time, processBeginTime );
	}


	try {
		this.groupSource.run(input.iterator());
	}
	catch( OutOfMemoryError error ) {
		throw error;
	}
	catch( Throwable throwable ) {

		if( throwable instanceof CascadingException ) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during GroupByReducer execution", throwable );
	}
}
 
Example #28
Source Project: plunger   Author: HotelsDotCom   File: DataBuilderTest.java    License: Apache License 2.0
@Test
public void copyFromTupleSource() {
  Data toCopySource = new DataBuilder(new Fields("A", "B")).addTuple(4, 2).copyTuple().set("A", 1).build();

  Data copied = new DataBuilder(new Fields("A", "B")).copyTuplesFrom(toCopySource).build();

  List<Tuple> tuples = copied.getTuples();

  assertThat(tuples.size(), is(2));
  assertThat(tuples.get(0), is(new Tuple(4, 2)));
  assertThat(tuples.get(1), is(new Tuple(1, 2)));
}
 
Example #29
Source Project: plunger   Author: HotelsDotCom   File: DataTest.java    License: Apache License 2.0
@Test
public void asTupleEntryListWithFieldsNone() throws Exception {
  Fields fields = new Fields("A", "B");
  List<Tuple> tuples = new ArrayList<Tuple>();
  tuples.add(new Tuple(1, 100));
  tuples.add(new Tuple(2, 200));

  List<TupleEntry> entryList = new Data(fields, tuples).withFields(Fields.NONE).asTupleEntryList();
  assertThat(entryList.size(), is(2));
  assertThat(entryList.get(0).size(), is(0));
  assertThat(entryList.get(1).size(), is(0));
}
 
Example #30
Source Project: parquet-mr   Author: apache   File: ParquetTupleScheme.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess<? extends JobConf> fp, SourceCall<Object[], RecordReader> sc)
    throws IOException {
  Container<Tuple> value = (Container<Tuple>) sc.getInput().createValue();
  boolean hasNext = sc.getInput().next(null, value);
  if (!hasNext) { return false; }

  // Skip nulls
  if (value == null) { return true; }

  sc.getIncomingEntry().setTuple(value.get());
  return true;
}