cascading.tuple.Fields Java Examples

The following examples show how to use cascading.tuple.Fields. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Adds two tuple entries declared over fields B and D to a builder declared over A-D and checks
 * that the built tuples carry the supplied values in the B and D positions and null elsewhere.
 */
@Test
public void selectFieldsToSetUsingMultipleEntriesIterableInsert() {
  Fields declared = new Fields("A", "B", "C", "D");
  Fields selection = new Fields("B", "D");

  TupleEntry firstEntry = new TupleEntry(selection, new Tuple(1, 2));
  TupleEntry secondEntry = new TupleEntry(selection, new Tuple(3, 4));
  List<TupleEntry> entries = Arrays.asList(firstEntry, secondEntry);

  DataBuilder builder = new DataBuilder(declared);
  builder.withFields(selection).addTupleEntries(entries);

  List<Tuple> built = builder.build().getTuples();

  assertThat(built.size(), is(2));
  assertThat(built.get(0), is(new Tuple(null, 1, null, 2)));
  assertThat(built.get(1), is(new Tuple(null, 3, null, 4)));
}
 
Example #2
Source File: TupleComparatorTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Compares every pairing of two-element tuples built from the values 0 and 1 using a comparator
 * whose declared and sort fields are both (A, B), and checks the expected result of each.
 */
@Test
public void compareMultipleSortFields() {
  TupleComparator comparator = new TupleComparator(new Fields("A", "B"), new Fields("A", "B"));

  // Each row is { left A, left B, right A, right B, expected compare() result }.
  int[][] cases = {
      { 0, 0, 0, 0, 0 },
      { 0, 0, 0, 1, -1 },
      { 0, 0, 1, 0, -1 },
      { 0, 0, 1, 1, -1 },
      { 0, 1, 0, 0, 1 },
      { 0, 1, 0, 1, 0 },
      { 0, 1, 1, 0, -1 },
      { 0, 1, 1, 1, -1 },
      { 1, 0, 0, 0, 1 },
      { 1, 0, 0, 1, 1 },
      { 1, 0, 1, 0, 0 },
      { 1, 0, 1, 1, -1 },
      { 1, 1, 0, 0, 1 },
      { 1, 1, 0, 1, 1 },
      { 1, 1, 1, 0, 1 },
      { 1, 1, 1, 1, 0 },
  };

  for (int[] row : cases) {
    Tuple left = new Tuple(row[0], row[1]);
    Tuple right = new Tuple(row[2], row[3]);
    assertThat(comparator.compare(left, right), is(row[4]));
  }
}
 
Example #3
Source File: DataTest.java    From plunger with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Data over fields (A, B), selects the fields as (B, A), and checks that each returned
 * tuple has two values with the B value at position 0 and the A value at position 1.
 */
@Test
public void asTupleListWithFieldsOrdering() throws Exception {
  List<Tuple> input = new ArrayList<Tuple>();
  input.add(new Tuple(1, 100));
  input.add(new Tuple(2, 200));

  Data data = new Data(new Fields("A", "B"), input);
  List<Tuple> reordered = data.withFields(new Fields("B", "A")).asTupleList();

  assertThat(reordered.size(), is(2));

  Tuple first = reordered.get(0);
  assertThat(first.size(), is(2));
  assertThat(first.getInteger(0), is(100));
  assertThat(first.getInteger(1), is(1));

  Tuple second = reordered.get(1);
  assertThat(second.size(), is(2));
  assertThat(second.getInteger(0), is(200));
  assertThat(second.getInteger(1), is(2));
}
 
Example #4
Source File: SchemaIntersection.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Walks the top-level fields of {@code fileSchema} in order and keeps those whose name is
 * contained in the requested fields, recording both the kept {@link Fields} and a
 * {@link MessageType} made of the kept schema types.
 *
 * @param fileSchema      schema whose fields are examined
 * @param requestedFields fields to keep; {@code Fields.UNKNOWN} is replaced by {@code Fields.ALL}
 */
public SchemaIntersection(MessageType fileSchema, Fields requestedFields) {
  Fields wanted = (requestedFields == Fields.UNKNOWN) ? Fields.ALL : requestedFields;

  Fields keptFields = Fields.NONE;
  List<Type> keptTypes = new ArrayList<Type>();

  int fieldCount = fileSchema.getFieldCount();
  for (int index = 0; index < fieldCount; index++) {
    Type candidate = fileSchema.getType(index);
    Fields candidateName = new Fields(candidate.getName());

    if (!wanted.contains(candidateName)) {
      continue;
    }

    keptFields = keptFields.append(candidateName);
    keptTypes.add(candidate);
  }

  this.sourceFields = keptFields;
  this.requestedSchema = new MessageType(fileSchema.getName(), keptTypes);
}
 
Example #5
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a single tuple through a DataBuilder declared with per-field types and checks the
 * resulting tuple against the expected coerced values.
 */
@Test
public void addCoerceTypes() {
  Fields declared = new Fields("A", "B", "C", "D");
  Class<?>[] fieldTypes = new Class<?>[] { String.class, Integer.class, int.class, boolean.class };

  DataBuilder builder = new DataBuilder(declared, fieldTypes);
  Data built = builder.addTuple(1, "1", null, null).build();

  Tuple coerced = built.getTuples().get(0);
  assertThat(coerced, is(new Tuple("1", 1, 0, false)));
}
 
Example #6
Source File: DataTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Data over fields (A, B) from two tuples and checks that asTupleList() returns both
 * tuples with their values in the original positions.
 */
@Test
public void asTupleList() throws Exception {
  List<Tuple> input = new ArrayList<Tuple>();
  input.add(new Tuple(1, 100));
  input.add(new Tuple(2, 200));

  Data data = new Data(new Fields("A", "B"), input);
  List<Tuple> result = data.asTupleList();

  assertThat(result.size(), is(2));

  Tuple first = result.get(0);
  assertThat(first.getInteger(0), is(1));
  assertThat(first.getInteger(1), is(100));

  Tuple second = result.get(1);
  assertThat(second.getInteger(0), is(2));
  assertThat(second.getInteger(1), is(200));
}
 
Example #7
Source File: FunctionCallStub.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Merges the given fields into a single mask and stores it on this builder, after checking that
 * the mask can be selected from this builder's fields.
 *
 * @param fields the fields to merge into the mask
 * @return this builder
 * @throws IllegalArgumentException if selecting the merged mask from this builder's fields fails;
 *         the underlying {@link FieldsResolverException} is attached as the cause
 */
public Builder<C> withFields(Fields... fields) {
  Fields fieldMask = Fields.merge(fields);
  try {
    // The result is discarded: select() is called only so it throws if the mask is not resolvable.
    this.fields.select(fieldMask);
    this.fieldMask = fieldMask;
  } catch (FieldsResolverException e) {
    // Keep the resolver exception as the cause so its details are not lost in the stack trace.
    throw new IllegalArgumentException("selected fields must be contained in record fields: selected fields="
        + fieldMask + ", source fields=" + this.fields, e);
  }
  return this;
}
 
Example #8
Source File: Data.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps every tuple from {@link #asTupleList()} in a {@link TupleEntry} over the selected fields
 * and returns the entries as an unmodifiable list.
 */
public List<TupleEntry> asTupleEntryList() {
  List<Tuple> source = asTupleList();
  Fields entryFields = selectedFields();

  int count = source.size();
  List<TupleEntry> wrapped = new ArrayList<TupleEntry>(count);
  for (int index = 0; index < count; index++) {
    TupleEntry entry = new TupleEntry(entryFields, source.get(index), true);
    wrapped.add(entry);
  }

  return Collections.unmodifiableList(wrapped);
}
 
Example #9
Source File: JDBCScheme.java    From SpyGlass with Apache License 2.0 5 votes vote down vote up
/**
 * Constructor JDBCScheme creates a new JDBCScheme instance.
 *
 * @param inputFormatClass  of type Class<? extends DBInputFormat>
 * @param outputFormatClass of type Class<? extends DBOutputFormat>
 * @param columnFields      of type Fields
 * @param columns           of type String[]
 * @param orderBy           of type String[]
 * @param conditions        of type String
 * @param limit             of type long
 * @param updateByFields    of type Fields
 * @param updateBy          of type String[]
 */
public JDBCScheme( Class<? extends DBInputFormat> inputFormatClass, Class<? extends DBOutputFormat> outputFormatClass, Fields columnFields, String[] columns, String[] orderBy, String conditions, long limit, Fields updateByFields, String[] updateBy )
{
    this.columnFields = columnFields;

    verifyColumns( columnFields, columns );

    setSinkFields( columnFields );
    setSourceFields( columnFields );

    if( updateBy != null && updateBy.length != 0 )
    {
        this.updateBy = updateBy;
        this.updateByFields = updateByFields;

        if( updateByFields.size() != updateBy.length )
            throw new IllegalArgumentException( "updateByFields and updateBy must be the same size" );

        if( !this.columnFields.contains( this.updateByFields ) )
            throw new IllegalArgumentException( "columnFields must contain updateByFields column names" );

        this.updateValueFields = columnFields.subtract( updateByFields ).append( updateByFields );
        this.updateIfTuple = Tuple.size( updateByFields.size() ); // all nulls
    }

    this.columns = columns;
    this.orderBy = orderBy;
    this.conditions = conditions;
    this.limit = limit;

    this.inputFormatClass = inputFormatClass;
    this.outputFormatClass = outputFormatClass;
}
 
Example #10
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an IllegalArgumentException when a builder declared over (A, B) is handed a tuple
 * entry declared over (A, B, C) alongside a matching one.
 */
@Test(expected = IllegalArgumentException.class)
public void addMultipleTupleEntriesVarArgsInvalidLength() {
  Fields declared = new Fields("A", "B");
  DataBuilder builder = new DataBuilder(declared);

  TupleEntry matching = new TupleEntry(declared, new Tuple(1, 2));
  TupleEntry tooWide = new TupleEntry(new Fields("A", "B", "C"), new Tuple(1, 2, 3));

  builder.addTupleEntries(matching, tooWide);
}
 
Example #11
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a list of single-value tuples with only field B selected on a builder declared over
 * (A, B), and checks the built tuples hold null for A and the supplied value for B.
 */
@Test
public void addMultipleTuplesIterableWithFields() {
  DataBuilder builder = new DataBuilder(new Fields("A", "B"));
  List<Tuple> singleValueTuples = Arrays.asList(new Tuple(1), new Tuple(2));

  builder.withFields(new Fields("B")).addTuples(singleValueTuples);

  List<Tuple> built = builder.build().getTuples();

  assertThat(built.size(), is(2));
  assertThat(built.get(0), is(new Tuple(null, 1)));
  assertThat(built.get(1), is(new Tuple(null, 2)));
}
 
Example #12
Source File: FlinkFlowStep.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses how the inputs of the node's CoGroup are prepared, based on the CoGroup's joiner class:
 * BufferJoin uses the buffer preparation, InnerJoin with no key fields on the first input uses
 * the cross (Cartesian product) preparation, and everything else uses the full outer preparation.
 */
private DataSet<?> prepareCoGroupInput(List<DataSet<Tuple>> inputs, FlowNode node, int dop) {

		CoGroup coGroup = (CoGroup)getSingle(node.getSinkElements());
		Class<?> joinerType = coGroup.getJoiner().getClass();

		int numJoinInputs;
		if(coGroup.isSelfJoin()) {
			numJoinInputs = coGroup.getNumSelfJoins() + 1;
		}
		else {
			numJoinInputs = inputs.size();
		}

		// These arrays are handed to computeSpliceInputsFieldsKeys and then passed on below.
		Fields[] inputFields = new Fields[numJoinInputs];
		Fields[] keyFields = new Fields[numJoinInputs];
		String[][] flinkKeys = new String[numJoinInputs][];
		List<DataSet<Tuple>> joinInputs = computeSpliceInputsFieldsKeys(coGroup, node, inputs, inputFields, keyFields, flinkKeys);

		if(joinerType.equals(BufferJoin.class)) {
			return prepareBufferCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop);
		}

		boolean innerJoinWithoutKeys = joinerType.equals(InnerJoin.class) && keyFields[0].isNone();
		if(innerJoinWithoutKeys) {
			// Cartesian product
			return prepareInnerCrossInput(joinInputs, node, inputFields, dop);
		}

		return prepareFullOuterCoGroupInput(joinInputs, node, inputFields, keyFields, flinkKeys, dop);

	}
 
Example #13
Source File: PlungerAssert.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a matcher for a {@link TupleEntry} made from the given fields and tuple.
 *
 * @throws IllegalArgumentException if the fields and the tuple differ in size
 */
public static Matcher<TupleEntry> tupleEntry(Fields fields, Tuple tuple) {
  int fieldCount = fields.size();
  int valueCount = tuple.size();
  if (fieldCount == valueCount) {
    TupleEntry expected = new TupleEntry(fields, tuple);
    return tupleEntry(expected);
  }
  String message = "Fields size (" + fieldCount + ") does not match tuple size (" + valueCount + ")";
  throw new IllegalArgumentException(message);
}
 
Example #14
Source File: DataTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that selectedFields() reports the fields in the order passed to withFields(), not the
 * order in which the Data was declared.
 */
@Test
public void selectedFieldsOrdering() {
  Data data = new Data(new Fields("A", "B", "C"), new ArrayList<Tuple>());
  Data reordered = data.withFields(new Fields("C", "A", "B"));

  Fields selected = reordered.selectedFields();

  assertThat(selected, is(new Fields("C", "A", "B")));
}
 
Example #15
Source File: HBaseScheme.java    From SpyGlass with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next key/value pair from the input and, when both are non-null, sets the incoming
 * entry to a tuple holding the key followed by one value per configured family/field pair.
 *
 * @return false when the input has no further record, true otherwise (including when the record
 *         is skipped because its key or value is null)
 */
@Override
public boolean source(FlowProcess<JobConf> flowProcess,
    SourceCall<Object[], RecordReader> sourceCall) throws IOException {
  Object[] context = sourceCall.getContext();
  Object key = context[0];
  Object value = context[1];

  if (!sourceCall.getInput().next(key, value)) {
    return false;
  }

  // Skip nulls
  if (key == null || value == null) {
    return true;
  }

  ImmutableBytesWritable rowKey = (ImmutableBytesWritable) key;
  Result row = (Result) value;

  Tuple rowTuple = new Tuple();
  rowTuple.add(rowKey);

  for (int family = 0; family < this.familyNames.length; family++) {
    byte[] familyBytes = Bytes.toBytes(this.familyNames[family]);
    Fields familyFields = this.valueFields[family];

    for (int position = 0; position < familyFields.size(); position++) {
      String fieldName = (String) familyFields.get(position);
      byte[] cellValue = row.getValue(familyBytes, Bytes.toBytes(fieldName));

      // A missing cell is kept as a null entry so the tuple positions stay aligned.
      ImmutableBytesWritable cell = null;
      if (cellValue != null) {
        cell = new ImmutableBytesWritable(cellValue);
      }
      rowTuple.add(cell);
    }
  }

  sourceCall.getIncomingEntry().setTuple(rowTuple);

  return true;
}
 
Example #16
Source File: TapTypeUtilTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that TapTypeUtil reports Configuration as the config class for a Hadoop PartitionTap
 * wrapping an Hfs tap.
 */
@Test
public void hadoopPartitionTap() {
  cascading.scheme.hadoop.TextDelimited scheme = new cascading.scheme.hadoop.TextDelimited();
  cascading.tap.hadoop.Hfs parent = new cascading.tap.hadoop.Hfs(scheme, "");
  DelimitedPartition partition = new DelimitedPartition(new Fields("A"));
  cascading.tap.hadoop.PartitionTap partitionTap = new cascading.tap.hadoop.PartitionTap(parent, partition);

  Class<?> configClass = TapTypeUtil.getTapConfigClass(partitionTap);

  assertEquals(Configuration.class, configClass);
}
 
Example #17
Source File: TupleArrayTypeInfo.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
public TupleArrayTypeInfo(int length, Fields[] fields) {
	this.length = length;
	this.fillLength = fields.length;

	this.tupleTypes = new TupleTypeInfo[this.fillLength];
	for(int i=0; i<this.fillLength; i++) {
		this.tupleTypes[i] = new TupleTypeInfo(fields[i]);
	}
}
 
Example #18
Source File: Data.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the result as an unmodifiable {@link Tuple} list. The tuples are sorted first when
 * sort fields have been set, and each returned tuple is obtained by calling
 * {@code remove(declaredFields, selectedFields)} on a new Tuple built from the stored one.
 */
public List<Tuple> asTupleList() {
  List<Tuple> ordered = new ArrayList<Tuple>(tuples);

  boolean sortRequested = sortFields != null && sortFields.size() > 0;
  if (sortRequested) {
    TupleComparator comparator = new TupleComparator(declaredFields, sortFields);
    Collections.sort(ordered, comparator);
  }

  Fields wanted = selectedFields();
  int count = ordered.size();
  List<Tuple> projected = new ArrayList<Tuple>(count);
  for (int index = 0; index < count; index++) {
    Tuple working = new Tuple(ordered.get(index));
    projected.add(working.remove(declaredFields, wanted));
  }

  return Collections.unmodifiableList(projected);
}
 
Example #19
Source File: FunctionCallStubTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an IllegalArgumentException when a tuple entry declared over field "X" is added to the
 * output collector of a stub built over FIELDS.
 */
@Test(expected = IllegalArgumentException.class)
public void collectIrregularTupleEntry() {
  stub = new FunctionCallStub.Builder<String>(FIELDS).build();

  boolean emptyBeforeCollect = stub.result().asTupleEntryList().isEmpty();
  assertThat(emptyBeforeCollect, is(true));

  TupleEntry irregular = new TupleEntry(new Fields("X", String.class), new Tuple(1));
  stub.getOutputCollector().add(irregular);
}
 
Example #20
Source File: CoGroupBufferClosure.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the closure, sizing the empty tuple after the first join fields, loading the tuple
 * collection factory through the FactoryLoader, and finally calling initLists().
 */
public CoGroupBufferClosure(FlowProcess flowProcess, int numSelfJoins, Fields[] joinFields, Fields[] valueFields) {
	super(flowProcess, joinFields, valueFields);

	int firstKeySize = joinFields[0].size();
	this.emptyTuple = Tuple.size( firstKeySize );
	this.numSelfJoins = numSelfJoins;

	// SpillingTupleCollectionFactory is the class passed to the loader as its third argument.
	this.tupleCollectionFactory = FactoryLoader.getInstance()
			.loadFactoryFrom( flowProcess, TUPLE_COLLECTION_FACTORY, SpillingTupleCollectionFactory.class );

	initLists();
}
 
Example #21
Source File: Data.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Specifies that the returned results be restricted to the specified {@link Fields}. A null or
 * empty argument leaves the current selection untouched.
 *
 * @return this instance
 */
public Data withFields(Fields... fields) {
  if (fields == null || fields.length == 0) {
    return this;
  }

  // this check seems unnecessary, but Fields.merge() doesn't seem to handle this case
  for (int index = 0; index < fields.length; index++) {
    if (fields[index] == Fields.ALL) {
      withFields = Fields.ALL;
      return this;
    }
  }

  withFields = Fields.merge(fields);
  return this;
}
 
Example #22
Source File: HBaseScheme.java    From SpyGlass with Apache License 2.0 5 votes vote down vote up
/**
 * Sets both the source and sink fields to the key fields, followed by the joined column fields
 * when any column fields are given.
 */
private void setSourceSink(Fields keyFields, Fields[] columnFields) {
  boolean hasColumns = columnFields.length != 0;

  // prepend the key fields to the joined column fields
  Fields combined = hasColumns
      ? Fields.join(keyFields, Fields.join(columnFields))
      : keyFields;

  setSourceFields(combined);
  setSinkFields(combined);
}
 
Example #23
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Expects an IllegalArgumentException when, with only field B selected, the supplied tuple
 * entries include one declared over (A, B).
 */
@Test(expected = IllegalArgumentException.class)
public void addMultipleTupleEntriesIterableWithFieldsInvalidLength() {
  DataBuilder builder = new DataBuilder(new Fields("A", "B"));

  TupleEntry matching = new TupleEntry(new Fields("B"), new Tuple(2));
  TupleEntry tooWide = new TupleEntry(new Fields("A", "B"), new Tuple(1, 2));
  List<TupleEntry> entries = Arrays.asList(matching, tooWide);

  builder.withFields(new Fields("B")).addTupleEntries(entries);
}
 
Example #24
Source File: TupleReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the read context: the full file schema when no requested fields are available, otherwise
 * the schema produced by intersecting the file schema with the requested fields.
 */
@Override
public ReadContext init(Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema) {
  Fields requested = getRequestedFields(configuration);

  if (requested != null) {
    MessageType narrowed = new SchemaIntersection(fileSchema, requested).getRequestedSchema();
    return new ReadContext(narrowed);
  }

  return new ReadContext(fileSchema);
}
 
Example #25
Source File: TupleReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the requested fields from the configuration entry PARQUET_CASCADING_REQUESTED_FIELDS,
 * which is split on ':' into field names.
 *
 * @return {@code Fields.ALL} when the entry is absent or splits into no parts, otherwise a
 *         Fields built from the parts
 */
static protected Fields getRequestedFields(Configuration configuration) {
  String encoded = configuration.get(PARQUET_CASCADING_REQUESTED_FIELDS);
  if (encoded == null) {
    return Fields.ALL;
  }

  String[] names = encoded.split(":");
  return names.length == 0 ? Fields.ALL : new Fields(names);
}
 
Example #26
Source File: WordCount.java    From cascading-flink with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles and runs a word-count flow: lines are read from {@code args[0]}, split on whitespace
 * into tokens, counted per token, and the result is written to {@code args[1]}.
 *
 * @param args input path followed by output path
 * @throws IllegalArgumentException if fewer than two arguments are given
 */
public static void main(String[] args) {

		if (args.length < 2) {
			throw new IllegalArgumentException("Please specify input and output paths as arguments.");
		}

		Fields token = new Fields( "token", String.class );
		Fields text = new Fields( "text" );
		RegexSplitGenerator splitter = new RegexSplitGenerator( token, "\\s+" );
		// only returns "token"
		Pipe docPipe = new Each( "token", text, splitter, Fields.RESULTS );

		Pipe wcPipe = new Pipe( "wc", docPipe );
		wcPipe = new AggregateBy( wcPipe, token, new CountBy(new Fields("count")));

		Tap inTap = new Hfs(new TextDelimited(text, "\n" ), args[0]);
		// the sink is opened with SinkMode.REPLACE
		Tap outTap = new Hfs(new TextDelimited(false, "\n"), args[1], SinkMode.REPLACE);

		FlowDef flowDef = FlowDef.flowDef().setName( "wc" )
				.addSource( docPipe, inTap )
				.addTailSink( wcPipe, outTap );

		FlowConnector flowConnector = new FlinkConnector();

		Flow wcFlow = flowConnector.connect( flowDef );

		wcFlow.complete();
	}
 
Example #27
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Adds two tuple entries declared over (A, B) through the varargs overload and checks that both
 * tuples come back unchanged and in order.
 */
@Test
public void addMultipleTupleEntriesVarArgs() {
  Fields declared = new Fields("A", "B");
  DataBuilder builder = new DataBuilder(declared);

  TupleEntry firstEntry = new TupleEntry(declared, new Tuple(1, 2));
  TupleEntry secondEntry = new TupleEntry(declared, new Tuple(3, 4));
  builder.addTupleEntries(firstEntry, secondEntry);

  List<Tuple> built = builder.build().getTuples();

  assertThat(built.size(), is(2));
  assertThat(built.get(0), is(new Tuple(1, 2)));
  assertThat(built.get(1), is(new Tuple(3, 4)));
}
 
Example #28
Source File: ParquetTupleScheme.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the schema for the tap, intersects it with the current source fields, stores the
 * intersection's source fields on this scheme, and returns the updated source fields.
 */
@Override
public Fields retrieveSourceFields(FlowProcess<? extends JobConf> flowProcess, Tap tap) {
   MessageType fileSchema = readSchema(flowProcess, tap);
   Fields resolved = new SchemaIntersection(fileSchema, getSourceFields()).getSourceFields();

   setSourceFields(resolved);

   return getSourceFields();
 }
 
Example #29
Source File: DataBuilderTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Adds two single-value tuples with only field B selected on a builder declared over (A, B), and
 * checks the built tuples hold null for A and the supplied value for B.
 */
@Test
public void addTupleTupleWithFields() {
  DataBuilder builder = new DataBuilder(new Fields("A", "B"));

  builder.withFields(new Fields("B"))
      .addTuple(new Tuple(2))
      .addTuple(new Tuple(4));

  List<Tuple> built = builder.build().getTuples();

  assertThat(built.size(), is(2));
  assertThat(built.get(0), is(new Tuple(null, 2)));
  assertThat(built.get(1), is(new Tuple(null, 4)));
}
 
Example #30
Source File: SortTest.java    From plunger with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the flow from defineFlow() over four input tuples with a local connector and checks the
 * FIELD_S, FIELD_Y and FIELD_V values of the tuple entries collected by the sink, in order.
 */
@Test
public void testComplete() throws Exception {

  Fields inFields = Fields.join(FIELD_S, FIELD_X, FIELD_Y);

  TupleListTap source = new DataBuilder(inFields)
      .addTuple("A", "a", "za")
      .addTuple("B", "b", "zb")
      .addTuple("AA", "aa", "zaa")
      .addTuple("BB", "bb", "zbb")
      .toTap();

  Bucket sink = new Bucket();

  FlowDef flowDef = defineFlow(source, sink);
  new LocalFlowConnector().connect(flowDef).complete();

  List<TupleEntry> results = sink.result().asTupleEntryList();

  // Each entry is fetched right before its own assertions so failures surface in the same order.
  TupleEntry first = results.get(0);
  assertThat(first.getString(FIELD_S), is("A"));
  assertThat(first.getString(FIELD_Y), is("za"));
  assertThat(first.getString(FIELD_V), is("a"));

  TupleEntry second = results.get(1);
  assertThat(second.getString(FIELD_S), is("AA"));
  assertThat(second.getString(FIELD_Y), is("zaa"));
  assertThat(second.getString(FIELD_V), is("aa"));

  TupleEntry third = results.get(2);
  assertThat(third.getString(FIELD_S), is("B"));

  TupleEntry fourth = results.get(3);
  assertThat(fourth.getString(FIELD_S), is("BB"));
  assertThat(fourth.getString(FIELD_Y), is("zbb"));
  assertThat(fourth.getString(FIELD_V), is("bb"));

}