Java Code Examples for org.apache.pig.data.DataType#TUPLE

The following examples show how to use org.apache.pig.data.DataType#TUPLE. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: CastUtils.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Converts raw bytes into a value of the requested Pig type by dispatching to
 * the matching {@code bytesToXxx} method of the supplied caster.
 *
 * @param caster LoadCaster used to convert the bytes into a field.
 * @param bytes raw bytes to convert.
 * @param fieldSchema schema handed to the caster for BAG, MAP and TUPLE;
 *        pass in null if a simple type.
 * @param dataType type code from DataType
 * @return converted object; for BYTEARRAY the bytes wrapped in a DataByteArray.
 * @throws IOException if dataType is not one of the type codes handled here
 */
public static Object convertToType(LoadCaster caster, byte[] bytes,
        ResourceFieldSchema fieldSchema, byte dataType) throws IOException {
    final Object converted;
    switch (dataType) {
    // Complex types: the caster needs the field schema to decode the contents.
    case DataType.BAG:
        converted = caster.bytesToBag(bytes, fieldSchema);
        break;
    case DataType.MAP:
        converted = caster.bytesToMap(bytes, fieldSchema);
        break;
    case DataType.TUPLE:
        converted = caster.bytesToTuple(bytes, fieldSchema);
        break;

    // BYTEARRAY needs no caster: the bytes are wrapped as they are.
    case DataType.BYTEARRAY:
        converted = new DataByteArray(bytes);
        break;

    // Simple types.
    case DataType.CHARARRAY:
        converted = caster.bytesToCharArray(bytes);
        break;
    case DataType.BOOLEAN:
        converted = caster.bytesToBoolean(bytes);
        break;
    case DataType.INTEGER:
        converted = caster.bytesToInteger(bytes);
        break;
    case DataType.LONG:
        converted = caster.bytesToLong(bytes);
        break;
    case DataType.FLOAT:
        converted = caster.bytesToFloat(bytes);
        break;
    case DataType.DOUBLE:
        converted = caster.bytesToDouble(bytes);
        break;
    case DataType.BIGINTEGER:
        converted = caster.bytesToBigInteger(bytes);
        break;
    case DataType.BIGDECIMAL:
        converted = caster.bytesToBigDecimal(bytes);
        break;
    case DataType.DATETIME:
        converted = caster.bytesToDateTime(bytes);
        break;

    default:
        throw new IOException("Unknown type " + dataType);
    }
    return converted;
}
 
Example 2
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that BagToString reports a CHARARRAY output field when its input
 * schema consists of a single bag of (INTEGER, CHARARRAY) tuples and no
 * delimiter field is supplied.
 */
@Test
public void testOutputSchemaWithDefaultDelimiterForBagToTupleStringUDF() throws Exception {

	// tuple schema: (int, chararray)
	FieldSchema tupSch = new FieldSchema(null, DataType.TUPLE);
	tupSch.schema = new Schema();
	tupSch.schema.add(new FieldSchema(null, DataType.INTEGER));
	tupSch.schema.add(new FieldSchema(null, DataType.CHARARRAY));

	// bag schema: {(int, chararray)}
	FieldSchema bagSch = new FieldSchema(null, DataType.BAG);
	bagSch.schema = new Schema(tupSch);

	// the bag is the only input field
	Schema inputSch = new Schema();
	inputSch.add(bagSch);

	BagToString udf = new BagToString();
	Schema outputSchema = udf.outputSchema(inputSch);

	// assertEquals takes (message, expected, actual); the expected constant goes
	// first so that a failure reports the two values the right way round.
	assertEquals("schema of BagToTuple input", DataType.CHARARRAY,
			outputSchema.getField(0).type);

}
 
Example 3
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Convert an Avro schema to a Pig schema.
 *
 * If the converted top-level field is already a tuple, its inner schema is
 * returned directly; any other type is first wrapped in a tuple and returned
 * as the only field of a new schema.
 *
 * @param schema the Avro schema to convert
 * @return the resulting Pig resource schema
 * @throws IOException if the schema contains a generic union
 */
public static ResourceSchema convert(Schema schema) throws IOException {
    if (AvroStorageUtils.containsGenericUnion(schema)) {
        throw new IOException("We don't accept schema containing generic unions.");
    }

    Set<Schema> seenRecords = new HashSet<Schema>();
    ResourceFieldSchema converted = inconvert(schema, FIELD, seenRecords);

    // A tuple already carries the schema we want.
    if (converted.getType() == DataType.TUPLE) {
        return converted.getSchema();
    }

    // Other types: wrap the single field in a tuple first.
    ResourceFieldSchema wrapper = AvroStorageUtils.wrapAsTuple(converted);
    ResourceSchema wrapperSchema = new ResourceSchema();
    wrapperSchema.setFields(new ResourceFieldSchema[] { wrapper });
    return wrapperSchema;
}
 
Example 4
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Walks a field schema recursively and gives every tuple whose schema is
 * present but empty a single unnamed BYTEARRAY field.
 * Fields that have no schema at all are left untouched.
 *
 * @param fs the field schema to fix up in place
 */
private static void convertEmptyTupleToBytearrayTuple(
        FieldSchema fs) {
    // Nothing to rewrite and nothing to descend into.
    if (fs.schema == null) {
        return;
    }

    boolean emptyTuple = fs.type == DataType.TUPLE && fs.schema.size() == 0;
    if (emptyTuple) {
        fs.schema.add(new FieldSchema(null, DataType.BYTEARRAY));
        return;
    }

    for (FieldSchema child : fs.schema.getFields()) {
        convertEmptyTupleToBytearrayTuple(child);
    }
}
 
Example 5
Source File: StreamingQuantile.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as a single tuple holding one DOUBLE field per quantile.
 * Fields are named "quantile_" followed by either the ordinal position or the
 * quantile value with "." replaced by "_", depending on ordinalOutputSchema.
 *
 * @param input the input schema (not consulted)
 * @return a schema with one unnamed TUPLE field
 */
@Override
public Schema outputSchema(Schema input)
{
  Schema quantileFields = new Schema();
  if (!ordinalOutputSchema)
  {
    // Name each field after the quantile value itself.
    for (Double quantile : this.quantiles)
    {
      String suffix = quantile.toString().replace(".", "_");
      quantileFields.add(new Schema.FieldSchema("quantile_" + suffix, DataType.DOUBLE));
    }
  }
  else
  {
    // Name each field after its position.
    for (int idx = 0; idx < this.numQuantiles; idx++)
    {
      String fieldName = "quantile_" + idx;
      quantileFields.add(new Schema.FieldSchema(fieldName, DataType.DOUBLE));
    }
  }

  try
  {
    FieldSchema tupleField = new FieldSchema(null, quantileFields, DataType.TUPLE);
    return new Schema(tupleField);
  }
  catch (FrontendException e)
  {
    throw new RuntimeException(e);
  }
}
 
Example 6
Source File: VARBAG.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as an unnamed bag of tuples. The tuple is named after
 * the upper-cased bagColName and holds one field, named after the upper-cased
 * tupleColName, of type fieldType.
 *
 * @param input the input schema (not consulted)
 * @return a schema with a single BAG field
 * @throws RuntimeException if the schema cannot be built; the underlying
 *         FrontendException is attached as the cause
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        // subschema describing the fields in the tuples of the bag
        List<Schema.FieldSchema> tokenFs = new ArrayList<Schema.FieldSchema>();
        tokenFs.add(new Schema.FieldSchema(this.tupleColName.toUpperCase(), this.fieldType));

        Schema tupleSchema = new Schema(tokenFs);
        Schema.FieldSchema tupleFs =
                new Schema.FieldSchema(this.bagColName.toUpperCase(), tupleSchema, DataType.TUPLE);

        Schema bagSchema = new Schema(tupleFs);
        bagSchema.setTwoLevelAccessRequired(true);
        Schema.FieldSchema bagFs = new Schema.FieldSchema(null, bagSchema, DataType.BAG);

        return new Schema(bagFs);
    } catch (FrontendException e) {
        // Keep the original exception as the cause so the real reason is not lost.
        throw new RuntimeException("Unable to create schema for BAG.", e);
    }
}
 
Example 7
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a bag-of-tuples pig.Schema through ResourceSchema and back,
 * and checks that the regenerated schema equals the original one.
 */
@Test
public void testResourceFlatSchemaCreation2() 
throws ExecException, SchemaMergeException, FrontendException {
    String[] fieldNames = {"f1", "f2"};
    byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};

    // Build the schema inside out: flat fields -> tuple "t0" -> bag "t1".
    Schema.FieldSchema tupleField = new Schema.FieldSchema(
            "t0",
            TypeCheckingTestUtil.genFlatSchema(fieldNames, fieldTypes),
            DataType.TUPLE);
    Schema.FieldSchema bagField = new Schema.FieldSchema(
            "t1", new Schema(tupleField), DataType.BAG);
    Schema origSchema = new Schema(bagField);

    // pig.Schema -> ResourceSchema -> pig.Schema
    ResourceSchema rsSchema = new ResourceSchema(origSchema);
    Schema genSchema = Schema.getPigSchema(rsSchema);

    boolean sameSchema = Schema.equals(genSchema, origSchema, true, false);
    assertTrue("generated schema equals original", sameSchema);
}
 
Example 8
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that BagToString rejects an input schema whose second field
 * (the delimiter position) is a DOUBLE by throwing a RuntimeException.
 */
@Test(expected=java.lang.RuntimeException.class)
public void testInvalidOutputSchemaForBagToTupleStringUDF() throws Exception {

	// inner tuple: (int, chararray)
	FieldSchema tupleField = new FieldSchema(null, DataType.TUPLE);
	tupleField.schema = new Schema();
	tupleField.schema.add(new FieldSchema(null, DataType.INTEGER));
	tupleField.schema.add(new FieldSchema(null, DataType.CHARARRAY));

	// bag wrapping that tuple
	FieldSchema bagField = new FieldSchema(null, DataType.BAG);
	bagField.schema = new Schema(tupleField);

	// second argument deliberately has the wrong type
	FieldSchema badDelimiterField = new FieldSchema(null, DataType.DOUBLE);

	Schema udfInput = new Schema();
	udfInput.add(bagField);
	udfInput.add(badDelimiterField);

	// an exception is expected because the delimiter is not of type DataType.CHARARRAY
	new BagToString().outputSchema(udfInput);
}
 
Example 9
Source File: UnorderedPairs.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the output as a bag of tuples with two fields, "elem1" and "elem2",
 * each a TUPLE carrying its own clone of the input bag's tuple schema.
 *
 * NOTE(review): the validation exceptions thrown below are raised inside the
 * try block and are therefore caught by the catch-all, so the method returns
 * null for invalid input rather than propagating them - confirm whether that
 * is intended before relying on the messages.
 *
 * @param input the input schema; expected to be a single BAG of TUPLEs
 * @return the output schema, or null if any exception occurs
 */
@Override
public Schema outputSchema(Schema input)
{
  try
  {
    if (input.size() != 1)
    {
      throw new RuntimeException("Expected input to have only a single field");
    }

    Schema.FieldSchema bagField = input.getField(0);
    if (bagField.type != DataType.BAG)
    {
      throw new RuntimeException("Expected a BAG as input");
    }

    Schema.FieldSchema tupleField = bagField.schema.getField(0);
    if (tupleField.type != DataType.TUPLE)
    {
      String foundType = DataType.findTypeName(tupleField.type);
      throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               foundType));
    }

    // Each pair element gets an independent copy of the tuple schema.
    Schema pairSchema = new Schema();
    pairSchema.add(new Schema.FieldSchema("elem1", tupleField.schema.clone(), DataType.TUPLE));
    pairSchema.add(new Schema.FieldSchema("elem2", tupleField.schema.clone(), DataType.TUPLE));

    String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    return new Schema(new Schema.FieldSchema(outputName, pairSchema, DataType.BAG));
  }
  catch (Exception e)
  {
    return null;
  }
}
 
Example 10
Source File: EqualToExpr.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates the equality comparison for the configured operand type.
 * For supported types a non-null result from accumChild is returned as is;
 * otherwise both operands are fetched and passed to doComparison.
 *
 * @return the result from accumChild or from doComparison
 * @throws ExecException (code 2067) for an unsupported operand type, or when
 *         a RuntimeException is raised during evaluation
 */
@Override
public Result getNextBoolean() throws ExecException {
  try {
    switch (operandType) {
    case DataType.BOOLEAN:
    case DataType.INTEGER:
    case DataType.LONG:
    case DataType.FLOAT:
    case DataType.DOUBLE:
    case DataType.BIGINTEGER:
    case DataType.BIGDECIMAL:
    case DataType.DATETIME:
    case DataType.BYTEARRAY:
    case DataType.CHARARRAY:
    case DataType.TUPLE:
    case DataType.MAP: {
        Result accumulated = accumChild(null, operandType);
        if (accumulated != null) {
            return accumulated;
        }
        Result leftResult = lhs.getNext(operandType);
        Result rightResult = rhs.getNext(operandType);
        return doComparison(leftResult, rightResult);
    }

    default: {
        String msg = this.getClass().getSimpleName()
                + " does not know how to handle type: "
                + DataType.findTypeName(operandType);
        throw new ExecException(msg, 2067, PigException.BUG);
    }
    }
  } catch (RuntimeException e) {
      // Wrap unchecked failures, keeping the original exception as the cause.
      String msg = "exception while executing " + this.toString() + ": " + e.toString();
      throw new ExecException(msg, 2067, PigException.BUG, e);
  }
}
 
Example 11
Source File: Schema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a pig Schema from a ResourceSchema, converting nested schemas
 * recursively.
 *
 * A BAG field that carries a schema must contain exactly one field and that
 * field must be a TUPLE; a BAG without a schema is accepted.
 *
 * @param rSchema the resource schema to convert; may be null
 * @return the equivalent Schema, or null if rSchema is null
 * @throws FrontendException declared for the schema construction and
 *         validation calls made here
 */
public static Schema getPigSchema(ResourceSchema rSchema) 
throws FrontendException {
    if (rSchema == null) {
        return null;
    }

    List<FieldSchema> converted = new ArrayList<FieldSchema>();
    for (ResourceFieldSchema resourceField : rSchema.getFields()) {
        ResourceSchema nested = resourceField.getSchema();
        Schema nestedPigSchema = (nested == null) ? null : getPigSchema(nested);
        FieldSchema fs = new FieldSchema(
                resourceField.getName(), nestedPigSchema, resourceField.getType());

        // allow partial schema: only validate a bag that actually has one
        boolean bagWithSchema =
                resourceField.getType() == DataType.BAG && fs.schema != null;
        if (bagWithSchema
                && (fs.schema.size() != 1
                        || fs.schema.getField(0).type != DataType.TUPLE)) {
            ResourceFieldSchema.throwInvalidSchemaException();
        }

        converted.add(fs);
    }
    return new Schema(converted);
}
 
Example 12
Source File: Identity.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the output as a single unnamed TUPLE field whose inner schema is
 * the input schema itself.
 *
 * @param input the input schema, reused as the tuple's schema
 * @return a schema with one TUPLE field
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema.FieldSchema wrappedInput =
                new Schema.FieldSchema(null, input, DataType.TUPLE);
        return new Schema(wrappedInput);
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 13
Source File: TypeCheckingExpVisitor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Check that the fieldSch is NOT a bag with an empty tuple schema.
 *
 * A field counts as a "bag with empty tuple" when it is a BAG with a non-null
 * schema whose first field is non-null, is a TUPLE, and has a null schema.
 *
 * @param fieldSch the field schema to inspect
 * @return false if fieldSch is a bag with empty tuple as described above,
 *         true otherwise
 * @throws FrontendException declared for the schema field lookup
 */
private static boolean isNotBagWithEmptyTuple(FieldSchema fieldSch)
throws FrontendException {
    if (fieldSch.type != DataType.BAG || fieldSch.schema == null) {
        return true;
    }

    FieldSchema firstField = fieldSch.schema.getField(0);
    if (firstField == null) {
        return true;
    }

    boolean emptyTuple = firstField.type == DataType.TUPLE && firstField.schema == null;
    return !emptyTuple;
}
 
Example 14
Source File: AvroStorageUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Check whether the field is just a wrapped tuple, i.e. a TUPLE whose name
 * equals {@code AvroStorageUtils.PIG_TUPLE_WRAPPER}.
 *
 * @param pigSchema the field schema to inspect
 * @return true if the field is a TUPLE with the wrapper name; false otherwise,
 *         including when the field has no name
 */
public static boolean isTupleWrapper(ResourceFieldSchema pigSchema) {
    // A plain boolean expression avoids the boxed Boolean local and the
    // unbraced nested ifs; the checks short-circuit in the original order.
    return pigSchema.getType() == DataType.TUPLE
            && pigSchema.getName() != null
            && pigSchema.getName().equals(AvroStorageUtils.PIG_TUPLE_WRAPPER);
}
 
Example 15
Source File: TestPigStreamingUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes a streaming-encoded tuple against a
 * (chararray, int, long, long) schema and checks the decoded value,
 * whose first field is expected to be null.
 */
@Test
public void testDeserialize__bug() throws Exception {
    byte[] input = "|(_|-_|,_32|,_987654321098765432|,_987654321098765432|)_|_".getBytes();

    // Tuple schema: (chararray, int, long, long), all with empty aliases.
    List<FieldSchema> columns = new ArrayList<FieldSchema>();
    columns.add(new FieldSchema("", DataType.CHARARRAY));
    columns.add(new FieldSchema("", DataType.INTEGER));
    columns.add(new FieldSchema("", DataType.LONG));
    columns.add(new FieldSchema("", DataType.LONG));

    FieldSchema tupleField = new FieldSchema("", new Schema(columns), DataType.TUPLE);
    PigStreamingUDF udf = new PigStreamingUDF(tupleField);

    Tuple expectedRow = tf.newTuple(4);
    expectedRow.set(0, null);
    expectedRow.set(1, 32);
    expectedRow.set(2, 987654321098765432L);
    expectedRow.set(3, 987654321098765432L);

    Object actual = udf.deserialize(input, 0, input.length);

    // The expected row is itself passed through tf.newTuple before comparing.
    Assert.assertEquals(tf.newTuple(expectedRow), actual);
}
 
Example 16
Source File: AvroSchema2Pig.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Add a field schema to a bag schema.
 *
 * The sub-field is used directly when it is already a TUPLE and is wrapped
 * in a tuple otherwise; the result becomes the only field of the schema set
 * on fieldSchema.
 *
 * @param fieldSchema the bag field whose schema is replaced
 * @param subFieldSchema the field to place inside the bag
 * @throws IOException declared for the wrapping and schema-setting calls
 */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                    ResourceFieldSchema subFieldSchema) throws IOException
{
    ResourceFieldSchema tupleField;
    if (subFieldSchema.getType() == DataType.TUPLE)
    {
        tupleField = subFieldSchema;
    }
    else
    {
        tupleField = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceSchema bagContents = new ResourceSchema();
    bagContents.setFields(new ResourceFieldSchema[] { tupleField });
    fieldSchema.setSchema(bagContents);
}
 
Example 17
Source File: BagToTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the output as a single TUPLE field that carries the same fields
 * as the tuples of the input bag.
 *
 * @param inputSchema the input schema; must hold exactly one field, a BAG
 *        whose first field is a TUPLE with at least one field
 * @return the output schema, or null if a FrontendException is raised while
 *         reading or building schemas
 * @throws RuntimeException if the input does not have the expected shape,
 *         including a bag that carries no inner schema
 */
@Override
public Schema outputSchema(Schema inputSchema) {
	try {
		if ((inputSchema == null) || inputSchema.size() != 1) {
			throw new RuntimeException("Expecting 1 input, found " + 
					((inputSchema == null) ? 0 : inputSchema.size()));
		}

		Schema.FieldSchema inputFieldSchema = inputSchema.getField(0);
		if (inputFieldSchema.type != DataType.BAG) {
			throw new RuntimeException("Expecting a bag of tuples: {()}");
		}

		// A bag declared without an inner schema used to fail with a bare
		// NullPointerException on the getField call below; report it with the
		// same descriptive error as the other shape violations.
		Schema bagSchema = inputFieldSchema.schema;
		if (bagSchema == null) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found: " + inputSchema);
		}

		// first field in the bag schema: must be a tuple with a non-empty schema
		Schema.FieldSchema firstFieldSchema = bagSchema.getField(0);
		if ((firstFieldSchema == null) || (firstFieldSchema.schema == null)
				|| firstFieldSchema.schema.size() < 1
				|| firstFieldSchema.type != DataType.TUPLE) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found: " + inputSchema);
		}

		// now for output schema: copy the tuple's fields in order
		Schema tupleOutputSchema = new Schema();
		for (int i = 0; i < firstFieldSchema.schema.size(); ++i) {
			tupleOutputSchema.add(firstFieldSchema.schema.getField(i));
		}
		return new Schema(new Schema.FieldSchema(getSchemaName(this
				.getClass().getName().toLowerCase(), inputSchema), tupleOutputSchema,
				DataType.TUPLE));
	} catch (FrontendException e) {
		// Existing behaviour, kept as is: the failure is printed and null is returned.
		e.printStackTrace();
		return null;
	}
}
 
Example 18
Source File: POSort.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the next value of the given type from the first leaf of the plan.
 *
 * @param plan the physical plan whose first leaf is evaluated
 * @param resultType the DataType code of the value to fetch
 * @return the result returned by the leaf's getNext call
 * @throws ExecException (code 2082) if resultType is not one of the type
 *         codes handled here
 */
private Result getResult(PhysicalPlan plan, byte resultType) throws ExecException {
	ExpressionOperator leaf = (ExpressionOperator) plan.getLeaves().get(0);

	switch (resultType) {
	case DataType.BOOLEAN:
	case DataType.INTEGER:
	case DataType.LONG:
	case DataType.FLOAT:
	case DataType.DOUBLE:
	case DataType.BIGINTEGER:
	case DataType.BIGDECIMAL:
	case DataType.DATETIME:
	case DataType.BYTEARRAY:
	case DataType.CHARARRAY:
	case DataType.TUPLE:
		return leaf.getNext(resultType);

	default: {
		String msg = "Did not expect result of type: "
				+ DataType.findTypeName(resultType);
		throw new ExecException(msg, 2082, PigException.BUG);
	}
	}
}
 
Example 19
Source File: RegexExtractAll.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the output as a single TUPLE field with no inner schema, named
 * via getSchemaName from the lower-cased class name and the input schema.
 *
 * @param input the input schema, used only to derive the field name
 * @return the output schema, or null if any exception occurs
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        String fieldName = getSchemaName(this.getClass().getName().toLowerCase(), input);
        Schema.FieldSchema tupleField = new Schema.FieldSchema(fieldName, DataType.TUPLE);
        return new Schema(tupleField);
    } catch (Exception e) {
        return null;
    }
}
 
Example 20
Source File: WeightedReservoirSample.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the input schema and describes the output as a BAG with the same
 * inner schema as the input bag.
 *
 * The first input field must be a BAG whose first field is a TUPLE with a
 * schema, and the tuple field at weightIdx must be INTEGER, LONG, FLOAT or
 * DOUBLE.
 *
 * @param input the input schema
 * @return a schema with one BAG field reusing the input bag's schema
 * @throws RuntimeException if validation fails, or wrapping a
 *         FrontendException (which is also printed to standard error)
 */
@Override
public Schema outputSchema(Schema input) {
  try {
    Schema.FieldSchema bagField = input.getField(0);
    if (bagField.type != DataType.BAG) {
      throw new RuntimeException("Expected a BAG as input");
    }

    Schema.FieldSchema tupleField = bagField.schema.getField(0);
    if (tupleField.type != DataType.TUPLE) {
      String foundType = DataType.findTypeName(tupleField.type);
      throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               foundType));
    }

    Schema tupleSchema = tupleField.schema;
    if (tupleSchema == null) {
      throw new RuntimeException("The tuple of input bag has no schema");
    }

    List<Schema.FieldSchema> tupleFields = tupleSchema.getFields();
    if (tupleFields == null || tupleFields.size() <= Math.max(0, this.weightIdx)) {
      throw new RuntimeException("The field schema of the input tuple is null " +
                                 "or the tuple size is no more than the weight field index: "
                                 + this.weightIdx);
    }

    // The weight column must be one of the four numeric types below.
    byte weightType = tupleFields.get(this.weightIdx).type;
    boolean numericWeight = weightType == DataType.INTEGER
                         || weightType == DataType.LONG
                         || weightType == DataType.FLOAT
                         || weightType == DataType.DOUBLE;
    if (!numericWeight) {
      String[] expectedTypes = new String[] {DataType.findTypeName(DataType.INTEGER),
                                             DataType.findTypeName(DataType.LONG),
                                             DataType.findTypeName(DataType.FLOAT),
                                             DataType.findTypeName(DataType.DOUBLE)};
      throw new RuntimeException("Expect the type of the weight field of the input tuple to be of (" +
              java.util.Arrays.toString(expectedTypes) + "), but instead found (" +
              DataType.findTypeName(weightType) + "), weight field: " +
              this.weightIdx);
    }

    String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    return new Schema(new Schema.FieldSchema(outputName, bagField.schema, DataType.BAG));
  } catch (FrontendException e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  }
}