org.apache.pig.ResourceSchema.ResourceFieldSchema Java Examples

The following examples show how to use org.apache.pig.ResourceSchema.ResourceFieldSchema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenRandomData.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the field schema describing the "mixed tuple to convert" fixture.
 *
 * <p>The result is a TUPLE-typed field whose nested schema lists eleven unnamed columns,
 * in this order: chararray, long, int, double, float, chararray, int, double, float,
 * boolean, datetime. Columns of the same type share one {@code ResourceFieldSchema}
 * instance.
 *
 * @return the tuple field schema wrapping the eleven-column schema
 * @throws IOException declared by this method; presumably propagated from the schema
 *         setters (not visible here)
 */
public static ResourceFieldSchema getMixedTupleToConvertFieldSchema() throws IOException {
    // One descriptor per primitive type; some of them appear twice in the column list.
    ResourceFieldSchema charArrayField = new ResourceFieldSchema().setType(DataType.CHARARRAY);
    ResourceFieldSchema integerField = new ResourceFieldSchema().setType(DataType.INTEGER);
    ResourceFieldSchema longField = new ResourceFieldSchema().setType(DataType.LONG);
    ResourceFieldSchema floatField = new ResourceFieldSchema().setType(DataType.FLOAT);
    ResourceFieldSchema doubleField = new ResourceFieldSchema().setType(DataType.DOUBLE);
    ResourceFieldSchema booleanField = new ResourceFieldSchema().setType(DataType.BOOLEAN);
    ResourceFieldSchema dateTimeField = new ResourceFieldSchema().setType(DataType.DATETIME);

    ResourceFieldSchema[] columns = {
        charArrayField, longField, integerField, doubleField, floatField,
        charArrayField, integerField, doubleField, floatField,
        booleanField, dateTimeField
    };
    ResourceSchema columnSchema = new ResourceSchema();
    columnSchema.setFields(columns);

    // The nested schema is attached before the type is set, as in the original sequence.
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(columnSchema);
    tupleField.setType(DataType.TUPLE);
    return tupleField;
}
 
Example #2
Source File: OrcStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the names of the fields whose type allows them to take part in predicate pushdown.
 *
 * <p>Fields of type boolean, int, long, float, double, datetime, chararray, biginteger and
 * bigdecimal are reported; every other type is skipped.
 *
 * @param location location passed through to {@code getSchema}
 * @param job      job passed through to {@code getSchema}
 * @return the names of the qualifying fields; empty when no schema is available or no
 *         field qualifies
 * @throws IOException if resolving the schema fails
 */
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
    List<String> predicateFields = new ArrayList<String>();
    ResourceSchema schema = getSchema(location, job);
    if (schema == null) {
        // getSchema(...) can return null when no type information could be resolved;
        // without a schema there is nothing to push down, so avoid the NPE below.
        return predicateFields;
    }
    for (ResourceFieldSchema field : schema.getFields()) {
        switch(field.getType()) {
        case DataType.BOOLEAN:
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
        case DataType.DATETIME:
        case DataType.CHARARRAY:
        case DataType.BIGINTEGER:
        case DataType.BIGDECIMAL:
            predicateFields.add(field.getName());
            break;
        default:
            // Skip DataType.BYTEARRAY, DataType.TUPLE, DataType.MAP and DataType.BAG
            break;
        }
    }
    return predicateFields;
}
 
Example #3
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test that ResourceSchema is correctly created given a
 * pig.Schema and vice versa.
 *
 * <p>Builds a flat two-column schema ("f1" chararray, "f2" int), wraps it in a
 * {@code ResourceSchema}, checks each field's name and type, then converts back to a
 * pig schema and compares it with the original.
 */
@Test
public void testResourceFlatSchemaCreation() 
throws ExecException, SchemaMergeException, FrontendException {
    String [] aliases ={"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema origSchema = TypeCheckingTestUtil.genFlatSchema(
            aliases,types);
    ResourceSchema rsSchema = new ResourceSchema(origSchema);
    ResourceSchema.ResourceFieldSchema[] fields = rsSchema.getFields();
    assertEquals("num fields", aliases.length, fields.length);
    for (int i=0; i<fields.length; i++) {
        // assertEquals takes (expected, actual); the known inputs are the expectation,
        // so a failure message reports the values the right way round.
        assertEquals(aliases[i], fields[i].getName());
        assertEquals(types[i], fields[i].getType());
    }
    Schema genSchema = Schema.getPigSchema(rsSchema);
    assertTrue("generated schema equals original", 
            Schema.equals(genSchema, origSchema, true, false));
}
 
Example #4
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test invalid Resource Schema: multiple fields for a bag.
 *
 * <p>A three-column schema is attached directly to a BAG-typed field. Nothing else is
 * invoked afterwards, so the expected {@code FrontendException} must be raised while that
 * field is being built (presumably by {@code setSchema} - TODO confirm).
 *
 * @throws IOException declared so that the expected exception can propagate
 */
@Test(expected=FrontendException.class) 
public void testToPigSchemaWithInvalidSchema() throws IOException {
    ResourceFieldSchema charColumn = new ResourceFieldSchema()
            .setName("fld0").setType(DataType.CHARARRAY);
    ResourceFieldSchema doubleColumn = new ResourceFieldSchema()
            .setName("fld1").setType(DataType.DOUBLE);
    ResourceFieldSchema intColumn = new ResourceFieldSchema()
            .setName("fld2").setType(DataType.INTEGER);

    ResourceSchema threeColumnSchema = new ResourceSchema()
            .setFields(new ResourceFieldSchema[] { charColumn, doubleColumn, intColumn });

    // Bag whose schema holds three plain columns; the array itself is never read.
    ResourceFieldSchema[] bagLevel = {
            new ResourceFieldSchema()
                .setName("t2").setType(DataType.BAG).setSchema(threeColumnSchema)
    };
}
 
Example #5
Source File: Utf8StorageConverter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a byte array into a tuple according to the given field schema.
 *
 * <p>Parse failures do not propagate: an {@code IOException} raised while consuming the
 * tuple is logged as a FIELD_DISCARDED_TYPE_CONVERSION_FAILED warning and {@code null}
 * is returned instead.
 *
 * @param b           raw bytes to parse; may be {@code null}
 * @param fieldSchema schema handed to {@code consumeTuple}
 * @return the parsed tuple, or {@code null} when {@code b} is {@code null} or parsing failed
 * @throws IOException declared by the signature; failures from {@code consumeTuple} are
 *         caught and logged here
 */
@Override
public Tuple bytesToTuple(byte[] b, ResourceFieldSchema fieldSchema) throws IOException {
    if (b == null) {
        return null;
    }

    try {
        PushbackInputStream stream = new PushbackInputStream(new ByteArrayInputStream(b));
        return consumeTuple(stream, fieldSchema);
    } catch (IOException e) {
        String message = "Unable to interpret value " + Arrays.toString(b) + " in field being " +
                "converted to type tuple, caught ParseException <" +
                e.getMessage() + "> field discarded";
        LogUtils.warn(this, message,
                PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
        return null;
    }
}
 
Example #6
Source File: Utf8StorageConverter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a byte array into a map according to the given field schema.
 *
 * <p>Parse failures do not propagate: an {@code IOException} raised while consuming the
 * map is logged as a FIELD_DISCARDED_TYPE_CONVERSION_FAILED warning and {@code null}
 * is returned instead.
 *
 * @param b           raw bytes to parse; may be {@code null}
 * @param fieldSchema schema handed to {@code consumeMap}
 * @return the parsed map, or {@code null} when {@code b} is {@code null} or parsing failed
 * @throws IOException declared by the signature; failures from {@code consumeMap} are
 *         caught and logged here
 */
@Override
public Map<String, Object> bytesToMap(byte[] b, ResourceFieldSchema fieldSchema) throws IOException {
    if (b == null) {
        return null;
    }

    try {
        PushbackInputStream stream = new PushbackInputStream(new ByteArrayInputStream(b));
        return consumeMap(stream, fieldSchema);
    } catch (IOException e) {
        String message = "Unable to interpret value " + Arrays.toString(b) + " in field being " +
                "converted to type map, caught ParseException <" +
                e.getMessage() + "> field discarded";
        LogUtils.warn(this, message,
                PigWarning.FIELD_DISCARDED_TYPE_CONVERSION_FAILED, mLog);
        return null;
    }
}
 
Example #7
Source File: TestConversions.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a bag of float tuples (100 tuples, 5 columns) to text, reads it back through
 * the load caster with the schema from {@code getFloatDataBagFieldSchema(5)}, and checks
 * that every converted value is an {@code Integer} equal to the original float truncated
 * via {@code intValue()}.
 */
@Test
public void testBytesToBagWithConversion() throws IOException {
    DataBag floatBag = GenRandomData.genFloatDataBag(r,5,100);
    ResourceFieldSchema bagSchema = GenRandomData.getFloatDataBagFieldSchema(5);
    byte[] serialized = floatBag.toString().getBytes();
    DataBag convertedBag = ps.getLoadCaster().bytesToBag(serialized, bagSchema);

    Iterator<Tuple> sourceRows = floatBag.iterator();
    Iterator<Tuple> convertedRows = convertedBag.iterator();
    int row = 0;
    while (row < 100) {
        Tuple sourceRow = sourceRows.next();
        assertTrue(convertedRows.hasNext());
        Tuple convertedRow = convertedRows.next();
        for (int col = 0; col < 5; col++) {
            Object convertedValue = convertedRow.get(col);
            assertTrue(convertedValue instanceof Integer);
            Integer expectedValue = ((Float) sourceRow.get(col)).intValue();
            assertEquals(expectedValue, convertedValue);
        }
        row++;
    }
}
 
Example #8
Source File: AegisthusLoader.java    From aegisthus with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a field schema with the given name, type and nested schema.
 *
 * @param name   field name
 * @param type   Pig data type code for the field
 * @param schema nested schema to attach to the field
 * @return the newly built field schema
 * @throws IOException declared by this method; presumably raised by the setters
 */
protected ResourceFieldSchema subfield(String name, byte type, ResourceSchema schema) throws IOException {
	// Same setter order as before: name, then type, then nested schema.
	return new ResourceFieldSchema()
			.setName(name)
			.setType(type)
			.setSchema(schema);
}
 
Example #9
Source File: TestConversions.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips {@code MAX} random maps through their string form and the load caster's
 * {@code bytesToMap}, asserting each converted map equals the original.
 */
@Test
public  void testBytesToMap() throws IOException
{
    ResourceFieldSchema mapSchema = GenRandomData.getRandMapFieldSchema();

    for (int round = 0; round < MAX; round++) {
        Map<String, Object> original = GenRandomData.genRandMap(r,5);
        byte[] serialized = DataType.mapToString(original).getBytes();
        Map<String, Object> roundTripped = ps.getLoadCaster().bytesToMap(serialized, mapSchema);
        assertTrue(TestHelper.mapEquals(original, roundTripped));
    }
}
 
Example #10
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the type info for the schema being stored and records it in the UDF properties.
 *
 * <p>The schema is wrapped in a TUPLE field, converted through {@code OrcUtils.getTypeInfo},
 * kept in {@code typeInfo}, and stored in serialized form under the key
 * {@code signature + SchemaSignatureSuffix}.
 *
 * @param rs schema of the data about to be stored
 * @throws IOException if building the wrapper field or serializing the type info fails
 */
@Override
public void checkSchema(ResourceSchema rs) throws IOException {
    ResourceFieldSchema rowField = new ResourceFieldSchema()
            .setType(DataType.TUPLE)
            .setSchema(rs);
    typeInfo = OrcUtils.getTypeInfo(rowField);

    Properties udfProperties = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    String propertyKey = signature + SchemaSignatureSuffix;
    udfProperties.setProperty(propertyKey, ObjectSerializer.serialize(typeInfo));
}
 
Example #11
Source File: PigSchema2Avro.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Convert a pig ResourceSchema to avro schema.
 *
 * <p>A schema with exactly one field is treated as a pig tuple wrapper: the wrapper is
 * dropped and only that field is converted. Any other schema is converted as a record.
 *
 * @param pigSchema pig schema to convert
 * @param nullable  nullability flag forwarded to the field/record conversion
 * @return the resulting avro schema
 * @throws IOException if the underlying conversion fails
 */
public static Schema convert(ResourceSchema pigSchema, boolean nullable) throws IOException {
    ResourceFieldSchema[] fields = pigSchema.getFields();

    if (fields.length != 1) {
        return convertRecord(fields, nullable);
    }

    /* remove the pig tuple wrapper */
    AvroStorageLog.details("Ignore the pig tuple wrapper.");
    return convert(fields[0], nullable);
}
 
Example #12
Source File: AvroStorageUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a pig field schema in a single-field tuple.
 *
 * @param subFieldSchema field schema to wrap
 * @return a TUPLE field named {@code PIG_TUPLE_WRAPPER} whose schema contains only
 *         {@code subFieldSchema}
 * @throws IOException declared by this method; presumably raised by the schema setters
 */
public static ResourceFieldSchema wrapAsTuple(ResourceFieldSchema subFieldSchema) throws IOException {
    ResourceSchema singleFieldSchema = new ResourceSchema()
            .setFields(new ResourceFieldSchema[] { subFieldSchema });

    // Setter order is kept: type, then name, then nested schema.
    return new ResourceFieldSchema()
            .setType(DataType.TUPLE)
            .setName(PIG_TUPLE_WRAPPER)
            .setSchema(singleFieldSchema);
}
 
Example #13
Source File: Utf8StorageConverter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Converts raw bytes into a value of the type described by the field schema.
 *
 * @param b  raw bytes of the field
 * @param fs schema describing the field
 * @return the result of {@code consumeComplexType} when the schema type is complex,
 *         otherwise the result of {@code parseSimpleType}
 * @throws IOException if parsing fails
 */
private Object bytesToObject(byte[] b, ResourceFieldSchema fs) throws IOException {
    if (!DataType.isComplex(fs.getType())) {
        return parseSimpleType(b, fs);
    }
    // Complex types are parsed from a pushback stream over the bytes.
    PushbackInputStream stream = new PushbackInputStream(new ByteArrayInputStream(b));
    return consumeComplexType(stream, fs);
}
 
Example #14
Source File: Utf8StorageConverter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads a bag from the stream: an opening '{', then tuples separated by ',' and closed
 * by '}'.
 *
 * @param in          stream positioned at or before the bag's opening brace
 * @param fieldSchema schema of the bag; its nested schema must hold exactly one field,
 *                    which is used as the schema of every tuple
 * @return a default bag holding every non-null tuple returned by {@code consumeTuple}
 * @throws IOException if {@code fieldSchema} is {@code null}, the stream ends before the
 *         bag is complete, or the nested schema does not have exactly one field
 */
private DataBag consumeBag(PushbackInputStream in, ResourceFieldSchema fieldSchema) throws IOException {
    if (fieldSchema == null) {
        throw new IOException("Schema is null");
    }
    ResourceFieldSchema[] innerFields = fieldSchema.getSchema().getFields();

    // Discard everything up to and including the opening brace.
    int ch = in.read();
    while (ch != '{') {
        if (ch == -1) {
            throw new IOException("Unexpect end of bag");
        }
        ch = in.read();
    }

    // The arity check deliberately happens after the opening brace has been consumed.
    if (innerFields.length != 1) {
        throw new IOException("Only tuple is allowed inside bag schema");
    }
    ResourceFieldSchema tupleSchema = innerFields[0];
    DataBag bag = DefaultBagFactory.getInstance().newDefaultBag();

    do {
        Tuple tuple = consumeTuple(in, tupleSchema);
        if (tuple != null) {
            bag.add(tuple);
        }
        // Skip ahead to the next separator (',') or the end of the bag ('}').
        ch = in.read();
        while (ch != '}' && ch != ',') {
            if (ch == -1) {
                throw new IOException("Unexpect end of bag");
            }
            ch = in.read();
        }
    } while (ch != '}');

    return bag;
}
 
Example #15
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the NULL type of any top-level field with BYTEARRAY, in place.
 *
 * @param s schema whose fields are inspected and possibly modified
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s){
  ResourceFieldSchema[] fields = s.getFields();
  for (int i = 0; i < fields.length; i++) {
    ResourceFieldSchema field = fields[i];
    if (field.getType() != DataType.NULL) {
      continue;
    }
    field.setType(DataType.BYTEARRAY);
  }
  return s;
}
 
Example #16
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes one tuple as a JSON object and hands it to the record writer.
 *
 * <p>One JSON field is written per schema field. Schema positions beyond the end of the
 * tuple are written with a {@code null} value.
 *
 * @param t tuple to store
 * @throws IOException if JSON generation or writing fails, or if the write is interrupted
 *         (the {@code InterruptedException} is wrapped)
 */
@SuppressWarnings("unchecked")
public void putNext(Tuple t) throws IOException {
    // JSON is generated into an in-memory buffer, UTF-8 encoded.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(BUF_SIZE);
    JsonGenerator generator =
        jsonFactory.createJsonGenerator(buffer, JsonEncoding.UTF8);

    // The whole tuple becomes one top-level JSON object.
    generator.writeStartObject();

    ResourceFieldSchema[] columns = schema.getFields();
    for (int col = 0; col < columns.length; col++) {
        if (col >= t.size()) {
            // Tuple is shorter than the schema: emit null for the missing column.
            writeField(generator, columns[col], null);
        } else {
            writeField(generator, columns[col], t.get(col));
        }
    }
    generator.writeEndObject();
    generator.close();

    // Hand a null key and our string to Hadoop
    Text row = new Text(buffer.toByteArray());
    try {
        writer.write(null, row);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    }
}
 
Example #17
Source File: AvroSchema2Pig.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the schema of a bag field to a single-element schema holding the given sub-field.
 *
 * <p>A sub-field that is not already a TUPLE is first wrapped with
 * {@code AvroStorageUtils.wrapAsTuple}.
 *
 * @param fieldSchema    bag field whose schema is replaced
 * @param subFieldSchema element field to place inside the bag
 * @throws IOException if wrapping the sub-field or setting the schema fails
 */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                    ResourceFieldSchema subFieldSchema) throws IOException
{
    ResourceFieldSchema element = subFieldSchema;
    if (subFieldSchema.getType() != DataType.TUPLE) {
        element = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceSchema elementSchema = new ResourceSchema();
    elementSchema.setFields(new ResourceFieldSchema[] { element });
    fieldSchema.setSchema(elementSchema);
}
 
Example #18
Source File: AvroSchema2Pig.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
  * Sets the schema of a bag field to a one-field schema containing the given sub-field,
  * wrapping the sub-field in a tuple first when its type is not already TUPLE.
  *
  * @param fieldSchema    bag field that receives the new schema
  * @param subFieldSchema field describing the bag's elements
  * @throws IOException if wrapping the sub-field or setting the schema fails
  */
static protected void add2BagSchema(ResourceFieldSchema fieldSchema,
                                ResourceFieldSchema subFieldSchema)
                                throws IOException {

    final ResourceFieldSchema tupleElement;
    if (subFieldSchema.getType() == DataType.TUPLE) {
        tupleElement = subFieldSchema;
    } else {
        tupleElement = AvroStorageUtils.wrapAsTuple(subFieldSchema);
    }

    ResourceFieldSchema[] bagContents = { tupleElement };
    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(bagContents);

    fieldSchema.setSchema(bagSchema);
}
 
Example #19
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a comma-separated string from the schema fields flagged as required.
 *
 * @param schema          schema whose fields are indexed by {@code requiredColumns}
 * @param requiredColumns one flag per field position; {@code true} selects that field
 * @return the selected fields joined by ',' with no trailing comma; an empty string when
 *         no column is required
 */
private String getReqiredColumnNamesString(ResourceSchema schema, boolean[] requiredColumns) {
    StringBuilder sb = new StringBuilder();
    ResourceFieldSchema[] fields = schema.getFields();
    for (int i = 0; i < requiredColumns.length; i++) {
        if (requiredColumns[i]) {
            // NOTE(review): this appends the field's toString(), not getName() - confirm
            // that is the intended text for a "column names" string.
            sb.append(fields[i]).append(",");
        }
    }
    // Drop the trailing separator. The length guard matters: with no required column the
    // builder is empty and charAt(-1) would throw StringIndexOutOfBoundsException.
    int last = sb.length() - 1;
    if (last >= 0 && sb.charAt(last) == ',') {
        sb.deleteCharAt(last);
    }
    return sb.toString();
}
 
Example #20
Source File: OrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the schema derived from the ORC type info.
 *
 * <p>The type info is resolved through {@code getTypeInfo(location, job)} on first use and
 * kept in {@code typeInfo}.
 *
 * @param location location forwarded to {@code getTypeInfo}
 * @param job      job forwarded to {@code getTypeInfo}
 * @return the schema derived from the type info, or {@code null} when no type info could
 *         be resolved
 * @throws IOException if resolving the type info or building the schema fails
 */
@Override
public ResourceSchema getSchema(String location, Job job)
        throws IOException {
    if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    if (typeInfo == null) {
        // still null means case of multiple load store
        return null;
    }

    return OrcUtils.getResourceFieldSchema(typeInfo).getSchema();
}
 
Example #21
Source File: PigSchema2Avro.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Validates a Pig tuple against an Avro schema and produces the output Avro schema.
 *
 * <p>When the Avro schema is not a record, the Pig tuple is treated as a wrapper and must
 * hold exactly one field, e.g. an Avro schema "int" is compatible with a Pig schema
 * "T:(int)". When the Avro schema is a partial (user-defined) record, Pig fields it does
 * not cover are converted with {@code convert(...)}; otherwise the number of Avro and Pig
 * fields must match.
 *
 * @param avroSchema Avro schema to validate against
 * @param pigFields  fields of the Pig tuple
 * @return a new record schema, or the result of validating the single wrapped field
 * @throws IOException if the field counts are incompatible or a nested
 *         conversion/validation fails
 */
protected static Schema validateAndConvertRecord(Schema avroSchema, ResourceFieldSchema[] pigFields) throws IOException {

    /* Get rid of Pig tuple wrappers. */
    if (!avroSchema.getType().equals(Schema.Type.RECORD)) {
        if (pigFields.length == 1) {
            return validateAndConvert(avroSchema, pigFields[0]);
        }
        throw new IOException("Expect only one field in Pig tuple schema. Avro schema is " + avroSchema.getType());
    }

    /* validate and convert a pig tuple with avro record */
    boolean partial = AvroStorageUtils.isUDPartialRecordSchema(avroSchema);
    AvroStorageLog.details("isPartialSchema=" + partial);

    String recordName;
    if (partial) {
        recordName = getRecordName();
    } else {
        recordName = avroSchema.getName();
    }
    Schema result = Schema.createRecord(recordName, avroSchema.getDoc(), avroSchema.getNamespace(), false);

    List<Schema.Field> avroFields = avroSchema.getFields();
    if (!partial && avroFields.size() != pigFields.length) {
        throw new IOException("Expect " + avroFields.size() + " fields in pig schema." + " But there are " + pigFields.length);
    }

    List<Schema.Field> convertedFields = new ArrayList<Schema.Field>();

    for (int i = 0; i < pigFields.length; i++) {
        ResourceFieldSchema pigField = pigFields[i];

        /* user defined avro field, looked up by position */
        Field declared;
        if (partial) {
            declared = AvroStorageUtils.getUDField(avroSchema, i);
        } else {
            declared = avroFields.get(i);
        }

        Schema fieldSchema;
        if (declared == null) {
            /* no avro field for this position: convert pig schema (nullable) */
            fieldSchema = convert(pigField, true);
        } else if (declared.schema() == null) {
            /* avro field without a schema: convert pig schema (not-null) */
            fieldSchema = convert(pigField, false);
        } else {
            /* validate the pig field with the given avro schema */
            fieldSchema = validateAndConvert(declared.schema(), pigField);
        }

        /* name and doc come from the pig field for partial schemas, else from avro */
        String fieldName;
        String fieldDoc;
        if (partial) {
            fieldName = pigField.getName();
            fieldDoc = pigField.getDescription();
        } else {
            fieldName = declared.name();
            fieldDoc = declared.doc();
        }
        if (fieldName == null) {
            fieldName = FIELD_NAME + "_" + i; // field name cannot be null
        }

        JsonNode defaultValue = null;
        if (declared != null) {
            defaultValue = declared.defaultValue();
        }

        convertedFields.add(new Field(fieldName, fieldSchema, fieldDoc, defaultValue));
    }

    result.setFields(convertedFields);
    return result;

}
 
Example #22
Source File: PigValueWriter.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks to see if the given field is a schema-less Map that has values.
 *
 * @param schema field schema of the tuple position being inspected
 * @param field  index of the value inside {@code object}
 * @param object tuple holding the value
 * @return true if Map has no schema but has values (mixed schema map). false if not a Map
 *         or if Map is null or empty.
 * @throws EsHadoopIllegalStateException if reading the tuple value fails
 */
private boolean isPopulatedMixedValueMap(ResourceFieldSchema schema, int field, Tuple object) {
    // Only MAP-typed fields can be mixed value maps.
    if (schema.getType() == DataType.MAP) {
        Map<?, ?> candidate;
        try {
            candidate = (Map<?, ?>) object.get(field);
        } catch (ExecException e) {
            throw new EsHadoopIllegalStateException(e);
        }
        return schema.getSchema() == null && candidate != null && !candidate.isEmpty();
    }
    return false;
}
 
Example #23
Source File: Utils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Prepends a chararray field for the 'input source tag/path' to the schema's fields.
 * This will be called only when PigStorage is invoked with '-tagFile' or '-tagPath'
 * option and the schema file is present to be loaded.
 *
 * <p>The field array of the passed-in schema is replaced via {@code setFields}.
 *
 * @param schema    schema to extend
 * @param fieldName name given to the new first field
 * @return the value returned by {@code schema.setFields(...)}
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] existing = schema.getFields();
    ResourceFieldSchema[] extended = new ResourceFieldSchema[existing.length + 1];

    // Slot 0 holds the source tag; the existing fields follow, shifted by one.
    extended[0] = new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));
    System.arraycopy(existing, 0, extended, 1, existing.length);

    return schema.setFields(extended);
}
 
Example #24
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a TUPLE field schema with two unnamed columns: an int followed by a chararray.
 *
 * @return the tuple field schema
 * @throws IOException declared by this method; presumably raised by the schema setters
 */
ResourceFieldSchema getTupleFieldSchema() throws IOException {
    ResourceFieldSchema charArrayColumn = new ResourceFieldSchema().setType(DataType.CHARARRAY);
    ResourceFieldSchema intColumn = new ResourceFieldSchema().setType(DataType.INTEGER);

    ResourceSchema columns = new ResourceSchema();
    columns.setFields(new ResourceFieldSchema[]{intColumn, charArrayColumn});

    // Nested schema first, then the type, matching the original call sequence.
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(columns);
    tupleField.setType(DataType.TUPLE);
    return tupleField;
}
 
Example #25
Source File: PigPerformanceLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes a ^B-separated list of typed elements into a bag of single-field tuples.
 *
 * <p>Each element consists of a one-byte type tag followed by its payload:
 * 105 ('i') integer, 108 ('l') long, 102 ('f') float, 100 ('d') double,
 * 115 ('s') chararray, 109 ('m') map, 98 ('b') nested bag.
 *
 * @param b  encoded bag; may be {@code null}
 * @param fs field schema; not used by this implementation
 * @return the decoded bag, or {@code null} when {@code b} is {@code null}
 * @throws IOException      if storing a decoded value in a tuple fails
 * @throws RuntimeException if an element starts with an unknown type tag
 */
public DataBag bytesToBag(byte[] b, ResourceFieldSchema fs) throws IOException {
    if (b == null) return null;

    DataBag bag = bagFactory.newDefaultBag();

    int pos = 0;
    while (pos < b.length) {
        Tuple t = tupleFactory.newTuple(1);

        // Figure out how long until the next element in the list.
        int start = pos;
        while (pos < b.length && b[pos] != 2) pos++; // 2 is ^B

        // The payload is everything after the type tag, i.e. pos - start - 1 bytes.
        // Sizing the array with pos - start left a stray trailing NUL byte in every
        // payload. An empty element (separator at 'start') yields an empty payload and
        // is still rejected below as an unknown type.
        int payloadLength = Math.max(pos - start - 1, 0);
        byte[] copy = new byte[payloadLength];
        System.arraycopy(b, start + 1, copy, 0, payloadLength);

        // The first byte will tell us what type the field is.
        try {
            switch (b[start]) {
                case 105: t.set(0, bytesToInteger(copy)); break;
                case 108: t.set(0, bytesToLong(copy)); break;
                case 102: t.set(0, bytesToFloat(copy)); break;
                case 100: t.set(0, bytesToDouble(copy)); break;
                case 115: t.set(0, bytesToCharArray(copy)); break;
                case 109: t.set(0, bytesToMap(copy)); break;
                case 98: t.set(0, bytesToBag(copy, null)); break;
                default: throw new RuntimeException("unknown type " + b[start]);
            }
        } catch (ExecException ee) {
            throw new IOException(ee);
        }
        pos++; // move past the separator
        bag.add(t);
    }

    return bag;
}
 
Example #26
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the map literal "[key1#1]" with the schema "m:map[int]" and checks that the
 * single entry has key "key1" and an {@code Integer} value whose string form is "1".
 */
@Test
public void testMapIntegerValueType() throws Exception{
    String mapLiteral = "[key1#1]";
    Schema mapSchema = Utils.getSchemaFromString("m:map[int]");
    ResourceFieldSchema mapField = new ResourceSchema(mapSchema).getFields()[0];

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(mapLiteral.getBytes(), mapField);
    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof Integer);
    assertEquals("1", String.valueOf(parsedValue));
}
 
Example #27
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a TUPLE field schema with two unnamed columns: a chararray followed by an int.
 *
 * @return the tuple field schema
 * @throws IOException declared by this method; presumably raised by the schema setters
 */
public static ResourceFieldSchema getSmallTupleFieldSchema() throws IOException{
    ResourceFieldSchema charArrayColumn = new ResourceFieldSchema().setType(DataType.CHARARRAY);
    ResourceFieldSchema intColumn = new ResourceFieldSchema().setType(DataType.INTEGER);

    ResourceFieldSchema[] columns = {charArrayColumn, intColumn};
    ResourceSchema columnSchema = new ResourceSchema();
    columnSchema.setFields(columns);

    // Nested schema first, then the type, matching the original call sequence.
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    tupleField.setSchema(columnSchema);
    tupleField.setType(DataType.TUPLE);
    return tupleField;
}
 
Example #28
Source File: TestConversions.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes 100 random "mixed" tuples to text and reads each back through the load
 * caster using the schema from {@code getMixedTupleToConvertFieldSchema()}.
 *
 * <p>Positions 0, 1, 3, 4, 5 and 9 must come back converted to the schema type with a
 * value matching the original; positions 2, 6, 7 and 8 must come back as {@code null}
 * (presumably because the generated value cannot be converted to the schema type -
 * TODO confirm against GenRandomData).
 */
@Test
public void testBytesToTupleWithConversion() throws IOException {
    for (int i=0;i<100;i++) {
        Tuple t = GenRandomData.genMixedTupleToConvert(r);
        ResourceFieldSchema fs = GenRandomData.getMixedTupleToConvertFieldSchema();
        Tuple convertedTuple = ps.getLoadCaster().bytesToTuple(t.toString().getBytes(), fs);

        // assertEquals takes (expected, actual): values derived from the source tuple are
        // the expectation, so failure messages report them the right way round.
        assertTrue(convertedTuple.get(0) instanceof String);
        assertEquals(((Integer)t.get(0)).toString(), convertedTuple.get(0));

        assertTrue(convertedTuple.get(1) instanceof Long);
        Integer origValue1 = (Integer)t.get(1);
        assertEquals(Long.valueOf(origValue1.longValue()), convertedTuple.get(1));

        assertNull(convertedTuple.get(2));

        assertTrue(convertedTuple.get(3) instanceof Double);
        Float origValue3 = (Float)t.get(3);
        assertEquals(origValue3.doubleValue(), ((Double)convertedTuple.get(3)).doubleValue(), 0.01);

        assertTrue(convertedTuple.get(4) instanceof Float);
        Double origValue4 = (Double)t.get(4);
        assertEquals(origValue4.floatValue(), (Float)convertedTuple.get(4), 0.01);

        assertTrue(convertedTuple.get(5) instanceof String);
        assertEquals(t.get(5), convertedTuple.get(5));

        assertNull(convertedTuple.get(6));

        assertNull(convertedTuple.get(7));

        assertNull(convertedTuple.get(8));

        assertTrue(convertedTuple.get(9) instanceof Boolean);
        String origValue9 = (String)t.get(9);
        assertEquals(Boolean.valueOf(origValue9), convertedTuple.get(9));
    }
}
 
Example #29
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a BAG field schema whose single element is the tuple schema returned by
 * {@code getSmallBagTextTupleFieldSchema()}.
 *
 * @return the bag field schema
 * @throws IOException if building the nested tuple schema or the bag schema fails
 */
public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException{
    ResourceFieldSchema[] bagElements = { getSmallBagTextTupleFieldSchema() };

    ResourceSchema bagSchema = new ResourceSchema();
    bagSchema.setFields(bagElements);

    // Nested schema first, then the type, matching the original call sequence.
    ResourceFieldSchema bagField = new ResourceFieldSchema();
    bagField.setSchema(bagSchema);
    bagField.setType(DataType.BAG);
    return bagField;
}
 
Example #30
Source File: TestStore.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a random bag of "full" tuples, reads the output file back line by line, rebuilds
 * each tuple through the load caster, and checks that every rebuilt tuple is in the
 * original bag and that the line count equals the bag size.
 *
 * <p>Each line holds eleven tab-separated fields, in order: bag, bytearray, chararray,
 * double, float, int, long, map, tuple, boolean, datetime. An empty field is read back
 * as {@code null}.
 */
@Test
public void testStoreComplexData() throws Exception {
    inpDB = GenRandomData.genRandFullTupTextDataBag(new Random(), 10, 100);
    storeAndCopyLocally(inpDB);
    PigStorage ps = new PigStorage("\t");
    int size = 0;
    BufferedReader br = new BufferedReader(new FileReader(outputFileName));
    try {
        for(String line=br.readLine();line!=null;line=br.readLine()){
            String[] flds = line.split("\t",-1);
            Tuple t = new DefaultTuple();

            ResourceFieldSchema mapfs = GenRandomData.getRandMapFieldSchema();
            ResourceFieldSchema bagfs = GenRandomData.getSmallTupDataBagFieldSchema();
            ResourceFieldSchema tuplefs = GenRandomData.getSmallTupleFieldSchema();

            t.append(flds[0].compareTo("")!=0 ? ps.getLoadCaster().bytesToBag(flds[0].getBytes(), bagfs) : null);
            t.append(flds[1].compareTo("")!=0 ? new DataByteArray(flds[1].getBytes()) : null);
            t.append(flds[2].compareTo("")!=0 ? ps.getLoadCaster().bytesToCharArray(flds[2].getBytes()) : null);
            t.append(flds[3].compareTo("")!=0 ? ps.getLoadCaster().bytesToDouble(flds[3].getBytes()) : null);
            t.append(flds[4].compareTo("")!=0 ? ps.getLoadCaster().bytesToFloat(flds[4].getBytes()) : null);
            t.append(flds[5].compareTo("")!=0 ? ps.getLoadCaster().bytesToInteger(flds[5].getBytes()) : null);
            t.append(flds[6].compareTo("")!=0 ? ps.getLoadCaster().bytesToLong(flds[6].getBytes()) : null);
            t.append(flds[7].compareTo("")!=0 ? ps.getLoadCaster().bytesToMap(flds[7].getBytes(), mapfs) : null);
            t.append(flds[8].compareTo("")!=0 ? ps.getLoadCaster().bytesToTuple(flds[8].getBytes(), tuplefs) : null);
            t.append(flds[9].compareTo("")!=0 ? ps.getLoadCaster().bytesToBoolean(flds[9].getBytes()) : null);
            t.append(flds[10].compareTo("")!=0 ? ps.getLoadCaster().bytesToDateTime(flds[10].getBytes()) : null);
            assertEquals(true, TestHelper.bagContains(inpDB, t));
            ++size;
        }
    } finally {
        // Close the reader even when an assertion or a conversion throws, so a failing
        // run does not leak the file handle.
        br.close();
    }
    assertEquals(true, size==inpDB.size());
}