org.apache.pig.ResourceSchema Java Examples

The following examples show how to use org.apache.pig.ResourceSchema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an invalid resource schema is rejected: a bag whose schema
 * holds several plain fields.
 *
 * @throws IOException declared by the schema setters; the test passes only
 *         when a {@link FrontendException} is raised
 */
@Test(expected = FrontendException.class)
public void testToPigSchemaWithInvalidSchema() throws IOException {
    ResourceFieldSchema charField = new ResourceFieldSchema()
            .setName("fld0").setType(DataType.CHARARRAY);
    ResourceFieldSchema doubleField = new ResourceFieldSchema()
            .setName("fld1").setType(DataType.DOUBLE);
    ResourceFieldSchema intField = new ResourceFieldSchema()
            .setName("fld2").setType(DataType.INTEGER);

    ResourceSchema bagContents = new ResourceSchema()
            .setFields(new ResourceFieldSchema[] { charField, doubleField, intField });

    // Nothing runs after this statement, so the expected exception has to be
    // raised while the bag field is assembled (presumably by setSchema).
    ResourceFieldSchema[] bagLevel = new ResourceFieldSchema[] {
            new ResourceFieldSchema()
                    .setName("t2")
                    .setType(DataType.BAG)
                    .setSchema(bagContents)
    };
}
 
Example #2
Source File: CSVExcelStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Restores the store schema saved in the UDF context properties (when one is
 * present), resolves the default header treatment, and then delegates to the
 * parent implementation.
 *
 * @param writer record writer passed on to {@code super.prepareToWrite}
 */
public void prepareToWrite(RecordWriter writer) {
    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p =
        udfc.getUDFProperties(this.getClass(), new String[]{ udfContextSignature });

    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema != null) {
        // Parse the schema from the string stored in the properties object.
        try {
            schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
        } catch (ParserException pex) {
            // Deliberately best effort: writing continues without a schema,
            // but the cause is logged so the failure can be diagnosed.
            logger.warn("Could not parse schema for storing.", pex);
        }
    }

    // DEFAULT means "no explicit choice"; when writing it resolves to
    // SKIP_OUTPUT_HEADER.
    if (headerTreatment == Headers.DEFAULT) {
        headerTreatment = Headers.SKIP_OUTPUT_HEADER;
    }

    // PigStorage's prepareToWrite()
    super.prepareToWrite(writer);
}
 
Example #3
Source File: FixedWidthLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Remembers the reader and split index, then restores the schema and the
 * required-field mask from the UDF context properties.
 *
 * @param reader record reader kept for later use
 * @param split  split whose index is recorded
 * @throws IOException if no schema string is stored in the UDF context
 */
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Keep the reader around for getNext()
    this.reader = reader;
    splitIndex = split.getSplitIndex();

    // The front-end stored the schema in the UDF properties
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (serializedSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    requiredFields = (boolean[]) ObjectSerializer.deserialize(udfProps.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields == null) {
        // No mask stored: the required-field count is left untouched.
        return;
    }

    numRequiredFields = 0;
    for (boolean required : requiredFields) {
        if (required) {
            numRequiredFields++;
        }
    }
}
 
Example #4
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Iceberg schema holding map -> list -> struct and checks the
 * string form of the resulting Pig schema.
 */
@Test
public void testTupleInMap() throws IOException {
  // Built inside-out: struct(id, data), wrapped in a list, wrapped in a map.
  StructType idAndData = StructType.of(
      required(5, "id", LongType.get()),
      optional(6, "data", StringType.get()));
  ListType listOfStructs = ListType.ofOptional(4, idAndData);
  MapType mapOfLists = MapType.ofOptional(2, 3, StringType.get(), listOfStructs);

  Schema icebergSchema = new Schema(optional(1, "nested_list", mapOfLists));

  // The output should contain a nested struct within a list within a map, I think.
  String converted = SchemaUtil.convert(icebergSchema).toString();
  assertEquals("nested_list:[{(id:long,data:chararray)}]", converted);
}
 
Example #5
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Iceberg schema made only of primitive columns and checks the
 * string form of the resulting Pig schema.
 */
@Test
public void testPrimitive() throws IOException {
  String expectedPigSchema =
      "b:boolean,i:int,l:long,f:float,d:double,dec:bigdecimal,s:chararray,bi:bytearray";

  Schema primitives = new Schema(
      optional(1, "b", BooleanType.get()),
      optional(2, "i", IntegerType.get()),
      optional(3, "l", LongType.get()),
      optional(4, "f", FloatType.get()),
      optional(5, "d", DoubleType.get()),
      optional(6, "dec", DecimalType.of(0, 2)),
      optional(7, "s", StringType.get()),
      optional(8, "bi", BinaryType.get()));

  assertEquals(expectedPigSchema, SchemaUtil.convert(primitives).toString());
}
 
Example #6
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Iceberg schema made only of primitive columns and checks the
 * string form of the resulting Pig schema.
 */
@Test
public void testPrimitive() throws IOException {
  // Each column gets its own field id; ids 1 and 5 used to be assigned twice.
  Schema icebergSchema = new Schema(
      optional(1, "b", BooleanType.get()),
      optional(2, "i", IntegerType.get()),
      optional(3, "l", LongType.get()),
      optional(4, "f", FloatType.get()),
      optional(5, "d", DoubleType.get()),
      optional(6, "dec", DecimalType.of(0, 2)),
      optional(7, "s", StringType.get()),
      optional(8, "bi", BinaryType.get())
  );

  ResourceSchema pigSchema = SchemaUtil.convert(icebergSchema);
  assertEquals(
      "b:boolean,i:int,l:long,f:float,d:double,dec:bigdecimal,s:chararray,bi:bytearray",
      pigSchema.toString());
}
 
Example #7
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an invalid resource schema is rejected: a bag whose schema
 * holds a chararray field instead of a tuple field.
 *
 * @throws IOException declared by the schema setters; the test passes only
 *         when a {@link FrontendException} is raised
 */
@Test(expected = FrontendException.class)
public void testToPigSchemaWithInvalidSchema2() throws IOException {
    ResourceFieldSchema onlyField = new ResourceFieldSchema()
            .setName("fld0").setType(DataType.CHARARRAY);

    ResourceSchema bagContents = new ResourceSchema()
            .setFields(new ResourceFieldSchema[] { onlyField });

    // Nothing runs after this statement, so the expected exception has to be
    // raised while the bag field is assembled (presumably by setSchema).
    ResourceFieldSchema[] bagLevel = new ResourceFieldSchema[] {
            new ResourceFieldSchema()
                    .setName("t2")
                    .setType(DataType.BAG)
                    .setSchema(bagContents)
    };
}
 
Example #8
Source File: SchemaUtilTest.java    From iceberg with Apache License 2.0 6 votes vote down vote up
/**
 * Converts an Iceberg map-of-list-of-struct schema and compares the Pig
 * schema string with the expected rendering.
 */
@Test
public void testTupleInMap() throws IOException {
  StructType rowStruct = StructType.of(
      required(5, "id", LongType.get()),
      optional(6, "data", StringType.get()));
  MapType nestedMap = MapType.ofOptional(
      2, 3,
      StringType.get(),
      ListType.ofOptional(4, rowStruct));
  Schema icebergSchema = new Schema(optional(1, "nested_list", nestedMap));

  ResourceSchema pigSchema = SchemaUtil.convert(icebergSchema);

  // The output should contain a nested struct within a list within a map, I think.
  String expected = "nested_list:[{(id:long,data:chararray)}]";
  assertEquals(expected, pigSchema.toString());
}
 
Example #9
Source File: AegisthusLoader.java    From aegisthus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a schema with a single tuple field named "column" whose nested
 * schema lists name, value, ts, status and ttl.
 *
 * @return the schema wrapping the "column" tuple field
 * @throws IOException propagated from the schema/field setters
 */
protected ResourceSchema columnSchema() throws IOException {
	// Members of the inner tuple, in declaration order.
	ResourceFieldSchema[] tupleMembers = new ResourceFieldSchema[] {
		field("name", DataType.BYTEARRAY),
		field("value", DataType.BYTEARRAY),
		field("ts", DataType.LONG),
		field("status", DataType.CHARARRAY),
		field("ttl", DataType.LONG)
	};

	ResourceSchema tuple = new ResourceSchema();
	tuple.setFields(tupleMembers);

	// The type is assigned before the nested schema is attached.
	ResourceFieldSchema column = new ResourceFieldSchema();
	column.setName("column");
	column.setType(DataType.TUPLE);
	column.setSchema(tuple);

	ResourceSchema schema = new ResourceSchema();
	schema.setFields(new ResourceFieldSchema[] { column });
	return schema;
}
 
Example #10
Source File: OrcStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Lists the names of the schema fields whose type is one of boolean,
 * integer, long, float, double, datetime, chararray, biginteger or
 * bigdecimal.
 *
 * @param location location passed on to {@code getSchema}
 * @param job      job passed on to {@code getSchema}
 * @return names of the matching fields, in schema order
 * @throws IOException propagated from {@code getSchema}
 */
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
    List<String> eligibleNames = new ArrayList<String>();
    for (ResourceFieldSchema candidate : getSchema(location, job).getFields()) {
        byte type = candidate.getType();
        // Everything else (BYTEARRAY, TUPLE, MAP, BAG, ...) is skipped.
        boolean eligible = type == DataType.BOOLEAN
                || type == DataType.INTEGER
                || type == DataType.LONG
                || type == DataType.FLOAT
                || type == DataType.DOUBLE
                || type == DataType.DATETIME
                || type == DataType.CHARARRAY
                || type == DataType.BIGINTEGER
                || type == DataType.BIGDECIMAL;
        if (eligible) {
            eligibleNames.add(candidate.getName());
        }
    }
    return eligibleNames;
}
 
Example #11
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test that ResourceSchema is correctly created given a
 * pig.Schema and vice versa: field names and types must match the inputs,
 * and converting back must yield a schema equal to the original.
 */
@Test
public void testResourceFlatSchemaCreation() 
throws ExecException, SchemaMergeException, FrontendException {
    String [] aliases ={"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema origSchema = TypeCheckingTestUtil.genFlatSchema(
            aliases,types);
    ResourceSchema rsSchema = new ResourceSchema(origSchema);
    assertEquals("num fields", aliases.length, rsSchema.getFields().length);
    ResourceSchema.ResourceFieldSchema[] fields = rsSchema.getFields();
    for (int i=0; i<fields.length; i++) {
        // Expected value first, so a failure message reads the right way round.
        assertEquals(aliases[i], fields[i].getName());
        assertEquals(types[i], fields[i].getType());
    }
    Schema genSchema = Schema.getPigSchema(rsSchema);
    assertTrue("generated schema equals original", 
            Schema.equals(genSchema, origSchema, true, false));
}
 
Example #12
Source File: JsonLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the reader, restores the schema saved in the UDF context
 * properties, and creates the JSON factory.
 *
 * @param reader record reader kept for later use
 * @param split  not used by this method
 * @throws IOException if no schema string is stored in the UDF context
 */
@SuppressWarnings("unchecked")
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
    this.reader = reader;

    // The schema string lives in this loader's UDFContext properties.
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (null == serializedSchema) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Turn the stored string back into a ResourceSchema.
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    jsonFactory = new JsonFactory();
}
 
Example #13
Source File: AvroStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Translates the given resource schema to an Avro schema and registers it
 * as the output schema.
 *
 * @param rs resource schema to translate; must not be null
 * @throws IOException if {@code rs} is null or the translation yields null
 */
@Override
public final void checkSchema(final ResourceSchema rs) throws IOException {
  if (rs == null) {
    throw new IOException("checkSchema: called with null ResourceSchema");
  }

  // A missing or empty schema name falls back to "pig_output".
  final boolean nameMissing = schemaName == null || schemaName.length() == 0;
  final Schema translated =
      AvroStorageSchemaConversionUtilities.resourceSchemaToAvroSchema(
          rs,
          nameMissing ? "pig_output" : schemaName,
          schemaNameSpace,
          Maps.<String, List<Schema>> newHashMap(),
          doubleColonsToDoubleUnderscores);

  if (translated == null) {
    throw new IOException("checkSchema: could not translate ResourceSchema to Avro Schema");
  }
  setOutputAvroSchema(translated);
}
 
Example #14
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test one-level Pig Schema: multiple fields for a bag. Converting such a
 * schema must fail with a FrontendException carrying error code 2218.
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() 
throws FrontendException {
    String [] aliases ={"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema level0 = TypeCheckingTestUtil.genFlatSchema(
            aliases,types);
    Schema.FieldSchema fld0 = 
        new Schema.FieldSchema("f0", level0, DataType.BAG);
    Schema level1 = new Schema(fld0);
    try {
        Schema.getPigSchema(new ResourceSchema(level1));
        Assert.fail("Expected a FrontendException for a bag with multiple fields");
    } catch(FrontendException e) {
        // assertEquals reports the actual error code when it differs.
        assertEquals(2218, e.getErrorCode());
    }
}
 
Example #15
Source File: JsonStorage.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the writer, restores the schema saved in the UDF context
 * properties, and creates the JSON factory.
 *
 * @param writer record writer kept for writing tuples later
 * @throws IOException if no schema string is stored in the UDF context
 */
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Hold on to the record writer; it is needed once tuples are written.
    this.writer = writer;

    // The schema string lives in this storer's UDFContext properties.
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (null == serializedSchema) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Turn the stored string back into a ResourceSchema.
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    // Factory used for JSON output.
    jsonFactory = new JsonFactory();
}
 
Example #16
Source File: PigTuple.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the schema field with a new tuple-typed field carrying the given
 * schema.
 *
 * @param schema schema to attach to the new tuple field
 * @throws EsHadoopIllegalStateException if attaching the schema raises an IOException
 */
public void setSchema(ResourceSchema schema) {
    ResourceFieldSchema tupleField = new ResourceFieldSchema();
    // The member is assigned first, so it already points at the new field
    // even when attaching the schema fails below.
    schemaField = tupleField;
    tupleField.setType(DataType.TUPLE);
    try {
        tupleField.setSchema(schema);
    } catch (IOException ex) {
        String message = String.format("Cannot use schema [%s]", schema);
        throw new EsHadoopIllegalStateException(message, ex);
    }
}
 
Example #17
Source File: Utils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * This method adds FieldSchema of 'input source tag/path' as the first
 * field. This will be called only when PigStorage is invoked with
 * '-tagFile' or '-tagPath' option and the schema file is present to be
 * loaded.
 * <p>
 * The new field array is installed on the passed-in {@code schema} through
 * {@code setFields}; no separate schema object is created here.
 *
 * @param schema schema whose fields are prefixed with the tag field
 * @param fieldName name of the chararray tag field to prepend
 * @return the value returned by {@code schema.setFields} for the extended
 *         field array
 */
public static ResourceSchema getSchemaWithInputSourceTag(ResourceSchema schema, String fieldName) {
    ResourceFieldSchema[] fieldSchemas = schema.getFields();
    ResourceFieldSchema sourceTagSchema = new ResourceFieldSchema(new FieldSchema(fieldName, DataType.CHARARRAY));

    ResourceFieldSchema[] fieldSchemasWithSourceTag = new ResourceFieldSchema[fieldSchemas.length + 1];
    fieldSchemasWithSourceTag[0] = sourceTagSchema;
    // Shift the existing fields one slot to the right, behind the tag field.
    System.arraycopy(fieldSchemas, 0, fieldSchemasWithSourceTag, 1, fieldSchemas.length);

    return schema.setFields(fieldSchemasWithSourceTag);
}
 
Example #18
Source File: ReadToEndLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the wrapped load function when it implements
 * {@code LoadMetadata}; otherwise there is no schema to report.
 *
 * @param location location passed on to the wrapped loader
 * @param job      job passed on to the wrapped loader
 * @return the wrapped loader's schema, or {@code null} when the wrapped
 *         loader is not a {@code LoadMetadata}
 * @throws IOException propagated from the wrapped loader
 */
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    if (!(wrappedLoadFunc instanceof LoadMetadata)) {
        return null;
    }
    LoadMetadata metadataSource = (LoadMetadata) wrappedLoadFunc;
    return metadataSource.getSchema(location, job);
}
 
Example #19
Source File: TestStore.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test hook: verifies that output is already present under {@code location}
 * when storeSchema is called, then drops a marker file proving the call
 * happened.
 *
 * @param schema   not used by this method
 * @param location output location to inspect; also the prefix of the marker file
 * @param job      job supplying the configuration for the file system
 * @throws IOException if no "part-" output file is found, or if the marker
 *         file cannot be created
 */
@Override
public void storeSchema(ResourceSchema schema, String location,
        Job job) throws IOException {
    FileSystem fs = FileSystem.get(job.getConfiguration());

    FileStatus[] outputFiles = fs.listStatus(new Path(location),
            Util.getSuccessMarkerPathFilter());
    // verify that output is available prior to storeSchema call
    Path resultPath = null;
    if (outputFiles != null && outputFiles.length > 0
            && outputFiles[0].getPath().getName().startsWith("part-")) {
        resultPath = outputFiles[0].getPath();
    }
    if (resultPath == null) {
        // Dump what is actually there to help diagnose the failure.
        FileStatus[] listing = fs.listStatus(new Path(location));
        for (FileStatus fstat : listing) {
            System.err.println("Output File:" + fstat.getPath());
        }
        // not creating the marker file below fails the test
        throw new IOException("" + resultPath + " not available in storeSchema");
    }
    // create a file to test that this method got called - if it gets called
    // multiple times, the create will throw an Exception
    // (overwrite == false). The returned stream is closed right away so the
    // handle is not leaked.
    fs.create(
            new Path(location + "_storeSchema_test"),
            false).close();
}
 
Example #20
Source File: HBaseStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requires the caster to be a {@code LoadStoreCaster}, then remembers the
 * schema and stores its serialized form in the UDF properties.
 *
 * @param s schema to remember and serialize
 * @throws IOException if the caster does not implement {@code LoadStoreCaster}
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    boolean casterCanStore = caster_ instanceof LoadStoreCaster;
    if (!casterCanStore) {
        LOG.error("Caster must implement LoadStoreCaster for writing to HBase.");
        throw new IOException("Bad Caster " + caster_.getClass());
    }

    schema_ = s;
    String propertyKey = contextSignature + "_schema";
    getUDFProperties().setProperty(propertyKey, ObjectSerializer.serialize(schema_));
}
 
Example #21
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a tuple field schema whose nested schema is (integer, chararray).
 *
 * @return the tuple field schema
 * @throws IOException propagated from the schema setters
 */
ResourceFieldSchema getTupleFieldSchema() throws IOException {
    ResourceFieldSchema textMember = new ResourceFieldSchema().setType(DataType.CHARARRAY);
    ResourceFieldSchema numberMember = new ResourceFieldSchema().setType(DataType.INTEGER);

    // The integer member comes first in the tuple.
    ResourceSchema members = new ResourceSchema()
            .setFields(new ResourceFieldSchema[]{numberMember, textMember});

    // Same call order as before: schema attached first, type set second.
    return new ResourceFieldSchema()
            .setSchema(members)
            .setType(DataType.TUPLE);
}
 
Example #22
Source File: DBStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Performs no validation; the schema's string form is stored in this
 * class's UDFContext properties so it is available later on the backend.
 *
 * @param s schema whose {@code toString()} form is stored
 * @throws IOException declared by the overridden method
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String schemaText = s.toString();
    udfProps.setProperty(SCHEMA_SIGNATURE, schemaText);
}
 
Example #23
Source File: PigStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the schema through a {@code JsonMetadata} writer when schema
 * storage is switched on; otherwise does nothing.
 *
 * @param schema   schema to store
 * @param location location passed on to the metadata writer
 * @param job      job passed on to the metadata writer
 * @throws IOException propagated from the metadata writer
 */
@Override
public void storeSchema(ResourceSchema schema, String location,
        Job job) throws IOException {
    if (!isSchemaOn) {
        return;
    }

    JsonMetadata schemaWriter = new JsonMetadata();
    schemaWriter.setFieldDel(fieldDel);
    // Records are newline-delimited.
    schemaWriter.setRecordDel((byte) '\n');
    schemaWriter.storeSchema(schema, location, job);
}
 
Example #24
Source File: FixedWidthLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a loader from a column spec, a header flag and a schema string.
 *
 * @param columnSpec    column specification handed to {@code parseColumnSpec}
 * @param skipHeaderStr "SKIP_HEADER" (case-insensitive) to skip the header;
 *                      any other value, including {@code null}, leaves the
 *                      skipHeader flag untouched
 * @param schemaStr     schema string; all whitespace is stripped before parsing
 * @throws IllegalArgumentException if the schema cannot be parsed, contains
 *         a map, tuple or bag field, or declares more fields than the column
 *         spec has columns
 */
public FixedWidthLoader(String columnSpec, String skipHeaderStr, String schemaStr) {
    try {
        columns = parseColumnSpec(columnSpec);
        schemaStr = schemaStr.replaceAll("[\\s\\r\\n]", "");
        schema = new ResourceSchema(Utils.getSchemaFromString(schemaStr));
        fields = schema.getFields();

        for (int i = 0; i < fields.length; i++) {
            byte fieldType = fields[i].getType();
            if (fieldType == DataType.MAP || fieldType == DataType.TUPLE || fieldType == DataType.BAG) {
                throw new IllegalArgumentException(
                    "Field \"" + fields[i].getName() + "\" is an object type (map, tuple, or bag). " + 
                    "Object types are not supported by FixedWidthLoader."
                );
            }
        }

        // Extra columns are tolerated with a warning; extra fields are an error.
        if (fields.length < columns.size()) {
            warn("More columns specified in column spec than fields specified in schema. Only loading fields specified in schema.",
                 PigWarning.UDF_WARNING_2);
        } else if (fields.length > columns.size()) {
            throw new IllegalArgumentException("More fields specified in schema than columns specified in column spec.");
        }
    } catch (ParserException e) {
        // Keep the parser exception as the cause so its stack trace survives.
        throw new IllegalArgumentException("Invalid schema format: " + e.getMessage(), e);
    }

    // Constant-first comparison: a null flag means "do not skip" instead of
    // a NullPointerException.
    if ("SKIP_HEADER".equalsIgnoreCase(skipHeaderStr)) {
        skipHeader = true;
    }
}
 
Example #25
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Retypes every top-level field of type NULL to BYTEARRAY, modifying the
 * given schema's fields in place.
 *
 * @param s schema whose fields are adjusted
 * @return the same schema instance that was passed in
 */
public ResourceSchema fixSchema(ResourceSchema s){
  ResourceFieldSchema[] columns = s.getFields();
  for (int idx = 0; idx < columns.length; idx++) {
    ResourceFieldSchema column = columns[idx];
    if (column.getType() == DataType.NULL) {
      column.setType(DataType.BYTEARRAY);
    }
  }
  return s;
}
 
Example #26
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses "[key1#0.1]" against a map[double] field schema and checks that the
 * value for key1 is a Double printing as "0.1".
 */
@Test
public void testMapDoubleValueType() throws Exception{
    ResourceFieldSchema mapField =
        new ResourceSchema(Utils.getSchemaFromString("m:map[double]")).getFields()[0];
    byte[] rawMap = "[key1#0.1]".getBytes();

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(rawMap, mapField);

    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");
    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof Double);
    assertEquals("0.1", String.valueOf(parsedValue));
}
 
Example #27
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses "[key1#value1]" against a map[chararray] field schema and checks
 * that the value for key1 is the String "value1".
 */
@Test
public void testMapStringValueType() throws Exception{
    ResourceFieldSchema mapField =
        new ResourceSchema(Utils.getSchemaFromString("m:map[chararray]")).getFields()[0];
    byte[] rawMap = "[key1#value1]".getBytes();

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(rawMap, mapField);

    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");
    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof String);
    assertEquals("value1", String.valueOf(parsedValue));
}
 
Example #28
Source File: JsonStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Performs no validation; the schema is passed through {@code fixSchema}
 * and its string form is stored in this class's UDFContext properties so it
 * is available later on the backend.
 *
 * @param s schema to fix up and store
 * @throws IOException declared by the overridden method
 */
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String schemaText = fixSchema(s).toString();
    udfProps.setProperty(SCHEMA_SIGNATURE, schemaText);
}
 
Example #29
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bag field schema whose only member is the field returned by
 * {@code getSmallTupleFieldSchema()}.
 *
 * @return the bag field schema
 * @throws IOException propagated from the schema setters
 */
public static ResourceFieldSchema getSmallTupDataBagFieldSchema() throws IOException {
    ResourceFieldSchema[] bagMembers = new ResourceFieldSchema[]{ getSmallTupleFieldSchema() };
    ResourceSchema innerSchema = new ResourceSchema().setFields(bagMembers);

    // Same call order as before: schema attached first, BAG type set second.
    return new ResourceFieldSchema()
            .setSchema(innerSchema)
            .setType(DataType.BAG);
}
 
Example #30
Source File: GenRandomData.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a bag field schema whose only member is the field returned by
 * {@code getSmallBagTextTupleFieldSchema()}.
 *
 * @return the outer bag field schema
 * @throws IOException propagated from the schema setters
 */
public static ResourceFieldSchema getFullTupTextDataBagFieldSchema() throws IOException{
    ResourceFieldSchema[] bagMembers = new ResourceFieldSchema[]{ getSmallBagTextTupleFieldSchema() };
    ResourceSchema innerSchema = new ResourceSchema().setFields(bagMembers);

    // Same call order as before: schema attached first, BAG type set second.
    return new ResourceFieldSchema()
            .setSchema(innerSchema)
            .setType(DataType.BAG);
}