Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema#add()

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema#add(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Describes the output as a single unnamed bag field whose inner schema
 * holds three unnamed fields: chararray, int and float, in that order.
 *
 * @param input schema of the arguments; not consulted here
 * @return the one-field bag schema, or {@code null} when building the bag
 *         field raises a {@link FrontendException}
 */
@Override
public Schema outputSchema(Schema input) {
    Schema innerSchema = new Schema();
    innerSchema.add(new FieldSchema(null, DataType.CHARARRAY));
    innerSchema.add(new FieldSchema(null, DataType.INTEGER));
    innerSchema.add(new FieldSchema(null, DataType.FLOAT));

    final Schema.FieldSchema bagField;
    try {
        bagField = new Schema.FieldSchema(null, innerSchema, DataType.BAG);
    } catch (FrontendException fee) {
        // The bag field could not be constructed: report "no schema".
        return null;
    }
    return new Schema(bagField);
}
 
Example 2
Source File: MarkovPairs.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the output schema: a bag of two-field tuples named
 * {@code elem1} and {@code elem2}, each carrying the schema of the first
 * field found inside the input bag.
 *
 * @param input schema of the arguments; its first field must be a BAG
 * @return the bag schema, or {@code null} when the input schema cannot be
 *         inspected (for example it is {@code null} or a lookup fails)
 * @throws RuntimeException if the first input field is present but is not
 *         a BAG
 */
@Override
public Schema outputSchema(Schema input)
{
  FieldSchema fieldSchema;
  try {
    fieldSchema = input.getField(0);
  }
  catch (Exception e) {
    // No usable input schema (null input, missing first field): keep the
    // best-effort "unknown schema" answer.
    return null;
  }

  if (fieldSchema == null)
  {
    return null;
  }

  // Validate outside the try blocks so this diagnostic actually reaches the
  // caller instead of being swallowed and turned into a null schema.
  if (fieldSchema.type != DataType.BAG)
  {
    throw new RuntimeException(String.format("Expected input schema to be BAG, but instead found %s",
                                             DataType.findTypeName(fieldSchema.type)));
  }

  try {
    Schema tupleSchema = new Schema();

    FieldSchema fieldSchema2 = fieldSchema.schema.getField(0);

    tupleSchema.add(new Schema.FieldSchema("elem1", fieldSchema2.schema));
    tupleSchema.add(new Schema.FieldSchema("elem2", fieldSchema2.schema));
    return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass().getName().toLowerCase(), input),
                                             tupleSchema,
                                             DataType.BAG));
  }
  catch (Exception e) {
    // The bag's inner schema is missing or could not be read: schema unknown.
    return null;
  }
}
 
Example 3
Source File: WeeksBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is two unnamed DATETIME fields.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argSchema = new Schema();
    for (int i = 0; i < 2; i++) {
        argSchema.add(new Schema.FieldSchema(null, DataType.DATETIME));
    }

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), argSchema));
    return mappings;
}
 
Example 4
Source File: BagToTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the output schema for a single bag-of-tuples argument: one TUPLE
 * field containing every field of the tuple found inside the input bag.
 *
 * @param inputSchema schema of the arguments; must hold exactly one BAG
 *        field whose first inner field is a non-empty TUPLE
 * @return the tuple schema, or {@code null} when a schema lookup raises a
 *         {@link FrontendException}
 * @throws RuntimeException if the input does not have the expected shape
 */
@Override
public Schema outputSchema(Schema inputSchema) {
	try {
		if ((inputSchema == null) || inputSchema.size() != 1) {
			throw new RuntimeException("Expecting 1 input, found " + 
					((inputSchema == null) ? 0 : inputSchema.size()));
		}

		Schema.FieldSchema inputFieldSchema = inputSchema.getField(0);
		if (inputFieldSchema.type != DataType.BAG) {
			throw new RuntimeException("Expecting a bag of tuples: {()}");
		}

		// A bag declared without an inner schema has a null schema; report it
		// with the descriptive message instead of a bare NullPointerException.
		Schema bagSchema = inputFieldSchema.schema;
		if (bagSchema == null) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found: " + inputSchema);
		}

		// first field in the bag schema
		Schema.FieldSchema firstFieldSchema = bagSchema.getField(0);
		if ((firstFieldSchema == null) || (firstFieldSchema.schema == null)
				|| firstFieldSchema.schema.size() < 1) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found: " + inputSchema);
		}

		if (firstFieldSchema.type != DataType.TUPLE) {
			throw new RuntimeException("Expecting a bag of tuples: {()}, found: " + inputSchema);
		}

		// now for output schema: copy every field of the inner tuple
		Schema tupleOutputSchema = new Schema();
		for (int i = 0; i < firstFieldSchema.schema.size(); ++i) {
			tupleOutputSchema.add(firstFieldSchema.schema.getField(i));
		}
		return new Schema(new Schema.FieldSchema(getSchemaName(this
				.getClass().getName().toLowerCase(), inputSchema), tupleOutputSchema,
				DataType.TUPLE));
	} catch (FrontendException e) {
		e.printStackTrace();
		return null;
	}
}
 
Example 5
Source File: COUNT.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is a single unnamed BAG field.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema bagArg = new Schema();
    bagArg.add(new Schema.FieldSchema(null, DataType.BAG));

    FuncSpec spec = new FuncSpec(this.getClass().getName(), bagArg);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(spec);
    return mappings;
}
 
Example 6
Source File: YearsBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the one supported argument signature for this class: two unnamed
 * DATETIME fields.
 *
 * @return a single-element list of {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema first = new Schema.FieldSchema(null, DataType.DATETIME);
    Schema.FieldSchema second = new Schema.FieldSchema(null, DataType.DATETIME);

    Schema signature = new Schema();
    signature.add(first);
    signature.add(second);

    List<FuncSpec> result = new ArrayList<FuncSpec>();
    result.add(new FuncSpec(this.getClass().getName(), signature));
    return result;
}
 
Example 7
Source File: TestUnion.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Unions two single-column relations loaded with different loaders
 * (default loader and TextLoader), casts the columns, and checks both the
 * resulting schema (a:int, b:chararray) and the total record count.
 */
@Test
public void testCastingAfterUnionWithMultipleLoadersDifferentCasters()
    throws Exception {
    // The different-caster case only works while each field still comes from
    // a single loader: 'a' comes from A (PigStorage) and 'b' from B
    // (TextLoader), with no overlap.
    File numbersFile = Util.createInputFile("tmp", "i1.txt", new String[] {"1","2","3"});
    File lettersFile = Util.createInputFile("tmp", "i2.txt", new String[] {"a","b","c"});

    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    // PigStorage and TextLoader have different LoadCasters
    String numbersPath = Util.encodeEscape(numbersFile.getAbsolutePath());
    pig.registerQuery("A = load '" + numbersPath + "' as (a:bytearray);");
    String lettersPath = Util.encodeEscape(lettersFile.getAbsolutePath());
    pig.registerQuery("B = load '" + lettersPath + "' using TextLoader() as (b:bytearray);");
    pig.registerQuery("C = union onschema A,B;");
    pig.registerQuery("D = foreach C generate (int)a as a,(chararray)b as b;");

    Schema actualSchema = pig.dumpSchema("D");
    Schema expected = new Schema ();
    expected.add(new Schema.FieldSchema("a", DataType.INTEGER));
    expected.add(new Schema.FieldSchema("b", DataType.CHARARRAY));
    assertEquals(expected, actualSchema);

    // Drain the result and count rows; three from each input file.
    int rows = 0;
    Iterator<Tuple> results = pig.openIterator("D");
    while (results.next() != null) {
        rows++;
    }
    assertEquals(6, rows);
}
 
Example 8
Source File: REGEX_EXTRACT_ALL.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is two unnamed CHARARRAY fields.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argSchema = new Schema();
    int remaining = 2;
    while (remaining-- > 0) {
        argSchema.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }

    String className = this.getClass().getName();
    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(className, argSchema));
    return mappings;
}
 
Example 9
Source File: MinutesBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the one supported argument signature for this class: a pair of
 * unnamed DATETIME fields.
 *
 * @return a single-element list of {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    byte[] argTypes = {DataType.DATETIME, DataType.DATETIME};

    Schema signature = new Schema();
    for (byte argType : argTypes) {
        signature.add(new Schema.FieldSchema(null, argType));
    }

    List<FuncSpec> specs = new ArrayList<FuncSpec>();
    specs.add(new FuncSpec(this.getClass().getName(), signature));
    return specs;
}
 
Example 10
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Plans a foreach whose bincond picks between a function call and the
 * grouped bag, then checks that the foreach schema equals a bag of
 * (chararray, int, float) tuples.
 */
@Test
public void testBincond() throws Throwable {
    String query = "a= load 'a' as (name: chararray, age: int, gpa: float);"
        + "b = group a by name;"
        + "c = foreach b generate (IsEmpty(a) ? " + TestBinCondFieldSchema.class.getName() + "(*): a);";

    LOForEach foreach = getForeachFromPlan(query);

    // Expected shape: bag { tuple (chararray, int, float) }, all unnamed.
    Schema innerTuple = new Schema();
    innerTuple.add(new FieldSchema(null, DataType.CHARARRAY));
    innerTuple.add(new FieldSchema(null, DataType.INTEGER));
    innerTuple.add(new FieldSchema(null, DataType.FLOAT));

    Schema bagContents = new Schema();
    bagContents.add(new FieldSchema(null, innerTuple, DataType.TUPLE));

    Schema.FieldSchema bagField = null;
    try {
        bagField = new Schema.FieldSchema(null, bagContents, DataType.BAG);
    } catch (FrontendException fee) {
        fail("Did not expect an error");
    }

    Schema expectedSchema = new Schema(bagField);
    Schema actualSchema = org.apache.pig.newplan.logical.Util.translateSchema(foreach.getSchema());
    assertTrue(Schema.equals(actualSchema, expectedSchema, false, true));
}
 
Example 11
Source File: MonthsBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is two unnamed DATETIME fields.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema dateTimePair = new Schema();
    dateTimePair.add(new Schema.FieldSchema(null, DataType.DATETIME));
    dateTimePair.add(new Schema.FieldSchema(null, DataType.DATETIME));

    String className = this.getClass().getName();
    FuncSpec spec = new FuncSpec(className, dateTimePair);

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(spec);
    return mappings;
}
 
Example 12
Source File: RegexExtract.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is, in order, two unnamed CHARARRAY fields followed by
 * an unnamed INTEGER field.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    byte[] argTypes = {DataType.CHARARRAY, DataType.CHARARRAY, DataType.INTEGER};

    Schema argSchema = new Schema();
    for (byte argType : argTypes) {
        argSchema.add(new Schema.FieldSchema(null, argType));
    }

    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(new FuncSpec(this.getClass().getName(), argSchema));
    return mappings;
}
 
Example 13
Source File: TestUnion.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the same two-column file through two different loaders, unions
 * the relations, casts the columns, and checks both the resulting schema
 * (a:int, b:chararray) and the total record count.
 */
@Test
public void testCastingAfterUnionWithMultipleLoadersSameCaster()
    throws Exception {
    // Fields come from different loaders that share the same LoadCaster.
    File inputFile = Util.createInputFile("tmp", "i1.txt", new String[] {"1\ta","2\tb","3\tc"});
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());

    // PigStorage and PigStorageWithStatistics have the same
    // LoadCaster(== Utf8StorageConverter)
    String pathForA = Util.encodeEscape(inputFile.getAbsolutePath());
    pig.registerQuery("A = load '" + pathForA + "' as (a:bytearray, b:bytearray);");
    String pathForB = Util.encodeEscape(inputFile.getAbsolutePath());
    pig.registerQuery("B = load '" + pathForB +
      "' using org.apache.pig.test.PigStorageWithStatistics() as (a:bytearray, b:bytearray);");
    pig.registerQuery("C = union onschema A,B;");
    pig.registerQuery("D = foreach C generate (int)a as a,(chararray)b as b;");
    // 'a' is coming from A and 'b' is coming from B; No overlaps.

    Schema actualSchema = pig.dumpSchema("D");
    Schema expected = new Schema ();
    expected.add(new Schema.FieldSchema("a", DataType.INTEGER));
    expected.add(new Schema.FieldSchema("b", DataType.CHARARRAY));
    assertEquals(expected, actualSchema);

    // Drain the result and count rows; the three-line file is read twice.
    int rows = 0;
    Iterator<Tuple> results = pig.openIterator("D");
    while (results.next() != null) {
        rows++;
    }
    assertEquals(6, rows);
}
 
Example 14
Source File: TestStitch.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds {@code Stitch.outputSchema} a two-bag input (a bag of
 * (x: chararray, y: int) plus a nested bag of NULL) and checks the
 * rendered output schema string.
 */
@Test
public void testSchema() throws Exception {
    Schema xyFields = new Schema();
    Schema inputSchema = new Schema();
    xyFields.add(new FieldSchema("x", DataType.CHARARRAY));
    xyFields.add(new FieldSchema("y", DataType.INTEGER));
    inputSchema.add(new FieldSchema("A", xyFields, DataType.BAG));

    // Second argument: a bag named "Over" wrapping a generated nested schema.
    Schema nested = new Schema(Schema.generateNestedSchema(DataType.BAG,
            DataType.NULL));
    inputSchema.add(new FieldSchema("Over", nested, DataType.BAG));

    Schema stitched = new Stitch().outputSchema(inputSchema);
    assertEquals("{stitched: {x: chararray,y: int,{NULL}}}", stitched.toString());
}
 
Example 15
Source File: DiffDate.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the one supported argument signature for this class: two unnamed
 * CHARARRAY fields.
 *
 * @return a single-element list of {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema firstArg = new Schema.FieldSchema(null, DataType.CHARARRAY);
    Schema.FieldSchema secondArg = new Schema.FieldSchema(null, DataType.CHARARRAY);

    Schema signature = new Schema();
    signature.add(firstArg);
    signature.add(secondArg);

    List<FuncSpec> specs = new ArrayList<FuncSpec>();
    specs.add(new FuncSpec(this.getClass().getName(), signature));
    return specs;
}
 
Example 16
Source File: ToTuple.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps every field of the input schema, in order, into a single TUPLE
 * field named via {@code getSchemaName}.
 *
 * @param input schema of the arguments
 * @return the one-field tuple schema, or {@code null} if anything fails
 *         while building it
 */
@Override
public Schema outputSchema(Schema input) {
    try {
        Schema fields = new Schema();
        int index = 0;
        while (index < input.size()) {
            fields.add(input.getField(index));
            ++index;
        }

        String tupleName = getSchemaName(this.getClass().getName().toLowerCase(), input);
        Schema.FieldSchema tupleField = new Schema.FieldSchema(tupleName, fields, DataType.TUPLE);
        return new Schema(tupleField);
    } catch (Exception e) {
        // Any failure means the schema is reported as unknown.
        return null;
    }
}
 
Example 17
Source File: ISOYearsBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the argument-to-function mapping for this class: one entry whose
 * argument schema is two unnamed CHARARRAY fields.
 *
 * @return a list holding that single {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argSchema = new Schema();
    for (int position = 0; position < 2; ++position) {
        argSchema.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }

    FuncSpec spec = new FuncSpec(this.getClass().getName(), argSchema);
    List<FuncSpec> mappings = new ArrayList<FuncSpec>();
    mappings.add(spec);
    return mappings;
}
 
Example 18
Source File: RegexExtractAll.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the one supported argument signature for this class: a pair of
 * unnamed CHARARRAY fields.
 *
 * @return a single-element list of {@link FuncSpec}
 * @throws FrontendException as declared by the signature
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    byte[] argTypes = {DataType.CHARARRAY, DataType.CHARARRAY};

    Schema signature = new Schema();
    for (byte argType : argTypes) {
        signature.add(new Schema.FieldSchema(null, argType));
    }

    String className = this.getClass().getName();
    List<FuncSpec> result = new ArrayList<FuncSpec>();
    result.add(new FuncSpec(className, signature));
    return result;
}
 
Example 19
Source File: VespaDocumentOperationTest.java    From vespa with Apache License 2.0 4 votes vote down vote up
/**
 * Appends a field description to {@code schema} and the matching value to
 * {@code bag}.
 *
 * @param alias name of the new field
 * @param type data type code of the new field
 * @param value tuple added to the bag
 * @param schemaInField nested schema attached to the new field
 * @param schema schema that receives the new field
 * @param bag bag that receives {@code value}
 * @throws FrontendException if the field schema cannot be constructed
 */
private void addToBagWithSchema(String alias, byte type, Tuple value, Schema schemaInField, Schema schema,DataBag bag)
        throws FrontendException {
    Schema.FieldSchema field = new Schema.FieldSchema(alias, schemaInField, type);
    schema.add(field);
    bag.add(value);
}
 
Example 20
Source File: PageRank.java    From datafu with Apache License 2.0 4 votes vote down vote up
/**
 * Validates the shape of the input schema and returns the output schema:
 * a bag of (node: int, rank: float) tuples.
 *
 * <p>The input must be a BAG containing a TUPLE with two fields (source
 * INTEGER, edges BAG), or three when node biasing is enabled (the third
 * being a DOUBLE bias). The edges bag must contain a TUPLE of
 * (INTEGER, DOUBLE).
 *
 * @param input schema of the arguments
 * @return the output bag schema
 * @throws RuntimeException if the input does not match the expected shape,
 *         or wrapping a {@link FrontendException} raised by a schema lookup
 */
@Override
public Schema outputSchema(Schema input)
{
  try
  {
    // --- outer bag ---
    Schema.FieldSchema bagField = input.getField(0);
    if (bagField.type != DataType.BAG)
    {
      throw new RuntimeException("Expected a BAG as input");
    }

    // --- node tuple inside the bag ---
    Schema.FieldSchema nodeTupleField = bagField.schema.getField(0);
    if (nodeTupleField.type != DataType.TUPLE)
    {
      throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(nodeTupleField.type)));
    }

    Schema nodeSchema = nodeTupleField.schema;

    // Node biasing adds a third field to each node tuple.
    if (this.enableNodeBiasing)
    {
      if (nodeSchema.size() != 3)
      {
        throw new RuntimeException("Expected three fields for the node data");
      }
    }
    else if (nodeSchema.size() != 2)
    {
      throw new RuntimeException("Expected two fields for the node data");
    }

    Schema.FieldSchema sourceField = nodeSchema.getField(0);
    if (sourceField.type != DataType.INTEGER)
    {
      throw new RuntimeException(String.format("Expected source to be an INTEGER, but instead found %s",
                                               DataType.findTypeName(sourceField.type)));
    }

    Schema.FieldSchema edgesField = nodeSchema.getField(1);
    if (edgesField.type != DataType.BAG)
    {
      throw new RuntimeException("Expected edges to be represented with a BAG");
    }

    if (this.enableNodeBiasing)
    {
      Schema.FieldSchema biasField = nodeSchema.getField(2);
      if (biasField.type != DataType.DOUBLE)
      {
        throw new RuntimeException(String.format("Expected node bias to be a DOUBLE, but instead found %s",
                                                 DataType.findTypeName(biasField.type)));
      }
    }

    // --- edge tuple inside the edges bag ---
    Schema.FieldSchema edgeTupleField = edgesField.schema.getField(0);
    if (edgeTupleField.type != DataType.TUPLE)
    {
      throw new RuntimeException(String.format("Expected edges field to contain a TUPLE, but instead found %s",
                                               DataType.findTypeName(edgeTupleField.type)));
    }

    Schema edgeSchema = edgeTupleField.schema;
    if (edgeSchema.size() != 2)
    {
      throw new RuntimeException("Expected two fields for the edge data");
    }

    Schema.FieldSchema destinationField = edgeSchema.getField(0);
    if (destinationField.type != DataType.INTEGER)
    {
      throw new RuntimeException(String.format("Expected destination edge ID to an INTEGER, but instead found %s",
                                               DataType.findTypeName(destinationField.type)));
    }

    Schema.FieldSchema weightField = edgeSchema.getField(1);
    if (weightField.type != DataType.DOUBLE)
    {
      throw new RuntimeException(String.format("Expected destination edge weight to a DOUBLE, but instead found %s",
                                               DataType.findTypeName(weightField.type)));
    }

    // --- output: bag of (node, rank) ---
    Schema rankTuple = new Schema();
    rankTuple.add(new Schema.FieldSchema("node",DataType.INTEGER));
    rankTuple.add(new Schema.FieldSchema("rank",DataType.FLOAT));

    String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
    return new Schema(new Schema.FieldSchema(outputName, rankTuple, DataType.BAG));
  }
  catch (FrontendException e)
  {
    throw new RuntimeException(e);
  }
}