Java Code Examples for org.apache.pig.impl.util.Utils#getSchemaFromString()

The following examples show how to use org.apache.pig.impl.util.Utils#getSchemaFromString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestProjectStarRangeInUdf.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a TOBAG over {@code *} mixed with the range {@code $0 .. $2} on a load
 * that declares no schema, then checks both the resulting schema of {@code f}
 * and its sorted output.
 *
 * @throws IOException if the query cannot be compiled or executed
 */
@Test
public void testProjMixExpand1NoSchema() throws IOException {
    final String query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + "f = foreach l1 generate TOBAG(*, $0 .. $2) as tt;";

    // Parse with a throwaway alias, then clear the alias and set the type to
    // NULL on the innermost field.
    Schema expectedSchema = Utils.getSchemaFromString("tt : {(NullALias)}");
    Schema.FieldSchema innermost =
        expectedSchema.getField(0).schema.getField(0).schema.getField(0);
    innermost.alias = null;
    innermost.type = DataType.NULL;

    compileAndCompareSchema(expectedSchema, query, "f");
    Iterator<Tuple> actualRows = pigServer.openIterator("f");

    String[] expectedRows = {
        "({('10'),('20'),('30'),('40'),('50'),('10'),('20'),('30')})",
        "({('11'),('21'),('31'),('41'),('51'),('11'),('21'),('31')})",
    };
    List<Tuple> expectedTuples =
        Util.getTuplesFromConstantTupleStringAsByteArray(expectedRows);
    Util.checkQueryOutputsAfterSort(actualRows, expectedTuples);
}
 
Example 2
Source File: FixedWidthLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the reader and split index, then restores the schema and the
 * required-field mask from the UDF properties.
 *
 * @param reader record reader kept for later use in getNext()
 * @param split  split whose index is recorded
 * @throws IOException if no schema string is present in the UDF properties
 */
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    this.reader = reader;
    splitIndex = split.getSplitIndex();

    // Properties are looked up under this class and the UDF context signature.
    Properties udfProps = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String serializedSchema = udfProps.getProperty(SCHEMA_SIGNATURE);
    if (serializedSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(serializedSchema));

    requiredFields =
            (boolean[]) ObjectSerializer.deserialize(udfProps.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields == null) {
        // No mask stored: numRequiredFields is left untouched.
        return;
    }

    int selected = 0;
    for (boolean wanted : requiredFields) {
        if (wanted) {
            selected++;
        }
    }
    numRequiredFields = selected;
}
 
Example 3
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test projecting multiple * in one foreach: the first star is renamed to
 * (aa, bb, cc) and the second keeps the load aliases.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    final String script =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
        + "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(pig, script);

    // Schema check: renamed columns first, original columns after.
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    // Data check: each row appears twice side by side.
    String[] expectedRows = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Iterator<Tuple> actualRows = pig.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, expectedTuples);
}
 
Example 4
Source File: TestPigServer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Registers a script where alias C is defined at top level and, when
 * {@code useScalar} is true, is defined again inside the nested FOREACH
 * block; then asserts that the dumped schema of B equals the parsed
 * {@code expectedSchemaStr}.
 *
 * @param useScalar         whether to add the nested definition of C
 * @param expectedSchemaStr schema string B is expected to have
 * @throws IOException if a query fails to register or the schema cannot be dumped
 */
private void registerScalarScript(boolean useScalar, String expectedSchemaStr) throws IOException {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("A = load 'adata' AS (a: int, b: int);");
    // Top-level C (the scalar candidate).
    pig.registerQuery("C = FOREACH A GENERATE *;");

    String nestedOverride;
    if (useScalar) {
        nestedOverride = "C = FILTER A BY b % 2 == 0; ";
    } else {
        nestedOverride = "";
    }

    StringBuilder nestedForeach = new StringBuilder();
    nestedForeach.append("B = FOREACH (GROUP A BY a) { ");
    nestedForeach.append(nestedOverride);
    nestedForeach.append("D = FILTER A BY b % 2 == 1;");
    nestedForeach.append("GENERATE group AS a, A.b AS every, C.b AS even, D.b AS odd;");
    nestedForeach.append("};");
    pig.registerQuery(nestedForeach.toString());

    Schema actual = pig.dumpSchema("B");
    Schema expected = Utils.getSchemaFromString(expectedSchemaStr);
    assertEquals(expected, actual);
}
 
Example 5
Source File: TestSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Merges two single-bag schemas with the second schema's aliases taking
 * precedence and asserts that the merged schema equals the second schema.
 *
 * @throws Exception if a schema string cannot be parsed or the merge fails
 */
@Test
// See PIG-730
public void testMergeSchemaWithTwoLevelAccess() throws Exception {
    // Generate two schemas
    Schema s1 = Utils.getSchemaFromString("a:{t:(a0:int, a1:int)}");
    Schema s2 = Utils.getSchemaFromString("b:{t:(b0:int, b1:int)}");
    // NOTE(review): both calls target s1, so the first is immediately undone.
    // The second may have been meant for s2 - confirm against PIG-730 before changing.
    s1.getField(0).schema.setTwoLevelAccessRequired(true);
    s1.getField(0).schema.setTwoLevelAccessRequired(false);
    Schema s3 = Schema.mergeSchema(s1, s2, true);
    // JUnit expects (expected, actual): s2 is the reference, s3 the computed value.
    assertEquals(s2, s3);
}
 
Example 6
Source File: TestTextDataParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the text map {@code [key1#0.1f]} against a {@code map[float]} field
 * schema and checks that the single entry comes back as a Float printing as "0.1".
 *
 * @throws Exception if schema parsing or the byte-to-map conversion fails
 */
@Test
public void testMapFloatValueType() throws Exception{
    ResourceFieldSchema mapField =
        new ResourceSchema(Utils.getSchemaFromString("m:map[float]")).getFields()[0];
    byte[] serialized = "[key1#0.1f]".getBytes();

    Map<String, Object> parsed = ps.getLoadCaster().bytesToMap(serialized, mapField);
    String firstKey = parsed.keySet().iterator().next();
    Object parsedValue = parsed.get("key1");

    assertEquals("key1", firstKey);
    assertTrue(parsedValue instanceof Float);
    assertEquals("0.1", String.valueOf(parsedValue));
}
 
Example 7
Source File: TestPigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Pig schema string with {@code pigSchemaConverter} and checks the
 * result against the parsed expected message type; then filters the converted
 * schema by the same Pig schema and checks the string form is unchanged.
 *
 * @param pigSchemaString Pig schema to convert
 * @param schemaString    expected message type, in parseable text form
 * @throws Exception if parsing or conversion fails
 */
private void testConversion(String pigSchemaString, String schemaString) throws Exception {
  final String description = "converting " + pigSchemaString + " to " + schemaString;

  Schema parsedPigSchema = Utils.getSchemaFromString(pigSchemaString);
  MessageType converted = pigSchemaConverter.convert(parsedPigSchema);
  MessageType expected = MessageTypeParser.parseMessageType(schemaString);
  assertEquals(description, expected, converted);

  // Filtering by the schema it was converted from is compared via toString().
  MessageType afterFilter = pigSchemaConverter.filter(converted, parsedPigSchema, null);
  assertEquals(description + " and filtering", converted.toString(), afterFilter.toString());
}
 
Example 8
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the dumped schema of a LIMIT alias equals the three typed
 * fields declared on the load.
 *
 * @throws Throwable on any failure while registering queries or dumping the schema
 */
@Test
public void testDescribeLimit() throws Throwable {
    final String expectedFields = "field1: int,field2: float,field3: chararray";

    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = limit a 10;");

    Schema actual = server.dumpSchema("b");
    assertEquals(Utils.getSchemaFromString(expectedFields), actual);
}
 
Example 9
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the dumped schema of a FILTER alias equals the three typed
 * fields declared on the load.
 *
 * @throws Throwable on any failure while registering queries or dumping the schema
 */
@Test
public void testDescribeFilter() throws Throwable {
    final String expectedFields = "field1: int,field2: float,field3: chararray";

    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = filter a by field1 > 10;");

    Schema actual = server.dumpSchema("b");
    assertEquals(Utils.getSchemaFromString(expectedFields), actual);
}
 
Example 10
Source File: PigSerializationEventConverterTest.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a Pig schema string into a {@code ResourceSchema}, rethrowing any
 * failure as an unchecked exception.
 *
 * @param schema Pig schema string
 * @return the resource schema built from the parsed string
 * @throws RuntimeException wrapping whatever exception parsing or construction raised
 */
private ResourceSchema createSchema(String schema) {
    ResourceSchema result;
    try {
        result = new ResourceSchema(Utils.getSchemaFromString(schema));
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
    return result;
}
 
Example 11
Source File: DBStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Initialise the database connection and prepared statement here.
 * <p>
 * Opens a JDBC connection (with credentials only when both user and password
 * are set), disables auto-commit, prepares the insert statement, resets the
 * row counter and, if a schema string is stored in the UDF properties,
 * parses it into {@code schema}.
 * <p>
 * If any JDBC step fails, a connection that was already opened is closed and
 * cleared before the error is rethrown.
 *
 * @param writer record writer supplied by the framework; not used here
 * @throws IOException if no insert query is configured, or wrapping the
 *         SQLException when connecting or preparing the statement fails
 */
@SuppressWarnings("unchecked")
@Override
public void prepareToWrite(RecordWriter writer)
    throws IOException {
  ps = null;
  con = null;
  if (insertQuery == null) {
    throw new IOException("SQL Insert command not specified");
  }
  try {
    if (user == null || pass == null) {
      con = DriverManager.getConnection(jdbcURL);
    } else {
      con = DriverManager.getConnection(jdbcURL, user, pass);
    }
    con.setAutoCommit(false);
    ps = con.prepareStatement(insertQuery);
  } catch (SQLException e) {
    // Keep the cause in the log; the message alone hides why the setup failed.
    log.error("Unable to connect to JDBC @" + jdbcURL, e);
    if (con != null) {
      // The connection was opened but a later step failed: release it so it
      // is neither leaked nor left referenced in a half-initialised state.
      try {
        con.close();
      } catch (SQLException closeFailure) {
        log.error("Unable to close JDBC connection @" + jdbcURL, closeFailure);
      }
      con = null;
    }
    throw new IOException("JDBC Error", e);
  }
  count = 0;

  // Try to get the schema from the UDFContext object.
  UDFContext udfc = UDFContext.getUDFContext();
  Properties p =
      udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
  String strSchema = p.getProperty(SCHEMA_SIGNATURE);
  if (strSchema != null) {
      // Parse the schema from the string stored in the properties object.
      schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
  }
}
 
Example 12
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the dumped schema of a CROSS of two loads carries every field
 * of both inputs, each prefixed with its source alias.
 *
 * @throws Throwable on any failure while registering queries or dumping the schema
 */
@Test
public void testDescribeCross() throws Throwable {
    final String expectedFields =
        "a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray";

    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cross a, b;");

    Schema actual = server.dumpSchema("c");
    assertEquals(Utils.getSchemaFromString(expectedFields), actual);
}
 
Example 13
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the dumped schema of a COGROUP of two loads is the group key
 * followed by one bag per input holding that input's fields.
 *
 * @throws Throwable on any failure while registering queries or dumping the schema
 */
@Test
public void testDescribeCogroup() throws Throwable {
    final String expectedFields =
        "group:int,a:{(field1:int,field2:float,field3:chararray)},b:{(field4:bytearray,field5:double,field6:chararray)}";

    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = load 'b' as (field4, field5: double, field6: chararray );");
    server.registerQuery("c = cogroup a by field1, b by field4;");

    Schema actual = server.dumpSchema("c");
    assertEquals(Utils.getSchemaFromString(expectedFields), actual);
}
 
Example 14
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in both the inputs
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    // NOTE(review): cg2 is defined but never used; f2 is generated from cg1.
    // This may be a typo for cg2 - left unchanged because the intent cannot be
    // confirmed from this method alone.
    String query =
        "   l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
        + " l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); " 
        + " cg1 = cogroup l1 by i, l2 by i; "
        + " f1 = foreach cg1 generate group as gkey, flatten(l1), flatten(l2); "
        + " cg2 = cogroup l2 by i, l1 by i; "
        + " f2 = foreach cg1 generate group as gkey, flatten(l2), flatten(l1); "
        + "u = union onschema f1, f2; " ; 
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = 
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    // JUnit's order is (message, expected, actual); the reverse produces a
    // misleading "expected ... but was ..." report when the check fails.
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example 15
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the dumped schema of an ORDER BY alias equals the three typed
 * fields declared on the load.
 *
 * @throws Throwable on any failure while registering queries or dumping the schema
 */
@Test
public void testDescribeSort() throws Throwable {
    final String expectedFields = "field1: int,field2: float,field3: chararray";

    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = order a by * desc;");

    Schema actual = server.dumpSchema("b");
    assertEquals(Utils.getSchemaFromString(expectedFields), actual);
}
 
Example 16
Source File: TestTupleRecordConsumer.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Parses a Pig schema string and converts it with a fresh {@code PigSchemaConverter}.
 *
 * @param pigSchemaString Pig schema to parse
 * @return the converted message type
 * @throws ParserException if the schema string cannot be parsed
 */
private MessageType getMessageType(String pigSchemaString) throws ParserException {
  final Schema parsed = Utils.getSchemaFromString(pigSchemaString);
  final PigSchemaConverter converter = new PigSchemaConverter();
  return converter.convert(parsed);
}
 
Example 17
Source File: GroovyEvalFunc.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Binds this eval function to the uniquely named method {@code methodName}
 * of the Groovy script class loaded from {@code path}.
 * <p>
 * The script class is taken from {@code scriptClasses} when present and
 * loaded through the Groovy script engine otherwise. The output schema is
 * taken from the first {@code OutputSchemaFunction} or {@code OutputSchema}
 * annotation found on the method. For a non-static method the invocation
 * target is {@code target}, or a new instance of the script class when
 * {@code target} is null.
 *
 * @param path       location of the Groovy script
 * @param namespace  namespace the function is registered under
 * @param methodName name of the method to invoke
 * @param target     instance to invoke on, or null to create one when needed
 * @throws IOException if the script cannot be loaded, the method is missing
 *         or ambiguous, or the script class cannot be instantiated
 */
public GroovyEvalFunc(String path, String namespace, String methodName, Object target) throws IOException {
  // Qualified method name; not referenced again within this constructor.
  String fqmn;
  if ("".equals(namespace)) {
    fqmn = methodName;
  } else {
    fqmn = namespace + ScriptEngine.NAMESPACE_SEPARATOR + methodName;
  }

  Class scriptClass = scriptClasses.get(path);
  if (scriptClass == null) {
    try {
      scriptClass = GroovyScriptEngine.getEngine().loadScriptByName(new File(path).toURI().toString());
    } catch (ScriptException se) {
      throw new IOException(se);
    } catch (ResourceException re) {
      throw new IOException(re);
    }
  }
  // Stored unconditionally, whether the class was just loaded or already present.
  scriptClasses.put(path, scriptClass);

  // Look the method up by name only; the last match wins and every match is counted.
  int matches = 0;
  for (Method candidate : scriptClass.getMethods()) {
    if (!candidate.getName().equals(methodName)) {
      continue;
    }
    this.method = candidate;
    matches++;
  }

  if (this.method == null) {
    throw new IOException("Method " + methodName + " was not found in '" + path + "'");
  }
  if (matches > 1) {
    throw new IOException("There are " + matches + " methods with name '" + methodName + "', please make sure method names are unique within the Groovy class.");
  }

  // Extract schema: the first matching annotation of either kind decides.
  for (Annotation annotation : this.method.getAnnotations()) {
    Class<? extends Annotation> kind = annotation.annotationType();
    if (kind.equals(OutputSchemaFunction.class)) {
      this.schemaFunction = new GroovyEvalFuncObject(path, namespace, ((OutputSchemaFunction) annotation).value());
      break;
    }
    if (kind.equals(OutputSchema.class)) {
      this.schema = Utils.getSchemaFromString(((OutputSchema) annotation).value());
      break;
    }
  }

  // Static methods need no invocation target.
  if (Modifier.isStatic(this.method.getModifiers())) {
    return;
  }

  // Instance methods use the supplied target when there is one ...
  if (target != null) {
    this.invocationTarget = target;
    return;
  }

  // ... and a new instance of the script class otherwise.
  try {
    this.invocationTarget = scriptClass.newInstance();
  } catch (InstantiationException ie) {
    throw new IOException(ie);
  } catch (IllegalAccessException iae) {
    throw new IOException(iae);
  }
}
 
Example 18
Source File: PigSchemaSaveTest.java    From elasticsearch-hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips a parsed schema through Base64 serialization and checks that
 * the string form is unchanged.
 *
 * @throws Exception if parsing, serialization or deserialization fails
 */
@Test
public void testSchemaSerializationPlusBase64() throws Exception {
    Schema original = Utils.getSchemaFromString("name:bytearray,links:{(missing:chararray)}");

    String encoded = IOUtils.serializeToBase64(original);
    Schema restored = IOUtils.deserializeFromBase64(encoded);

    assertEquals(original.toString(), restored.toString());
}
 
Example 19
Source File: TestPlanGeneration.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a loader whose schema is parsed from the given string.
 *
 * @param schemaString Pig schema string to parse
 * @throws ParserException if the schema string cannot be parsed
 */
public SchemaLoader(String schemaString) throws ParserException {
    this.schema = Utils.getSchemaFromString(schemaString);
}
 
Example 20
Source File: EvalFunc.java    From spork with Apache License 2.0 3 votes vote down vote up
/**
 * Report the schema of the output of this UDF.  Pig will make use of
 * this in error checking, optimization, and planning.  The schema
 * of input data to this UDF is provided.
 * <p>
 * This default implementation reads the {@link OutputSchema} annotation on
 * the runtime class and parses its value; without the annotation it returns
 * <code>null</code> (no known output schema). A value that fails to parse is
 * reported as a {@link RuntimeException} wrapping the parser error.
 *
 * @param input Schema of the input
 * @return Schema of the output
 */
public Schema outputSchema(Schema input) {
    OutputSchema declared = this.getClass().getAnnotation(OutputSchema.class);
    if (declared == null) {
        return null;
    }
    try {
        return Utils.getSchemaFromString(declared.value());
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}