org.apache.pig.impl.logicalLayer.schema.Schema Java Examples

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestResourceSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a one-level Pig schema whose single BAG field ("f0") carries a flat
 * two-field inner schema, wraps it in a {@link ResourceSchema}, and expects
 * the conversion back to a Pig schema to fail with error code 2218.
 *
 * @throws FrontendException if constructing the input schema fails
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() throws FrontendException {
    final String[] fieldNames = {"f1", "f2"};
    final byte[] fieldTypes = {DataType.CHARARRAY, DataType.INTEGER};

    final Schema bagContents = TypeCheckingTestUtil.genFlatSchema(fieldNames, fieldTypes);
    final Schema.FieldSchema bagField = new Schema.FieldSchema("f0", bagContents, DataType.BAG);
    final Schema outer = new Schema(bagField);

    try {
        Schema.getPigSchema(new ResourceSchema(outer));
        // Reaching this line means the conversion did not raise the expected error.
        Assert.fail();
    } catch (FrontendException expected) {
        assertTrue(expected.getErrorCode() == 2218);
    }
}
 
Example #2
Source File: TestEvalPipeline2.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Describes aliases defined inside FOREACH blocks: the nested relational
 * alias {@code D} of {@code B} must report both input columns as bytearray,
 * and describing the nested expression alias {@code E} of {@code C} is
 * exercised as a negative case.
 */
@Test
public void testDescribeNestedAlias() throws Exception{
    final String[] rows = { "1\t3", "2\t4", "3\t5" };
    Util.createInputFile(cluster, "table_testDescribeNestedAlias", rows);

    pigServer.registerQuery("A = LOAD 'table_testDescribeNestedAlias' as (a0, a1);");
    pigServer.registerQuery("P = GROUP A by a1;");

    // Nested alias backed by a relational operator (ORDER).
    pigServer.registerQuery("B = FOREACH P { D = ORDER A by $0; generate group, D.$0; };");

    // Nested alias backed by an expression - negative test case.
    pigServer.registerQuery("C = FOREACH A { D = a0/a1; E=a1/a0; generate E as newcol; };");

    final Schema relationalSchema = pigServer.dumpSchemaNested("B", "D");
    final String described = relationalSchema.toString();
    Assert.assertTrue(described.equalsIgnoreCase("{a0: bytearray,a1: bytearray}"));

    // NOTE(review): there is no fail() after this call, so the test also
    // passes when no exception is thrown - confirm that is intended.
    try {
        pigServer.dumpSchemaNested("C", "E");
    } catch (FrontendException expected) {
        Assert.assertTrue(expected.getErrorCode() == 1113);
    }
}
 
Example #3
Source File: AliasEvalFuncTest.java    From datafu with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a single-tuple bag to {@code ReportBuilder} after declaring a
 * bag-of-tuples output schema, and expects the UDF to return a bag equal to
 * the input bag.
 */
@Test
public void getBagTest() throws Exception
{
   ReportBuilder udf = new ReportBuilder();
   udf.setUDFContextSignature("test");

   // Inner tuple layout: (msisdn: long, ts: int, center_lon: double, center_lat: double)
   List<Schema.FieldSchema> tupleFields = new ArrayList<Schema.FieldSchema>();
   tupleFields.add(new Schema.FieldSchema("msisdn", DataType.LONG));
   tupleFields.add(new Schema.FieldSchema("ts", DataType.INTEGER));
   tupleFields.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
   tupleFields.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
   Schema tupleSchema = new Schema(tupleFields);

   Schema.FieldSchema routesField =
       new Schema.FieldSchema(ReportBuilder.ORDERED_ROUTES, tupleSchema, DataType.BAG);
   udf.outputSchema(new Schema(routesField));

   // One row wrapped in a bag, wrapped in the UDF's input tuple.
   DataBag inputBag = BagFactory.getInstance().newDefaultBag();
   inputBag.add(TupleFactory.getInstance().newTuple(Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
   Tuple inputTuple = TupleFactory.getInstance().newTuple();
   inputTuple.append(inputBag);

   DataBag outputBag = udf.exec(inputTuple);
   Assert.assertEquals(inputBag, outputBag);
}
 
Example #4
Source File: GroovyEvalFunc.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the output schema for this function.
 *
 * When no schema function is configured the stored {@code schema} is
 * returned. Otherwise the input schema's string form, with its enclosing
 * braces removed, is passed to the schema function and the string it
 * returns is parsed into a {@link Schema}.
 *
 * @param input schema of the function's input
 * @return the stored schema, or the schema parsed from the schema function's result
 * @throws RuntimeException wrapping any ParserException or IOException raised on the way
 */
@Override
public Schema outputSchema(Schema input) {
  if (this.schemaFunction == null) {
    return this.schema;
  }

  try {
    // Strip enclosing '{}' from schema
    String unwrapped = input.toString().replaceAll("^\\{", "").replaceAll("\\}$", "");
    Tuple argument = TupleFactory.getInstance().newTuple(1);
    argument.set(0, unwrapped);

    String schemaText = (String) this.schemaFunction.exec(argument);
    return Utils.getSchemaFromString(schemaText);
  } catch (ParserException pe) {
    throw new RuntimeException(pe);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
 
Example #5
Source File: TestLimitSchemaStore.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores a LIMIT-ed relation with PigStorage's '-schema' option, loads the
 * output back without an AS clause, and expects a non-null schema for the
 * reloaded relation.
 */
@Test //end to end test
public void testLimitStoreSchema1() throws Exception{
    final String[] students = {"joe smith:18:3.5","amy brown:25:2.5","jim fox:20:4.0","leo fu:55:3.0"};
    Util.createLocalInputFile("student", students);

    final String loadStudents =
        "a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);";
    pigServer.registerQuery(loadStudents);
    pigServer.registerQuery("d = distinct a;");
    pigServer.registerQuery("lim = limit d 1;");

    // Start from a clean output location before storing.
    final String outFile = "limitSchemaOut";
    Util.deleteDirectory(new File(outFile));
    pigServer.store("lim", outFile,  "PigStorage('\\t', '-schema')");
    pigServer.dumpSchema("lim");

    final String reload = "b = LOAD '" + outFile + "' using PigStorage('\\t', '-schema');";
    pigServer.registerQuery(reload);
    final Schema reloadedSchema = pigServer.dumpSchema("b");
    System.err.println(reloadedSchema);
    Assert.assertNotNull(reloadedSchema);
}
 
Example #6
Source File: ScorePMML_ElNinoTest.java    From Surus with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the flat input schema used by the El Nino scoring test: three
 * chararray columns followed by seven double columns.
 *
 * @return a schema with ten fields in the order listed below
 * @throws FrontendException declared for schema construction
 */
private Schema buildElNinoInputSchema() throws FrontendException {
    // Column order matters: chararray columns first, then the double columns.
    final String[] chararrayColumns = {"buoy_day_ID", "buoy", "day"};
    final String[] doubleColumns = {
        "latitude", "longitude", "zon_winds", "mer_winds", "humidity", "airtemp", "s_s_temp"
    };

    List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
    for (String column : chararrayColumns) {
        fieldSchemas.add(new Schema.FieldSchema(column, DataType.CHARARRAY));
    }
    for (String column : doubleColumns) {
        fieldSchemas.add(new Schema.FieldSchema(column, DataType.DOUBLE));
    }

    return new Schema(fieldSchemas);
}
 
Example #7
Source File: VespaDocumentOperation.java    From vespa with Apache License 2.0 6 votes vote down vote up
/**
 * Writes one named field to the JSON generator, if the field should be written
 * at this depth.
 *
 * A field that has a partial map operation configured is delegated to
 * {@code writePartialUpdateAndRemoveMap}. Any other field gets its name
 * written first, followed by either a partial update or a plain value.
 *
 * @throws IOException propagated from the JSON generator or the delegated writers
 */
@SuppressWarnings("unchecked")
private static void writeField(String name, Object value, Byte type, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth) throws IOException {
    if (!shouldWriteField(name, properties, depth)) {
        return;
    }

    // Check whether the name has the property update-map-fields/remove-map-fields.
    // If it does, special treatment is needed because the tuple must be looped through.
    // Note that "operation" here is not a Vespa operation such as "put" or "update";
    // it is the field-level operation name, such as "assign" or "remove".
    String operation = getPartialOperation(mapPartialOperationMap, name, properties);
    if (operation != null) {
        writePartialUpdateAndRemoveMap(name, value, g, properties, schema, op, depth, operation);
        return;
    }

    g.writeFieldName(name);
    if (shouldWritePartialUpdate(op, depth)) {
        writePartialUpdate(value, type, g, name, properties, schema, op, depth);
        return;
    }
    writeValue(value, type, g, name, properties, schema, op, depth);
}
 
Example #8
Source File: TestOrderBy3.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a local PigServer, resets its mock data store, and registers a
 * "test" relation of MAX rows shaped (index: int, name: chararray) where the
 * name is a random string.
 */
@Before
public void setUp() throws Exception {
    log.info("Setting up");

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // One row per index, each paired with a freshly generated random string.
    ArrayList<Tuple> rows = new ArrayList<Tuple>();
    Random random = new Random();
    int index = 0;
    while (index < MAX) {
        rows.add(tuple(index, GenRandomData.genRandString(random)));
        index++;
    }

    Schema rowSchema = new Schema();
    rowSchema.add(new Schema.FieldSchema("index", DataType.INTEGER));
    rowSchema.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", rowSchema, rows);
}
 
Example #9
Source File: TestThriftToPigCompatibility.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that two ways of turning a Thrift object into a Pig tuple agree.
 * <ul>
 * <li>Writes the object using the thrift mapping</li>
 * <li>Reads it back using the pig mapping</li>
 * <li>Uses Elephant Bird to convert the same object from thrift to pig</li>
 * <li>Checks that both transformations give the same result</li>
 * </ul>
 * It also checks that filtering the Parquet schema by the Pig schema leaves
 * it unchanged.
 *
 * @param o the object to convert
 * @param <T> the Thrift type of {@code o}
 * @throws TException propagated from the Thrift write and conversion calls
 */
public static <T extends TBase<?,?>> void validateSameTupleAsEB(T o) throws TException {
  @SuppressWarnings("unchecked")
  final Class<T> thriftClass = (Class<T>) o.getClass();

  // Parquet-side description of the Thrift class.
  final ThriftSchemaConverter converter = new ThriftSchemaConverter();
  final MessageType parquetSchema = converter.convert(thriftClass);
  final StructType thriftStruct = ThriftSchemaConverter.toStructType(thriftClass);

  // Pig-side description of the same class.
  final ThriftToPig<T> elephantBird = new ThriftToPig<T>(thriftClass);
  final Schema pigSchema = elephantBird.toSchema();

  // Write the object through the Parquet protocol and materialize it as a Pig tuple.
  final TupleRecordMaterializer materializer = new TupleRecordMaterializer(parquetSchema, pigSchema, true);
  RecordConsumer consumer = new ConverterConsumer(materializer.getRootConverter(), parquetSchema);
  final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(parquetSchema);
  ParquetWriteProtocol protocol =
      new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(consumer), columnIO, thriftStruct);
  o.write(protocol);

  final Tuple actual = materializer.getCurrentRecord();
  final Tuple expected = elephantBird.getPigTuple(o);
  assertEquals(expected.toString(), actual.toString());

  final MessageType filtered = new PigSchemaConverter().filter(parquetSchema, pigSchema);
  assertEquals(parquetSchema.toString(), filtered.toString());
}
 
Example #10
Source File: XPath.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Declares the two accepted argument signatures, both mapped to this class:
 * (chararray, chararray) and (chararray, chararray, boolean).
 *
 * @return the function specs, two-argument form first
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
	final String implementation = this.getClass().getName();

	/*either two chararray arguments*/
	List<FieldSchema> twoArgs = new ArrayList<FieldSchema>();
	twoArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	twoArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));

	/*or two chararray and a boolean argument*/
	List<FieldSchema> threeArgs = new ArrayList<FieldSchema>();
	threeArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	threeArgs.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	threeArgs.add(new Schema.FieldSchema(null, DataType.BOOLEAN));

	final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
	funcList.add(new FuncSpec(implementation, new Schema(twoArgs)));
	funcList.add(new FuncSpec(implementation, new Schema(threeArgs)));
	return funcList;
}
 
Example #11
Source File: PigSchemaConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Keeps the fields of {@code schemaToFilter} that are requested by the Pig
 * schema, matching them by name.
 *
 * Each Pig field is looked up under the name produced by {@code name(alias,
 * "field_" + position)}; Pig fields with no matching Parquet field are
 * skipped. {@code requiredFieldsList} is not consulted by this implementation.
 *
 * @return the filtered types, in the order of the requested Pig fields
 */
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<Type> kept = new ArrayList<Type>();
  int position = 0;
  for (FieldSchema requested : requestedPigSchema.getFields()) {
    String lookupName = name(requested.alias, "field_" + position);
    position++;
    if (!schemaToFilter.containsField(lookupName)) {
      continue;
    }
    kept.add(filter(schemaToFilter.getType(lookupName), requested));
  }
  return kept;
}
 
Example #12
Source File: TestMergeJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Joins two relations loaded without an AS clause, first with the 'merge'
 * strategy and then with the default strategy, and asserts that the default
 * join's schema is null.
 */
@Test
public void testMergeJoinSch2() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");

    // NOTE(review): the merge-join schema is fetched but never asserted on -
    // confirm whether it was meant to be compared with the default join's schema.
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using 'merge';");
    Schema mergeJoinSchema = pigServer.dumpSchema("C");

    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
    Schema defaultJoinSchema = pigServer.dumpSchema("C");
    Assert.assertTrue(defaultJoinSchema == null);
}
 
Example #13
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has an additional 'namespace'
 * part in the column name in one of the inputs (here introduced by
 * flattening a grouped relation).
 *
 * The union is checked in both input orders: the resulting schema must be
 * {@code i: int, j: int} and the sorted output must match the expected rows.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g = group l1 by i; "
    + "f = foreach g generate flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ; 
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit's contract is (message, expected, actual); keep that order so a
    // failure reports the two schemas under the right labels.
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,2)",
                        "(5,3)",
                        "(1,2)",
                        "(5,3)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
    
    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ; 
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #14
Source File: TupleDiff.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the nested field at position {@code fieldNum} inside
 * {@code fieldSchema}'s inner schema.
 *
 * @param fieldSchema the enclosing field; may be null
 * @param fieldNum zero-based position of the nested field
 * @return the nested field schema, or null when {@code fieldSchema} is null,
 *         it has no inner schema, or the inner schema has no field at
 *         {@code fieldNum}
 * @throws ExecException declared for callers; not raised directly here
 * @throws FrontendException propagated from {@code Schema.getField}
 */
private FieldSchema getFieldSchema(FieldSchema fieldSchema, int fieldNum) throws ExecException, FrontendException {
	if (fieldSchema == null) {
		return null;
	}

	Schema schema = fieldSchema.schema;
	// A field without an inner schema has no nested fields to look up;
	// answer null like the other "not available" cases instead of throwing an NPE.
	if (schema == null) {
		return null;
	}

	// Compare against size directly so fieldNum + 1 cannot overflow.
	return fieldNum >= schema.size() ? null : schema.getField(fieldNum);
}
 
Example #15
Source File: GetSecond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a single unnamed DATETIME argument to this class.
 *
 * @return a one-element list of function specs
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema dateTimeArg = new Schema.FieldSchema(null, DataType.DATETIME);
    FuncSpec spec = new FuncSpec(this.getClass().getName(), new Schema(dateTimeArg));

    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(spec);
    return funcList;
}
 
Example #16
Source File: ISOToSecond.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a single unnamed CHARARRAY argument to this class.
 *
 * @return a one-element list of function specs
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema.FieldSchema textArg = new Schema.FieldSchema(null, DataType.CHARARRAY);
    FuncSpec spec = new FuncSpec(this.getClass().getName(), new Schema(textArg));

    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(spec);
    return funcList;
}
 
Example #17
Source File: TestSecondarySort.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * End-to-end nested sort: groups a three-column input by its first column,
 * applies LIMIT then ORDER inside the FOREACH, and compares the result
 * against the expected grouped, ordered bags.
 */
@Test
public void testNestedSortEndToEnd1() throws Exception {
    final String[] inputRows = {
            "1\t2\t3",
            "1\t3\t4",
            "1\t2\t4",
            "1\t2\t4",
            "1\t2\t4",
            "2\t3\t4"
    };
    File tmpFile1 = Util.createTempFileDelOnExit("test", "txt");
    PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile1));
    for (String row : inputRows) {
        ps1.println(row);
    }
    ps1.close();

    String expected[] = {
            "(2,{(2,3,4)})",
            "(1,{(1,2,3),(1,2,4),(1,2,4),(1,2,4),(1,3,4)})"
    };

    // Copy the local file to the cluster under a colon-free path.
    final String localPath = tmpFile1.getCanonicalPath();
    String clusterPath = Util.removeColon(localPath);
    Util.copyFromLocalToCluster(cluster, tmpFile1.getCanonicalPath(), clusterPath);

    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterPath) + "' AS (a0, a1, a2);");
    pigServer.registerQuery("B = group A by $0 parallel 2;");
    pigServer.registerQuery("C = foreach B { D = limit A 10; E = order D by $1; generate group, E;};");

    Iterator<Tuple> results = pigServer.openIterator("C");
    Schema resultSchema = pigServer.dumpSchema("C");

    Util.checkQueryOutputsAfterSortRecursive(results, expected,
            org.apache.pig.newplan.logical.Util.translateSchema(resultSchema));
    Util.deleteFile(cluster, clusterPath);
}
 
Example #18
Source File: ISODaysBetween.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps the (chararray, chararray) argument signature to this class.
 *
 * @return a one-element list of function specs
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    // Two unnamed chararray arguments.
    Schema twoStrings = new Schema();
    for (int arg = 0; arg < 2; arg++) {
        twoStrings.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    }

    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), twoStrings));
    return funcList;
}
 
Example #19
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a single-row file into a relation declared with an untyped map
 * column ({@code m:[]}) and checks that the schema prints as
 * {@code {m: map[]}} and that both map values are DataByteArray instances
 * whose string forms are "1" and "2".
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);

    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    // JUnit's contract is (message, expected, actual); keep that order so a
    // failure reports the two values under the right labels.
    assertEquals("Checking expected schema", "{m: map[]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("a");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size()==1);
    Assert.assertTrue(t.get(0) instanceof Map);

    // Cast once, after the instanceof check, instead of repeating a raw cast per assertion.
    Map<?, ?> map = (Map<?, ?>) t.get(0);
    Assert.assertTrue(map.containsKey("key"));
    Assert.assertTrue(map.containsKey("key2"));
    Assert.assertTrue(map.get("key") instanceof DataByteArray);
    Assert.assertTrue(map.get("key").toString().equals("1"));
    Assert.assertTrue(map.get("key2") instanceof DataByteArray);
    Assert.assertTrue(map.get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
 
Example #20
Source File: ExampleEasyCubeAggregator.java    From Cubert with Apache License 2.0 5 votes vote down vote up
/**
 * Describes this aggregator's output: a TUPLE field named "resultsTuple"
 * holding two LONG fields, "sum" and "sum_squared".
 *
 * @param inputSchema schema of the input; not consulted here
 * @return the field schema of the result tuple
 * @throws IOException declared by the overridden method
 */
@Override
public FieldSchema outputSchema(Schema inputSchema) throws IOException
{
    FieldSchema sum = new FieldSchema("sum", DataType.LONG);
    FieldSchema sumSquared = new FieldSchema("sum_squared", DataType.LONG);

    List<FieldSchema> resultFields = new ArrayList<FieldSchema>();
    resultFields.add(sum);
    resultFields.add(sumSquared);

    return new FieldSchema("resultsTuple", new Schema(resultFields), DataType.TUPLE);
}
 
Example #21
Source File: ROUND.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps each supported single-argument type to its implementation:
 * BYTEARRAY to this class, DOUBLE to {@code DoubleRound}, FLOAT to
 * {@code FloatRound}.
 *
 * @return the function specs in the order listed above
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema bytearrayArg = new Schema(new Schema.FieldSchema(null, DataType.BYTEARRAY));
    Schema doubleArg = new Schema(new Schema.FieldSchema(null, DataType.DOUBLE));
    Schema floatArg = new Schema(new Schema.FieldSchema(null, DataType.FLOAT));

    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), bytearrayArg));
    funcList.add(new FuncSpec(DoubleRound.class.getName(), doubleArg));
    funcList.add(new FuncSpec(FloatRound.class.getName(), floatArg));
    return funcList;
}
 
Example #22
Source File: RubySchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves alias collisions in a schema, modifying the Schema object in place.
 *
 * Only aliases of the form tuple_#, bag_#, map_# or val_# (matched
 * case-insensitively) are renamed; per the original author these are
 * generally the names generated by Util.getSchemaFromString. A colliding
 * alias keeps its prefix and has its number incremented until the name is
 * unused at this level. Any other duplicate alias is left alone, as that
 * conflict was created by the user.
 *
 * Nested schemas are fixed recursively; for a BAG the schema of the bag's
 * first field is used. Fields with a null alias are skipped entirely,
 * including their nested schemas.
 *
 * @param s a Schema object to fix in place; null is ignored
 */
private static void fixSchemaNames(Schema s) {
    if (s == null) {
        return;
    }

    // Matches names that could possibly collide and that we are allowed to change.
    Pattern renamable = Pattern.compile("(bag_|tuple_|map_|val_)(\\d+)", Pattern.CASE_INSENSITIVE);
    Set<String> seen = new HashSet<String>(s.size(), 1.0f);

    for (Schema.FieldSchema field : s.getFields()) {
        if (field.alias == null) {
            continue;
        }

        Matcher match = renamable.matcher(field.alias);
        if (match.matches() && seen.contains(field.alias)) {
            // Keep the prefix and bump the number until the name is free.
            String prefix = match.group(1);
            int number = Integer.parseInt(match.group(2));
            String candidate = prefix + number;
            while (seen.contains(candidate)) {
                number++;
                candidate = prefix + number;
            }
            field.alias = candidate;
        }
        seen.add(field.alias);

        Schema nested = field.schema;
        if (nested == null) {
            continue;
        }
        if (field.type != DataType.BAG) {
            fixSchemaNames(nested);
            continue;
        }
        try {
            // For a bag, descend into the schema of its first field.
            fixSchemaNames(nested.getField(0).schema);
        } catch (FrontendException e) {
            throw new RuntimeException("Error recursively fixing schema: " + s, e);
        }
    }
}
 
Example #23
Source File: ToMilliSeconds.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a single unnamed DATETIME argument to this class.
 *
 * @return a one-element list of function specs
 * @throws FrontendException declared by the overridden method
 */
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    Schema argSchema = new Schema(new Schema.FieldSchema(null, DataType.DATETIME));
    String implementation = this.getClass().getName();

    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(implementation, argSchema));
    return funcList;
}
 
Example #24
Source File: TupleReadSupport.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Chooses the Parquet schema to read.
 *
 * When the configuration yields no Pig schema, the file schema is requested
 * as-is. Otherwise the file schema is projected down to the requested Pig
 * schema and required fields, honouring the column-index-access setting
 * (false when unset).
 *
 * @param initContext supplies the configuration and the file schema
 * @return the read context carrying the requested schema
 */
@Override
public ReadContext init(InitContext initContext) {
  Schema pigSchema = getPigSchema(initContext.getConfiguration());
  RequiredFieldList requiredFields = getRequiredFields(initContext.getConfiguration());
  boolean columnIndexAccess = initContext.getConfiguration().getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);

  if (pigSchema == null) {
    return new ReadContext(initContext.getFileSchema());
  }

  // project the file schema according to the requested Pig schema
  PigSchemaConverter converter = new PigSchemaConverter(columnIndexAccess);
  MessageType projected = converter.filter(initContext.getFileSchema(), pigSchema, requiredFields);
  return new ReadContext(projected);
}
 
Example #25
Source File: TestProjectRange.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * ORDER BY mixing project-ranges and single columns on an input loaded
 * without a schema: expects a null result schema, four sort expression plans
 * with the given ascending flags, and the two expected rows in order.
 */
@Test
public void testRangeOrderByMixNOSchema() throws IOException, ParserException{
    final String query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + " o = order l1 by  $1 .. $2 DESC,  $0 , $4 .. DESC;"
        ;
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans
    final LogicalPlan plan = createAndProcessLPlan(query);
    final boolean[] ascendingFlags = {false, false,true,false};
    checkNumExpressionPlansForSort(plan, 4, ascendingFlags);

    Util.registerMultiLineQuery(pigServer, query);
    pigServer.explain("o", System.err);

    final String[] expectedRows = {
            "(11,21,31,41,51)",
            "(10,20,30,40,50)",
    };
    Iterator<Tuple> actualRows = pigServer.openIterator("o");
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputs(actualRows, expectedRes);
}
 
Example #26
Source File: StyTest.java    From validatar with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a query against a mocked server whose schema declares column "a" with
 * DataType.NULL, and expects the query to succeed with a single null value in
 * that column.
 */
@Test
public void testNullTypeInTuple() throws IOException {
    Schema nullTypedSchema = getSchema(makeFieldSchema("a", DataType.NULL));
    Tuple row = makeTuple("something");
    sty = getSty(withMockResult(withMockSchema(getServer(), nullTypedSchema), row));

    Query query = new Query();
    query.value = "";
    runWithoutOutput(() -> sty.execute(query));

    Assert.assertFalse(query.failed());
    List<TypedObject> columnValues = query.getResult().getColumn("a").getValues();
    Assert.assertNotNull(columnValues);
    Assert.assertEquals(columnValues.size(), 1);
    Assert.assertNull(columnValues.get(0));
}
 
Example #27
Source File: TypeCheckingTestUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a flat schema from parallel arrays of aliases and type codes.
 *
 * @param aliases field aliases, one per field
 * @param types type codes, positionally matching {@code aliases}
 * @return a schema whose i-th field has alias {@code aliases[i]} and type {@code types[i]}
 * @throws AssertionError if the two arrays differ in length
 */
public static Schema genFlatSchema(String[] aliases, byte[] types) {
    final int fieldCount = aliases.length;
    if (fieldCount != types.length) {
        throw new AssertionError(" aliase number and type number don't match") ;
    }

    List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>() ;
    int position = 0;
    while (position < fieldCount) {
        fields.add(new Schema.FieldSchema(aliases[position], types[position])) ;
        position++;
    }
    return new Schema(fields) ;
}
 
Example #28
Source File: MapSummaryData.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Records one map value in this summary: the map itself, its size, and every
 * key and value it contains.
 *
 * The key and value summaries are created on the first non-empty map seen;
 * both are named after field 0 of {@code schema}.
 *
 * @param schema schema from which field 0 is read for naming and for the value schema
 * @param m the map to summarize
 */
public void add(Schema schema, Map<?, ?> m) {
  super.add(m);
  size.add(m.size());

  FieldSchema field = getField(schema, 0);
  boolean firstNonEmptyMap = m.size() > 0 && key == null;
  if (firstNonEmptyMap) {
    // Lazily create both summaries together.
    key = new FieldSummaryData();
    key.setName(getName(field));
    value = new FieldSummaryData();
    value.setName(getName(field));
  }

  for (Map.Entry<?, ?> pair : m.entrySet()) {
    key.add(null, pair.getKey());
    value.add(getSchema(field), pair.getValue());
  }
}
 
Example #29
Source File: TestSchemaUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises every {@code SchemaUtil.newBagSchema} overload (array and list
 * forms) and checks the printed schema: with explicit bag/tuple names, with
 * only field names (bag "b", tuple "t"), and with only types (fields "f0",
 * "f1").
 */
@Test
public void testBagSchema() throws Exception {
    final String[] fieldNames = { "field_0", "field_1" };
    final Byte[] dataTypes = { DataType.LONG, DataType.CHARARRAY };

    // Explicit bag and tuple names.
    final String bagName = "mybag";
    final String tupleName = "mytuple";
    final String named = "{mybag: {mytuple: (field_0: long,field_1: chararray)}}";
    Schema actual = SchemaUtil.newBagSchema(bagName, tupleName, fieldNames, dataTypes);
    assertEquals(named, actual.toString());
    actual = SchemaUtil.newBagSchema(bagName, tupleName,
            Arrays.asList(fieldNames), Arrays.asList(dataTypes));
    assertEquals(named, actual.toString());

    // Field names only.
    final String defaultContainers = "{b: {t: (field_0: long,field_1: chararray)}}";
    actual = SchemaUtil.newBagSchema(fieldNames, dataTypes);
    assertEquals(defaultContainers, actual.toString());
    actual = SchemaUtil.newBagSchema(Arrays.asList(fieldNames), Arrays.asList(dataTypes));
    assertEquals(defaultContainers, actual.toString());

    // Types only.
    final String allDefaults = "{b: {t: (f0: long,f1: chararray)}}";
    actual = SchemaUtil.newBagSchema(dataTypes);
    assertEquals(allDefaults, actual.toString());
    actual = SchemaUtil.newBagSchema(Arrays.asList(dataTypes));
    assertEquals(allDefaults, actual.toString());
}
 
Example #30
Source File: Entropy.java    From datafu with Apache License 2.0 5 votes vote down vote up
/**
 * Validates that the first input field is a BAG whose first inner field is a
 * TUPLE, then declares a single DOUBLE output field named via
 * {@code getSchemaName} from this class's lower-cased name and the input.
 *
 * @param input schema of the function's input
 * @return a one-field schema of type DOUBLE
 * @throws RuntimeException if the input is not a bag of tuples, or wrapping a
 *         FrontendException raised while inspecting the schema
 */
@Override
public Schema outputSchema(Schema input)
{
    try {
        Schema.FieldSchema bagField = input.getField(0);
        if (bagField.type != DataType.BAG)
        {
          throw new RuntimeException("Expected a BAG as input");
        }

        Schema.FieldSchema bagElement = bagField.schema.getField(0);
        if (bagElement.type != DataType.TUPLE)
        {
          String foundType = DataType.findTypeName(bagElement.type);
          throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                   foundType));
        }

        String outputName = getSchemaName(this.getClass().getName().toLowerCase(), input);
        return new Schema(new Schema.FieldSchema(outputName, DataType.DOUBLE));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}