Java Code Examples for org.apache.pig.impl.logicalLayer.schema.Schema

The following examples show how to use org.apache.pig.impl.logicalLayer.schema.Schema. These examples are extracted from open source projects; the originating project, source file, and license are listed above each example.
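Most of the examples below share the same basic pattern: build a Schema from Schema.FieldSchema entries (optionally nesting schemas for tuples and bags) and hand it to a UDF, a loader, or PigServer. The following is a minimal sketch of that pattern; the class name and field names are hypothetical and do not come from any of the projects listed.

import java.util.ArrayList;
import java.util.List;

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;

public class SchemaSketch {
    public static void main(String[] args) throws FrontendException {
        // Build a flat tuple schema: (name: chararray, age: int)
        List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
        fields.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
        fields.add(new Schema.FieldSchema("age", DataType.INTEGER));
        Schema tupleSchema = new Schema(fields);

        // Nest the tuple schema inside a bag field, as several of the
        // UDF outputSchema() examples below do
        Schema bagSchema = new Schema(
                new Schema.FieldSchema("records", tupleSchema, DataType.BAG));

        System.out.println(tupleSchema);   // e.g. {name: chararray,age: int}
        System.out.println(bagSchema);
    }
}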
Example 1
Source Project: spork   Source File: TestOrderBy3.java    License: Apache License 2.0
@Before
public void setUp() throws Exception {
    ArrayList<Tuple> tuples = new ArrayList<Tuple>();

    log.info("Setting up");

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    Random r = new Random();
    for (int i = 0; i < MAX; i++) {
        tuples.add(tuple(i,GenRandomData.genRandString(r)));
    }

    Schema s = new Schema();
    s.add(new Schema.FieldSchema("index", DataType.INTEGER));
    s.add(new Schema.FieldSchema("name", DataType.CHARARRAY));
    data.set("test", s, tuples);
}
 
Example 2
Source Project: spork   Source File: XPath.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {

	final List<FuncSpec> funcList = new ArrayList<FuncSpec>();

	/*either two chararray arguments*/
	List<FieldSchema> fields = new ArrayList<FieldSchema>();
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));

	Schema twoArgInSchema = new Schema(fields);

	funcList.add(new FuncSpec(this.getClass().getName(), twoArgInSchema));

	/*or two chararray and a boolean argument*/
	fields = new ArrayList<FieldSchema>();
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
	fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));

	Schema threeArgInSchema = new Schema(fields);

	funcList.add(new FuncSpec(this.getClass().getName(), threeArgInSchema));

	return funcList;
}
 
Example 3
Source Project: vespa   Source File: VespaDocumentOperation.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private static void writeField(String name, Object value, Byte type, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth) throws IOException {
    if (shouldWriteField(name, properties, depth)) {
        String operation = getPartialOperation(mapPartialOperationMap, name, properties);
        // check whether the field name has the update-map-fields/remove-map-fields property;
        // if so, it needs special treatment because we have to loop through the tuple.
        // Be aware that the operation here is not a Vespa operation such as "put" or "update";
        // it is the field operation we wish to use, such as "assign" or "remove".
        if (operation != null) {
            writePartialUpdateAndRemoveMap(name, value, g, properties, schema, op, depth, operation);
        } else {
            g.writeFieldName(name);
            if (shouldWritePartialUpdate(op, depth)) {
                writePartialUpdate(value, type, g, name, properties, schema, op, depth);
            } else {
                writeValue(value, type, g, name, properties, schema, op, depth);
            }
        }

    }
}
 
Example 4
Source Project: spork   Source File: GroovyEvalFunc.java    License: Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
  if (null != this.schemaFunction) {
    try {
      Tuple t = TupleFactory.getInstance().newTuple(1);
      // Strip enclosing '{}' from schema
      t.set(0, input.toString().replaceAll("^\\{", "").replaceAll("\\}$", ""));
      return Utils.getSchemaFromString((String) this.schemaFunction.exec(t));
    } catch (ParserException pe) {
      throw new RuntimeException(pe);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  } else {
    return this.schema;
  }
}
 
Example 5
Source Project: spork   Source File: TestResourceSchema.java    License: Apache License 2.0
/**
 * Test one-level Pig Schema: multiple fields for a bag
 */
@Test
public void testResourceSchemaWithInvalidPigSchema() 
throws FrontendException {
    String[] aliases = {"f1", "f2"};
    byte[] types = {DataType.CHARARRAY, DataType.INTEGER};
    Schema level0 = TypeCheckingTestUtil.genFlatSchema(
            aliases,types);
    Schema.FieldSchema fld0 = 
        new Schema.FieldSchema("f0", level0, DataType.BAG);
    Schema level1 = new Schema(fld0);
    try {
        Schema.getPigSchema(new ResourceSchema(level1));
        Assert.fail();
    } catch(FrontendException e) {
        assertTrue(e.getErrorCode()==2218);
    }
}
 
Example 6
Source Project: spork   Source File: TestEvalPipeline2.java    License: Apache License 2.0
@Test
public void testDescribeNestedAlias() throws Exception{
    String[] input = {
            "1\t3",
            "2\t4",
            "3\t5"
    };

    Util.createInputFile(cluster, "table_testDescribeNestedAlias", input);
    pigServer.registerQuery("A = LOAD 'table_testDescribeNestedAlias' as (a0, a1);");
    pigServer.registerQuery("P = GROUP A by a1;");
    // Test RelationalOperator
    pigServer.registerQuery("B = FOREACH P { D = ORDER A by $0; generate group, D.$0; };");

    // Test ExpressionOperator - negative test case
    pigServer.registerQuery("C = FOREACH A { D = a0/a1; E=a1/a0; generate E as newcol; };");
    Schema schema = pigServer.dumpSchemaNested("B", "D");
    Assert.assertTrue(schema.toString().equalsIgnoreCase("{a0: bytearray,a1: bytearray}"));
    try {
        schema = pigServer.dumpSchemaNested("C", "E");
    } catch (FrontendException e) {
        Assert.assertTrue(e.getErrorCode() == 1113);
    }
}
 
Example 7
Source Project: datafu   Source File: AliasEvalFuncTest.java    License: Apache License 2.0
@Test
public void getBagTest() throws Exception
{
   ReportBuilder udf = new ReportBuilder();
   udf.setUDFContextSignature("test");
   List<Schema.FieldSchema> fieldSchemaList = new ArrayList<Schema.FieldSchema>();
   fieldSchemaList.add(new Schema.FieldSchema("msisdn", DataType.LONG));
   fieldSchemaList.add(new Schema.FieldSchema("ts", DataType.INTEGER));
   fieldSchemaList.add(new Schema.FieldSchema("center_lon", DataType.DOUBLE));
   fieldSchemaList.add(new Schema.FieldSchema("center_lat", DataType.DOUBLE));
   Schema schemaTuple = new Schema(fieldSchemaList);
   Schema schemaBag = new Schema(new Schema.FieldSchema(ReportBuilder.ORDERED_ROUTES, schemaTuple, DataType.BAG));
   udf.outputSchema(schemaBag);

   Tuple inputTuple = TupleFactory.getInstance().newTuple();
   DataBag inputBag = BagFactory.getInstance().newDefaultBag();
   inputBag.add(TupleFactory.getInstance().newTuple(Arrays.asList(71230000000L, 1382351612, 10.697, 20.713)));
   inputTuple.append(inputBag);
   DataBag outputBag = udf.exec(inputTuple);
   Assert.assertEquals(inputBag, outputBag);
}
 
Example 8
Source Project: spork   Source File: TestLimitSchemaStore.java    License: Apache License 2.0
@Test //end to end test
public void testLimitStoreSchema1() throws Exception{
    Util.createLocalInputFile("student", new String[]{"joe smith:18:3.5","amy brown:25:2.5","jim fox:20:4.0","leo fu:55:3.0"});
    
    pigServer.registerQuery("a = load 'student' using " + PigStorage.class.getName() + "(':') as (name, age, gpa);");
    pigServer.registerQuery("d = distinct a;");
    pigServer.registerQuery("lim = limit d 1;");
    String outFile = "limitSchemaOut";
    Util.deleteDirectory(new File(outFile));
    pigServer.store("lim", outFile,  "PigStorage('\\t', '-schema')");
    pigServer.dumpSchema("lim");
    
    pigServer.registerQuery("b = LOAD '" + outFile + "' using PigStorage('\\t', '-schema');");
    Schema genSchema = pigServer.dumpSchema("b");
    System.err.println(genSchema);
    Assert.assertNotNull(genSchema);
    
}
 
Example 9
Source Project: Surus   Source File: ScorePMML_ElNinoTest.java    License: Apache License 2.0
private Schema buildElNinoInputSchema() throws FrontendException {

    // Build the field schemas
    List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
    fieldSchemas.add(new Schema.FieldSchema("buoy_day_ID", DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("buoy"       , DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("day"        , DataType.CHARARRAY));
    fieldSchemas.add(new Schema.FieldSchema("latitude"   , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("longitude"  , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("zon_winds"  , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("mer_winds"  , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("humidity"   , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("airtemp"    , DataType.DOUBLE   ));
    fieldSchemas.add(new Schema.FieldSchema("s_s_temp"   , DataType.DOUBLE   ));

    return new Schema(fieldSchemas);
}
 
Example 10
Source Project: parquet-mr   Source File: TestThriftToPigCompatibility.java    License: Apache License 2.0
/**
 * <ul> steps:
 * <li>Writes using the thrift mapping
 * <li>Reads using the pig mapping
 * <li>Use Elephant bird to convert from thrift to pig
 * <li>Check that both transformations give the same result
 * @param o the object to convert
 * @throws TException
 */
public static <T extends TBase<?,?>> void validateSameTupleAsEB(T o) throws TException {
  final ThriftSchemaConverter thriftSchemaConverter = new ThriftSchemaConverter();
  @SuppressWarnings("unchecked")
  final Class<T> class1 = (Class<T>) o.getClass();
  final MessageType schema = thriftSchemaConverter.convert(class1);

  final StructType structType = ThriftSchemaConverter.toStructType(class1);
  final ThriftToPig<T> thriftToPig = new ThriftToPig<T>(class1);
  final Schema pigSchema = thriftToPig.toSchema();
  final TupleRecordMaterializer tupleRecordConverter = new TupleRecordMaterializer(schema, pigSchema, true);
  RecordConsumer recordConsumer = new ConverterConsumer(tupleRecordConverter.getRootConverter(), schema);
  final MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
  ParquetWriteProtocol p = new ParquetWriteProtocol(new RecordConsumerLoggingWrapper(recordConsumer), columnIO, structType);
  o.write(p);
  final Tuple t = tupleRecordConverter.getCurrentRecord();
  final Tuple expected = thriftToPig.getPigTuple(o);
  assertEquals(expected.toString(), t.toString());
  final MessageType filtered = new PigSchemaConverter().filter(schema, pigSchema);
  assertEquals(schema.toString(), filtered.toString());
}
 
Example 11
Source Project: spork   Source File: RollupDimensions.java    License: Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    // "dimensions" is the default namespace assigned to the output schema.
    // It can be overridden by specifying user-defined schema names in the
    // FOREACH operator; if no user-defined names are given, the output
    // schema of a FOREACH using this UDF will have the "dimensions::"
    // namespace for all fields in the tuple.
    try {
        return new Schema(new FieldSchema("dimensions", input, DataType.BAG));
    } catch (FrontendException e) {
        // we are specifying BAG explicitly, so this should not happen
        throw new RuntimeException(e);
    }
}
 
Example 12
Source Project: spork   Source File: TestProjectRange.java    License: Apache License 2.0
@Test
public void testRangeOrderByStartNOSchema() throws IOException, ParserException{
    String query;

    query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + " o = order l1 by $3 .. DESC;"
        ;
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans

    LogicalPlan lp = createAndProcessLPlan(query);
    boolean[] isAsc = {false};
    checkNumExpressionPlansForSort(lp, 1, isAsc);

    Util.registerMultiLineQuery(pigServer, query);

    pigServer.explain("o", System.err);
    Iterator<Tuple> it = pigServer.openIterator("o");

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(11,21,31,41,51)",
                        "(10,20,30,40,50)",
                });
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example 13
Source Project: vespa   Source File: VespaDocumentOperation.java    License: Apache License 2.0
private static void writePartialUpdateAndRemoveMap(String name, Object value, JsonGenerator g, Properties properties, Schema schema, Operation op, int depth, String operation) throws IOException {
    schema = (schema != null) ? schema.getField(0).schema : null;
    // extract the map's key and value schemas, needed when writing JSON for partial map updates
    Schema valueSchema = (schema != null) ? schema.getField(1).schema : null;
    // data format  { ( key; id, value: (abc,123,(123234,bbaa))) }
    // the first element of each tuple in the bag will be the map to update
    // the second element of each tuple in the bag will be the new value of the map
    DataBag bag = (DataBag) value;
    for (Tuple element : bag) {
        if (element.size() != 2) {
            continue;
        }
        String k = (String) element.get(0);
        Object v = element.get(1);
        Byte t = DataType.findType(v);
        if (t == DataType.TUPLE) {
            g.writeFieldName(name + "{" + k + "}");
            if (operation.equals(PARTIAL_UPDATE_REMOVE)) {
                g.writeStartObject();
                g.writeFieldName(PARTIAL_UPDATE_REMOVE);
                g.writeNumber(0);
                g.writeEndObject();
            } else {
                writePartialUpdate(v, t, g, name, properties, valueSchema, op, depth);
            }
        }
    }
}
 
Example 14
Source Project: spork   Source File: TestTypedMap.java    License: Apache License 2.0
@Test
public void testComplexCast2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast2", input);

    String query = "a = load '" + tmpDirName + "/testComplexCast2' as (m:[int]);" +
        "b = foreach a generate ([long])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    assertEquals("Checking expected schema",sch.toString(), "{m: map[long]}");
    Iterator<Tuple> it = pig.openIterator("b");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size()==1);
    Assert.assertTrue(t.get(0) instanceof Map);
    Assert.assertTrue(((Map)t.get(0)).containsKey("key"));
    Assert.assertTrue(((Map)t.get(0)).containsKey("key2"));
    Assert.assertTrue(((Map)t.get(0)).get("key") instanceof Long);
    Assert.assertTrue(((Map)t.get(0)).get("key").toString().equals("1"));
    Assert.assertTrue(((Map)t.get(0)).get("key2") instanceof Long);
    Assert.assertTrue(((Map)t.get(0)).get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
 
Example 15
Source Project: spork   Source File: TOP.java    License: Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
    try {
        if (input.size() < 3) {
            return null;
        }
        return new Schema(input.getField(2));
    } catch (Exception e) {
        return null;
    }
}
 
Example 16
Source Project: spork   Source File: DoubleDoubleBase.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    List<Schema.FieldSchema> fields = new ArrayList<Schema.FieldSchema>();
    fields.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    fields.add(new Schema.FieldSchema(null, DataType.DOUBLE));
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(fields)));

    return funcList;
}
 
Example 17
Source Project: spork   Source File: DoubleBase.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.DOUBLE))));

    return funcList;
}
 
Example 18
Source Project: spork   Source File: TOBAG.java    License: Apache License 2.0
private boolean nullEquals(Schema currentSchema, Schema newSchema) {
    if(currentSchema == null){
        if(newSchema != null){
            return false;
        }
        return true;
    }
    return Schema.equals(currentSchema, newSchema, false, true);
}
 
Example 19
Source Project: parquet-mr   Source File: MapSummaryData.java    License: Apache License 2.0
public void add(Schema schema, Map<?, ?> m) {
  super.add(m);
  size.add(m.size());
  FieldSchema field = getField(schema, 0);
  if (m.size() > 0 && key == null) {
    key = new FieldSummaryData();
    key.setName(getName(field));
    value = new FieldSummaryData();
    value.setName(getName(field));
  }
  for (Map.Entry<?, ?> entry : m.entrySet()) {
    key.add(null, entry.getKey());
    value.add(getSchema(field), entry.getValue());
  }
}
 
Example 20
Source Project: spork   Source File: TypeCheckingTestUtil.java    License: Apache License 2.0
public static Schema genFlatSchema(String[] aliases, byte[] types) {
    if (aliases.length != types.length) {
        throw new AssertionError(" aliase number and type number don't match") ;
    }
    List<Schema.FieldSchema> fsList = new ArrayList<Schema.FieldSchema>() ;
    for(int i=0; i<aliases.length ;i++) {
        fsList.add(new Schema.FieldSchema(aliases[i], types[i])) ;
    }
    return new Schema(fsList) ;
}
 
Example 21
Source Project: validatar   Source File: StyTest.java    License: Apache License 2.0
@Test
public void testNullTypeInTuple() throws IOException {
    Query query = new Query();
    query.value = "";
    Schema fakeSchema = getSchema(makeFieldSchema("a", DataType.NULL));
    Tuple fakeTuple = makeTuple("something");

    sty = getSty(withMockResult(withMockSchema(getServer(), fakeSchema), fakeTuple));
    runWithoutOutput(() -> sty.execute(query));
    Assert.assertFalse(query.failed());
    List<TypedObject> result = query.getResult().getColumn("a").getValues();
    Assert.assertNotNull(result);
    Assert.assertEquals(result.size(), 1);
    Assert.assertNull(result.get(0));
}
 
Example 22
Source Project: spork   Source File: TestProjectRange.java    License: Apache License 2.0
@Test
public void testRangeOrderByMixNOSchema() throws IOException, ParserException{
    String query;

    query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + " o = order l1 by  $1 .. $2 DESC,  $0 , $4 .. DESC;"
        ;
    compileAndCompareSchema((Schema)null, query, "o");

    //check number of sort expression plans

    LogicalPlan lp = createAndProcessLPlan(query);
    boolean[] isAsc = {false, false,true,false};
    checkNumExpressionPlansForSort(lp, 4, isAsc);

    Util.registerMultiLineQuery(pigServer, query);

    pigServer.explain("o", System.err);
    Iterator<Tuple> it = pigServer.openIterator("o");

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(11,21,31,41,51)",
                        "(10,20,30,40,50)",
                });
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example 23
Source Project: spork   Source File: ToMilliSeconds.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.DATETIME))));

    return funcList;
}
 
Example 24
Source Project: datafu   Source File: Entropy.java    License: Apache License 2.0
@Override
public Schema outputSchema(Schema input)
{
    try {
        Schema.FieldSchema inputFieldSchema = input.getField(0);

        if (inputFieldSchema.type != DataType.BAG)
        {
          throw new RuntimeException("Expected a BAG as input");
        }
        
        Schema inputBagSchema = inputFieldSchema.schema;
        
        if (inputBagSchema.getField(0).type != DataType.TUPLE)
        {
          throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s",
                                                   DataType.findTypeName(inputBagSchema.getField(0).type)));
        }
        
        return new Schema(new Schema.FieldSchema(getSchemaName(this.getClass()
                                                                .getName()
                                                                .toLowerCase(), input),
                                                 DataType.DOUBLE));
    } catch (FrontendException e) {
        throw new RuntimeException(e);
    }
}
 
Example 25
Source Project: spork   Source File: ISODaysBetween.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    Schema s = new Schema();
    s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    s.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
    funcList.add(new FuncSpec(this.getClass().getName(), s));
    return funcList;
}
 
Example 26
Source Project: spork   Source File: TestTypedMap.java    License: Apache License 2.0
@Test
public void testUnTypedMap() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testUnTypedMap", input);

    String query = "a = load '" + tmpDirName + "/testUnTypedMap' as (m:[]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    assertEquals("Checking expected schema",sch.toString(), "{m: map[]}");
    Iterator<Tuple> it = pig.openIterator("a");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertTrue(t.size()==1);
    Assert.assertTrue(t.get(0) instanceof Map);
    Assert.assertTrue(((Map)t.get(0)).containsKey("key"));
    Assert.assertTrue(((Map)t.get(0)).containsKey("key2"));
    Assert.assertTrue(((Map)t.get(0)).get("key") instanceof DataByteArray);
    Assert.assertTrue(((Map)t.get(0)).get("key").toString().equals("1"));
    Assert.assertTrue(((Map)t.get(0)).get("key2") instanceof DataByteArray);
    Assert.assertTrue(((Map)t.get(0)).get("key2").toString().equals("2"));

    Assert.assertFalse(it.hasNext());
}
 
Example 27
Source Project: spork   Source File: TestSecondarySort.java    License: Apache License 2.0
@Test
public void testNestedSortEndToEnd1() throws Exception {
    File tmpFile1 = Util.createTempFileDelOnExit("test", "txt");
    PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile1));
    ps1.println("1\t2\t3");
    ps1.println("1\t3\t4");
    ps1.println("1\t2\t4");
    ps1.println("1\t2\t4");
    ps1.println("1\t2\t4");
    ps1.println("2\t3\t4");
    ps1.close();

    String expected[] = {
            "(2,{(2,3,4)})",
            "(1,{(1,2,3),(1,2,4),(1,2,4),(1,2,4),(1,3,4)})"
    };

    String clusterPath = Util.removeColon(tmpFile1.getCanonicalPath());

    Util.copyFromLocalToCluster(cluster, tmpFile1.getCanonicalPath(), clusterPath);
    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(clusterPath) + "' AS (a0, a1, a2);");
    pigServer.registerQuery("B = group A by $0 parallel 2;");
    pigServer.registerQuery("C = foreach B { D = limit A 10; E = order D by $1; generate group, E;};");
    Iterator<Tuple> iter = pigServer.openIterator("C");
    Schema s = pigServer.dumpSchema("C");

    Util.checkQueryOutputsAfterSortRecursive(iter, expected, org.apache.pig.newplan.logical.Util.translateSchema(s));
    Util.deleteFile(cluster, clusterPath);
}
 
Example 28
Source Project: spork   Source File: TestMergeJoin.java    License: Apache License 2.0
@Test
public void testMergeJoinSch2() throws IOException{
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "';");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE + "';");
    Schema mjSch = null, shjSch = null;
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1) using 'merge';");
    mjSch = pigServer.dumpSchema("C");
    pigServer.registerQuery("C = join A by ($0,$1), B by ($0,$1);");
    shjSch = pigServer.dumpSchema("C");
    Assert.assertTrue(shjSch == null);
}
 
Example 29
Source Project: parquet-mr   Source File: PigSchemaConverter.java    License: Apache License 2.0
@Override
public List<Type> filterTupleSchema(GroupType schemaToFilter, Schema requestedPigSchema, RequiredFieldList requiredFieldsList) {
  List<FieldSchema> fields = requestedPigSchema.getFields();
  List<Type> newFields = new ArrayList<Type>();
  for (int i = 0; i < fields.size(); i++) {
    FieldSchema fieldSchema = fields.get(i);
    String name = name(fieldSchema.alias, "field_"+i);
    if (schemaToFilter.containsField(name)) {
      newFields.add(filter(schemaToFilter.getType(name), fieldSchema));
    }
  }
  return newFields;
}
 
Example 30
Source Project: spork   Source File: GetSecond.java    License: Apache License 2.0
@Override
public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
    List<FuncSpec> funcList = new ArrayList<FuncSpec>();
    funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.DATETIME))));

    return funcList;
}