org.apache.pig.impl.util.Utils Java Examples

The following examples show how to use org.apache.pig.impl.util.Utils. Each example notes its source file, originating project (spork or parquet-mr), and license.
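
Most of these examples center on Utils.getSchemaFromString(...), which parses a Pig schema from its string form. As a primer, here is a minimal, self-contained sketch of that call (the class name UtilsSchemaDemo is ours, purely for illustration; only Pig needs to be on the classpath):

import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.util.Utils;
import org.apache.pig.parser.ParserException;

public class UtilsSchemaDemo {
    public static void main(String[] args) throws ParserException {
        // Parse a two-field schema from its string representation.
        Schema schema = Utils.getSchemaFromString("name:chararray, age:int");
        for (Schema.FieldSchema field : schema.getFields()) {
            // Prints "name: chararray" then "age: int".
            System.out.println(field.alias + ": " + DataType.findTypeName(field.type));
        }
    }
}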
Example #1
Source File: JsonLoader.java    From spork with Apache License 2.0
@SuppressWarnings("unchecked")
public void prepareToRead(RecordReader reader, PigSplit split)
throws IOException {
    this.reader = reader;
    
    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p =
        udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Parse the schema from the string stored in the properties object.
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    jsonFactory = new JsonFactory();
}
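
The schema string read back here must have been planted on the front end first. A sketch of that half of the handshake, modeled on the storer side of Pig's built-in JSON support (a loader typically does the same in getSchema() or setLocation(); treat this as illustrative rather than the project's verbatim code):

// Front-end side: serialize the schema into the same UDFContext
// properties bucket that prepareToRead() reads from above.
public void checkSchema(ResourceSchema s) throws IOException {
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p =
        udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
    p.setProperty(SCHEMA_SIGNATURE, s.toString());
}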
 
Example #2
Source File: MapRedUtil.java    From spork with Apache License 2.0
public static FileSpec checkLeafIsStore(
        PhysicalPlan plan,
        PigContext pigContext) throws ExecException {
    try {
        PhysicalOperator leaf = plan.getLeaves().get(0);
        FileSpec spec = null;
        if(!(leaf instanceof POStore)){
            String scope = leaf.getOperatorKey().getScope();
            POStore str = new POStore(new OperatorKey(scope,
                NodeIdGenerator.getGenerator().getNextNodeId(scope)));
            spec = new FileSpec(FileLocalizer.getTemporaryPath(
                pigContext).toString(),
                new FuncSpec(Utils.getTmpFileCompressorName(pigContext)));
            str.setSFile(spec);
            plan.addAsLeaf(str);
        } else{
            spec = ((POStore)leaf).getSFile();
        }
        return spec;
    } catch (Exception e) {
        int errCode = 2045;
        String msg = "Internal error. Not able to check if the leaf node is a store operator.";
        throw new ExecException(msg, errCode, PigException.BUG, e);
    }
}
 
Example #3
Source File: TestJoin.java    From spork with Apache License 2.0
@Test
public void testIndirectSelfJoinData() throws Exception {
    setUp(ExecType.LOCAL);
    Data data = resetData(pigServer);

    Set<Tuple> tuples = Sets.newHashSet(tuple("a", 1), tuple("b", 2), tuple("c", 3));
    data.set("foo", Utils.getSchemaFromString("field1:chararray,field2:int"), tuples);
    pigServer.registerQuery("A = load 'foo' using mock.Storage();");
    pigServer.registerQuery("B = foreach A generate field1, field2*2 as field2;");
    pigServer.registerQuery("C = join A by field1, B by field1;");
    pigServer.registerQuery("D = foreach C generate A::field1 as field1_a, B::field1 as field1_b, A::field2 as field2_a, B::field2 as field2_b;");
    pigServer.registerQuery("store D into 'foo_out' using mock.Storage();");

    Set<Tuple> expected = Sets.newHashSet(tuple("a", "a", 1, 2), tuple("b", "b", 2, 4), tuple("c", "c", 3, 6));
    List<Tuple> out = data.get("foo_out");
    assertEquals("Expected size was "+expected.size()+" but was "+out.size(), expected.size(), out.size());
    for (Tuple t : out) {
        assertTrue("Should have found tuple "+t+" in expected: "+expected, expected.remove(t));
    }
    assertTrue("All expected tuples should have been found, remaining: "+expected, expected.isEmpty());
}
 
Example #4
Source File: JsFunction.java    From spork with Apache License 2.0
public JsFunction(String functionName) {
    this.jsScriptEngine = JsScriptEngine.getInstance();
    this.functionName = functionName;
    Object outputSchemaObj = jsScriptEngine.jsEval(this.getClass().getName() + "(String)",
            functionName + ".outputSchema");
    //if no schema defined, fall back to bytearray
    if (outputSchemaObj == null || outputSchemaObj instanceof Undefined) {
        this.outputSchema = new Schema(new Schema.FieldSchema(null, DataType.BYTEARRAY));
    }
    else {
        try {
            this.outputSchema = Utils.getSchemaFromString(outputSchemaObj.toString());
        }
        catch (ParserException e) {
            throw new IllegalArgumentException(functionName
                    + ".outputSchema is not a valid schema: " + e.getMessage(), e);
        }
    }

}
 
Example #5
Source File: GroovyEvalFunc.java    From spork with Apache License 2.0
@Override
public Schema outputSchema(Schema input) {
  if (null != this.schemaFunction) {
    try {
      Tuple t = TupleFactory.getInstance().newTuple(1);
      // Strip enclosing '{}' from schema
      t.set(0, input.toString().replaceAll("^\\{", "").replaceAll("\\}$", ""));
      return Utils.getSchemaFromString((String) this.schemaFunction.exec(t));
    } catch (ParserException pe) {
      throw new RuntimeException(pe);
    } catch (IOException ioe) {
      throw new RuntimeException(ioe);
    }
  } else {
    return this.schema;
  }
}
 
Example #6
Source File: TestJoin.java    From spork with Apache License 2.0
@Test
public void testIndirectSelfJoinRealias() throws Exception {
    setUp(ExecType.LOCAL);
    Data data = resetData(pigServer);

    Set<Tuple> tuples = Sets.newHashSet(tuple("a"), tuple("b"), tuple("c"));
    data.set("foo", Utils.getSchemaFromString("field1:chararray"), tuples);
    pigServer.registerQuery("A = load 'foo' using mock.Storage();");
    pigServer.registerQuery("B = foreach A generate *;");
    pigServer.registerQuery("C = join A by field1, B by field1;");
    assertEquals(Utils.getSchemaFromString("A::field1:chararray, B::field1:chararray"), pigServer.dumpSchema("C"));
    pigServer.registerQuery("D = foreach C generate B::field1, A::field1 as field2;");
    assertEquals(Utils.getSchemaFromString("B::field1:chararray, field2:chararray"), pigServer.dumpSchema("D"));
    pigServer.registerQuery("E = foreach D generate field1, field2;");
    assertEquals(Utils.getSchemaFromString("B::field1:chararray, field2:chararray"), pigServer.dumpSchema("E"));
    pigServer.registerQuery("F = foreach E generate field2;");
    pigServer.registerQuery("store F into 'foo_out' using mock.Storage();");
    List<Tuple> out = data.get("foo_out");
    assertEquals("Expected size was "+tuples.size()+" but was "+out.size(), tuples.size(), out.size());
    for (Tuple t : out) {
        assertTrue("Should have found tuple "+t+" in expected: "+tuples, tuples.remove(t));
    }
    assertTrue("All expected tuples should have been found, remaining: "+tuples, tuples.isEmpty());
}
 
Example #7
Source File: JobControlCompiler.java    From spork with Apache License 2.0
private static String addSingleFileToDistributedCache(
        PigContext pigContext, Configuration conf, String filename,
        String prefix) throws IOException {

    if (!pigContext.inIllustrator && !FileLocalizer.fileExists(filename, pigContext)) {
        throw new IOException(
                "Internal error: skew join partition file "
                        + filename + " does not exist");
    }

    String symlink = filename;

    // XXX Hadoop currently doesn't support distributed cache in local mode.
    // This line will be removed after the support is added by Hadoop team.
    if (!Utils.isLocal(pigContext, conf)) {
        symlink = prefix + "_"
                + Integer.toString(System.identityHashCode(filename)) + "_"
                + Long.toString(System.currentTimeMillis());
        filename = filename + "#" + symlink;
        setupDistributedCache(pigContext, conf, new String[] { filename },
                false);
    }

    return symlink;
}
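
The filename + "#" + symlink concatenation above leans on Hadoop's URI-fragment convention: the fragment names a symlink that appears in each task's working directory. A hedged illustration using the classic org.apache.hadoop.filecache.DistributedCache API (the path and symlink name are made up):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;

// Hypothetical helper showing the '#' fragment convention in isolation.
static void cacheWithSymlink(Configuration conf) throws Exception {
    // The fragment after '#' becomes the symlink name in the task's
    // working directory, exactly the string the code above constructs.
    URI cacheUri = new URI("/tmp/pig/partition.lst#skewjoin_42_1700000000000");
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);
}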
 
Example #8
Source File: JsonStorage.java    From spork with Apache License 2.0
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Store the record writer reference so we can use it when it's time
    // to write tuples
    this.writer = writer;

    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p =
        udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature});
    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Parse the schema from the string stored in the properties object.
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    // Build a Json factory
    jsonFactory = new JsonFactory();
}
 
Example #9
Source File: JobControlCompiler.java    From spork with Apache License 2.0
@Override
public void visitMergeJoin(POMergeJoin join) throws VisitorException {

    // XXX Hadoop currently doesn't support distributed cache in local mode.
    // This line will be removed after the support is added
    if (Utils.isLocal(pigContext, conf)) return;

    String indexFile = join.getIndexFile();

    // merge join may not use an index file
    if (indexFile == null) return;

    try {
        String symlink = addSingleFileToDistributedCache(pigContext,
                conf, indexFile, "indexfile_");
        join.setIndexFile(symlink);
    } catch (IOException e) {
        String msg = "Internal error. Distributed cache could not " +
                "be set up for merge join index file";
        throw new VisitorException(msg, e);
    }
}
 
Example #10
Source File: JobControlCompiler.java    From spork with Apache License 2.0
@Override
public void visitMergeCoGroup(POMergeCogroup mergeCoGrp)
        throws VisitorException {

    // XXX Hadoop currently doesn't support distributed cache in local mode.
    // This line will be removed after the support is added
    if (Utils.isLocal(pigContext, conf)) return;

    String indexFile = mergeCoGrp.getIndexFileName();

    if (indexFile == null) throw new VisitorException("No index file");

    try {
        String symlink = addSingleFileToDistributedCache(pigContext,
                conf, indexFile, "indexfile_mergecogrp_");
        mergeCoGrp.setIndexFileName(symlink);
    } catch (IOException e) {
        String msg = "Internal error. Distributed cache could not " +
                "be set up for merge cogrp index file";
        throw new VisitorException(msg, e);
    }
}
 
Example #11
Source File: FetchOptimizer.java    From spork with Apache License 2.0
@Override
public void visitStore(POStore st) throws VisitorException{
    String basePathName = st.getSFile().getFileName();

    // the plan is fetchable if the POStore belongs to an EXPLAIN
    if ("fakefile".equals(basePathName)) {
        return;
    }

    // Otherwise, check whether the target storage format matches the intermediate
    // storage format and the path points to a temporary storage location
    boolean hasTmpStorageClass = st.getStoreFunc().getClass()
        .equals(Utils.getTmpFileStorageClass(pc.getProperties()));

    try {
        boolean hasTmpTargetPath = isTempPath(basePathName);
        if (!(hasTmpStorageClass && hasTmpTargetPath)) {
            planFetchable = false;
        }
    }
    catch (IOException e) {
        String msg = "Internal error. Could not retrieve temporary store location.";
        throw new VisitorException(msg, e);
    }
}
 
Example #12
Source File: TupleConsumerPerfTest.java    From parquet-mr with Apache License 2.0
private static void read(PageReadStore columns, String pigSchemaString, String message) throws ParserException {
    System.out.println(message);
    MessageColumnIO columnIO = newColumnFactory(pigSchemaString);
    TupleReadSupport tupleReadSupport = new TupleReadSupport();
    Map<String, String> pigMetaData = pigMetaData(pigSchemaString);
    MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchemaString));
    ReadContext init = tupleReadSupport.init(null, pigMetaData, schema);
    RecordMaterializer<Tuple> recordConsumer = tupleReadSupport.prepareForRead(null, pigMetaData, schema, init);
    RecordReader<Tuple> recordReader = columnIO.getRecordReader(columns, recordConsumer);
    // TODO: put this back
//  if (DEBUG) {
//    recordConsumer = new RecordConsumerLoggingWrapper(recordConsumer);
//  }
    read(recordReader, 10000, pigSchemaString);
    read(recordReader, 10000, pigSchemaString);
    read(recordReader, 10000, pigSchemaString);
    read(recordReader, 10000, pigSchemaString);
    read(recordReader, 10000, pigSchemaString);
    read(recordReader, 100000, pigSchemaString);
    read(recordReader, 1000000, pigSchemaString);
    System.out.println();
}
 
Example #13
Source File: TupleConsumerPerfTest.java    From parquet-mr with Apache License 2.0
public static void main(String[] args) throws Exception {
  String pigSchema = pigSchema(false, false);
  String pigSchemaProjected = pigSchema(true, false);
  String pigSchemaNoString = pigSchema(true, true);
  MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchema));

  MemPageStore memPageStore = new MemPageStore(0);
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore, ParquetProperties.builder()
          .withPageSize(50*1024*1024)
          .withDictionaryEncoding(false)
          .build());
  write(memPageStore, columns, schema, pigSchema);
  columns.flush();
  read(memPageStore, pigSchema, pigSchemaProjected, pigSchemaNoString);
  System.out.println(columns.getBufferedSize()+" bytes used total");
  System.out.println("max col size: "+columns.maxColMemSize()+" bytes");
}
 
Example #14
Source File: TestBuiltinInvoker.java    From spork with Apache License 2.0
@Test
public void testStringSize() throws Exception {
    Set<Tuple> input = Sets.newHashSet();
    Set<Tuple> expected = Sets.newHashSet();
    for (int i = 0; i < 1000; i++) {
        String val = Integer.toString(r.nextInt());
        input.add(tuple(val));
        expected.add(tuple(val, val.length()));
    }
    data.set("foo", Utils.getSchemaFromString("x:chararray"), input);

    pigServer.registerQuery("a = load 'foo' using mock.Storage();");
    pigServer.registerQuery("b = foreach @ generate $0, invoke($0)length();");
    pigServer.registerQuery("store b into 'bar' using mock.Storage();");

    dataIsEqual(expected, data.get("bar"));
}
 
Example #15
Source File: TestStore.java    From spork with Apache License 2.0
@Test
public void testBinStorageGetSchema() throws IOException, ParserException {
    String[] input = new String[] { "hello\t1\t10.1", "bye\t2\t20.2" };
    String inputFileName = "testGetSchema-input.txt";
    String outputFileName = "testGetSchema-output.txt";
    try {
        Util.createInputFile(pig.getPigContext(),
                inputFileName, input);
        String query = "a = load '" + inputFileName + "' as (c:chararray, " +
                "i:int,d:double);store a into '" + outputFileName + "' using " +
                        "BinStorage();";
        pig.setBatchOn();
        Util.registerMultiLineQuery(pig, query);
        pig.executeBatch();
        ResourceSchema rs = new BinStorage().getSchema(outputFileName,
                new Job(ConfigurationUtil.toConfiguration(pig.getPigContext().
                        getProperties())));
        Schema expectedSchema = Utils.getSchemaFromString(
                "c:chararray,i:int,d:double");
        assertTrue("Checking binstorage getSchema output", Schema.equals(
                expectedSchema, Schema.getPigSchema(rs), true, true));
    } finally {
        Util.deleteFile(pig.getPigContext(), inputFileName);
        Util.deleteFile(pig.getPigContext(), outputFileName);
    }
}
 
Example #16
Source File: TestLoaderStorerShipCacheFiles.java    From spork with Apache License 2.0
@Test
public void testShipOrcLoader() throws Exception {
    String query = "a = load 'test/org/apache/pig/builtin/orc/orc-file-11-format.orc' using OrcStorage();" +
            "store a into 'ooo';";
    PhysicalPlan pp = Util.buildPp(pigServer, query);

    String hadoopVersion = "20S";
    if (Utils.isHadoop23() || Utils.isHadoop2()) {
        hadoopVersion = "23";
    }
    String[] expectedJars = new String[] {"hive-common", "hive-exec", "hive-serde", 
            "hive-shims-0." + hadoopVersion, "hive-shims-common-0", "hive-shims-common-secure",
            "kryo"};

    checkPlan(pp, expectedJars, 7, pigServer.getPigContext());
}
 
Example #17
Source File: FileLocalizer.java    From spork with Apache License 2.0
private static synchronized ContainerDescriptor getTempContainer(final PigContext pigContext)
        throws DataStorageException {
    ContainerDescriptor tempContainer = null;
    String tdir = Utils.substituteVars(pigContext.getProperties().getProperty(PigConfiguration.PIG_TEMP_DIR, "/tmp"));
    try {
        do {
            tempContainer = pigContext.getDfs().asContainer(tdir + "/temp" + r.nextInt());
        } while (tempContainer.exists());
        createContainer(tempContainer);
    }
    catch (IOException e) {
        // try one last time, in case the IOException was caused by directory
        // operations from another JVM creating the same path at the same instant
        tempContainer = pigContext.getDfs().asContainer(tdir + "/temp" + r.nextInt());
        try {
            createContainer(tempContainer);
        }
        catch (IOException e1) {
            throw new DataStorageException(e1);
        }
    }
    return tempContainer;
}
 
Example #18
Source File: DependencyOrderWalker.java    From spork with Apache License 2.0
protected void doAllPredecessors(O node,
                               Set<O> seen,
                               Collection<O> fifo) throws VisitorException {
    if (!seen.contains(node)) {
        // We haven't seen this one before.
        Collection<O> preds = Utils.mergeCollection(mPlan.getPredecessors(node), mPlan.getSoftLinkPredecessors(node));
        if (preds != null && preds.size() > 0) {
            // Do all our predecessors before ourself
            for (O op : preds) {
                doAllPredecessors(op, seen, fifo);
            }
        }
        // Now do ourself
        seen.add(node);
        fifo.add(node);
    }
}
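
The preds != null guard above implicitly documents Utils.mergeCollection's contract: either input may be null, and the result may be null or empty when there is nothing to merge. A small sketch under that assumed contract:

import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.pig.impl.util.Utils;

List<String> hard = Arrays.asList("op1", "op2");  // plan predecessors
List<String> soft = null;                         // no soft-link predecessors
Collection<String> merged = Utils.mergeCollection(hard, soft);
// merged contains op1 and op2; were both inputs null, the walker's
// null check would simply skip the recursion.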
 
Example #19
Source File: ReverseDependencyOrderWalker.java    From spork with Apache License 2.0
protected void doAllSuccessors(O node,
                               Set<O> seen,
                               Collection<O> fifo) throws VisitorException {
    if (!seen.contains(node)) {
        // We haven't seen this one before.
        Collection<O> succs = Utils.mergeCollection(mPlan.getSuccessors(node), mPlan.getSoftLinkSuccessors(node));
        if (succs != null && succs.size() > 0) {
            // Do all our successors before ourself
            for (O op : succs) {
                doAllSuccessors(op, seen, fifo);
            }
        }
        // Now do ourself
        seen.add(node);
        fifo.add(node);
    }
}
 
Example #20
Source File: FixedWidthStorer.java    From spork with Apache License 2.0
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Store writer to use in putNext()
    this.writer = writer;

    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfContextSignature });
    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }

    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));
    fields = schema.getFields();
}
 
Example #21
Source File: TestProjectStarRangeInUdf.java    From spork with Apache License 2.0
@Test
public void testProjMixExpand1NoSchema() throws IOException {

    String query;

    query =
        "  l1 = load '" + INP_FILE_5FIELDS + "';"
        + "f = foreach l1 generate TOBAG(*, $0 .. $2) as tt;";
    Schema sch = Utils.getSchemaFromString("tt : {(NullALias)}");
    sch.getField(0).schema.getField(0).schema.getField(0).alias = null;
    sch.getField(0).schema.getField(0).schema.getField(0).type = DataType.NULL;
    
    compileAndCompareSchema(sch, query, "f");
    Iterator<Tuple> it = pigServer.openIterator("f");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStringAsByteArray(
                new String[] {
                        "({('10'),('20'),('30'),('40'),('50'),('10'),('20'),('30')})",
                        "({('11'),('21'),('31'),('41'),('51'),('11'),('21'),('31')})",
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #22
Source File: FetchLauncher.java    From spork with Apache License 2.0
private void init(PhysicalPlan pp, POStore poStore) throws IOException {
    poStore.setStoreImpl(new FetchPOStoreImpl(pigContext));
    poStore.setUp();

    TaskAttemptID taskAttemptID = HadoopShims.getNewTaskAttemptID();
    HadoopShims.setTaskAttemptId(conf, taskAttemptID);

    if (!PlanHelper.getPhysicalOperators(pp, POStream.class).isEmpty()) {
        MapRedUtil.setupStreamingDirsConfSingle(poStore, pigContext, conf);
    }

    String currentTime = Long.toString(System.currentTimeMillis());
    conf.set("pig.script.submitted.timestamp", currentTime);
    conf.set("pig.job.submitted.timestamp", currentTime);

    PhysicalOperator.setReporter(new FetchProgressableReporter());
    SchemaTupleBackend.initialize(conf, pigContext);

    UDFContext udfContext = UDFContext.getUDFContext();
    udfContext.addJobConf(conf);
    udfContext.setClientSystemProps(pigContext.getProperties());
    udfContext.serialize(conf);

    PigMapReduce.sJobConfInternal.set(conf);
    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new FetchTaskContext(new FetchContext()));
    PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);
}
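
One assumption worth flagging for the Utils.setDefaultTimeZone(conf) call above: it appears to read Pig's default-timezone property and install it as the process-wide default, so datetime expressions on the fetch path behave like cluster execution. A hedged sketch (the property name pig.datetime.default.tz is taken from Pig's documentation):

Configuration conf = new Configuration();
conf.set("pig.datetime.default.tz", "UTC");
Utils.setDefaultTimeZone(conf);  // assumed to make UTC the default time zone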
 
Example #23
Source File: TestTextDataParser.java    From spork with Apache License 2.0
@Test
public void testMapLongValueType() throws Exception{
    String myMap = "[key1#1l]";
    Schema schema = Utils.getSchemaFromString("m:map[long]");
    ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
    Map<String, Object> map = ps.getLoadCaster().bytesToMap(myMap.getBytes(), rfs);
    String key = map.keySet().iterator().next();
    Object v = map.get("key1");
    assertEquals("key1", key);
    assertTrue(v instanceof Long);
    String value = String.valueOf(v);
    assertEquals("1", value);
}
 
Example #24
Source File: TestTextDataParser.java    From spork with Apache License 2.0
@Test
public void testMapIntegerValueType() throws Exception{
    String myMap = "[key1#1]";
    Schema schema = Utils.getSchemaFromString("m:map[int]");
    ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
    Map<String, Object> map = ps.getLoadCaster().bytesToMap(myMap.getBytes(), rfs);
    String key = map.keySet().iterator().next();
    Object v = map.get("key1");
    assertEquals("key1", key);
    assertTrue(v instanceof Integer);
    String value = String.valueOf(v);
    assertEquals("1", value);
}
 
Example #25
Source File: TestTextDataParser.java    From spork with Apache License 2.0
@Test
public void testMapFloatValueType() throws Exception{
    String myMap = "[key1#0.1f]";
    Schema schema = Utils.getSchemaFromString("m:map[float]");
    ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
    Map<String, Object> map = ps.getLoadCaster().bytesToMap(myMap.getBytes(), rfs);
    String key = map.keySet().iterator().next();
    Object v = map.get("key1");
    assertEquals("key1", key);
    assertTrue(v instanceof Float);
    String value = String.valueOf(v);
    assertEquals("0.1", value);
}
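
The three tests above share one pattern: the value type in map[...] drives the cast, and the text form [key#value] uses Java-style literal suffixes (1l for long, 0.1f for float; a bare 1 is an int). A fourth variation, sketched under the same assumptions (ps is the test class's PigStorage instance, as in the tests above):

// Hedged variation: double-typed map values need no suffix.
String myMap = "[key1#0.5]";
Schema schema = Utils.getSchemaFromString("m:map[double]");
ResourceFieldSchema rfs = new ResourceSchema(schema).getFields()[0];
Map<String, Object> map = ps.getLoadCaster().bytesToMap(myMap.getBytes(), rfs);
assertTrue(map.get("key1") instanceof Double);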
 
Example #26
Source File: ScriptSchemaTestLoader.java    From spork with Apache License 2.0
@Override
public ResourceSchema getSchema(String location, Job job)
        throws IOException {

    scriptSchema = Utils.getScriptSchema(getUDFContextSignature(),
            job.getConfiguration());

    return null;
}
 
Example #27
Source File: TestBuiltinInvoker.java    From spork with Apache License 2.0
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL);

    data = resetData(pigServer);

    data.set("chardata", Utils.getSchemaFromString("x:chararray"), chardata);
    data.set("charintdata", Utils.getSchemaFromString("x:chararray"), charintdata);

    r = new Random(42L);
}
 
Example #28
Source File: TestPigServer.java    From spork with Apache License 2.0
@Test
public void testDescribeSort() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );") ;
    pig.registerQuery("b = order a by * desc;") ;
    Schema dumpedSchema = pig.dumpSchema("b") ;
    Schema expectedSchema = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");
    assertEquals(expectedSchema, dumpedSchema);
}
 
Example #29
Source File: DepthFirstWalker.java    From spork with Apache License 2.0
@SuppressWarnings("unchecked")
private void depthFirst(O node,
                        Collection<O> successors,
                        Set<O> seen,
                        PlanVisitor<O, P> visitor) throws VisitorException {
    if (successors == null) return;

    for (O suc : successors) {
        if (seen.add(suc)) {
            suc.visit(visitor);
            Collection<O> newSuccessors = Utils.mergeCollection(mPlan.getSuccessors(suc), mPlan.getSoftLinkSuccessors(suc));
            depthFirst(suc, newSuccessors, seen, visitor);
        }
    }
}
 
Example #30
Source File: StreamingUDF.java    From spork with Apache License 2.0
public StreamingUDF(String language,
                    String filePath, String funcName,
                    String outputSchemaString, String schemaLineNumber,
                    String execType, String isIllustrate)
                            throws StreamingUDFOutputSchemaException, ExecException {
    this.language = language;
    this.filePath = filePath;
    this.funcName = funcName;
    try {
        this.schema = Utils.getSchemaFromString(outputSchemaString);
        //ExecTypeProvider.fromString doesn't seem to load the ExecTypes in
        //mapreduce mode so we'll try to figure out the exec type ourselves.
        if (execType.equals("local")) {
            this.execType = ExecType.LOCAL;
        } else if (execType.equals("mapreduce")) {
            this.execType = ExecType.MAPREDUCE;
        } else {
            //Not sure what exec type - try to get it from the string.
            this.execType = ExecTypeProvider.fromString(execType);
        }
    } catch (ParserException pe) {
        throw new StreamingUDFOutputSchemaException(pe.getMessage(), Integer.valueOf(schemaLineNumber));
    } catch (IOException ioe) {
        String errorMessage = "Invalid exectype passed to StreamingUDF. Should be local or mapreduce";
        log.error(errorMessage, ioe);
        throw new ExecException(errorMessage, ioe);
    }
    this.isIllustrate = isIllustrate;
}