org.apache.pig.impl.PigContext Java Examples

The following examples show how to use org.apache.pig.impl.PigContext. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JobControlCompiler.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the OutputFormat class on the given job.
 *
 * PigOutputFormat is always the format that ends up being used. When
 * PigConfiguration.PIG_OUTPUT_LAZY is set to "true" (case-insensitive), it is
 * installed through LazyOutputFormat.setOutputFormatClass, which is looked up
 * reflectively; if that lookup or call fails, PigOutputFormat is set directly
 * and a warning is logged.
 *
 * @param job the job whose output format is configured
 */
public static void setOutputFormat(org.apache.hadoop.mapreduce.Job job) {
    String lazyFlag = job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY);
    if (!"true".equalsIgnoreCase(lazyFlag)) {
        // Lazy output not requested: plain PigOutputFormat.
        job.setOutputFormatClass(PigOutputFormat.class);
        return;
    }

    try {
        // Reflection is used so the class is only required when it is actually requested.
        Class<?> lazyFormat = PigContext.resolveClassName(
                "org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat");
        Method wrapper = lazyFormat.getMethod("setOutputFormatClass",
                org.apache.hadoop.mapreduce.Job.class, Class.class);
        wrapper.invoke(null, job, PigOutputFormat.class);
    } catch (Exception e) {
        // Fall back to the unwrapped format rather than failing the job setup.
        job.setOutputFormatClass(PigOutputFormat.class);
        log.warn(PigConfiguration.PIG_OUTPUT_LAZY
                + " is set but LazyOutputFormat couldn't be loaded. Default PigOutputFormat will be used");
    }
}
 
Example #2
Source File: LineageTrimmingVisitor.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a visitor over {@code plan} that uses a PreOrderDepthFirstWalker,
 * stores the supplied collaborators, starts with an empty input-to-data map
 * and then runs {@code init()}.
 *
 * @param plan        the logical plan to visit
 * @param baseData    map from load operators to data bags; kept by reference, not copied
 * @param eg          the example generator
 * @param LogToPhyMap map from logical operators to physical operators
 * @param physPlan    the physical plan
 * @param pc          the PigContext
 * @throws IOException          declared by this constructor
 * @throws InterruptedException declared by this constructor
 */
public LineageTrimmingVisitor(LogicalPlan plan,
        Map<LOLoad, DataBag> baseData,
        ExampleGenerator eg,
        Map<Operator, PhysicalOperator> LogToPhyMap,
        PhysicalPlan physPlan, PigContext pc) throws IOException, InterruptedException {
    super(plan, new PreOrderDepthFirstWalker(plan));

    // Plans and the mapping between them.
    this.plan = plan;
    this.physPlan = physPlan;
    this.LogToPhyMap = LogToPhyMap;

    // Data and helpers.
    this.baseData = baseData;
    this.eg = eg;
    this.pc = pc;
    this.inputToDataMap = new HashMap<FileSpec, DataBag>();

    // Must run last: all fields above are assigned before init() is called.
    init();
}
 
Example #3
Source File: TestGrunt.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds Grunt a script that generates a bag constant with an explicit bag
 * schema and checks that executing it does not throw.
 */
@Test
public void testBagConstantWithSchema() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String script = "a = load 'input1'; b = foreach a generate "
            + "{(1, '1', 0.4f),(2, '2', 0.45)} as "
            + "b: bag{t:(i: int, c:chararray, d: double)};\n";

    // Grunt reads its commands from a reader, so wrap the script bytes in one.
    BufferedReader scriptReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(script.getBytes())));

    new Grunt(scriptReader, context).exec();
}
 
Example #4
Source File: POCast.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Lazily initializes {@code caster} from {@code funcSpec}.
 *
 * Does nothing when the caster is already set or when no funcSpec is
 * available. Otherwise the function is instantiated and its load caster is
 * taken, provided it is a LoadFunc or a StreamToPig.
 *
 * @throws IOException if the instantiated object is neither a LoadFunc nor a StreamToPig
 */
private void instantiateFunc() throws IOException {
    if (caster != null || funcSpec == null) {
        return;
    }

    Object instance = PigContext.instantiateFuncFromSpec(funcSpec);
    if (instance instanceof LoadFunc) {
        caster = ((LoadFunc) instance).getLoadCaster();
        return;
    }
    if (instance instanceof StreamToPig) {
        caster = ((StreamToPig) instance).getLoadCaster();
        return;
    }

    throw new IOException("Invalid class type " + funcSpec.getClassName());
}
 
Example #5
Source File: FetchOptimizer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether the plan fulfills the prerequisites needed for fetching.
 *
 * The plan is eligible only if fetch is enabled for the given context, every
 * root of the plan is a POLoad, and the plan has exactly one leaf.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return true if all prerequisites hold, false otherwise
 */
private static boolean isEligible(PigContext pc, PhysicalPlan pp) {
    if (!isFetchEnabled(pc)) {
        return false;
    }

    List<PhysicalOperator> roots = pp.getRoots();
    for (PhysicalOperator po : roots) {
        if (!(po instanceof POLoad)) {
            String msg = "Expected physical operator at root is POLoad. Found : "
                    + po.getClass().getCanonicalName() + ". Fetch optimizer will be disabled.";
            LOG.debug(msg);
            return false;
        }
    }

    // Consider single leaf jobs only. The leaf count is read once so that the
    // value checked and the value logged are guaranteed to be the same.
    int leafSize = pp.getLeaves().size();
    if (leafSize != 1) {
        LOG.debug("Expected physical plan should have one leaf. Found " + leafSize);
        return false;
    }

    return true;
}
 
Example #6
Source File: TestGroupConstParallelTez.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the Tez plan and DAG for the given physical plan, runs the loader,
 * parallelism and DAG-builder visitors over it, and asserts that every vertex
 * that has at least one input vertex ends up with a parallelism of 100.
 *
 * @param pp the physical plan under test
 * @param pc the PigContext used to build the Tez plan
 * @throws Exception if plan construction or any visitor fails
 */
@Override
public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    TezOperPlan tezPlan = buildTezPlan(pp, pc);

    LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
    loaderStorer.visit();

    ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
    parallelismSetter.visit();

    DAG tezDag = getTezDAG(tezPlan, pc);
    TezDagBuilder dagBuilder = new TezDagBuilder(pc, tezPlan, tezDag, null);
    dagBuilder.visit();
    for (Vertex v : tezDag.getVertices()) {
        if (!v.getInputVertices().isEmpty()) {
            // Expected value goes first so a failure reports "expected 100 but was <actual>".
            assertEquals(100, v.getParallelism());
        }
    }
}
 
Example #7
Source File: FetchOptimizer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Decides whether the physical plan can be executed as a fetch.
 *
 * The plan must pass {@code isEligible}, be accepted by
 * {@link FetchablePlanVisitor}, and every POLoad in it must have a limit
 * other than -1. On success the CONVERTED_TO_FETCH property is set to "true"
 * on the context and {@code init(pp)} is run.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return true if the plan is fetchable
 * @throws VisitorException
 */
public static boolean isPlanFetchable(PigContext pc, PhysicalPlan pp) throws VisitorException {
    if (!isEligible(pc, pp)) {
        return false;
    }

    FetchablePlanVisitor visitor = new FetchablePlanVisitor(pc, pp);
    visitor.visit();
    if (!visitor.isPlanFetchable()) {
        return false;
    }

    // A limit on every loader is required as a safeguard: without one, a large
    // input would be fetched to the client in its entirety, which can be dangerous.
    for (POLoad loader : PlanHelper.getPhysicalOperators(pp, POLoad.class)) {
        if (loader.getLimit() == -1) {
            return false;
        }
    }

    pc.getProperties().setProperty(PigImplConstants.CONVERTED_TO_FETCH, "true");
    init(pp);
    return true;
}
 
Example #8
Source File: EmptyPigStats.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates stats with identical start and end times (the current time), the
 * current "user.name" system property as user id, and exactly one placeholder
 * OutputStats and one placeholder InputStats entry.
 *
 * @param pigContext context whose properties are converted into the Configuration
 *                   attached to both placeholder entries
 * @param poStore    store operator attached to the placeholder OutputStats
 */
public EmptyPigStats(PigContext pigContext, POStore poStore) {
    final long now = System.currentTimeMillis();
    super.pigContext = pigContext;
    super.startTime = now;
    super.endTime = now;
    super.userId = System.getProperty("user.name");

    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());

    // Placeholder output entry.
    OutputStats emptyOutput = new OutputStats(null, -1, -1, true);
    emptyOutput.setConf(conf);
    emptyOutput.setPOStore(poStore);
    this.outputStatsList = Collections.unmodifiableList(Collections.singletonList(emptyOutput));

    // Placeholder input entry.
    InputStats emptyInput = new InputStats(null, -1, -1, true);
    emptyInput.setConf(conf);
    this.inputStatsList = Collections.unmodifiableList(Collections.singletonList(emptyInput));
}
 
Example #9
Source File: PigTest.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the pig script with its variables substituted.
 *
 * The original script text is run through parameter substitution, the result
 * is logged and written to a temp file, and that file is registered with the
 * pig server together with the alias overrides.
 *
 * @throws IOException If the temp file containing the pig script could not be created or written.
 * @throws ParseException The pig script could not have all its variables substituted.
 */
protected void registerScript() throws IOException, ParseException {
  getCluster();

  BufferedReader reader = new BufferedReader(new StringReader(this.originalTextPigScript));
  PigContext context = getPigServer().getPigContext();

  String substitutedPig = context.doParamSubstitution(reader,
                                                      args == null ? null : Arrays.asList(args),
                                                      argFiles == null ? null : Arrays.asList(argFiles));
  LOG.info(substitutedPig);

  File f = File.createTempFile("tmp", "pigunit");
  PrintWriter pw = new PrintWriter(f);
  try {
    pw.println(substitutedPig);
    // PrintWriter never throws on a failed write; checkError() flushes and
    // reports it, so a truncated script is not registered silently.
    if (pw.checkError()) {
      throw new IOException("Failed to write substituted pig script to " + f);
    }
  } finally {
    pw.close();
  }

  String pigSubstitutedFile = f.getCanonicalPath();
  getPigServer().registerScript(pigSubstitutedFile, aliasOverrides);
}
 
Example #10
Source File: TestGrunt.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs "illustrate -script" on illustrate6.pig (empty line/field case) in
 * local mode through Grunt and checks that it does not throw.
 */
@Test
public void testIllustrateScript6() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());
    PigContext context = server.getPigContext();

    String command = "illustrate -script "
            + basedir + "/illustrate6.pig;";

    // Grunt reads its commands from a reader, so wrap the command bytes in one.
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(command.getBytes())));

    new Grunt(commandReader, context).exec();
}
 
Example #11
Source File: GruntParser.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the script at {@code scriptPath} and returns its text after parameter
 * substitution by the pig server's PigContext.
 *
 * @param scriptPath path of the script file to read
 * @param params     parameters passed to the substitution
 * @param paramFiles parameter files passed to the substitution
 * @return the script text with parameters substituted
 * @throws IOException    if the script cannot be opened, read or closed
 * @throws ParseException if parameter substitution fails
 */
private String runPreprocessor(String scriptPath, List<String> params, List<String> paramFiles)
    throws IOException, ParseException {

    PigContext context = mPigServer.getPigContext();
    BufferedReader reader = new BufferedReader(new FileReader(scriptPath));
    try {
        return context.doParamSubstitution(reader, params, paramFiles);
    } finally {
        // Close the file even when substitution throws, so the handle is not leaked.
        reader.close();
    }
}
 
Example #12
Source File: PythonScriptEngine.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Logs the script file name, registers the functions found in that script
 * with the given context and returns the collected pig stats map.
 *
 * @param context    the PigContext the functions are registered with
 * @param scriptFile the script file to register functions from
 * @return the result of {@code getPigStatsMap()}
 * @throws IOException if registering the functions fails
 */
@Override
protected Map<String, List<PigStats>> main(PigContext context,
        String scriptFile) throws IOException {
    final String notice = "ScriptFile: " + scriptFile;
    log.warn(notice);

    registerFunctions(scriptFile, null, context);

    Map<String, List<PigStats>> stats = getPigStatsMap();
    return stats;
}
 
Example #13
Source File: TestMRExecutionEngine.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Connecting a MAPREDUCE PigContext built from a Configuration that was
 * created without default resources is expected to throw ExecException.
 */
@Test(expected = ExecException.class)
public void testJobConfGeneration() throws ExecException {
    Configuration bareConf = new Configuration(false);
    bareConf.set("foo", "bar");

    PigContext mrContext = new PigContext(ExecType.MAPREDUCE, bareConf);

    // Expected to fail: pig expects the Hadoop configs to be present in the classpath.
    mrContext.connect();
}
 
Example #14
Source File: TestGrunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds Grunt a load statement that declares a bag schema and checks that
 * executing it does not throw.
 */
@Test
public void testBagSchema() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String script = "a = load 'input1' as (b: bag{t:(i: int, c:chararray, f: float)});\n";

    // Grunt reads its commands from a reader, so wrap the script bytes in one.
    BufferedReader scriptReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(script.getBytes())));

    new Grunt(scriptReader, context).exec();
}
 
Example #15
Source File: TestGrunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a sequence of "fs" and "cd" commands mixed with a load/store through
 * Grunt and checks that executing it does not throw.
 */
@Test
public void testFsCommand() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String script =
            "fs -ls /;"
            + "fs -mkdir /fstmp;"
            + "fs -mkdir /fstmp/foo;"
            + "cd /fstmp;"
            + "fs -copyFromLocal test/org/apache/pig/test/data/passwd bar;"
            + "a = load 'bar';"
            + "cd foo;"
            + "store a into 'baz';"
            + "cd /;"
            + "fs -ls .;"
            + "fs -rmr /fstmp/foo/baz;"
            + "cd";

    // Grunt reads its commands from a reader, so wrap the script bytes in one.
    BufferedReader scriptReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(script.getBytes())));

    new Grunt(scriptReader, context).exec();
}
 
Example #16
Source File: FetchLauncher.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a placeholder Map Reduce plan stating that there are no MR jobs.
 *
 * For the "xml" format a single XML element is printed; for any other format
 * a commented text banner followed by a one-line message is printed.
 *
 * @param pp - Physical plan (not read by this method)
 * @param pc - PigContext (not read by this method)
 * @param ps - PrintStream to write the plan to
 * @param format format of the output plan
 * @throws PlanException
 * @throws VisitorException
 * @throws IOException
 */
public void explain(PhysicalPlan pp, PigContext pc, PrintStream ps, String format)
        throws PlanException, VisitorException, IOException {
    boolean asXml = "xml".equals(format);
    if (!asXml) {
        ps.println("#--------------------------------------------------");
        ps.println("# Map Reduce Plan                                  ");
        ps.println("#--------------------------------------------------");
        ps.println("No MR jobs. Fetch only.");
    } else {
        ps.println("<mapReducePlan>No MR jobs. Fetch only</mapReducePlan>");
    }
}
 
Example #17
Source File: TestGrunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a sequence of rmf/cp/rm/mv/cat file commands interleaved with
 * load/store statements through Grunt and checks that it does not throw.
 */
@Test
public void testFileCmds() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String inputUri = Util.generateURI("file:test/org/apache/pig/test/data/passwd", context);
    String script =
        "rmf bar; rmf baz;"
        + "a = load '"
        + inputUri + "';"
        + "store a into 'bar';"
        + "cp bar baz;"
        + "rm bar; rm baz;"
        + "store a into 'baz';"
        + "store a into 'bar';"
        + "rm baz; rm bar;"
        + "store a into 'baz';"
        + "mv baz bar;"
        + "b = load 'bar';"
        + "store b into 'baz';"
        + "cat baz;"
        + "rm baz;"
        + "rm bar;\n";

    // Grunt reads its commands from a reader, so wrap the script bytes in one.
    BufferedReader scriptReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(script.getBytes())));

    new Grunt(scriptReader, context).exec();
}
 
Example #18
Source File: TestMultiQueryLocal.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a local-mode PigServer with multiquery enabled, sets the
 * "pig.usenewlogicalplan" and temp-dir properties on it, records the
 * temporary path in TMP_DIR and deletes previous output files.
 */
@Before
public void setUp() throws Exception {
    // Multiquery is switched on before the server is created from the context.
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.getProperties().setProperty(PigConfiguration.PIG_OPT_MULTIQUERY, String.valueOf(true));
    myPig = new PigServer(localContext);

    // Remaining settings go through the server's own context.
    PigContext serverContext = myPig.getPigContext();
    serverContext.getProperties().setProperty("pig.usenewlogicalplan", "false");
    serverContext.getProperties().setProperty(PigConfiguration.PIG_TEMP_DIR, "build/test/tmp/");

    TMP_DIR = FileLocalizer.getTemporaryPath(myPig.getPigContext()).toUri().getPath();
    deleteOutputFiles();
}
 
Example #19
Source File: ForEachConverter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Deserializes the job configuration from {@code confBytes} the first time it
 * is needed, publishes it through PigMapReduce.sJobConfInternal, and then
 * sets up the UDF context and the SchemaTupleBackend from it.
 *
 * Does nothing when {@code jobConf} is already set.
 *
 * @throws RuntimeException wrapping any IOException raised during the setup
 */
void initializeJobConf() {
    if (this.jobConf != null) {
        return;
    }

    this.jobConf = KryoSerializer.deserializeJobConf(this.confBytes);
    PigMapReduce.sJobConfInternal.set(jobConf);

    try {
        MapRedUtil.setupUDFContext(jobConf);
        // The PigContext is read from the job configuration in serialized form.
        PigContext pc = (PigContext) ObjectSerializer.deserialize(
                jobConf.get("pig.pigContext"));
        SchemaTupleBackend.initialize(jobConf, pc);
    } catch (IOException ioe) {
        throw new RuntimeException(
                "Problem while configuring UDFContext from ForEachConverter.", ioe);
    }
}
 
Example #20
Source File: Main.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the progress notification listener described by the given
 * properties, using the listener class key and listener argument key.
 *
 * @param properties properties converted to a Configuration for the lookup
 * @return the result of {@code PigContext.instantiateObjectFromParams}
 * @throws RuntimeException wrapping the ExecException if instantiation fails
 */
protected static PigProgressNotificationListener makeListener(Properties properties) {
    try {
        PigProgressNotificationListener listener = PigContext.instantiateObjectFromParams(
                ConfigurationUtil.toConfiguration(properties),
                PROGRESS_NOTIFICATION_LISTENER_KEY,
                PROGRESS_NOTIFICATION_LISTENER_ARG_KEY,
                PigProgressNotificationListener.class);
        return listener;
    } catch (ExecException cause) {
        throw new RuntimeException(cause);
    }
}
 
Example #21
Source File: Grunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Grunt instance backed by a new PigServer for the given context.
 * A GruntParser is only created when a reader is supplied.
 *
 * @param in         reader the commands are parsed from; may be null
 * @param pigContext context the PigServer is created with
 * @throws ExecException if the PigServer cannot be created
 */
public Grunt(BufferedReader in, PigContext pigContext) throws ExecException {
    this.in = in;
    this.pig = new PigServer(pigContext);

    if (in == null) {
        // No input: leave the parser unset.
        return;
    }
    parser = new GruntParser(in, pig);
}
 
Example #22
Source File: EvalFunc.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates the function described by {@code funcSpec} and returns its
 * return type.
 *
 * @param funcSpec spec of the function to instantiate
 * @return the return type reported by the instantiated EvalFunc
 * @throws RuntimeException if a ClassCastException occurs, e.g. because the
 *         instantiated object is not an EvalFunc
 */
private Type getReturnTypeFromSpec(FuncSpec funcSpec) {
    try {
        Object instance = PigContext.instantiateFuncFromSpec(funcSpec);
        EvalFunc<?> evalFunc = (EvalFunc<?>) instance;
        return evalFunc.getReturnType();
    } catch (ClassCastException notAnEvalFunc) {
        throw new RuntimeException(funcSpec + " does not specify an eval func", notAnEvalFunc);
    }
}
 
Example #23
Source File: TestFRJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the replicated file with PigStorage and fills {@code replTbl}.
 *
 * For every tuple read, the key field is converted to a chararray and field 1
 * to an integer; the resulting two-field tuple is added to the bag stored
 * under that key, creating the bag on first use.
 *
 * @throws IOException if connecting, loading or converting fails
 */
private void setUpHashTable() throws IOException {
    FuncSpec storageSpec = new FuncSpec(PigStorage.class.getName() + "()");
    FileSpec replFile = new FileSpec(repl, storageSpec);
    POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);

    PigContext pc = new PigContext(ExecType.MAPREDUCE, PigMapReduce.sJobConfInternal.get());
    pc.connect();
    ld.setPc(pc);

    Result res = ld.getNextTuple();
    while (res.returnStatus != POStatus.STATUS_EOP) {
        Tuple tup = (Tuple) res.result;
        LoadFunc lf = (LoadFunc) PigContext.instantiateFuncFromSpec(ld.getLFile().getFuncSpec());

        // Raw bytes are converted with the load function's caster.
        byte[] keyBytes = ((DataByteArray) tup.get(keyField)).get();
        String key = lf.getLoadCaster().bytesToCharArray(keyBytes);

        Tuple csttup = TupleFactory.getInstance().newTuple(2);
        csttup.set(0, key);
        csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray) tup.get(1)).get()));

        DataBag vals;
        if (!replTbl.containsKey(key)) {
            vals = BagFactory.getInstance().newDefaultBag();
            replTbl.put(key, vals);
        } else {
            vals = replTbl.get(key);
        }
        vals.add(csttup);

        res = ld.getNextTuple();
    }
}
 
Example #24
Source File: InvokerGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Maps each argument type name to a Class object.
 *
 * A name is first looked up in {@code nameToClassObjectMap}; when that yields
 * null it is resolved through {@code PigContext.resolveClassName}.
 *
 * @param argumentTypes the type names to resolve
 * @return an array with one Class per entry of {@code argumentTypes}, in the same order
 * @throws RuntimeException wrapping the IOException if a name cannot be resolved
 */
private Class<?>[] getArgumentClassArray(String[] argumentTypes) {
    Class<?>[] arguments = new Class<?>[argumentTypes.length];
    int idx = 0;
    for (String typeName : argumentTypes) {
        Class<?> resolved = nameToClassObjectMap.get(typeName);
        if (resolved == null) {
            try {
                resolved = PigContext.resolveClassName(typeName);
            } catch (IOException e) {
                throw new RuntimeException("Unable to find class in PigContext: " + typeName, e);
            }
        }
        arguments[idx++] = resolved;
    }
    return arguments;
}
 
Example #25
Source File: TezResourceManager.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes this resource manager once; later calls are no-ops.
 *
 * Resolves the staging directory from the PigContext, obtains the file system
 * for the configuration, stores both arguments and marks the instance as
 * initialized.
 *
 * @param pigContext context used to obtain the temporary resource path
 * @param conf       configuration used to obtain the file system
 * @throws IOException if the staging directory or file system cannot be obtained
 */
public void init(PigContext pigContext, Configuration conf) throws IOException {
    if (inited) {
        return;
    }

    // The two calls that may fail run first, in this order.
    this.stagingDir = FileLocalizer.getTemporaryResourcePath(pigContext);
    this.remoteFs = FileSystem.get(conf);

    this.pigContext = pigContext;
    this.conf = conf;
    this.inited = true;
}
 
Example #26
Source File: TestSecondarySort.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a fresh PigServer for each test. The cluster and the connected
 * PigContext are created only when {@code cluster} is still null.
 */
@Before
public void setUp() throws Exception {
    boolean firstRun = (cluster == null);
    if (firstRun) {
        cluster = getCluster();
        pc = new PigContext(cluster.getExecType(), cluster.getProperties());
        try {
            pc.connect();
        } catch (ExecException connectFailure) {
            throw new RuntimeException(connectFailure);
        }
    }

    pigServer = new PigServer(pc);
}
 
Example #27
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * With output compression enabled and the BZip2 codec configured, stores the
 * same relation into 'output2.bz2' and 'output2' in one batch and checks that
 * the first file listed for each output is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String inputFileName = "input2.txt";
    String[] inputData = new String[] {
            "1\t2\r3\t4"
    };
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext.getProperties().setProperty("output.compression.codec",
            "org.apache.hadoop.io.compress.BZip2Codec");

    // Both stores run in a single batch.
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    FileStatus[] plainFiles = fs.listStatus(new Path("output2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(plainFiles[0].getLen() > 0);

    FileStatus[] bzipFiles = fs.listStatus(new Path("output2.bz2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(bzipFiles[0].getLen() > 0);
}
 
Example #28
Source File: ReadToEndLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a loader around {@code wrappedLoadFunc}, stores the supplied
 * settings and then runs {@code init()}.
 *
 * @param wrappedLoadFunc the load function being wrapped
 * @param conf            the configuration to use
 * @param inputLocation   the input location
 * @param splitIndex      initial value of the current split index
 * @param pigContext      the PigContext
 * @throws IOException if {@code init()} fails
 */
public ReadToEndLoader(LoadFunc wrappedLoadFunc, Configuration conf,
        String inputLocation, int splitIndex, PigContext pigContext) throws IOException {
    this.wrappedLoadFunc = wrappedLoadFunc;
    this.conf = conf;
    this.pigContext = pigContext;
    this.inputLocation = inputLocation;
    this.curSplitIndex = splitIndex;

    // Must run last: all fields above are assigned before init() is called.
    init();
}
 
Example #29
Source File: QueryParserUtils.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Appends the remote hosts found for {@code absolutePath} to the
 * comma-separated MRConfiguration.JOB_HDFS_SERVERS property of the context,
 * skipping hosts that are already listed there.
 *
 * The default file system is read from "fs.default.name", falling back to
 * "fs.defaultFS". The property is written only when the resulting list is
 * non-empty.
 *
 * @param absolutePath the path passed to {@code getRemoteHosts}
 * @param pigContext   context whose properties are read and updated
 * @throws URISyntaxException if the default file system value is not a valid URI
 */
static void setHdfsServers(String absolutePath, PigContext pigContext) throws URISyntaxException {
    // Get native host
    String fsName = (String) pigContext.getProperties().get("fs.default.name");
    if (fsName == null) {
        fsName = (String) pigContext.getProperties().get("fs.defaultFS");
    }
    URI fsUri = new URI(fsName);

    Configuration conf = new Configuration(true);
    ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(pigContext.getProperties()));
    Set<String> remoteHosts = getRemoteHosts(absolutePath, fsUri, conf);

    String configured = (String) pigContext.getProperties().get(MRConfiguration.JOB_HDFS_SERVERS);
    if (configured == null) {
        configured = "";
    }
    // Hosts are only compared against the servers that were configured on entry.
    String[] knownServers = configured.split(",");
    StringBuilder merged = new StringBuilder(configured);

    for (String remoteHost : remoteHosts) {
        boolean alreadyListed = false;
        for (int i = 0; i < knownServers.length && !alreadyListed; i++) {
            alreadyListed = knownServers[i].equals(remoteHost);
        }
        if (alreadyListed) {
            continue;
        }
        if (merged.length() > 0) {
            merged.append(",");
        }
        merged.append(remoteHost);
    }

    if (merged.length() > 0) {
        pigContext.getProperties().setProperty(MRConfiguration.JOB_HDFS_SERVERS, merged.toString());
    }
}
 
Example #30
Source File: HJob.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a job record from the given status, context, store operator and
 * alias. The output file spec is taken from the store operator.
 *
 * @param status     the job status
 * @param pigContext the PigContext
 * @param store      the store operator; its SFile becomes the output file spec
 * @param alias      the alias
 */
public HJob(JOB_STATUS status,
            PigContext pigContext,
            POStore store,
            String alias) {
    this.status = status;
    this.alias = alias;
    this.pigContext = pigContext;
    this.poStore = store;
    this.outFileSpec = store.getSFile();
}