Java Code Examples for org.apache.pig.tools.pigstats.PigStats#getJobGraph()

The following examples show how to use org.apache.pig.tools.pigstats.PigStats#getJobGraph(). They are extracted from open source projects; you can go to the original project or source file by following the links above each example, and check out related API usage on the sidebar.
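Before diving into the examples, here is a minimal sketch of the common pattern, assuming local execution: getJobGraph() returns a JobGraph whose iterator yields one JobStats per job Pig ran, so callers typically iterate it and inspect per-job status or counters. The input and output paths below are hypothetical placeholders, not taken from any of the examples.

import java.util.Properties;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.tools.pigstats.JobStats;
import org.apache.pig.tools.pigstats.PigStats;

public class JobGraphSketch {
    public static void main(String[] args) throws Exception {
        // Local mode keeps the sketch self-contained; the examples below run
        // with ExecType.MAPREDUCE against a test cluster instead.
        PigServer pigServer = new PigServer(ExecType.LOCAL, new Properties());
        pigServer.setBatchOn();
        pigServer.registerQuery("a = load 'input.txt';");          // hypothetical input
        pigServer.registerQuery("store a into 'output_dir';");     // hypothetical output
        pigServer.executeBatch();

        // PigStats.get() returns statistics for the most recent execution;
        // the JobGraph is iterable, yielding one JobStats per executed job.
        PigStats stats = PigStats.get();
        for (JobStats js : stats.getJobGraph()) {
            System.out.println(js.getJobId() + " successful? " + js.isSuccessful());
        }
    }
}

On the MapReduce backend each JobStats is concretely an MRJobStats, which is why several of the examples below cast to that type to reach Hadoop-level counters.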
Example 1
Source File: VespaStorageTest.java    From vespa with Apache License 2.0
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer ps = setup(script, conf);
    List<ExecJob> jobs = ps.executeBatch();
    PigStats stats = jobs.get(0).getStatistics();
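    // The job graph yields one JobStats per job; on the MapReduce backend each
    // is an MRJobStats, so the cast below exposes the raw Hadoop counters.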
    for (JobStats js : stats.getJobGraph()) {
        Counters hadoopCounters = ((MRJobStats)js).getHadoopCounters();
        assertNotNull(hadoopCounters);
        VespaCounters counters = VespaCounters.get(hadoopCounters);
        assertEquals(10, counters.getDocumentsSent());
        assertEquals(0, counters.getDocumentsFailed());
        assertEquals(10, counters.getDocumentsOk());
    }
}
 
Example 2
Source File: GruntParser.java    From spork with Apache License 2.0
private void executeBatch() throws IOException {
    if (mPigServer.isBatchOn()) {
        if (mExplain != null) {
            explainCurrentBatch();
        }

        if (!mLoadOnly) {
            mPigServer.executeBatch();
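            // PigStats.get() returns the statistics of the most recent execution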
            PigStats stats = PigStats.get();
            JobGraph jg = stats.getJobGraph();
            Iterator<JobStats> iter = jg.iterator();
            while (iter.hasNext()) {
                JobStats js = iter.next();
                if (!js.isSuccessful()) {
                    mNumFailedJobs++;
                    Exception exp = (js.getException() != null) ? js.getException()
                            : new ExecException(
                                    "Job " + (js.getJobId() == null ? "" : js.getJobId() + " ") +
                                    "failed, hadoop does not return any error message",
                                    2244);
                    LogUtils.writeLog(exp,
                            mPigServer.getPigContext().getProperties().getProperty("pig.logfile"),
                            log,
                            "true".equalsIgnoreCase(mPigServer.getPigContext().getProperties().getProperty("verbose")),
                            "Pig Stack Trace");
                } else {
                    mNumSucceededJobs++;
                }
            }
        }
    }
}
 
Example 3
Source File: PigServer.java    From spork with Apache License 2.0
/**
 * Retrieves a list of ExecJob objects from the given PigStats object.
 * @param stats the PigStats to extract jobs from
 * @return A list of ExecJob objects
 */
protected List<ExecJob> getJobs(PigStats stats) {
    LinkedList<ExecJob> jobs = new LinkedList<ExecJob>();
    if (stats instanceof EmptyPigStats) {
        HJob job = new HJob(HJob.JOB_STATUS.COMPLETED, pigContext, stats.result(null)
                .getPOStore(), null);
        jobs.add(job);
        return jobs;
    }
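    // Walk the job graph, wrapping each output of every job in an ExecJob
    // whose status reflects whether the producing job succeeded or failed.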
    JobGraph jGraph = stats.getJobGraph();
    Iterator<JobStats> iter = jGraph.iterator();
    while (iter.hasNext()) {
        JobStats js = iter.next();
        for (OutputStats output : js.getOutputs()) {
            if (js.isSuccessful()) {
                jobs.add(new HJob(HJob.JOB_STATUS.COMPLETED, pigContext, output
                        .getPOStore(), output.getAlias(), stats));
            } else {
                HJob hjob = new HJob(HJob.JOB_STATUS.FAILED, pigContext, output
                        .getPOStore(), output.getAlias(), stats);
                hjob.setException(js.getException());
                jobs.add(hjob);
            }
        }
    }
    return jobs;
}
 
Example 4
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapOnly() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    for(int i = 0; i < MAX; i++) {
        int t = r.nextInt(100);
        pw.println(t);
        if (t > 50) count++;
    }
    pw.close();
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only");
    PigStats pigStats = job.getStatistics();
    
    // count the number of bytes in the output file by reading it back
    // (an alternative: cluster.getFileSystem().getFileStatus(new Path("output_map_only")).getLen())
    InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()), pigServer
            .getPigContext());

    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();
    
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);

    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");

    JobGraph jg = pigStats.getJobGraph();
    Iterator<JobStats> iter = jg.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();

        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
        System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
        assertEquals(filesize, js.getHdfsBytesWritten());
    }

}
 
Example 5
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapOnlyBinStorage() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    for(int i = 0; i < MAX; i++) {
        int t = r.nextInt(100);
        pw.println(t);
        if (t > 50) count++;
    }
    pw.close();
    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = filter a by $0 > 50;");
    pigServer.registerQuery("c = foreach b generate $0 - 50;");
    ExecJob job = pigServer.store("c", "output_map_only", "BinStorage");
    PigStats pigStats = job.getStatistics();
    
    InputStream is = FileLocalizer.open(FileLocalizer.fullPath(
            "output_map_only", pigServer.getPigContext()),
            pigServer.getPigContext());

    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();

    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output_map_only"), true);

    System.out.println("============================================");
    System.out.println("Test case Map Only");
    System.out.println("============================================");

    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
    
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(count, js.getMapOutputRecords());
        assertEquals(0, js.getReduceInputRecords());
        assertEquals(0, js.getReduceOutputRecords());
    }
        
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
 
Example 6
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapReduceOnly() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    int[] nos = new int[10];  // Java zero-initializes arrays, so no explicit init loop is needed

    for(int i = 0; i < MAX; i++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        nos[index] += value;
        pw.println(index + "\t" + value);
    }
    pw.close();

    for(int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group;");

    ExecJob job = pigServer.store("c", "output");
    PigStats pigStats = job.getStatistics();
    InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext());

    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();

    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);

    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");

    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(MAX, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
 
Example 7
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapReduceOnlyBinStorage() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    int[] nos = new int[10];  // Java zero-initializes arrays, so no explicit init loop is needed

    for(int i = 0; i < MAX; i++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        nos[index] += value;
        pw.println(index + "\t" + value);
    }
    pw.close();

    for(int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group;");
    ExecJob job = pigServer.store("c", "output", "BinStorage");
    PigStats pigStats = job.getStatistics();

    InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext());
    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();
    
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);

    System.out.println("============================================");
    System.out.println("Test case MapReduce");
    System.out.println("============================================");

    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(MAX, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
 
Example 8
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapCombineReduce() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    int[] nos = new int[10];  // Java zero-initializes arrays, so no explicit init loop is needed

    for(int i = 0; i < MAX; i++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        nos[index] += value;
        pw.println(index + "\t" + value);
    }
    pw.close();

    for(int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");
    ExecJob job = pigServer.store("c", "output");
    PigStats pigStats = job.getStatistics();

    InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext());
    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();
 
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);

    System.out.println("============================================");
    System.out.println("Test case MapCombineReduce");
    System.out.println("============================================");
    
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(count, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
 
Example 9
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMapCombineReduceBinStorage() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    int[] nos = new int[10];  // Java zero-initializes arrays, so no explicit init loop is needed

    for(int i = 0; i < MAX; i++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        nos[index] += value;
        pw.println(index + "\t" + value);
    }
    pw.close();

    for(int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = group a by $0;");
    pigServer.registerQuery("c = foreach b generate group, SUM(a.$1);");

    ExecJob job = pigServer.store("c", "output", "BinStorage");
    PigStats pigStats = job.getStatistics();
    
    InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext());

    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);

    System.out.println("============================================");
    System.out.println("Test case MapCombineReduce");
    System.out.println("============================================");
 
    JobGraph jp = pigStats.getJobGraph();
    Iterator<JobStats> iter = jp.iterator();
    while (iter.hasNext()) {
        MRJobStats js = (MRJobStats) iter.next();
        System.out.println("Map input records : " + js.getMapInputRecords());
        assertEquals(MAX, js.getMapInputRecords());
        System.out.println("Map output records : " + js.getMapOutputRecords());
        assertEquals(MAX, js.getMapOutputRecords());
        System.out.println("Reduce input records : " + js.getReduceInputRecords());
        assertEquals(count, js.getReduceInputRecords());
        System.out.println("Reduce output records : " + js.getReduceOutputRecords());
        assertEquals(count, js.getReduceOutputRecords());
    }
    System.out.println("Hdfs bytes written : " + pigStats.getBytesWritten());
    assertEquals(filesize, pigStats.getBytesWritten());
}
 
Example 10
Source File: TestCounters.java    From spork with Apache License 2.0
@Test
public void testMultipleMRJobs() throws IOException, ExecException {
    int count = 0;
    PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, file));
    int[] nos = new int[10];  // Java zero-initializes arrays, so no explicit init loop is needed

    for(int i = 0; i < MAX; i++) {
        int index = r.nextInt(10);
        int value = r.nextInt(100);
        nos[index] += value;
        pw.println(index + "\t" + value);
    }
    pw.close();

    for (int i = 0; i < 10; i++) {
        if (nos[i] > 0) count++;
    }

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    pigServer.registerQuery("a = load '" + file + "';");
    pigServer.registerQuery("b = order a by $0;");
    pigServer.registerQuery("c = group b by $0;");
    pigServer.registerQuery("d = foreach c generate group, SUM(b.$1);");
    ExecJob job = pigServer.store("d", "output");
    PigStats pigStats = job.getStatistics();
    
    InputStream is = FileLocalizer.open(FileLocalizer.fullPath("output",
            pigServer.getPigContext()), pigServer.getPigContext());
    long filesize = 0;
    while(is.read() != -1) filesize++;
    
    is.close();
    
    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path("output"), true);
    
    System.out.println("============================================");
    System.out.println("Test case MultipleMRJobs");
    System.out.println("============================================");
    
    JobGraph jp = pigStats.getJobGraph();
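    // With several chained MR jobs, the graph's sinks are the terminal jobs;
    // the last job's stats carry the numbers for the final stored output.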
    MRJobStats js = (MRJobStats) jp.getSinks().get(0);
    
    System.out.println("Job id: " + js.getName());
    System.out.println(jp.toString());
    
    System.out.println("Map input records : " + js.getMapInputRecords());
    assertEquals(MAX, js.getMapInputRecords());
    System.out.println("Map output records : " + js.getMapOutputRecords());
    assertEquals(MAX, js.getMapOutputRecords());
    System.out.println("Reduce input records : " + js.getReduceInputRecords());
    assertEquals(count, js.getReduceInputRecords());
    System.out.println("Reduce output records : " + js.getReduceOutputRecords());
    assertEquals(count, js.getReduceOutputRecords());
    
    System.out.println("Hdfs bytes written : " + js.getHdfsBytesWritten());
    assertEquals(filesize, js.getHdfsBytesWritten());

}
 
Example 11
Source File: PigServer.java    From spork with Apache License 2.0
/**
 * Executes a Pig Latin script up to and including the indicated alias and stores the resulting
 * records into a file.  That is, if a user does:
 * <pre>
 * PigServer server = new PigServer();
 * server.registerQuery("A = load 'foo';");
 * server.registerQuery("B = filter A by $0 &gt; 0;");
 * server.registerQuery("C = order B by $1;");
 * </pre>
 * Then
 * <pre>
 * server.store("B", "bar", "mystorefunc");
 * </pre>
 * filtered but unsorted data will be stored to the file <tt>bar</tt> using
 * <tt>mystorefunc</tt>.  If instead a user does
 * <pre>
 * server.store("C", "bar", "mystorefunc");
 * </pre>
 * filtered and sorted data will be stored to the file <tt>bar</tt> using
 * <tt>mystorefunc</tt>.
 * <p>
 * @param id The alias to store
 * @param filename The file to store the results to
 * @param func The store function to use
 * @return {@link ExecJob} containing information about this job
 * @throws IOException
 */
public ExecJob store(String id, String filename, String func)
        throws IOException {
    PigStats stats = storeEx(id, filename, func);
    if (stats.getOutputStats().size() < 1) {
        throw new IOException("Couldn't retrieve job.");
    }
    OutputStats output = stats.getOutputStats().get(0);

    if (stats.isSuccessful()) {
        return  new HJob(JOB_STATUS.COMPLETED, pigContext, output
                .getPOStore(), output.getAlias(), stats);
    } else {
        HJob job = new HJob(JOB_STATUS.FAILED, pigContext,
                output.getPOStore(), output.getAlias(), stats);

        // surface an exception from a failed job, if any (keeps the last one found)
        Exception ex = null;
        for (JobStats js : stats.getJobGraph()) {
            if (js.getException() != null) {
                ex = js.getException();
            }
        }
        job.setException(ex);
        return job;
    }
}