Java Code Examples for org.apache.pig.PigServer#setBatchOn()

The following examples show how to use org.apache.pig.PigServer#setBatchOn(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMultiStorageCompression.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code data} to a temporary file, loads it with PigStorage and stores it
 * through piggybank's MultiStorage, registering both statements in one Pig batch.
 *
 * @param outputPath      location used both as the STORE target and as the first
 *                        MultiStorage constructor argument
 * @param compressionType value passed as the third MultiStorage constructor argument
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception, ExecException, IOException, FrontendException {

   // Stage the input file, then double every backslash in the file name and in
   // the shared patternString field.
   String inputFile = TestHelper.createTempFile(data, "");
   PigServer pigServer = new PigServer(LOCAL);
   inputFile = inputFile.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");

   String escapedInput = Util.encodeEscape(inputFile);
   String loadStatement = "A = LOAD '" + escapedInput
         + "' USING PigStorage(',') as (a,b,c);";

   StringBuilder storeStatement = new StringBuilder("STORE A INTO '");
   storeStatement.append(Util.encodeEscape(outputPath));
   storeStatement.append("' USING org.apache.pig.piggybank.storage.MultiStorage").append("('");
   storeStatement.append(Util.encodeEscape(outputPath));
   storeStatement.append("','0', '").append(compressionType).append("', '\\t');");

   // Register both statements in batch mode and execute them together.
   pigServer.setBatchOn();
   pigServer.registerQuery(loadStatement);
   pigServer.registerQuery(storeStatement.toString());

   pigServer.executeBatch();
}
 
Example 2
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the date-partitioned directory with a HiveColumnarLoader built with a
 * {@code startingDate:endingDate} argument and checks that the number of tuples
 * read equals {@code datePartitionedRowCount}.
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";

    System.out.println(loaderSpec);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    String inputDir = Util.encodeEscape(datePartitionedDir.getAbsolutePath());
    pig.registerQuery("a = LOAD '" + inputDir + "' using " + loaderSpec + ";");

    // Drain the iterator, counting each tuple until next() yields null.
    int tupleCount = 0;
    for (Iterator<Tuple> rows = pig.openIterator("a"); rows.next() != null; ) {
        tupleCount++;
    }

    Assert.assertEquals(datePartitionedRowCount, tupleCount);
}
 
Example 3
Source File: BoundScript.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the given Pig script text with a GruntParser against a batch-mode
 * PigServer, executes the batch and returns {@code PigStats.get()}.
 *
 * @param query the script text to run
 * @return the value of {@code PigStats.get()} after the batch has been executed
 * @throws IOException if the parser raises a ParseException (wrapped, cause kept)
 *                     or if any of the Pig calls throws one
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);

    // Read the listeners off the current ScriptState before ScriptState.start
    // is called with a newly instantiated state; they are re-registered below.
    List<PigProgressNotificationListener> carriedListeners = ScriptState.get().getAllListeners();

    PigContext pigContext = scriptContext.getPigContext();
    ScriptState freshState = pigContext.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    for (PigProgressNotificationListener carried : carriedListeners) {
        ScriptState.get().registerListener(carried);
    }

    PigServer server = new PigServer(scriptContext.getPigContext(), false);
    server.setBatchOn();

    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException e) {
        throw new IOException("Failed to parse script " + e.getMessage(), e);
    }

    server.executeBatch();
    return PigStats.get();
}
 
Example 4
Source File: TestAssert.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an ASSERT statement whose condition holds for every row lets all
 * three input tuples reach the store unchanged and in order.
 * @throws Exception if any of the Pig calls fails
 */
@Test
public void testPositive() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(pig);

    mockData.set("foo", tuple(1), tuple(2), tuple(3));

    pig.setBatchOn();
    pig.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pig.registerQuery("ASSERT A BY i > 0;");
    pig.registerQuery("STORE A INTO 'bar' USING mock.Storage();");

    pig.executeBatch();

    List<Tuple> stored = mockData.get("bar");
    assertEquals(3, stored.size());
    // Position idx must hold the tuple (idx + 1), i.e. (1), (2), (3).
    for (int idx = 0; idx < 3; idx++) {
        assertEquals(tuple(idx + 1), stored.get(idx));
    }
}
 
Example 5
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores {@code f1, TOTUPLE(f2,f3)} with HiveColumnarStorage and checks, on a row
 * read back via {@code readRow}, that the first field is a LazyString and the
 * second a LazyArray whose elements are LazyStrings holding "Sample value".
 */
@Test
public void testShouldStoreTupleAsHiveArray() throws IOException, InterruptedException, SerDeException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String storerSpec = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    String inputFile = simpleDataFile.getAbsolutePath();
    File outputDir = new File("testhiveColumnarStore");

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery("a = LOAD '" + Util.encodeEscape(inputFile) + "' using " + loaderSpec
            + ";");
    pig.registerQuery("b = FOREACH a GENERATE f1, TOTUPLE(f2,f3);");

    // when
    pig.store("b", outputDir.getAbsolutePath(), storerSpec);

    // then
    Path firstPartFile = new Path(outputDir.getAbsolutePath()+"/part-m-00000.rc");
    ColumnarStruct row = readRow(outputDir, firstPartFile, "f1 string,f2 array<string>");

    assertEquals(2, row.getFieldsAsList().size());

    Object firstField = row.getField(0);
    assertEquals(LazyString.class, firstField.getClass());

    Object secondField = row.getField(1);
    assertEquals(LazyArray.class, secondField.getClass());

    List<Object> elements = ((LazyArray) secondField).getList();
    for (Object element : elements) {
        assertEquals(LazyString.class, element.getClass());
        LazyString lazy = (LazyString) element;
        assertEquals("Sample value", lazy.getWritableObject().toString());
    }

}
 
Example 6
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten mock rows out with ParquetStorer, reloads them with ParquetLoader
 * using renamed columns and an extra {@code 'true'} constructor argument, and
 * checks that projecting {@code n1, n3} returns the original first and third values.
 */
@Test
public void testColumnIndexAccessProjection() throws Exception {
  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  String out = "target/out";
  int rows = 10;
  Data data = Storage.resetData(pig);

  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rows; row++) {
    input.add(Storage.tuple(row, row*1.0, row*2L, "v"+row));
  }
  data.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // First batch: store the mock input through ParquetStorer.
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(out);
  pig.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  pig.executeBatch();

  // Second batch: reload under different column names and project two of them.
  pig.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('n1:int, n2:double, n3:long, n4:chararray', 'true');");
  pig.registerQuery("C = foreach B generate n1, n3;");
  pig.registerQuery("STORE C into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> actual = data.get("out");
  assertEquals(rows, actual.size());

  int expected = 0;
  for (Tuple projected : actual) {
    assertEquals(2, projected.size());
    assertEquals(expected, projected.get(0));
    assertEquals(expected*2L, projected.get(1));
    expected++;
  }
}
 
Example 7
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the year/month/day/hour partitioned directory, keeps rows with
 * {@code year=='2010'}, projects six columns, orders them and checks that 240
 * six-field tuples come back.
 */
@Test
public void testYearMonthDayHourPartitionedFilesWithProjectionAndPartitionColumns()
        throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    String inputDir = Util.encodeEscape(yearMonthDayHourPartitionedDir.getAbsolutePath());
    pig.registerQuery("a = LOAD '" + inputDir + "' using " + loaderSpec + ";");
    pig.registerQuery("f = FILTER a by year=='2010';");
    pig.registerQuery("r = foreach f generate year, f2, f3, month, day, hour;");
    pig.registerQuery("b = ORDER r BY year, month, day, hour;");
    Iterator<Tuple> rows = pig.openIterator("b");

    // Walk the result until next() yields null, checking the width of every tuple.
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        System.out.println("Tuple: " + row);
        assertEquals(6, row.size());
        seen++;
    }
    System.out.println("Count: " + seen);
    Assert.assertEquals(240, seen);
}
 
Example 8
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten three-column rows with ParquetStorer, reloads them with a
 * ParquetLoader schema that adds {@code n1} and {@code n2}, and checks that
 * those two columns are null in every projected tuple.
 */
@Test
public void testReqestedSchemaColumnPruning() throws Exception {
  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  String out = "target/out";
  int rows = 10;
  Data data = Storage.resetData(pig);

  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rows; row++) {
    input.add(Storage.tuple(row, "a"+row, row*2));
  }
  data.set("in", "i:int, a:chararray, b:int", input);

  // First batch: store the mock input through ParquetStorer.
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(out);
  pig.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  pig.executeBatch();

  // Test null padding at the end: n1 and n2 are not among the stored columns.
  pig.registerQuery("C = LOAD '" + out + "' using " + ParquetLoader.class.getName()+"('i:int, a:chararray, b:int, n1:int, n2:chararray');");
  pig.registerQuery("G = foreach C generate n1,b,n2,i;");
  pig.registerQuery("STORE G into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> actual = data.get("out");
  assertEquals(rows, actual.size());

  for (int idx = 0; idx < actual.size(); idx++) {
    Tuple projected = actual.get(idx);
    assertEquals(4, projected.size());
    // Positions 0 and 2 of G are n1 and n2.
    assertTrue(projected.isNull(0));
    assertTrue(projected.isNull(2));
  }
}
 
Example 9
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the whole {@code simpleDataDir} directory with HiveColumnarLoader, projects
 * {@code f1} and checks that {@code simpleDirFileCount * simpleRowCount} single-field
 * chararray tuples are returned.
 */
@Test
public void testReadingMultipleNonPartitionedFiles() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String inputDir = simpleDataDir.getAbsolutePath();

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    pig.registerQuery("a = LOAD '" + Util.encodeEscape(inputDir) + "' using " + loaderSpec
            + ";");
    pig.registerQuery("b = foreach a generate f1;");

    Iterator<Tuple> rows = pig.openIterator("b");

    // Walk the result until next() yields null, checking the shape of every tuple.
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        seen++;
    }

    Assert.assertEquals(simpleDirFileCount * simpleRowCount, seen);
}
 
Example 10
Source File: VespaQueryTest.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a local, batch-mode PigServer and registers the given script on it with
 * an {@code ENDPOINT} parameter.
 *
 * @param script   script passed to {@code registerScript}
 * @param endpoint value bound to the {@code ENDPOINT} script parameter
 * @return the PigServer, with the script registered but the batch not yet executed
 */
private PigServer setup(String script, String endpoint) throws Exception {
    Configuration hdfsConf = new HdfsConfiguration();

    Map<String, String> scriptParams = new HashMap<>();
    scriptParams.put("ENDPOINT", endpoint);

    PigServer pig = new PigServer(ExecType.LOCAL, hdfsConf);
    pig.setBatchOn();
    pig.registerScript(script, scriptParams);
    return pig;
}
 
Example 11
Source File: TestLocationInPhysicalPlan.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a four-line LOAD / GROUP / FOREACH / STORE script in one batch and checks
 * the original location recorded on the store operator as well as the alias
 * location string reported by the job statistics.
 */
@Test
public void test() throws Exception {
    File inputFile = File.createTempFile("test", "input");
    inputFile.deleteOnExit();
    // Only the generated path is wanted for the output, so the file itself is removed.
    File outputFile = File.createTempFile("test", "output");
    outputFile.delete();

    String[] inputLines = new String[] {
        "1,2,3",
        "1,1,3",
        "1,1,1",
        "3,1,1",
        "1,2,1",
    };
    Util.createLocalInputFile(inputFile.getAbsolutePath(), inputLines);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerQuery(
            "A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "' using PigStorage();\n"
        + "B = GROUP A BY $0;\n"
        + "A = FOREACH B GENERATE COUNT(A);\n"
        + "STORE A INTO '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");

    ExecJob firstJob = pig.executeBatch().get(0);

    List<OriginalLocation> locations = firstJob.getPOStore().getOriginalLocations();
    Assert.assertEquals(1, locations.size());

    OriginalLocation storeLocation = locations.get(0);
    Assert.assertEquals(4, storeLocation.getLine());
    Assert.assertEquals(0, storeLocation.getOffset());
    Assert.assertEquals("A", storeLocation.getAlias());

    JobStats sinkStats = (JobStats)firstJob.getStatistics().getJobGraph().getSinks().get(0);
    Assert.assertEquals("M: A[1,4],A[3,4],B[2,4] C: A[3,4],B[2,4] R: A[3,4]", sinkStats.getAliasLocation());
}
 
Example 12
Source File: TestMockStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores two relations into the same mock.Storage location within one batch and
 * expects at least one of the resulting jobs to report a FAILED status.
 */
@Test
public void testBadUsage2() throws Exception {
  PigServer pig = new PigServer(ExecType.LOCAL);
  Data mockData = resetData(pig);

  mockData.set("input", tuple("a"), tuple("b"), tuple("c"));

  pig.setBatchOn();
  pig.registerQuery(
       "A = LOAD 'input' USING mock.Storage();"
      +"B = LOAD 'input' USING mock.Storage();"
      +"STORE A INTO 'output' USING mock.Storage();"
      +"STORE B INTO 'output' USING mock.Storage();");
  List<ExecJob> jobs = pig.executeBatch();

  // Scan the jobs and stop at the first one that reports FAILED.
  boolean sawFailure = false;
  for (int idx = 0; idx < jobs.size() && !sawFailure; idx++) {
      sawFailure = jobs.get(idx).getStatus() == JOB_STATUS.FAILED;
  }
  assertTrue("job should have failed for storing twice in the same location", sawFailure);

}
 
Example 13
Source File: TestGrunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a script that loads a temporary input file, stores it, and then
 * removes the input file with an {@code sh} command, and asserts that the first
 * job returned by the batch reports COMPLETED.
 *
 * <p>Both writers and the script input stream are closed in {@code finally}
 * blocks so that no file handle is left open when a write, a Pig call or the
 * assertion fails.
 *
 * @throws Throwable if file setup, script registration or batch execution fails
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    // The delete command differs on Windows.
    String strRemove = "rm";

    if (Util.WINDOWS)
    {
        strRemove = "del";
    }

    File inputFile = File.createTempFile("testInputFile", ".txt");
    PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile));
    try {
        pwInput.println("1");
    } finally {
        pwInput.close();
    }

    File inputScript = File.createTempFile("testInputScript", "");
    // Only the generated path is wanted for the output, so the file itself is removed.
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    outputFile.delete();
    PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript));
    try {
        pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
        pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
        pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    } finally {
        pwScript.close();
    }

    InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile());
    try {
        server.setBatchOn();
        server.registerScript(inputStream);
        List<ExecJob> execJobs = server.executeBatch();
        assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
    } finally {
        // The original never closed this stream; release it on every path.
        inputStream.close();
    }
}
 
Example 14
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the date-partitioned directory with a HiveColumnarLoader built with a
 * {@code startingDate:startingDate} argument, projects {@code f2} and checks that
 * 50 single-field chararray tuples are returned.
 */
@Test
public void test1DayDatePartitionedFilesWithProjection() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + startingDate + "')";

    System.out.println(loaderSpec);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    String inputDir = Util.encodeEscape(datePartitionedDir.getAbsolutePath());
    pig.registerQuery("a = LOAD '" + inputDir + "' using " + loaderSpec + ";");
    pig.registerQuery("b = FOREACH a GENERATE f2 as p;");
    Iterator<Tuple> rows = pig.openIterator("b");

    // Walk the result until next() yields null, checking the shape of every tuple.
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        seen++;
    }

    Assert.assertEquals(50, seen);
}
 
Example 15
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Unions three relations of increasing width with UNION ONSCHEMA while also
 * storing the third relation on its own, then checks both stored outputs.
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data mockData = Storage.resetData(pigServer);

    // Use batch to force multiple outputs from relation l3. This causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    pigServer.setBatchOn();

    String script =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);"
        + "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);"
        + "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);"
        + "u = union onschema l1, l2, l3;"
        + "store u into 'out1' using mock.Storage;"
        + "store l3 into 'out2' using mock.Storage;";

    Util.registerMultiLineQuery(pigServer, script);
    pigServer.executeBatch();

    List<Tuple> unionOutput = mockData.get("out1");
    List<Tuple> l3Output = mockData.get("out2");

    String[] expectedRows = new String[] {
            "(1,2,null,null)",
            "(5,3,null,null)",
            "(1,2,3,null)",
            "(4,5,6,null)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);

    Util.checkQueryOutputsAfterSort(unionOutput, expectedTuples);

    assertEquals(0, l3Output.size());
}
 
Example 16
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the single {@code simpleDataFile} with HiveColumnarLoader, projects
 * {@code f1} and checks that {@code simpleRowCount} single-field chararray tuples
 * are returned.
 */
@Test
public void testReadingSingleFile() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String inputFile = simpleDataFile.getAbsolutePath();

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    pig.registerQuery("a = LOAD '" + Util.encodeEscape(inputFile) + "' using " + loaderSpec
            + ";");
    pig.registerQuery("b = foreach a generate f1;");

    Iterator<Tuple> rows = pig.openIterator("b");

    // Walk the result until next() yields null, checking the shape of every tuple.
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(1, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        seen++;
    }

    Assert.assertEquals(simpleRowCount, seen);
}
 
Example 17
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * With output compression set to BZip2Codec in the Pig properties, stores the same
 * relation into {@code output2.bz2} and {@code output2} in one batch and checks
 * that the first file listed under each location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String[] inputLines = new String[] {
            "1\t2\r3\t4"
    };

    String inputName = "input2.txt";
    Util.createInputFile(cluster, inputName, inputLines);

    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    PigContext context = pigServer.getPigContext();
    context.getProperties().setProperty( "output.compression.enabled", "true" );
    context.getProperties().setProperty( "output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec" );

    pigServer.setBatchOn();
    pigServer.registerQuery("a = load '" +  inputName + "';");
    pigServer.registerQuery("store a into 'output2.bz2';");
    pigServer.registerQuery("store a into 'output2';");
    pigServer.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pigServer.getPigContext().getProperties()));

    // Check the plain location first, then the .bz2 one.
    String[] locations = { "output2", "output2.bz2" };
    for (String location : locations) {
        FileStatus[] listed = fs.listStatus(new Path(location),
                Util.getSuccessMarkerPathFilter());
        assertTrue(listed[0].getLen() > 0);
    }
}
 
Example 18
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * With {@code STRICT_TYPE_CHECKING} set to false, stores ten six-column rows with
 * ParquetStorer and reloads them six times, each time rotating the requested types
 * one position further across the columns, and checks that every loaded tuple
 * reports the requested type in each position.
 */
@Test
public void testTypePersuasion() throws Exception {
  Properties props = new Properties();
  props.setProperty(STRICT_TYPE_CHECKING, Boolean.FALSE.toString());

  PigServer pig = new PigServer(ExecType.LOCAL, props);
  pig.setValidateEachStatement(true);
  String out = "target/out";
  int rows = 10;
  Data data = Storage.resetData(pig);

  List<Tuple> input = new ArrayList<Tuple>();
  for (int n = 0; n < rows; n++) {
    input.add(Storage.tuple(n, (long)n, (float)n, (double)n, Integer.toString(n), Boolean.TRUE));
  }
  data.set("in", "i:int, l:long, f:float, d:double, s:chararray, b:boolean", input);

  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(out);
  pig.registerQuery("Store A into '"+out+"' using " + ParquetStorer.class.getName()+"();");
  pig.executeBatch();

  byte [] types = { INTEGER, LONG, FLOAT, DOUBLE, CHARARRAY, BOOLEAN };
  String[] columns = { "i", "l", "f", "d", "s", "b" };

  //Test extracting values using each type.
  for (int shift = 0; shift < types.length; shift++) {
    // Column k is requested as types[(shift + k) % types.length]; entries are
    // joined with a comma followed by two spaces.
    StringBuilder schema = new StringBuilder();
    for (int col = 0; col < columns.length; col++) {
      if (col > 0) {
        schema.append(",  ");
      }
      schema.append(columns[col]).append(":")
          .append(DataType.findTypeName(types[(shift + col) % types.length]));
    }
    String query = "B = LOAD '" + out + "' using " + ParquetLoader.class.getName()
        + "('" + schema + "');";

    System.out.println("Query: " + query);
    pig.registerQuery(query);
    pig.registerQuery("STORE B into 'out"+shift+"' using mock.Storage();");
    pig.executeBatch();

    List<Tuple> actual = data.get("out" + shift);

    assertEquals(rows, actual.size());
    for (Tuple loaded : actual) {
      for (int col = 0; col < columns.length; col++) {
        assertTrue(loaded.getType(col) == types[(shift + col) % types.length]);
      }
    }
  }

}
 
Example 19
Source File: TestParser.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Registers store statements against several file-system URIs and checks, after
 * each one, the {@code MRConfiguration.JOB_HDFS_SERVERS} value obtained from a
 * Configuration built from the Pig properties.
 */
@Test
public void testRemoteServerList2() throws ExecException, IOException {
    pigServer = new PigServer(LOCAL);
    Properties props = pigServer.getPigContext().getProperties();
    props.setProperty("fs.default.name", "hdfs://a.com:8020");

    pigServer.setBatchOn();

    Data data = Storage.resetData(pigServer.getPigContext());
    data.set("/user/pig/1.txt");// no data

    pigServer.registerQuery("a = load '/user/pig/1.txt' using mock.Storage;");
    pigServer.registerQuery("store a into '/user/pig/1.txt';");

    // A store on the default file system: the value is either unset or equal to
    // one of the default file-system properties.
    System.out.println("hdfs-servers: "
            + props.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
    String servers = ConfigurationUtil.toConfiguration(props).get(MRConfiguration.JOB_HDFS_SERVERS);
    assertTrue(servers == null ||
            servers.equals(props.get("fs.default.name"))||
            servers.equals(props.get("fs.defaultFS")));

    // Each further store location is paired with the substring that must then
    // appear in the value.
    String[][] remoteCases = {
            { "hdfs://b.com/user/pig/1.txt", "hdfs://b.com" },
            { "har://hdfs-c.com:8020/user/pig/1.txt", "hdfs://c.com:8020" },
            { "hdfs://d.com:8020/user/pig/1.txt", "hdfs://d.com:8020" },
    };
    for (String[] remoteCase : remoteCases) {
        pigServer.registerQuery("store a into '" + remoteCase[0] + "' using mock.Storage;");
        System.out.println("hdfs-servers: "
                + props.getProperty(MRConfiguration.JOB_HDFS_SERVERS));
        servers = ConfigurationUtil.toConfiguration(props).get(MRConfiguration.JOB_HDFS_SERVERS);
        assertTrue(servers != null &&
                servers.contains(remoteCase[1]));
    }

}
 
Example 20
Source File: TestMRJobStats.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Runs two map-only jobs on a mini MR cluster, the first with
 * {@code PigConfiguration.PIG_NO_TASK_REPORT} set to "false" and the second with it
 * set to "true", and checks that the min/max/avg map times in the job statistics
 * differ from -1 in the first case and equal -1 in the second.
 *
 * <p>The cluster is shut down in a {@code finally} block so that a failed
 * assertion or Pig call does not leave it running, and the input writer is closed
 * even if a write fails.
 *
 * @throws IOException if creating the input, running a batch or shutting the
 *                     cluster down fails
 */
@Test
public void testNoTaskReportProperty() throws IOException{
    MiniGenericCluster cluster = MiniGenericCluster.buildCluster(MiniGenericCluster.EXECTYPE_MR);
    try {
        Properties properties = cluster.getProperties();

        String inputFile = "input";
        PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, inputFile));
        try {
            pw.println("100\tapple");
            pw.println("200\torange");
        } finally {
            pw.close();
        }

        // Enable task reports in job statistics
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "false");
        PigServer pigServer = new PigServer(cluster.getExecType(), properties);
        pigServer.setBatchOn();

        // Launch a map-only job
        pigServer.registerQuery("A = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store A into 'task_reports';");
        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats pigStats = jobs.get(0).getStatistics();
        MRJobStats jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats includes TaskReports information
        long minMapTime = jobStats.getMinMapTime();
        long maxMapTime = jobStats.getMaxMapTime();
        long avgMapTime = jobStats.getAvgMapTime();
        assertTrue("TaskReports are enabled, so minMapTime shouldn't be -1", minMapTime != -1L);
        assertTrue("TaskReports are enabled, so maxMapTime shouldn't be -1", maxMapTime != -1L);
        assertTrue("TaskReports are enabled, so avgMapTime shouldn't be -1", avgMapTime != -1L);

        // Disable task reports in job statistics
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "true");

        // Launch another map-only job
        pigServer.registerQuery("B = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store B into 'no_task_reports';");
        jobs = pigServer.executeBatch();
        pigStats = jobs.get(0).getStatistics();
        jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats doesn't include any TaskReports information
        minMapTime = jobStats.getMinMapTime();
        maxMapTime = jobStats.getMaxMapTime();
        avgMapTime = jobStats.getAvgMapTime();
        assertEquals("TaskReports are disabled, so minMapTime should be -1", -1L, minMapTime);
        assertEquals("TaskReports are disabled, so maxMapTime should be -1", -1L, maxMapTime);
        assertEquals("TaskReports are disabled, so avgMapTime should be -1", -1L, avgMapTime);
    } finally {
        // Previously this call was skipped whenever anything above threw.
        cluster.shutDown();
    }
}