Java Code Examples for org.apache.pig.PigServer#executeBatch()

The following examples show how to use org.apache.pig.PigServer#executeBatch(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMultiStorageCompression.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the comma-separated temp data file and stores it through MultiStorage
 * as a single batch.
 *
 * @param outputPath      used both as the STORE target and as the first MultiStorage argument
 * @param compressionType passed verbatim as the third MultiStorage argument
 */
private void runQuery(String outputPath, String compressionType)
      throws Exception, ExecException, IOException, FrontendException {

   // Write the test data to disk before the server is created.
   String dataFile = TestHelper.createTempFile(data, "");
   PigServer server = new PigServer(LOCAL);

   // Double every backslash in the file name and in the shared pattern field.
   dataFile = dataFile.replace("\\", "\\\\");
   patternString = patternString.replace("\\", "\\\\");

   StringBuilder loadStatement = new StringBuilder("A = LOAD '");
   loadStatement.append(Util.encodeEscape(dataFile));
   loadStatement.append("' USING PigStorage(',') as (a,b,c);");

   StringBuilder storeStatement = new StringBuilder("STORE A INTO '");
   storeStatement.append(Util.encodeEscape(outputPath));
   storeStatement.append("' USING org.apache.pig.piggybank.storage.MultiStorage('");
   storeStatement.append(Util.encodeEscape(outputPath));
   storeStatement.append("','0', '");
   storeStatement.append(compressionType);
   storeStatement.append("', '\\t');");

   // Queue both statements, then run them together.
   server.setBatchOn();
   server.registerQuery(loadStatement.toString());
   server.registerQuery(storeStatement.toString());
   server.executeBatch();
}
 
Example 2
Source File: TestStreaming.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Streams a one-line input through an {@code echo} command in batch mode, then checks
 * that the store directory exists and that no {@code _logs/mycmd} entry exists under it.
 */
@Test
public void testStreamingStderrLogsShouldNotBePersistedByDefault() throws Exception {
    final String inputName = "mydummyinput.txt";
    final String outputDir = "output_dir_001";

    Util.createInputFile(cluster, inputName, new String[] { "dummy" });

    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    server.setBatchOn();

    String[] statements = {
        "define mycmd `echo dummy` ;",
        "A = load '" + inputName + "' as (f1:chararray);",
        "B = stream A through mycmd;",
        "store B into '" + outputDir + "' ;"
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    server.executeBatch();

    Assert.assertTrue(Util.exists(server.getPigContext(), outputDir));
    Assert.assertFalse(Util.exists(server.getPigContext(), outputDir + "/_logs/mycmd"));
}
 
Example 3
Source File: TestAssert.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a batch containing an ASSERT statement whose condition holds for
 * every input row stores all rows, in order.
 *
 * @throws Exception if the statements cannot be registered or executed
 */
@Test
public void testPositive() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);
    mockData.set("foo", tuple(1), tuple(2), tuple(3));

    server.setBatchOn();
    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery("ASSERT A BY i > 0;");
    server.registerQuery("STORE A INTO 'bar' USING mock.Storage();");
    server.executeBatch();

    List<Tuple> stored = mockData.get("bar");
    final int expectedRows = 3;
    assertEquals(expectedRows, stored.size());
    // Row k is expected to hold the single value k + 1.
    for (int row = 0; row < expectedRows; row++) {
        assertEquals(tuple(row + 1), stored.get(row));
    }
}
 
Example 4
Source File: BoundScript.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the given script text as one batch on a new {@link PigServer}.
 *
 * <p>A new script state is started for the run, and the listeners of the previously
 * current script state are registered on it again before the script is parsed.
 *
 * @param query the script text to parse and execute
 * @return the value of {@code PigStats.get()} after the batch has executed
 * @throws IOException if the script fails to parse (wrapping the {@code ParseException})
 */
private PigStats exec(String query) throws IOException {
    LOG.info("Query to run:\n" + query);

    // Capture the current listeners first: ScriptState.start() below replaces the state.
    List<PigProgressNotificationListener> carriedListeners = ScriptState.get().getAllListeners();

    PigContext pigContext = scriptContext.getPigContext();
    ScriptState freshState = pigContext.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    for (PigProgressNotificationListener carried : carriedListeners) {
        ScriptState.get().registerListener(carried);
    }

    PigServer server = new PigServer(scriptContext.getPigContext(), false);
    server.setBatchOn();

    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException parseFailure) {
        String message = "Failed to parse script " + parseFailure.getMessage();
        throw new IOException(message, parseFailure);
    }

    server.executeBatch();
    return PigStats.get();
}
 
Example 5
Source File: BoundScript.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the enclosing object's {@code query} as one batch on a new {@link PigServer}
 * built from {@code ctx}, with {@code adaptor} registered on a newly started script state.
 *
 * @return the value of {@code PigStats.get()} after the batch has executed
 * @throws Exception an {@code IOException} wrapping the {@code ParseException} if the
 *         script fails to parse, or whatever the execution itself throws
 */
@Override
public PigStats call() throws Exception {
    LOG.info("Query to run:\n" + query);

    // The script state is created from the script context's engine.
    PigContext scriptPigContext = scriptContext.getPigContext();
    ScriptState freshState = scriptPigContext.getExecutionEngine().instantiateScriptState();
    ScriptState.start(freshState);
    ScriptState.get().setScript(query);
    ScriptState.get().registerListener(adaptor);

    PigServer server = new PigServer(ctx, true);
    server.setBatchOn();

    GruntParser parser = new GruntParser(new StringReader(query), server);
    parser.setInteractive(false);
    try {
        parser.parseStopOnError(true);
    } catch (ParseException parseFailure) {
        throw new IOException("Failed to parse script", parseFailure);
    }

    server.executeBatch();
    return PigStats.get();
}
 
Example 6
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten (i, a, b) rows to Parquet, reads them back with a loader schema that adds
 * the columns n1 and n2, and checks that those two columns come back null in every row.
 */
@Test
public void testReqestedSchemaColumnPruning() throws Exception {
  PigServer server = new PigServer(ExecType.LOCAL);
  server.setValidateEachStatement(true);

  final String parquetDir = "target/out";
  final int rowCount = 10;

  Data mockData = Storage.resetData(server);
  List<Tuple> inputRows = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    inputRows.add(Storage.tuple(row, "a" + row, row * 2));
  }
  mockData.set("in", "i:int, a:chararray, b:int", inputRows);

  // First batch: copy the mock input into a Parquet directory.
  server.setBatchOn();
  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.deleteFile(parquetDir);
  server.registerQuery("Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();");
  server.executeBatch();

  // Second batch: request two columns (n1, n2) that were never written.
  String loaderName = ParquetLoader.class.getName();
  server.registerQuery("C = LOAD '" + parquetDir + "' using " + loaderName
      + "('i:int, a:chararray, b:int, n1:int, n2:chararray');");
  server.registerQuery("G = foreach C generate n1,b,n2,i;");
  server.registerQuery("STORE G into 'out' using mock.Storage();");
  server.executeBatch();

  List<Tuple> projected = mockData.get("out");
  assertEquals(rowCount, projected.size());
  for (Tuple tuple : projected) {
    assertEquals(4, tuple.size());
    // Positions 0 and 2 hold n1 and n2.
    assertTrue(tuple.isNull(0));
    assertTrue(tuple.isNull(2));
  }
}
 
Example 7
Source File: TestBuiltin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a one-line tab-separated file and stores it again in batch mode, then reads the
 * first stored tuple back with PigStorage and compares it with the expected fields.
 */
@Test
public void testSFPig() throws Exception {
    Util.resetStateForExecModeSwitch();
    PigServer server = new PigServer(cluster.getExecType(), properties);

    final String inputName = "testSFPig-input.txt";
    final String outputLocation = "testSFPig-output.txt";

    String inputLine = "amy\tbob\tcharlene\tdavid\terin\tfrank";
    Util.createInputFile(cluster, inputName, new String[] { inputLine });

    // Expected tuple: one bytearray field per name on the input line.
    String[] names = { "amy", "bob", "charlene", "david", "erin", "frank" };
    DataByteArray[] fields = new DataByteArray[names.length];
    for (int k = 0; k < names.length; k++) {
        fields[k] = new DataByteArray(names[k]);
    }
    Tuple expected = Util.loadTuple(TupleFactory.getInstance().newTuple(fields.length), fields);

    String script = "a = load '" + inputName + "';"
            + "store a into '" + outputLocation + "';";
    server.setBatchOn();
    Util.registerMultiLineQuery(server, script);
    server.executeBatch();

    LoadFunc reader = new ReadToEndLoader(
            new PigStorage(),
            ConfigurationUtil.toConfiguration(cluster.getProperties()),
            outputLocation,
            0);
    Tuple actual = reader.getNext();

    // Remove both files before asserting, as the original does.
    Util.deleteFile(cluster, inputName);
    Util.deleteFile(cluster, outputLocation);

    assertEquals(expected, actual);
}
 
Example 8
Source File: TestGrunt.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a generated script (load, store, then an {@code sh} command that removes the
 * input file) as one batch and checks that the first returned job completed.
 *
 * <p>The script stream and both writers are closed in {@code finally} blocks so that no
 * file handle is left open when a step throws.
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    // Platform-specific delete command used by the trailing "sh" statement.
    String strRemove = "rm";
    if (Util.WINDOWS) {
        strRemove = "del";
    }

    File inputFile = File.createTempFile("testInputFile", ".txt");
    PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile));
    try {
        pwInput.println("1");
    } finally {
        pwInput.close();
    }

    File inputScript = File.createTempFile("testInputScript", "");
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    // Only the generated name is kept; presumably the STORE target must not exist yet.
    outputFile.delete();

    PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript));
    try {
        pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
        pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
        pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    } finally {
        pwScript.close();
    }

    InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile());
    try {
        server.setBatchOn();
        server.registerScript(inputStream);
        List<ExecJob> execJobs = server.executeBatch();
        assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
    } finally {
        // Release the script's file handle whether or not the batch succeeded.
        inputStream.close();
    }
}
 
Example 9
Source File: TestScalarAliases.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Uses a scalar ({@code C.count}) inside a SPLIT condition in batch mode and checks
 * that the stored branch Y contains exactly the last two input rows.
 */
@Test
public void testScalarAliasesSplitClause() throws Exception{
    Util.resetStateForExecModeSwitch();
    pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());

    final String inputPath = "table_testScalarAliasesSplitClause";
    final String output = "table_testScalarAliasesSplitClauseDir";
    String[] rows = { "1\t5", "2\t10", "3\t20" };
    Util.createInputFile(cluster, inputPath, rows);

    // Script mode: queue all statements, then run them together.
    pigServer.setBatchOn();
    String[] statements = {
        "A = LOAD '" + inputPath + "' as (a0: long, a1: double);",
        "B = group A all;",
        "C = foreach B generate COUNT(A) as count;",
        "split A into Y if (2 * C.count) < a1, X if a1 == 5;",
        "Store Y into '" + output + "';"
    };
    for (String statement : statements) {
        pigServer.registerQuery(statement);
    }
    pigServer.executeBatch();

    // Read the stored branch back and compare it row by row.
    pigServer.registerQuery("Z = LOAD '" + output + "' as (a0: int, a1: double);");
    Iterator<Tuple> stored = pigServer.openIterator("Z");

    String[] expectedRows = { "(2,10.0)", "(3,20.0)" };
    for (String expectedRow : expectedRows) {
        Tuple current = stored.next();
        assertTrue(current.toString().equals(expectedRow));
    }
    assertFalse(stored.hasNext());

    Util.deleteFile(cluster, output);
}
 
Example 10
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the same relation into a plain and a {@code .bz2} location in one batch, with
 * the output compression properties set on the context, and checks that the first
 * listed file in each location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    final String inputFileName = "input2.txt";
    String[] inputData = { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    PigContext context = pig.getPigContext();
    context.getProperties().setProperty("output.compression.enabled", "true");
    context.getProperties().setProperty("output.compression.codec",
            "org.apache.hadoop.io.compress.BZip2Codec");

    pig.setBatchOn();
    pig.registerQuery("a = load '" +  inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    // Check the plain location first, then the .bz2 one.
    String[] locations = { "output2", "output2.bz2" };
    for (String location : locations) {
        FileStatus[] listed = fs.listStatus(new Path(location),
                Util.getSuccessMarkerPathFilter());
        assertTrue(listed[0].getLen() > 0);
    }
}
 
Example 11
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the same relation into a plain and a {@code .bz2} location in one batch and
 * checks that the first listed file in each location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    final String inputFileName = "input.txt";
    String[] inputData = { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);

    pig.setBatchOn();
    pig.registerQuery("a = load '" +  inputFileName + "';");
    pig.registerQuery("store a into 'output.bz2';");
    pig.registerQuery("store a into 'output';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    // Check the plain location first, then the .bz2 one.
    String[] locations = { "output", "output.bz2" };
    for (String location : locations) {
        FileStatus[] listed = fs.listStatus(new Path(location),
                Util.getSuccessMarkerPathFilter());
        assertTrue(listed[0].getLen() > 0);
    }
}
 
Example 12
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs UNION ONSCHEMA over three inputs with 2, 3 and 4 declared columns in batch mode,
 * storing both the union and the third input, and checks both stored results.
 */
@Test
public void testUnionOnSchemaAdditionalColumnsWithImplicitSplit() throws IOException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    Data mockData = Storage.resetData(pig);

    // Batch mode makes relation l3 feed two outputs; per the original note this causes
    // ImplicitSplitInsertVisitor to call SchemaResetter.
    pig.setBatchOn();

    String loadTwoColumns =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j: int);";
    String loadThreeColumns =
        "l2 = load '" + INP_FILE_3NUMS + "' as (i : int, j : int, k : int);";
    String loadFourColumns =
        "l3 = load '" + INP_FILE_EMPTY + "' as (i : int, j : int, k : int, l :int);";
    String script = loadTwoColumns
        + loadThreeColumns
        + loadFourColumns
        + "u = union onschema l1, l2, l3;"
        + "store u into 'out1' using mock.Storage;"
        + "store l3 into 'out2' using mock.Storage;";

    Util.registerMultiLineQuery(pig, script);
    pig.executeBatch();

    List<Tuple> unionRows = mockData.get("out1");
    List<Tuple> emptyInputRows = mockData.get("out2");

    String[] expectedTuples = {
        "(1,2,null,null)",
        "(5,3,null,null)",
        "(1,2,3,null)",
        "(4,5,6,null)",
    };
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(expectedTuples);

    Util.checkQueryOutputsAfterSort(unionRows, expectedRes);
    assertEquals(0, emptyInputRows.size());
}
 
Example 13
Source File: TestCounters.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Splits MAX random integers into two filtered stores in one batch and checks that the
 * job reports two output locations and that its multi-store counters add up to MAX.
 */
@Test
public void testMapOnlyMultiQueryStores() throws Exception {
    final String highOutput = "/tmp/outout1";
    final String lowOutput = "/tmp/outout2";

    // One random value in [0, 100) per input line.
    PrintWriter writer = new PrintWriter(Util.createInputFile(cluster, file));
    for (int line = 0; line < MAX; line++) {
        int value = r.nextInt(100);
        writer.println(value);
    }
    writer.close();

    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    server.setBatchOn();
    server.registerQuery("a = load '" + file + "';");
    server.registerQuery("b = filter a by $0 > 50;");
    server.registerQuery("c = filter a by $0 <= 50;");
    server.registerQuery("store b into '" + highOutput + "';");
    server.registerQuery("store c into '" + lowOutput + "';");

    List<ExecJob> jobs = server.executeBatch();
    PigStats stats = jobs.get(0).getStatistics();
    assertTrue(stats.getOutputLocations().size() == 2);

    cluster.getFileSystem().delete(new Path(file), true);
    cluster.getFileSystem().delete(new Path(highOutput), true);
    cluster.getFileSystem().delete(new Path(lowOutput), true);

    MRJobStats sinkStats = (MRJobStats) stats.getJobGraph().getSinks().get(0);
    Map<String, Long> perStoreCounts = sinkStats.getMultiStoreCounters();

    // The two filters are complementary, so their record counts should total MAX.
    long total = 0;
    for (Map.Entry<String, Long> storeCount : perStoreCounts.entrySet()) {
        total += storeCount.getValue();
    }

    assertEquals(MAX, total);
}
 
Example 14
Source File: TestMockStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores two relations into the same mock location in one batch and expects at least
 * one of the returned jobs to report {@code JOB_STATUS.FAILED}.
 */
@Test
public void testBadUsage2() throws Exception {
  PigServer server = new PigServer(ExecType.LOCAL);
  Data mockData = resetData(server);
  mockData.set("input", tuple("a"), tuple("b"), tuple("c"));

  String script =
       "A = LOAD 'input' USING mock.Storage();"
      +"B = LOAD 'input' USING mock.Storage();"
      +"STORE A INTO 'output' USING mock.Storage();"
      +"STORE B INTO 'output' USING mock.Storage();";

  server.setBatchOn();
  server.registerQuery(script);
  List<ExecJob> jobs = server.executeBatch();

  // Stop scanning at the first failed job.
  boolean sawFailure = false;
  for (ExecJob job : jobs) {
      sawFailure = (job.getStatus() == JOB_STATUS.FAILED);
      if (sawFailure) {
          break;
      }
  }
  assertTrue("job should have failed for storing twice in the same location", sawFailure);
}
 
Example 15
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Test
public void testPredicatePushdown() throws Exception {
  Configuration conf = new Configuration();
  conf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);

  PigServer pigServer = new PigServer(ExecType.LOCAL, conf);
  pigServer.setValidateEachStatement(true);

  String out = "target/out";
  String out2 = "target/out2";
  int rows = 10;
  Data data = Storage.resetData(pigServer);
  List<Tuple> list = new ArrayList<Tuple>();
  for (int i = 0; i < rows; i++) {
    list.add(Storage.tuple(i, i*1.0, i*2L, "v"+i));
  }
  data.set("in", "c1:int, c2:double, c3:long, c4:chararray", list);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  pigServer.executeBatch();

  pigServer.deleteFile(out2);
  pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('c1:int, c2:double, c3:long, c4:chararray');");
  pigServer.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
  pigServer.registerQuery("STORE C into '" + out2 +"' using mock.Storage();");
  List<ExecJob> jobs = pigServer.executeBatch();

  long recordsRead = jobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();

  assertEquals(2, recordsRead);
}
 
Example 16
Source File: TestMultiStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the given queries on the server selected by {@code mode}, runs them as one
 * batch and verifies the results.
 *
 * @param mode    selects the server: {@code Mode.local} uses the local server, any other
 *                value uses the other one
 * @param outPath passed to {@code verifyResults} together with {@code mode}
 * @param queries statements to register, in order
 * @throws IOException
 */
private void testMultiStorage( Mode mode, String outPath,
    String... queries) throws IOException {
  PigServer server;
  if (Mode.local == mode) {
    server = this.pigServerLocal;
  } else {
    server = this.pigServer;
  }

  server.setBatchOn();
  for (String statement : queries) {
    server.registerQuery(statement);
  }
  server.executeBatch();

  verifyResults(mode, outPath);
}
 
Example 17
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
@Test
public void testColumnIndexAccess() throws Exception {
  PigServer pigServer = new PigServer(ExecType.LOCAL); 
  pigServer.setValidateEachStatement(true);
  String out = "target/out";
  int rows = 10;
  Data data = Storage.resetData(pigServer);
  List<Tuple> list = new ArrayList<Tuple>();
  for (int i = 0; i < rows; i++) {
    list.add(Storage.tuple(i, i*1.0, i*2L, "v"+i));
  }
  data.set("in", "c1:int, c2:double, c3:long, c4:chararray", list);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");
  pigServer.executeBatch();
    
  //Test Null Padding at the end 
  pigServer.registerQuery("B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('n1:int, n2:double, n3:long, n4:chararray', 'true');");
  pigServer.registerQuery("STORE B into 'out' using mock.Storage();");
  pigServer.executeBatch();
  
  List<Tuple> actualList = data.get("out");
  
  assertEquals(rows, actualList.size());
  for(int i = 0; i < rows; i++) {
    Tuple t = actualList.get(i);
    
    assertEquals(4, t.size());
    
    assertEquals(i, t.get(0));
    assertEquals(i * 1.0, t.get(1));
    assertEquals(i*2L, t.get(2));
    assertEquals("v"+i, t.get(3));
  }
}
 
Example 18
Source File: VespaStorageTest.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Executes the batch prepared by {@code setup} and, for every job in the first result's
 * job graph, asserts 10 documents sent, 0 failed and 10 ok.
 *
 * @param script passed through to {@code setup}
 * @param conf   passed through to {@code setup}
 */
private void assertAllDocumentsOk(String script, Configuration conf) throws Exception {
    PigServer server = setup(script, conf);
    List<ExecJob> executed = server.executeBatch();
    PigStats firstJobStats = executed.get(0).getStatistics();

    for (JobStats jobStats : firstJobStats.getJobGraph()) {
        MRJobStats mrStats = (MRJobStats) jobStats;
        Counters hadoopCounters = mrStats.getHadoopCounters();
        assertNotNull(hadoopCounters);

        VespaCounters vespaCounters = VespaCounters.get(hadoopCounters);
        assertEquals(10, vespaCounters.getDocumentsSent());
        assertEquals(0, vespaCounters.getDocumentsFailed());
        assertEquals(10, vespaCounters.getDocumentsOk());
    }
}
 
Example 19
Source File: TestMRJobStats.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@code PigConfiguration.PIG_NO_TASK_REPORT} controls whether the map-time
 * statistics of a job are populated: with the property set to "false" the min/max/avg
 * map times must differ from -1, with "true" they must all be -1.
 *
 * <p>The mini cluster is shut down in a {@code finally} block so that a failing
 * assertion does not leave it running.
 *
 * @throws IOException if creating the input or running a job fails
 */
@Test
public void testNoTaskReportProperty() throws IOException {
    MiniGenericCluster cluster = MiniGenericCluster.buildCluster(MiniGenericCluster.EXECTYPE_MR);
    try {
        Properties properties = cluster.getProperties();

        String inputFile = "input";
        PrintWriter pw = new PrintWriter(Util.createInputFile(cluster, inputFile));
        pw.println("100\tapple");
        pw.println("200\torange");
        pw.close();

        // Enable task reports in job statistics
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "false");
        PigServer pigServer = new PigServer(cluster.getExecType(), properties);
        pigServer.setBatchOn();

        // Launch a map-only job
        pigServer.registerQuery("A = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store A into 'task_reports';");
        List<ExecJob> jobs = pigServer.executeBatch();
        PigStats pigStats = jobs.get(0).getStatistics();
        MRJobStats jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats includes TaskReports information
        long minMapTime = jobStats.getMinMapTime();
        long maxMapTime = jobStats.getMaxMapTime();
        long avgMapTime = jobStats.getAvgMapTime();
        assertTrue("TaskReports are enabled, so minMapTime shouldn't be -1", minMapTime != -1L);
        assertTrue("TaskReports are enabled, so maxMapTime shouldn't be -1", maxMapTime != -1L);
        assertTrue("TaskReports are enabled, so avgMapTime shouldn't be -1", avgMapTime != -1L);

        // Disable task reports in job statistics; the same Properties object that was
        // handed to the server is modified here.
        properties.setProperty(PigConfiguration.PIG_NO_TASK_REPORT, "true");

        // Launch another map-only job
        pigServer.registerQuery("B = load '" + inputFile + "' as (id:int, fruit:chararray);");
        pigServer.registerQuery("store B into 'no_task_reports';");
        jobs = pigServer.executeBatch();
        pigStats = jobs.get(0).getStatistics();
        jobStats = (MRJobStats) pigStats.getJobGraph().getJobList().get(0);

        // Make sure JobStats doesn't include any TaskReports information
        minMapTime = jobStats.getMinMapTime();
        maxMapTime = jobStats.getMaxMapTime();
        avgMapTime = jobStats.getAvgMapTime();
        assertEquals("TaskReports are disabled, so minMapTime should be -1", -1L, minMapTime);
        assertEquals("TaskReports are disabled, so maxMapTime should be -1", -1L, maxMapTime);
        assertEquals("TaskReports are disabled, so avgMapTime should be -1", -1L, avgMapTime);
    } finally {
        // Always release the mini cluster, even when an assertion above fails.
        cluster.shutDown();
    }
}
 
Example 20
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Writes ten rows with six differently typed columns to Parquet with strict type
 * checking turned off, then reloads them six times, each time rotating which type is
 * requested for which column, and checks that every returned field has the requested type.
 */
@Test
public void testTypePersuasion() throws Exception {
  Properties props = new Properties();
  props.setProperty(STRICT_TYPE_CHECKING, Boolean.FALSE.toString());

  PigServer server = new PigServer(ExecType.LOCAL, props);
  server.setValidateEachStatement(true);

  final String parquetDir = "target/out";
  final int rowCount = 10;

  Data mockData = Storage.resetData(server);
  List<Tuple> inputRows = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    inputRows.add(Storage.tuple(row, (long)row, (float)row, (double)row, Integer.toString(row), Boolean.TRUE));
  }
  mockData.set("in", "i:int, l:long, f:float, d:double, s:chararray, b:boolean", inputRows);

  server.setBatchOn();
  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.deleteFile(parquetDir);
  server.registerQuery("Store A into '"+parquetDir+"' using " + ParquetStorer.class.getName()+"();");
  server.executeBatch();

  String[] columns = { "i", "l", "f", "d", "s", "b" };
  byte [] types = { INTEGER, LONG, FLOAT, DOUBLE, CHARARRAY, BOOLEAN };

  // Rotation "shift" asks for type (shift + c) % 6 on column c.
  for (int shift = 0; shift < types.length; shift++) {
    StringBuilder query = new StringBuilder();
    query.append("B = LOAD '").append(parquetDir).append("' using ")
         .append(ParquetLoader.class.getName()).append("('");
    for (int c = 0; c < columns.length; c++) {
      if (c > 0) {
        query.append(",  ");
      }
      query.append(columns[c]).append(":")
           .append(DataType.findTypeName(types[(shift + c) % types.length]));
    }
    query.append("');");

    System.out.println("Query: " + query);
    server.registerQuery(query.toString());
    server.registerQuery("STORE B into 'out"+shift+"' using mock.Storage();");
    server.executeBatch();

    List<Tuple> reloaded = mockData.get("out" + shift);

    assertEquals(rowCount, reloaded.size());
    for (Tuple tuple : reloaded) {
      for (int c = 0; c < columns.length; c++) {
        assertTrue(tuple.getType(c) == types[(shift + c) % types.length]);
      }
    }
  }
}