Java Code Examples for org.apache.pig.PigServer#deleteFile()

The following examples show how to use org.apache.pig.PigServer#deleteFile(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: PerfTest.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Generates ROW_COUNT rows of COLUMN_COUNT chararray columns, stores them as Parquet under
 * {@code target/PerfTest} through a local Pig job, then invokes {@code load(out, n)} for a
 * series of values of {@code n} and prints the accumulated {@code results}.
 *
 * @param args unused
 * @throws Exception if any Pig call throws, or if the store job does not complete
 */
public static void main(String[] args) throws Exception {
  // Schema is "a0: chararray, a1: chararray, ..." with COLUMN_COUNT fields.
  StringBuilder schemaString = new StringBuilder("a0: chararray");
  for (int i = 1; i < COLUMN_COUNT; i++) {
    schemaString.append(", a").append(i).append(": chararray");
  }
  String out = "target/PerfTest";
  {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pigServer);
    Collection<Tuple> list = new ArrayList<Tuple>();
    for (int i = 0; i < ROW_COUNT; i++) {
      Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
      for (int j = 0; j < COLUMN_COUNT; j++) {
        tuple.set(j, "a" + i + "_" + j);
      }
      list.add(tuple);
    }
    data.set("in", schemaString.toString(), list);
    pigServer.setBatchOn();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    // Remove output left over from a previous run before storing again.
    pigServer.deleteFile(out);
    pigServer.registerQuery("Store A into '" + out + "' using " + ParquetStorer.class.getName() + "();");

    // Run the batch exactly once and keep the job. Calling executeBatch() a second time to
    // fetch the exception would start a new execution instead of returning the job that
    // just failed, so the real failure cause would be lost.
    org.apache.pig.backend.executionengine.ExecJob job = pigServer.executeBatch().get(0);
    if (job.getStatus() != JOB_STATUS.COMPLETED) {
      throw new RuntimeException("Job failed", job.getException());
    }
  }
  for (int n : new int[] {1, 2, 3, 4, 5, 10, 20, 50}) {
    load(out, n);
  }
  System.out.println(results);
}
 
Example 2
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten (int, chararray, int) rows to Parquet, reloads them through ParquetLoader with a
 * requested schema that also names n1 and n2 (fields that were never stored), and verifies
 * that those two fields are null in every resulting tuple.
 */
@Test
public void testReqestedSchemaColumnPruning() throws Exception {
  final String parquetDir = "target/out";
  final int rowCount = 10;

  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  // Input relation: (i, "a<i>", 2*i).
  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    input.add(Storage.tuple(row, "a" + row, row * 2));
  }
  mockData.set("in", "i:int, a:chararray, b:int", input);

  // First batch: copy the mock input into a fresh Parquet output.
  String storeAsParquet =
      "Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();";
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetDir);
  pig.registerQuery(storeAsParquet);
  pig.executeBatch();

  // Second batch: request two extra fields and project them to positions 0 and 2.
  String loadWithExtraFields =
      "C = LOAD '" + parquetDir + "' using " + ParquetLoader.class.getName()
          + "('i:int, a:chararray, b:int, n1:int, n2:chararray');";
  pig.registerQuery(loadWithExtraFields);
  pig.registerQuery("G = foreach C generate n1,b,n2,i;");
  pig.registerQuery("STORE G into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> stored = mockData.get("out");
  assertEquals(rowCount, stored.size());
  for (Tuple tuple : stored) {
    assertEquals(4, tuple.size());
    assertTrue(tuple.isNull(0));
    assertTrue(tuple.isNull(2));
  }
}
 
Example 3
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Stores ten (int, double, long, chararray) rows as Parquet under the names c1..c4, reloads
 * them with ParquetLoader using the different names n1..n4 and a second constructor argument
 * of 'true', and verifies every value comes back in its original position.
 */
@Test
public void testColumnIndexAccess() throws Exception {
  final String parquetDir = "target/out";
  final int rowCount = 10;

  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  // Input relation: (i, i as double, 2*i as long, "v<i>").
  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    input.add(Storage.tuple(row, row * 1.0, row * 2L, "v" + row));
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // First batch: copy the mock input into a fresh Parquet output.
  String storeAsParquet =
      "Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();";
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetDir);
  pig.registerQuery(storeAsParquet);
  pig.executeBatch();

  // Second batch: reload under renamed fields.
  // NOTE(review): the 'true' argument presumably switches the loader to positional
  // (column-index) matching - confirm against the ParquetLoader constructor.
  String loadRenamed =
      "B = LOAD '" + parquetDir + "' using " + ParquetLoader.class.getName()
          + "('n1:int, n2:double, n3:long, n4:chararray', 'true');";
  pig.registerQuery(loadRenamed);
  pig.registerQuery("STORE B into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> stored = mockData.get("out");
  assertEquals(rowCount, stored.size());

  int row = 0;
  for (Tuple tuple : stored) {
    assertEquals(4, tuple.size());

    assertEquals(row, tuple.get(0));
    assertEquals(row * 1.0, tuple.get(1));
    assertEquals(row * 2L, tuple.get(2));
    assertEquals("v" + row, tuple.get(3));
    row++;
  }
}
 
Example 4
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Stores ten (int, double, long, chararray) rows as Parquet under the names c1..c4, reloads
 * them under the names n1..n4 with a second loader argument of 'true', projects only n1 and
 * n3, and verifies the projected tuples hold the original first and third values.
 */
@Test
public void testColumnIndexAccessProjection() throws Exception {
  final String parquetDir = "target/out";
  final int rowCount = 10;

  PigServer pig = new PigServer(ExecType.LOCAL);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  // Input relation: (i, i as double, 2*i as long, "v<i>").
  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    input.add(Storage.tuple(row, row * 1.0, row * 2L, "v" + row));
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // First batch: copy the mock input into a fresh Parquet output.
  String storeAsParquet =
      "Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();";
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetDir);
  pig.registerQuery(storeAsParquet);
  pig.executeBatch();

  // Second batch: reload under renamed fields and keep only the first and third.
  String loadRenamed =
      "B = LOAD '" + parquetDir + "' using " + ParquetLoader.class.getName()
          + "('n1:int, n2:double, n3:long, n4:chararray', 'true');";
  pig.registerQuery(loadRenamed);
  pig.registerQuery("C = foreach B generate n1, n3;");
  pig.registerQuery("STORE C into 'out' using mock.Storage();");
  pig.executeBatch();

  List<Tuple> stored = mockData.get("out");
  assertEquals(rowCount, stored.size());

  int row = 0;
  for (Tuple tuple : stored) {
    assertEquals(2, tuple.size());

    assertEquals(row, tuple.get(0));
    assertEquals(row * 2L, tuple.get(1));
    row++;
  }
}
 
Example 5
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * With ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN set to true, stores ten rows as Parquet,
 * reloads them behind a filter that matches c1 == 1 or c1 == 5, and verifies that the input
 * statistics of the resulting job report exactly two records.
 */
@Test
public void testPredicatePushdown() throws Exception {
  final String parquetDir = "target/out";
  final String filteredDir = "target/out2";
  final int rowCount = 10;

  Configuration pushdownConf = new Configuration();
  pushdownConf.setBoolean(ParquetLoader.ENABLE_PREDICATE_FILTER_PUSHDOWN, true);

  PigServer pig = new PigServer(ExecType.LOCAL, pushdownConf);
  pig.setValidateEachStatement(true);
  Data mockData = Storage.resetData(pig);

  // Input relation: (i, i as double, 2*i as long, "v<i>").
  List<Tuple> input = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    input.add(Storage.tuple(row, row * 1.0, row * 2L, "v" + row));
  }
  mockData.set("in", "c1:int, c2:double, c3:long, c4:chararray", input);

  // First batch: copy the mock input into a fresh Parquet output.
  String storeAsParquet =
      "Store A into '" + parquetDir + "' using " + ParquetStorer.class.getName() + "();";
  pig.setBatchOn();
  pig.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pig.deleteFile(parquetDir);
  pig.registerQuery(storeAsParquet);
  pig.executeBatch();

  // Second batch: reload and filter; only two of the ten rows satisfy the predicate.
  String loadParquet =
      "B = LOAD '" + parquetDir + "' using " + ParquetLoader.class.getName()
          + "('c1:int, c2:double, c3:long, c4:chararray');";
  pig.deleteFile(filteredDir);
  pig.registerQuery(loadParquet);
  pig.registerQuery("C = FILTER B by c1 == 1 or c1 == 5;");
  pig.registerQuery("STORE C into '" + filteredDir + "' using mock.Storage();");
  List<ExecJob> filterJobs = pig.executeBatch();

  // The record count is taken from the first input of the first job.
  long recordsRead =
      filterJobs.get(0).getStatistics().getInputStats().get(0).getNumberRecords();

  assertEquals(2, recordsRead);
}
 
Example 6
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code PigServer.deleteFile} throws a {@code FrontendException} when the
 * "rm" command is disallowed through the {@code PigConfiguration.PIG_BLACKLIST} property.
 */
@Test(expected = FrontendException.class)
public void testBlacklistRemoveWithPigServer() throws Exception {
    ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "rm");

    // The server is built after the blacklist is set on the shared context.
    new PigServer(ctx).deleteFile("foo");
}
 
Example 7
Source File: TestParquetLoader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * With STRICT_TYPE_CHECKING set to false, stores ten rows of
 * (int, long, float, double, chararray, boolean) as Parquet and then reloads them six times.
 * On pass {@code i} the requested type of each field is rotated {@code i} positions through
 * the list of the six types, and every loaded tuple must report the requested type in every
 * column.
 *
 * @throws Exception if any Pig call throws
 */
@Test
public void testTypePersuasion() throws Exception {
  Properties p = new Properties();
  p.setProperty(STRICT_TYPE_CHECKING, Boolean.FALSE.toString());

  PigServer pigServer = new PigServer(ExecType.LOCAL, p);
  pigServer.setValidateEachStatement(true);
  String out = "target/out";
  int rows = 10;
  Data data = Storage.resetData(pigServer);
  List<Tuple> list = new ArrayList<Tuple>();
  for (int i = 0; i < rows; i++) {
    list.add(Storage.tuple(i, (long)i, (float)i, (double)i, Integer.toString(i), Boolean.TRUE));
  }
  data.set("in", "i:int, l:long, f:float, d:double, s:chararray, b:boolean", list);
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '"+out+"' using " + ParquetStorer.class.getName()+"();");
  pigServer.executeBatch();

  // Field names and their stored types, in column order.
  String[] names = { "i", "l", "f", "d", "s", "b" };
  byte [] types = { INTEGER, LONG, FLOAT, DOUBLE, CHARARRAY, BOOLEAN };

  //Test extracting values using each type.
  for(int i=0; i<types.length; i++) {
    // Request column k as types[(i + k) % types.length], i.e. the type list rotated by i.
    StringBuilder query = new StringBuilder(
        "B = LOAD '" + out + "' using " + ParquetLoader.class.getName() + "('");
    for (int k = 0; k < names.length; k++) {
      if (k > 0) {
        query.append(",  ");
      }
      query.append(names[k]).append(':')
           .append(DataType.findTypeName(types[(i + k) % types.length]));
    }
    query.append("');");

    System.out.println("Query: " + query);
    pigServer.registerQuery(query.toString());
    pigServer.registerQuery("STORE B into 'out"+i+"' using mock.Storage();");
    pigServer.executeBatch();

    List<Tuple> actualList = data.get("out" + i);

    assertEquals(rows, actualList.size());
    for(Tuple t : actualList) {
      for (int k = 0; k < types.length; k++) {
        // assertEquals reports both the expected and the actual type code on failure.
        assertEquals("type of column " + k, types[(i + k) % types.length], t.getType(k));
      }
    }
  }
}
 
Example 8
Source File: TestParquetStorer.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Round-trips tuples of (chararray, bag of (chararray, chararray)) through ParquetStorer and
 * ParquetLoader. The input mixes single-element bags, four-element bags, bags containing null
 * fields, and null bags, in shuffled order. The first reload reads the full schema and must
 * reproduce the input and its schema; the second requests only column {@code a} and must
 * reproduce the first column of the input.
 *
 * @throws Exception if any Pig call throws, or if one of the batch jobs does not complete
 */
@Test
public void testComplexSchema() throws ExecException, Exception {
  String out = "target/out";
  PigServer pigServer = new PigServer(ExecType.LOCAL);
  Data data = Storage.resetData(pigServer);
  // Declared as List so it can be shuffled without a cast.
  List<Tuple> list = new ArrayList<Tuple>();
  for (int i = 0; i < 1000; i++) {
    list.add(tuple("a"+i, bag(tuple("o", "b"))));
  }
  for (int i = 10; i < 2000; i++) {
    list.add(tuple("a"+i, bag(tuple("o", "b"), tuple("o", "b"), tuple("o", "b"), tuple("o", "b"))));
  }
  for (int i = 20; i < 3000; i++) {
    list.add(tuple("a"+i, bag(tuple("o", "b"), tuple("o", null), tuple(null, "b"), tuple(null, null))));
  }
  for (int i = 30; i < 4000; i++) {
    list.add(tuple("a"+i, null));
  }
  Collections.shuffle(list);
  data.set("in", "a:chararray, b:{t:(c:chararray, d:chararray)}", list );
  pigServer.setBatchOn();
  pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
  pigServer.deleteFile(out);
  pigServer.registerQuery("Store A into '"+out+"' using "+ParquetStorer.class.getName()+"();");
  executeBatchOrFail(pigServer);

  {
    // Full-schema reload: data and schema must match what was stored.
    pigServer.registerQuery("B = LOAD '"+out+"' USING "+ParquetLoader.class.getName()+"();");
    pigServer.registerQuery("Store B into 'out' using mock.Storage();");
    executeBatchOrFail(pigServer);

    List<Tuple> result = data.get("out");
    assertEquals(list, result);
    final Schema schema = data.getSchema("out");
    assertEquals("{a:chararray, b:{t:(c:chararray, d:chararray)}}".replaceAll(" ", ""), schema.toString().replaceAll(" ", ""));
  }

  {
    // Single-column reload: only column a is requested.
    pigServer.registerQuery("C = LOAD '"+out+"' USING "+ParquetLoader.class.getName()+"('a:chararray');");
    pigServer.registerQuery("Store C into 'out2' using mock.Storage();");
    executeBatchOrFail(pigServer);

    final Function<Tuple,Object> grabFirstColumn = new Function<Tuple,Object>() {
      @Override
      public Object apply(Tuple input) {
        try {
          return input.get(0);
        } catch (ExecException e) {
          throw new RuntimeException(e);
        }
      }
    };

    List<Tuple> result2 = data.get("out2");
    // Compare the first column of the reloaded tuples with the first column of the input.
    Object[] result2int = Collections2.transform(result2, grabFirstColumn).toArray();
    Object[] input2int = Collections2.transform(list, grabFirstColumn).toArray();

    assertArrayEquals(input2int, result2int);
  }
}

/**
 * Executes the pending batch exactly once and fails if its first job did not complete.
 * The job is kept so its exception can be reported: calling executeBatch() a second time
 * would start a new execution instead of returning the job that just failed.
 */
private static void executeBatchOrFail(PigServer pigServer) throws Exception {
  org.apache.pig.backend.executionengine.ExecJob job = pigServer.executeBatch().get(0);
  if (job.getStatus() != JOB_STATUS.COMPLETED) {
    throw new RuntimeException("Job failed", job.getException());
  }
}