Java Code Examples for org.apache.pig.impl.PigContext#connect()

The following examples show how to use org.apache.pig.impl.PigContext#connect(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestPigServer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that temporary paths handed out by {@code FileLocalizer} live under
 * the directory configured through {@code pig.temp.dir}, and that the parent
 * of the temporary path is readable, writable and executable by its owner only.
 *
 * <p>Cleanup (deleting the configured directory and resetting the
 * {@code FileLocalizer} initialized flag) runs in a {@code finally} block so
 * that a failing assertion does not leave that state behind for later tests.
 *
 * @throws Throwable if connecting the context or querying the file system fails
 */
@Test
public void testPigTempDir() throws Throwable {
    Properties properties = PropertiesUtil.loadDefaultProperties();
    File pigTempDir = new File(tempDir, FILE_SEPARATOR + "tmp" + FILE_SEPARATOR + "test");
    properties.put("pig.temp.dir", pigTempDir.getPath());
    PigContext pigContext = new PigContext(ExecType.LOCAL, properties);
    pigContext.connect();
    // NOTE(review): presumably forces FileLocalizer to re-read pig.temp.dir — confirm.
    FileLocalizer.setInitialized(false);

    try {
        String tempPath = FileLocalizer.getTemporaryPath(pigContext).toString();
        Path path = new Path(tempPath);
        assertTrue(tempPath.startsWith(pigTempDir.toURI().toString()));

        FileSystem fs = FileSystem.get(path.toUri(),
                ConfigurationUtil.toConfiguration(pigContext.getProperties()));
        FileStatus status = fs.getFileStatus(path.getParent());
        // Temporary root dir should have 700 as permission
        assertEquals("rwx------", status.getPermission().toString());
    } finally {
        // Always undo the side effects, even when an assertion above fails.
        pigTempDir.delete();
        FileLocalizer.setInitialized(false);
    }
}
 
Example 2
Source File: TestFRJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every tuple of the replicated file {@code repl} through a
 * {@code POLoad} operator and groups the converted tuples into
 * {@code replTbl}, keyed by the string form of the field at {@code keyField}.
 *
 * <p>Each stored tuple has two fields: the string key and the integer decoded
 * from field 1 of the loaded tuple.
 *
 * @throws IOException if loading or converting a tuple fails
 */
private void setUpHashTable() throws IOException {
    FuncSpec storageSpec = new FuncSpec(PigStorage.class.getName() + "()");
    FileSpec replicatedSpec = new FileSpec(repl, storageSpec);
    POLoad loader = new POLoad(new OperatorKey("Repl File Loader", 1L), replicatedSpec);
    PigContext context = new PigContext(ExecType.MAPREDUCE, PigMapReduce.sJobConfInternal.get());
    context.connect();
    loader.setPc(context);

    Result current = loader.getNextTuple();
    // Iterate until the loader signals end-of-processing.
    while (current.returnStatus != POStatus.STATUS_EOP) {
        Tuple loaded = (Tuple) current.result;
        LoadFunc loadFunc =
                (LoadFunc) PigContext.instantiateFuncFromSpec(loader.getLFile().getFuncSpec());
        String key = loadFunc.getLoadCaster()
                .bytesToCharArray(((DataByteArray) loaded.get(keyField)).get());

        Tuple converted = TupleFactory.getInstance().newTuple(2);
        converted.set(0, key);
        converted.set(1,
                loadFunc.getLoadCaster().bytesToInteger(((DataByteArray) loaded.get(1)).get()));

        DataBag bucket;
        if (!replTbl.containsKey(key)) {
            // First tuple for this key: register a fresh bag.
            bucket = BagFactory.getInstance().newDefaultBag();
            replTbl.put(key, bucket);
        } else {
            bucket = replTbl.get(key);
        }
        bucket.add(converted);

        current = loader.getNextTuple();
    }
}
 
Example 3
Source File: TestMergeJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a map-reduce plan for a merge join declared with
 * {@code parallel 50} and checks that the first root operator of the plan
 * requests a parallelism of 1.
 *
 * @throws Exception if connecting the context or building the plan fails
 */
@Test
public void testParallelism() throws Exception {
    String loadA = "A = LOAD '" + INPUT_FILE + "';";
    String loadB = "B = LOAD '" + INPUT_FILE + "';";
    String joinAndStore = "C = join A by $0, B by $0 using 'merge' parallel 50;"
            + "store C into 'out';";
    String query = loadA + loadB + joinAndStore;

    PigContext context = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    context.connect();

    MROperPlan mrPlan = Util.buildMRPlan(Util.buildPp(pigServer, query), context);
    Assert.assertEquals(1, mrPlan.getRoots().get(0).getRequestedParallelism());
}
 
Example 4
Source File: TestSecondarySort.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture before each test. The cluster and its connected
 * {@code PigContext} are created only when {@code cluster} is still
 * {@code null}; a new {@code PigServer} over that context is created every time.
 *
 * @throws Exception if the cluster or the server cannot be created
 */
@Before
public void setUp() throws Exception {
    boolean clusterMissing = (cluster == null);
    if (clusterMissing) {
        cluster = getCluster();
        pc = new PigContext(cluster.getExecType(), cluster.getProperties());
        try {
            pc.connect();
        } catch (ExecException connectFailure) {
            // Surface connection problems as unchecked, keeping the cause.
            throw new RuntimeException(connectFailure);
        }
    }

    pigServer = new PigServer(pc);
}
 
Example 5
Source File: TestJobControlCompiler.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Specifically tests that REGISTERED jars get added to the distributed cache.
 *
 * <p>A temporary jar containing a UDF is registered on the {@code PigContext},
 * a single-operator plan referencing that UDF is compiled, and the resulting
 * job configuration is inspected: the jar must be the first file class path
 * entry, its path must start with {@code /}, and it must contain the UDF's
 * class file.
 *
 * @throws Exception if jar creation, connection or job compilation fails
 */
@Test
public void testJarAddedToDistributedCache() throws Exception {

  // creating a jar with a UDF *not* in the current classloader
  File tmpFile = File.createTempFile("Some_", ".jar");
  tmpFile.deleteOnExit();
  String className = createTestJar(tmpFile);
  final String testUDFFileName = className+".class";

  // JobControlCompiler setup
  PigServer pigServer = new PigServer(ExecType.MAPREDUCE);
  PigContext pigContext = pigServer.getPigContext();
  pigContext.connect();
  pigContext.addJar(tmpFile.getAbsolutePath());
  JobControlCompiler jobControlCompiler = new JobControlCompiler(pigContext, CONF);
  MROperPlan plan = new MROperPlan();
  MapReduceOper mro = new MapReduceOper(new OperatorKey());
  mro.UDFs = new HashSet<String>();
  mro.UDFs.add(className+"()");
  plan.add(mro);

  // compiling the job
  JobControl jobControl = jobControlCompiler.compile(plan , "test");
  JobConf jobConf = jobControl.getWaitingJobs().get(0).getJobConf();

  // verifying the jar gets on distributed cache
  Path[] fileClassPaths = DistributedCache.getFileClassPaths(jobConf);
  // guava jar is not shipped with Hadoop 2.x
  Assert.assertEquals("size for "+Arrays.toString(fileClassPaths), HadoopShims.isHadoopYARN() ? 5 : 6, fileClassPaths.length);
  Path distributedCachePath = fileClassPaths[0];
  // expected value (the jar we registered) goes first so a failure reads correctly
  Assert.assertEquals("ends with jar name: "+distributedCachePath, tmpFile.getName(), distributedCachePath.getName());
  // hadoop bug requires path to not contain hdfs://hostname in front
  Assert.assertTrue("starts with /: "+distributedCachePath,
      distributedCachePath.toString().startsWith("/"));
  Assert.assertTrue("jar pushed to distributed cache should contain testUDF",
      jarContainsFileNamed(new File(distributedCachePath.toUri().getPath()), testUDFFileName));
}
 
Example 6
Source File: TestCollectedGroup.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that compiling a {@code group ... using 'collected'} query fails:
 * building the map-reduce plan must throw, and the thrown exception must be
 * an {@code MRCompilerException}.
 *
 * @throws Exception if connecting the context fails
 */
@Test
public void testNonCollectableLoader() throws Exception {
    String loadStmt = "A = LOAD '" + INPUT_FILE + "' as (id, name, grade);";
    String groupStmt = "B = group A by id using 'collected';";
    String query = loadStmt + groupStmt;

    PigContext context = new PigContext(cluster.getExecType(), cluster.getProperties());
    context.connect();

    try {
        Util.buildMRPlan(Util.buildPp(pigServer, query), context);
        Assert.fail("Must throw MRCompiler Exception");
    } catch (Exception thrown) {
        boolean isCompilerFailure = thrown instanceof MRCompilerException;
        Assert.assertTrue(isCompilerFailure);
    }
}
 
Example 7
Source File: TypeCheckingTestUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dummy {@code LOLoad} for the given logical plan. The load reads
 * the file spec {@code "pi"} with {@code PigStorage} and uses a configuration
 * copied from the file system of a freshly connected local {@code PigContext}.
 *
 * @param plan the logical plan passed to {@code newLOLoad}
 * @return the load operator produced by {@code newLOLoad}
 * @throws ExecException if the local context cannot connect
 */
public static org.apache.pig.newplan.logical.relational.LOLoad
genDummyLOLoadNewLP( org.apache.pig.newplan.logical.relational.LogicalPlan plan) throws ExecException  {
    String storageClassName = PigStorage.class.getName();

    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    FileSpec dummySpec = new FileSpec("pi", new FuncSpec(storageClassName));
    Configuration fsConf = new Configuration(
            ConfigurationUtil.toConfiguration(localContext.getFs().getConfiguration()));

    return newLOLoad(dummySpec, null, plan, fsConf);
}
 
Example 8
Source File: TestMRExecutionEngine.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that user-supplied Hadoop settings end up in the {@code JobConf}
 * of the map-reduce execution engine when the context is built from an
 * explicit {@code Configuration} that does not load defaults.
 *
 * @throws ExecException if the context cannot connect
 */
@Test
public void testJobConfGenerationWithUserConfigs() throws ExecException {
    Configuration conf = new Configuration(false);
    // This property allows Pig to depend on user Configuration 
    // and not the classpath
    conf.set("pig.use.overriden.hadoop.configs", "true");
    conf.set(MRConfiguration.JOB_TRACKER, "host:12345");
    conf.set("apache", "pig");
    PigContext pigContext = new PigContext(ExecType.MAPREDUCE, conf);
    pigContext.connect();
    JobConf jc = ((MRExecutionEngine)pigContext.getExecutionEngine()).getJobConf();
    // JUnit's assertEquals takes (expected, actual); keep that order so a
    // failure message reports the values the right way round.
    assertEquals("host:12345", jc.get(MRConfiguration.JOB_TRACKER));
    assertEquals("pig", jc.get("apache"));
}
 
Example 9
Source File: LogicalPlanBuilder.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a builder backed by a freshly connected local {@code PigContext},
 * with the scope fixed to {@code "test"}, an empty file-name map and a
 * black/whitelist filter built from that context.
 *
 * @param input the stream stored as this builder's {@code intStream}
 * @throws ExecException if the local context cannot connect
 */
LogicalPlanBuilder(IntStream input) throws ExecException {
    this.pigContext = new PigContext(ExecType.LOCAL, new Properties());
    this.pigContext.connect();

    this.scope = "test";
    this.fileNameMap = new HashMap<String, String>();
    this.intStream = input;

    this.filter = new BlackAndWhitelistFilter(this.pigContext);
}
 
Example 10
Source File: TestMRCompiler.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * One-time fixture setup: builds the mini cluster, creates a local context
 * ({@code pc}) and a map-reduce context ({@code pcMR}) over the cluster's
 * properties, and connects the local context.
 *
 * <p>NOTE(review): only {@code pc} is connected here; {@code pcMR} is left
 * unconnected — confirm that this is intentional.
 *
 * @throws Exception if the cluster cannot be built or the context cannot connect
 */
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    cluster = MiniCluster.buildCluster();

    Properties localProperties = new Properties();
    pc = new PigContext(ExecType.LOCAL, localProperties);
    pcMR = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());

    pc.connect();
}
 
Example 11
Source File: TestNewPlanLogicalOptimizer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Initialises {@code conf} before each test with a copy of the configuration
 * obtained from the file system of a freshly connected local {@code PigContext}.
 *
 * @throws Exception if the local context cannot connect
 */
@Before
public void setUp() throws Exception {
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    Configuration fsConfiguration =
            ConfigurationUtil.toConfiguration(localContext.getFs().getConfiguration());
    conf = new Configuration(fsConfiguration);
}
 
Example 12
Source File: TestGroupConstParallel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Tests parallelism for a group-by on a constant: builds the physical plan
 * for {@code group a by 1} and hands it, together with a connected context
 * whose {@code defaultParallel} is 100, to
 * {@code checkGroupConstWithParallelResult}.
 *
 * @throws Throwable if connecting, plan building or the check fails
 */
@Test
public void testGroupConstWithParallel() throws Throwable {
    PigContext context = new PigContext(cluster.getExecType(), cluster.getProperties());
    context.defaultParallel = 100;
    context.connect();

    String loadStmt = "a = load '" + INPUT_FILE + "';\n";
    String query = loadStmt + "b = group a by 1;" + "store b into 'output';";

    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan physicalPlan = Util.buildPp(server, query);

    checkGroupConstWithParallelResult(physicalPlan, context);
}
 
Example 13
Source File: TestPigRunner.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requests 100 temporary paths from {@code FileLocalizer} for a connected
 * local context and asserts that {@code PigStatsUtil.isTempFile} accepts each.
 *
 * @throws Exception if connecting or obtaining a temporary path fails
 */
@Test
public void testIsTempFile() throws Exception {
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    int remaining = 100;
    while (remaining > 0) {
        String tempFile = FileLocalizer.getTemporaryPath(localContext).toString();
        assertTrue("not a temp file: " + tempFile, PigStatsUtil.isTempFile(tempFile));
        remaining--;
    }
}
 
Example 14
Source File: TestNewPlanOperatorPlan.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Sets {@code conf} before each test to a copy of the configuration derived
 * from the file system of a newly connected local {@code PigContext}.
 *
 * @throws Exception if the local context cannot connect
 */
@Before
public void setUp() throws Exception {
    PigContext context = new PigContext(ExecType.LOCAL, new Properties());
    context.connect();

    Configuration derived =
            ConfigurationUtil.toConfiguration(context.getFs().getConfiguration());
    conf = new Configuration(derived);
}
 
Example 15
Source File: TestScalarVisitor.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses {@code query} into a logical plan and runs a {@code ScalarVisitor}
 * (scope {@code "test"}) over it using a connected local {@code PigContext}.
 *
 * @param query the Pig Latin text to turn into a logical plan
 * @return the logical plan after the visitor has run
 * @throws RecognitionException declared by this method; raised by callees
 * @throws ParsingFailureException declared by this method; raised by callees
 * @throws IOException declared by this method; raised by callees
 */
private LogicalPlan visit(String query) throws RecognitionException, ParsingFailureException, IOException {
    LogicalPlan logicalPlan = ParserTestingUtils.generateLogicalPlan(query);

    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    new ScalarVisitor(logicalPlan, localContext, "test").visit();
    return logicalPlan;
}
 
Example 16
Source File: TestGroupConstParallel.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Tests parallelism for a group-by on a column: builds the physical plan for
 * {@code group a by $0} and hands it, together with a connected context whose
 * {@code defaultParallel} is 100, to
 * {@code checkGroupNonConstWithParallelResult}.
 *
 * @throws Throwable if connecting, plan building or the check fails
 */
@Test
public void testGroupNonConstWithParallel() throws Throwable {
    PigContext context = new PigContext(cluster.getExecType(), cluster.getProperties());
    context.defaultParallel = 100;
    context.connect();

    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    String loadStmt = "a = load '" + INPUT_FILE + "';\n";
    String query = loadStmt + "b = group a by $0;" + "store b into 'output';";

    PhysicalPlan physicalPlan = Util.buildPp(server, query);

    checkGroupNonConstWithParallelResult(physicalPlan, context);
}
 
Example 17
Source File: TestInputOutputMiniClusterFileValidator.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Before each test, creates and connects a {@code PigContext} for the
 * cluster's execution type and properties, then wraps it in a {@code PigServer}.
 *
 * @throws Exception if the context cannot connect or the server cannot be created
 */
@Before
public void setUp() throws Exception {
    PigContext clusterContext =
            new PigContext(cluster.getExecType(), cluster.getProperties());
    ctx = clusterContext;
    ctx.connect();

    pig = new PigServer(ctx);
}
 
Example 18
Source File: TestMergeJoinOuter.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Compiles a left outer merge join and checks each compilation stage: the
 * logical plan uses the MERGE join type; the physical plan ends in a
 * {@code POStore} fed by a {@code POForEach} whose single input is a
 * {@code POMergeCogroup}; and the map-reduce plan has two operators, the
 * first (after sorting) being map-only and the second having both a map and
 * a reduce plan.
 *
 * <p>Any checked exception raised during compilation is reported as an
 * {@code AssertionError} carrying the original exception as its cause, so the
 * root failure is visible in the test report instead of only on stderr.
 */
@Test
public void testCompilation(){
    try{
        String query = "A = LOAD 'data1' using "+ DummyCollectableLoader.class.getName() +"() as (id, name, grade);" + 
        "B = LOAD 'data2' using "+ DummyIndexableLoader.class.getName() +"() as (id, name, grade);" +
        "C = join A by id left, B by id using 'merge';" +
        "store C into 'out';";
        LogicalPlan lp = Util.buildLp(pigServer, query);
        LOStore store = (LOStore)lp.getSinks().get(0);
        LOJoin join = (LOJoin)lp.getPredecessors(store).get(0);
        assertEquals(LOJoin.JOINTYPE.MERGE, join.getJoinType());

        PigContext pc = new PigContext(ExecType.MAPREDUCE,cluster.getProperties());
        pc.connect();
        PhysicalPlan phyP = Util.buildPp(pigServer, query);
        PhysicalOperator phyOp = phyP.getLeaves().get(0);
        assertTrue(phyOp instanceof POStore);
        phyOp = phyOp.getInputs().get(0);
        assertTrue(phyOp instanceof POForEach);
        assertEquals(1,phyOp.getInputs().size());
        assertTrue(phyOp.getInputs().get(0) instanceof POMergeCogroup);
        
        MROperPlan mrPlan = Util.buildMRPlan(phyP,pc);            
        assertEquals(2,mrPlan.size());

        Iterator<MapReduceOper> itr = mrPlan.iterator();
        List<MapReduceOper> opers = new ArrayList<MapReduceOper>();
        opers.add(itr.next());
        opers.add(itr.next());
        //Order of entrySet is not guaranteed with jdk1.7
        Collections.sort(opers);
        
        assertTrue(opers.get(0).reducePlan.isEmpty());
        assertFalse(opers.get(0).mapPlan.isEmpty());
        
        assertFalse(opers.get(1).reducePlan.isEmpty());
        assertFalse(opers.get(1).mapPlan.isEmpty());


    } catch(Exception e){
        // Same error type and message that fail(...) would raise, but with
        // the underlying exception attached as the cause.
        AssertionError failure = new AssertionError("Compilation of merged cogroup failed.");
        failure.initCause(e);
        throw failure;
    }

}
 
Example 19
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Before each test, replaces {@code ctx} with a new local {@code PigContext}
 * built from empty properties and connects it.
 *
 * @throws Exception if the context cannot connect
 */
@Before
public void setUp() throws Exception {
    Properties emptyProperties = new Properties();
    ctx = new PigContext(ExecType.LOCAL, emptyProperties);
    ctx.connect();
}
 
Example 20
Source File: TestMapSideCogroup.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Compiles a three-way merge cogroup and checks each compilation stage: the
 * logical plan uses the MERGE group type; the physical plan ends in a
 * {@code POStore} fed by a {@code POMergeCogroup}; and the map-reduce plan
 * has two operators, the first (after sorting) being map-only and the second
 * having both a map and a reduce plan.
 *
 * <p>Any checked exception raised during compilation is reported as an
 * {@code AssertionError} carrying the original exception as its cause, so the
 * root failure is visible in the test report instead of only on stderr.
 */
@Test
public void testCompilation(){
    try{
        PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
        String query = "A = LOAD 'data1' using "+ DummyCollectableLoader.class.getName() +"() as (id, name, grade);" + 
        "B = LOAD 'data2' using "+ DummyIndexableLoader.class.getName() +"() as (id, name, grade);" +
        "D = LOAD 'data2' using "+ DummyIndexableLoader.class.getName() +"() as (id, name, grade);" +
        "C = cogroup A by id, B by id, D by id using 'merge';" +
        "store C into 'output';";
        LogicalPlan lp = Util.buildLp(pigServer, query);
        Operator op = lp.getSinks().get(0);
        LOCogroup cogrp = (LOCogroup)lp.getPredecessors(op).get(0);
        assertEquals(LOCogroup.GROUPTYPE.MERGE, cogrp.getGroupType());

        PigContext pc = new PigContext(ExecType.MAPREDUCE,cluster.getProperties());
        pc.connect();
        PhysicalPlan phyP = Util.buildPp(pigServer, query);
        PhysicalOperator phyOp = phyP.getLeaves().get(0);
        assertTrue(phyOp instanceof POStore);
        phyOp = phyOp.getInputs().get(0);
        assertTrue(phyOp instanceof POMergeCogroup);

        MROperPlan mrPlan = Util.buildMRPlan(phyP,pc);            
        assertEquals(2,mrPlan.size());

        Iterator<MapReduceOper> itr = mrPlan.iterator();
        List<MapReduceOper> opers = new ArrayList<MapReduceOper>();
        opers.add(itr.next());
        opers.add(itr.next());
        //Order of entrySet is not guaranteed with jdk1.7
        Collections.sort(opers);
        
        assertTrue(opers.get(0).reducePlan.isEmpty());
        assertFalse(opers.get(0).mapPlan.isEmpty());
        
        assertFalse(opers.get(1).reducePlan.isEmpty());
        assertFalse(opers.get(1).mapPlan.isEmpty());
    } catch(Exception e){
        // Same error type and message that fail(...) would raise, but with
        // the underlying exception attached as the cause.
        AssertionError failure = new AssertionError("Compilation of merged cogroup failed.");
        failure.initCause(e);
        throw failure;
    }

}