Java Code Examples for org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil#toConfiguration()

The following examples show how to use org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil#toConfiguration(). You can go to the original project or source file by following the links above each example.
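For orientation: ConfigurationUtil.toConfiguration(Properties) builds a Hadoop Configuration whose entries mirror the given java.util.Properties, which is how the examples below hand Pig settings to Hadoop APIs such as FileSystem, FsShell, and JobConf. A minimal, self-contained sketch (the class name and the property value are illustrative, not taken from any project below):

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;

public class ToConfigurationSketch {
    public static void main(String[] args) {
        // Gather settings as plain java.util.Properties.
        Properties props = new Properties();
        props.setProperty("pig.exec.reducers.max", "10"); // illustrative value

        // Each property becomes a key/value entry in the resulting Configuration.
        Configuration conf = ConfigurationUtil.toConfiguration(props);
        System.out.println(conf.get("pig.exec.reducers.max")); // prints: 10
    }
}

Most examples follow this pattern: obtain a Properties object, usually from a PigContext, and convert it wherever a Configuration is required.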
Example 1
Source File: DefaultIndexableLoader.java    From spork with Apache License 2.0
private void initRightLoader(int [] splitsToBeRead) throws IOException{
    PigContext pc = (PigContext) ObjectSerializer
            .deserialize(PigMapReduce.sJobConfInternal.get().get("pig.pigContext"));
    
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    
    // Hadoop security needs this property to be set
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        conf.set(MRConfiguration.JOB_CREDENTIALS_BINARY, 
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }
    
    // create a ReadToEndLoader that will read the given splits in order
    loader = new ReadToEndLoader((LoadFunc) PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec),
            conf, inpLocation, splitsToBeRead);
}
 
Example 2
Source File: Pig.java    From spork with Apache License 2.0
/**
 * Run a filesystem command.  Any output from this command is written to
 * stdout or stderr as appropriate.
 * @param cmd Filesystem command to run along with its arguments as one
 * string.
 * @throws IOException
 */
public static int fs(String cmd) throws IOException {
    ScriptPigContext ctx = getScriptContext();
    FsShell shell = new FsShell(ConfigurationUtil.toConfiguration(
            ctx.getPigContext().getProperties()));
    int code = -1;
    if (cmd != null) {
        String[] cmdTokens = cmd.split("\\s+");
        if (!cmdTokens[0].startsWith("-")) cmdTokens[0] = "-" + cmdTokens[0];
        try {
            code = shell.run(cmdTokens);
        } catch (Exception e) {
            throw new IOException("Failed to run filesystem command", e);
        }
    }
    return code;
}
 
Example 3
Source File: TestGroupConstParallelMR.java    From spork with Apache License 2.0
@Override
public void checkGroupConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    
    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals("parallism", 1, parallel);
}
 
Example 4
Source File: TestBuiltin.java    From spork with Apache License 2.0
@Test
public void testSFPig() throws Exception {
    Util.resetStateForExecModeSwitch();
    PigServer mrPigServer = new PigServer(cluster.getExecType(), properties);
    String inputStr = "amy\tbob\tcharlene\tdavid\terin\tfrank";
    Util.createInputFile(cluster, "testSFPig-input.txt",
            new String[] { inputStr });
    DataByteArray[] input = { new DataByteArray("amy"),
        new DataByteArray("bob"), new DataByteArray("charlene"),
        new DataByteArray("david"), new DataByteArray("erin"),
        new DataByteArray("frank") };
    Tuple f1 = Util.loadTuple(TupleFactory.getInstance().
            newTuple(input.length), input);
    String outputLocation = "testSFPig-output.txt";
    String query = "a = load 'testSFPig-input.txt';" +
            "store a into '" + outputLocation + "';";
    mrPigServer.setBatchOn();
    Util.registerMultiLineQuery(mrPigServer, query);
    mrPigServer.executeBatch();
    LoadFunc lfunc = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
        toConfiguration(cluster.getProperties()), outputLocation, 0);
    Tuple f2 = lfunc.getNext();
    Util.deleteFile(cluster, "testSFPig-input.txt");

    Util.deleteFile(cluster, outputLocation);
    assertEquals(f1, f2);
}
 
Example 5
Source File: Util.java    From spork with Apache License 2.0
/**
 * Creates an input file on the filesystem described by the given PigContext's
 * properties.
 * @param pigContext context whose properties supply the filesystem configuration
 * @param fileName name of the file to create
 * @param input lines to write to the file
 * @throws IOException
 */
public static void createInputFile(PigContext pigContext,
        String fileName, String[] input) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(
            pigContext.getProperties());
    createInputFile(FileSystem.get(conf), fileName, input);
}
 
Example 6
Source File: Util.java    From spork with Apache License 2.0
public static void deleteFile(PigContext pigContext, String fileName)
        throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(
            pigContext.getProperties());
    FileSystem fs = FileSystem.get(conf);
    if (Util.WINDOWS) {
        fileName = fileName.replace('\\', '/');
    }
    fs.delete(new Path(fileName), true);
}
 
Example 7
Source File: SparkUtil.java    From spork with Apache License 2.0
public static JobConf newJobConf(PigContext pigContext) throws IOException {
    JobConf jobConf = new JobConf(
            ConfigurationUtil.toConfiguration(pigContext.getProperties()));
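    // Serialize the PigContext, the UDF context, and the UDF import list into
    // the job configuration so backend tasks can deserialize them (see Example 1).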
    jobConf.set("pig.pigContext", ObjectSerializer.serialize(pigContext));
    UDFContext.getUDFContext().serialize(jobConf);
    jobConf.set("udf.import.list",
            ObjectSerializer.serialize(PigContext.getPackageImportList()));
    return jobConf;
}
 
Example 8
Source File: TestPigRunner.java    From spork with Apache License 2.0
@Test
public void simpleTest2() throws Exception {
    PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
    w.println("A = load '" + INPUT_FILE + "' as (a0:int, a1:int, a2:int);");
    w.println("B = filter A by a0 == 3;");
    w.println("C = limit B 1;");
    w.println("dump C;");
    w.close();

    try {
        String[] args = { "-Dstop.on.failure=true", "-Dopt.multiquery=false", "-Daggregate.warning=false", "-x", execType, PIG_FILE };
        PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));

        assertTrue(stats instanceof EmptyPigStats);
        assertTrue(stats.isSuccessful());
        assertEquals(0, stats.getNumberJobs());
        assertEquals(0, stats.getJobGraph().size());

        Configuration conf = ConfigurationUtil.toConfiguration(stats.getPigProperties());
        assertTrue(conf.getBoolean("stop.on.failure", false));
        assertTrue(!conf.getBoolean("aggregate.warning", true));
        assertTrue(!conf.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true));
        assertTrue(conf.getBoolean("opt.fetch", true));
    } finally {
        new File(PIG_FILE).delete();
        Util.deleteFile(cluster, OUTPUT_FILE);
    }
}
 
Example 9
Source File: TestPigRunner.java    From spork with Apache License 2.0
@Test
public void simpleTest() throws Exception {
    PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
    w.println("A = load '" + INPUT_FILE + "' as (a0:int, a1:int, a2:int);");
    w.println("B = group A by a0;");
    w.println("C = foreach B generate group, COUNT(A);");
    w.println("store C into '" + OUTPUT_FILE + "';");
    w.close();

    try {
        String[] args = { "-Dstop.on.failure=true", "-Dopt.multiquery=false", "-Dopt.fetch=false", "-Daggregate.warning=false", "-x", execType, PIG_FILE };
        PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));

        assertTrue(stats.isSuccessful());

        assertEquals(1, stats.getNumberJobs());
        String name = stats.getOutputNames().get(0);
        assertEquals(OUTPUT_FILE, name);
        assertEquals(12, stats.getBytesWritten());
        assertEquals(3, stats.getRecordWritten());

        assertEquals("A,B,C",
                ((JobStats)stats.getJobGraph().getSinks().get(0)).getAlias());

        Configuration conf = ConfigurationUtil.toConfiguration(stats.getPigProperties());
        assertTrue(conf.getBoolean("stop.on.failure", false));
        assertTrue(!conf.getBoolean("aggregate.warning", true));
        assertTrue(!conf.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true));
        assertTrue(!conf.getBoolean("opt.fetch", true));
    } finally {
        new File(PIG_FILE).delete();
        Util.deleteFile(cluster, OUTPUT_FILE);
    }
}
 
Example 10
Source File: TestJobSubmissionMR.java    From spork with Apache License 2.0
@Override
public void checkDefaultParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);
    
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);

    JobControl jobControl = jcc.compile(mrPlan, "Test");
    Job job = jobControl.getWaitingJobs().get(0);
    int parallel = job.getJobConf().getNumReduceTasks();

    assertEquals(100, parallel);
    Util.assertParallelValues(100, -1, -1, 100, job.getJobConf());
}
 
Example 11
Source File: TestNewPlanLogicalOptimizer.java    From spork with Apache License 2.0
@Before
public void setUp() throws Exception {
    PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
    pc.connect();
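    // Build the test Configuration from the local filesystem's properties.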
    conf = new Configuration(
            ConfigurationUtil.toConfiguration(pc.getFs().getConfiguration()));
}
 
Example 12
Source File: TestJobSubmission.java    From spork with Apache License 2.0
@Test
public void testReducerNumEstimationForOrderBy() throws Exception {
    // Skip the test for Tez. Tez uses a different mechanism;
    // the equivalent test is in TestTezAutoParallelism.
    Assume.assumeTrue("Skip this test for TEZ",
            Util.isMapredExecType(cluster.getExecType()));
    // use the estimation
    pc.getProperties().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getProperties().setProperty("pig.exec.reducers.max", "10");

    String query = "a = load '/passwd';" +
            "b = order a by $0;" +
            "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);

    MROperPlan mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jobControl = jcc.compile(mrPlan, query);

    assertEquals(2, mrPlan.size());

    // first job uses a single reducer for the sampling
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    // Simulate the first job having run so estimation kicks in.
    MapReduceOper sort = mrPlan.getLeaves().get(0);
    jcc.updateMROpPlan(jobControl.getReadyJobs());
    FileLocalizer.create(sort.getQuantFile(), pc);
    jobControl = jcc.compile(mrPlan, query);

    sort = mrPlan.getLeaves().get(0);
    long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);
    assertEquals(reducer, sort.getRequestedParallelism());

    // the second job estimates reducers
    Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());

    // use the PARALLEL keyword; it will override the estimated reducer number
    query = "a = load '/passwd';" + "b = order a by $0 PARALLEL 2;" +
            "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);

    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);
    assertEquals(2, sort.getRequestedParallelism());

    // the estimation won't take effect when it applies to non-DFS sources or the files don't exist, such as HBase
    query = "a = load 'hbase://passwd' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
            "b = order a by $0 ;" +
            "store b into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(2, mrPlan.size());

    sort = mrPlan.getLeaves().get(0);

    // the requested parallelism will be -1 if the user sets neither default_parallel nor
    // PARALLEL and the estimation doesn't take effect; the MR framework will finally set it to 1.
    assertEquals(-1, sort.getRequestedParallelism());

    // test order by with three jobs (after optimization)
    query = "a = load '/passwd';" +
            "b = foreach a generate $0, $1, $2;" +
            "c = order b by $0;" +
            "store c into 'output';";
    pp = Util.buildPp(ps, query);

    mrPlan = Util.buildMRPlanWithOptimizer(pp, pc);
    assertEquals(3, mrPlan.size());

    // Simulate the first 2 jobs having run so estimation kicks in.
    sort = mrPlan.getLeaves().get(0);
    FileLocalizer.create(sort.getQuantFile(), pc);

    jobControl = jcc.compile(mrPlan, query);
    Util.copyFromLocalToCluster(cluster, "test/org/apache/pig/test/data/passwd", ((POLoad) sort.mapPlan.getRoots().get(0)).getLFile().getFileName());

    // First job is just a foreach with projection (a map-only job), so the estimate gets ignored
    Util.assertParallelValues(-1, -1, -1, 0, jobControl.getWaitingJobs().get(0).getJobConf());

    jcc.updateMROpPlan(jobControl.getReadyJobs());
    jobControl = jcc.compile(mrPlan, query);
    jcc.updateMROpPlan(jobControl.getReadyJobs());

    // Second job is a sampler, which requests and gets 1 reducer
    Util.assertParallelValues(-1, 1, -1, 1, jobControl.getWaitingJobs().get(0).getJobConf());

    jobControl = jcc.compile(mrPlan, query);
    sort = mrPlan.getLeaves().get(0);
    assertEquals(reducer, sort.getRequestedParallelism());

    // Third job is the order, which uses the estimated number of reducers
    Util.assertParallelValues(-1, -1, reducer, reducer, jobControl.getWaitingJobs().get(0).getJobConf());
}
 
Example 13
Source File: TestJobSubmission.java    From spork with Apache License 2.0
@Test
public void testReducerNumEstimation() throws Exception {
    // Skip the test for Tez. Tez uses a different mechanism;
    // the equivalent test is in TestTezAutoParallelism.
    Assume.assumeTrue("Skip this test for TEZ",
            Util.isMapredExecType(cluster.getExecType()));
    // use the estimation
    Configuration conf = HBaseConfiguration.create(new Configuration());
    HBaseTestingUtility util = new HBaseTestingUtility(conf);
    int clientPort = util.startMiniZKCluster().getClientPort();
    util.startMiniHBaseCluster(1, 1);

    String query = "a = load '/passwd';" +
            "b = group a by $0;" +
            "store b into 'output';";
    PigServer ps = new PigServer(cluster.getExecType(), cluster.getProperties());
    PhysicalPlan pp = Util.buildPp(ps, query);
    MROperPlan mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    pc.getConf().setProperty(HConstants.ZOOKEEPER_CLIENT_PORT, Integer.toString(clientPort));
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler jcc = new JobControlCompiler(pc, conf);
    JobControl jc = jcc.compile(mrPlan, "Test");
    Job job = jc.getWaitingJobs().get(0);
    long reducer = Math.min((long) Math.ceil(new File("test/org/apache/pig/test/data/passwd").length() / 100.0), 10);

    Util.assertParallelValues(-1, -1, reducer, reducer, job.getJobConf());

    // use the PARALLEL keyword; it will override the estimated reducer number
    query = "a = load '/passwd';" +
            "b = group a by $0 PARALLEL 2;" +
            "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");
    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, 2, -1, 2, job.getJobConf());

    final byte[] COLUMNFAMILY = Bytes.toBytes("pig");
    util.createTable(Bytes.toBytesBinary("test_table"), COLUMNFAMILY);

    // the estimation won't take effect when it applies to non-DFS sources or the files don't exist, such as HBase
    query = "a = load 'hbase://test_table' using org.apache.pig.backend.hadoop.hbase.HBaseStorage('c:f1 c:f2');" +
            "b = group a by $0 ;" +
            "store b into 'output';";
    pp = Util.buildPp(ps, query);
    mrPlan = Util.buildMRPlan(pp, pc);

    pc.getConf().setProperty("pig.exec.reducers.bytes.per.reducer", "100");
    pc.getConf().setProperty("pig.exec.reducers.max", "10");

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    jcc = new JobControlCompiler(pc, conf);
    jc = jcc.compile(mrPlan, "Test");
    job = jc.getWaitingJobs().get(0);

    Util.assertParallelValues(-1, -1, -1, 1, job.getJobConf());

    util.deleteTable(Bytes.toBytesBinary("test_table"));
    // In HBase 0.90.1 and above we can use util.shutdownMiniHBaseCluster()
    // here instead.
    MiniHBaseCluster hbc = util.getHBaseCluster();
    if (hbc != null) {
        hbc.shutdown();
        hbc.join();
    }
    util.shutdownMiniZKCluster();
}
 
Example 14
Source File: Util.java    From spork with Apache License 2.0
public static String[] readOutput(PigContext pigContext,
        String fileName) throws IOException {
    Configuration conf = ConfigurationUtil.toConfiguration(
            pigContext.getProperties());
    return readOutput(FileSystem.get(conf), fileName);
}
 
Example 15
Source File: MRCompiler.java    From spork with Apache License 2.0
public void aggregateScalarsFiles() throws PlanException, IOException {
    List<MapReduceOper> mrOpList = new ArrayList<MapReduceOper>();
    for (MapReduceOper mrOp : MRPlan) {
        mrOpList.add(mrOp);
    }

    Configuration conf =
        ConfigurationUtil.toConfiguration(pigContext.getProperties());
    boolean combinable = !conf.getBoolean("pig.noSplitCombination", false);

    Set<FileSpec> seen = new HashSet<FileSpec>();

    for (MapReduceOper mro_scalar_consumer : mrOpList) {
        for (PhysicalOperator scalar : mro_scalar_consumer.scalars) {
            MapReduceOper mro_scalar_producer = phyToMROpMap.get(scalar);
            if (scalar instanceof POStore) {
                FileSpec oldSpec = ((POStore) scalar).getSFile();
                if (seen.contains(oldSpec)) {
                    continue;
                }
                seen.add(oldSpec);
                if ( combinable
                     && (mro_scalar_producer.reducePlan.isEmpty() ?
                          hasTooManyInputFiles(mro_scalar_producer, conf)
                          : (mro_scalar_producer.requestedParallelism >= fileConcatenationThreshold))) {
                    PhysicalPlan pl = mro_scalar_producer.reducePlan.isEmpty() ?
                                        mro_scalar_producer.mapPlan : mro_scalar_producer.reducePlan;
                    FileSpec newSpec = getTempFileSpec();

                    // replace oldSpec in mro with newSpec
                    new FindStoreNameVisitor(pl, newSpec, oldSpec).visit();
                    seen.add(newSpec);

                    POStore newSto = getStore();
                    newSto.setSFile(oldSpec);
                    MapReduceOper catMROp = getConcatenateJob(newSpec, mro_scalar_producer, newSto);
                    MRPlan.connect(mro_scalar_producer, catMROp);

                    // Need to add it to the PhysicalPlan and phyToMROpMap
                    // so that softlink can be created
                    phyToMROpMap.put(newSto, catMROp);
                    plan.add(newSto);

                    for (PhysicalOperator succ :
                            plan.getSoftLinkSuccessors(scalar).toArray(new PhysicalOperator[0])) {
                        plan.createSoftLink(newSto, succ);
                        plan.removeSoftLink(scalar, succ);
                    }
                }
            }
        }
    }
}
 
Example 16
Source File: LoaderProcessor.java    From spork with Apache License 2.0
public LoaderProcessor(TezOperPlan plan, PigContext pigContext) {
    super(plan, new DependencyOrderWalker<TezOperator, TezOperPlan>(plan));
    this.pc = pigContext;
    this.conf = ConfigurationUtil.toConfiguration(pc.getProperties());
}
 
Example 17
Source File: TestParser.java    From spork with Apache License 2.0
@Test
public void testRemoteServerList() throws ExecException, IOException {
    for (ExecType execType : execTypes) {
        setUp(execType);
        Properties pigProperties = pigServer.getPigContext().getProperties();
        pigProperties.setProperty("fs.default.name", "hdfs://a.com:8020");
        Configuration conf;

        Data data = Storage.resetData(pigServer.getPigContext());
        data.set("/user/pig/1.txt");// no data

        pigServer.registerQuery("a = load '/user/pig/1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) == null ||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.default.name"))||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.defaultFS")));

        pigServer.registerQuery("a = load 'hdfs://a.com/user/pig/1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS) == null ||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.default.name"))||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.defaultFS")));

        pigServer.registerQuery("a = load 'har:///1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(pigProperties.getProperty(MRConfiguration.JOB_HDFS_SERVERS) == null ||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.default.name"))||
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).equals(pigProperties.get("fs.defaultFS")));

        pigServer.registerQuery("a = load 'hdfs://b.com/user/pig/1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null &&
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://b.com"));

        pigServer.registerQuery("a = load 'har://hdfs-c.com/user/pig/1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null &&
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://c.com"));

        pigServer.registerQuery("a = load 'hdfs://d.com:8020/user/pig/1.txt' using mock.Storage;");
        conf = ConfigurationUtil.toConfiguration(pigProperties);
        assertTrue(conf.get(MRConfiguration.JOB_HDFS_SERVERS) != null &&
                conf.get(MRConfiguration.JOB_HDFS_SERVERS).contains("hdfs://d.com:8020"));
    }
}
 
Example 18
Source File: TestBuiltin.java    From spork with Apache License 2.0
@Test
public void testLFPig() throws Exception {
    Util.createInputFile(cluster, "input.txt", new String[]
        {"this:is:delimited:by:a:colon\n"});
    int arity1 = 6;
    LoadFunc lf = new PigStorage(":");
    LoadFunc p1 = new ReadToEndLoader(lf, ConfigurationUtil.
        toConfiguration(cluster.getProperties()), "input.txt", 0);
    Tuple f1 = p1.getNext();
    assertEquals(arity1, f1.size());
    Util.deleteFile(cluster, "input.txt");

    int LOOP_COUNT = 100;
    String[] input = new String[LOOP_COUNT * LOOP_COUNT];
    int n = 0;
    for (int i = 0; i < LOOP_COUNT; i++) {
        for (int j = 0; j < LOOP_COUNT; j++) {
            input[n++] = (i + "\t" + i + "\t" + j % 2);
        }
    }
    Util.createInputFile(cluster, "input.txt", input);

    LoadFunc p15 = new ReadToEndLoader(new PigStorage(), ConfigurationUtil.
        toConfiguration(cluster.getProperties()), "input.txt", 0);

    int count = 0;
    while (true) {
        Tuple f15 = p15.getNext();
        if (f15 == null)
            break;
        count++;
        assertEquals(3, f15.size());
    }
    assertEquals(LOOP_COUNT * LOOP_COUNT, count);
    Util.deleteFile(cluster, "input.txt");

    String input2 = ":this:has:a:leading:colon\n";
    int arity2 = 6;
    Util.createInputFile(cluster, "input.txt", new String[] {input2});
    LoadFunc p2 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
        toConfiguration(cluster.getProperties()), "input.txt", 0);
    Tuple f2 = p2.getNext();
    assertEquals(arity2, f2.size());
    Util.deleteFile(cluster, "input.txt");

    String input3 = "this:has:a:trailing:colon:\n";
    int arity3 = 6;
    Util.createInputFile(cluster, "input.txt", new String[] {input3});
    LoadFunc p3 = new ReadToEndLoader(new PigStorage(":"), ConfigurationUtil.
        toConfiguration(cluster.getProperties()), "input.txt", 0);
    Tuple f3 = p3.getNext();
    assertEquals(arity3, f3.size());
    Util.deleteFile(cluster, "input.txt");
}
 
Example 19
Source File: TezLauncher.java    From spork with Apache License 2.0
private void optimize(TezOperPlan tezPlan, PigContext pc) throws VisitorException {
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    boolean aggregateWarning = conf.getBoolean("aggregate.warning", false);

    NoopFilterRemover filter = new NoopFilterRemover(tezPlan);
    filter.visit();

    // Run CombinerOptimizer on Tez plan
    boolean nocombiner = conf.getBoolean(PigConfiguration.PIG_EXEC_NO_COMBINER, false);
    if (!pc.inIllustrator && !nocombiner) {
        boolean doMapAgg = Boolean.parseBoolean(pc.getProperties().getProperty(
                PigConfiguration.PIG_EXEC_MAP_PARTAGG, "false"));
        CombinerOptimizer co = new CombinerOptimizer(tezPlan, doMapAgg);
        co.visit();
        co.getMessageCollector().logMessages(MessageType.Warning, aggregateWarning, log);
    }

    // Run optimizer to make use of secondary sort key when possible for nested foreach
    // order by and distinct. Should be done before AccumulatorOptimizer
    boolean noSecKeySort = conf.getBoolean(PigConfiguration.PIG_EXEC_NO_SECONDARY_KEY, false);
    if (!pc.inIllustrator && !noSecKeySort) {
        SecondaryKeyOptimizerTez skOptimizer = new SecondaryKeyOptimizerTez(tezPlan);
        skOptimizer.visit();
    }

    boolean isMultiQuery = conf.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true);
    if (isMultiQuery) {
        // reduces the number of TezOpers in the Tez plan generated
        // by multi-query (multi-store) script.
        MultiQueryOptimizerTez mqOptimizer = new MultiQueryOptimizerTez(tezPlan);
        mqOptimizer.visit();
    }

    // Run AccumulatorOptimizer on Tez plan
    boolean isAccum = conf.getBoolean(PigConfiguration.PIG_OPT_ACCUMULATOR, true);
    if (isAccum) {
        AccumulatorOptimizer accum = new AccumulatorOptimizer(tezPlan);
        accum.visit();
    }

    // Use VertexGroup in Tez
    boolean isUnionOpt = conf.getBoolean(PigConfiguration.PIG_TEZ_OPT_UNION, true);
    if (isUnionOpt) {
        UnionOptimizer uo = new UnionOptimizer(tezPlan);
        uo.visit();
    }
}
 
Example 20
Source File: FetchLauncher.java    From spork with Apache License 2.0
public FetchLauncher(PigContext pigContext) {
    this.pigContext = pigContext;
    this.conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());
}