org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil Java Examples

The following examples show how to use org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PigOutputFormat.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Pushes the settings of one specific store into the Configuration held by
 * the given {@link JobContext}.
 * <p>
 * The underlying OutputFormat / OutputCommitter must find the output
 * location and StoreFunc settings of this store in the Configuration
 * before any call is delegated to them.
 *
 * @param jobContext the {@link JobContext} whose Configuration is updated
 * @param store the POStore providing the StoreFunc and the output file
 * @throws IOException on failure
 */
public static void setLocation(JobContext jobContext, POStore store) throws
IOException {
    Job scratchJob = new Job(jobContext.getConfiguration());

    // Hand the output location to the StoreFunc; it is expected to forward
    // it to its OutputFormat, which typically records the location (and
    // possibly further settings) in the Configuration of the job passed in.
    store.getStoreFunc().setStoreLocation(
            store.getSFile().getFileName(), scratchJob);

    // Merge whatever ended up in the scratch job's Configuration back into
    // the Configuration we started from, so that the caller's context
    // carries the updates once this method returns.
    ConfigurationUtil.mergeConf(
            jobContext.getConfiguration(), scratchJob.getConfiguration());
}
 
Example #2
Source File: TestBuiltin.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@link TextLoader} through a {@link ReadToEndLoader}: a
 * two-line input file has to come back as two tuples holding one line
 * each, and an empty input file must not produce any tuple.
 */
@Test
public void testLFText() throws Exception {
    final String populatedFile = "testLFTest-input1.txt";
    final String emptyFile = "testLFTest-input2.txt";
    String fileContent = "This is some text.\nWith a newline in it.\n";
    String firstLine = "This is some text.";
    String secondLine = "With a newline in it.";
    Util.createInputFile(cluster, populatedFile, new String[] {fileContent});

    // pull both lines before the input file is removed again
    LoadFunc populatedLoader = new ReadToEndLoader(new TextLoader(),
            ConfigurationUtil.toConfiguration(cluster.getProperties()),
            populatedFile, 0);
    Tuple firstTuple = populatedLoader.getNext();
    Tuple secondTuple = populatedLoader.getNext();
    Util.deleteFile(cluster, populatedFile);
    boolean bothLinesMatch = firstLine.equals(firstTuple.get(0).toString())
            && secondLine.equals(secondTuple.get(0).toString());
    assertTrue(bothLinesMatch);

    // an input file without content must not yield a tuple
    Util.createInputFile(cluster, emptyFile, new String[] {});
    LoadFunc emptyLoader = new ReadToEndLoader(new TextLoader(),
            ConfigurationUtil.toConfiguration(cluster.getProperties()),
            emptyFile, 0);
    Tuple nothing = emptyLoader.getNext();
    Util.deleteFile(cluster, emptyFile);
    assertTrue(nothing == null);
}
 
Example #3
Source File: Pig.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Run a filesystem command.  Any output from this command is written to
 * stdout or stderr as appropriate.
 * @param cmd Filesystem command to run along with its arguments as one
 * string. Leading and trailing whitespace is ignored. A leading "-" is
 * added to the command name when it is missing.
 * @return the exit code reported by the filesystem shell, or -1 when
 * {@code cmd} is null
 * @throws IOException if running the command raises any exception; the
 * original exception is attached as the cause
 */
public static int fs(String cmd) throws IOException {
    ScriptPigContext ctx = getScriptContext();
    FsShell shell = new FsShell(ConfigurationUtil.toConfiguration(ctx
            .getPigContext().getProperties()));
    int code = -1;
    if (cmd != null) {
        // Trim before splitting: split() keeps a leading empty token when
        // the input starts with whitespace, which would otherwise be
        // turned into a bare "-" command below.
        String[] cmdTokens = cmd.trim().split("\\s+");
        if (!cmdTokens[0].startsWith("-")) {
            cmdTokens[0] = "-" + cmdTokens[0];
        }
        try {
            code = shell.run(cmdTokens);
        } catch (Exception e) {
            throw new IOException("Run filesystem command failed", e);
        }
    }
    return code;
}
 
Example #4
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link LogicalPlan} made of a Load, a Filter and a Store
 * operator, connected as load -> filter -> store.
 *
 * @param inputFile name handed to generateTmpFile for the load side
 * @param outputFile name handed to generateTmpFile for the store side
 * @param dfs data storage whose configuration is used to create the load
 * @return the assembled plan
 * @throws Exception if any step of building the plan fails
 */
private LogicalPlan generateLogicalPlan(String inputFile,
        String outputFile, DataStorage dfs) throws Exception {
    final String storageFunc = "org.apache.pig.builtin.PigStorage";
    LogicalPlan plan = new LogicalPlan();

    String inputPath = generateTmpFile(inputFile).getAbsolutePath();
    FileSpec inputSpec = new FileSpec(inputPath, new FuncSpec(storageFunc));
    String outputPath = generateTmpFile(outputFile).getAbsolutePath();
    FileSpec outputSpec = new FileSpec(outputPath, new FuncSpec(storageFunc));

    LOLoad load = newLOLoad(inputSpec, null, plan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface storeFunc = (StoreFuncInterface) PigContext
            .instantiateFuncFromSpec(outputSpec.getFuncSpec());
    LOStore store = new LOStore(plan, outputSpec, storeFunc, null);
    LOFilter filter = new LOFilter(plan);

    // register the operators first, then wire them up
    plan.add(load);
    plan.add(store);
    plan.add(filter);

    plan.connect(load, filter);
    plan.connect(filter, store);

    return plan;
}
 
Example #5
Source File: EmptyPigStats.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates statistics that carry exactly one placeholder output entry and
 * one placeholder input entry. Start and end time are both set to the
 * moment of construction.
 *
 * @param pigContext context whose properties back the Configuration
 *                   handed to both placeholder entries
 * @param poStore store operator attached to the output entry
 */
public EmptyPigStats(PigContext pigContext, POStore poStore) {
    super.pigContext = pigContext;
    super.endTime = System.currentTimeMillis();
    super.startTime = super.endTime;
    super.userId = System.getProperty("user.name");

    Configuration statsConf = ConfigurationUtil.toConfiguration(pigContext.getProperties());

    // single placeholder entry on the output side
    OutputStats emptyOutput = new OutputStats(null, -1, -1, true);
    emptyOutput.setConf(statsConf);
    emptyOutput.setPOStore(poStore);
    this.outputStatsList = Collections.unmodifiableList(Arrays.asList(emptyOutput));

    // single placeholder entry on the input side
    InputStats emptyInput = new InputStats(null, -1, -1, true);
    emptyInput.setConf(statsConf);
    this.inputStatsList = Collections.unmodifiableList(Arrays.asList(emptyInput));
}
 
Example #6
Source File: TestInputOutputFileValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-operator plan in which a load of {@code inputFile} feeds
 * directly into a store to {@code outputFile}.
 */
private LogicalPlan genNewLoadStorePlan(String inputFile,
                                        String outputFile, DataStorage dfs)
                                    throws Throwable {
    final String storageFunc = "org.apache.pig.builtin.PigStorage";
    LogicalPlan plan = new LogicalPlan();

    FileSpec inputSpec = new FileSpec(inputFile, new FuncSpec(storageFunc));
    FileSpec outputSpec = new FileSpec(outputFile, new FuncSpec(storageFunc));

    LOLoad load = newLOLoad(inputSpec, null, plan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface storeFunc = (StoreFuncInterface) PigContext
            .instantiateFuncFromSpec(outputSpec.getFuncSpec());
    LOStore store = new LOStore(plan, outputSpec, storeFunc, null);

    // register both operators, then connect load -> store
    plan.add(load);
    plan.add(store);
    plan.connect(load, store);

    return plan;
}
 
Example #7
Source File: TestStore.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores a three-column relation with BinStorage and checks that
 * {@code BinStorage.getSchema} reports the declared schema for the
 * written output.
 */
@Test
public void testBinStorageGetSchema() throws IOException, ParserException {
    String[] rows = { "hello\t1\t10.1", "bye\t2\t20.2" };
    String inputFileName = "testGetSchema-input.txt";
    String outputFileName = "testGetSchema-output.txt";
    try {
        Util.createInputFile(pig.getPigContext(), inputFileName, rows);
        String loadAndStore = "a = load '" + inputFileName
                + "' as (c:chararray, i:int,d:double);store a into '"
                + outputFileName + "' using BinStorage();";
        pig.setBatchOn();
        Util.registerMultiLineQuery(pig, loadAndStore);
        pig.executeBatch();

        // ask a fresh BinStorage for the schema of what was just written
        Job schemaJob = new Job(ConfigurationUtil.toConfiguration(
                pig.getPigContext().getProperties()));
        ResourceSchema actual = new BinStorage().getSchema(outputFileName, schemaJob);
        Schema expected = Utils.getSchemaFromString("c:chararray,i:int,d:double");
        assertTrue("Checking binstorage getSchema output",
                Schema.equals(expected, Schema.getPigSchema(actual), true, true));
    } finally {
        // always remove both files, whatever happened above
        Util.deleteFile(pig.getPigContext(), inputFileName);
        Util.deleteFile(pig.getPigContext(), outputFileName);
    }
}
 
Example #8
Source File: TestPigServer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Points "pig.temp.dir" at a directory below the test temp dir and checks
 * that temporary paths are created underneath it and that the parent of
 * the temporary path is only accessible by its owner.
 */
@Test
public void testPigTempDir() throws Throwable {
    Properties props = PropertiesUtil.loadDefaultProperties();
    File configuredTempDir = new File(tempDir, FILE_SEPARATOR + "tmp" + FILE_SEPARATOR + "test");
    props.put("pig.temp.dir", configuredTempDir.getPath());

    PigContext localContext = new PigContext(ExecType.LOCAL, props);
    localContext.connect();
    FileLocalizer.setInitialized(false);

    String temporaryLocation = FileLocalizer.getTemporaryPath(localContext).toString();
    Path temporaryPath = new Path(temporaryLocation);
    assertTrue(temporaryLocation.startsWith(configuredTempDir.toURI().toString()));

    FileSystem fileSystem = FileSystem.get(temporaryPath.toUri(),
            ConfigurationUtil.toConfiguration(localContext.getProperties()));
    FileStatus parentStatus = fileSystem.getFileStatus(temporaryPath.getParent());
    // the temporary root dir is expected to carry permission 700
    assertEquals("rwx------", parentStatus.getPermission().toString());

    configuredTempDir.delete();
    FileLocalizer.setInitialized(false);
}
 
Example #9
Source File: TestInputOutputMiniClusterFileValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a minimal plan: one load operator reading {@code inputFile}
 * connected straight to one store operator writing {@code outputFile}.
 */
private LogicalPlan genNewLoadStorePlan(String inputFile,
                                        String outputFile, DataStorage dfs)
                                    throws Throwable {
    final String pigStorage = "org.apache.pig.builtin.PigStorage";
    LogicalPlan loadStorePlan = new LogicalPlan();

    FileSpec source = new FileSpec(inputFile, new FuncSpec(pigStorage));
    FileSpec sink = new FileSpec(outputFile, new FuncSpec(pigStorage));

    LOLoad loadOp = newLOLoad(source, null, loadStorePlan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface sinkFunc = (StoreFuncInterface) PigContext
            .instantiateFuncFromSpec(sink.getFuncSpec());
    LOStore storeOp = new LOStore(loadStorePlan, sink, sinkFunc, null);

    // operators go into the plan before the edge between them is created
    loadStorePlan.add(loadOp);
    loadStorePlan.add(storeOp);
    loadStorePlan.connect(loadOp, storeOp);

    return loadStorePlan;
}
 
Example #10
Source File: DefaultIndexableLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the loader for the right-hand input: a {@link ReadToEndLoader}
 * wrapping the configured right loader function that reads the given
 * splits in order.
 *
 * @param splitsToBeRead indexes of the splits to read
 * @throws IOException if the loader cannot be set up
 */
private void initRightLoader(int [] splitsToBeRead) throws IOException{
    // the PigContext is taken from its serialized form in the job conf
    PigContext pc = (PigContext) ObjectSerializer.deserialize(
            PigMapReduce.sJobConfInternal.get().get("pig.pigContext"));
    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());

    // Hadoop security needs this property to be set
    String tokenFile = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
    if (tokenFile != null) {
        conf.set(MRConfiguration.JOB_CREDENTIALS_BINARY, tokenFile);
    }

    LoadFunc rightLoadFunc =
            (LoadFunc) PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec);
    loader = new ReadToEndLoader(rightLoadFunc, conf, inpLocation, splitsToBeRead);
}
 
Example #11
Source File: TestFinish.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the expected output exists for the given execution type and
 * removes the files afterwards.
 * <p>
 * For the cluster's execution type the expected file is looked up on the
 * cluster file system and both the input and the expected file are
 * deleted there. For {@code ExecType.LOCAL} the expected file is looked up
 * and deleted on the local disk.
 *
 * @param execType execution type the script was run with
 * @param expectedFileName file that must exist
 * @param inputFileName input file to remove (cluster execution only)
 * @throws IOException if accessing the cluster file system fails
 * @throws IllegalArgumentException if {@code execType} is neither the
 *         cluster's execution type nor {@code ExecType.LOCAL}
 */
private void checkAndCleanup(ExecType execType, String expectedFileName,
        String inputFileName) throws IOException {
    if (execType == cluster.getExecType()) {
        FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
                cluster.getProperties()));
        assertTrue(fs.exists(new Path(expectedFileName)));
        Util.deleteFile(cluster, inputFileName);
        Util.deleteFile(cluster, expectedFileName);
    } else if (execType == ExecType.LOCAL) {
        File f = new File(expectedFileName);
        assertTrue(f.exists());
        f.delete();
    } else {
        // Plain concatenation instead of execType.toString(): a null
        // execType must be reported as an invalid argument rather than
        // surfacing as a NullPointerException.
        throw new IllegalArgumentException("invalid exectype " + execType);
    }
}
 
Example #12
Source File: ParallelismSetter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up a visitor that walks the Tez plan in dependency order.
 * <p>
 * The auto-parallelism flag and the estimator are both taken from the
 * Configuration derived from the context's properties. When no estimator
 * is configured, a {@code TezOperDependencyParallelismEstimator} is used.
 *
 * @param plan the Tez operator plan to visit
 * @param pigContext context supplying the properties
 * @throws RuntimeException if the configured estimator cannot be
 *         instantiated
 */
public ParallelismSetter(TezOperPlan plan, PigContext pigContext) {
    super(plan, new DependencyOrderWalker<TezOperator, TezOperPlan>(plan));
    this.pc = pigContext;
    this.conf = ConfigurationUtil.toConfiguration(pc.getProperties());
    this.autoParallelismEnabled = conf.getBoolean(PigConfiguration.PIG_TEZ_AUTO_PARALLELISM, true);
    try {
        if (conf.get(PigConfiguration.PIG_EXEC_REDUCER_ESTIMATOR) == null) {
            // nothing configured: fall back to the dependency based estimator
            this.estimator = new TezOperDependencyParallelismEstimator();
        } else {
            this.estimator = PigContext.instantiateObjectFromParams(conf,
                    PigConfiguration.PIG_EXEC_REDUCER_ESTIMATOR,
                    PigConfiguration.PIG_EXEC_REDUCER_ESTIMATOR_CONSTRUCTOR_ARG_KEY,
                    TezParallelismEstimator.class);
        }
        this.estimator.setPigContext(pc);
    } catch (ExecException e) {
        throw new RuntimeException("Error instantiating TezParallelismEstimator", e);
    }
}
 
Example #13
Source File: HadoopClientServicesImpl.java    From pentaho-hadoop-shims with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the Pig default properties into {@code properties} and, when a
 * configuration is supplied, copies its entries on top and sets
 * "pig.use.overriden.hadoop.configs" to "true".
 *
 * @param properties    properties to fill
 * @param configuration optional configuration; may be {@code null}
 */
private void updatePigConfiguration( Properties properties, Configuration configuration ) {
  PropertiesUtil.loadDefaultProperties( properties );
  if ( configuration == null ) {
    // nothing to overlay on the defaults
    return;
  }
  Properties overrides = ConfigurationUtil.toProperties( ShimUtils.asConfiguration( configuration ) );
  properties.putAll( overrides );
  properties.setProperty( "pig.use.overriden.hadoop.configs", "true" );
}
 
Example #14
Source File: TestGroupConstParallelMR.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the physical plan into MapReduce jobs and checks that the first
 * waiting job is configured with 100 reduce tasks.
 */
@Override
public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan compiledPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration jobConf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler compiler = new JobControlCompiler(pc, jobConf);

    JobControl control = compiler.compile(compiledPlan, "Test");
    Job firstWaiting = control.getWaitingJobs().get(0);
    int reducers = firstWaiting.getJobConf().getNumReduceTasks();

    assertEquals("parallism", 100, reducers);
}
 
Example #15
Source File: TestLookupInFiles.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the piggybank {@code LookupInFiles} UDF against two lookup files:
 * the key "one" is present in the lookup files and must yield 1, the key
 * "notexist" is in neither file and must yield 0.
 */
@Test
public void testLookupInFiles() throws Exception {
    // input relation: one key per line
    File tmpFile = File.createTempFile("test", ".txt");
    PrintStream ps1 = new PrintStream(new FileOutputStream(tmpFile));

    ps1.println("one");
    ps1.println("notexist");
    ps1.println("three");
    ps1.close();

    File lookupFile1 = File.createTempFile("lookup", ".txt");
    PrintStream lps1 = new PrintStream(new FileOutputStream(lookupFile1));

    lps1.println("one");
    lps1.println("two");
    lps1.println("three");
    lps1.close();

    // suffix includes the dot so the file gets a proper ".txt" extension
    File lookupFile2 = File.createTempFile("lookup", ".txt");
    PrintStream lps2 = new PrintStream(new FileOutputStream(lookupFile2));

    lps2.println("one");
    lps2.println("ten");
    lps2.println("eleven");
    lps2.close();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(pigServer.getPigContext().getProperties()));
    fs.copyFromLocalFile(new Path(lookupFile1.toString()), new Path("lookup1"));
    // upload the second lookup file (previously lookupFile1 was copied
    // twice, so the content written to lookupFile2 was never used)
    fs.copyFromLocalFile(new Path(lookupFile2.toString()), new Path("lookup2"));
    pigServer.registerQuery("A = LOAD '" + Util.generateURI(tmpFile.toString(), pigServer.getPigContext()) + "' AS (key:chararray);");
    pigServer.registerQuery("B = FOREACH A GENERATE org.apache.pig.piggybank.evaluation.string.LookupInFiles(key, 'lookup1', 'lookup2');");
    Iterator<Tuple> iter = pigServer.openIterator("B");

    // "one" is found
    int r = (Integer)iter.next().get(0);
    assertTrue(r==1);
    // "notexist" is not found
    r = (Integer)iter.next().get(0);
    assertTrue(r==0);
}
 
Example #16
Source File: TestBuiltin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a single six-field row with the default store function and reads
 * it back through PigStorage; the tuple read back must equal a tuple built
 * from the same six values.
 */
@Test
public void testSFPig() throws Exception {
    Util.resetStateForExecModeSwitch();
    PigServer mrPigServer = new PigServer(cluster.getExecType(), properties);

    final String inputLocation = "testSFPig-input.txt";
    String row = "amy\tbob\tcharlene\tdavid\terin\tfrank";
    Util.createInputFile(cluster, inputLocation, new String[] {row});

    // expected tuple: one DataByteArray per tab separated value of the row
    String[] names = { "amy", "bob", "charlene", "david", "erin", "frank" };
    DataByteArray[] fields = new DataByteArray[names.length];
    for (int i = 0; i < names.length; i++) {
        fields[i] = new DataByteArray(names[i]);
    }
    Tuple expected = Util.loadTuple(
            TupleFactory.getInstance().newTuple(fields.length), fields);

    String outputLocation = "testSFPig-output.txt";
    String loadAndStore = "a = load '" + inputLocation + "';"
            + "store a into '" + outputLocation + "';";
    mrPigServer.setBatchOn();
    Util.registerMultiLineQuery(mrPigServer, loadAndStore);
    mrPigServer.executeBatch();

    LoadFunc reader = new ReadToEndLoader(new PigStorage(),
            ConfigurationUtil.toConfiguration(cluster.getProperties()),
            outputLocation, 0);
    Tuple actual = reader.getNext();

    Util.deleteFile(cluster, inputLocation);
    Util.deleteFile(cluster, outputLocation);
    assertEquals(expected, actual);
}
 
Example #17
Source File: TestNewPlanLogicalOptimizer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Connects a local-mode PigContext and keeps a copy of the Configuration
 * derived from its file system configuration in {@code conf}.
 */
@Before
public void setUp() throws Exception {
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();
    Configuration fsConf =
            ConfigurationUtil.toConfiguration(localContext.getFs().getConfiguration());
    conf = new Configuration(fsConf);
}
 
Example #18
Source File: TestConstantCalculator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a local-mode PigServer, remembers its PigContext and initializes
 * the SchemaTupleBackend with a Configuration built from that context's
 * properties.
 */
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL, new Properties());
    pc = pigServer.getPigContext();
    Configuration backendConf =
            ConfigurationUtil.toConfiguration(pc.getProperties(), true);
    SchemaTupleBackend.initialize(backendConf, pc);
}
 
Example #19
Source File: Util.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an input file on the file system obtained from the given
 * context's properties, delegating to the FileSystem based overload.
 *
 * @param pigContext context whose properties select the file system
 * @param fileName name of the file to create
 * @param input content passed on to the FileSystem based overload
 * @throws IOException if the file system cannot be obtained or the file
 *         cannot be created
 */
public static void createInputFile(PigContext pigContext,
        String fileName, String[] input) throws IOException {
    FileSystem targetFs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pigContext.getProperties()));
    createInputFile(targetFs, fileName, input);
}
 
Example #20
Source File: TestPigRunner.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a small group/count script through {@link PigRunner} and checks the
 * reported statistics as well as that the -D options given on the command
 * line show up in the Pig properties.
 */
@Test
public void simpleTest() throws Exception {
    PrintWriter script = new PrintWriter(new FileWriter(PIG_FILE));
    script.println("A = load '" + INPUT_FILE + "' as (a0:int, a1:int, a2:int);");
    script.println("B = group A by a0;");
    script.println("C = foreach B generate group, COUNT(A);");
    script.println("store C into '" + OUTPUT_FILE + "';");
    script.close();

    try {
        String[] commandLine = {
            "-Dstop.on.failure=true",
            "-Dopt.multiquery=false",
            "-Dopt.fetch=false",
            "-Daggregate.warning=false",
            "-x", execType, PIG_FILE
        };
        PigStats stats = PigRunner.run(commandLine, new TestNotificationListener(execType));

        assertTrue(stats.isSuccessful());

        // one job writing three records into the single output
        assertEquals(1, stats.getNumberJobs());
        String outputName = stats.getOutputNames().get(0);
        assertEquals(OUTPUT_FILE, outputName);
        assertEquals(12, stats.getBytesWritten());
        assertEquals(3, stats.getRecordWritten());

        JobStats sink = (JobStats) stats.getJobGraph().getSinks().get(0);
        assertEquals("A,B,C", sink.getAlias());

        // every -D option has to be visible with the value passed in
        Configuration effective = ConfigurationUtil.toConfiguration(stats.getPigProperties());
        assertTrue(effective.getBoolean("stop.on.failure", false));
        assertTrue(!effective.getBoolean("aggregate.warning", true));
        assertTrue(!effective.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true));
        assertTrue(!effective.getBoolean("opt.fetch", true));
    } finally {
        new File(PIG_FILE).delete();
        Util.deleteFile(cluster, OUTPUT_FILE);
    }
}
 
Example #21
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers a script file that switches on BZip2 output compression and
 * stores the same relation into 'output3.bz2' and 'output3'; both output
 * locations must end up with a non-empty first file.
 */
@Test
public void testBzipStoreInMultiQuery3() throws Exception {
    String inputFileName = "input3.txt";
    String[] inputData = new String[] { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputData);

    // script: compression settings first, then one load and two stores
    String scriptText = "set mapred.output.compress true\n" +
            "set mapreduce.output.fileoutputformat.compress true\n" +
            "set mapred.output.compression.codec org.apache.hadoop.io.compress.BZip2Codec\n" +
            "set mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.BZip2Codec\n" +
            "a = load '" + inputFileName + "';\n" +
            "store a into 'output3.bz2';\n" +
            "store a into 'output3';";

    String scriptFileName = "script3.txt";
    PrintWriter scriptWriter = new PrintWriter(new FileWriter(scriptFileName));
    scriptWriter.println(scriptText);
    scriptWriter.close();

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    FileInputStream scriptStream = new FileInputStream(scriptFileName);
    pig.registerScript(scriptStream);

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pig.getPigContext().getProperties()));

    FileStatus[] plainOutput = fs.listStatus(new Path("output3"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(plainOutput[0].getLen() > 0);

    FileStatus[] bzipOutput = fs.listStatus(new Path("output3.bz2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(bzipOutput[0].getLen() > 0);
}
 
Example #22
Source File: Util.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Copies a text file from the mini cluster's file system to the local
 * disk, line by line.
 * <p>
 * On Windows, backslashes in both file names are replaced by forward
 * slashes first. Missing parent directories of the local file are created.
 * The cluster file is validated before the local file is opened, so a
 * rejected request does not leave an empty local file behind.
 *
 * @param cluster the cluster whose properties select the file system
 * @param fileNameOnCluster path of the file to read on the cluster
 * @param localFileName path of the local file to write
 * @throws IOException if the cluster file does not exist, is a directory,
 *         or reading or writing fails
 */
static public void copyFromClusterToLocal(MiniGenericCluster cluster,
        String fileNameOnCluster, String localFileName) throws IOException {
    if (Util.WINDOWS) {
        fileNameOnCluster = fileNameOnCluster.replace('\\','/');
        localFileName = localFileName.replace('\\','/');
    }
    // getParentFile() is null for a bare file name; nothing to create then
    File parent = new File(localFileName).getParentFile();
    if (parent != null && !parent.exists()) {
        parent.mkdirs();
    }

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            cluster.getProperties()));
    Path clusterPath = new Path(fileNameOnCluster);
    if (!fs.exists(clusterPath)) {
        throw new IOException("File " + fileNameOnCluster + " does not exists on the minicluster");
    }
    FileStatus fst = fs.getFileStatus(clusterPath);
    if (fst.isDir()) {
        throw new IOException("Only files from cluster can be copied locally," +
                " " + fileNameOnCluster + " is a directory");
    }

    // Both streams are closed even when reading or writing fails.
    FSDataInputStream stream = fs.open(clusterPath);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream));
    try {
        PrintWriter writer = new PrintWriter(new FileWriter(localFileName));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                writer.println(line);
            }
        } finally {
            writer.close();
        }
    } finally {
        reader.close();
    }
}
 
Example #23
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the same relation into 'output.bz2' and 'output' within a single
 * batch; both output locations must end up with a non-empty first file.
 */
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    String inputFileName = "input.txt";
    String[] inputData = new String[] { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);

    // one load, two stores, executed as one batch
    pig.setBatchOn();
    pig.registerQuery("a = load '" +  inputFileName + "';");
    pig.registerQuery("store a into 'output.bz2';");
    pig.registerQuery("store a into 'output';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pig.getPigContext().getProperties()));

    FileStatus[] plainOutput = fs.listStatus(new Path("output"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(plainOutput[0].getLen() > 0);

    FileStatus[] bzipOutput = fs.listStatus(new Path("output.bz2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(bzipOutput[0].getLen() > 0);
}
 
Example #24
Source File: TestBZip.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the given input with the given load function, stores it, and
 * checks that at least one "part" file was written; afterwards counts the
 * records of the input and compares the count with the expected one.
 *
 * @param inputFileName input to load
 * @param expectedCount number of records the input must contain
 * @param splitSize value set as maximum split size
 * @param loadFuncSpec load function used by the load-store script
 * @throws IOException if running the scripts or accessing the file
 *         system fails
 */
private void testCount(String inputFileName, Long expectedCount,
        int splitSize, String loadFuncSpec) throws IOException {
    String outputFile = "/tmp/bz-output";

    // simple load-store script used to look at the written part files
    String loadStoreScript = "a = load '" +inputFileName + "' using " +
            loadFuncSpec + "; store a into '" + outputFile + "';";
    String countScript = "a = load '" + inputFileName + "';" +
            "b = group a all;" +
            "c = foreach b generate COUNT_STAR(a);";

    // private copy of the shared properties plus the split size
    Properties props = new Properties();
    for (Entry<Object, Object> shared : properties.entrySet()) {
        props.put(shared.getKey(), shared.getValue());
    }
    props.setProperty(MRConfiguration.MAX_SPLIT_SIZE, Integer.toString(splitSize));

    PigServer pig = new PigServer(cluster.getExecType(), props);
    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(props));
    fs.delete(new Path(outputFile), true);
    Util.registerMultiLineQuery(pig, loadStoreScript);

    // at least one part file has to be present in the output directory
    int numPartFiles = 0;
    for (FileStatus written : fs.listStatus(new Path(outputFile))) {
        String name = written.getPath().getName();
        if (name.startsWith("part")) {
            numPartFiles++;
        }
    }
    assertEquals(true, numPartFiles > 0);

    // the record count tells whether the bzip data was read correctly
    Util.registerMultiLineQuery(pig, countScript);
    Iterator<Tuple> counts = pig.openIterator("c");
    Long actualCount = (Long) counts.next().get(0);
    assertEquals(expectedCount, actualCount);
}
 
Example #25
Source File: TypeCheckingTestUtil.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dummy load operator for the given plan. It loads the file
 * "pi" with PigStorage and uses a copy of the Configuration derived from
 * a freshly connected local-mode PigContext.
 *
 * @param plan plan the load operator is created for
 * @return the new load operator
 * @throws ExecException if connecting the PigContext fails
 */
public static org.apache.pig.newplan.logical.relational.LOLoad 
genDummyLOLoadNewLP( org.apache.pig.newplan.logical.relational.LogicalPlan plan) throws ExecException  {
    String storageClassName = PigStorage.class.getName();
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    FileSpec dummySpec = new FileSpec("pi", new FuncSpec(storageClassName));
    Configuration loadConf = new Configuration(
            ConfigurationUtil.toConfiguration(localContext.getFs().getConfiguration()));
    return newLOLoad(dummySpec, null, plan, loadConf);
}
 
Example #26
Source File: TestJobSubmissionMR.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the physical plan into MapReduce jobs and checks that the first
 * waiting job runs with 100 reduce tasks and carries the expected
 * parallelism values in its job conf.
 */
@Override
public void checkDefaultParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan compiledPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration jobConf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler compiler = new JobControlCompiler(pc, jobConf);

    JobControl control = compiler.compile(compiledPlan, "Test");
    Job firstWaiting = control.getWaitingJobs().get(0);
    int reducers = firstWaiting.getJobConf().getNumReduceTasks();

    assertEquals(100, reducers);
    Util.assertParallelValues(100, -1, -1, 100, firstWaiting.getJobConf());
}
 
Example #27
Source File: TestGroupConstParallelMR.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles the physical plan into MapReduce jobs and checks that the first
 * waiting job is configured with exactly one reduce task.
 */
@Override
public void checkGroupConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    MROperPlan compiledPlan = Util.buildMRPlan(pp, pc);

    ConfigurationValidator.validatePigProperties(pc.getProperties());
    Configuration jobConf = ConfigurationUtil.toConfiguration(pc.getProperties());
    JobControlCompiler compiler = new JobControlCompiler(pc, jobConf);

    JobControl control = compiler.compile(compiledPlan, "Test");
    Job firstWaiting = control.getWaitingJobs().get(0);
    int reducers = firstWaiting.getJobConf().getNumReduceTasks();

    assertEquals("parallism", 1, reducers);
}
 
Example #28
Source File: TestPigRunner.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a filter/limit/dump script through {@link PigRunner}. The run is
 * expected to finish successfully without any job, reporting
 * {@code EmptyPigStats}, and the -D options given on the command line must
 * show up in the Pig properties ("opt.fetch" is not passed and is read
 * with a default of true).
 */
@Test
public void simpleTest2() throws Exception {
    PrintWriter w = new PrintWriter(new FileWriter(PIG_FILE));
    try {
        w.println("A = load '" + INPUT_FILE + "' as (a0:int, a1:int, a2:int);");
        w.println("B = filter A by a0 == 3;");
        w.println("C = limit B 1;");
        w.println("dump C;");
    } finally {
        // release the script file handle even if writing fails
        w.close();
    }

    try {
        String[] args = { "-Dstop.on.failure=true", "-Dopt.multiquery=false", "-Daggregate.warning=false", "-x", execType, PIG_FILE };
        PigStats stats = PigRunner.run(args, new TestNotificationListener(execType));

        assertTrue(stats instanceof EmptyPigStats);
        assertTrue(stats.isSuccessful());
        assertEquals(0, stats.getNumberJobs());
        // expected value first, so a failure message reads correctly
        assertEquals(0, stats.getJobGraph().size());

        Configuration conf = ConfigurationUtil.toConfiguration(stats.getPigProperties());
        assertTrue(conf.getBoolean("stop.on.failure", false));
        assertTrue(!conf.getBoolean("aggregate.warning", true));
        assertTrue(!conf.getBoolean(PigConfiguration.PIG_OPT_MULTIQUERY, true));
        assertTrue(conf.getBoolean("opt.fetch", true));
    } finally {
        new File(PIG_FILE).delete();
        Util.deleteFile(cluster, OUTPUT_FILE);
    }
}
 
Example #29
Source File: TestNewPlanOperatorPlan.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares {@code conf} as a copy of the Configuration derived from the
 * file system configuration of a connected local-mode PigContext.
 */
@Before
public void setUp() throws Exception {
    PigContext context = new PigContext(ExecType.LOCAL, new Properties());
    context.connect();
    Configuration derived =
            ConfigurationUtil.toConfiguration(context.getFs().getConfiguration());
    conf = new Configuration(derived);
}
 
Example #30
Source File: TestOrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Starts a local-mode PigServer, obtains the matching file system, removes
 * files left over from earlier runs, creates the output base directory and
 * generates the input files.
 */
@Before
public void setup() throws ExecException, IOException {
    pigServer = new PigServer(ExecType.LOCAL);
    Configuration serverConf = ConfigurationUtil.toConfiguration(
            pigServer.getPigContext().getProperties());
    fs = FileSystem.get(serverConf);

    deleteTestFiles();
    pigServer.mkdirs(outbasedir);
    generateInputFiles();
}