org.apache.pig.StoreFuncInterface Java Examples

The following examples show how to use org.apache.pig.StoreFuncInterface. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link LogicalPlan} made of three operators wired as
 * Load -> Filter -> Store. Both the load and the store use PigStorage.
 *
 * @param inputFile name handed to {@code generateTmpFile} to obtain the load location
 * @param outputFile name handed to {@code generateTmpFile} to obtain the store location
 * @param dfs storage whose configuration is passed to the load operator
 * @return the connected plan
 * @throws Exception if any step of building the plan fails
 */
private LogicalPlan generateLogicalPlan(String inputFile,
        String outputFile, DataStorage dfs) throws Exception {
    final String pigStorageClass = "org.apache.pig.builtin.PigStorage";
    LogicalPlan logicalPlan = new LogicalPlan();

    // Source and sink specs each get their own FuncSpec instance.
    String loadPath = generateTmpFile(inputFile).getAbsolutePath();
    FileSpec loadSpec = new FileSpec(loadPath, new FuncSpec(pigStorageClass));
    String storePath = generateTmpFile(outputFile).getAbsolutePath();
    FileSpec storeSpec = new FileSpec(storePath, new FuncSpec(pigStorageClass));

    LOLoad loadOp = newLOLoad(loadSpec, null, logicalPlan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface storer =
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeSpec.getFuncSpec());
    LOStore storeOp = new LOStore(logicalPlan, storeSpec, storer, null);
    LOFilter filterOp = new LOFilter(logicalPlan);

    // Register the operators, then chain them.
    logicalPlan.add(loadOp);
    logicalPlan.add(storeOp);
    logicalPlan.add(filterOp);

    logicalPlan.connect(loadOp, filterOp);
    logicalPlan.connect(filterOp, storeOp);

    return logicalPlan;
}
 
Example #2
Source File: TestMRJobStats.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link POStore} whose output location is a temporary local file
 * that has been resized to {@code size} bytes.
 *
 * @param size length, in bytes, to give the temporary file
 * @param storeFunc store function to point at the file and attach to the store
 * @param conf configuration used to create the {@code Job} passed to the store function
 * @return the store, after {@code setUp()} has been called on it
 * @throws Exception if the file cannot be created or resized, or if setting up the store fails
 */
private static POStore createPOStoreForFileBasedSystem(long size, StoreFuncInterface storeFunc,
        Configuration conf) throws Exception {

    File file = File.createTempFile("tempFile", ".tmp");
    file.deleteOnExit();
    RandomAccessFile f = new RandomAccessFile(file, "rw");
    try {
        f.setLength(size);
    } finally {
        // Always release the file handle, even when setLength throws.
        f.close();
    }

    storeFunc.setStoreLocation(file.getAbsolutePath(), new Job(conf));
    FuncSpec funcSpec = new FuncSpec(storeFunc.getClass().getCanonicalName());
    POStore poStore = new POStore(new OperatorKey());
    poStore.setSFile(new FileSpec(file.getAbsolutePath(), funcSpec));
    poStore.setStoreFunc(storeFunc);
    poStore.setUp();

    return poStore;
}
 
Example #3
Source File: TestInputOutputMiniClusterFileValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-operator plan in which a PigStorage load feeds directly
 * into a PigStorage store.
 *
 * @param inputFile location used for the load operator
 * @param outputFile location used for the store operator
 * @param dfs storage whose configuration is passed to the load operator
 * @return the connected plan (load -> store)
 * @throws Throwable if building the plan fails
 */
private LogicalPlan genNewLoadStorePlan(String inputFile,
                                        String outputFile, DataStorage dfs)
                                    throws Throwable {
    final String pigStorageClass = "org.apache.pig.builtin.PigStorage";
    LogicalPlan logicalPlan = new LogicalPlan();

    FileSpec loadSpec = new FileSpec(inputFile, new FuncSpec(pigStorageClass));
    FileSpec storeSpec = new FileSpec(outputFile, new FuncSpec(pigStorageClass));

    LOLoad loadOp = newLOLoad(loadSpec, null, logicalPlan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface storer =
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeSpec.getFuncSpec());
    LOStore storeOp = new LOStore(logicalPlan, storeSpec, storer, null);

    logicalPlan.add(loadOp);
    logicalPlan.add(storeOp);
    logicalPlan.connect(loadOp, storeOp);

    return logicalPlan;
}
 
Example #4
Source File: TestInputOutputFileValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a minimal plan: one PigStorage load connected straight to
 * one PigStorage store.
 *
 * @param inputFile location used for the load operator
 * @param outputFile location used for the store operator
 * @param dfs storage whose configuration is passed to the load operator
 * @return the connected plan (load -> store)
 * @throws Throwable if building the plan fails
 */
private LogicalPlan genNewLoadStorePlan(String inputFile,
                                        String outputFile, DataStorage dfs)
                                    throws Throwable {
    final String storageFuncName = "org.apache.pig.builtin.PigStorage";
    LogicalPlan result = new LogicalPlan();

    FileSpec sourceSpec = new FileSpec(inputFile, new FuncSpec(storageFuncName));
    FileSpec sinkSpec = new FileSpec(outputFile, new FuncSpec(storageFuncName));

    LOLoad source = newLOLoad(sourceSpec, null, result,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    StoreFuncInterface sinkFunc =
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(sinkSpec.getFuncSpec());
    LOStore sink = new LOStore(result, sinkSpec, sinkFunc, null);

    result.add(source);
    result.add(sink);
    result.connect(source, sink);

    return result;
}
 
Example #5
Source File: QueryParserUtils.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates an {@code LOStore} for {@code fileName} and attaches it to
 * {@code lp} as a successor of {@code input}.
 *
 * @param scope scope used when generating the operator key for the signature
 * @param lp plan that receives the new store operator
 * @param fileName output location; surrounding quotes are stripped via {@code removeQuotes}
 * @param func store function spec string; when {@code null} the value of
 *        {@code PigConfiguration.PIG_DEFAULT_STORE_FUNC} is used, falling back to PigStorage
 * @param input operator whose output is stored
 * @param alias alias set on the store and used as the signature prefix
 * @param pigContext context supplying properties and the current directory
 * @throws FrontendException wrapping any {@link IOException} raised while
 *         resolving the store location
 */
public static void attachStorePlan(String scope, LogicalPlan lp, String fileName, String func,
        Operator input, String alias, PigContext pigContext) throws FrontendException {
    String storeFuncName = func;
    if (storeFuncName == null) {
        storeFuncName = pigContext.getProperties().getProperty(
                PigConfiguration.PIG_DEFAULT_STORE_FUNC, PigStorage.class.getName());
    }

    FuncSpec storeFuncSpec = new FuncSpec(storeFuncName);
    StoreFuncInterface storer =
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(storeFuncSpec);

    String unquotedName = removeQuotes(fileName);
    FileSpec outputSpec = new FileSpec(unquotedName, storeFuncSpec);
    String signature = alias + "_" + LogicalPlanBuilder.newOperatorKey(scope);
    storer.setStoreFuncUDFContextSignature(signature);
    LOStore storeOp = new LOStore(lp, outputSpec, storer, signature);
    storeOp.setAlias(alias);

    // The resolved path is not used here; the call is made for its effect on the storer
    // and so that IO failures surface as frontend errors.
    try {
        storer.relToAbsPathForStoreLocation(unquotedName, getCurrentDir(pigContext));
    } catch (IOException ioe) {
        throw new FrontendException(ioe.getMessage(), ioe);
    }

    lp.add(storeOp);
    lp.connect(input, storeOp);
}
 
Example #6
Source File: PigOutputFormat.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Makes sure the Configuration held by {@code jobContext} carries the output
 * location (and whatever else the store function sets) for the given store.
 * This is meant to run before calls are delegated to the underlying
 * OutputFormat or OutputCommitter.
 *
 * @param jobContext the {@link JobContext} whose Configuration is updated
 * @param store the POStore supplying the store function and output location
 * @throws IOException on failure
 */
public static void setLocation(JobContext jobContext, POStore store)
        throws IOException {
    // Let the store function write its settings into a scratch Job built
    // from the context's Configuration.
    Job scratchJob = new Job(jobContext.getConfiguration());
    store.getStoreFunc().setStoreLocation(store.getSFile().getFileName(), scratchJob);

    // setStoreLocation() tells the StoreFunc to set the output location on
    // its underlying OutputFormat, which typically records it in the
    // Configuration. Merge the scratch Job's Configuration back into the
    // caller's so those updates are visible when this method returns.
    ConfigurationUtil.mergeConf(jobContext.getConfiguration(),
            scratchJob.getConfiguration());
}
 
Example #7
Source File: StoreConverter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the given store operator and records it in {@code jobConf}.
 * The store is registered as the only reduce-side store; the map-side
 * store list is set to an empty list.
 *
 * @param jobConf job configuration that receives the serialized store lists
 * @param physicalOperator operator to configure; cast to {@link POStore}
 * @return the configured store
 * @throws IOException if setting the store location or serializing fails
 */
private static POStore configureStorer(JobConf jobConf,
        PhysicalOperator physicalOperator) throws IOException {
    POStore storeOp = (POStore) physicalOperator;
    ArrayList<POStore> reduceStores = Lists.newArrayList();
    reduceStores.add(storeOp);

    storeOp.getStoreFunc().setStoreLocation(
            storeOp.getSFile().getFileName(),
            new org.apache.hadoop.mapreduce.Job(jobConf));

    // Detach the store from its plan before it is serialized below.
    storeOp.setInputs(null);
    storeOp.setParentPlan(null);

    jobConf.set(JobControlCompiler.PIG_MAP_STORES,
            ObjectSerializer.serialize(Lists.newArrayList()));
    jobConf.set(JobControlCompiler.PIG_REDUCE_STORES,
            ObjectSerializer.serialize(reduceStores));
    return storeOp;
}
 
Example #8
Source File: PigOutputCommitter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Pairs every store with the {@link OutputCommitter} of its output format.
 *
 * @param context task attempt context handed to {@code setUpContext} for each store
 * @param stores stores to obtain committers for
 * @return one (committer, store) pair per store, in iteration order
 * @throws IOException if obtaining a committer fails; an
 *         {@link InterruptedException} is rethrown wrapped in an IOException
 */
@SuppressWarnings("unchecked")
private List<Pair<OutputCommitter, POStore>> getCommitters(
        TaskAttemptContext context,
        List<POStore> stores) throws IOException {
    List<Pair<OutputCommitter, POStore>> result =
        new ArrayList<Pair<OutputCommitter, POStore>>();
    for (POStore current : stores) {
        StoreFuncInterface storer = current.getStoreFunc();
        TaskAttemptContext perStoreContext = setUpContext(context, current);

        OutputCommitter committer;
        try {
            committer = storer.getOutputFormat().getOutputCommitter(perStoreContext);
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
        result.add(new Pair<OutputCommitter, POStore>(committer, current));
    }
    return result;
}
 
Example #9
Source File: MapReducePOStoreImpl.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Readies the store function of {@code store} for writing: sets the output
 * location, obtains a record writer from the store function's output format,
 * and passes that writer to {@code prepareToWrite}.
 *
 * @param store the store whose function is prepared
 * @return the store function of {@code store}
 * @throws IOException on failure; an {@link InterruptedException} raised while
 *         creating the record writer is rethrown wrapped in an IOException
 */
@Override
public StoreFuncInterface createStoreFunc(POStore store)
        throws IOException {

    StoreFuncInterface storer = store.getStoreFunc();

    // setLocation() calls setStoreLocation on the store function with a Job;
    // typically the function's OutputFormat then records the output location
    // in that Job's Configuration, which setLocation() merges back into the
    // configuration of our context.
    PigOutputFormat.setLocation(context, store);
    OutputFormat<?,?> format = storer.getOutputFormat();

    // A fresh record writer is created for this store.
    try {
        writer = format.getRecordWriter(context);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    storer.prepareToWrite(writer);
    return storer;
}
 
Example #10
Source File: PigOutputFormat.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the record writer for this task. With exactly one store (map or
 * reduce side) the writer wraps that store's own record writer; otherwise a
 * placeholder writer in multi-store mode is returned.
 *
 * @param taskattemptcontext context of the running task attempt
 * @return a {@code PigRecordWriter} in single-store or multi-store mode
 * @throws IOException on failure
 * @throws InterruptedException if obtaining the underlying writer is interrupted
 */
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext taskattemptcontext)
            throws IOException, InterruptedException {
    setupUdfEnvAndStores(taskattemptcontext);

    int storeCount = mapStores.size() + reduceStores.size();
    if (storeCount != 1) {
        // multi store case - all writing is done through
        // MapReducePOStoreImpl, so hand back a dummy RecordWriter
        return new PigRecordWriter(null, null, Mode.MULTI_STORE);
    }

    // single store case: it lives either on the map side or the reduce side
    POStore onlyStore = (mapStores.size() == 1) ? mapStores.get(0) : reduceStores.get(0);
    StoreFuncInterface storer = onlyStore.getStoreFunc();

    // set the output location; this should update the conf in the context
    // so that the underlying OutputFormat can create its writer
    PigOutputFormat.setLocation(taskattemptcontext, onlyStore);
    RecordWriter underlying = storer.getOutputFormat().getRecordWriter(
            taskattemptcontext);
    return new PigRecordWriter(underlying, storer, Mode.SINGLE_STORE);
}
 
Example #11
Source File: PigOutputCommitter.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the schema of {@code store} through its store function, when that
 * function implements {@code StoreMetadata} and the store has a schema.
 * Does nothing otherwise.
 *
 * @param store the store whose schema may be written
 * @param conf configuration used to create the {@code Job} passed to {@code storeSchema}
 * @throws IOException if storing the schema fails
 */
public static void storeCleanup(POStore store, Configuration conf)
        throws IOException {
    StoreFuncInterface storer = store.getStoreFunc();
    if (!(storer instanceof StoreMetadata)) {
        return;
    }

    Schema storeSchema = store.getSchema();
    if (storeSchema == null) {
        return;
    }

    ResourceSchema resourceSchema = new ResourceSchema(storeSchema, store.getSortInfo());
    String location = store.getSFile().getFileName();
    ((StoreMetadata) storer).storeSchema(resourceSchema, location, new Job(conf));
}
 
Example #12
Source File: TestMRJobStats.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link POStore} whose output location is the fixed URI
 * {@code hbase://tableName} rather than a file path.
 *
 * @param storeFunc store function to point at the URI and attach to the store
 * @param conf configuration used to create the {@code Job} passed to the store function
 * @return the store, after {@code setUp()} has been called on it
 * @throws Exception if configuring the store function or the store fails
 */
private static POStore createPOStoreForNonFileBasedSystem(StoreFuncInterface storeFunc,
        Configuration conf) throws Exception {

    final String location = "hbase://tableName";
    storeFunc.setStoreLocation(location, new Job(conf));

    FuncSpec storeFuncSpec = new FuncSpec(storeFunc.getClass().getCanonicalName());
    FileSpec outputSpec = new FileSpec(location, storeFuncSpec);

    POStore result = new POStore(new OperatorKey());
    result.setSFile(outputSpec);
    result.setStoreFunc(storeFunc);
    result.setUp();

    return result;
}
 
Example #13
Source File: PigOutputFormat.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code checkOutputSpecs} on the output format of every store, each
 * against its own copy of the job context.
 *
 * An {@link IOException} from the check is suppressed only when the store
 * function is an {@code OverwritableStoreFunc} that reports
 * {@code shouldOverwrite()} and the exception is one of the two
 * FileAlreadyExistsException types; any other failure is rethrown.
 *
 * @param stores stores to validate
 * @param jobcontext context whose configuration and job id are copied per store
 * @throws IOException if a check fails and is not suppressed as described above
 * @throws InterruptedException if thrown while obtaining or running the check
 */
private void checkOutputSpecsHelper(List<POStore> stores, JobContext
        jobcontext) throws IOException, InterruptedException {
    for (POStore store : stores) {
        // copy the original JobContext so that each OutputFormat
        // works against a different copy
        JobContext isolatedContext = HadoopShims.createJobContext(
                jobcontext.getConfiguration(), jobcontext.getJobID());

        // set output location; this should update the conf in the copied
        // context so checkOutputSpecs() below sees it
        PigOutputFormat.setLocation(isolatedContext, store);

        StoreFuncInterface storer = store.getStoreFunc();
        OutputFormat format = storer.getOutputFormat();

        try {
            format.checkOutputSpecs(isolatedContext);
        } catch (IOException ioe) {
            boolean overwriteRequested = storer instanceof OverwritableStoreFunc
                    && ((OverwritableStoreFunc) storer).shouldOverwrite();
            boolean outputAlreadyExists = ioe instanceof FileAlreadyExistsException
                    || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException;
            if (!(overwriteRequested && outputAlreadyExists)) {
                throw ioe;
            }
        }
    }
}
 
Example #14
Source File: PigOutputFormat.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a record writer in the given mode. Only in
 * {@code Mode.SINGLE_STORE} are the wrapped writer and store function kept,
 * and the store function is immediately prepared to write to that writer.
 *
 * @param wrappedWriter underlying writer; ignored unless in single-store mode
 * @param sFunc store function; ignored unless in single-store mode
 * @param mode operating mode of this writer
 * @throws IOException if {@code prepareToWrite} fails
 */
public PigRecordWriter(RecordWriter wrappedWriter, StoreFuncInterface sFunc,
        Mode mode)
        throws IOException {
    this.mode = mode;
    if (mode != Mode.SINGLE_STORE) {
        return;
    }

    this.wrappedWriter = wrappedWriter;
    this.sFunc = sFunc;
    sFunc.prepareToWrite(wrappedWriter);
}
 
Example #15
Source File: LogicalPlanBuilder.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an {@code LOStore} operator for {@code filename} and registers it
 * through {@code buildOp}.
 *
 * @param loc source location used for error reporting and passed to {@code buildOp}
 * @param alias alias passed to {@code buildOp}
 * @param inputAlias alias of the input; also prefixes the cache key and the signature
 * @param filename store location as written in the script
 * @param funcSpec store function spec; when {@code null} the value of
 *        {@code PigConfiguration.PIG_DEFAULT_STORE_FUNC} is used, falling back to PigStorage
 * @return the value returned by {@code buildOp}
 * @throws ParserValidationException wrapping any exception raised while building the operator
 */
String buildStoreOp(SourceLocation loc, String alias, String inputAlias, String filename, FuncSpec funcSpec)
throws ParserValidationException {
    try {
        // No explicit spec: take the StoreFunc class from the properties,
        // defaulting to PigStorage when none is configured.
        FuncSpec effectiveSpec = funcSpec;
        if (effectiveSpec == null) {
            String defaultStorer = pigContext.getProperties().getProperty(
                    PigConfiguration.PIG_DEFAULT_STORE_FUNC, PigStorage.class.getName());
            effectiveSpec = new FuncSpec(defaultStorer);
        }
        StoreFuncInterface storer =
                (StoreFuncInterface) PigContext.instantiateFuncFromSpec(effectiveSpec);
        String cacheKey = inputAlias + "_" + (storeIndex++);

        String signature = inputAlias + "_" + newOperatorKey();
        storer.setStoreFuncUDFContextSignature(signature);

        // Resolve the location once per key; later lookups reuse the cached value.
        String resolvedPath = fileNameMap.get(cacheKey);
        if (resolvedPath == null) {
            resolvedPath = storer.relToAbsPathForStoreLocation(
                    filename,
                    QueryParserUtils.getCurrentDir(pigContext));
            if (resolvedPath != null) {
                QueryParserUtils.setHdfsServers(resolvedPath, pigContext);
            }
            fileNameMap.put(cacheKey, resolvedPath);
        }

        FileSpec outputSpec = new FileSpec(resolvedPath, effectiveSpec);
        LOStore storeOp = new LOStore(plan, outputSpec, storer, signature);
        return buildOp(loc, storeOp, alias, inputAlias, null);
    } catch(Exception ex) {
        throw new ParserValidationException(intStream, loc, ex);
    }
}
 
Example #16
Source File: FunctionType.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code func} is assignable to the type that corresponds to
 * {@code funcType}.
 *
 * @param func class to check
 * @param funcType one of the {@code FunctionType} constants
 * @throws IllegalArgumentException if {@code funcType} is not a known function type
 * @throws ClassCastException if {@code func} is not assignable to the expected type
 */
public static void tryCasting(Class<?> func, byte funcType) {
    final Class<?> expected;
    if (funcType == FunctionType.EVALFUNC) {
        expected = EvalFunc.class;
    } else if (funcType == FunctionType.COMPARISONFUNC) {
        expected = ComparisonFunc.class;
    } else if (funcType == FunctionType.LOADFUNC) {
        expected = LoadFunc.class;
    } else if (funcType == FunctionType.STOREFUNC) {
        expected = StoreFuncInterface.class;
    } else if (funcType == FunctionType.PIGTOSTREAMFUNC) {
        expected = PigToStream.class;
    } else if (funcType == FunctionType.STREAMTOPIGFUNC) {
        expected = StreamToPig.class;
    } else {
        throw new IllegalArgumentException("Received an unknown function type: " + funcType);
    }

    if (expected.isAssignableFrom(func)) {
        return;
    }
    throw new ClassCastException(func + " does not implement " + expected);
}
 
Example #17
Source File: PerfTest2.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Writes {@code ROW_COUNT} tuples of {@code COLUMN_COUNT} chararray columns
 * to {@code out} through a {@link ParquetStorer}, driving the store function,
 * output format, record writer and output committer by hand.
 *
 * @param out output location, resolved against the current working directory
 * @throws IOException on I/O failure
 * @throws ParserException if the generated schema string cannot be parsed
 * @throws InterruptedException if a writer or committer call is interrupted
 * @throws ExecException if setting a tuple field fails
 */
public static void write(String out) throws IOException, ParserException,
    InterruptedException, ExecException {
  // Schema of the form "a0: chararray, a1: chararray, ...".
  StringBuilder schemaBuilder = new StringBuilder("a0: chararray");
  for (int col = 1; col < COLUMN_COUNT; col++) {
    schemaBuilder.append(", a").append(col).append(": chararray");
  }
  String schema = schemaBuilder.toString();

  StoreFuncInterface storer = new ParquetStorer();
  Job job = new Job(conf);
  storer.setStoreFuncUDFContextSignature("sig");
  Path workingDir = new Path(new File(".").getAbsoluteFile().toURI());
  String absPath = storer.relToAbsPathForStoreLocation(out, workingDir);
  storer.setStoreLocation(absPath, job);
  storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
  @SuppressWarnings("unchecked") // that's how the base class is defined
  OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
  // it's ContextUtil.getConfiguration(job) and not just conf !
  JobContext jobContext = ContextUtil.newJobContext(ContextUtil.getConfiguration(job), new JobID("jt", jobid ++));
  outputFormat.checkOutputSpecs(jobContext);

  // schema comes from StringBuilder.toString() and is therefore never null,
  // so the schema is always checked again and stored when supported.
  ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
  storer.checkSchema(resourceSchema);
  if (storer instanceof StoreMetadata) {
    ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
  }

  TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
  RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
  storer.prepareToWrite(recordWriter);

  // Each cell holds "a<row>_<column>".
  for (int row = 0; row < ROW_COUNT; row++) {
    Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
    for (int col = 0; col < COLUMN_COUNT; col++) {
      tuple.set(col, "a" + row + "_" + col);
    }
    storer.putNext(tuple);
  }

  recordWriter.close(taskAttemptContext);
  OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
  outputCommitter.commitTask(taskAttemptContext);
  outputCommitter.commitJob(jobContext);
}
 
Example #18
Source File: POStore.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the store function held by this operator.
 *
 * @param storeFunc the store function to keep in the {@code storer} field
 */
public void setStoreFunc(StoreFuncInterface storeFunc) {
    this.storer = storeFunc;
}
 
Example #19
Source File: ScalarVisitor.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a visitor over {@code exprPlan}, driven by a
 * {@code DependencyOrderWalker}, that rewires every {@code ScalarExpression}
 * to read from a temporary store.
 *
 * For each scalar expression the visitor reuses an existing temporary
 * InterStorage {@code LOStore} hanging off the referenced operator, or
 * creates one, then points the expression and its file-name constant at
 * that store and soft-links the store to the operator the expression is
 * attached to.
 *
 * @param exprPlan the expression plan to visit
 * @return the visitor described above
 * @throws FrontendException declared by the overridden method
 */
@Override
protected LogicalExpressionVisitor getVisitor(final LogicalExpressionPlan exprPlan)
throws FrontendException {
    return new LogicalExpressionVisitor( exprPlan, new DependencyOrderWalker( exprPlan ) ) {

        @Override
        public void visit(ScalarExpression expr) throws FrontendException {
            // This is a scalar udf.
            // The successor at index 1 is the constant that will receive the store's file name below.
            ConstantExpression filenameConst = (ConstantExpression)exprPlan.getSuccessors( expr ).get( 1 );

            Operator refOp = expr.getImplicitReferencedOperator();
            Operator attachedOp = expr.getAttachedLogicalOperator();
            LogicalPlan lp = (LogicalPlan) attachedOp.getPlan();
            List<Operator> succs = lp.getSuccessors( refOp );
            LOStore store = null;
            FuncSpec interStorageFuncSpec = new FuncSpec(InterStorage.class.getName());
            // Look for an existing temporary store on refOp that uses InterStorage; take the first match.
            if( succs != null ) {
                for( Operator succ : succs ) {
                    if( succ instanceof LOStore
                            && ((LOStore)succ).isTmpStore()
                            && interStorageFuncSpec.equals(
                                ((LOStore)succ).getOutputSpec().getFuncSpec() ) ) {
                        store = (LOStore)succ;
                        break;
                    }
                }
            }

            // No reusable store: create a temporary one and attach it after refOp.
            if( store == null ) {
                FileSpec fileSpec;
                try {
                    fileSpec = new FileSpec( FileLocalizer.getTemporaryPath( pigContext ).toString(), interStorageFuncSpec );                    // TODO: need to hookup the pigcontext.
                } catch (IOException e) {
                    throw new PlanValidationException( expr, "Failed to process scalar" + e);
                }
                StoreFuncInterface stoFunc = (StoreFuncInterface)PigContext.instantiateFuncFromSpec(interStorageFuncSpec);
                String sig = LogicalPlanBuilder.newOperatorKey(scope);
                stoFunc.setStoreFuncUDFContextSignature(sig);
                store = new LOStore(lp, fileSpec, stoFunc, sig);
                store.setTmpStore(true);
                lp.add( store );
                lp.connect( refOp, store );
            }
            
            // The expression now references the store, and the constant carries the store's output file name.
            expr.setImplicitReferencedOperator(store);
            filenameConst.setValue( store.getOutputSpec().getFileName() );
            
            // Add the soft link store -> attachedOp only if it is not already present.
            if( lp.getSoftLinkSuccessors( store ) == null || 
                !lp.getSoftLinkSuccessors( store ).contains( attachedOp ) ) {
                lp.createSoftLink( store, attachedOp );
            }
        }

    };
}
 
Example #20
Source File: LOStore.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the store function held by this operator.
 *
 * @return the value of the {@code storeFunc} field
 */
public StoreFuncInterface getStoreFunc() {
    return storeFunc;
}
 
Example #21
Source File: LOStore.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a store operator named "LOStore" in the given plan.
 *
 * @param plan plan passed to the superclass constructor
 * @param outputFileSpec output specification kept in the {@code output} field
 * @param storeFunc store function kept by this operator
 * @param signature signature kept by this operator
 */
public LOStore(LogicalPlan plan, FileSpec outputFileSpec, StoreFuncInterface storeFunc, String signature) {
    super("LOStore", plan);
    this.signature = signature;
    this.storeFunc = storeFunc;
    this.output = outputFileSpec;
}
 
Example #22
Source File: POStoreImpl.java    From spork with Apache License 2.0 2 votes vote down vote up
/**
 * Set up the storer for the given store.
 *
 * @param store - the POStore object
 * @return the store function for {@code store}; presumably ready for
 *         writing once this returns - confirm against the implementations
 * @throws IOException if setting up the storer fails
 */
public abstract StoreFuncInterface createStoreFunc(POStore store) 
    throws IOException;