org.apache.pig.StoreFuncInterface Java Examples
The following examples show how to use
org.apache.pig.StoreFuncInterface.
You can follow the links above each example to view the original project or source file.
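Before the examples, here is a minimal sketch of the typical write-side call sequence on a StoreFuncInterface instance. It is an illustrative assumption rather than code from any of the projects below: it instantiates the built-in PigStorage through a FuncSpec (the same pattern used in Examples #1, #3-#5 and #15) and assumes a local Hadoop Configuration plus a made-up output path /tmp/storefunc-sketch-out. Example #17 shows the full lifecycle, including getOutputFormat(), prepareToWrite() and putNext().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.FuncSpec;
import org.apache.pig.StoreFuncInterface;
import org.apache.pig.builtin.PigStorage;
import org.apache.pig.impl.PigContext;

public class StoreFuncLifecycleSketch {
    public static void main(String[] args) throws Exception {
        // Instantiate a StoreFuncInterface implementation from a FuncSpec,
        // the same way the Pig front end does in the examples below.
        FuncSpec funcSpec = new FuncSpec(PigStorage.class.getName());
        StoreFuncInterface storeFunc =
                (StoreFuncInterface) PigContext.instantiateFuncFromSpec(funcSpec);

        // Hand the StoreFunc its UDF-context signature and output location;
        // the path below is a placeholder used only for illustration.
        storeFunc.setStoreFuncUDFContextSignature("sketch_signature");
        storeFunc.setStoreLocation("/tmp/storefunc-sketch-out", new Job(new Configuration()));

        // The framework would now obtain the OutputFormat, create a RecordWriter,
        // and call prepareToWrite()/putNext() per tuple (see Example #17 below).
        System.out.println(storeFunc.getOutputFormat().getClass().getName());
    }
}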
Example #1
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 6 votes |
/**
 *
 * Generate a {@link LogicalPlan} containing a Load, Filter and Store
 * operators
 *
 * @param inputFile
 * @param outputFile
 * @param dfs
 * @return
 * @throws Exception
 */
private LogicalPlan generateLogicalPlan(String inputFile,
        String outputFile, DataStorage dfs) throws Exception {
    LogicalPlan plan = new LogicalPlan();
    FileSpec filespec1 = new FileSpec(generateTmpFile(inputFile).getAbsolutePath(),
            new FuncSpec("org.apache.pig.builtin.PigStorage"));
    FileSpec filespec2 = new FileSpec(generateTmpFile(outputFile).getAbsolutePath(),
            new FuncSpec("org.apache.pig.builtin.PigStorage"));
    LOLoad load = newLOLoad(filespec1, null, plan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    LOStore store = new LOStore(plan, filespec2,
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(filespec2.getFuncSpec()), null);
    LOFilter filter = new LOFilter(plan);

    plan.add(load);
    plan.add(store);
    plan.add(filter);

    plan.connect(load, filter);
    plan.connect(filter, store);

    return plan;
}
Example #2
Source File: TestMRJobStats.java From spork with Apache License 2.0 | 6 votes |
private static POStore createPOStoreForFileBasedSystem(long size,
        StoreFuncInterface storeFunc, Configuration conf) throws Exception {

    File file = File.createTempFile("tempFile", ".tmp");
    file.deleteOnExit();
    RandomAccessFile f = new RandomAccessFile(file, "rw");
    f.setLength(size);
    f.close();

    storeFunc.setStoreLocation(file.getAbsolutePath(), new Job(conf));
    FuncSpec funcSpec = new FuncSpec(storeFunc.getClass().getCanonicalName());
    POStore poStore = new POStore(new OperatorKey());
    poStore.setSFile(new FileSpec(file.getAbsolutePath(), funcSpec));
    poStore.setStoreFunc(storeFunc);
    poStore.setUp();
    return poStore;
}
Example #3
Source File: TestInputOutputMiniClusterFileValidator.java From spork with Apache License 2.0 | 6 votes |
private LogicalPlan genNewLoadStorePlan(String inputFile,
        String outputFile, DataStorage dfs) throws Throwable {
    LogicalPlan plan = new LogicalPlan();
    FileSpec filespec1 =
            new FileSpec(inputFile, new FuncSpec("org.apache.pig.builtin.PigStorage"));
    FileSpec filespec2 =
            new FileSpec(outputFile, new FuncSpec("org.apache.pig.builtin.PigStorage"));
    LOLoad load = newLOLoad(filespec1, null, plan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    LOStore store = new LOStore(plan, filespec2,
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(filespec2.getFuncSpec()), null);

    plan.add(load);
    plan.add(store);
    plan.connect(load, store);

    return plan;
}
Example #4
Source File: TestInputOutputFileValidator.java From spork with Apache License 2.0 | 6 votes |
private LogicalPlan genNewLoadStorePlan(String inputFile,
        String outputFile, DataStorage dfs) throws Throwable {
    LogicalPlan plan = new LogicalPlan();
    FileSpec filespec1 =
            new FileSpec(inputFile, new FuncSpec("org.apache.pig.builtin.PigStorage"));
    FileSpec filespec2 =
            new FileSpec(outputFile, new FuncSpec("org.apache.pig.builtin.PigStorage"));
    LOLoad load = newLOLoad(filespec1, null, plan,
            ConfigurationUtil.toConfiguration(dfs.getConfiguration()));
    LOStore store = new LOStore(plan, filespec2,
            (StoreFuncInterface) PigContext.instantiateFuncFromSpec(filespec2.getFuncSpec()), null);

    plan.add(load);
    plan.add(store);
    plan.connect(load, store);

    return plan;
}
Example #5
Source File: QueryParserUtils.java From spork with Apache License 2.0 | 6 votes |
public static void attachStorePlan(String scope, LogicalPlan lp, String fileName, String func,
        Operator input, String alias, PigContext pigContext) throws FrontendException {
    func = func == null
            ? pigContext.getProperties().getProperty(
                    PigConfiguration.PIG_DEFAULT_STORE_FUNC, PigStorage.class.getName())
            : func;

    FuncSpec funcSpec = new FuncSpec(func);
    StoreFuncInterface stoFunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(funcSpec);

    fileName = removeQuotes(fileName);
    FileSpec fileSpec = new FileSpec(fileName, funcSpec);
    String sig = alias + "_" + LogicalPlanBuilder.newOperatorKey(scope);
    stoFunc.setStoreFuncUDFContextSignature(sig);
    LOStore store = new LOStore(lp, fileSpec, stoFunc, sig);
    store.setAlias(alias);

    try {
        stoFunc.relToAbsPathForStoreLocation(fileName, getCurrentDir(pigContext));
    } catch (IOException ioe) {
        FrontendException e = new FrontendException(ioe.getMessage(), ioe);
        throw e;
    }

    lp.add(store);
    lp.connect(input, store);
}
Example #6
Source File: PigOutputFormat.java From spork with Apache License 2.0 | 6 votes |
/**
 * Before delegating calls to underlying OutputFormat or OutputCommitter
 * Pig needs to ensure the Configuration in the JobContext contains
 * the output location and StoreFunc
 * for the specific store - so set these up in the context for this specific
 * store
 * @param jobContext the {@link JobContext}
 * @param store the POStore
 * @throws IOException on failure
 */
public static void setLocation(JobContext jobContext, POStore store) throws IOException {
    Job storeJob = new Job(jobContext.getConfiguration());
    StoreFuncInterface storeFunc = store.getStoreFunc();
    String outputLocation = store.getSFile().getFileName();
    storeFunc.setStoreLocation(outputLocation, storeJob);

    // the setStoreLocation() method would indicate to the StoreFunc
    // to set the output location for its underlying OutputFormat.
    // Typically OutputFormat's store the output location in the
    // Configuration - so we need to get the modified Configuration
    // containing the output location (and any other settings the
    // OutputFormat might have set) and merge it with the Configuration
    // we started with so that when this method returns the Configuration
    // supplied as input has the updates.
    ConfigurationUtil.mergeConf(jobContext.getConfiguration(), storeJob.getConfiguration());
}
Example #7
Source File: StoreConverter.java From spork with Apache License 2.0 | 6 votes |
private static POStore configureStorer(JobConf jobConf,
        PhysicalOperator physicalOperator) throws IOException {
    ArrayList<POStore> storeLocations = Lists.newArrayList();
    POStore poStore = (POStore) physicalOperator;
    storeLocations.add(poStore);
    StoreFuncInterface sFunc = poStore.getStoreFunc();
    sFunc.setStoreLocation(poStore.getSFile().getFileName(),
            new org.apache.hadoop.mapreduce.Job(jobConf));
    poStore.setInputs(null);
    poStore.setParentPlan(null);

    jobConf.set(JobControlCompiler.PIG_MAP_STORES,
            ObjectSerializer.serialize(Lists.newArrayList()));
    jobConf.set(JobControlCompiler.PIG_REDUCE_STORES,
            ObjectSerializer.serialize(storeLocations));
    return poStore;
}
Example #8
Source File: PigOutputCommitter.java From spork with Apache License 2.0 | 6 votes |
/**
 * @param conf
 * @param mapStores
 * @return
 * @throws IOException
 */
@SuppressWarnings("unchecked")
private List<Pair<OutputCommitter, POStore>> getCommitters(
        TaskAttemptContext context, List<POStore> stores) throws IOException {
    List<Pair<OutputCommitter, POStore>> committers =
            new ArrayList<Pair<OutputCommitter, POStore>>();
    for (POStore store : stores) {
        StoreFuncInterface sFunc = store.getStoreFunc();

        TaskAttemptContext updatedContext = setUpContext(context, store);
        try {
            committers.add(new Pair<OutputCommitter, POStore>(
                    sFunc.getOutputFormat().getOutputCommitter(updatedContext),
                    store));
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }
    return committers;
}
Example #9
Source File: MapReducePOStoreImpl.java From spork with Apache License 2.0 | 6 votes |
@Override
public StoreFuncInterface createStoreFunc(POStore store) throws IOException {
    StoreFuncInterface storeFunc = store.getStoreFunc();

    // call the setStoreLocation on the storeFunc giving it the
    // Job. Typically this will result in the OutputFormat of the
    // storeFunc storing the output location in the Configuration
    // in the Job. The PigOutFormat.setLocation() method will merge
    // this modified Configuration into the configuration of the
    // Context we have
    PigOutputFormat.setLocation(context, store);
    OutputFormat<?,?> outputFormat = storeFunc.getOutputFormat();

    // create a new record writer
    try {
        writer = outputFormat.getRecordWriter(context);
    } catch (InterruptedException e) {
        throw new IOException(e);
    }

    storeFunc.prepareToWrite(writer);

    return storeFunc;
}
Example #10
Source File: PigOutputFormat.java From spork with Apache License 2.0 | 6 votes |
@Override
public RecordWriter<WritableComparable, Tuple> getRecordWriter(
        TaskAttemptContext taskattemptcontext) throws IOException, InterruptedException {
    setupUdfEnvAndStores(taskattemptcontext);
    if (mapStores.size() + reduceStores.size() == 1) {
        // single store case
        POStore store;
        if (mapStores.size() == 1) {
            store = mapStores.get(0);
        } else {
            store = reduceStores.get(0);
        }
        StoreFuncInterface sFunc = store.getStoreFunc();
        // set output location
        PigOutputFormat.setLocation(taskattemptcontext, store);
        // The above call should have update the conf in the JobContext
        // to have the output location - now call checkOutputSpecs()
        RecordWriter writer = sFunc.getOutputFormat().getRecordWriter(taskattemptcontext);
        return new PigRecordWriter(writer, sFunc, Mode.SINGLE_STORE);
    } else {
        // multi store case - in this case, all writing is done through
        // MapReducePOStoreImpl - set up a dummy RecordWriter
        return new PigRecordWriter(null, null, Mode.MULTI_STORE);
    }
}
Example #11
Source File: PigOutputCommitter.java From spork with Apache License 2.0 | 5 votes |
static public void storeCleanup(POStore store, Configuration conf)
        throws IOException {
    StoreFuncInterface storeFunc = store.getStoreFunc();
    if (storeFunc instanceof StoreMetadata) {
        Schema schema = store.getSchema();
        if (schema != null) {
            ((StoreMetadata) storeFunc).storeSchema(
                    new ResourceSchema(schema, store.getSortInfo()),
                    store.getSFile().getFileName(),
                    new Job(conf));
        }
    }
}
Example #12
Source File: TestMRJobStats.java From spork with Apache License 2.0 | 5 votes |
private static POStore createPOStoreForNonFileBasedSystem(
        StoreFuncInterface storeFunc, Configuration conf) throws Exception {

    String nonFileBasedUri = "hbase://tableName";
    storeFunc.setStoreLocation(nonFileBasedUri, new Job(conf));
    FuncSpec funcSpec = new FuncSpec(storeFunc.getClass().getCanonicalName());
    POStore poStore = new POStore(new OperatorKey());
    poStore.setSFile(new FileSpec(nonFileBasedUri, funcSpec));
    poStore.setStoreFunc(storeFunc);
    poStore.setUp();
    return poStore;
}
Example #13
Source File: PigOutputFormat.java From spork with Apache License 2.0 | 5 votes |
private void checkOutputSpecsHelper(List<POStore> stores, JobContext jobcontext)
        throws IOException, InterruptedException {
    for (POStore store : stores) {
        // make a copy of the original JobContext so that
        // each OutputFormat get a different copy
        JobContext jobContextCopy = HadoopShims.createJobContext(
                jobcontext.getConfiguration(), jobcontext.getJobID());

        // set output location
        PigOutputFormat.setLocation(jobContextCopy, store);

        StoreFuncInterface sFunc = store.getStoreFunc();
        OutputFormat of = sFunc.getOutputFormat();

        // The above call should have update the conf in the JobContext
        // to have the output location - now call checkOutputSpecs()
        try {
            of.checkOutputSpecs(jobContextCopy);
        } catch (IOException ioe) {
            boolean shouldThrowException = true;
            if (sFunc instanceof OverwritableStoreFunc) {
                if (((OverwritableStoreFunc) sFunc).shouldOverwrite()) {
                    if (ioe instanceof FileAlreadyExistsException
                            || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException) {
                        shouldThrowException = false;
                    }
                }
            }
            if (shouldThrowException)
                throw ioe;
        }
    }
}
Example #14
Source File: PigOutputFormat.java From spork with Apache License 2.0 | 5 votes |
public PigRecordWriter(RecordWriter wrappedWriter, StoreFuncInterface sFunc,
        Mode mode) throws IOException {
    this.mode = mode;

    if (mode == Mode.SINGLE_STORE) {
        this.wrappedWriter = wrappedWriter;
        this.sFunc = sFunc;
        this.sFunc.prepareToWrite(this.wrappedWriter);
    }
}
Example #15
Source File: LogicalPlanBuilder.java From spork with Apache License 2.0 | 5 votes |
String buildStoreOp(SourceLocation loc, String alias, String inputAlias,
        String filename, FuncSpec funcSpec) throws ParserValidationException {
    try {
        // Load StoreFunc class from default properties if funcSpec is null.
        // Fallback on PigStorage if StoreFunc is not specified in properties.
        funcSpec = funcSpec == null
                ? new FuncSpec(pigContext.getProperties().getProperty(
                        PigConfiguration.PIG_DEFAULT_STORE_FUNC, PigStorage.class.getName()))
                : funcSpec;
        StoreFuncInterface stoFunc = (StoreFuncInterface) PigContext.instantiateFuncFromSpec(funcSpec);
        String fileNameKey = inputAlias + "_" + (storeIndex++);

        String signature = inputAlias + "_" + newOperatorKey();
        stoFunc.setStoreFuncUDFContextSignature(signature);

        String absolutePath = fileNameMap.get(fileNameKey);
        if (absolutePath == null) {
            absolutePath = stoFunc.relToAbsPathForStoreLocation(
                    filename, QueryParserUtils.getCurrentDir(pigContext));
            if (absolutePath != null) {
                QueryParserUtils.setHdfsServers(absolutePath, pigContext);
            }
            fileNameMap.put(fileNameKey, absolutePath);
        }
        FileSpec fileSpec = new FileSpec(absolutePath, funcSpec);

        LOStore op = new LOStore(plan, fileSpec, stoFunc, signature);
        return buildOp(loc, op, alias, inputAlias, null);
    } catch (Exception ex) {
        throw new ParserValidationException(intStream, loc, ex);
    }
}
Example #16
Source File: FunctionType.java From spork with Apache License 2.0 | 5 votes |
public static void tryCasting(Class<?> func, byte funcType) {
    Class<?> typeClass;
    switch (funcType) {
    case FunctionType.EVALFUNC:
        typeClass = EvalFunc.class;
        break;
    case FunctionType.COMPARISONFUNC:
        typeClass = ComparisonFunc.class;
        break;
    case FunctionType.LOADFUNC:
        typeClass = LoadFunc.class;
        break;
    case FunctionType.STOREFUNC:
        typeClass = StoreFuncInterface.class;
        break;
    case FunctionType.PIGTOSTREAMFUNC:
        typeClass = PigToStream.class;
        break;
    case FunctionType.STREAMTOPIGFUNC:
        typeClass = StreamToPig.class;
        break;
    default:
        throw new IllegalArgumentException("Received an unknown function type: " + funcType);
    }
    if (!typeClass.isAssignableFrom(func)) {
        throw new ClassCastException(func + " does not implement " + typeClass);
    }
}
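As a quick illustration of the validator above, the following hypothetical one-line call passes because the built-in PigStorage implements StoreFuncInterface (the earlier examples rely on the same fact when they cast it); passing a class that does not implement the interface would instead raise the ClassCastException.

// Hypothetical usage of FunctionType.tryCasting from Example #16 (assumes
// FunctionType and PigStorage are both on the classpath): PigStorage
// implements StoreFuncInterface, so this call returns normally.
FunctionType.tryCasting(org.apache.pig.builtin.PigStorage.class, FunctionType.STOREFUNC);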
Example #17
Source File: PerfTest2.java From parquet-mr with Apache License 2.0 | 4 votes |
public static void write(String out) throws IOException, ParserException,
        InterruptedException, ExecException {
    {
        StringBuilder schemaString = new StringBuilder("a0: chararray");
        for (int i = 1; i < COLUMN_COUNT; i++) {
            schemaString.append(", a" + i + ": chararray");
        }

        String location = out;
        String schema = schemaString.toString();

        StoreFuncInterface storer = new ParquetStorer();
        Job job = new Job(conf);
        storer.setStoreFuncUDFContextSignature("sig");
        String absPath = storer.relToAbsPathForStoreLocation(location,
                new Path(new File(".").getAbsoluteFile().toURI()));
        storer.setStoreLocation(absPath, job);
        storer.checkSchema(new ResourceSchema(Utils.getSchemaFromString(schema)));
        @SuppressWarnings("unchecked") // that's how the base class is defined
        OutputFormat<Void, Tuple> outputFormat = storer.getOutputFormat();
        // it's ContextUtil.getConfiguration(job) and not just conf !
        JobContext jobContext = ContextUtil.newJobContext(
                ContextUtil.getConfiguration(job), new JobID("jt", jobid++));
        outputFormat.checkOutputSpecs(jobContext);
        if (schema != null) {
            ResourceSchema resourceSchema = new ResourceSchema(Utils.getSchemaFromString(schema));
            storer.checkSchema(resourceSchema);
            if (storer instanceof StoreMetadata) {
                ((StoreMetadata) storer).storeSchema(resourceSchema, absPath, job);
            }
        }
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID("jt", jobid, true, 1, 0));
        RecordWriter<Void, Tuple> recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
        storer.prepareToWrite(recordWriter);

        for (int i = 0; i < ROW_COUNT; i++) {
            Tuple tuple = TupleFactory.getInstance().newTuple(COLUMN_COUNT);
            for (int j = 0; j < COLUMN_COUNT; j++) {
                tuple.set(j, "a" + i + "_" + j);
            }
            storer.putNext(tuple);
        }

        recordWriter.close(taskAttemptContext);

        OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskAttemptContext);
        outputCommitter.commitTask(taskAttemptContext);
        outputCommitter.commitJob(jobContext);
    }
}
Example #18
Source File: POStore.java From spork with Apache License 2.0 | 4 votes |
public void setStoreFunc(StoreFuncInterface storeFunc) {
    this.storer = storeFunc;
}
Example #19
Source File: ScalarVisitor.java From spork with Apache License 2.0 | 4 votes |
@Override
protected LogicalExpressionVisitor getVisitor(final LogicalExpressionPlan exprPlan)
        throws FrontendException {
    return new LogicalExpressionVisitor(exprPlan, new DependencyOrderWalker(exprPlan)) {
        @Override
        public void visit(ScalarExpression expr) throws FrontendException {
            // This is a scalar udf.
            ConstantExpression filenameConst =
                    (ConstantExpression) exprPlan.getSuccessors(expr).get(1);

            Operator refOp = expr.getImplicitReferencedOperator();
            Operator attachedOp = expr.getAttachedLogicalOperator();
            LogicalPlan lp = (LogicalPlan) attachedOp.getPlan();
            List<Operator> succs = lp.getSuccessors(refOp);
            LOStore store = null;
            FuncSpec interStorageFuncSpec = new FuncSpec(InterStorage.class.getName());
            if (succs != null) {
                for (Operator succ : succs) {
                    if (succ instanceof LOStore
                            && ((LOStore) succ).isTmpStore()
                            && interStorageFuncSpec.equals(
                                    ((LOStore) succ).getOutputSpec().getFuncSpec())) {
                        store = (LOStore) succ;
                        break;
                    }
                }
            }
            if (store == null) {
                FileSpec fileSpec;
                try {
                    fileSpec = new FileSpec(
                            FileLocalizer.getTemporaryPath(pigContext).toString(),
                            interStorageFuncSpec); // TODO: need to hookup the pigcontext.
                } catch (IOException e) {
                    throw new PlanValidationException(expr, "Failed to process scalar" + e);
                }

                StoreFuncInterface stoFunc = (StoreFuncInterface)
                        PigContext.instantiateFuncFromSpec(interStorageFuncSpec);
                String sig = LogicalPlanBuilder.newOperatorKey(scope);
                stoFunc.setStoreFuncUDFContextSignature(sig);

                store = new LOStore(lp, fileSpec, stoFunc, sig);
                store.setTmpStore(true);
                lp.add(store);
                lp.connect(refOp, store);
            }

            expr.setImplicitReferencedOperator(store);
            filenameConst.setValue(store.getOutputSpec().getFileName());

            if (lp.getSoftLinkSuccessors(store) == null
                    || !lp.getSoftLinkSuccessors(store).contains(attachedOp)) {
                lp.createSoftLink(store, attachedOp);
            }
        }
    };
}
Example #20
Source File: LOStore.java From spork with Apache License 2.0 | 4 votes |
public StoreFuncInterface getStoreFunc() {
    return storeFunc;
}
Example #21
Source File: LOStore.java From spork with Apache License 2.0 | 4 votes |
public LOStore(LogicalPlan plan, FileSpec outputFileSpec,
        StoreFuncInterface storeFunc, String signature) {
    super("LOStore", plan);
    this.output = outputFileSpec;
    this.storeFunc = storeFunc;
    this.signature = signature;
}
Example #22
Source File: POStoreImpl.java From spork with Apache License 2.0 | 2 votes |
/**
 * Set up the storer
 * @param store - the POStore object
 * @throws IOException
 */
public abstract StoreFuncInterface createStoreFunc(POStore store)
        throws IOException;