org.apache.pig.impl.util.ObjectSerializer Java Examples
The following examples show how to use org.apache.pig.impl.util.ObjectSerializer.
They are drawn from open source projects; the original project and source file are noted above each example.
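Before the examples, a minimal round-trip sketch of the API itself may help: ObjectSerializer.serialize(...) encodes a Serializable object as a plain String so it can be carried in a Hadoop Configuration or a UDFContext property, and ObjectSerializer.deserialize(...) restores it, returning null when handed a null string (which is why many of the examples below null-check the result). The class name and the property key example.sort.order in this sketch are illustrative, not taken from the Pig sources.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.impl.util.ObjectSerializer;

public class ObjectSerializerRoundTrip {
    public static void main(String[] args) throws IOException {
        // Any Serializable value works; Pig commonly ships boolean[] sort orders this way.
        boolean[] sortOrder = new boolean[] { true, false, true };

        // serialize() yields a String that survives a Configuration round trip.
        Configuration conf = new Configuration();
        conf.set("example.sort.order", ObjectSerializer.serialize(sortOrder));

        // deserialize() returns Object, so a cast is needed; it returns null
        // if the property was never set.
        boolean[] restored = (boolean[]) ObjectSerializer.deserialize(conf.get("example.sort.order"));
        System.out.println(restored != null && !restored[1]); // prints true
    }
}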
Example #1
Source File: FixedWidthLoader.java From spork with Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null)
        return null;

    if (fields != null && requiredFieldList.getFields() != null) {
        requiredFields = new boolean[fields.length];

        for (RequiredField f : requiredFieldList.getFields()) {
            requiredFields[f.getIndex()] = true;
        }

        UDFContext udfc = UDFContext.getUDFContext();
        Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });
        try {
            p.setProperty(REQUIRED_FIELDS_SIGNATURE, ObjectSerializer.serialize(requiredFields));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize requiredFields for pushProjection");
        }
    }

    return new RequiredFieldResponse(true);
}
Example #2
Source File: FixedWidthLoader.java From spork with Apache License 2.0
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Save reader to use in getNext()
    this.reader = reader;

    splitIndex = split.getSplitIndex();

    // Get schema from front-end
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    requiredFields = (boolean[]) ObjectSerializer.deserialize(p.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields != null) {
        numRequiredFields = 0;
        for (int i = 0; i < requiredFields.length; i++) {
            if (requiredFields[i])
                numRequiredFields++;
        }
    }
}
Example #3
Source File: DefaultTuple.java From spork with Apache License 2.0
@Override
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
    // If there's only one entry in mAsc, it means it's for the whole
    // tuple. So we can't be looking for each column.
    mWholeTuple = (mAsc.length == 1);
    mFact = TupleFactory.getInstance();
}
Example #4
Source File: PigInputFormat.java From spork with Apache License 2.0
/**
 * Pass loader signature to LoadFunc and to InputFormat through
 * the conf
 * @param loadFunc the Loadfunc to set the signature on
 * @param inputIndex the index of the input corresponding to the loadfunc
 * @param conf the Configuration object into which the signature should be
 * set
 * @throws IOException on failure
 */
@SuppressWarnings("unchecked")
static void passLoadSignature(LoadFunc loadFunc, int inputIndex, Configuration conf) throws IOException {
    List<String> inpSignatureLists = (ArrayList<String>) ObjectSerializer.deserialize(
            conf.get("pig.inpSignatures"));
    // signature can be null for intermediate jobs where it will not
    // be required to be passed down
    if (inpSignatureLists.get(inputIndex) != null) {
        loadFunc.setUDFContextSignature(inpSignatureLists.get(inputIndex));
        conf.set("pig.loader.signature", inpSignatureLists.get(inputIndex));
    }
    MapRedUtil.setupUDFContext(conf);
}
Example #5
Source File: TezDagBuilder.java From spork with Apache License 2.0
private void addCombiner(PhysicalPlan combinePlan, TezOperator pkgTezOp, Configuration conf) throws IOException {
    POPackage combPack = (POPackage) combinePlan.getRoots().get(0);
    POLocalRearrange combRearrange = (POLocalRearrange) combinePlan.getLeaves().get(0);
    setIntermediateOutputKeyValue(combRearrange.getKeyType(), conf, pkgTezOp);

    LoRearrangeDiscoverer lrDiscoverer = new LoRearrangeDiscoverer(combinePlan, pkgTezOp, combPack);
    lrDiscoverer.visit();

    combinePlan.remove(combPack);
    conf.set(TezRuntimeConfiguration.TEZ_RUNTIME_COMBINER_CLASS, MRCombiner.class.getName());
    conf.set(MRJobConfig.COMBINE_CLASS_ATTR, PigCombiner.Combine.class.getName());
    conf.setBoolean(MRConfiguration.MAPPER_NEW_API, true);
    conf.set("pig.pigContext", ObjectSerializer.serialize(pc));
    conf.set("udf.import.list", ObjectSerializer.serialize(PigContext.getPackageImportList()));
    conf.set("pig.combinePlan", ObjectSerializer.serialize(combinePlan));
    conf.set("pig.combine.package", ObjectSerializer.serialize(combPack));
    conf.set("pig.map.keytype", ObjectSerializer.serialize(new byte[] { combRearrange.getKeyType() }));
}
Example #6
Source File: TestPigTupleRawComparator.java From spork with Apache License 2.0
@Test
public void testSortOrder() throws IOException {
    // prototype < t but we use inverse sort order
    list.set(2, (Double) list.get(2) + 0.1);
    NullableTuple t = new NullableTuple(tf.newTuple(list));
    JobConf jobConf = new JobConf();
    jobConf.set("pig.sortOrder", ObjectSerializer.serialize(new boolean[] { false }));
    comparator.setConf(jobConf);
    int res = compareHelper(prototype, t, comparator);
    assertEquals(-1 * Math.signum(prototype.compareTo(t)), Math.signum(res), 0);
    assertTrue(res > 0);
    jobConf.set("pig.sortOrder", ObjectSerializer.serialize(
            new boolean[] { true, true, false, true, true, true, true, true, true }));
    comparator.setConf(jobConf);
    res = compareHelper(prototype, t, comparator);
    assertEquals(-1 * Math.signum(prototype.compareTo(t)), Math.signum(res), 0);
    assertTrue(res > 0);
}
Example #7
Source File: OrcStorage.java From spork with Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList) throws FrontendException {
    if (requiredFieldList == null)
        return null;
    if (requiredFieldList.getFields() != null) {
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1)
                mRequiredColumns[rf.getIndex()] = true;
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix, ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
Example #8
Source File: BinInterSedes.java From spork with Apache License 2.0
@Override
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
        mSecondaryAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.secondarySortOrder"));
        mIsSecondarySort = true;
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize sort order object" + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
    if (mSecondaryAsc == null) {
        mIsSecondarySort = false;
    }
    // If there's only one entry in mAsc, it means it's for the whole
    // tuple. So we can't be looking for each column.
    mWholeTuple = (mAsc.length == 1);
    mFact = TupleFactory.getInstance();
    mSedes = InterSedesFactory.getInterSedesInstance();
}
Example #9
Source File: TestOrcStoragePushdown.java From spork with Apache License 2.0
private void testPredicatePushdownLocal(String filterStmt, int expectedRows) throws IOException {
    PigServer pigServer_disabledRule = new PigServer(ExecType.LOCAL);

    // Test with PredicatePushdownOptimizer disabled.
    HashSet<String> disabledOptimizerRules = new HashSet<String>();
    disabledOptimizerRules.add("PredicatePushdownOptimizer");
    pigServer_disabledRule.getPigContext().getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
            ObjectSerializer.serialize(disabledOptimizerRules));
    pigServer_disabledRule.registerQuery("B = load '" + INPUT + "' using OrcStorage();");
    pigServer_disabledRule.registerQuery("C = filter B by " + filterStmt + ";");

    // Test with PredicatePushdownOptimizer enabled.
    pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();");
    pigServer.registerQuery("E = filter D by " + filterStmt + ";");

    // Verify that results are same
    Util.checkQueryOutputs(pigServer_disabledRule.openIterator("C"), pigServer.openIterator("E"), expectedRows);
}
Example #10
Source File: HBaseStorage.java From spork with Apache License 2.0
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    if (location.startsWith("hbase://")) {
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location.substring(8));
    } else {
        job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, location);
    }

    String serializedSchema = getUDFProperties().getProperty(contextSignature + "_schema");
    if (serializedSchema != null) {
        schema_ = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
    }

    m_conf = initializeLocalJobConfig(job);
    // Not setting a udf property and getting the hbase delegation token
    // only once like in setLocation as setStoreLocation gets different Job
    // objects for each call and the last Job passed is the one that is
    // launched. So we end up getting multiple hbase delegation tokens.
    addHBaseDelegationToken(m_conf, job);
}
Example #11
Source File: MRJobStats.java From spork with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public void setConf(Configuration conf) {
    super.setConf(conf);
    try {
        this.mapStores = (List<POStore>) ObjectSerializer.deserialize(
                conf.get(JobControlCompiler.PIG_MAP_STORES));
        this.reduceStores = (List<POStore>) ObjectSerializer.deserialize(
                conf.get(JobControlCompiler.PIG_REDUCE_STORES));
        this.loads = (ArrayList<FileSpec>) ObjectSerializer.deserialize(
                conf.get("pig.inputs"));
        this.disableCounter = conf.getBoolean("pig.disable.counter", false);
    } catch (IOException e) {
        LOG.warn("Failed to deserialize the store list", e);
    }
}
Example #12
Source File: StoreConverter.java From spork with Apache License 2.0
private static POStore configureStorer(JobConf jobConf, PhysicalOperator physicalOperator) throws IOException {
    ArrayList<POStore> storeLocations = Lists.newArrayList();
    POStore poStore = (POStore) physicalOperator;
    storeLocations.add(poStore);
    StoreFuncInterface sFunc = poStore.getStoreFunc();
    sFunc.setStoreLocation(poStore.getSFile().getFileName(), new org.apache.hadoop.mapreduce.Job(jobConf));
    poStore.setInputs(null);
    poStore.setParentPlan(null);

    jobConf.set(JobControlCompiler.PIG_MAP_STORES, ObjectSerializer.serialize(Lists.newArrayList()));
    jobConf.set(JobControlCompiler.PIG_REDUCE_STORES, ObjectSerializer.serialize(storeLocations));
    return poStore;
}
Example #13
Source File: DefaultIndexableLoader.java From spork with Apache License 2.0
private void initRightLoader(int[] splitsToBeRead) throws IOException {
    PigContext pc = (PigContext) ObjectSerializer
            .deserialize(PigMapReduce.sJobConfInternal.get().get("pig.pigContext"));

    Configuration conf = ConfigurationUtil.toConfiguration(pc.getProperties());

    // Hadoop security need this property to be set
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        conf.set(MRConfiguration.JOB_CREDENTIALS_BINARY,
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // create ReadToEndLoader that will read the given splits in order
    loader = new ReadToEndLoader((LoadFunc) PigContext.instantiateFuncFromSpec(rightLoaderFuncSpec),
            conf, inpLocation, splitsToBeRead);
}
Example #14
Source File: PigDateTimeRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #15
Source File: HBaseStorage.java From spork with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {
    Properties udfProps = getUDFProperties();
    job.getConfiguration().setBoolean("pig.noSplitCombination", true);

    m_conf = initializeLocalJobConfig(job);
    String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
    if (delegationTokenSet == null) {
        addHBaseDelegationToken(m_conf, job);
        udfProps.setProperty(HBASE_TOKEN_SET, "true");
    }

    String tablename = location;
    if (location.startsWith("hbase://")) {
        tablename = location.substring(8);
    }
    m_conf.set(TableInputFormat.INPUT_TABLE, tablename);

    String projectedFields = udfProps.getProperty(projectedFieldsName());
    if (projectedFields != null) {
        // update columnInfo_
        pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
    }
    addFiltersWithoutColumnPrefix(columnInfo_);

    if (requiredFieldList != null) {
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                new String[] { contextSignature });
        p.setProperty(contextSignature + "_projectedFields", ObjectSerializer.serialize(requiredFieldList));
    }
}
Example #16
Source File: TestPruneColumn.java From spork with Apache License 2.0
@Test
public void testComplex2() throws Exception {
    HashSet<String> optimizerRules = new HashSet<String>();
    optimizerRules.add("PushUpFilter");
    pigServer.getPigContext().getProperties().setProperty(
            PigImplConstants.PIG_OPTIMIZER_RULES_KEY, ObjectSerializer.serialize(optimizerRules));

    pigServer.registerQuery("A = load '"
            + Util.generateURI(tmpFile13.toString(), pigServer.getPigContext())
            + "' as (a:int, b:chararray);");
    pigServer.registerQuery("B = FOREACH A generate a;");
    pigServer.registerQuery("C = GROUP B by a;");
    pigServer.registerQuery("D = filter C by group > 0 and group < 100;");
    pigServer.registerQuery("E = FOREACH D {F = LIMIT B 1 ;GENERATE B.a as mya, FLATTEN(F.a) as setting;}");
    pigServer.registerQuery("G = FOREACH E GENERATE mya, setting as setting;");

    Iterator<Tuple> iter = pigServer.openIterator("G");

    assertTrue(iter.hasNext());
    Tuple t = iter.next();
    assertEquals("({(1)},1)", t.toString());

    assertTrue(iter.hasNext());
    t = iter.next();
    assertEquals("({(2),(2)},2)", t.toString());

    assertTrue(iter.hasNext());
    t = iter.next();
    assertEquals("({(3),(3),(3)},3)", t.toString());

    assertFalse(iter.hasNext());
    assertTrue(checkLogFileMessage(new String[] { "Columns pruned for A: $1" }));

    pigServer.getPigContext().getProperties().remove(PigImplConstants.PIG_OPTIMIZER_RULES_KEY);
}
Example #17
Source File: HBaseStorage.java From spork with Apache License 2.0
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    if (!(caster_ instanceof LoadStoreCaster)) {
        LOG.error("Caster must implement LoadStoreCaster for writing to HBase.");
        throw new IOException("Bad Caster " + caster_.getClass());
    }
    schema_ = s;
    getUDFProperties().setProperty(contextSignature + "_schema", ObjectSerializer.serialize(schema_));
}
Example #18
Source File: TestLimitVariable.java From spork with Apache License 2.0
@Test
public void testLimitVariable4() throws IOException {
    String query =
            "a = load '" + inputFile.getName() + "' as (x:int, y:int);" +
            "b = group a all;" +
            "c = foreach b generate COUNT(a) as sum;" +
            "d = order a by $0 DESC;" +
            "e = filter d by $0 != 4;" +
            "f = limit e c.sum/2;" // return top half of the tuples
            ;

    try {
        HashSet<String> disabledOptimizerRules = new HashSet<String>();
        disabledOptimizerRules.add("PushUpFilter");
        pigServer.getPigContext().getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
                ObjectSerializer.serialize(disabledOptimizerRules));
        Util.registerMultiLineQuery(pigServer, query);
        Iterator<Tuple> it = pigServer.openIterator("f");

        // Even if push up filter is disabled order should be retained
        List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(new String[] {
                "(6,15)", "(5,10)", "(3,10)" });
        Util.checkQueryOutputs(it, expectedRes);
    } finally {
        pigServer.getPigContext().getProperties().remove(PigImplConstants.PIG_OPTIMIZER_RULES_KEY);
    }
}
Example #19
Source File: TestEvalPipeline2.java From spork with Apache License 2.0
@Test
public void testProjectNullBag() throws Exception {
    String[] input1 = {
            "{(1)}\t2",
            "\t3"
    };

    HashSet<String> optimizerRules = new HashSet<String>();
    optimizerRules.add("MergeForEach");
    pigServer.getPigContext().getProperties().setProperty(
            PigImplConstants.PIG_OPTIMIZER_RULES_KEY, ObjectSerializer.serialize(optimizerRules));

    Util.createInputFile(cluster, "table_testProjectNullBag", input1);

    pigServer.registerQuery("a = load 'table_testProjectNullBag' as (a0:bag{}, a1:int);");
    pigServer.registerQuery("b = foreach a generate a0;");

    Iterator<Tuple> iter = pigServer.openIterator("b");

    Tuple t = iter.next();
    Assert.assertTrue(t.toString().equals("({(1)})"));

    t = iter.next();
    Assert.assertTrue(t.toString().equals("()"));

    Assert.assertFalse(iter.hasNext());

    pigServer.getPigContext().getProperties().remove(PigImplConstants.PIG_OPTIMIZER_RULES_KEY);
}
Example #20
Source File: TestPruneColumn.java From spork with Apache License 2.0
@Override
public Tuple getNext() throws IOException {
    if (aliases == null) {
        aliases = (String[]) ObjectSerializer.deserialize(
                UDFContext.getUDFContext().getUDFProperties(this.getClass()).getProperty(signature));
        Tuple t = TupleFactory.getInstance().newTuple();
        for (String s : aliases)
            t.append(s);
        return t;
    }
    return null;
}
Example #21
Source File: PhoenixHBaseStorage.java From phoenix with BSD 3-Clause "New" or "Revised" License
/**
 * Parse the HBase table name and configure job
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
    String prefix = "hbase://";
    if (location.startsWith(prefix)) {
        tableName = location.substring(prefix.length());
    }
    config = new PhoenixPigConfiguration(job.getConfiguration());
    config.configure(server, tableName, batchSize);

    String serializedSchema = getUDFProperties().getProperty(contextSignature + SCHEMA);
    if (serializedSchema != null) {
        schema = (ResourceSchema) ObjectSerializer.deserialize(serializedSchema);
    }
}
Example #22
Source File: PigIntRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #23
Source File: PigTupleDefaultRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
    // If there's only one entry in mAsc, it means it's for the whole
    // tuple. So we can't be looking for each column.
    mWholeTuple = (mAsc.length == 1);
}
Example #24
Source File: PigBigDecimalRawComparator.java From spork with Apache License 2.0
@Override
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #25
Source File: PigBigIntegerRawComparator.java From spork with Apache License 2.0
@Override
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #26
Source File: PigBytesRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
    ((BinInterSedes.BinInterSedesTupleRawComparator) mWrappedComp).setConf(conf);
}
Example #27
Source File: PigTextRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        String msg = "Unable to deserialize pig.sortOrder";
        throw new RuntimeException(msg, ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #28
Source File: PigBooleanRawComparator.java From spork with Apache License 2.0
public void setConf(Configuration conf) {
    try {
        mAsc = (boolean[]) ObjectSerializer.deserialize(conf.get("pig.sortOrder"));
    } catch (IOException ioe) {
        mLog.error("Unable to deserialize pig.sortOrder " + ioe.getMessage());
        throw new RuntimeException(ioe);
    }
    if (mAsc == null) {
        mAsc = new boolean[1];
        mAsc[0] = true;
    }
}
Example #29
Source File: PigInputFormat.java From spork with Apache License 2.0
@SuppressWarnings("unchecked")
private static String getLoadLocation(int inputIndex, Configuration conf) throws IOException {
    ArrayList<FileSpec> inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(
            conf.get(PIG_INPUTS));
    return inputs.get(inputIndex).getFileName();
}
Example #30
Source File: PigInputFormat.java From spork with Apache License 2.0
/**
 * @param inputIndex
 * @param conf
 * @return
 * @throws IOException
 */
@SuppressWarnings("unchecked")
private static LoadFunc getLoadFunc(int inputIndex, Configuration conf) throws IOException {
    ArrayList<FileSpec> inputs = (ArrayList<FileSpec>) ObjectSerializer.deserialize(
            conf.get(PIG_INPUTS));
    FuncSpec loadFuncSpec = inputs.get(inputIndex).getFuncSpec();
    return (LoadFunc) PigContext.instantiateFuncFromSpec(loadFuncSpec);
}