org.apache.pig.impl.util.UDFContext Java Examples
The following examples show how to use org.apache.pig.impl.util.UDFContext.
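All of these examples revolve around the same mechanism: UDFContext carries per-UDF Properties from the Pig front end, where the script is planned, to the back end, where tasks execute, by serializing them into the job configuration. A loader or storer typically computes something on the front end (a schema, a filter expression, a token), stores it under its signature, and reads it back on the back end. Below is a minimal sketch of that round trip; the class and the "demo.location" property name are hypothetical, not taken from the examples that follow.

import java.io.IOException;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;

public class SignatureDemoLoader extends LoadFunc {
    private String signature;
    private String location;   // recovered on the back end

    @Override
    public void setUDFContextSignature(String signature) {
        // Pig calls this on both the front end and the back end with the
        // same string, so it can key one Properties object in both places.
        this.signature = signature;
    }

    @Override
    public void setLocation(String location, Job job) throws IOException {
        // Front end: record the input path and stash it in the UDFContext,
        // which Pig serializes into the job configuration.
        FileInputFormat.setInputPaths(job, location);
        UDFContext.getUDFContext()
                .getUDFProperties(getClass(), new String[] { signature })
                .setProperty("demo.location", location);
    }

    @Override
    public InputFormat getInputFormat() throws IOException {
        return new TextInputFormat();
    }

    @Override
    public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
        // Back end: read back what the front end stored.
        this.location = UDFContext.getUDFContext()
                .getUDFProperties(getClass(), new String[] { signature })
                .getProperty("demo.location");
    }

    @Override
    public Tuple getNext() throws IOException {
        return null;   // a real loader would build tuples from the RecordReader
    }
}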
Example #1
Source File: RegisteredJarVisibilityLoader.java From spork with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {
    UDFContext udfContext = UDFContext.getUDFContext();
    Properties properties = udfContext.getUDFProperties(RegisteredJarVisibilityLoader.class);

    if (!properties.containsKey(REGISTERED_JAR_VISIBILITY_SCHEMA)) {
        LOG.info("Storing " + RegisteredJarVisibilitySchema.class.getName() + " in UDFContext.");
        properties.put(REGISTERED_JAR_VISIBILITY_SCHEMA, new RegisteredJarVisibilitySchema());
        LOG.info("Stored " + RegisteredJarVisibilitySchema.class.getName() + " in UDFContext.");
    } else {
        LOG.info("Retrieving " + REGISTERED_JAR_VISIBILITY_SCHEMA + " from UDFContext.");
        RegisteredJarVisibilitySchema registeredJarVisibilitySchema =
                (RegisteredJarVisibilitySchema) properties.get(REGISTERED_JAR_VISIBILITY_SCHEMA);
        LOG.info("Retrieved " + REGISTERED_JAR_VISIBILITY_SCHEMA + " from UDFContext.");
    }

    super.setLocation(location, job);
}
Example #2
Source File: PigStorage.java From spork with Apache License 2.0
@Override
public ResourceSchema getSchema(String location, Job job) throws IOException {
    if (!dontLoadSchema) {
        schema = (new JsonMetadata()).getSchema(location, job, isSchemaOn);

        if (signature != null && schema != null) {
            if (tagFile) {
                schema = Utils.getSchemaWithInputSourceTag(schema, "INPUT_FILE_NAME");
            } else if (tagPath) {
                schema = Utils.getSchemaWithInputSourceTag(schema, "INPUT_FILE_PATH");
            }
            Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                    new String[] { signature });
            p.setProperty(signature + ".schema", schema.toString());
        }
    }
    return schema;
}
Example #3
Source File: HiveRCInputFormat.java From spork with Apache License 2.0
public HiveRCInputFormat(String signature) {
    this.signature = signature;

    Properties properties = UDFContext.getUDFContext().getUDFProperties(
            HiveColumnarLoader.class, new String[] { signature });

    // This expression is passed in the
    // HiveColumnarLoader.setPartitionExpression method by the Pig Loader
    // Classes.
    String partitionExpression = properties
            .getProperty(PathPartitionHelper.PARITITION_FILTER_EXPRESSION);

    // backwards compatibility
    String dateRange = properties.getProperty(HiveColumnarLoader.DATE_RANGE);
    if (partitionExpression == null && dateRange != null) {
        partitionExpression = buildFilterExpressionFromDatePartition(dateRange);
        properties.setProperty(PathPartitionHelper.PARITITION_FILTER_EXPRESSION,
                partitionExpression);
    }
}
Example #4
Source File: JsonStorage.java From spork with Apache License 2.0
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Store the record writer reference so we can use it when it's time
    // to write tuples
    this.writer = writer;

    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfcSignature });
    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }

    // Parse the schema from the string stored in the properties object.
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    // Build a Json factory
    jsonFactory = new JsonFactory();
}
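Every example that calls getUDFProperties with a signature array depends on Pig having assigned that signature first. For a storer like the one above, the value of udfcSignature typically arrives through the StoreFunc hook shown below; this is a sketch of the usual one-liner, not the original JsonStorage source.

// Pig calls this with the same string on the front end and the back end,
// so both sides address the same Properties object in the UDFContext.
@Override
public void setStoreFuncUDFContextSignature(String signature) {
    udfcSignature = signature;
}

Load functions have the analogous setUDFContextSignature(String) hook, used in the sketch after the introduction above.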
Example #5
Source File: PathPartitionHelper.java From spork with Apache License 2.0
/**
 * Reads the partition keys from the location, i.e. the base directory.
 *
 * @param location
 *            String must be the base directory for the partitions
 * @param conf
 * @param loaderClass
 * @throws IOException
 */
public void setPartitionKeys(String location, Configuration conf,
        Class<? extends LoadFunc> loaderClass, String signature) throws IOException {

    Set<String> partitionKeys = getPartitionKeys(location, conf);

    if (partitionKeys != null) {
        StringBuilder buff = new StringBuilder();
        int i = 0;
        for (String key : partitionKeys) {
            if (i++ != 0) {
                buff.append(",");
            }
            buff.append(key);
        }

        UDFContext.getUDFContext()
                .getUDFProperties(loaderClass, new String[] { signature })
                .setProperty(PARTITION_COLUMNS, buff.toString());
    }
}
Example #6
Source File: JsonLoader.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") public void prepareToRead(RecordReader reader, PigSplit split) throws IOException { this.reader = reader; // Get the schema string from the UDFContext object. UDFContext udfc = UDFContext.getUDFContext(); Properties p = udfc.getUDFProperties(this.getClass(), new String[]{udfcSignature}); String strSchema = p.getProperty(SCHEMA_SIGNATURE); if (strSchema == null) { throw new IOException("Could not find schema in UDF context"); } // Parse the schema from the string stored in the properties object. schema = new ResourceSchema(Utils.getSchemaFromString(strSchema)); jsonFactory = new JsonFactory(); }
Example #7
Source File: JsonLoader.java From spork with Apache License 2.0
public ResourceSchema getSchema(String location, Job job) throws IOException {
    ResourceSchema s;
    if (schema != null) {
        s = schema;
    } else {
        // Parse the schema
        s = (new JsonMetadata()).getSchema(location, job, true);
        if (s == null) {
            throw new IOException("Unable to parse schema found in file in " + location);
        }
    }

    // Now that we have determined the schema, store it in our
    // UDFContext properties object so we have it when we need it on the
    // backend
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfcSignature });
    p.setProperty(SCHEMA_SIGNATURE, s.toString());

    return s;
}
Example #8
Source File: CurrentTime.java From spork with Apache License 2.0
@Override
public DateTime exec(Tuple input) throws IOException {
    // If we are doing compile time calculation
    if (UDFContext.getUDFContext().isFrontend()) {
        return new DateTime();
    }
    if (!isInitialized) {
        String dateTimeValue = UDFContext.getUDFContext().getJobConf()
                .get("pig.job.submitted.timestamp");
        if (dateTimeValue == null) {
            throw new ExecException("pig.job.submitted.timestamp was not set!");
        }
        dateTime = new DateTime(Long.parseLong(dateTimeValue));
        isInitialized = true;
    }
    return dateTime;
}
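The isFrontend() branch above exists because the job configuration is only guaranteed to be available on the back end; during front-end planning, getJobConf() can return null (Example #24 below handles the same situation with a null check instead). A minimal sketch of the guard in a hypothetical EvalFunc, with an illustrative configuration key:

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.UDFContext;

public class ConfEcho extends EvalFunc<String> {
    @Override
    public String exec(Tuple input) throws IOException {
        UDFContext ctx = UDFContext.getUDFContext();
        if (ctx.isFrontend()) {
            return null;   // planning time: there is no task-side job conf to read
        }
        // "my.app.marker" is an illustrative key, not a real Pig property.
        return ctx.getJobConf().get("my.app.marker", "unset");
    }
}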
Example #9
Source File: OrcStorage.java From spork with Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    if (requiredFieldList == null)
        return null;
    if (requiredFieldList.getFields() != null) {
        int schemaSize = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos().size();
        mRequiredColumns = new boolean[schemaSize];
        for (RequiredField rf : requiredFieldList.getFields()) {
            if (rf.getIndex() != -1)
                mRequiredColumns[rf.getIndex()] = true;
        }
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        try {
            p.setProperty(signature + RequiredColumnsSuffix,
                    ObjectSerializer.serialize(mRequiredColumns));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize mRequiredColumns");
        }
    }
    return new RequiredFieldResponse(true);
}
Example #10
Source File: FixedWidthLoader.java From spork with Apache License 2.0
@Override
public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
    // Save reader to use in getNext()
    this.reader = reader;

    splitIndex = split.getSplitIndex();

    // Get schema from front-end
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[] { udfContextSignature });

    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    requiredFields = (boolean[]) ObjectSerializer.deserialize(
            p.getProperty(REQUIRED_FIELDS_SIGNATURE));
    if (requiredFields != null) {
        numRequiredFields = 0;
        for (int i = 0; i < requiredFields.length; i++) {
            if (requiredFields[i])
                numRequiredFields++;
        }
    }
}
Example #11
Source File: FetchLauncher.java From spork with Apache License 2.0
/**
 * Runs the fetch task by executing a chain of calls on the PhysicalPlan from the leaf
 * up to the LoadFunc.
 *
 * @param pp - Physical plan
 * @return SimpleFetchPigStats instance representing the fetched result
 * @throws IOException
 */
public PigStats launchPig(PhysicalPlan pp) throws IOException {
    try {
        POStore poStore = (POStore) pp.getLeaves().get(0);
        init(pp, poStore);

        // run fetch
        runPipeline(poStore);

        UDFFinishVisitor udfFinisher = new UDFFinishVisitor(pp,
                new DependencyOrderWalker<PhysicalOperator, PhysicalPlan>(pp));
        udfFinisher.visit();

        return PigStats.start(new EmptyPigStats(pigContext, poStore));
    } finally {
        UDFContext.getUDFContext().addJobConf(null);
        pigContext.getProperties().remove(PigImplConstants.CONVERTED_TO_FETCH);
    }
}
Example #12
Source File: EsStorage.java From elasticsearch-hadoop with Apache License 2.0
private void addEsApiKeyToken(Settings esSettings, Job job) {
    if (!UDFContext.getUDFContext().isFrontend()) {
        return;
    }
    UserProvider userProvider = UserProvider.create(esSettings);
    if (userProvider.isEsKerberosEnabled()) {
        User user = userProvider.getUser();
        if (user.getKerberosPrincipal() != null) {
            RestClient tokenBootstrap = new RestClient(esSettings);
            try {
                TokenUtil.obtainTokenForJob(tokenBootstrap, user, job);
            } finally {
                tokenBootstrap.close();
            }
        } else {
            log.info("Not loading Elasticsearch API Key for auth delegation since no Kerberos TGT exist.");
        }
    }
}
Example #13
Source File: FixedWidthLoader.java From spork with Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    if (requiredFieldList == null)
        return null;

    if (fields != null && requiredFieldList.getFields() != null) {
        requiredFields = new boolean[fields.length];

        for (RequiredField f : requiredFieldList.getFields()) {
            requiredFields[f.getIndex()] = true;
        }

        UDFContext udfc = UDFContext.getUDFContext();
        Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfContextSignature });
        try {
            p.setProperty(REQUIRED_FIELDS_SIGNATURE, ObjectSerializer.serialize(requiredFields));
        } catch (Exception e) {
            throw new RuntimeException("Cannot serialize requiredFields for pushProjection");
        }
    }

    return new RequiredFieldResponse(true);
}
Example #14
Source File: FixedWidthStorer.java From spork with Apache License 2.0
@Override
public void prepareToWrite(RecordWriter writer) throws IOException {
    // Store writer to use in putNext()
    this.writer = writer;

    // Get the schema string from the UDFContext object.
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfContextSignature });
    String strSchema = p.getProperty(SCHEMA_SIGNATURE);
    if (strSchema == null) {
        throw new IOException("Could not find schema in UDF context");
    }
    schema = new ResourceSchema(Utils.getSchemaFromString(strSchema));

    fields = schema.getFields();
}
Example #15
Source File: FetchLauncher.java From spork with Apache License 2.0
private void init(PhysicalPlan pp, POStore poStore) throws IOException {
    poStore.setStoreImpl(new FetchPOStoreImpl(pigContext));
    poStore.setUp();

    TaskAttemptID taskAttemptID = HadoopShims.getNewTaskAttemptID();
    HadoopShims.setTaskAttemptId(conf, taskAttemptID);

    if (!PlanHelper.getPhysicalOperators(pp, POStream.class).isEmpty()) {
        MapRedUtil.setupStreamingDirsConfSingle(poStore, pigContext, conf);
    }

    String currentTime = Long.toString(System.currentTimeMillis());
    conf.set("pig.script.submitted.timestamp", currentTime);
    conf.set("pig.job.submitted.timestamp", currentTime);

    PhysicalOperator.setReporter(new FetchProgressableReporter());
    SchemaTupleBackend.initialize(conf, pigContext);

    UDFContext udfContext = UDFContext.getUDFContext();
    udfContext.addJobConf(conf);
    udfContext.setClientSystemProps(pigContext.getProperties());
    udfContext.serialize(conf);

    PigMapReduce.sJobConfInternal.set(conf);
    Utils.setDefaultTimeZone(conf);

    boolean aggregateWarning = "true".equalsIgnoreCase(conf.get("aggregate.warning"));
    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new FetchTaskContext(new FetchContext()));
    PigHadoopLogger pigHadoopLogger = PigHadoopLogger.getInstance();
    pigHadoopLogger.setReporter(pigStatusReporter);
    pigHadoopLogger.setAggregate(aggregateWarning);
    PhysicalOperator.setPigLogger(pigHadoopLogger);
}
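Note the udfContext.serialize(conf) call: it is the other half of every store-then-read pair in these examples, writing the accumulated UDF properties into the job configuration so the executing side can deserialize them and hand them back out through getUDFProperties(). In fetch mode that round trip happens inside a single JVM, but the flow is the same.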
Example #16
Source File: DBStorage.java From spork with Apache License 2.0
@Override
public void checkSchema(ResourceSchema s) throws IOException {
    // We won't really check the schema here, we'll store it in our
    // UDFContext properties object so we have it when we need it on the
    // backend
    UDFContext udfc = UDFContext.getUDFContext();
    Properties p = udfc.getUDFProperties(this.getClass(), new String[]{ udfcSignature });
    p.setProperty(SCHEMA_SIGNATURE, s.toString());
}
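This is the front-end half of the schema round trip; Examples #4 and #6 show the matching back-end halves, where prepareToWrite() and prepareToRead() read the same SCHEMA_SIGNATURE property back out of the UDFContext.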
Example #17
Source File: HBaseStorage.java From spork with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {
    Properties udfProps = getUDFProperties();
    job.getConfiguration().setBoolean("pig.noSplitCombination", true);

    m_conf = initializeLocalJobConfig(job);
    String delegationTokenSet = udfProps.getProperty(HBASE_TOKEN_SET);
    if (delegationTokenSet == null) {
        addHBaseDelegationToken(m_conf, job);
        udfProps.setProperty(HBASE_TOKEN_SET, "true");
    }

    String tablename = location;
    if (location.startsWith("hbase://")) {
        tablename = location.substring(8);
    }
    m_conf.set(TableInputFormat.INPUT_TABLE, tablename);

    String projectedFields = udfProps.getProperty(projectedFieldsName());
    if (projectedFields != null) {
        // update columnInfo_
        pushProjection((RequiredFieldList) ObjectSerializer.deserialize(projectedFields));
    }
    addFiltersWithoutColumnPrefix(columnInfo_);

    if (requiredFieldList != null) {
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass(),
                new String[] { contextSignature });
        p.setProperty(contextSignature + "_projectedFields",
                ObjectSerializer.serialize(requiredFieldList));
    }
}
Example #18
Source File: HBaseStorage.java From spork with Apache License 2.0
/**
 * Get delegation token from hbase and add it to the Job
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void addHBaseDelegationToken(Configuration hbaseConf, Job job) {
    if (!UDFContext.getUDFContext().isFrontend()) {
        return;
    }

    if ("kerberos".equalsIgnoreCase(hbaseConf.get(HBASE_SECURITY_CONF_KEY))) {
        // Will not be entering this block for 0.20.2 as it has no security.
        try {
            // getCurrentUser method is not public in 0.20.2
            Method m1 = UserGroupInformation.class.getMethod("getCurrentUser");
            UserGroupInformation currentUser =
                    (UserGroupInformation) m1.invoke(null, (Object[]) null);
            // hasKerberosCredentials method not available in 0.20.2
            Method m2 = UserGroupInformation.class.getMethod("hasKerberosCredentials");
            boolean hasKerberosCredentials = (Boolean) m2.invoke(currentUser, (Object[]) null);
            if (hasKerberosCredentials) {
                // Class and method are available only from 0.92 security release
                Class tokenUtilClass = Class
                        .forName("org.apache.hadoop.hbase.security.token.TokenUtil");
                Method m3 = tokenUtilClass.getMethod("obtainTokenForJob", new Class[] {
                        Configuration.class, UserGroupInformation.class, Job.class });
                m3.invoke(null, new Object[] { hbaseConf, currentUser, job });
            } else {
                LOG.info("Not fetching hbase delegation token as no Kerberos TGT is available");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new RuntimeException("Failure loading TokenUtil class, "
                    + "is secure RPC available?", cnfe);
        } catch (RuntimeException re) {
            throw re;
        } catch (Exception e) {
            throw new UndeclaredThrowableException(e,
                    "Unexpected error calling TokenUtil.obtainTokenForJob()");
        }
    }
}
Example #19
Source File: IcebergStorage.java From iceberg with Apache License 2.0
private void copyUDFContextToScopedConfiguration(Configuration conf, String key) {
    String value = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{ signature }).getProperty(key);

    if (value != null) {
        conf.set(key + '.' + signature, value);
    }
}
Example #20
Source File: IcebergStorage.java From iceberg with Apache License 2.0
private void copyUDFContextToConfiguration(Configuration conf, String key) {
    String value = UDFContext.getUDFContext()
            .getUDFProperties(this.getClass(), new String[]{ signature }).getProperty(key);

    if (value != null) {
        conf.set(key, value);
    }
}
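Contrast this with Example #19: the scoped variant suffixes the configuration key with the storage instance's signature, so two IcebergStorage instances in one script cannot overwrite each other's settings, while this variant copies the value under the original, shared key.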
Example #21
Source File: ExecutableManager.java From spork with Apache License 2.0
ProcessInputThread(InputHandler inputHandler, POStream poStream, UDFContext udfContext) {
    setDaemon(true);
    this.inputHandler = inputHandler;
    this.poStream = poStream;
    // a copy of UDFContext passed from the ExecutableManager thread
    this.udfContext = udfContext;
    // the input queue from where this thread will read
    // input tuples
    this.binaryInputQueue = poStream.getBinaryInputQueue();
}
Example #22
Source File: PathPartitionHelper.java From spork with Apache License 2.0
/**
 * Sets the PARITITION_FILTER_EXPRESSION property in the UDFContext
 * identified by the loaderClass.
 *
 * @param partitionFilterExpression
 * @param loaderClass
 * @throws IOException
 */
public void setPartitionFilterExpression(String partitionFilterExpression,
        Class<? extends LoadFunc> loaderClass, String signature) throws IOException {

    UDFContext.getUDFContext()
            .getUDFProperties(loaderClass, new String[] { signature })
            .setProperty(PARITITION_FILTER_EXPRESSION, partitionFilterExpression);
}
Example #23
Source File: TestMRJobStats.java From spork with Apache License 2.0
@Test
public void testGetOuputSizeUsingNonFileBasedStorage2() throws Exception {
    // Register a custom output size reader in configuration
    Configuration conf = new Configuration();
    conf.set(PigStatsOutputSizeReader.OUTPUT_SIZE_READER_KEY,
            DummyOutputSizeReader.class.getName());

    // ClientSystemProps is needed to instantiate HBaseStorage
    UDFContext.getUDFContext().setClientSystemProps(new Properties());
    long outputSize = JobStats.getOutputSize(
            createPOStoreForNonFileBasedSystem(new HBaseStorage("colName"), conf), conf);

    assertEquals("The dummy output size reader always returns " + DummyOutputSizeReader.SIZE,
            DummyOutputSizeReader.SIZE, outputSize);
}
Example #24
Source File: GFCross.java From spork with Apache License 2.0
@Override
public DataBag exec(Tuple input) throws IOException {
    if (parallelism == 0) {
        parallelism = DEFAULT_PARALLELISM;
        Configuration cfg = UDFContext.getUDFContext().getJobConf();
        if (cfg != null) {
            String s = cfg.get(PigImplConstants.PIG_CROSS_PARALLELISM + "." + crossKey);
            if (s == null) {
                throw new IOException("Unable to get parallelism hint from job conf");
            }
            parallelism = Integer.valueOf(s);
            if (parallelism < 0) {
                throw new IOException(PigImplConstants.PIG_CROSS_PARALLELISM + "." + crossKey
                        + " was " + parallelism);
            }
        }

        numInputs = (Integer) input.get(0);
        myNumber = (Integer) input.get(1);

        numGroupsPerInput = (int) Math.ceil(Math.pow(parallelism, 1.0 / numInputs));
        numGroupsGoingTo = (int) Math.pow(numGroupsPerInput, numInputs - 1);
    }

    DataBag output = mBagFactory.newDefaultBag();

    try {
        int[] digits = new int[numInputs];
        digits[myNumber] = r.nextInt(numGroupsPerInput);

        for (int i = 0; i < numGroupsGoingTo; i++) {
            output.add(toTuple(digits));
            next(digits);
        }

        return output;
    } catch (ExecException e) {
        throw e;
    }
}
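The group arithmetic spreads the requested parallelism across the inputs of the cross: with parallelism = 16 and numInputs = 2, each tuple draws one of ceil(16^(1/2)) = 4 ids for its own input and is replicated to 4^(2-1) = 4 groups, one per possible id of the other input, giving 4 × 4 = 16 reduce groups in total.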
Example #25
Source File: VespaQuery.java From vespa with Apache License 2.0
public VespaQuery(String... params) {
    configuration = VespaConfiguration.get(UDFContext.getUDFContext().getJobConf(), null);
    properties = VespaConfiguration.loadProperties(params);

    queryTemplate = properties.getProperty(PROPERTY_QUERY_TEMPLATE);
    if (queryTemplate == null || queryTemplate.isEmpty()) {
        throw new IllegalArgumentException("Query template cannot be empty");
    }

    querySchema = properties.getProperty(PROPERTY_QUERY_SCHEMA, "rank:int,id:chararray");
    queryRootNode = properties.getProperty(PROPERTY_ROOT_NODE, "root/children");
}
Example #26
Source File: UDFContextTestLoaderWithSignature.java From spork with Apache License 2.0
@Override
public Tuple getNext() throws IOException {
    Tuple t = super.getNext();
    if (t != null) {
        Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
        t.append(p.get("test_" + signature));
    }
    return t;
}
Example #27
Source File: TestPruneColumn.java From spork with Apache License 2.0
@Override
public RequiredFieldResponse pushProjection(RequiredFieldList requiredFieldList)
        throws FrontendException {
    aliases = new String[requiredFieldList.getFields().size()];
    for (int i = 0; i < requiredFieldList.getFields().size(); i++) {
        RequiredField fs = requiredFieldList.getFields().get(i);
        aliases[i] = fs.getAlias();
    }
    try {
        UDFContext.getUDFContext().getUDFProperties(this.getClass())
                .setProperty(signature, ObjectSerializer.serialize(aliases));
    } catch (IOException e) {
        throw new FrontendException(e);
    }
    return new RequiredFieldResponse(true);
}
Example #28
Source File: OrcStorage.java From spork with Apache License 2.0
@Override
public void checkSchema(ResourceSchema rs) throws IOException {
    ResourceFieldSchema fs = new ResourceFieldSchema();
    fs.setType(DataType.TUPLE);
    fs.setSchema(rs);
    typeInfo = OrcUtils.getTypeInfo(fs);
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    p.setProperty(signature + SchemaSignatureSuffix, ObjectSerializer.serialize(typeInfo));
}
Example #29
Source File: UDFContextTestLoaderWithSignature.java From spork with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {
    super.setLocation(location, job);
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    if (p.get(signature) == null) {
        p.put("test_" + signature, val);
    }
}
Example #30
Source File: POUserFunc.java From spork with Apache License 2.0
/**
 * Sets the EvalFunc's input schema based on the signature.
 * @param signature
 */
public void setFuncInputSchema(String signature) {
    Properties props = UDFContext.getUDFContext().getUDFProperties(func.getClass());
    Schema tmpS = (Schema) props.get("pig.evalfunc.inputschema." + signature);
    if (tmpS != null) {
        this.func.setInputSchema(tmpS);
    }
}