Java Code Examples for org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil#mergeConf()

The following examples show how to use org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil#mergeConf(). All of them come from the spork project and are listed with their source file and license.
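ConfigurationUtil's implementation is not shown on this page; judging from how every example below uses it, mergeConf(dest, src) copies each entry of the second Configuration into the first, with the second argument winning on any conflict. A minimal sketch of that assumed behavior (MergeConfSketch and its method body are illustrative, not Pig's actual code):

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

// Illustrative sketch only: assumes mergeConf(dest, src) copies every
// key/value pair from src into dest, with src winning on conflicts.
final class MergeConfSketch {
    static void mergeConf(Configuration dest, Configuration src) {
        // Configuration is Iterable<Map.Entry<String, String>>
        for (Map.Entry<String, String> entry : src) {
            dest.set(entry.getKey(), entry.getValue());
        }
    }
}

The examples below use the call in two shapes: folding changes that a StoreFunc or LoadFunc made to a scratch copy back into the caller's Configuration (Examples 1, 3 and 4), and overlaying Pig's own properties onto a freshly loaded Hadoop Configuration (Examples 2, 5 and 6).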
Example 1
Source File: PigOutputFormat.java    From spork with Apache License 2.0
/**
 * Before delegating calls to the underlying OutputFormat or OutputCommitter,
 * Pig needs to ensure that the Configuration in the JobContext contains the
 * output location and StoreFunc for the specific store, so this method sets
 * them up in the context for that store.
 * @param jobContext the {@link JobContext}
 * @param store the POStore
 * @throws IOException on failure
 */
public static void setLocation(JobContext jobContext, POStore store) throws IOException {
    Job storeJob = new Job(jobContext.getConfiguration());
    StoreFuncInterface storeFunc = store.getStoreFunc();
    String outputLocation = store.getSFile().getFileName();
    storeFunc.setStoreLocation(outputLocation, storeJob);

    // The setStoreLocation() call tells the StoreFunc to set the output
    // location on its underlying OutputFormat. OutputFormats typically
    // store the output location in the Configuration, so we need to get
    // the modified Configuration (containing the output location and any
    // other settings the OutputFormat may have set) and merge it with the
    // Configuration we started with, so that when this method returns, the
    // Configuration supplied as input has the updates.
    ConfigurationUtil.mergeConf(jobContext.getConfiguration(),
            storeJob.getConfiguration());
}
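Note the shape of this method: the caller's Configuration is copied into a scratch Job, the StoreFunc is allowed to mutate the scratch copy via setStoreLocation(), and mergeConf() folds the changes back into the original. Examples 3 and 4 apply the same snapshot-and-merge-back idea on the load side.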
 
Example 2
Source File: QueryParserUtils.java    From spork with Apache License 2.0
static void setHdfsServers(String absolutePath, PigContext pigContext) throws URISyntaxException {
    // Get native host
    String defaultFS = (String) pigContext.getProperties().get("fs.default.name");
    if (defaultFS == null) {
        defaultFS = (String) pigContext.getProperties().get("fs.defaultFS");
    }

    URI defaultFSURI = new URI(defaultFS);

    Configuration conf = new Configuration(true);
    ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(pigContext.getProperties()));
    Set<String> remoteHosts = getRemoteHosts(absolutePath, defaultFSURI, conf);

    String hdfsServersString = (String) pigContext.getProperties().get(MRConfiguration.JOB_HDFS_SERVERS);
    if (hdfsServersString == null) {
        hdfsServersString = "";
    }
    String[] hdfsServers = hdfsServersString.split(",");

    for (String remoteHost : remoteHosts) {
        boolean existing = false;
        for (String hdfsServer : hdfsServers) {
            if (hdfsServer.equals(remoteHost)) {
                existing = true;
                break;
            }
        }
        if (!existing) {
            if (!hdfsServersString.isEmpty()) {
                hdfsServersString = hdfsServersString + ",";
            }
            hdfsServersString = hdfsServersString + remoteHost;
        }
    }

    if (!hdfsServersString.isEmpty()) {
        pigContext.getProperties().setProperty(MRConfiguration.JOB_HDFS_SERVERS, hdfsServersString);
    }
}
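Here the direction is reversed: mergeConf() overlays Pig's own properties onto a freshly loaded Hadoop Configuration, so that getRemoteHosts() sees both the cluster defaults and any Pig-level overrides when it resolves which HDFS servers the path touches.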
 
Example 3
Source File: PigInputFormat.java    From spork with Apache License 2.0
/**
 * Gets the configuration corresponding to the input the split is based on
 * and merges it with the Configuration supplied.
 *
 * Package-level access so that this is not used publicly elsewhere.
 * @throws IOException
 */
static void mergeSplitSpecificConf(LoadFunc loadFunc, PigSplit pigSplit, Configuration originalConf)
        throws IOException {
    // set up conf with entries from input specific conf
    Job job = new Job(originalConf);
    loadFunc.setLocation(getLoadLocation(pigSplit.getInputIndex(),
            originalConf), job);
    // The above setLocation call could write to the conf within
    // the job - merge that updated conf with original conf
    ConfigurationUtil.mergeConf(originalConf, job.getConfiguration());
}
 
Example 4
Source File: PigRecordReader.java    From spork with Apache License 2.0
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Initialize the underlying RecordReader with the right Context
    // object. This is achieved by merging the conf specific to the input
    // split this reader is supposed to process into the conf of the
    // context passed in.
    this.pigSplit = (PigSplit)split;
    this.context = context;
    ConfigurationUtil.mergeConf(context.getConfiguration(),
            inputSpecificConf);
    // Pass loader signature to LoadFunc and to InputFormat through
    // the conf
    PigInputFormat.passLoadSignature(loadfunc, pigSplit.getInputIndex(),
            context.getConfiguration());
    // now invoke initialize() on underlying RecordReader with
    // the "adjusted" conf
    if (null != curReader) {
        curReader.initialize(pigSplit.getWrappedSplit(), context);
        loadfunc.prepareToRead(curReader, pigSplit);
    }
    if (pigSplit.isMultiInputs() && !pigSplit.disableCounter()) {
        counterName = getMultiInputsCounerName(pigSplit, inputSpecificConf);
        if (counterName != null) {
            // Create the counter. This is needed because incrCounter() may
            // never be called in case of empty file.
            reporter.incrCounter(PigStatsUtil.MULTI_INPUTS_COUNTER_GROUP, counterName, 0);
        }
    }
}
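This merge runs on the task side: the input-specific conf captured when the plan was built is folded into the live TaskAttemptContext's Configuration before the wrapped RecordReader is initialized, so both the LoadFunc and the underlying InputFormat see the adjusted settings.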
 
Example 5
Source File: FileLocalizer.java    From spork with Apache License 2.0
/**
 * Copies files from a remote filesystem to the local filesystem.
 * When 'multipleFiles' is set, the path may point to multiple files
 * through globs or a directory; in that case the returned array contains
 * multiple entries, otherwise it contains a single file.
 *
 * If pig.jars.relative.to.dfs is true, a relative path is assumed to be
 * relative to the default filesystem's active directory.
 * Otherwise it is assumed to be relative to the local working directory.
 *
 * @param properties properties to merge into the Configuration used here
 * @param filePath the path (possibly a glob) of the file(s) to fetch
 * @param multipleFiles whether the path may match multiple files
 * @return the fetched files
 */
private static FetchFileRet[] fetchFilesInternal(Properties properties,
                                        String filePath,
                                        boolean multipleFiles) throws IOException {

    Path path = new Path(filePath);
    if (path.getName().isEmpty()) {
        return new FetchFileRet[0];
    }
    URI uri = path.toUri();
    Configuration conf = new Configuration();
    ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(properties));

    // if there is no scheme, or if the scheme is "local", then the path is
    // expected to be a local path.

    FileSystem localFs = FileSystem.getLocal(conf);
    FileSystem srcFs;
    if ( (!"true".equals(properties.getProperty("pig.jars.relative.to.dfs"))
            && uri.getScheme() == null )||
            // For Windows local files
            (uri.getScheme() == null && uri.getPath().matches("^/[A-Za-z]:.*")) ||
            (uri.getScheme() != null && uri.getScheme().equals("local"))
        ) {
        srcFs = localFs;
    } else {
        srcFs = path.getFileSystem(conf);
    }

    FileStatus[] files;

    if (multipleFiles) {
        files = srcFs.globStatus(path);
    } else {
        files = new FileStatus[]{ srcFs.getFileStatus(path) };
    }
    if (files == null || files.length == 0) {
        throw new ExecException("file '" + filePath + "' does not exist.", 101, PigException.INPUT);
    }

    FetchFileRet[] fetchFiles = new FetchFileRet[files.length];
    int idx = 0;

    for(FileStatus file : files) {
        // should throw an exception if this is not a file?

        String pathname = file.getPath().toUri().getPath();
        String filename = file.getPath().getName();

        if (srcFs == localFs) {
            fetchFiles[idx++] = new FetchFileRet(new File(pathname), false);
        } else {
            // fetch from remote:
            File dest = new File(localTempDir, filename);
            dest.deleteOnExit();
            try {
                srcFs.copyToLocalFile(file.getPath(), new Path(dest.getAbsolutePath()));
            } catch (IOException e) {
                throw new ExecException("Could not copy " + filePath + " to local destination " + dest, 101, PigException.INPUT, e);
            }
            fetchFiles[idx++] = new FetchFileRet(dest, true);
        }
    }

    return fetchFiles;
}
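As in Example 2, mergeConf() is used up front to overlay the caller's Properties onto a plain Configuration; the merged conf then drives both the local-versus-remote FileSystem decision and the actual copy.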
 
Example 6
Source File: HExecutionEngine.java    From spork with Apache License 2.0
@SuppressWarnings("resource")
private void init(Properties properties) throws ExecException {
    String cluster = null;
    String nameNode = null;

    // We need to build a configuration object first in the manner described
    // below and then get back a properties object to inspect the
    // JOB_TRACKER_LOCATION and FILE_SYSTEM_LOCATION. The reason to do this
    // is that if we looked only at the existing properties object, we might
    // not get the right settings. So we want to read the configurations in
    // the order specified below and only then look for JOB_TRACKER_LOCATION
    // and FILE_SYSTEM_LOCATION.

    // Hadoop by default specifies two resources, loaded in-order from the
    // classpath:
    // 1. hadoop-default.xml : Read-only defaults for hadoop.
    // 2. hadoop-site.xml: Site-specific configuration for a given hadoop
    // installation.
    // Now add the settings from the "properties" object to override any
    // existing properties. All of the above is accomplished in the method
    // call below.

    JobConf jc = getS3Conf();
    if (!this.pigContext.getExecType().isLocal()) {
        JobConf execConf = getExecConf(properties);
        ConfigurationUtil.mergeConf(jc, execConf);

        // Trick to invoke static initializer of DistributedFileSystem to
        // add hdfs-default.xml into configuration
        new DistributedFileSystem();
    } else {
        // If we are running in local mode we don't read the hadoop conf file
        if (properties.getProperty(MRConfiguration.FRAMEWORK_NAME) == null) {
            properties.setProperty(MRConfiguration.FRAMEWORK_NAME, LOCAL);
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, LOCAL);
        properties.setProperty(FILE_SYSTEM_LOCATION, "file:///");
        properties.setProperty(ALTERNATIVE_FILE_SYSTEM_LOCATION, "file:///");

        JobConf localConf = getLocalConf();
        ConfigurationUtil.mergeConf(jc, localConf);
    }

    // the method below alters the properties object by overriding the
    // hadoop properties with the values from properties and recomputing
    // the properties
    Utils.recomputeProperties(jc, properties);

    cluster = jc.get(MRConfiguration.JOB_TRACKER);
    nameNode = jc.get(FILE_SYSTEM_LOCATION);
    if (nameNode == null) {
        nameNode = (String) pigContext.getProperties().get(ALTERNATIVE_FILE_SYSTEM_LOCATION);
    }

    if (cluster != null && cluster.length() > 0) {
        if (!cluster.contains(":") && !cluster.equalsIgnoreCase(LOCAL)) {
            cluster = cluster + ":50020";
        }
        properties.setProperty(MRConfiguration.JOB_TRACKER, cluster);
    }

    if (nameNode != null && nameNode.length() > 0) {
        if (!nameNode.contains(":") && !nameNode.equalsIgnoreCase(LOCAL)) {
            nameNode = nameNode + ":8020";
        }
        properties.setProperty(FILE_SYSTEM_LOCATION, nameNode);
    }

    LOG.info("Connecting to hadoop file system at: "
            + (nameNode == null ? LOCAL : nameNode));
    // constructor sets DEFAULT_REPLICATION_FACTOR_KEY
    ds = new HDataStorage(properties);

    if (cluster != null && !cluster.equalsIgnoreCase(LOCAL)) {
        LOG.info("Connecting to map-reduce job tracker at: "
                + jc.get(MRConfiguration.JOB_TRACKER));
    }
}
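In this initializer, mergeConf() is called to layer configurations: the S3 conf comes first, then either the cluster's exec conf or the local-mode conf is merged on top, and Utils.recomputeProperties() reconciles the result with the user-supplied properties before the job tracker and filesystem locations are read off.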