Java Code Examples for org.apache.hadoop.mapred.FileOutputFormat#getWorkOutputPath()

The following examples show how to use org.apache.hadoop.mapred.FileOutputFormat#getWorkOutputPath(). The method returns the task attempt's temporary work directory, a scratch directory under the job's final output path; files written there are promoted into the output directory by the output committer when the task commits, so side files created in it survive only for successful attempts. Each example is taken from the project and source file noted above it.
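All of the examples below follow the same basic pattern: resolve the work directory once in configure() or getRecordWriter(), build side-file paths under it, and clean up any stale copy before creating the stream. As a minimal sketch of that pattern (the class and method names here are illustrative, not taken from any of the projects below):

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical helper that opens a side file in the task's work directory.
public class WorkOutputPathSketch {
  public static FSDataOutputStream openSideFile(JobConf conf, String name)
      throws IOException {
    // Temporary directory for this task attempt; the output committer moves
    // its contents into the job's final output directory on task commit.
    Path workDir = FileOutputFormat.getWorkOutputPath(conf);
    Path sideFile = new Path(workDir, name);
    FileSystem fs = sideFile.getFileSystem(conf);
    // Delete a stale copy left behind by a failed earlier attempt.
    if (fs.exists(sideFile)) {
      fs.delete(sideFile, false);
    }
    return fs.create(sideFile);
  }
}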
Example 1
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  // write the HAR index files into the task's temporary work directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
  masterIndex = new Path(tmpOutputDir, "_masterindex");
  index = new Path(tmpOutputDir, "_index");
  try {
    fs = masterIndex.getFileSystem(conf);
    if (fs.exists(masterIndex)) {
      fs.delete(masterIndex, false);
    }
    if (fs.exists(index)) {
      fs.delete(index, false);
    }
    indexStream = fs.create(index);
    outStream = fs.create(masterIndex);
    String version = VERSION + " \n";
    outStream.write(version.getBytes(Charsets.UTF_8));
    
  } catch(IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 2
Source File: HadoopArchives.java    From big-c with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
  masterIndex = new Path(tmpOutputDir, "_masterindex");
  index = new Path(tmpOutputDir, "_index");
  try {
    fs = masterIndex.getFileSystem(conf);
    if (fs.exists(masterIndex)) {
      fs.delete(masterIndex, false);
    }
    if (fs.exists(index)) {
      fs.delete(index, false);
    }
    indexStream = fs.create(index);
    outStream = fs.create(masterIndex);
    String version = VERSION + " \n";
    outStream.write(version.getBytes(Charsets.UTF_8));
    
  } catch(IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 3
Source File: HadoopArchives.java    From RDFS with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
  masterIndex = new Path(tmpOutputDir, "_masterindex");
  index = new Path(tmpOutputDir, "_index");
  try {
    fs = masterIndex.getFileSystem(conf);
    if (fs.exists(masterIndex)) {
      fs.delete(masterIndex, false);
    }
    if (fs.exists(index)) {
      fs.delete(index, false);
    }
    indexStream = fs.create(index);
    outStream = fs.create(masterIndex);
    String version = VERSION + " \n";
    outStream.write(version.getBytes());
    
  } catch(IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 4
Source File: OrcOutputFormat.java    From hive-dwrf with Apache License 2.0
@Override
public RecordWriter<NullWritable, OrcSerdeRow> getRecordWriter(FileSystem fileSystem,
     JobConf conf, String name, Progressable reporter) throws IOException {
  ReaderWriterProfiler.setProfilerOptions(conf);

  // To be compatible with older file formats like Sequence and RC
  // Only works if mapred.work.output.dir is set in the conf
  Path workOutputPath = FileOutputFormat.getWorkOutputPath(conf);
  Path outputPath = workOutputPath == null ? new Path(name) : new Path(workOutputPath, name);

  if (fileSystem == null && workOutputPath != null) {
    fileSystem = workOutputPath.getFileSystem(conf);
  }

  return new OrcRecordWriter(fileSystem, outputPath, conf,
    OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE.defaultLongVal,
    OrcConf.ConfVars.HIVE_ORC_COMPRESSION.defaultVal,
    OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE.defaultIntVal,
    OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE.defaultIntVal);
}
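Note the null handling in this example: as the in-code comment says, getWorkOutputPath() only works if mapred.work.output.dir is set in the conf, and it returns null otherwise (for instance, when the output format is invoked outside a normal MapReduce task). The writer therefore falls back to treating name as the full output path, which keeps it compatible with the older Sequence and RC file formats.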
 
Example 5
Source File: HadoopArchives.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt("mapred.task.partition", -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    }
    partStream = destFs.create(tmpOutput);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput);
  }
  buffer = new byte[buf_size];
}
 
Example 6
Source File: HadoopArchives.java    From hadoop-gpu with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(this.conf);
  masterIndex = new Path(tmpOutputDir, "_masterindex");
  index = new Path(tmpOutputDir, "_index");
  try {
    fs = masterIndex.getFileSystem(conf);
    if (fs.exists(masterIndex)) {
      fs.delete(masterIndex, false);
    }
    if (fs.exists(index)) {
      fs.delete(index, false);
    }
    indexStream = fs.create(index);
    outStream = fs.create(masterIndex);
    String version = HarFileSystem.VERSION + " \n";
    outStream.write(version.getBytes());
    
  } catch(IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 7
Source File: HadoopArchives.java    From hadoop with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;

  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
    		"path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    } 
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096), 
        destFs.getDefaultReplication(tmpOutput), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}
 
Example 8
Source File: HadoopArchives.java    From big-c with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;

  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
    		"path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    } 
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096), 
        destFs.getDefaultReplication(tmpOutput), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}
 
Example 9
Source File: HadoopArchives.java    From RDFS with Apache License 2.0
public void configure(JobConf conf) {
  this.conf = conf;
  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt("mapred.task.partition", -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
    		"path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    } 
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096), 
        destFs.getDefaultReplication(), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}