org.apache.hadoop.util.Progressable Java Examples

The following examples show how to use org.apache.hadoop.util.Progressable. You can go to the original project or source file by following the links above each example.
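
Progressable declares a single callback, void progress(), which long-running operations invoke to signal that work is still advancing. A minimal sketch of supplying one through the public FileSystem API (the file path is illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Progressable;

public class ProgressableSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // FileSystem.create accepts a Progressable that may be called back
    // periodically while written data is flushed out.
    FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"),
        new Progressable() {
          @Override
          public void progress() {
            System.out.print("."); // report liveness
          }
        });
    out.writeUTF("hello");
    out.close();
  }
}
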
Example #1
Source File: UnshardedExportToCloudStorage.java    From hadoop-connectors with Apache License 2.0
@Override
public void waitForUsableMapReduceInput() throws IOException, InterruptedException {
    Preconditions.checkState(
        exportJobReference != null,
        "beginExport() must be called before waitForUsableMapReduceInput()");

    BigQueryUtils.waitForJobCompletion(
        bigQueryHelper.getRawBigquery(),
        projectId,
        exportJobReference,
        new Progressable() {
          @Override
          public void progress() {
          }
        });
  }
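
Because Progressable declares exactly one abstract method, the no-op callback above can also be written as a lambda on Java 8+ (a sketch reusing the fields from the example):

    BigQueryUtils.waitForJobCompletion(
        bigQueryHelper.getRawBigquery(), projectId, exportJobReference,
        () -> { /* no-op: signals liveness only */ });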
 
Example #2
Source File: DFSOutputStream.java    From hadoop with Apache License 2.0
/** Construct a new output stream for append. */
private DFSOutputStream(DFSClient dfsClient, String src,
    EnumSet<CreateFlag> flags, Progressable progress, LocatedBlock lastBlock,
    HdfsFileStatus stat, DataChecksum checksum) throws IOException {
  this(dfsClient, src, progress, stat, checksum);
  initialFileSize = stat.getLen(); // length of file when opened
  this.shouldSyncBlock = flags.contains(CreateFlag.SYNC_BLOCK);

  boolean toNewBlock = flags.contains(CreateFlag.NEW_BLOCK);

  // The last partial block of the file has to be filled.
  if (!toNewBlock && lastBlock != null) {
    // indicate that we are appending to an existing block
    bytesCurBlock = lastBlock.getBlockSize();
    streamer = new DataStreamer(lastBlock, stat, bytesPerChecksum);
  } else {
    computePacketChunkSize(dfsClient.getConf().writePacketSize,
        bytesPerChecksum);
    streamer = new DataStreamer(stat,
        lastBlock != null ? lastBlock.getBlock() : null);
  }
  this.fileEncryptionInfo = stat.getFileEncryptionInfo();
}
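
This constructor is internal HDFS plumbing; the Progressable normally arrives through the public FileSystem API. A hedged sketch of the client-side call that eventually reaches this append path (path and buffer size are illustrative):

FileSystem fs = FileSystem.get(new Configuration());
FSDataOutputStream out = fs.append(new Path("/logs/app.log"), 4096,
    () -> { /* invoked as appended data moves through the write pipeline */ });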
 
Example #3
Source File: DFSClient.java    From big-c with Apache License 2.0
/**
 * Same as {@link #create(String, FsPermission, EnumSet, short, long,
 * Progressable, int, ChecksumOpt)} except that the permission
 * is absolute (i.e. it has already been masked with umask).
 */
public DFSOutputStream primitiveCreate(String src, 
                           FsPermission absPermission,
                           EnumSet<CreateFlag> flag,
                           boolean createParent,
                           short replication,
                           long blockSize,
                           Progressable progress,
                           int buffersize,
                           ChecksumOpt checksumOpt)
    throws IOException, UnresolvedLinkException {
  checkOpen();
  CreateFlag.validate(flag);
  DFSOutputStream result = primitiveAppend(src, flag, buffersize, progress);
  if (result == null) {
    DataChecksum checksum = dfsClientConf.createChecksum(checksumOpt);
    result = DFSOutputStream.newStreamForCreate(this, src, absPermission,
        flag, createParent, replication, blockSize, progress, buffersize,
        checksum, null);
  }
  beginFileLease(result.getFileId(), result);
  return result;
}
 
Example #4
Source File: ObjectStoreFileSystem.java    From stocator with Apache License 2.0
/**
 * {@inheritDoc}
 * Creates a path of the form dataroot/objectname.
 * Each object name is modified to carry a task-attempt suffix.
 * For example, a create of
 * dataroot/objectname/_temporary/0/_temporary/attempt_201603131849_0000_m_000019_0/
 * part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv
 * is transformed into
 * PUT dataroot/objectname
 * /part-r-00019-a08dcbab-8a34-4d80-a51c-368a71db90aa.csv-attempt_201603131849_0000_m_000019_0
 *
 */
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
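  // Note: the Progressable passed in is not used below; object-store PUT
  // progress is not reported back to the caller.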
  LOG.debug("Create: {}, overwrite is: {}", f.toString(), overwrite);
  validateBracketSupport(f.toString());
  final Path path = storageClient.qualify(f);
  final String objNameModified;
  // check if request is dataroot/objectname/_SUCCESS
  if (path.getName().equals(Constants.HADOOP_SUCCESS)) {
    // no need to add attempt id to the _SUCCESS
    objNameModified = stocatorPath.extractFinalKeyFromTemporaryPath(path, false,
        storageClient.getDataRoot(), true);
  } else {
    // add attempt id to the final name
    objNameModified = stocatorPath.extractFinalKeyFromTemporaryPath(path, true,
        storageClient.getDataRoot(), true);
  }
  return storageClient.createObject(objNameModified,
      "application/octet-stream", null, statistics, overwrite);
}
 
Example #5
Source File: HadoopOutputFormatTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testOpen() throws Exception {

	OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
	DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
	JobConf jobConf = Mockito.spy(new JobConf());
	when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);

	HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);

	outputFormat.open(1, 1);

	verify(jobConf, times(2)).getOutputCommitter();
	verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
	verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
 
Example #6
Source File: S3AFileSystem.java    From hadoop with Apache License 2.0
/**
 * Create an FSDataOutputStream at the indicated Path with write-progress
 * reporting.
 * @param f the file name to open
 * @param permission the permission to apply to the file
 * @param overwrite if a file with this name already exists, then if true,
 *   the file will be overwritten, and if false an error will be thrown.
 * @param bufferSize the size of the buffer to be used.
 * @param replication required block replication for the file.
 * @param blockSize the requested block size.
 * @param progress the progressable used to report write progress.
 * @throws IOException on any failure to open or write the file
 * @see #setPermission(Path, FsPermission)
 */
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, 
  int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
  String key = pathToKey(f);

  if (!overwrite && exists(f)) {
    throw new FileAlreadyExistsException(f + " already exists");
  }
  if (getConf().getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD)) {
    return new FSDataOutputStream(new S3AFastOutputStream(s3, this, bucket,
        key, progress, statistics, cannedACL,
        serverSideEncryptionAlgorithm, partSize, (long)multiPartThreshold,
        threadPoolExecutor), statistics);
  }
  // We pass null to FSDataOutputStream so it won't count writes that are being buffered to a file
  return new FSDataOutputStream(new S3AOutputStream(getConf(), transfers, this,
    bucket, key, progress, cannedACL, statistics, 
    serverSideEncryptionAlgorithm), null);
}
 
Example #7
Source File: AbstractDynamoDBRecordWriter.java    From emr-dynamodb-connector with Apache License 2.0
public AbstractDynamoDBRecordWriter(JobConf jobConf, Progressable progressable) {
  this.progressable = progressable;

  client = new DynamoDBClient(jobConf);
  tableName = jobConf.get(DynamoDBConstants.OUTPUT_TABLE_NAME);
  if (tableName == null) {
    throw new ResourceNotFoundException("No output table name was specified.");
  }

  deletionMode = jobConf.getBoolean(DynamoDBConstants.DELETION_MODE,
      DynamoDBConstants.DEFAULT_DELETION_MODE);

  IopsCalculator iopsCalculator = new WriteIopsCalculator(createJobClient(jobConf), client,
      tableName);
  iopsController = new IopsController(iopsCalculator, DEFAULT_AVERAGE_ITEM_SIZE_IN_BYTES,
      DynamoDBOperationType.WRITE);
  permissibleWritesPerSecond = iopsController.getTargetItemsPerSecond();
  log.info("Number of allocated item writes per second: " + permissibleWritesPerSecond);

  // Hive may not have a valid Reporter and may pass in a null progressable
  // TODO Check whether this would happen when excluding Hive
  if (progressable instanceof Reporter) {
    reporter = (Reporter) progressable;
  }
}
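
The writer holds on to the Progressable so that slow batches of writes do not get the task killed for inactivity. A hypothetical write method illustrating the usual pattern (the method body is illustrative, not the connector's actual code):

public void write(K key, V value) throws IOException {
  // ... hand the item to the DynamoDB client ...
  if (progressable != null) {
    progressable.progress(); // keep the surrounding task alive during slow writes
  }
}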
 
Example #8
Source File: DFSOutputStream.java    From big-c with Apache License 2.0
static DFSOutputStream newStreamForAppend(DFSClient dfsClient, String src,
    EnumSet<CreateFlag> flags, int bufferSize, Progressable progress,
    LocatedBlock lastBlock, HdfsFileStatus stat, DataChecksum checksum,
    String[] favoredNodes) throws IOException {
  TraceScope scope =
      dfsClient.getPathTraceScope("newStreamForAppend", src);
  try {
    final DFSOutputStream out = new DFSOutputStream(dfsClient, src, flags,
        progress, lastBlock, stat, checksum);
    if (favoredNodes != null && favoredNodes.length != 0) {
      out.streamer.setFavoredNodes(favoredNodes);
    }
    out.start();
    return out;
  } finally {
    scope.close();
  }
}
 
Example #9
Source File: TezMerger.java    From tez with Apache License 2.0
public static
TezRawKeyValueIterator merge(Configuration conf, FileSystem fs,
                          Class keyClass, Class valueClass, 
                          CompressionCodec codec, boolean ifileReadAhead,
                          int ifileReadAheadLength, int ifileBufferSize,
                          Path[] inputs, boolean deleteInputs, 
                          int mergeFactor, Path tmpDir,
                          RawComparator comparator, Progressable reporter,
                          TezCounter readsCounter,
                          TezCounter writesCounter,
                          TezCounter bytesReadCounter,
                          Progress mergePhase)
    throws IOException, InterruptedException {
  return 
    new MergeQueue(conf, fs, inputs, deleteInputs, codec, ifileReadAhead,
                         ifileReadAheadLength, ifileBufferSize, false, comparator, 
                         reporter, null).merge(keyClass, valueClass,
                                         mergeFactor, tmpDir,
                                         readsCounter, writesCounter,
                                         bytesReadCounter,
                                         mergePhase);
}
 
Example #10
Source File: TextMultiOutputFormat.java    From XLearning with Apache License 2.0
public RecordWriter<K, V> getRecordWriter(FileSystem ignored,
                                          JobConf job,
                                          String name,
                                          Progressable progress)
    throws IOException {
  boolean ignoreSeparatorOnNull = job.getBoolean("mapred.textoutputformat.ignore.separator", false);
  String keyValueSeparator = job.get("mapred.textoutputformat.separator", "\t");
  splitSize = job.getLong(MR_REDUCE_MAX_FILE_PER_FILE, SPLIT_SIZE);
  jobConf = job;
  fileName = name;
  jobProgress = progress;
  Class<? extends CompressionCodec> codecClass =
      getOutputCompressorClass(job, GzipCodec.class);
  // create the named codec
  codec = ReflectionUtils.newInstance(codecClass, job);
  FSDataOutputStream fileOut = createFile();

  return new MultiSplitRecordWriter<K, V>(new NewDataOutputStream(codec.createOutputStream(fileOut)),
      keyValueSeparator, ignoreSeparatorOnNull);
}
 
Example #11
Source File: Task.java    From hadoop with Apache License 2.0
public ValuesIterator (RawKeyValueIterator in, 
                       RawComparator<KEY> comparator, 
                       Class<KEY> keyClass,
                       Class<VALUE> valClass, Configuration conf, 
                       Progressable reporter)
  throws IOException {
  this.in = in;
  this.comparator = comparator;
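  // held so progress can be reported while iterating over large value sets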
  this.reporter = reporter;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
  this.keyDeserializer.open(keyIn);
  this.valDeserializer = serializationFactory.getDeserializer(valClass);
  this.valDeserializer.open(this.valueIn);
  readNextKey();
  key = nextKey;
  nextKey = null; // force new instance creation
  hasNext = more;
}
 
Example #12
Source File: FileSystem.java    From big-c with Apache License 2.0
@Deprecated
protected FSDataOutputStream primitiveCreate(Path f,
   FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize,
   short replication, long blockSize, Progressable progress,
   ChecksumOpt checksumOpt) throws IOException {

  boolean pathExists = exists(f);
  CreateFlag.validate(f, pathExists, flag);
  
  // The default implementation assumes that permissions do not matter,
  // nor does the bytesPerChecksum, hence calling the regular create is
  // good enough. FileSystems that implement permissions should override this.

  if (pathExists && flag.contains(CreateFlag.APPEND)) {
    return append(f, bufferSize, progress);
  }
  
  return this.create(f, absolutePermission,
      flag.contains(CreateFlag.OVERWRITE), bufferSize, replication,
      blockSize, progress);
}
 
Example #13
Source File: MultipleOutputs.java    From RDFS with Apache License 2.0
@SuppressWarnings({"unchecked"})
public RecordWriter<Object, Object> getRecordWriter(
  FileSystem fs, JobConf job, String baseFileName, Progressable progress)
  throws IOException {

  String nameOutput = job.get(CONFIG_NAMED_OUTPUT, null);
  String fileName = getUniqueName(job, baseFileName);

  // The following trick leverages the instantiation of a record writer via
  // the job conf thus supporting arbitrary output formats.
  JobConf outputConf = new JobConf(job);
  outputConf.setOutputFormat(getNamedOutputFormatClass(job, nameOutput));
  outputConf.setOutputKeyClass(getNamedOutputKeyClass(job, nameOutput));
  outputConf.setOutputValueClass(getNamedOutputValueClass(job, nameOutput));
  OutputFormat outputFormat = outputConf.getOutputFormat();
  return outputFormat.getRecordWriter(fs, outputConf, fileName, progress);
}
 
Example #14
Source File: EsHiveOutputFormat.java    From elasticsearch-hadoop with Apache License 2.0
public EsHiveRecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class valueClass, boolean isCompressed, Properties tableProperties, Progressable progress) {
    // force the table properties to be merged into the configuration
    // NB: the properties are also available in HiveConstants#OUTPUT_TBL_PROPERTIES
    Settings settings = HadoopSettingsManager.loadFrom(jc).merge(tableProperties);

    Log log = LogFactory.getLog(getClass());

    // NB: ESSerDe is already initialized at this stage but should still have a reference to the same cfg object
    // NB: the value writer is not needed by Hive but it's set for consistency and debugging purposes

    InitializationUtils.setValueWriterIfNotSet(settings, HiveValueWriter.class, log);
    InitializationUtils.setBytesConverterIfNeeded(settings, HiveBytesConverter.class, log);
    InitializationUtils.setUserProviderIfNotSet(settings, HadoopUserProvider.class, log);

    // set write resource
    settings.setResourceWrite(settings.getResourceWrite());

    HiveUtils.init(settings, log);

    return new EsHiveRecordWriter(jc, progress);
}
 
Example #15
Source File: HarFileSystem.java    From RDFS with Apache License 2.0
public FSDataOutputStream create(Path f,
    FsPermission permission,
    boolean overwrite,
    int bufferSize,
    short replication,
    long blockSize,
    Progressable progress) throws IOException {
  throw new IOException("Har: create not allowed.");
}
 
Example #16
Source File: RawLocalFileSystem.java    From big-c with Apache License 2.0
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
  boolean overwrite, int bufferSize, short replication, long blockSize,
  Progressable progress) throws IOException {

  FSDataOutputStream out = create(f, overwrite, true, bufferSize, replication,
      blockSize, progress, permission);
  return out;
}
 
Example #17
Source File: NullOutputFormat.java    From hadoop-gpu with Apache License 2.0
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, 
                                    String name, Progressable progress) {
  return new RecordWriter<K, V>(){
      public void write(K key, V value) { }
      public void close(Reporter reporter) { }
    };
}
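
The null writer above never touches its Progressable. For contrast, a hedged sketch of a writer that reports once per record (illustrative only, not part of NullOutputFormat):

return new RecordWriter<K, V>() {
    public void write(K key, V value) {
      progress.progress(); // signal liveness even though nothing is persisted
    }
    public void close(Reporter reporter) { }
  };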
 
Example #18
Source File: LazyOutputFormat.java    From hadoop with Apache License 2.0
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, 
    String name, Progressable progress) throws IOException {
  if (baseOut == null) {
    getBaseOutputFormat(job);
  }
  return new LazyRecordWriter<K, V>(job, baseOut, name, progress);
}
 
Example #19
Source File: FilterFileSystem.java    From big-c with Apache License 2.0
@Override
@Deprecated
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
    EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {
  
  return fs.createNonRecursive(f, permission, flags, bufferSize, replication, blockSize,
      progress);
}
 
Example #20
Source File: TestFilterFileSystem.java    From big-c with Apache License 2.0
public FSDataOutputStream create(Path f,
    FsPermission permission,
    EnumSet<CreateFlag> flags,
    int bufferSize,
    short replication,
    long blockSize,
    Progressable progress,
    ChecksumOpt checksumOpt) throws IOException {
  return null;
}
 
Example #21
Source File: Merger.java    From hadoop-gpu with Apache License 2.0
public MergeQueue(Configuration conf, FileSystem fs, 
    List<Segment<K, V>> segments, RawComparator<K> comparator,
    Progressable reporter, boolean sortSegments) {
  this.conf = conf;
  this.fs = fs;
  this.comparator = comparator;
  this.segments = segments;
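  // held for progress reporting; merge passes can call reporter.progress()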
  this.reporter = reporter;
  if (sortSegments) {
    Collections.sort(segments, segmentComparator);
  }
}
 
Example #22
Source File: ViewFileSystem.java    From big-c with Apache License 2.0
@Override
public FSDataOutputStream append(final Path f, final int bufferSize,
    final Progressable progress) throws IOException {
  InodeTree.ResolveResult<FileSystem> res = 
    fsState.resolve(getUriPath(f), true);
  return res.targetFileSystem.append(res.remainingPath, bufferSize, progress);
}
 
Example #23
Source File: RawLocalFileSystem.java    From RDFS with Apache License 2.0
/** {@inheritDoc} */
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
    boolean overwrite,
    int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {
  FSDataOutputStream out = create(f,
      overwrite, false, bufferSize, replication, blockSize, progress);
  setPermission(f, permission);
  return out;
}
 
Example #24
Source File: Decoder.java    From RDFS with Apache License 2.0
public CRSDecoderInputStream(final Progressable reporter,
		final long limit, final long blockSize, final long errorOffset,
		final FileSystem srcFs, final Path srcFile,
		final FileSystem parityFs, final Path parityFile) {
	super(reporter, limit, blockSize, errorOffset, srcFs, srcFile,
		parityFs, parityFile);
	this.reporter = reporter;
	this.limit = limit;

	this.blockSize = blockSize;

	this.srcFile = srcFile;
	this.srcFs = (DistributedFileSystem)srcFs;
	this.parityFile = parityFile;
	this.parityFs = (DistributedFileSystem)parityFs;

	this.blockIndex = (int) (errorOffset / blockSize);
	this.startOffsetInBlock = errorOffset % blockSize;
	this.currentOffset = errorOffset;
	this.erasedLocationToFix = blockIndex % codec.stripeLength;
	this.structure = conf.get("raid.degradedread.recover.structure", "line");
	localBufferSize = conf.getInt("raid.degradedread.merger.buffersize", 
			(1 << 20));
	this.buffer = new byte[this.localBufferSize];
	
	LOG.info("blockSize = [" + blockSize + "], limit = ["
			+ limit + "], errorOffset = [" + errorOffset 
			+ "], blockIndex = [" + blockIndex + "], structure = [" 
			+ structure + "], erasedLocationToFix = ["
			+ erasedLocationToFix + "], localBufferSize = ["
			+ localBufferSize + "]");
	
	try {
		init();
		inited = true;
	} catch (IOException ioe) {
		inited = false;
		LOG.error("NTar : CRSDecoderInputStream inition failed !" + ioe);	
	}
}
 
Example #25
Source File: MapFile.java    From hadoop with Apache License 2.0
/** Create the named map using the named key comparator. 
 * @deprecated Use {@link #Writer(Configuration, Path, Option...)} instead.
 */
@Deprecated
public Writer(Configuration conf, FileSystem fs, String dirName,
              WritableComparator comparator, Class valClass,
              SequenceFile.CompressionType compress,
              Progressable progress) throws IOException {
  this(conf, new Path(dirName), comparator(comparator),
       valueClass(valClass), compression(compress),
       progressable(progress));
}
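
The non-deprecated replacement passes the same information as Option values. A sketch built from the option helpers already visible in the delegation above (the output path is illustrative):

MapFile.Writer writer = new MapFile.Writer(conf, new Path("/data/mapfile"),
    MapFile.Writer.comparator(comparator),
    MapFile.Writer.valueClass(valClass),
    MapFile.Writer.compression(compress),
    MapFile.Writer.progressable(progress));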
 
Example #26
Source File: TestFilterFileSystem.java    From big-c with Apache License 2.0
public FSDataOutputStream create(Path f,
    FsPermission permission,
    boolean overwrite,
    int bufferSize,
    short replication,
    long blockSize,
    Progressable progress) {
  return null;
}
 
Example #27
Source File: ChecksumFileSystem.java    From hadoop with Apache License 2.0
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {
  return create(f, permission, overwrite, false, bufferSize, replication,
      blockSize, progress);
}
 
Example #28
Source File: DistributedFileSystem.java    From big-c with Apache License 2.0
@Override
protected HdfsDataOutputStream primitiveCreate(Path f,
  FsPermission absolutePermission, EnumSet<CreateFlag> flag, int bufferSize,
  short replication, long blockSize, Progressable progress,
  ChecksumOpt checksumOpt) throws IOException {
  statistics.incrementWriteOps(1);
  final DFSOutputStream dfsos = dfs.primitiveCreate(
    getPathName(fixRelativePart(f)),
    absolutePermission, flag, true, replication, blockSize,
    progress, bufferSize, checksumOpt);
  return dfs.createWrappedOutputStream(dfsos, statistics);
}
 
Example #29
Source File: WebHdfsFileSystem.java    From big-c with Apache License 2.0
@Override
public FSDataOutputStream append(final Path f, final int bufferSize,
    final Progressable progress) throws IOException {
  statistics.incrementWriteOps(1);

  final HttpOpParam.Op op = PostOpParam.Op.APPEND;
  return new FsPathOutputStreamRunner(op, f, bufferSize,
      new BufferSizeParam(bufferSize)
  ).run();
}
 
Example #30
Source File: BloomMapFile.java    From RDFS with Apache License 2.0
public Writer(Configuration conf, FileSystem fs, String dirName,
    Class<? extends WritableComparable> keyClass,
    Class<? extends Writable> valClass, CompressionType compress,
    CompressionCodec codec, Progressable progress) throws IOException {
  super(conf, fs, dirName, keyClass, valClass, compress, codec, progress);
  this.fs = fs;
  this.dir = new Path(dirName);
  initBloomFilter(conf);
}