Java Code Examples for org.apache.hadoop.util.Progressable#progress()

The following examples show how to use org.apache.hadoop.util.Progressable#progress(). Each example is taken from an open-source project; the source file, project, and license are listed above the code.
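All of the examples below follow the same basic pattern: a long-running operation periodically calls Progressable#progress() so that the surrounding framework (for example, a MapReduce task) knows the work is still alive and does not time it out. The following minimal sketch illustrates that pattern in isolation; the CopyTask class and the RECORDS_BEFORE_PROGRESS threshold are illustrative assumptions, not code from any of the projects listed here.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.util.Progressable;

// Illustrative helper (not from any project below): copies records and
// reports liveness to the framework every RECORDS_BEFORE_PROGRESS records.
public class CopyTask {

  private static final long RECORDS_BEFORE_PROGRESS = 10000; // assumed threshold

  public static void copyRecords(Iterator<String> records,
                                 Appendable out,
                                 Progressable progressable) throws IOException {
    long recordCtr = 0;
    while (records.hasNext()) {
      out.append(records.next()).append('\n');
      // progress() is cheap, so calling it every N records keeps the task
      // from being declared dead during a long copy without measurable cost.
      if ((recordCtr++ % RECORDS_BEFORE_PROGRESS) == 0 && progressable != null) {
        progressable.progress();
      }
    }
  }
}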
Example 1
Source File: CephFileSystem.java    From cephfs-hadoop with GNU Lesser General Public License v2.1
/**
 * Get an FSDataOutputStream to append onto a file.
 * @param path The File you want to append onto
 * @param bufferSize Ceph does internal buffering, but you can buffer in the Java code as well if you like.
 * @param progress The Progressable to report progress to.
 * Reporting is limited but exists.
 * @return An FSDataOutputStream that connects to the file on Ceph.
 * @throws IOException If the file cannot be found or appended to.
 */
public FSDataOutputStream append(Path path, int bufferSize,
    Progressable progress) throws IOException {
  path = makeAbsolute(path);

  if (progress != null) {
    progress.progress();
  }

  int fd = ceph.open(path, CephMount.O_WRONLY|CephMount.O_APPEND, 0);

  if (progress != null) {
    progress.progress();
  }

  CephOutputStream ostream = new CephOutputStream(getConf(), ceph, fd,
      bufferSize);
  return new FSDataOutputStream(ostream, statistics);
}
 
Example 2
Source File: Merger.java    From hadoop with Apache License 2.0
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong(JobContext.RECORDS_BEFORE_PROGRESS,
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example 3
Source File: Merger.java    From big-c with Apache License 2.0
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong(JobContext.RECORDS_BEFORE_PROGRESS,
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example 4
Source File: Merger.java    From RDFS with Apache License 2.0
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong("mapred.merge.recordsBeforeProgress",
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example 5
Source File: XOREncoder.java    From RDFS with Apache License 2.0
protected void encodeStripeParallel(
  InputStream[] blocks,
  long stripeStartOffset,
  long blockSize,
  OutputStream[] outs,
  Progressable reporter,
  ParallelStreamReader parallelReader) throws IOException {
  ParallelStreamReader.ReadResult readResult;
  for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
    try {
      readResult = parallelReader.getReadResult();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while waiting for read result");
    }
    // Cannot tolerate any IO errors.
    IOException readEx = readResult.getException();
    if (readEx != null) {
      throw readEx;
    }

    xor(readResult.readBufs, writeBufs[0]);
    reporter.progress();

    // Write to output
    outs[0].write(writeBufs[0], 0, bufSize);
    reporter.progress();
  }
}
 
Example 6
Source File: ReedSolomonEncoder.java    From RDFS with Apache License 2.0
private void encodeStripeParallel(
   InputStream[] blocks,
   long stripeStartOffset,
   long blockSize,
   OutputStream[] outs,
   Progressable reporter,
   ParallelStreamReader parallelReader) throws IOException {

  int[] data = new int[stripeSize];
  int[] code = new int[paritySize];

  for (long encoded = 0; encoded < blockSize; encoded += bufSize) {
    // Read some data from each block = bufSize.
    ParallelStreamReader.ReadResult readResult;
    try {
      readResult = parallelReader.getReadResult();
    } catch (InterruptedException e) {
      throw new IOException("Interrupted while waiting for read result");
    }
    // Cannot tolerate any IO errors.
    IOException readEx = readResult.getException();
    if (readEx != null) {
      throw readEx;
    }

    // Encode the data read.
    for (int j = 0; j < bufSize; j++) {
      performEncode(readResult.readBufs, writeBufs, j, data, code);
    }
    reporter.progress();

    // Now that we have some data to write, send it to the temp files.
    for (int i = 0; i < paritySize; i++) {
      outs[i].write(writeBufs[i], 0, bufSize);
      reporter.progress();
    }
  }
}
 
Example 7
Source File: TezMerger.java    From incubator-tez with Apache License 2.0
public static <K extends Object, V extends Object>
  void writeFile(TezRawKeyValueIterator records, Writer writer, 
                 Progressable progressable, long recordsBeforeProgress) 
  throws IOException {
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % recordsBeforeProgress) == 0) {
        progressable.progress();
      }
    }
}
 
Example 8
Source File: TezMerger.java    From tez with Apache License 2.0
public static <K extends Object, V extends Object>
void writeFile(TezRawKeyValueIterator records, Writer writer,
    Progressable progressable, long recordsBeforeProgress)
    throws IOException, InterruptedException {
  long recordCtr = 0;
  long count = 0;
  while(records.next()) {
    if (records.isSameKey()) {
      writer.append(IFile.REPEAT_KEY, records.getValue());
      count++;
    } else {
      writer.append(records.getKey(), records.getValue());
    }
    
    if (((recordCtr++) % recordsBeforeProgress) == 0) {
      progressable.progress();
      if (Thread.currentThread().isInterrupted()) {
        /**
         * Takes care of DefaultSorter.mergeParts, MergeManager's merger threads, and
         * PipelinedSorter's flush(). This is not an expensive check, as it is carried
         * out only every 10000 records or so.
         */
        throw new InterruptedException("Current thread=" + Thread.currentThread().getName() + " got "
            + "interrupted");
      }
    }
  }
  if ((count > 0) && LOG.isTraceEnabled()) {
    LOG.trace("writeFile SAME_KEY count=" + count);
  }
}
 
Example 9
Source File: Merger.java    From hadoop-gpu with Apache License 2.0
public static <K extends Object, V extends Object>
  void writeFile(RawKeyValueIterator records, Writer<K, V> writer, 
                 Progressable progressable, Configuration conf) 
  throws IOException {
    long progressBar = conf.getLong("mapred.merge.recordsBeforeProgress",
        10000);
    long recordCtr = 0;
    while(records.next()) {
      writer.append(records.getKey(), records.getValue());
      
      if (((recordCtr++) % progressBar) == 0) {
        progressable.progress();
      }
    }
}
 
Example 10
Source File: BigQueryUtils.java    From hadoop-connectors with Apache License 2.0
/**
 * Polls job until it is completed.
 *
 * @param bigquery the Bigquery instance to poll.
 * @param projectId the project that is polling.
 * @param jobReference the job to poll.
 * @param progressable the Progressable used to report task progress.
 * @throws IOException on IO Error.
 * @throws InterruptedException on sleep interrupt.
 */
public static void waitForJobCompletion(
    Bigquery bigquery, String projectId, JobReference jobReference, Progressable progressable)
    throws IOException, InterruptedException {

  Sleeper sleeper = Sleeper.DEFAULT;
  BackOff pollBackOff =
      new ExponentialBackOff.Builder()
          .setMaxIntervalMillis(POLL_WAIT_INTERVAL_MAX_MILLIS)
          .setInitialIntervalMillis(POLL_WAIT_INITIAL_MILLIS)
          .setMaxElapsedTimeMillis(POLL_WAIT_MAX_ELAPSED_MILLIS)
          .build();

  // Get starting time.
  long startTime = System.currentTimeMillis();
  long elapsedTime = 0;
  boolean notDone = true;

  // While job is incomplete continue to poll.
  while (notDone) {
    BackOff operationBackOff = new ExponentialBackOff();
    Get get =
        bigquery
            .jobs()
            .get(projectId, jobReference.getJobId())
            .setLocation(jobReference.getLocation());

    Job pollJob =
        ResilientOperation.retry(
            ResilientOperation.getGoogleRequestCallable(get),
            operationBackOff,
            RetryDeterminer.RATE_LIMIT_ERRORS,
            IOException.class,
            sleeper);

    elapsedTime = System.currentTimeMillis() - startTime;
    logger.atFine().log(
        "Job status (%s ms) %s: %s",
        elapsedTime, jobReference.getJobId(), pollJob.getStatus().getState());
    if (pollJob.getStatus().getState().equals("DONE")) {
      notDone = false;
      if (pollJob.getStatus().getErrorResult() != null) {
        throw new IOException(
            String.format(
                "Job %s failed: %s. Errors: %s",
                jobReference.getJobId(),
                pollJob.getStatus().getErrorResult(),
                pollJob.getStatus().getErrors()));
      }
    } else {
      long millisToWait = pollBackOff.nextBackOffMillis();
      if (millisToWait == BackOff.STOP) {
        throw new IOException(
            String.format(
                "Job %s failed to complete after %s millis.",
                jobReference.getJobId(), elapsedTime));
      }
      // Pause execution for the configured duration before polling job status again.
      Thread.sleep(millisToWait);
      // Call progress to ensure task doesn't time out.
      progressable.progress();
    }
  }
}
 
Example 11
Source File: CephFileSystem.java    From cephfs-hadoop with GNU Lesser General Public License v2.1
/**
 * Create a new file and open an FSDataOutputStream that's connected to it.
 * @param path The file to create.
 * @param permission The permissions to apply to the file.
 * @param overwrite If true, overwrite any existing file with
 *   this name; otherwise don't.
 * @param bufferSize Ceph does internal buffering, but you can buffer
 *   in the Java code too if you like.
 * @param replication Replication factor. See documentation on the
 *   "ceph.data.pools" configuration option.
 * @param blockSize Ignored by Ceph. You can set client-wide block sizes
 * via the fs.ceph.blockSize param if you like.
 * @param progress A Progressable to report back to.
 * Reporting is limited but exists.
 * @return An FSDataOutputStream pointing to the created file.
 * @throws IOException if the path is an
 * existing directory, or the path exists but overwrite is false, or there is a
 * failure in attempting to open the file for writing with Ceph.
 */
public FSDataOutputStream create(Path path, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {

  path = makeAbsolute(path);

  boolean exists = exists(path);

  if (progress != null) {
    progress.progress();
  }

  int flags = CephMount.O_WRONLY | CephMount.O_CREAT;

  if (exists) {
    if (overwrite)
      flags |= CephMount.O_TRUNC;
    else
      throw new FileAlreadyExistsException();
  } else {
    Path parent = path.getParent();
    if (parent != null)
      if (!mkdirs(parent))
        throw new IOException("mkdirs failed for " + parent.toString());
  }

  if (progress != null) {
    progress.progress();
  }

  /* Sanity check. Ceph interface uses int for striping strategy */
  if (blockSize > Integer.MAX_VALUE) {
    blockSize = Integer.MAX_VALUE;
    LOG.info("blockSize too large. Rounding down to " + blockSize);
  }

  /*
   * If blockSize <= 0 then we complain. We need to explicitly check for the
   * < 0 case (as opposed to allowing Ceph to raise an exception) because
   * the ceph_open_layout interface accepts -1 to request Ceph-specific
   * defaults.
   */
  if (blockSize <= 0)
    throw new IllegalArgumentException("Invalid block size: " + blockSize);

  /*
   * Ceph may impose alignment restrictions on file layout. In this case we
   * check if the requested block size is aligned to the granularity of a
   * stripe unit used in the file system. When the block size is not aligned
   * we automatically adjust to the next largest multiple of stripe unit
   * granularity.
   */
  int su = ceph.get_stripe_unit_granularity();
  if (blockSize % su != 0) {
    long newBlockSize = blockSize - (blockSize % su) + su;
    LOG.debug("fix alignment: blksize " + blockSize + " new blksize " + newBlockSize);
    blockSize = newBlockSize;
  }

  /*
   * The default Ceph data pool is selected to store files unless a specific
   * data pool is provided when a file is created. Since a pool has a fixed
   * replication factor, in order to achieve a requested replication factor,
   * we must select an appropriate data pool to place the file into.
   */
  String datapool = selectDataPool(path, replication);
  int fd = ceph.open(path, flags, (int)permission.toShort(), (int)blockSize,
      CEPH_STRIPE_COUNT, (int)blockSize, datapool);

  if (progress != null) {
    progress.progress();
  }

  OutputStream ostream = new CephOutputStream(getConf(), ceph, fd,
      bufferSize);
  return new FSDataOutputStream(ostream, statistics);
}
 
Example 12
Source File: Encoder.java    From RDFS with Apache License 2.0
/**
 * Recovers a corrupt block in a parity file to an output stream.
 * 
 * The encoder generates codec.parityLength parity blocks for a source file
 * stripe. Since there is only one output provided, some blocks are written
 * out to files before being written out to the output.
 * 
 * @param blockSize
 *            The block size for the source/parity files.
 * @param out
 *            The destination for the recovered block.
 */
private void encodeFileToStream(StripeReader sReader, long blockSize,
		OutputStream out, Progressable reporter) throws IOException {
	OutputStream[] tmpOuts = new OutputStream[codec.parityLength];
	// One parity block can be written directly to out, rest to local files.
	tmpOuts[0] = out;
	File[] tmpFiles = new File[codec.parityLength - 1];
	for (int i = 0; i < codec.parityLength - 1; i++) {
		tmpFiles[i] = File.createTempFile("parity", "_" + i);
		LOG.info("Created tmp file " + tmpFiles[i]);
		tmpFiles[i].deleteOnExit();
	}
	try {
		// Loop over stripe
		while (sReader.hasNext()) {
			reporter.progress();
			// Create input streams for blocks in the stripe.
			InputStream[] blocks = sReader.getNextStripeInputs();
			try {
				// Create output streams to the temp files.
				for (int i = 0; i < codec.parityLength - 1; i++) {
					tmpOuts[i + 1] = new FileOutputStream(tmpFiles[i]);
				}
				// Call the implementation of encoding.
				encodeStripe(blocks, blockSize, tmpOuts, reporter);
			} finally {
				RaidUtils.closeStreams(blocks);
			}

			// --Test
			long start = System.currentTimeMillis();

			// Close output streams to the temp files and write the temp
			// files
			// to the output provided.
			for (int i = 0; i < codec.parityLength - 1; i++) {
				tmpOuts[i + 1].close();
				tmpOuts[i + 1] = null;
				InputStream in = new FileInputStream(tmpFiles[i]);
				RaidUtils.copyBytes(in, out, writeBufs[i], blockSize);
				reporter.progress();
			}

			// --Test
			TimeStatistics.addCopyTime(System.currentTimeMillis() - start);
		}
	} finally {
		for (int i = 0; i < codec.parityLength - 1; i++) {
			if (tmpOuts[i + 1] != null) {
				tmpOuts[i + 1].close();
			}
			tmpFiles[i].delete();
			LOG.info("Deleted tmp file " + tmpFiles[i]);
		}
	}
}
 
Example 13
Source File: BlockReconstructor.java    From RDFS with Apache License 2.0
/**
 * Reads through a source file reconstructing lost blocks on the way.
 * 
 * @param srcPath
 *            Path identifying the lost file.
 * @throws IOException
 * @return true if file was reconstructed, false if no reconstruction was
 *         necessary or possible.
 */
boolean processFile(Path srcPath, ParityFilePair parityPair,
		Decoder decoder, Context context) throws IOException,
		InterruptedException {
	//LOG.info("Processing file " + srcPath);
	Progressable progress = context;
	if (progress == null) {
		progress = RaidUtils.NULL_PROGRESSABLE;
	}

	DistributedFileSystem srcFs = getDFS(srcPath);
	FileStatus srcStat = srcFs.getFileStatus(srcPath);
	long blockSize = srcStat.getBlockSize();
	long srcFileSize = srcStat.getLen();
	String uriPath = srcPath.toUri().getPath();

	List<LocatedBlockWithMetaInfo> lostBlocks = lostBlocksInFile(srcFs,
			uriPath, srcStat);
	if (lostBlocks.size() == 0) {
		LOG.warn("Couldn't find any lost blocks in file " + srcPath
				+ ", ignoring...");
		return false;
	}
	
	for (LocatedBlockWithMetaInfo lb : lostBlocks) {
		Block lostBlock = lb.getBlock();
		long lostBlockOffset = lb.getStartOffset();

		final long blockContentsSize = Math.min(blockSize, srcFileSize
				- lostBlockOffset);
		File localBlockFile = File.createTempFile(lostBlock.getBlockName(),
				".tmp");
		localBlockFile.deleteOnExit();

		try {
			decoder.recoverBlockToFile(srcFs, srcPath,
					parityPair.getFileSystem(), parityPair.getPath(),
					blockSize, lostBlockOffset, localBlockFile,
					blockContentsSize, context);

			// Now that we have recovered the file block locally, send it.
			String datanode = chooseDatanode(lb.getLocations());
			computeMetadataAndSendReconstructedBlock(datanode,
					localBlockFile, lostBlock, blockContentsSize,
					lb.getDataProtocolVersion(), lb.getNamespaceID(),
					progress);

		} finally {
			localBlockFile.delete();
		}
		progress.progress();
	}

	return true;
}
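When no Context is available, the example above falls back to RaidUtils.NULL_PROGRESSABLE so that later code can call progress() unconditionally. A minimal sketch of such a null-object Progressable (an illustrative assumption, not the actual RDFS implementation) could look like this:

import org.apache.hadoop.util.Progressable;

// Null-object Progressable: callers can invoke progress() without null checks.
public final class NullProgressable implements Progressable {

  public static final Progressable INSTANCE = new NullProgressable();

  private NullProgressable() {}

  @Override
  public void progress() {
    // Intentionally a no-op.
  }
}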
 
Example 14
Source File: BlockReconstructor.java    From RDFS with Apache License 2.0
/**
 * Reads through a parity file, reconstructing lost blocks on the way. This
 * function uses the corresponding source file to regenerate parity file
 * blocks.
 * 
 * @return true if file was reconstructed, false if no reconstruction was
 *         necessary or possible.
 */
boolean processParityFile(Path parityPath, Decoder decoder, Context context)
		throws IOException, InterruptedException {

	Progressable progress = context;
	if (progress == null) {
		progress = RaidUtils.NULL_PROGRESSABLE;
	}

	Path srcPath = sourcePathFromParityPath(parityPath);
	if (srcPath == null) {
		LOG.warn("Could not get regular file corresponding to parity file "
				+ parityPath + ", ignoring...");
		return false;
	}

	DistributedFileSystem parityFs = getDFS(parityPath);
	DistributedFileSystem srcFs = getDFS(srcPath);
	FileStatus parityStat = parityFs.getFileStatus(parityPath);
	long blockSize = parityStat.getBlockSize();
	FileStatus srcStat = srcFs.getFileStatus(srcPath);
	
	// Check timestamp.
	if (srcStat.getModificationTime() != parityStat.getModificationTime()) {
		LOG.warn("Mismatching timestamp for " + srcPath + " and "
				+ parityPath + ", ignoring...");
		return false;
	}

	String uriPath = parityPath.toUri().getPath();
	List<LocatedBlockWithMetaInfo> lostBlocks = lostBlocksInFile(parityFs,
			uriPath, parityStat);
	if (lostBlocks.size() == 0) {
		LOG.warn("Couldn't find any lost blocks in parity file "
				+ parityPath + ", ignoring...");
		return false;
	}
	for (LocatedBlockWithMetaInfo lb : lostBlocks) {
		Block lostBlock = lb.getBlock();
		long lostBlockOffset = lb.getStartOffset();

		File localBlockFile = File.createTempFile(lostBlock.getBlockName(),
				".tmp");
		localBlockFile.deleteOnExit();

		try {
			decoder.recoverParityBlockToFile(srcFs, srcPath, parityFs,
					parityPath, blockSize, lostBlockOffset, localBlockFile,
					context);

			// Now that we have recovered the parity file block locally,
			// send it.
			String datanode = chooseDatanode(lb.getLocations());
			computeMetadataAndSendReconstructedBlock(datanode,
					localBlockFile, lostBlock, blockSize,
					lb.getDataProtocolVersion(), lb.getNamespaceID(),
					progress);
		} finally {
			localBlockFile.delete();
		}
		progress.progress();
	}

	return true;
}
 
Example 15
Source File: BlockReconstructor.java    From RDFS with Apache License 2.0
/**
 * Reads through a parity HAR part file, reconstructing lost blocks on the
 * way. A HAR block can contain many file blocks, as long as the HAR part
 * file block size is a multiple of the file block size.
 * 
 * @return true if file was reconstructed, false if no reconstruction was
 *         necessary or possible.
 */
boolean processParityHarPartFile(Path partFile, Progressable progress)
		throws IOException {
	LOG.info("Processing parity HAR file " + partFile);
	// Get some basic information.
	DistributedFileSystem dfs = getDFS(partFile);
	FileStatus partFileStat = dfs.getFileStatus(partFile);
	long partFileBlockSize = partFileStat.getBlockSize();
	LOG.info(partFile + " has block size " + partFileBlockSize);

	// Find the path to the index file.
	// Parity file HARs are only one level deep, so the index file is at
	// the same level as the part file.
	// Parses through the HAR index file.
	HarIndex harIndex = HarIndex.getHarIndex(dfs, partFile);
	String uriPath = partFile.toUri().getPath();
	int numBlocksReconstructed = 0;
	List<LocatedBlockWithMetaInfo> lostBlocks = lostBlocksInFile(dfs,
			uriPath, partFileStat);
	if (lostBlocks.size() == 0) {
		LOG.warn("Couldn't find any lost blocks in HAR file " + partFile
				+ ", ignoring...");
		return false;
	}
	for (LocatedBlockWithMetaInfo lb : lostBlocks) {
		Block lostBlock = lb.getBlock();
		long lostBlockOffset = lb.getStartOffset();

		File localBlockFile = File.createTempFile(lostBlock.getBlockName(),
				".tmp");
		localBlockFile.deleteOnExit();

		try {
			processParityHarPartBlock(dfs, partFile, lostBlock,
					lostBlockOffset, partFileStat, harIndex,
					localBlockFile, progress);

			// Now that we have recovered the part file block locally, send
			// it.
			String datanode = chooseDatanode(lb.getLocations());
			computeMetadataAndSendReconstructedBlock(datanode,
					localBlockFile, lostBlock, localBlockFile.length(),
					lb.getDataProtocolVersion(), lb.getNamespaceID(),
					progress);

			numBlocksReconstructed++;
		} finally {
			localBlockFile.delete();
		}
		progress.progress();
	}

	return true;
}