org.apache.hadoop.mapred.FileAlreadyExistsException Java Examples

The following examples show how to use org.apache.hadoop.mapred.FileAlreadyExistsException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FileOutputFormat.java    From big-c with Apache License 2.0 6 votes vote down vote up
public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, job.getConfiguration());

  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}
 
Example #2
Source File: FileOutputFormat.java    From hadoop with Apache License 2.0 6 votes vote down vote up
public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, job.getConfiguration());

  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}
 
Example #3
Source File: ZephyrOutputFormat.java    From zephyr with Apache License 2.0 6 votes vote down vote up
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) throws FileAlreadyExistsException, InvalidJobConfException, IOException {
    // Ensure that the output directory is set and not already there
    Path outDir = getOutputPath(job);
    if (outDir == null && job.getNumReduceTasks() != 0) {
        throw new InvalidJobConfException("Output directory not set in JobConf.");
    }
    if (outDir != null) {
        FileSystem fs = outDir.getFileSystem(job);
        // normalize the output directory
        outDir = fs.makeQualified(outDir);
        setOutputPath(job, outDir);

        // get delegation token for the outDir's file system
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job);
        String jobUuid = job.get("zephyr.job.uuid");
        if (jobUuid == null)
            throw new InvalidJobConfException("This output format REQUIRES the value zephyr.job.uuid to be specified in the job configuration!");
        // // check its existence
        // if (fs.exists(outDir)) {
        // throw new FileAlreadyExistsException("Output directory " + outDir
        // + " already exists");
        // }
    }
}
 
Example #4
Source File: NativeSparkDataSet.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Overridden to avoid throwing an exception if the specified directory
 * for export already exists.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    Path outDir = getOutputPath(job);
    if(outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    } else {
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job.getConfiguration());
        /*
        if(outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
            System.out.println("Output dir already exists, no problem");
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
        */
    }
}
 
Example #5
Source File: SparkDataSet.java    From spliceengine with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Overridden to avoid throwing an exception if the specified directory
 * for export already exists.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    Path outDir = getOutputPath(job);
    if(outDir == null) {
        throw new InvalidJobConfException("Output directory not set.");
    } else {
        TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[]{outDir}, job.getConfiguration());
        /*
        if(outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
            System.out.println("Output dir already exists, no problem");
            throw new FileAlreadyExistsException("Output directory " + outDir + " already exists");
        }
        */
    }
}
 
Example #6
Source File: TeraOutputFormat.java    From hadoop with Apache License 2.0 5 votes vote down vote up
@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  if (fs.exists(outDir)) {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  }
}
 
Example #7
Source File: TeraOutputFormat.java    From pravega-samples with Apache License 2.0 5 votes vote down vote up
@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  try {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  } catch (FileNotFoundException ignored) {
  }
}
 
Example #8
Source File: TeraOutputFormat.java    From big-c with Apache License 2.0 5 votes vote down vote up
@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);

  if (fs.exists(outDir)) {
    // existing output dir is considered empty iff its only content is the
    // partition file.
    //
    final FileStatus[] outDirKids = fs.listStatus(outDir);
    boolean empty = false;
    if (outDirKids != null && outDirKids.length == 1) {
      final FileStatus st = outDirKids[0];
      final String fname = st.getPath().getName();
      empty =
        !st.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(fname);
    }
    if (TeraSort.getUseSimplePartitioner(job) || !empty) {
      throw new FileAlreadyExistsException("Output directory " + outDir
          + " already exists");
    }
  }
}
 
Example #9
Source File: ForwardingBigQueryFileOutputFormat.java    From hadoop-connectors with Apache License 2.0 5 votes vote down vote up
/**
 * Checks to make sure the configuration is valid, the output path doesn't already exist, and that
 * a connection to BigQuery can be established.
 */
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
  Configuration conf = job.getConfiguration();

  // Validate the output configuration.
  BigQueryOutputConfiguration.validateConfiguration(conf);

  // Get the output path.
  Path outputPath = BigQueryOutputConfiguration.getGcsOutputPath(conf);
  logger.atInfo().log("Using output path '%s'.", outputPath);

  // Error if the output path already exists.
  FileSystem outputFileSystem = outputPath.getFileSystem(conf);
  if (outputFileSystem.exists(outputPath)) {
    throw new IOException("The output path '" + outputPath + "' already exists.");
  }

  // Error if compression is set as there's mixed support in BigQuery.
  if (FileOutputFormat.getCompressOutput(job)) {
    throw new IOException("Compression isn't supported for this OutputFormat.");
  }

  // Error if unable to create a BigQuery helper.
  try {
    new BigQueryFactory().getBigQueryHelper(conf);
  } catch (GeneralSecurityException gse) {
    throw new IOException("Failed to create BigQuery client", gse);
  }

  // Let delegate process its checks.
  getDelegate(conf).checkOutputSpecs(job);
}
 
Example #10
Source File: PigOutputFormat.java    From spork with Apache License 2.0 5 votes vote down vote up
private void checkOutputSpecsHelper(List<POStore> stores, JobContext
        jobcontext) throws IOException, InterruptedException {
    for (POStore store : stores) {
        // make a copy of the original JobContext so that
        // each OutputFormat get a different copy
        JobContext jobContextCopy = HadoopShims.createJobContext(
                jobcontext.getConfiguration(), jobcontext.getJobID());

        // set output location
        PigOutputFormat.setLocation(jobContextCopy, store);

        StoreFuncInterface sFunc = store.getStoreFunc();
        OutputFormat of = sFunc.getOutputFormat();

        // The above call should have update the conf in the JobContext
        // to have the output location - now call checkOutputSpecs()
        try {
            of.checkOutputSpecs(jobContextCopy);
        } catch (IOException ioe) {
            boolean shouldThrowException = true;
            if (sFunc instanceof OverwritableStoreFunc) {
                if (((OverwritableStoreFunc) sFunc).shouldOverwrite()) {
                    if (ioe instanceof FileAlreadyExistsException
                            || ioe instanceof org.apache.hadoop.fs.FileAlreadyExistsException) {
                        shouldThrowException = false;
                    }
                }
            }
            if (shouldThrowException)
                throw ioe;
        }
    }
}
 
Example #11
Source File: TestNativeMapReduce.java    From spork with Apache License 2.0 5 votes vote down vote up
@Test
public void testNativeMRJobSimpleFailure() throws Exception{
    try{
        //test if correct return code is obtained when query fails
        // the native MR is writing to an exisiting and should fail

        Collection<String> results = new HashSet<String>();
        results.add("(one,1)");
        results.add("(two,2)");
        results.add("(three,3)");

        pigServer.setBatchOn();
        pigServer.registerQuery("A = load '" + INPUT_FILE + "';");
        pigServer.registerQuery("B = mapreduce '" + jarFileName + "' " +
                "Store A into 'table_testNativeMRJobSimple_input' "+
                "Load 'table_testNativeMRJobSimple_output' "+
        "`org.apache.pig.test.utils.WordCount table_testNativeMRJobSimple_input " + INPUT_FILE + "`;");
        pigServer.registerQuery("Store B into 'table_testNativeMRJobSimpleDir';");
        pigServer.executeBatch();

        assertTrue("job failed", PigStats.get().getReturnCode() != 0);

    } catch (JobCreationException e) {
        // Running in Tez mode throw exception
        assertTrue(e.getCause() instanceof FileAlreadyExistsException);
    }
    finally{
        // We have to manually delete intermediate mapreduce files
        Util.deleteFile(cluster, "table_testNativeMRJobSimple_input");
        Util.deleteFile(cluster, "table_testNativeMRJobSimpleDir");
    }
}
 
Example #12
Source File: FileOutputFormat.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
public void checkOutputSpecs(JobContext job
                             ) throws FileAlreadyExistsException, IOException{
  // Ensure that the output directory is set and not already there
  Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set.");
  }
  if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
    throw new FileAlreadyExistsException("Output directory " + outDir + 
                                         " already exists");
  }
}
 
Example #13
Source File: CsvBulkLoadToolIT.java    From phoenix with Apache License 2.0 5 votes vote down vote up
@Test
public void testAlreadyExistsOutputPath() {
    String tableName = "TABLE9";
    String outputPath = "/tmp/output/tabl9";
    try {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        fs.create(new Path(outputPath));
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        printWriter.println("1,FirstName 1,LastName 1");
        printWriter.println("2,FirstName 2,LastName 2");
        printWriter.close();
        
        CsvBulkLoadTool csvBulkLoadTool = new CsvBulkLoadTool();
        csvBulkLoadTool.setConf(getUtility().getConfiguration());
        csvBulkLoadTool.run(new String[] {
            "--input", "/tmp/input9.csv",
            "--output", outputPath,
            "--table", tableName,
            "--zookeeper", zkQuorum });
        
        fail(String.format("Output path %s already exists. hence, should fail",outputPath));
    } catch (Exception ex) {
        assertTrue(ex instanceof FileAlreadyExistsException); 
    }
}
 
Example #14
Source File: RegexBulkLoadToolIT.java    From phoenix with Apache License 2.0 5 votes vote down vote up
@Test
public void testAlreadyExistsOutputPath() {
    String tableName = "TABLE9";
    String outputPath = "/tmp/output/tabl9";
    try {
        Statement stmt = conn.createStatement();
        stmt.execute("CREATE TABLE " + tableName + "(ID INTEGER NOT NULL PRIMARY KEY, "
                + "FIRST_NAME VARCHAR, LAST_NAME VARCHAR)");
        
        FileSystem fs = FileSystem.get(getUtility().getConfiguration());
        fs.create(new Path(outputPath));
        FSDataOutputStream outputStream = fs.create(new Path("/tmp/input9.csv"));
        PrintWriter printWriter = new PrintWriter(outputStream);
        printWriter.println("1,FirstName 1,LastName 1");
        printWriter.println("2,FirstName 2,LastName 2");
        printWriter.close();
        
        RegexBulkLoadTool regexBulkLoadTool = new RegexBulkLoadTool();
        regexBulkLoadTool.setConf(getUtility().getConfiguration());
        regexBulkLoadTool.run(new String[] {
            "--input", "/tmp/input9.csv",
            "--output", outputPath,
            "--table", tableName,
            "--regex", "([^,]*),([^,]*),([^,]*)",
            "--zookeeper", zkQuorum });
        
        fail(String.format("Output path %s already exists. hence, should fail",outputPath));
    } catch (Exception ex) {
        assertTrue(ex instanceof FileAlreadyExistsException); 
    }
}
 
Example #15
Source File: StreamJob.java    From hadoop-gpu with Apache License 2.0 4 votes vote down vote up
public int submitAndMonitorJob() throws IOException {

    if (jar_ != null && isLocalHadoop()) {
      // getAbs became required when shell and subvm have different working dirs...
      File wd = new File(".").getAbsoluteFile();
      StreamUtil.unJar(new File(jar_), wd);
    }

    // if jobConf_ changes must recreate a JobClient
    jc_ = new JobClient(jobConf_);
    boolean error = true;
    running_ = null;
    String lastReport = null;
    try {
      running_ = jc_.submitJob(jobConf_);
      jobId_ = running_.getID();

      LOG.info("getLocalDirs(): " + Arrays.asList(jobConf_.getLocalDirs()));
      LOG.info("Running job: " + jobId_);
      jobInfo();

      while (!running_.isComplete()) {
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
        }
        running_ = jc_.getJob(jobId_);
        String report = null;
        report = " map " + Math.round(running_.mapProgress() * 100) + "%  reduce "
          + Math.round(running_.reduceProgress() * 100) + "%";

        if (!report.equals(lastReport)) {
          LOG.info(report);
          lastReport = report;
        }
      }
      if (!running_.isSuccessful()) {
        jobInfo();
	LOG.error("Job not Successful!");
	return 1;
      }
      LOG.info("Job complete: " + jobId_);
      LOG.info("Output: " + output_);
      error = false;
    } catch(FileNotFoundException fe) {
      LOG.error("Error launching job , bad input path : " + fe.getMessage());
      return 2;
    } catch(InvalidJobConfException je) {
      LOG.error("Error launching job , Invalid job conf : " + je.getMessage());
      return 3;
    } catch(FileAlreadyExistsException fae) {
      LOG.error("Error launching job , Output path already exists : " 
                + fae.getMessage());
      return 4;
    } catch(IOException ioe) {
      LOG.error("Error Launching job : " + ioe.getMessage());
      return 5;
    } finally {
      if (error && (running_ != null)) {
        LOG.info("killJob...");
        running_.killJob();
      }
      jc_.close();
    }
    return 0;
  }
 
Example #16
Source File: PerMapOutputFormat.java    From webarchive-commons with Apache License 2.0 4 votes vote down vote up
/**
 * Over-ride the default FileOutputFormat's checkOutputSpecs() to
 * allow for the target directory to already exist.
 */
public void checkOutputSpecs( FileSystem ignored, JobConf job )
  throws FileAlreadyExistsException, InvalidJobConfException, IOException 
{
}
 
Example #17
Source File: SSTableIndexOutputFormat.java    From hadoop-sstable with Apache License 2.0 4 votes vote down vote up
@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
}
 
Example #18
Source File: TestPigContext.java    From spork with Apache License 2.0 4 votes vote down vote up
@Test
public void testHadoopExceptionCreation() throws Exception {
    Object object = PigContext
            .instantiateFuncFromSpec("org.apache.hadoop.mapred.FileAlreadyExistsException");
    assertTrue(object instanceof FileAlreadyExistsException);
}