Java Code Examples for org.apache.hadoop.mapred.JobConf

The following examples show how to use org.apache.hadoop.mapred.JobConf, the job configuration class of the classic (mapred) MapReduce API. All of them are extracted from open source projects.
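
Before diving into the extracted examples, here is a minimal, self-contained driver that shows where JobConf fits in the classic mapred API. It is a sketch written for this page, not taken from any of the projects below: it wires the identity mapper and reducer from org.apache.hadoop.mapred.lib so the job simply copies text input to text output, then submits the job with JobClient.runJob. The class name and the command-line paths are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class IdentityCopyDriver {

  public static void main(String[] args) throws Exception {
    // JobConf is the per-job configuration object of the classic mapred API.
    JobConf conf = new JobConf(IdentityCopyDriver.class);
    conf.setJobName("identity-copy");

    // TextInputFormat produces <LongWritable offset, Text line> records.
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    // Identity mapper/reducer pass records through unchanged.
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // Input and output locations come from the command line (placeholders).
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // Submit the job and block until it completes.
    JobClient.runJob(conf);
  }
}
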
Example 1
Project: hadoop   File: TestFetcher.java
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);

  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);

  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
 
Example 2
Project: ditb   File: TestTableInputFormat.java
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
 
Example 3
Project: ditb   File: TestRowCounter.java
@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateAndRunSubmittableJob() throws Exception {
  RowCounter rCounter = new RowCounter();
  rCounter.setConf(HBaseConfiguration.create());
  String[] args = new String[] { "\temp", "tableA", "column1", "column2",
      "column3" };
  JobConf jobConfig = rCounter.createSubmittableJob(args);

  assertNotNull(jobConfig);
  assertEquals(0, jobConfig.getNumReduceTasks());
  assertEquals("rowcounter", jobConfig.getJobName());
  assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
  assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
  assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
      .join("column1", "column2", "column3"));
  assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
}
 
Example 4
Project: hadoop   File: DistCpV1.java
/** Mapper configuration.
 * Extracts source and destination file system, as well as
 * top-level paths on source and destination directories.
 * Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job)
{
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
  this.job = job;
}
 
Example 5
Project: ditb   File: TestTableMapReduceUtil.java
/**
 * Check that the given number of reduce tasks for the given job configuration
 * does not exceed the number of regions for the given table.
 */
@Test
public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Assert.assertNotNull(presidentsTable);
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.setScannerCaching(jobConf, 100);
  assertEquals(1, jobConf.getNumReduceTasks());
  assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));

  jobConf.setNumReduceTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumReduceTasks());
}
 
Example 6
Project: aliyun-maxcompute-data-collectors   File: MySQLUtils.java
/**
 * Writes the user's password to a tmp file with 0600 permissions.
 * @return the filename used.
 */
public static String writePasswordFile(Configuration conf)
    throws IOException {
  // Create the temp file to hold the user's password.
  String tmpDir = conf.get(
      ConfigurationConstants.PROP_JOB_LOCAL_DIRECTORY, "/tmp/");
  File tempFile = File.createTempFile("mysql-cnf", ".cnf", new File(tmpDir));

  // Make the password file only private readable.
  DirectImportUtils.setFilePermissions(tempFile, "0600");

  // If we're here, the password file is believed to be ours alone.  The
  // inability to set chmod 0600 inside Java is troublesome. We have to
  // trust that the external 'chmod' program in the path does the right
  // thing, and returns the correct exit status. But given our inability to
  // re-read the permissions associated with a file, we'll have to make do
  // with this.
  String password = DBConfiguration.getPassword((JobConf) conf);
  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
      new FileOutputStream(tempFile)));
  w.write("[client]\n");
  w.write("password=" + password + "\n");
  w.close();

  return tempFile.toString();
}
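
The long comment above reflects the fact that, at the time, Java had no portable way to set file permissions, so the code shells out to chmod via DirectImportUtils.setFilePermissions. On Java 7+ with a POSIX file system, the same effect can be achieved atomically through java.nio.file, as in the hypothetical sketch below (an alternative approach, not code from this project).

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.attribute.PosixFilePermissions;
import java.util.Set;

public final class PrivateTempFiles {

  private PrivateTempFiles() { }

  /** Creates a temp file readable and writable only by its owner (0600). */
  public static Path createOwnerOnlyTempFile(String tmpDir) throws IOException {
    Set<PosixFilePermission> perms = PosixFilePermissions.fromString("rw-------");
    FileAttribute<Set<PosixFilePermission>> ownerOnly =
        PosixFilePermissions.asFileAttribute(perms);
    // Permissions are applied at creation time, so the file is never
    // world-readable, and no external chmod process is involved.
    return Files.createTempFile(Paths.get(tmpDir), "mysql-cnf", ".cnf", ownerOnly);
  }
}
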
 
Example 7
Project: ditb   File: TableMapReduceUtil.java
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
  boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
      MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
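
As the javadoc notes, initTableReduceJob must be called on the JobConf before the job is submitted. The sketch below shows a hypothetical setup: a pass-through TableReduce implementation plus the call that wires it to an output table named "destTable". It assumes the three-argument initTableReduceJob overload (no explicit partitioner) available in HBase's mapred package; the class and table names are placeholders.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapred.TableReduce;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class TableReduceJobSetup {

  /** A pass-through reducer that writes every incoming Put to the table. */
  public static class PutReducer extends MapReduceBase
      implements TableReduce<ImmutableBytesWritable, Put> {
    @Override
    public void reduce(ImmutableBytesWritable key, Iterator<Put> values,
        OutputCollector<ImmutableBytesWritable, Put> output, Reporter reporter)
        throws IOException {
      while (values.hasNext()) {
        output.collect(key, values.next());
      }
    }
  }

  public static JobConf createJob() throws IOException {
    JobConf job = new JobConf(HBaseConfiguration.create(), TableReduceJobSetup.class);
    job.setJobName("write-to-destTable");
    // The map side (input format, mapper, map output key/value classes) is
    // omitted here and would be configured before submission as well.
    TableMapReduceUtil.initTableReduceJob("destTable", PutReducer.class, job);
    return job;
  }
}
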
 
Example 8
Project: hadoop   File: TestInputSampler.java
/**
 * Verify SplitSampler contract in mapred.lib.InputSampler, which is added
 * back for binary compatibility of M/R 1.x
 */
@Test (timeout = 30000)
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testMapredSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable,NullWritable>
      sampler = new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler
          <IntWritable,NullWritable>(NUM_SAMPLES, NUM_SPLITS);
  int inits[] = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Object[] samples = sampler.getSample(
      new TestMapredInputSamplerIF(100000, TOT_SPLITS, inits),
      new JobConf());
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    // mapred.lib.InputSampler.SplitSampler has a sampling step
    assertEquals(i % STEP_SAMPLE + TOT_SPLITS * (i / STEP_SAMPLE),
        ((IntWritable)samples[i]).get());
  }
}
 
Example 9
Project: hadoop   File: DataJoinJob.java
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
 
Example 10
Project: hadoop   File: LocalFetcher.java
public LocalFetcher(JobConf job, TaskAttemptID reduceId,
               ShuffleSchedulerImpl<K, V> scheduler,
               MergeManager<K,V> merger,
               Reporter reporter, ShuffleClientMetrics metrics,
               ExceptionReporter exceptionReporter,
               SecretKey shuffleKey,
               Map<TaskAttemptID, MapOutputFile> localMapFiles) {
  super(job, reduceId, scheduler, merger, reporter, metrics,
      exceptionReporter, shuffleKey);

  this.job = job;
  this.localMapFiles = localMapFiles;

  setName("localfetcher#" + id);
  setDaemon(true);
}
 
Example 11
Project: hadoop   File: StreamXmlRecordReader.java
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                             JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);

  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");

  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;

  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
 
Example 12
Project: hadoop   File: GenerateDistCacheData.java
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
Example 13
Project: ditb   File: TestTableInputFormat.java
@Override
public void configure(JobConf job) {
  try {
    HTable exampleTable = new HTable(HBaseConfiguration.create(job),
      Bytes.toBytes("exampleDeprecatedTable"));
    // mandatory
    setHTable(exampleTable);
    byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
      Bytes.toBytes("columnB") };
    // mandatory
    setInputColumns(inputColumns);
    Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
    // optional
    setRowFilter(exampleFilter);
  } catch (IOException exception) {
    throw new RuntimeException("Failed to configure for job.", exception);
  }
}
 
Example 14
Project: hadoop   File: Logalyzer.java
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
  
  //Initialize the specification for *comparison*
  String sortColumns = this.conf.get(SORT_COLUMNS, null);
  if (sortColumns != null) {
    sortSpec = sortColumns.split(",");
  }
  
  //Column-separator
  columnSeparator = this.conf.get(COLUMN_SEPARATOR, "");
}
 
Example 15
Project: ditb   File: User.java
@Override
public void obtainAuthTokenForJob(JobConf job)
    throws IOException, InterruptedException {
  try {
    Class<?> c = Class.forName(
        "org.apache.hadoop.hbase.security.token.TokenUtil");
    Methods.call(c, null, "obtainTokenForJob",
        new Class[]{JobConf.class, UserGroupInformation.class},
        new Object[]{job, ugi});
  } catch (ClassNotFoundException cnfe) {
    throw new RuntimeException("Failure loading TokenUtil class, "
        +"is secure RPC available?", cnfe);
  } catch (IOException ioe) {
    throw ioe;
  } catch (InterruptedException ie) {
    throw ie;
  } catch (RuntimeException re) {
    throw re;
  } catch (Exception e) {
    throw new UndeclaredThrowableException(e,
        "Unexpected error calling TokenUtil.obtainAndCacheToken()");
  }
}
 
Example 16
Project: dremio-oss   File: FileSplitParquetRecordReader.java
public FileSplitParquetRecordReader(
    final OperatorContext oContext,
    final ParquetReaderFactory readerFactory,
    final List<SchemaPath> columnsToRead,
    final List<SchemaPath> groupScanColumns,
    final List<FilterCondition> conditions,
    final FileSplit fileSplit,
    final ParquetMetadata footer,
    final JobConf jobConf,
    final boolean vectorize,
    final boolean enableDetailedTracing
) {
  this.oContext = oContext;
  this.columnsToRead = columnsToRead;
  this.groupScanColumns = groupScanColumns;
  this.conditions = conditions;
  this.fileSplit = fileSplit;
  this.footer = footer;
  this.jobConf = jobConf;
  this.readerFactory = readerFactory;
  this.vectorize = vectorize;
  this.enableDetailedTracing = enableDetailedTracing;
}
 
Example 17
Project: hadoop   File: TestGridMixClasses.java
@Test (timeout=30000)
public void testCompareGridmixJob() throws Exception {
  Configuration conf = new Configuration();
  Path outRoot = new Path("target");
  JobStory jobDesc = mock(JobStory.class);
  when(jobDesc.getName()).thenReturn("JobName");
  when(jobDesc.getJobConf()).thenReturn(new JobConf(conf));
  UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
  GridmixJob j1 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0);
  GridmixJob j2 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 0);
  GridmixJob j3 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1);
  GridmixJob j4 = new LoadJob(conf, 1000L, jobDesc, outRoot, ugi, 1);

  assertTrue(j1.equals(j2));
  assertEquals(0, j1.compareTo(j2));
  // Only one parameter matters
  assertFalse(j1.equals(j3));
  // compare id and submissionMillis
  assertEquals(-1, j1.compareTo(j3));
  assertEquals(-1, j1.compareTo(j4));

}
 
Example 18
Project: ditb   File: TestTableSnapshotInputFormat.java
@Test
public void testInitTableSnapshotMapperJobConfig() throws Exception {
  setupCluster();
  TableName tableName = TableName.valueOf("testInitTableSnapshotMapperJobConfig");
  String snapshotName = "foo";

  try {
    createTableAndSnapshot(UTIL, tableName, snapshotName, getStartRow(), getEndRow(), 1);
    JobConf job = new JobConf(UTIL.getConfiguration());
    Path tmpTableDir = UTIL.getRandomDir();

    TableMapReduceUtil.initTableSnapshotMapJob(snapshotName,
      COLUMNS, TestTableSnapshotMapper.class, ImmutableBytesWritable.class,
      NullWritable.class, job, false, tmpTableDir);

    // TODO: would be better to examine directly the cache instance that results from this
    // config. Currently this is not possible because BlockCache initialization is static.
    Assert.assertEquals(
      "Snapshot job should be configured for default LruBlockCache.",
      HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT,
      job.getFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, -1), 0.01);
    Assert.assertEquals(
      "Snapshot job should not use BucketCache.",
      0, job.getFloat("hbase.bucketcache.size", -1), 0.01);
  } finally {
    UTIL.getHBaseAdmin().deleteSnapshot(snapshotName);
    UTIL.deleteTable(tableName);
    tearDownCluster();
  }
}
 
Example 19
Project: hadoop   File: TestMROutputFormat.java
@Test
public void testJobSubmission() throws Exception {
  JobConf conf = new JobConf();
  Job job = new Job(conf);
  job.setInputFormatClass(TestInputFormat.class);
  job.setMapperClass(TestMapper.class);
  job.setOutputFormatClass(TestOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
}
 
Example 20
Project: hadoop   File: ValueAggregatorJobBase.java
private static ValueAggregatorDescriptor getValueAggregatorDescriptor(
    String spec, JobConf job) {
  if (spec == null)
    return null;
  String[] segments = spec.split(",", -1);
  String type = segments[0];
  if (type.compareToIgnoreCase("UserDefined") == 0) {
    String className = segments[1];
    return new UserDefinedValueAggregatorDescriptor(className, job);
  }
  return null;
}
 
Example 21
Project: hadoop   File: TestEncryptedShuffle.java
private void encryptedShuffleWithCerts(boolean useClientCerts)
  throws Exception {
  try {
    Configuration conf = new Configuration();
    String keystoresDir = new File(BASEDIR).getAbsolutePath();
    String sslConfsDir =
      KeyStoreTestUtil.getClasspathDir(TestEncryptedShuffle.class);
    KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfsDir, conf,
                                    useClientCerts);
    conf.setBoolean(MRConfig.SHUFFLE_SSL_ENABLED_KEY, true);
    startCluster(conf);
    FileSystem fs = FileSystem.get(getJobConf());
    Path inputDir = new Path("input");
    fs.mkdirs(inputDir);
    Writer writer =
      new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
    writer.write("hello");
    writer.close();

    Path outputDir = new Path("output", "output");

    JobConf jobConf = new JobConf(getJobConf());
    jobConf.setInt("mapred.map.tasks", 1);
    jobConf.setInt("mapred.map.max.attempts", 1);
    jobConf.setInt("mapred.reduce.max.attempts", 1);
    jobConf.set("mapred.input.dir", inputDir.toString());
    jobConf.set("mapred.output.dir", outputDir.toString());
    JobClient jobClient = new JobClient(jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    runJob.waitForCompletion();
    Assert.assertTrue(runJob.isComplete());
    Assert.assertTrue(runJob.isSuccessful());
  } finally {
    stopCluster();
  }
}
 
Example 22
Project: hadoop   File: ZombieJob.java
@Override
public String getQueueName() {
  QueueName queue = job.getQueue();
  return (queue == null || queue.getValue() == null) 
         ? JobConf.DEFAULT_QUEUE_NAME 
         : queue.getValue();
}
 
Example 23
Project: hadoop   File: LazyOutputFormat.java
public LazyRecordWriter(JobConf job, OutputFormat of, String name,
    Progressable progress)  throws IOException {
  this.of = of;
  this.job = job;
  this.name = name;
  this.progress = progress;
}
 
Example 24
Project: aliyun-maxcompute-data-collectors   File: TestMainframeFTPClientUtils.java
@Before
public void setUp() {
  conf = new JobConf();
  mockFTPClient = mock(FTPClient.class);
  when(mockFTPClient.getReplyString()).thenReturn("");
  MainframeFTPClientUtils.setMockFTPClient(mockFTPClient);
}
 
Example 25
Project: aliyun-maxcompute-data-collectors   File: PGBulkloadManagerManualTest.java
public PGBulkloadManagerManualTest() {
  JobConf conf = new JobConf(getConf());
  DBConfiguration.configureDB(conf,
                              "org.postgresql.Driver",
                              getConnectString(),
                              getUserName(),
                              (String) null, (Integer) null);
  dbConf = new DBConfiguration(conf);
}
 
Example 26
Project: aliyun-maxcompute-data-collectors   File: ExplicitSetMapper.java
public void configure(JobConf job) {
  String userTypeName = job.get(USER_TYPE_NAME_KEY);
  if (null == userTypeName) {
    throw new RuntimeException("Unconfigured parameter: "
        + USER_TYPE_NAME_KEY);
  }

  setCol = job.get(SET_COL_KEY);
  setVal = job.get(SET_VAL_KEY);

  LOG.info("User type name set to " + userTypeName);
  LOG.info("Will try to set col " + setCol + " to " + setVal);

  this.userRecord = null;

  try {
    Configuration conf = new Configuration();
    Class userClass = Class.forName(userTypeName, true,
        Thread.currentThread().getContextClassLoader());
    this.userRecord =
        (SqoopRecord) ReflectionUtils.newInstance(userClass, conf);
  } catch (ClassNotFoundException cnfe) {
    // handled by the next block.
    LOG.error("ClassNotFound exception: " + cnfe.toString());
  } catch (Exception e) {
    LOG.error("Got an exception reflecting user class: " + e.toString());
  }

  if (null == this.userRecord) {
    LOG.error("Could not instantiate user record of type " + userTypeName);
    throw new RuntimeException("Could not instantiate user record of type "
        + userTypeName);
  }
}
 
Example 27
Project: hadoop   File: PipeReducer.java
String getPipeCommand(JobConf job) {
  String str = job.get("stream.reduce.streamprocessor");
  if (str == null) {
    return str;
  }
  try {
    return URLDecoder.decode(str, "UTF-8");
  } catch (UnsupportedEncodingException e) {
    System.err.println("stream.reduce.streamprocessor in jobconf not found");
    return null;
  }
}
 
Example 28
Project: hadoop   File: Application.java
private void writePasswordToLocalFile(String localPasswordFile,
    byte[] password, JobConf conf) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Path localPath = new Path(localPasswordFile);
  FSDataOutputStream out = FileSystem.create(localFs, localPath,
      new FsPermission("400"));
  out.write(password);
  out.close();
}
 
Example 29
Project: hadoop   File: PipeMapper.java
String getPipeCommand(JobConf job) {
  String str = job.get("stream.map.streamprocessor");
  if (str == null) {
    return str;
  }
  try {
    return URLDecoder.decode(str, "UTF-8");
  }
  catch (UnsupportedEncodingException e) {
    System.err.println("stream.map.streamprocessor in jobconf not found");
    return null;
  }
}
 
Example 30
Project: hadoop   File: Parser.java
/**
 * Parse a list of comma-separated nodes.
 */
public void parse(List<Token> args, JobConf job) throws IOException {
  ListIterator<Token> i = args.listIterator();
  while (i.hasNext()) {
    Token t = i.next();
    t.getNode().setID(i.previousIndex() >> 1);
    kids.add(t.getNode());
    if (i.hasNext() && !TType.COMMA.equals(i.next().getType())) {
      throw new IOException("Expected ','");
    }
  }
}
 
Example 31
Project: hadoop   File: MRCaching.java
public void configure(JobConf jconf) {
  conf = jconf;
  try {
    // read the cached files (unzipped, unjarred and text)
    // and put it into a single file TEST_ROOT_DIR/test.txt
    String TEST_ROOT_DIR = jconf.get("test.build.data","/tmp");
    Path file = new Path("file:///", TEST_ROOT_DIR);
    FileSystem fs = FileSystem.getLocal(conf);
    if (!fs.mkdirs(file)) {
      throw new IOException("Mkdirs failed to create " + file.toString());
    }
    Path fileOut = new Path(file, "test.txt");
    fs.delete(fileOut, true);
    DataOutputStream out = fs.create(fileOut); 
    String[] symlinks = new String[6];
    symlinks[0] = ".";
    symlinks[1] = "testjar";
    symlinks[2] = "testzip";
    symlinks[3] = "testtgz";
    symlinks[4] = "testtargz";
    symlinks[5] = "testtar";

    for (int i = 0; i < symlinks.length; i++) {
      // read out the files from these archives
      File f = new File(symlinks[i]);
      File txt = new File(f, "test.txt");
      FileInputStream fin = new FileInputStream(txt);
      BufferedReader reader = new BufferedReader(new InputStreamReader(fin));
      String str = reader.readLine();
      reader.close();
      out.writeBytes(str);
      out.writeBytes("\n");
    }
    out.close();
  } catch (IOException ie) {
    System.out.println(StringUtils.stringifyException(ie));
  }
}
 
Example 32
Project: dremio-oss   File: HiveAbstractReader.java
public static Properties addProperties(JobConf jobConf, Properties output, List<Prop> props){
  for(Prop p : props){
    output.setProperty(p.getKey(), p.getValue());
    jobConf.set(p.getKey(), p.getValue());
  }
  return output;
}
 
Example 33
Project: monarch   File: MonarchSplit.java
/**
 * Provide the required splits from the specified configuration. By default this
 *   method queries (via function execution) the region with the `_meta' suffix,
 *   so make sure that the region name is passed accordingly.
 *
 * @param conf the job configuration
 * @param numSplits the required number of splits
 * @return the required splits to read/write the data
 * @throws IOException if table does not exist.
 */
public static InputSplit[] getSplits(final JobConf conf, final int numSplits) throws IOException {
  final Path[] tablePaths = FileInputFormat.getInputPaths(conf);
  /** initialize cache if not done yet.. **/
  final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf);
  String tableName = conf.get(MonarchUtils.REGION);
  boolean isFTable = MonarchUtils.isFTable(conf);
  Table table = null;
  if (isFTable) {
    table = aClient.getFTable(tableName);
  } else {
    table = aClient.getMTable(tableName);
  }
  if (table == null) {
    throw new IOException("Table " + tableName + "does not exist.");
  }
  int totalnumberOfSplits = table.getTableDescriptor().getTotalNumOfSplits();
  Map<Integer, Set<ServerLocation>> bucketMap = new HashMap<>(numSplits);
  final AtomicLong start = new AtomicLong(0L);
  MonarchSplit[] splits = MTableUtils
    .getSplitsWithSize(tableName, numSplits, totalnumberOfSplits, bucketMap)
    .stream().map(e -> {
      MonarchSplit ms = convertToSplit(tablePaths, start.get(), e, bucketMap);
      start.addAndGet(e.getSize());
      return ms;
    }).toArray(MonarchSplit[]::new);
  logger.info("numSplits= {}; MonarchSplits= {}", numSplits, Arrays.toString(splits));
  return splits;
}
 
Example 34
Project: monarch   File: MonarchSplit.java
@SuppressWarnings("unchecked")
public static InputSplit[] getSplits(final JobConf conf, final int numSplits, int dummy) {
  final Path[] tablePaths = FileInputFormat.getInputPaths(conf);
  long splitSize = NumberUtils.toLong(conf.get(MonarchUtils.SPLIT_SIZE_KEY), DEFAULT_SPLIT_SIZE);

  final String regionName = conf.get(MonarchUtils.REGION) + MonarchUtils.META_TABLE_SFX;

  MPredicateHolder ph = new MPredicateHolder(-1, BasicTypes.STRING,
    CompareOp.REGEX, ".*"+MonarchUtils.KEY_BLOCKS_SFX);

  MonarchGetAllFunction func = new MonarchGetAllFunction();
  final AmpoolClient aClient = MonarchUtils.getConnectionFromConf(conf);
  Execution exec = FunctionService
    .onServer(((GemFireCacheImpl) (aClient.getGeodeCache())).getDefaultPool())
    .withArgs(new Object[]{regionName, ph});
  ResultCollector rc = exec.execute(func);
  /** TODO: refactor the code below; changes will be needed if the function is modified to return its results differently. **/
  List<String[]> output = (List<String[]>)((List) rc.getResult()).get(0);
  if (output.isEmpty()) {
    logger.error("No entries found in region= {} with key_prefix= %-{}",
      regionName, MonarchUtils.KEY_BLOCKS_SFX);
    return new MonarchSplit[0];
  }

  List<MonarchSplit> list = new ArrayList<>(output.size());
  String prefix;
  long numberOfBlocks;
  for (final String[] arr : output) {
    prefix = arr[0].substring(0, arr[0].length() - 6);
    numberOfBlocks = Long.valueOf(arr[1]);
    if (numberOfBlocks > splitSize) {
      Collections.addAll(list, MonarchSplit.getInputSplits(tablePaths[0], prefix, splitSize, numberOfBlocks));
    } else {
      list.add(new MonarchSplit(tablePaths[0], 0, numberOfBlocks, null, prefix));
    }
  }
  return list.toArray(new MonarchSplit[list.size()]);
}
 
Example 35
Project: hadoop   File: StreamBaseRecordReader.java
public StreamBaseRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                              JobConf job, FileSystem fs) throws IOException {
  in_ = in;
  split_ = split;
  start_ = split_.getStart();
  length_ = split_.getLength();
  end_ = start_ + length_;
  splitName_ = split_.getPath().getName();
  reporter_ = reporter;
  job_ = job;
  fs_ = fs;

  statusMaxRecordChars_ = job_.getInt(CONF_NS + "statuschars", 200);
}
 
Example 36
Project: monarch   File: MonarchRecordReaderTest.java
/**
 * Get input splits for the specified split-size.
 *
 * @param regionName the region name
 * @param splitSize  the split-size
 * @return an array of splits to be read
 */
private InputSplit[] getSplits(final String regionName, final int splitSize) throws IOException{
  JobConf jobConf = new JobConf();
  jobConf.set(MonarchUtils.REGION, regionName);
  jobConf.set("mapred.input.dir", "/home/mgalande");
  jobConf.set(MonarchUtils.SPLIT_SIZE_KEY, String.valueOf(splitSize));
  jobConf.set(MonarchUtils.MONARCH_TABLE_TYPE, "unordered");
  return MonarchSplit.getSplits(jobConf, 1);
}
 
Example 37
Project: ditb   File: TokenUtil.java
/**
 * Obtain an authentication token on behalf of the given user and add it to
 * the credentials for the given map reduce job.
 * @param user The user for whom to obtain the token
 * @param job The job configuration in which the token should be stored
 * @throws IOException If making a remote call to the authentication service fails
 * @throws InterruptedException If executing as the given user is interrupted
 * @deprecated Replaced by {@link #obtainTokenForJob(Connection,JobConf,User)}
 */
@Deprecated
public static void obtainTokenForJob(final JobConf job,
                                     UserGroupInformation user)
    throws IOException, InterruptedException {
  Connection conn = ConnectionFactory.createConnection(job);
  try {
    UserProvider userProvider = UserProvider.instantiate(job);
    obtainTokenForJob(conn, job, userProvider.create(user));
  } finally {
    conn.close();
  }
}
 
Example 38
Project: hadoop   File: MapTaskImpl.java
public MapTaskImpl(JobId jobId, int partition, EventHandler eventHandler,
    Path remoteJobConfFile, JobConf conf,
    TaskSplitMetaInfo taskSplitMetaInfo,
    TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.MAP, partition, eventHandler, remoteJobConfFile,
      conf, taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.taskSplitMetaInfo = taskSplitMetaInfo;
}
 
Example 39
Project: hadoop   File: ReduceTaskImpl.java
public ReduceTaskImpl(JobId jobId, int partition,
    EventHandler eventHandler, Path jobFile, JobConf conf,
    int numMapTasks, TaskAttemptListener taskAttemptListener,
    Token<JobTokenIdentifier> jobToken,
    Credentials credentials, Clock clock,
    int appAttemptId, MRAppMetrics metrics, AppContext appContext) {
  super(jobId, TaskType.REDUCE, partition, eventHandler, jobFile, conf,
      taskAttemptListener, jobToken, credentials, clock,
      appAttemptId, metrics, appContext);
  this.numMapTasks = numMapTasks;
}
 
Example 40
Project: hadoop   File: LazyOutputFormat.java
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf job) 
throws IOException {
  if (baseOut == null) {
    getBaseOutputFormat(job);
  }
  super.checkOutputSpecs(ignored, job);
}