Java Code Examples for org.apache.hadoop.io.Text

The following examples show how to use org.apache.hadoop.io.Text. They are extracted from open source projects.
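As a warm-up, the snippet below is a minimal, self-contained sketch of the basic Text API: construction, in-place reuse via set(), and a round-trip through the Writable interface. The wrapper class name TextBasics is made up for illustration; the Text, DataOutputBuffer, and DataInputBuffer calls are the standard org.apache.hadoop.io ones.

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class TextBasics {
  public static void main(String[] args) throws IOException {
    // Text stores a string as UTF-8 bytes and is mutable, unlike String.
    Text t = new Text("hadoop");
    t.set("hello hadoop");              // reuse the same object
    System.out.println(t.getLength());  // length of the UTF-8 encoding, in bytes
    System.out.println(t.toString());   // decode back to a java.lang.String

    // Round-trip through the Writable interface.
    DataOutputBuffer out = new DataOutputBuffer();
    t.write(out);

    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    Text copy = new Text();
    copy.readFields(in);
    System.out.println(copy);           // prints "hello hadoop"
  }
}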
Example 1
Project: hadoop   File: TestSpeculativeExecution.java
public void map(Object key, Text value, Context context)
        throws IOException, InterruptedException {
  // Make one mapper slower for speculative execution
  TaskAttemptID taid = context.getTaskAttemptID();
  long sleepTime = 100;
  Configuration conf = context.getConfiguration();
  boolean test_speculate_map =
          conf.getBoolean(MRJobConfig.MAP_SPECULATIVE, false);

  // IF TESTING MAPPER SPECULATIVE EXECUTION:
  //   Make the "*_m_000000_0" attempt take much longer than the others.
  //   When speculative execution is enabled, this should cause the attempt
  //   to be killed and restarted. At that point, the attempt ID will be
  //   "*_m_000000_1", so sleepTime will still remain 100ms.
  if ( (taid.getTaskType() == TaskType.MAP) && test_speculate_map
        && (taid.getTaskID().getId() == 0) && (taid.getId() == 0)) {
    sleepTime = 10000;
  }
  try{
    Thread.sleep(sleepTime);
  } catch(InterruptedException ie) {
    // Ignore
  }
  context.write(value, new IntWritable(1));
}
 
Example 2
Project: hadoop-oss   File: TestUserGroupInformation.java
@SuppressWarnings("unchecked") // from Mockito mocks
@Test (timeout = 30000)
public <T extends TokenIdentifier> void testAddCreds() throws Exception {
  UserGroupInformation ugi = 
      UserGroupInformation.createRemoteUser("someone"); 
  
  Text service = new Text("service");
  Token<T> t1 = mock(Token.class);
  when(t1.getService()).thenReturn(service);
  Token<T> t2 = mock(Token.class);
  when(t2.getService()).thenReturn(new Text("service2"));
  byte[] secret = new byte[]{};
  Text secretKey = new Text("sshhh");

  // fill credentials
  Credentials creds = new Credentials();
  creds.addToken(t1.getService(), t1);
  creds.addToken(t2.getService(), t2);
  creds.addSecretKey(secretKey, secret);
  
  // add creds to ugi, and check ugi
  ugi.addCredentials(creds);
  checkTokens(ugi, t1, t2);
  assertSame(secret, ugi.getCredentials().getSecretKey(secretKey));
}
 
Example 3
Project: Wikipedia-Index   File: TF.java
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
	String doc = value.toString();
				
	String text = slice(doc, "<text", "</text>", true);
	if (text.length() < 1) return;
	
	char txt[] = text.toLowerCase().toCharArray();
	for (int i = 0; i < txt.length; ++i) {
		if (!((txt[i] >= 'a' && txt[i] <= 'z') || (txt[i] >= 'A' && txt[i] <= 'Z')))
			txt[i] = ' ';
	}
	
	String id = slice(doc, "<id>", "</id>", false);
	if (id.length() < 1) return;
	StringTokenizer itr = new StringTokenizer(String.valueOf(txt));
	int sum = itr.countTokens();
	while (itr.hasMoreTokens()) {
		String s = itr.nextToken();
		word.set(id + '-' + s);
		IntWritable tmp[] = {new IntWritable(sum), new IntWritable(1)};
		IntArrayWritable temp = new IntArrayWritable(tmp);
		context.write(word, temp);
	}
}
 
Example 4
Project: aliyun-maxcompute-data-collectors   File: OraOopDBInputSplit.java
@Override
/** {@inheritDoc} */
public void write(DataOutput output) throws IOException {

  output.writeInt(splitId);

  if (this.oracleDataChunks == null) {
    output.writeInt(0);
  } else {
    output.writeInt(this.oracleDataChunks.size());
    for (OraOopOracleDataChunk dataChunk : this.oracleDataChunks) {
      Text.writeString(output, dataChunk.getClass().getName());
      dataChunk.write(output);
    }
  }
}
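The matching readFields() is not part of this excerpt; the sketch below only illustrates the symmetric read path implied by write() above (split id, chunk count, then a class name written with Text.writeString followed by the chunk body). It assumes OraOopOracleDataChunk is a Writable with a no-argument constructor and that oracleDataChunks is a java.util.List; neither is shown here.

@Override
/** Illustrative sketch of the read path, not the project's actual method. */
public void readFields(DataInput input) throws IOException {

  splitId = input.readInt();

  int numChunks = input.readInt();
  this.oracleDataChunks = new ArrayList<OraOopOracleDataChunk>(numChunks);
  for (int i = 0; i < numChunks; i++) {
    // Read the class name written by Text.writeString(), then let the chunk
    // deserialize its own fields.
    String className = Text.readString(input);
    try {
      OraOopOracleDataChunk dataChunk =
          (OraOopOracleDataChunk) Class.forName(className).newInstance();
      dataChunk.readFields(input);
      this.oracleDataChunks.add(dataChunk);
    } catch (ReflectiveOperationException e) {
      throw new IOException("Cannot deserialize data chunk of type " + className, e);
    }
  }
}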
 
Example 5
Project: hadoop   File: ValueAggregatorCombiner.java
/** Combines values for a given key.  
 * @param key the key is expected to be a Text object, whose prefix indicates
 * the type of aggregation to aggregate the values. 
 * @param values the values to combine
 * @param context to collect combined values
 */
public void reduce(Text key, Iterable<Text> values, Context context) 
    throws IOException, InterruptedException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  long uniqCount = context.getConfiguration().
    getLong(UniqValueCount.MAX_NUM_UNIQUE_VALUES, Long.MAX_VALUE);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type, uniqCount);
  for (Text val : values) {
    aggregator.addNextValue(val);
  }
  Iterator<?> outputs = aggregator.getCombinerOutput().iterator();

  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      context.write(key, (Text)v);
    } else {
      context.write(key, new Text(v.toString()));
    }
  }
}
 
Example 6
Project: PACE   File: FilteringIT.java
private void fetchColumnTest(String configuration) throws Exception {
  List<String> rows = Arrays.asList("row1", "row2");
  List<String> colFs = Arrays.asList("colF1", "colF2");
  List<String> colQs = Arrays.asList("colQ1", "colQ2");
  List<String> colVs = Collections.singletonList("");
  List<String> values = Collections.singletonList("value");

  clearTable();
  EncryptedBatchWriter writer = getEncryptedWriter(CHARLIE, configuration);
  writeData(writer, rows, colFs, colQs, colVs, values);
  writer.close();

  EncryptedBatchScanner scanner = getEncryptedScanner(CHARLIE, configuration);
  scanner.setRanges(Collections.singletonList(new Range()));
  scanner.fetchColumn(new IteratorSetting.Column(new Text("colF1"), new Text("colQ1")));
  assertThat("contains the filtered data", scanner, hasData(rows, Collections.singletonList("colF1"), Collections.singletonList("colQ1"), colVs, values));
}
 
Example 7
Project: hadoop-oss   File: TestFileSystemTokens.java
@Test
public void testFsWithDuplicateChildrenTokenExists() throws Exception {
  Credentials credentials = new Credentials();
  Text service = new Text("singleTokenFs1");
  Token<?> token = mock(Token.class);
  credentials.addToken(service, token);

  MockFileSystem fs = createFileSystemForServiceName(service);
  MockFileSystem multiFs =
      createFileSystemForServiceName(null, fs, new FilterFileSystem(fs));
  
  multiFs.addDelegationTokens(renewer, credentials);
  verifyTokenFetch(multiFs, false);
  verifyTokenFetch(fs, false);
  
  assertEquals(1, credentials.numberOfTokens());
  assertSame(token, credentials.getToken(service));
}
 
Example 8
Project: hadoop   File: RMDelegationTokenSelector.java
@SuppressWarnings("unchecked")
public Token<RMDelegationTokenIdentifier> selectToken(Text service,
    Collection<Token<? extends TokenIdentifier>> tokens) {
  if (service == null) {
    return null;
  }
  LOG.debug("Looking for a token with service " + service.toString());
  for (Token<? extends TokenIdentifier> token : tokens) {
    LOG.debug("Token kind is " + token.getKind().toString()
        + " and the token's service name is " + token.getService());
    if (RMDelegationTokenIdentifier.KIND_NAME.equals(token.getKind())
        && checkService(service, token)) {
      return (Token<RMDelegationTokenIdentifier>) token;
    }
  }
  return null;
}
 
Example 9
Project: hadoop-logfile-inputformat   File: LogfileRecordReader.java
/**
 * Fetches next line from file.
 * 
 * The line is stored in {@link #line} for further processing. The fields {@link #pos} and {@link #lastReadPos} are
 * updated accordingly.
 * 
 * If the end of the file has been reached, {@link #line} is set to <code>null</code> and the flag {@link #eof} is
 * set to {@code true}.
 * 
 * @throws IOException
 */
private void fetchLine() throws IOException {

    if (isSplittable && posInFile() >= end) {
        eos = true;
    }
    Text text = new Text();
    int length = lineReader.readLine(text);
    if (length == 0) {
        eof = true;
        line = null;
        return;
    }
    lastReadPos = pos;
    pos += length;
    line = text.toString();
}
 
Example 10
Project: hadoop   File: HistoryClientService.java
@Override
public RenewDelegationTokenResponse renewDelegationToken(
    RenewDelegationTokenRequest request) throws IOException {
    if (!isAllowedDelegationTokenOp()) {
      throw new IOException(
          "Delegation Token can be renewed only with kerberos authentication");
    }

    org.apache.hadoop.yarn.api.records.Token protoToken = request.getDelegationToken();
    Token<MRDelegationTokenIdentifier> token =
        new Token<MRDelegationTokenIdentifier>(
            protoToken.getIdentifier().array(), protoToken.getPassword()
                .array(), new Text(protoToken.getKind()), new Text(
                protoToken.getService()));

    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    long nextExpTime = jhsDTSecretManager.renewToken(token, user);
    RenewDelegationTokenResponse renewResponse = Records
        .newRecord(RenewDelegationTokenResponse.class);
    renewResponse.setNextExpirationTime(nextExpTime);
    return renewResponse;
}
 
Example 11
Project: hadoop   File: CopyCommitter.java
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
  String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  final boolean syncOrOverwrite = syncFolder || overwrite;

  LOG.info("About to preserve attributes: " + attrSymbols);

  EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
  final boolean preserveRawXattrs =
      conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                                    SequenceFile.Reader.file(sourceListing));
  long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

  Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

  long preservedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();

    // Iterate over every source path that was copied.
    while (sourceReader.next(srcRelPath, srcFileStatus)) {
      // File-attributes for files are set at the time of copy,
      // in the map-task.
      if (! srcFileStatus.isDirectory()) continue;

      Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
      //
      // Skip the root folder when syncOrOverwrite is true.
      //
      if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;

      FileSystem targetFS = targetFile.getFileSystem(conf);
      DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,
          preserveRawXattrs);

      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Preserving status on directory entries. [" +
          sourceReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
  }
  LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
 
Example 12
Project: ViraPipe   File: RepartitionFastq.java
public static void main(String[] args) throws IOException {

        if (args.length < 3) {
            System.err.println("Usage: RepartitionFastq <input path> <output path> <number of partitions>");
            System.exit(1);
        }

        SparkConf conf = new SparkConf().setAppName("RepartitionFastq");
        //conf.set("spark.default.parallelism", String.valueOf(args[2]));
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaPairRDD<Text, SequencedFragment> fastqRDD = sc.newAPIHadoopFile(args[0], FastqInputFormat.class, Text.class, SequencedFragment.class, sc.hadoopConfiguration());

        JavaPairRDD<Text, SequencedFragment> repartitioned = fastqRDD.repartition(Integer.valueOf(args[2]));

        repartitioned.saveAsNewAPIHadoopFile(args[1], Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());

        sc.stop();
    }
 
Example 13
Project: hadoop   File: DelegationTokenAuthenticationHandler.java
@VisibleForTesting
@SuppressWarnings("unchecked")
public void initTokenManager(Properties config) {
  Configuration conf = new Configuration(false);
  for (Map.Entry entry : config.entrySet()) {
    conf.set((String) entry.getKey(), (String) entry.getValue());
  }
  String tokenKind = conf.get(TOKEN_KIND);
  if (tokenKind == null) {
    throw new IllegalArgumentException(
        "The configuration does not define the token kind");
  }
  tokenKind = tokenKind.trim();
  tokenManager = new DelegationTokenManager(conf, new Text(tokenKind));
  tokenManager.init();
}
 
Example 14
Project: DocIT   File: Employee.java
/**
 * Read (say, deserialize) an employee
 */
@Override
public void readFields(DataInput in) throws IOException {
	name = new Text();
	name.readFields(in);
	address = new Text();
	address.readFields(in);
	company = new Text();
	company.readFields(in);
	salary = new DoubleWritable();
	salary.readFields(in);
	department = new Text();
	department.readFields(in);
	isManager = new BooleanWritable();
	isManager.readFields(in);
}
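The corresponding write() is not shown in this excerpt; a sketch that simply mirrors readFields(), assuming the field types implied above (Text, DoubleWritable, BooleanWritable), would look like this:

/**
 * Write (say, serialize) an employee. Illustrative sketch mirroring readFields().
 */
@Override
public void write(DataOutput out) throws IOException {
	name.write(out);
	address.write(out);
	company.write(out);
	salary.write(out);
	department.write(out);
	isManager.write(out);
}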
 
Example 15
Project: hadoop-oss   File: TestDelegationToken.java
@Test 
public void testDelegationTokenNullRenewer() throws Exception {
  TestDelegationTokenSecretManager dtSecretManager = 
    new TestDelegationTokenSecretManager(24*60*60*1000,
      10*1000,1*1000,3600000);
  dtSecretManager.startThreads();
  TestDelegationTokenIdentifier dtId = new TestDelegationTokenIdentifier(new Text(
      "theuser"), null, null);
  Token<TestDelegationTokenIdentifier> token = new Token<TestDelegationTokenIdentifier>(
      dtId, dtSecretManager);
  Assert.assertTrue(token != null);
  try {
    dtSecretManager.renewToken(token, "");
    Assert.fail("Renewal must not succeed");
  } catch (IOException e) {
    //PASS
  }
}
 
Example 16
Project: hadoop   File: TestLocalRunner.java
public void map(LongWritable key, Text val, Context c)
    throws IOException, InterruptedException {

  // Create a whole bunch of objects.
  List<Integer> lst = new ArrayList<Integer>();
  for (int i = 0; i < 20000; i++) {
    lst.add(new Integer(i));
  }

  // Actually use this list, to ensure that it isn't just optimized away.
  int sum = 0;
  for (int x : lst) {
    sum += x;
  }

  // throw away the list and run a GC.
  lst = null;
  System.gc();

  c.write(new LongWritable(sum), val);
}
 
Example 17
Project: mapreduce-samples   File: UnitMultiplication.java
public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        conf.setFloat("beta", Float.parseFloat(args[3]));
        Job job = Job.getInstance(conf);
        job.setJarByClass(UnitMultiplication.class);

        ChainMapper.addMapper(job, TransitionMapper.class, Object.class, Text.class, Text.class, Text.class, conf);
        ChainMapper.addMapper(job, PRMapper.class, Object.class, Text.class, Text.class, Text.class, conf);

        job.setReducerClass(MultiplicationReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, TransitionMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, PRMapper.class);

        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.waitForCompletion(true);
    }
 
Example 18
Project: hadoop   File: ValueAggregatorBaseDescriptor.java
/**
 * Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
 * The first id will be of type LONG_VALUE_SUM, with "record_count" as
 * its aggregation id. If the input is a file split,
 * the second id of the same type will be generated too, with the file name 
 * as its aggregation id. This achieves the behavior of counting the total 
 * number of records in the input data, and the number of records 
 * in each input file.
 * 
 * @param key
 *          input key
 * @param val
 *          input value
 * @return a list of aggregation id/value pairs. An aggregation id encodes an
 *         aggregation type which is used to guide the way to aggregate the
 *         value in the reduce/combiner phase of an Aggregate based job.
 */
public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
                                                        Object val) {
  ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
  String countType = LONG_VALUE_SUM;
  String id = "record_count";
  Entry<Text, Text> e = generateEntry(countType, id, ONE);
  if (e != null) {
    retv.add(e);
  }
  if (this.inputFile != null) {
    e = generateEntry(countType, this.inputFile, ONE);
    if (e != null) {
      retv.add(e);
    }
  }
  return retv;
}
 
Example 19
Project: hadoop-oss   File: TestDelegationToken.java
@Test
public void testGetUserWithOwnerAndReal() {
  Text owner = new Text("owner");
  Text realUser = new Text("realUser");
  TestDelegationTokenIdentifier ident =
      new TestDelegationTokenIdentifier(owner, null, realUser);
  UserGroupInformation ugi = ident.getUser();
  assertNotNull(ugi.getRealUser());
  assertNull(ugi.getRealUser().getRealUser());
  assertEquals("owner", ugi.getUserName());
  assertEquals("realUser", ugi.getRealUser().getUserName());
  assertEquals(AuthenticationMethod.PROXY,
               ugi.getAuthenticationMethod());
  assertEquals(AuthenticationMethod.TOKEN,
               ugi.getRealUser().getAuthenticationMethod());
}
 
Example 20
Project: aliyun-maxcompute-data-collectors   File: RecordParser.java
/**
 * Return a list of strings representing the fields of the input line.
 * This list is backed by an internal buffer which is cleared by the
 * next call to parseRecord().
 */
public List<String> parseRecord(Text input)
    throws com.cloudera.sqoop.lib.RecordParser.ParseError {
  if (null == input) {
    throw new com.cloudera.sqoop.lib.RecordParser.ParseError(
        "null input string");
  }

  // TODO(aaron): The parser should be able to handle UTF-8 strings
  // as well, to avoid this transcode operation.
  return parseRecord(input.toString());
}
 
Example 21
Project: PACE   File: EncryptedBatchScannerTest.java
@Test
public void setRangesSemanticEncryptionTest() throws Exception {
  when(mockConnector.createBatchScanner(TEST_TABLE, authorizations, 1)).thenReturn(mockScanner);

  EntryEncryptor encryptor = new EntryEncryptor(getConfig("encrypt-key.ini"), KEYS);
  List<Map.Entry<Key,Value>> entries = new ArrayList<>();
  Map.Entry<Key,Value> entry = new SimpleImmutableEntry<>(new Key(new byte[] {1}, new byte[] {2}, new byte[] {3},
      "secret".getBytes(Utils.VISIBILITY_CHARSET), 0, false, false), new Value(new byte[] {4}));
  Map.Entry<Key,Value> entry2 = new SimpleImmutableEntry<>(new Key(new byte[] {5}, new byte[] {6}, new byte[] {7},
      "secret".getBytes(Utils.VISIBILITY_CHARSET), 0, false, false), new Value(new byte[] {8}));
  entries.add(encryptor.encrypt(entry));
  entries.add(encryptor.encrypt(entry2));
  when(mockScanner.iterator()).thenReturn(entries.iterator()).thenReturn(entries.iterator()).thenReturn(entries.iterator());

  BatchScanner scanner = new EncryptedBatchScanner(mockConnector, TEST_TABLE, authorizations, 1, getConfig("encrypt-key.ini"), KEYS);
  assertThat("has correct number of elements", scanner, iterableWithSize(2));

  scanner.setRanges(Collections.singletonList(new Range(new Text(new byte[] {1}))));
  assertThat("has correct number of elements", scanner, iterableWithSize(1));

  scanner.setRanges(Collections.singletonList(new Range(new Text(new byte[] {1}), new Text(new byte[] {5}))));
  assertThat("has correct number of elements", scanner, iterableWithSize(2));

  // Should not have been handled server side.
  verify(mockScanner, times(2)).setRanges(captor.capture());

  for (Collection<Range> ranges : captor.getAllValues()) {
    assertThat("has the infinite range", ranges, hasSize(1));
    assertThat("has the infinite range", ranges.iterator().next(), equalTo(new Range()));
  }
}
 
Example 22
Project: hadoop   File: RMDelegationTokenSelector.java
private boolean checkService(Text service,
    Token<? extends TokenIdentifier> token) {
  if (service == null || token.getService() == null) {
    return false;
  }
  return token.getService().toString().contains(service.toString());
}
 
Example 23
Project: DocIT   File: Cut.java
protected void map(Text key, Employee value, Context context)
		throws IOException, InterruptedException {
	if (value.getCompany().toString().equals(name))
		value.setSalary(value.getSalary().get() / 2);
	// copy all Employees
	context.write(key, value);
}
 
Example 24
Project: hadoop   File: AbstractDelegationTokenSelector.java
@SuppressWarnings("unchecked")
@Override
public Token<TokenIdent> selectToken(Text service,
    Collection<Token<? extends TokenIdentifier>> tokens) {
  if (service == null) {
    return null;
  }
  for (Token<? extends TokenIdentifier> token : tokens) {
    if (kindName.equals(token.getKind())
        && service.equals(token.getService())) {
      return (Token<TokenIdent>) token;
    }
  }
  return null;
}
 
Example 25
Project: hadoop   File: TestLocalModeWithNewApis.java
@Test
public void testNewApis() throws Exception {
  Random r = new Random(System.currentTimeMillis());
  Path tmpBaseDir = new Path("/tmp/wc-" + r.nextInt());
  final Path inDir = new Path(tmpBaseDir, "input");
  final Path outDir = new Path(tmpBaseDir, "output");
  String input = "The quick brown fox\nhas many silly\nred fox sox\n";
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  {
    DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
    file.writeBytes(input);
    file.close();
  }

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(TestLocalModeWithNewApis.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  assertEquals(job.waitForCompletion(true), true);

  String output = readOutput(outDir, conf);
  assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
               "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", output);
  
  outFs.delete(tmpBaseDir, true);
}
 
Example 26
Project: hadoop   File: TestCombineFileInputFormat.java
@SuppressWarnings("unchecked")
@Override
public RecordReader<Text,Text> createRecordReader(InputSplit split, 
    TaskAttemptContext context) throws IOException {
  return new CombineFileRecordReader((CombineFileSplit) split, context,
      (Class) DummyRecordReader.class);
}
 
Example 27
Project: hive-udf-backports   File: UDFReplace.java
public Text evaluate(Text s, Text search, Text replacement) {
  if (s == null || search == null || replacement == null) {
    return null;
  }
  String r = s.toString().replace(search.toString(), replacement.toString());
  result.set(r);
  return result;
}
 
Example 28
Project: hadoop-oss   File: TestUserGroupInformation.java
@SuppressWarnings("unchecked") // from Mockito mocks
@Test (timeout = 30000)
public <T extends TokenIdentifier> void testAddToken() throws Exception {
  UserGroupInformation ugi = 
      UserGroupInformation.createRemoteUser("someone"); 
  
  Token<T> t1 = mock(Token.class);
  Token<T> t2 = mock(Token.class);
  Token<T> t3 = mock(Token.class);
  
  // add token to ugi
  ugi.addToken(t1);
  checkTokens(ugi, t1);

  // replace token t1 with t2 - with same key (null)
  ugi.addToken(t2);
  checkTokens(ugi, t2);
  
  // change t1 service and add token
  when(t1.getService()).thenReturn(new Text("t1"));
  ugi.addToken(t1);
  checkTokens(ugi, t1, t2);

  // overwrite t1 token with t3 - same key (!null)
  when(t3.getService()).thenReturn(new Text("t1"));
  ugi.addToken(t3);
  checkTokens(ugi, t2, t3);

  // just try to re-add with new name
  when(t1.getService()).thenReturn(new Text("t1.1"));
  ugi.addToken(t1);
  checkTokens(ugi, t1, t2, t3);    

  // just try to re-add with new name again
  ugi.addToken(t1);
  checkTokens(ugi, t1, t2, t3);    
}
 
Example 29
Project: hadoop   File: JobClient.java
/**
 * Get a delegation token for the user from the JobTracker.
 * @param renewer the user who can renew the token
 * @return the new token
 * @throws IOException
 */
public Token<DelegationTokenIdentifier> 
  getDelegationToken(final Text renewer) throws IOException, InterruptedException {
  return clientUgi.doAs(new 
      PrivilegedExceptionAction<Token<DelegationTokenIdentifier>>() {
    public Token<DelegationTokenIdentifier> run() throws IOException, 
    InterruptedException {
      return cluster.getDelegationToken(renewer);
    }
  });
}
 
Example 30
Project: hadoop   File: TupleWritable.java
/** Writes each Writable to <code>out</code>.
 * TupleWritable format:
 * {@code
 *  <count><type1><type2>...<typen><obj1><obj2>...<objn>
 * }
 */
public void write(DataOutput out) throws IOException {
  WritableUtils.writeVInt(out, values.length);
  writeBitSet(out, values.length, written);
  for (int i = 0; i < values.length; ++i) {
    Text.writeString(out, values[i].getClass().getName());
  }
  for (int i = 0; i < values.length; ++i) {
    if (has(i)) {
      values[i].write(out);
    }
  }
}
 
Example 31
Project: hadoop   File: JobTokenSelector.java
@SuppressWarnings("unchecked")
@Override
public Token<JobTokenIdentifier> selectToken(Text service,
    Collection<Token<? extends TokenIdentifier>> tokens) {
  if (service == null) {
    return null;
  }
  for (Token<? extends TokenIdentifier> token : tokens) {
    if (JobTokenIdentifier.KIND_NAME.equals(token.getKind())
        && service.equals(token.getService())) {
      return (Token<JobTokenIdentifier>) token;
    }
  }
  return null;
}
 
Example 32
Project: hadoop   File: TestProtocolRecords.java
@Test
public void testNodeHeartBeatResponse() throws IOException {
  NodeHeartbeatResponse record =
      Records.newRecord(NodeHeartbeatResponse.class);
  Map<ApplicationId, ByteBuffer> appCredentials =
      new HashMap<ApplicationId, ByteBuffer>();
  Credentials app1Cred = new Credentials();

  Token<DelegationTokenIdentifier> token1 =
      new Token<DelegationTokenIdentifier>();
  token1.setKind(new Text("kind1"));
  app1Cred.addToken(new Text("token1"), token1);
  Token<DelegationTokenIdentifier> token2 =
      new Token<DelegationTokenIdentifier>();
  token2.setKind(new Text("kind2"));
  app1Cred.addToken(new Text("token2"), token2);

  DataOutputBuffer dob = new DataOutputBuffer();
  app1Cred.writeTokenStorageToStream(dob);
  ByteBuffer byteBuffer1 = ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
  appCredentials.put(ApplicationId.newInstance(1234, 1), byteBuffer1);
  record.setSystemCredentialsForApps(appCredentials);

  NodeHeartbeatResponse proto =
      new NodeHeartbeatResponsePBImpl(
        ((NodeHeartbeatResponsePBImpl) record).getProto());
  Assert.assertEquals(appCredentials, proto.getSystemCredentialsForApps());
}
 
Example 33
Project: hadoop   File: TokenCache.java
/**
 * @deprecated Use {@link Credentials#getToken(org.apache.hadoop.io.Text)}
 * instead, this method is included for compatibility against Hadoop-1
 * @param namenode
 * @return delegation token
 */
@InterfaceAudience.Private
@Deprecated
public static
    Token<?> getDelegationToken(
        Credentials credentials, String namenode) {
  return (Token<?>) credentials.getToken(new Text(
    namenode));
}
 
Example 34
Project: hadoop   File: MRCaching.java
public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output,
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  output.collect(key, new IntWritable(sum));
}
 
Example 35
Project: ViraPipe   File: HDFSWriter.java
private static JavaPairRDD<Text, SequencedFragment> alignmentsToFastq(JavaRDD<String> alignmentRDD, SAMFileHeader header) {
    return alignmentRDD.mapPartitionsToPair(alns -> {

        List<Tuple2<Text, SequencedFragment>> records = new ArrayList<Tuple2<Text, SequencedFragment>>();
        final SAMLineParser samLP = new SAMLineParser(new DefaultSAMRecordFactory(), ValidationStringency.SILENT, header, null, null);
        while (alns.hasNext()) {
            String aln = alns.next().replace("\r\n", "").replace("\n", "").replace(System.lineSeparator(), "");
            try{
                SAMRecord sam = samLP.parseLine(aln);
                String[] fields = aln.split("\\t");
                String name = fields[0];
                if(sam.getReadPairedFlag()){
                    if(sam.getFirstOfPairFlag())
                        name = name+"/1";
                    if(sam.getSecondOfPairFlag())
                        name = name+"/2";
                }

                String bases = fields[9];
                String quality = fields[10];

                Text t = new Text(name);
                SequencedFragment sf = new SequencedFragment();
                sf.setSequence(new Text(bases));
                sf.setQuality(new Text(quality));
                records.add(new Tuple2<Text, SequencedFragment>(t, sf));
            }catch(SAMFormatException e){
                System.out.println(e.getMessage());
            }
        }
        return records.iterator();
    });
}
 
Example 36
Project: hadoop   File: TestSpeculativeExecution.java
private Job runSpecTest(boolean mapspec, boolean redspec)
    throws IOException, ClassNotFoundException, InterruptedException {

  Path first = createTempFile("specexec_map_input1", "a\nz");
  Path secnd = createTempFile("specexec_map_input2", "a\nz");

  Configuration conf = mrCluster.getConfig();
  conf.setBoolean(MRJobConfig.MAP_SPECULATIVE,mapspec);
  conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE,redspec);
  conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR,
          TestSpecEstimator.class,
          TaskRuntimeEstimator.class);

  Job job = Job.getInstance(conf);
  job.setJarByClass(TestSpeculativeExecution.class);
  job.setMapperClass(SpeculativeMapper.class);
  job.setReducerClass(SpeculativeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(2);
  FileInputFormat.setInputPaths(job, first);
  FileInputFormat.addInputPath(job, secnd);
  FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);

  // Delete output directory if it exists.
  try {
    localFs.delete(TEST_OUT_DIR,true);
  } catch (IOException e) {
    // ignore
  }

  // Creates the Job Configuration
  job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
  job.setMaxMapAttempts(2);

  job.submit();

  return job;
}
 
Example 37
Project: mumu-mapreduce   File: MonthTrafficStatisticsMapReduce.java
@Override
protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException {
    Set<String> ipSet = new HashSet<String>();
    Iterator<Text> iterator = values.iterator();
    while (iterator.hasNext()) {
        ipSet.add(iterator.next().toString());
    }
    context.write(key, new IntWritable(ipSet.size()));
}
 
Example 38
Project: ditb   File: HbaseObjectWritableFor96Migration.java
/** Writes the encoded class code as defined in CLASS_TO_CODE, or
 * the whole class name if not defined in the mapping.
 */
static void writeClass(DataOutput out, Class<?> c) throws IOException {
  Integer code = CLASS_TO_CODE.get(c);
  if (code == null) {
    WritableUtils.writeVInt(out, NOT_ENCODED);
    Text.writeString(out, c.getName());
  } else {
    WritableUtils.writeVInt(out, code);
  }
}
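The read side is not included in this excerpt. A plausible inverse, sketched here for illustration only, would read the varint and either fall back to Class.forName on the string written above or consult a reverse map; CODE_TO_CLASS below is an assumed reverse mapping of CLASS_TO_CODE, not something shown in the excerpt.

/** Reads a class written by writeClass(). Illustrative sketch only;
 *  assumes CODE_TO_CLASS is the reverse mapping of CLASS_TO_CODE. */
static Class<?> readClass(DataInput in) throws IOException {
  int code = WritableUtils.readVInt(in);
  if (code == NOT_ENCODED) {
    String className = Text.readString(in);
    try {
      return Class.forName(className);
    } catch (ClassNotFoundException e) {
      throw new IOException("Cannot load class " + className, e);
    }
  }
  return CODE_TO_CLASS.get(code);
}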
 
Example 39
Project: hadoop   File: TeraSort.java
int findPartition(Text key) {
  for(int i=lower; i<upper; ++i) {
    if (splitPoints[i].compareTo(key) > 0) {
      return i;
    }
  }
  return upper;
}
 
Example 40
Project: WIFIProbe   File: CustomerFlowElement.java
public void write(DataOutput dataOutput) throws IOException {
    Text text = new Text(wifiProb==null?"":wifiProb);
    text.write(dataOutput);

    IntWritable intWritable = new IntWritable();

    intWritable.set(inNoOutWifi);
    intWritable.write(dataOutput);
    intWritable.set(inNoOutStore);
    intWritable.write(dataOutput);

    intWritable.set(outNoInWifi);
    intWritable.write(dataOutput);
    intWritable.set(outNoInStore);
    intWritable.write(dataOutput);

    intWritable.set(inAndOutWifi);
    intWritable.write(dataOutput);
    intWritable.set(inAndOutStore);
    intWritable.write(dataOutput);

    intWritable.set(stayInWifi);
    intWritable.write(dataOutput);
    intWritable.set(stayInStore);
    intWritable.write(dataOutput);

    DoubleWritable doubleWritable = new DoubleWritable();
    doubleWritable.set(jumpRate);
    doubleWritable.write(dataOutput);
    doubleWritable.set(deepVisit);
    doubleWritable.write(dataOutput);
    doubleWritable.set(inStoreRate);
    doubleWritable.write(dataOutput);
}
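The matching readFields() is not part of this excerpt; the sketch below is guessed purely from the write() above and from the field names it uses (String wifiProb, int counters, double rates):

/** Illustrative sketch of the read path, mirroring write() above. */
public void readFields(DataInput dataInput) throws IOException {
    Text text = new Text();
    text.readFields(dataInput);
    wifiProb = text.toString();

    IntWritable intWritable = new IntWritable();

    intWritable.readFields(dataInput);
    inNoOutWifi = intWritable.get();
    intWritable.readFields(dataInput);
    inNoOutStore = intWritable.get();

    intWritable.readFields(dataInput);
    outNoInWifi = intWritable.get();
    intWritable.readFields(dataInput);
    outNoInStore = intWritable.get();

    intWritable.readFields(dataInput);
    inAndOutWifi = intWritable.get();
    intWritable.readFields(dataInput);
    inAndOutStore = intWritable.get();

    intWritable.readFields(dataInput);
    stayInWifi = intWritable.get();
    intWritable.readFields(dataInput);
    stayInStore = intWritable.get();

    DoubleWritable doubleWritable = new DoubleWritable();
    doubleWritable.readFields(dataInput);
    jumpRate = doubleWritable.get();
    doubleWritable.readFields(dataInput);
    deepVisit = doubleWritable.get();
    doubleWritable.readFields(dataInput);
    inStoreRate = doubleWritable.get();
}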
 
Example 41
Project: hadoop   File: RandomTextWriterJob.java
/**
 * Given an output filename, write a bunch of random records to it.
 */
public void map(Text key, Text value,
                Context context) throws IOException,InterruptedException {
  int itemCount = 0;
  while (numBytesToWrite > 0) {
    // Generate the key/value 
    int noWordsKey = minWordsInKey + 
      (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
    int noWordsValue = minWordsInValue + 
      (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
    Text keyWords = generateSentence(noWordsKey);
    Text valueWords = generateSentence(noWordsValue);
    
    // Write the sentence 
    context.write(keyWords, valueWords);
    
    numBytesToWrite -= (keyWords.getLength() + valueWords.getLength());
    
    // Update counters, progress etc.
    context.getCounter(Counters.BYTES_WRITTEN).increment(
              keyWords.getLength() + valueWords.getLength());
    context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
    if (++itemCount % 200 == 0) {
      context.setStatus("wrote record " + itemCount + ". " + 
                         numBytesToWrite + " bytes left.");
    }
  }
  context.setStatus("done with " + itemCount + " records.");
}
 
Example 42
Project: hadoop   File: Credentials.java
/**
 * Return all the secret key entries in the in-memory map
 */
public List<Text> getAllSecretKeys() {
  List<Text> list = new java.util.ArrayList<Text>();
  list.addAll(secretKeysMap.keySet());

  return list;
}
 
Example 43
Project: hadoop   File: DistCh.java
private static JobConf createJobConf(Configuration conf) {
  JobConf jobconf = new JobConf(conf, DistCh.class);
  jobconf.setJobName(NAME);
  jobconf.setMapSpeculativeExecution(false);

  jobconf.setInputFormat(ChangeInputFormat.class);
  jobconf.setOutputKeyClass(Text.class);
  jobconf.setOutputValueClass(Text.class);

  jobconf.setMapperClass(ChangeFilesMapper.class);
  jobconf.setNumReduceTasks(0);
  return jobconf;
}
 
Example 44
Project: hadoop-oss   File: AbstractDelegationTokenIdentifier.java
@Override
public void write(DataOutput out) throws IOException {
  if (owner.getLength() > Text.DEFAULT_MAX_LEN) {
    throw new IOException("owner is too long to be serialized!");
  }
  if (renewer.getLength() > Text.DEFAULT_MAX_LEN) {
    throw new IOException("renewer is too long to be serialized!");
  }
  if (realUser.getLength() > Text.DEFAULT_MAX_LEN) {
    throw new IOException("realuser is too long to be serialized!");
  }
  writeImpl(out);
}
 
Example 45
Project: hadoop   File: TestNodeStatusUpdater.java
public MyResourceTracker4(Context context) {
  // create app Credentials
  org.apache.hadoop.security.token.Token<DelegationTokenIdentifier> token1 =
      new org.apache.hadoop.security.token.Token<DelegationTokenIdentifier>();
  token1.setKind(new Text("kind1"));
  expectedCredentials.addToken(new Text("token1"), token1);
  this.context = context;
}
 
Example 46
Project: hive-jq-udtf   File: JsonQueryUDTF.java
public static String asConstantNonNullString(final ObjectInspector oi, final String name) throws UDFArgumentException {
	if (!(oi instanceof WritableConstantStringObjectInspector))
		throw new UDFArgumentException(name + " must be a constant string.");
	final Text text = ((WritableConstantStringObjectInspector) oi).getWritableConstantValue();
	if (text == null)
		throw new UDFArgumentException(name + " must not be NULL.");
	return text.toString();
}
 
Example 47
Project: java-learn   File: WordCount.java
public void reduce(Text key, Iterable<IntWritable> values,
                   Context context
) throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
}
 
Example 48
Project: aliyun-maxcompute-data-collectors   File: MergeTextMapper.java
public void map(LongWritable key, Text val, Context c)
    throws IOException, InterruptedException {
  try {
    this.record.parse(val);
  } catch (RecordParser.ParseError pe) {
    throw new IOException(pe);
  }

  processRecord(this.record, c);
}
 
Example 49
Project: hadoop   File: TestFileSystem.java
public static void createControlFile(FileSystem fs,
                                     long megaBytes, int numFiles,
                                     long seed) throws Exception {

  LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");

  Path controlFile = new Path(CONTROL_DIR, "files");
  fs.delete(controlFile, true);
  Random random = new Random(seed);

  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, conf, controlFile, 
                              Text.class, LongWritable.class, CompressionType.NONE);

  long totalSize = 0;
  long maxSize = ((megaBytes / numFiles) * 2) + 1;
  try {
    while (totalSize < megaBytes) {
      Text name = new Text(Long.toString(random.nextLong()));

      long size = random.nextLong();
      if (size < 0)
        size = -size;
      size = size % maxSize;

      //LOG.info(" adding: name="+name+" size="+size);

      writer.append(name, new LongWritable(size));

      totalSize += size;
    }
  } finally {
    writer.close();
  }
  LOG.info("created control file for: "+totalSize+" bytes");
}
 
Example 50
Project: hadoop   File: KeyValueTextInputFormat.java
public RecordReader<Text, Text> getRecordReader(InputSplit genericSplit,
                                                JobConf job,
                                                Reporter reporter)
  throws IOException {
  
  reporter.setStatus(genericSplit.toString());
  return new KeyValueLineRecordReader(job, (FileSplit) genericSplit);
}
 
Example 51
Project: ditb   File: SequenceFileLogReader.java
/**
 * Call this method after init() has been executed
 * @return whether WAL compression is enabled
 */
static boolean isWALCompressionEnabled(final Metadata metadata) {
  // Check version is >= VERSION?
  Text txt = metadata.get(WAL_VERSION_KEY);
  if (txt == null || Integer.parseInt(txt.toString()) < COMPRESSION_VERSION) {
    return false;
  }
  // Now check that compression type is present.  Currently only one value.
  txt = metadata.get(WAL_COMPRESSION_TYPE_KEY);
  return txt != null && txt.equals(DICTIONARY_COMPRESSION_TYPE);
}
 
Example 52
Project: PACE   File: EncryptedReadWriteExample.java
private void execute(Opts opts) throws Exception {
  conn = opts.getConnector();

  // add the authorizations to the user
  Authorizations userAuthorizations = conn.securityOperations().getUserAuthorizations(opts.getPrincipal());
  ByteArraySet auths = new ByteArraySet(userAuthorizations.getAuthorizations());
  auths.addAll(opts.auths.getAuthorizations());
  if (!auths.isEmpty())
    conn.securityOperations().changeUserAuthorizations(opts.getPrincipal(), new Authorizations(auths));

  // create table
  if (opts.createtable) {
    SortedSet<Text> partitionKeys = new TreeSet<>();
    for (int i = Byte.MIN_VALUE; i < Byte.MAX_VALUE; i++)
      partitionKeys.add(new Text(new byte[] {(byte) i}));
    conn.tableOperations().create(opts.getTableName());
    conn.tableOperations().addSplits(opts.getTableName(), partitionKeys);
  }

  // send mutations
  createEntries(opts);

  // read entries
  if (opts.readEntries) {
    // Note that the user needs to have the authorizations for the specified scan authorizations
    // by an administrator first
    BatchScanner scanner = new EncryptedBatchScanner(conn, opts.getTableName(), opts.auths, 1, opts.encryptionConfig, opts.encryptionKeys);
    scanner.setRanges(Collections.singletonList(new Range()));
    for (Entry<Key,Value> entry : scanner)
      System.out.println(entry.getKey().toString() + " -> " + entry.getValue().toString());
  }

  // delete table
  if (opts.deletetable)
    conn.tableOperations().delete(opts.getTableName());
}
 
Example 53
Project: mapreduce-samples   File: WordCount.java
public void reduce(Text key, Iterable<IntWritable> values,
                   Context context) throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable val : values) {
        sum += val.get();
    }
    result.set(sum);
    context.write(key, result);
}
 
Example 54
Project: hadoop   File: ValueCountReduce.java
public void reduce(Object arg0, Iterator arg1, OutputCollector arg2, Reporter arg3) throws IOException {
  int count = 0;
  while (arg1.hasNext()) {
    count += 1;
    arg1.next();
  }
  arg2.collect(arg0, new Text("" + count));
}
 
Example 55
Project: PACE   File: EntryEncryptor.java
/**
 * Get the filter for the given value.
 *
 * @param col
 *          Column family to filter.
 * @return Results with columns to filter server ide and whether client side filtering is needed.
 */
public ColumnFilterResult getColumnFamilyFilter(Text col) {
  checkArgument(col != null, "col is null");

  MutableEntry key = new MutableEntry();
  key.colF = TextUtil.getBytes(col);

  return getColumnFamilyFilter(key, ImmutableSet.of(EntryField.COLUMN_FAMILY));
}
 
Example 56
Project: hadoop   File: JobSplitWriter.java
private static SplitMetaInfo[] writeOldSplits(
    org.apache.hadoop.mapred.InputSplit[] splits,
    FSDataOutputStream out, Configuration conf) throws IOException {
  SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
  if (splits.length != 0) {
    int i = 0;
    long offset = out.getPos();
    int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
        MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
    for(org.apache.hadoop.mapred.InputSplit split: splits) {
      long prevLen = out.getPos();
      Text.writeString(out, split.getClass().getName());
      split.write(out);
      long currLen = out.getPos();
      String[] locations = split.getLocations();
      if (locations.length > maxBlockLocations) {
        LOG.warn("Max block location exceeded for split: "
            + split + " splitsize: " + locations.length +
            " maxsize: " + maxBlockLocations);
        locations = Arrays.copyOf(locations,maxBlockLocations);
      }
      info[i++] = new JobSplit.SplitMetaInfo( 
          locations, offset,
          split.getLength());
      offset += currLen - prevLen;
    }
  }
  return info;
}
 
Example 57
Project: hadoop-oss   File: TestDelegationTokenAuthenticationHandlerWithMocks.java
@Test
public void testManagementOperations() throws Exception {
    testNonManagementOperation();
    testManagementOperationErrors();
    testGetToken(null, new Text("foo"));
    testGetToken("bar", new Text("foo"));
    testCancelToken();
    testRenewToken();
}
 
Example 58
Project: hadoop   File: TestDataJoin.java
public void testDataJoin() throws Exception {
  final int srcs = 4;
  JobConf job = new JobConf();
  job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  Path base = cluster.getFileSystem().makeQualified(new Path("/inner"));
  Path[] src = writeSimpleSrc(base, job, srcs);
  job.setInputFormat(SequenceFileInputFormat.class);
  Path outdir = new Path(base, "out");
  FileOutputFormat.setOutputPath(job, outdir);

  job.setMapperClass(SampleDataJoinMapper.class);
  job.setReducerClass(SampleDataJoinReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(SampleTaggedMapOutput.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormat(TextOutputFormat.class);
  job.setNumMapTasks(1);
  job.setNumReduceTasks(1);
  FileInputFormat.setInputPaths(job, src);
  try {
    JobClient.runJob(job);
    confirmOutput(outdir, job, srcs);
  } finally {
    base.getFileSystem(job).delete(base, true);
  }
}
 
Example 59
Project: hadoop   File: RandomTextWriter.java
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 * 
 * @throws IOException 
 */
public int run(String[] args) throws Exception {    
  if (args.length == 0) {
    return printUsage();    
  }
  
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES, 
       numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  
  Job job = Job.getInstance(conf);
  
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);        
  
  Class<? extends OutputFormat> outputFormatClass = 
    SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for(int i=0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass = 
          Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i-1]);
      return printUsage(); // exits
    }
  }

  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  
  System.out.println("Running " + numMaps + " maps.");
  
  // reducer NONE
  job.setNumReduceTasks(0);
  
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " + 
                     (endTime.getTime() - startTime.getTime()) /1000 + 
                     " seconds.");
  
  return ret;
}
 
Example 60
Project: dataSqueeze   File: BytesWritableCompactionMapperTest.java
@Test
public void testMapThreshold() throws Exception {
    when(configuration.get(Matchers.anyString())).thenReturn("0");
    mapper.map(NullWritable.get(), bytesWritable, context);
    Mockito.verify(context, Mockito.times(1)).write(Mockito.eq(new Text("/source/path")), Matchers.anyObject());
}