Java Code Examples for org.apache.hadoop.io.Text

The following are top-voted examples showing how to use org.apache.hadoop.io.Text. These examples are extracted from open source projects. You can vote up the examples you like; your votes will be used in our system to produce more good examples.
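
For orientation, here is a minimal, self-contained sketch of the basic Text API (not taken from any of the projects below). Text stores its contents as UTF-8 bytes, so getLength() returns a byte count, not a character count:

import org.apache.hadoop.io.Text;

public class TextBasics {
  public static void main(String[] args) {
    Text t = new Text("hello");    // Text stores its contents as UTF-8 bytes
    t.set("héllo");                // replace the contents in place
    byte[] raw = t.getBytes();     // backing array; only the first
    int len = t.getLength();       //   getLength() bytes are valid
    System.out.println(t + " uses " + len + " UTF-8 bytes"); // 6 bytes, 5 chars
  }
}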

Example 1
Project: Hphoto   File: AdminTable.java
public void createTable(Text table, String[] column) throws IOException {
	if(!this.client.tableExists(table)){
		HTableDescriptor desc = new HTableDescriptor(table.toString());
		for(int i = 0 ; i < column.length ; i++)
			desc.addFamily(new HColumnDescriptor(column[i]));
		this.client.createTable(desc);
	}
}
 
Example 2
Project: hadoop_ekg   File: DistCh.java
/** {@inheritDoc} */
public void write(DataOutput out) throws IOException {
  Text.writeString(out, src.toString());
  DistTool.writeString(out, owner);
  DistTool.writeString(out, group);

  boolean b = permission != null;
  out.writeBoolean(b);
  if (b) {permission.write(out);}
}
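
A matching readFields for this record layout would presumably look like the following. This is a hedged sketch: DistCh's actual field declarations are not shown here, so the field names and types simply mirror the write order above; FsPermission.read is the standard deserializer from org.apache.hadoop.fs.permission.

public void readFields(DataInput in) throws IOException {
  src = new Path(Text.readString(in));   // written with Text.writeString above
  owner = DistTool.readString(in);
  group = DistTool.readString(in);
  // the boolean flag records whether a permission was serialized
  permission = in.readBoolean() ? FsPermission.read(in) : null;
}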
 
Example 3
Project: hadoop_ekg   File: DatanodeInfo.java
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
  super.readFields(in);

  //TODO: move it to DatanodeID once DatanodeID is not stored in FSImage
  this.ipcPort = in.readShort() & 0x0000ffff;

  this.capacity = in.readLong();
  this.dfsUsed = in.readLong();
  this.remaining = in.readLong();
  this.lastUpdate = in.readLong();
  this.xceiverCount = in.readInt();
  this.location = Text.readString(in);
  this.hostName = Text.readString(in);
  setAdminState(WritableUtils.readEnum(in, AdminStates.class));
}
 
Example 4
Project: hivemall   File: FactorizationMachineUDTF.java
private void forwardAsStringFeature() throws HiveException {
    final FMStringFeatureMapModel model = (FMStringFeatureMapModel) _model;

    final Text feature = new Text();
    final FloatWritable f_Wi = new FloatWritable(0.f);
    final FloatWritable[] f_Vi = HiveUtils.newFloatArray(_factor, 0.f);

    final Object[] forwardObjs = new Object[3];
    forwardObjs[0] = feature;
    forwardObjs[1] = f_Wi;
    forwardObjs[2] = null;
    // W0
    feature.set("0");
    f_Wi.set(_model.getW0());
    // V0 is null
    forward(forwardObjs);

    // Wi, Vif (i starts from 1..P)
    forwardObjs[2] = Arrays.asList(f_Vi);

    final IMapIterator<String, Entry> itor = model.entries();
    while(itor.next() != -1) {
        String i = itor.getKey();
        assert (i != null);
        // set i
        feature.set(i);
        Entry entry = itor.getValue();
        // set Wi
        f_Wi.set(entry.W);
        // set Vif
        final float[] Vi = entry.Vf;
        for(int f = 0; f < _factor; f++) {
            float v = Vi[f];
            f_Vi[f].set(v);
        }
        forward(forwardObjs);
    }
}
 
Example 5
Project: Hphoto   File: HBaseTestCase.java
/**
 * Add content via the passed <code>updater</code> on the passed column
 * <code>column</code>.
 * Adds data of the form 'aaa', 'aab', etc., where key and value are the same.
 * @param updater An instance of {@link Incommon}.
 * @param column The column to write to.
 * @param startKeyBytes Where to start the rows inserted.
 * @param endKey Where to stop inserting rows.
 * @param ts Timestamp to write the content with.
 * @throws IOException
 */
protected static void addContent(final Incommon updater, final String column,
    final byte [] startKeyBytes, final Text endKey, final long ts)
throws IOException {
  // Add rows of three characters.  The first character starts with the
  // 'a' character and runs up to 'z'.  Per first character, we run the
  // second character over same range.  And same for the third so rows
  // (and values) look like this: 'aaa', 'aab', 'aac', etc.
  char secondCharStart = (char)startKeyBytes[1];
  char thirdCharStart = (char)startKeyBytes[2];
  EXIT: for (char c = (char)startKeyBytes[0]; c <= LAST_CHAR; c++) {
    for (char d = secondCharStart; d <= LAST_CHAR; d++) {
      for (char e = thirdCharStart; e <= LAST_CHAR; e++) {
        byte [] bytes = new byte [] {(byte)c, (byte)d, (byte)e};
        Text t = new Text(new String(bytes, HConstants.UTF8_ENCODING));
        if (endKey != null && endKey.getLength() > 0
            && endKey.compareTo(t) <= 0) {
          break EXIT;
        }
        long lockid = updater.startBatchUpdate(t);
        try {
          updater.put(lockid, new Text(column), bytes);
          if (ts == -1) {
            updater.commit(lockid);
          } else {
            updater.commit(lockid, ts);
          }
          lockid = -1;
        } finally {
          if (lockid != -1) {
            updater.abort(lockid);
          }
        }
      }
      // Set start character back to FIRST_CHAR after we've done first loop.
      thirdCharStart = FIRST_CHAR;
    }
    secondCharStart = FIRST_CHAR;
  }
}
 
Example 6
Project: Hphoto   File: TestGet.java
private void verifyGet(final HRegion r, final String expectedServer)
throws IOException {
  // This should return a value because there is only one family member
  byte [] value = r.get(ROW_KEY, CONTENTS);
  assertNotNull(value);
  
  // This should not return a value because there are multiple family members
  value = r.get(ROW_KEY, HConstants.COLUMN_FAMILY);
  assertNull(value);
  
  // Find out what getFull returns
  TreeMap<Text, byte []> values = r.getFull(ROW_KEY);
  
  // assertEquals(4, values.keySet().size());
  for(Iterator<Text> i = values.keySet().iterator(); i.hasNext(); ) {
    Text column = i.next();
    if (column.equals(HConstants.COL_SERVER)) {
      String server = Writables.bytesToString(values.get(column));
      assertEquals(expectedServer, server);
      LOG.info(server);
    }
  }
}
 
Example 7
Project: Hphoto   File: HBaseTestCase.java
/**
 * Add content to region <code>r</code> on the passed column
 * <code>column</code>.
 * Adds data of the form 'aaa', 'aab', etc., where key and value are the same.
 * @param r The region to write to.
 * @param column The column to write to.
 * @throws IOException
 */
protected static void addContent(final HRegion r, final String column)
throws IOException {
  Text startKey = r.getRegionInfo().getStartKey();
  Text endKey = r.getRegionInfo().getEndKey();
  byte [] startKeyBytes = startKey.getBytes();
  if (startKeyBytes == null || startKeyBytes.length == 0) {
    startKeyBytes = START_KEY_BYTES;
  }
  addContent(new HRegionIncommon(r), column, startKeyBytes, endKey, -1);
}
 
Example 8
Project: hadoop_ekg   File: TeraInputFormat.java
public boolean next(Text key, Text value) throws IOException {
  if (in.next(junk, line)) {
    if (line.getLength() < KEY_LENGTH) {
      key.set(line);
      value.clear();
    } else {
      byte[] bytes = line.getBytes();
      key.set(bytes, 0, KEY_LENGTH);
      value.set(bytes, KEY_LENGTH, line.getLength() - KEY_LENGTH);
    }
    return true;
  } else {
    return false;
  }
}
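
The Text.set(byte[], offset, length) calls above slice the key and value out of the shared line buffer without going through String. Exercised in isolation (the 10-byte key length mirrors TeraSort's record format, but is hard-coded here purely for illustration):

Text line = new Text("0123456789the rest of the record");
byte[] bytes = line.getBytes();
Text key = new Text();
Text value = new Text();
key.set(bytes, 0, 10);                        // "0123456789"
value.set(bytes, 10, line.getLength() - 10);  // "the rest of the record"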
 
Example 9
Project: hadoop_ekg   File: TeraSort.java
/**
 * Given a sorted set of cut points, build a trie that will find the correct
 * partition quickly.
 * @param splits the list of cut points
 * @param lower the lower bound of partitions 0..numPartitions-1
 * @param upper the upper bound of partitions 0..numPartitions-1
 * @param prefix the prefix that we have already checked against
 * @param maxDepth the maximum depth we will build a trie for
 * @return the trie node that will divide the splits correctly
 */
private static TrieNode buildTrie(Text[] splits, int lower, int upper, 
                                  Text prefix, int maxDepth) {
  int depth = prefix.getLength();
  if (depth >= maxDepth || lower == upper) {
    return new LeafTrieNode(depth, splits, lower, upper);
  }
  InnerTrieNode result = new InnerTrieNode(depth);
  Text trial = new Text(prefix);
  // append an extra byte on to the prefix
  trial.append(new byte[1], 0, 1);
  int currentBound = lower;
  for(int ch = 0; ch < 255; ++ch) {
    trial.getBytes()[depth] = (byte) (ch + 1);
    lower = currentBound;
    while (currentBound < upper) {
      if (splits[currentBound].compareTo(trial) >= 0) {
        break;
      }
      currentBound += 1;
    }
    trial.getBytes()[depth] = (byte) ch;
    result.child[ch] = buildTrie(splits, lower, currentBound, trial, 
                                 maxDepth);
  }
  // pick up the rest
  trial.getBytes()[depth] = 127;
  result.child[255] = buildTrie(splits, currentBound, upper, trial,
                                maxDepth);
  return result;
}
 
Example 10
Project: hadoop_ekg   File: Logalyzer.java
public int compare(byte[] b1, int s1, int l1,
                   byte[] b2, int s2, int l2) {
  
  if (sortSpec == null) {
    return super.compare(b1, s1, l1, b2, s2, l2);
  }
  
  try {
    Text logline1 = new Text(); 
    logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
    String line1 = logline1.toString();
    String[] logColumns1 = line1.split(columnSeparator);
    
    Text logline2 = new Text(); 
    logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
    String line2 = logline2.toString();
    String[] logColumns2 = line2.split(columnSeparator);
    
    if (logColumns1 == null || logColumns2 == null) {
      return super.compare(b1, s1, l1, b2, s2, l2);
    }
    
    // Compare column-wise according to *sortSpec*
    for (int i = 0; i < sortSpec.length; ++i) {
      int column = Integer.parseInt(sortSpec[i]);
      String c1 = logColumns1[column];
      String c2 = logColumns2[column];

      // Compare columns
      int comparison = super.compareBytes(
                                          c1.getBytes(), 0, c1.length(),
                                          c2.getBytes(), 0, c2.length()
                                          );

      // They differ!
      if (comparison != 0) {
        return comparison;
      }
    }
    
  } catch (IOException ioe) {
    LOG.fatal("Caught " + ioe);
    return 0;
  }
  
  return 0;
}
 
Example 11
Project: hadoop_ekg   File: DistCp.java
/** Check whether the file list has duplicates. */
static private void checkDuplication(FileSystem fs, Path file, Path sorted,
  Configuration conf) throws IOException {
  SequenceFile.Reader in = null;
  try {
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs,
      new Text.Comparator(), Text.class, Text.class, conf);
    sorter.sort(file, sorted);
    in = new SequenceFile.Reader(fs, sorted, conf);

    Text prevdst = null, curdst = new Text();
    Text prevsrc = null, cursrc = new Text(); 
    for(; in.next(curdst, cursrc); ) {
      if (prevdst != null && curdst.equals(prevdst)) {
        throw new DuplicationException(
          "Invalid input, there are duplicated files in the sources: "
          + prevsrc + ", " + cursrc);
      }
      prevdst = curdst;
      curdst = new Text();
      prevsrc = cursrc;
      cursrc = new Text();
    }
  }
  finally {
    checkAndClose(in);
  }
}
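
Text.Comparator, which drives the sorter above, compares serialized records byte-by-byte without deserializing them. A standalone sketch (the two sample strings are hypothetical):

DataOutputBuffer b1 = new DataOutputBuffer();
new Text("apple").write(b1);
DataOutputBuffer b2 = new DataOutputBuffer();
new Text("banana").write(b2);
int cmp = new Text.Comparator().compare(b1.getData(), 0, b1.getLength(),
                                        b2.getData(), 0, b2.getLength());
// cmp < 0: "apple" sorts before "banana"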
 
Example 12
Project: hadoop_ekg   File: TestIndexedSort.java
public WritableSortable(int j) throws IOException {
  seed = r.nextLong();
  r.setSeed(seed);
  Text t = new Text();
  StringBuffer sb = new StringBuffer();
  indices = new int[j];
  offsets = new int[j];
  check = new String[j];
  DataOutputBuffer dob = new DataOutputBuffer();
  for (int i = 0; i < j; ++i) {
    indices[i] = i;
    offsets[i] = dob.getLength();
    genRandom(t, r.nextInt(15) + 1, sb);
    t.write(dob);
    check[i] = t.toString();
  }
  eob = dob.getLength();
  bytes = dob.getData();
  comparator = WritableComparator.get(Text.class);
}
 
Example 13
Project: hadoop_ekg   File: Utils.java
/**
 * Read a String as a VInt n, followed by n bytes in Text format.
 * 
 * @param in
 *          The input stream.
 * @return The string
 * @throws IOException
 */
public static String readString(DataInput in) throws IOException {
  int length = readVInt(in);
  if (length == -1) return null;
  byte[] buffer = new byte[length];
  in.readFully(buffer);
  return Text.decode(buffer);
}
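
A write-side counterpart would presumably look like the following. This is a hedged sketch: it assumes a companion writeVInt helper in the same Utils class, mirroring the readVInt used above.

public static void writeString(DataOutput out, String s) throws IOException {
  if (s == null) {
    writeVInt(out, -1);                // mirrors the -1 null marker above
    return;
  }
  ByteBuffer bytes = Text.encode(s);   // UTF-8 encode
  writeVInt(out, bytes.limit());
  out.write(bytes.array(), 0, bytes.limit());
}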
 
Example 14
Project: hadoop_ekg   File: SequenceFileInputFilter.java
/** Filtering method.
 * Returns true if MD5(key) % frequency == 0; otherwise returns false.
 * @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)
 */
public boolean accept(Object key) {
  try {
    long hashcode;
    if (key instanceof Text) {
      hashcode = MD5Hashcode((Text)key);
    } else if (key instanceof BytesWritable) {
      hashcode = MD5Hashcode((BytesWritable)key);
    } else {
      ByteBuffer bb;
      bb = Text.encode(key.toString());
      hashcode = MD5Hashcode(bb.array(), 0, bb.limit());
    }
    // hashcode/frequency*frequency == hashcode  <=>  hashcode % frequency == 0
    if (hashcode / frequency * frequency == hashcode)
      return true;
  } catch(Exception e) {
    LOG.warn(e);
    throw new RuntimeException(e);
  }
  return false;
}
 
Example 15
Project: hadoop-20   File: TestDFSUtil.java
private long write(int count, String s, DataOutputStream dos, boolean opt) 
    throws IOException{
  long start = System.currentTimeMillis();
  if (opt) {
    for(int i = 0; i < count; i++) {
      Text.writeStringOpt(dos, s);
    }
  } else {
    for(int i = 0; i < count; i++) {
      Text.writeString(dos, s);
    }
  }
  long stop = System.currentTimeMillis();
  return stop - start;
}
 
Example 16
Project: hadoop-20   File: TestDFSUtil.java
private long read(int count, DataInputStream dos, boolean opt)
    throws IOException {
  long start = System.currentTimeMillis();
  if (opt) {
    for (int i = 0; i < count; i++) {
      Text.readStringOpt(dos);
    }
  } else {
    for (int i = 0; i < count; i++) {
      Text.readString(dos);
    }
  }
  long stop = System.currentTimeMillis();
  return stop - start;
}
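
A hypothetical driver wiring these two helpers to an in-memory stream might look like this (the count and sample string are illustrative, not from TestDFSUtil):

ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream dos = new DataOutputStream(baos);
long wMillis = write(100000, "sample string", dos, true);   // writeStringOpt path
DataInputStream dis = new DataInputStream(
    new ByteArrayInputStream(baos.toByteArray()));
long rMillis = read(100000, dis, true);                     // readStringOpt path
System.out.println("write: " + wMillis + " ms, read: " + rMillis + " ms");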
 
Example 17
Project: HDP-2.2-Patched   File: RoundRobinUserResolver.java
/**
 * The user list assumes one user per line.
 * Each line in the users-list file is of the form &lt;username&gt;[,group]*
 * <br> Group names are ignored (they are not parsed at all).
 */
private List<UserGroupInformation> parseUserList(URI userUri, 
                                                 Configuration conf) 
throws IOException {
  if (null == userUri) {
    return Collections.emptyList();
  }
  
  final Path userloc = new Path(userUri.toString());
  final Text rawUgi = new Text();
  final FileSystem fs = userloc.getFileSystem(conf);
  final ArrayList<UserGroupInformation> ugiList =
      new ArrayList<UserGroupInformation>();

  LineReader in = null;
  try {
    in = new LineReader(fs.open(userloc));
    while (in.readLine(rawUgi) > 0) {//line is of the form username[,group]*
      if(rawUgi.toString().trim().equals("")) {
        continue; //Continue on empty line
      }
      // e is end position of user name in this line
      int e = rawUgi.find(",");
      if (e == 0) {
        throw new IOException("Missing username: " + rawUgi);
      }
      if (e == -1) {
        e = rawUgi.getLength();
      }
      final String username = Text.decode(rawUgi.getBytes(), 0, e).trim();
      UserGroupInformation ugi = null;
      try {
        ugi = UserGroupInformation.createProxyUser(username,
                  UserGroupInformation.getLoginUser());
      } catch (IOException ioe) {
        LOG.error("Error while creating a proxy user " ,ioe);
      }
      if (ugi != null) {
        ugiList.add(ugi);
      }
      // No need to parse groups, even if they exist. Go to next line
    }
  } finally {
    if (in != null) {
      in.close();
    }
  }
  return ugiList;
}
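
The find/decode pattern above extracts the username without materializing the whole line as a String. In isolation (with a hypothetical input line; note that Text.decode throws a checked CharacterCodingException, a subclass of IOException) it behaves like this:

Text rawUgi = new Text("alice,staff,admins");
int e = rawUgi.find(",");                 // byte offset of the first ',', or -1
if (e == -1) {
  e = rawUgi.getLength();
}
String username = Text.decode(rawUgi.getBytes(), 0, e).trim();  // "alice"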
 
Example 18
Project: HDP-2.2-Patched   File: TestDelegationToken.java
@Test
public void testOverlongDtidSerialization() throws IOException {
  byte[] bigBuf = new byte[Text.DEFAULT_MAX_LEN + 1];
  for (int i = 0; i < bigBuf.length; i++) {
    bigBuf[i] = 0;
  }
  assertFalse(testDelegationTokenIdentiferSerializationRoundTrip(
      new Text(bigBuf), new Text("renewer"), new Text("realUser")));
  assertFalse(testDelegationTokenIdentiferSerializationRoundTrip(
      new Text("owner"), new Text(bigBuf), new Text("realUser")));
  assertFalse(testDelegationTokenIdentiferSerializationRoundTrip(
      new Text("owner"), new Text("renewer"), new Text(bigBuf)));
}
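
The failing round trips above exercise Text's length-limited deserialization. A minimal sketch of the same failure mode, assuming the readFields(DataInput, int) overload that enforces the cap in Hadoop 2.x:

Text big = new Text(new byte[Text.DEFAULT_MAX_LEN + 1]);
DataOutputBuffer out = new DataOutputBuffer();
big.write(out);
DataInputBuffer in = new DataInputBuffer();
in.reset(out.getData(), out.getLength());
new Text().readFields(in, Text.DEFAULT_MAX_LEN);  // throws IOException: too long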