Java Code Examples for org.apache.hadoop.fs.FSDataInputStream#readLine()

The following examples show how to use org.apache.hadoop.fs.FSDataInputStream#readLine(). Each example is taken from an open-source project; the source file, project, and license are noted above the code. Note that readLine() is not declared by FSDataInputStream itself: it is inherited from java.io.DataInputStream, where it is deprecated because it does not properly convert bytes to characters, so new code usually wraps the stream in a BufferedReader instead.
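Before the project examples, here is a minimal, self-contained sketch of the call in isolation. The class name and input path are hypothetical and not taken from any of the projects below; the loop simply reads until readLine() returns null at end of file.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadLineSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // Hypothetical input path; point this at a file that exists on your file system.
    Path input = new Path("/tmp/example.txt");
    // readLine() is inherited from java.io.DataInputStream (deprecated there),
    // so compiling this produces a deprecation warning.
    try (FSDataInputStream in = fs.open(input)) {
      String line;
      while ((line = in.readLine()) != null) {
        System.out.println(line);
      }
    }
  }
}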
Example 1
Source File: KMeansDriver.java    From flink-perf with Apache License 2.0
public static void createCentersSequenceFile (Configuration conf, FileSystem fs, String centroidsPath, String sequenceFilePath) throws Exception {
	Path seqFile = new Path (sequenceFilePath);
	if (fs.exists(seqFile)) {
		fs.delete(seqFile, true);
	}
	FSDataInputStream inputStream = fs.open(new Path(centroidsPath));
	SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, seqFile, Centroid.class, IntWritable.class);
	IntWritable value = new IntWritable(0);
	while (inputStream.available() > 0) {
		String line = inputStream.readLine();
		StringTokenizer tokenizer = new StringTokenizer(line, " ");
		int dim = tokenizer.countTokens() - 1;
		int clusterId = Integer.valueOf(tokenizer.nextToken());
		double [] coords = new double [dim];
		for (int i = 0; i < dim; i++) {
			coords[i] = Double.valueOf(tokenizer.nextToken());
		}
		Centroid cluster = new Centroid(clusterId, new Point(coords));
		writer.append(cluster, value);
	}
	IOUtils.closeStream(writer);
	inputStream.close();
}
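A note on Example 1: the loop condition inputStream.available() > 0 relies on available(), which is only an estimate of the bytes readable without blocking and is generally discouraged as an end-of-file check, and readLine() itself is deprecated in java.io.DataInputStream. A common alternative is to wrap the FSDataInputStream in a BufferedReader. The sketch below is illustrative only; the class name and path are assumptions, not part of the original example.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadCentroidsSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Hypothetical path; the original example reads a centroids file with one record per line.
    Path centroidsPath = new Path("/tmp/centroids.txt");
    // BufferedReader decodes bytes with an explicit charset and signals end of file
    // by returning null, so neither the deprecated readLine() nor available() is needed.
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(fs.open(centroidsPath), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        // Parse each line here, e.g. with a StringTokenizer as in Example 1.
        System.out.println(line);
      }
    }
  }
}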
 
Example 2
Source File: TestMerge.java    From hadoop with Apache License 2.0
private void verifyOutput(RunningJob submittedJob, FileSystem fileSystem)
  throws Exception {
  FSDataInputStream dis = null;
  long numValidRecords = 0;
  long numInvalidRecords = 0;
  long numMappersLaunched = NUM_MAPPERS;
  String prevKeyValue = "000000000";
  Path[] fileList = 
    FileUtil.stat2Paths(fileSystem.listStatus(OUTPUT,
        new Utils.OutputFileUtils.OutputFilesFilter()));
  for (Path outFile : fileList) {
    try {
      dis = fileSystem.open(outFile);
      String record;
      while((record = dis.readLine()) != null) {
        // Split the line into key and value.
        int blankPos = record.indexOf(" ");
        String keyString = record.substring(0, blankPos);
        String valueString = record.substring(blankPos+1);
        // Check for sorted output and correctness of record.
        if (keyString.compareTo(prevKeyValue) >= 0
            && keyString.equals(valueString)) {
          prevKeyValue = keyString;
          numValidRecords++;
        } else {
          numInvalidRecords++;
        }
      }
    } finally {
      if (dis != null) {
        dis.close();
        dis = null;
      }
    }
  }
  // Make sure we got all input records in the output in sorted order.
  assertEquals((long)(NUM_MAPPERS*NUM_LINES), numValidRecords);
  // Make sure there is no extraneous invalid record.
  assertEquals(0, numInvalidRecords);
}
 
Example 3
Source File: TestMRIntermediateDataEncryption.java    From hadoop with Apache License 2.0
private void verifyOutput(RunningJob submittedJob, FileSystem fileSystem, int numMappers, int numLines)
  throws Exception {
  FSDataInputStream dis = null;
  long numValidRecords = 0;
  long numInvalidRecords = 0;
  String prevKeyValue = "000000000";
  Path[] fileList =
    FileUtil.stat2Paths(fileSystem.listStatus(OUTPUT,
        new Utils.OutputFileUtils.OutputFilesFilter()));
  for (Path outFile : fileList) {
    try {
      dis = fileSystem.open(outFile);
      String record;
      while((record = dis.readLine()) != null) {
        // Split the line into key and value.
        int blankPos = record.indexOf(" ");
        String keyString = record.substring(0, blankPos);
        String valueString = record.substring(blankPos+1);
        // Check for sorted output and correctness of record.
        if (keyString.compareTo(prevKeyValue) >= 0
            && keyString.equals(valueString)) {
          prevKeyValue = keyString;
          numValidRecords++;
        } else {
          numInvalidRecords++;
        }
      }
    } finally {
      if (dis != null) {
        dis.close();
        dis = null;
      }
    }
  }
  // Make sure we got all input records in the output in sorted order.
  assertEquals((long)(numMappers * numLines), numValidRecords);
  // Make sure there is no extraneous invalid record.
  assertEquals(0, numInvalidRecords);
}
 
Example 4
Source File: TestMerge.java    From big-c with Apache License 2.0
The code is identical to Example 2 above and is not repeated here.
 
Example 5
Source File: TestMRIntermediateDataEncryption.java    From big-c with Apache License 2.0
The code is identical to Example 3 above and is not repeated here.
 
Example 6
Source File: JobHistoryFileParserHadoop2.java    From hraven with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public void parse(byte[] historyFileContents, JobKey jobKey)
    throws ProcessingException {

  this.jobKey = jobKey;
  this.jobKeyBytes = jobKeyConv.toBytes(jobKey);
  this.jobDetails = new JobDetails(jobKey);
  initializeJobDetails();
  setJobId(jobKey.getJobId().getJobIdString());

  try {
    FSDataInputStream in =
        new FSDataInputStream(new ByteArrayWrapper(historyFileContents));

    /** first line is the version, ignore it */
    String versionIgnore = in.readLine();

    /** second line in file is the schema */
    this.schema = schema.parse(in.readLine());

    /** now figure out the schema */
    understandSchema(schema.toString());

    /** now read the rest of the file */
    this.reader = new GenericDatumReader<GenericRecord>(schema);
    this.decoder = DecoderFactory.get().jsonDecoder(schema, in);

    GenericRecord record = null;
    Hadoop2RecordType recType = null;
    try {
      while ((record = reader.read(null, decoder)) != null) {
        if (record.get(TYPE) != null) {
          recType = EVENT_RECORD_NAMES.get(record.get(TYPE).toString());
        } else {
          throw new ProcessingException("expected one of "
              + Arrays.asList(Hadoop2RecordType.values())
              + " \n but not found, cannot process this record! " + jobKey);
        }
        if (recType == null) {
          throw new ProcessingException("new record type has surfaced: "
              + record.get(TYPE).toString() + " cannot process this record! " + jobKey);
        }
        // GenericRecord's get returns an Object
        Object eDetails = record.get(EVENT);

        // confirm that we got an "event" object
        if (eDetails != null) {
          JSONObject eventDetails = new JSONObject(eDetails.toString());
          processRecords(recType, eventDetails);
        } else {
          throw new ProcessingException("expected event details but not found "
              + record.get(TYPE).toString() + " cannot process this record! " + jobKey);
        }
      }
    } catch (EOFException eof) {
      // not an error, simply end of file
      LOG.info("Done parsing file, reached eof for " + jobKey);
    }
  } catch (IOException ioe) {
    throw new ProcessingException(" Unable to parse history file in function parse, "
        + "cannot process this record!" + jobKey + " error: ", ioe);
  } catch (JSONException jse) {
    throw new ProcessingException(" Unable to parse history file in function parse, "
        + "cannot process this record! " + jobKey + " error: ", jse);
  } catch (IllegalArgumentException iae) {
    throw new ProcessingException(" Unable to parse history file in function parse, "
        + "cannot process this record! " + jobKey + " error: ", iae);
  }

  /*
   * set the job status for this job once the entire file is parsed
   * this has to be done separately
   * since JOB_FINISHED event is missing the field jobStatus,
   * where as JOB_KILLED and JOB_FAILED
   * events are not so we need to look through the whole file to confirm
   * the job status and then generate the put
   */
  Put jobStatusPut = getJobStatusPut();
  this.jobPuts.add(jobStatusPut);

  // set the hadoop version for this record
  Put versionPut = getHadoopVersionPut(JobHistoryFileParserFactory.getHistoryFileVersion2(), this.jobKeyBytes);
  this.jobPuts.add(versionPut);

  LOG.info("For " + this.jobKey + " #jobPuts " + jobPuts.size() + " #taskPuts: "
      + taskPuts.size());
}
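A note on Example 6: ByteArrayWrapper is not a Hadoop class; it is presumably defined in the hraven codebase. Something like it is needed because the FSDataInputStream constructor rejects any stream that does not implement both Seekable and PositionedReadable, so a plain ByteArrayInputStream cannot be passed in directly. The sketch below is an illustrative wrapper of that kind, not hraven's actual implementation; the class name and messages are made up.

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;

/** Minimal in-memory stream that the FSDataInputStream constructor will accept. */
class SeekableByteArrayInputStream extends ByteArrayInputStream
    implements Seekable, PositionedReadable {

  SeekableByteArrayInputStream(byte[] data) {
    super(data);
  }

  @Override
  public void seek(long newPos) throws IOException {
    if (newPos < 0 || newPos > count) {
      throw new IOException("Seek position out of range: " + newPos);
    }
    pos = (int) newPos;
  }

  @Override
  public long getPos() {
    return pos;
  }

  @Override
  public boolean seekToNewSource(long targetPos) {
    // A single in-memory copy has no alternate sources.
    return false;
  }

  @Override
  public int read(long position, byte[] buffer, int offset, int length) {
    if (position >= count) {
      return -1;
    }
    int toCopy = Math.min(length, count - (int) position);
    System.arraycopy(buf, (int) position, buffer, offset, toCopy);
    return toCopy;
  }

  @Override
  public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
    if (read(position, buffer, offset, length) < length) {
      throw new IOException("Not enough bytes left to read fully");
    }
  }

  @Override
  public void readFully(long position, byte[] buffer) throws IOException {
    readFully(position, buffer, 0, buffer.length);
  }
}

Usage mirrors the example: new FSDataInputStream(new SeekableByteArrayInputStream(historyFileContents)) yields a stream whose readLine() can peel off the version and schema header lines before the remaining bytes are handed to the Avro decoder.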