org.apache.hadoop.io.SequenceFile Java Examples

The following examples show how to use org.apache.hadoop.io.SequenceFile. They are drawn from a range of open-source projects; each example lists its source file, the project it comes from, and that project's license.
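Before the project examples, here is a minimal, self-contained sketch of the basic write/read cycle. It uses the option-based createWriter/Reader API (available since Hadoop 2.x, and also used in Examples #1, #13, #24, and #26 below); the file name demo.seq and the class name SequenceFileDemo are illustrative placeholders, not taken from any project below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("demo.seq");

    // Write a few key/value pairs.
    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class));
    try {
      for (int i = 0; i < 5; i++) {
        writer.append(new IntWritable(i), new Text("value_" + i));
      }
    } finally {
      IOUtils.closeStream(writer);
    }

    // Read the pairs back; next(key, value) returns false at end of file.
    SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(path));
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    } finally {
      IOUtils.closeStream(reader);
    }
  }
}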
Example #1
Source File: TestUniformSizeInputFormat.java    From big-c with Apache License 2.0
private void checkSplits(Path listFile, List<InputSplit> splits) throws IOException {
  long lastEnd = 0;

  // Verify that each split's start matches the previous split's end,
  // so that no part of the input is missed
  for (InputSplit split : splits) {
    FileSplit fileSplit = (FileSplit) split;
    long start = fileSplit.getStart();
    Assert.assertEquals(lastEnd, start);
    lastEnd = start + fileSplit.getLength();
  }

  // Verify there is nothing more to read from the input file
  SequenceFile.Reader reader
          = new SequenceFile.Reader(cluster.getFileSystem().getConf(),
                  SequenceFile.Reader.file(listFile));

  try {
    reader.seek(lastEnd);
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    Assert.assertFalse(reader.next(srcRelPath, srcFileStatus));
  } finally {
    IOUtils.closeStream(reader);
  }
}
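Note that SequenceFile.Reader.seek(long) must land on a record boundary; here lastEnd is the cumulative end of the final split, i.e. the end of the listing file, so the test expects the subsequent next() call to return false. For arbitrary byte offsets, sync(long) positions the reader at the next sync marker instead.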
 
Example #2
Source File: SequenceWriter.java    From nutchpy with Apache License 2.0
public static void write_seq() throws IOException {

    String uri = "test_file.seq";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);

    IntWritable key = new IntWritable();
    Text value = new Text();

    SequenceFile.Writer writer = null;

    try {
        writer = SequenceFile.createWriter(fs, conf, path,
                key.getClass(), value.getClass());
        for (int i = 0; i < 100; i++) {
            key.set(100 - i);
            value.set(DATA[i % DATA.length]);
            System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
 
Example #3
Source File: TestSequenceFileLoader.java    From spork with Apache License 2.0
@Override
public void setUp() throws Exception {
  pigServer = new PigServer(LOCAL);
  File tmpFile = File.createTempFile("test", ".txt");
  tmpFileName = tmpFile.getAbsolutePath();
  System.err.println("fileName: "+tmpFileName);
  Path path = new Path("file:///"+tmpFileName);
  JobConf conf = new JobConf();
  FileSystem fs = FileSystem.get(path.toUri(), conf);

  IntWritable key = new IntWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path,
                                       key.getClass(), value.getClass());
    for (int i=0; i < DATA.length; i++) {
      key.set(i);
      value.set(DATA[i]);
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
 
Example #4
Source File: DistributedFSCheck.java    From hadoop with Apache License 2.0
private void createInputFile(String rootName) throws IOException {
  cleanup();  // clean up if previous run failed

  Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, fsConfig, inputFile, 
                              Text.class, LongWritable.class, CompressionType.NONE);
  
  try {
    nrFiles = 0;
    listSubtree(new Path(rootName), writer);
  } finally {
    writer.close();
  }
  LOG.info("Created map input files.");
}
 
Example #5
Source File: TestJoinDatamerge.java    From hadoop with Apache License 2.0
private static int countProduct(IntWritable key, Path[] src, 
    Configuration conf) throws IOException {
  int product = 1;
  for (Path p : src) {
    int count = 0;
    SequenceFile.Reader r = new SequenceFile.Reader(
      cluster.getFileSystem(), p, conf);
    IntWritable k = new IntWritable();
    IntWritable v = new IntWritable();
    while (r.next(k, v)) {
      if (k.equals(key)) {
        count++;
      }
    }
    r.close();
    if (count != 0) {
      product *= count;
    }
  }
  return product;
}
 
Example #6
Source File: KMeansReducer.java    From mapreduce-kmeans with Apache License 2.0
@SuppressWarnings("deprecation")
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
	super.cleanup(context);
	Configuration conf = context.getConfiguration();
	Path outPath = new Path(conf.get("centroid.path"));
	FileSystem fs = FileSystem.get(conf);
	fs.delete(outPath, true);
	try (SequenceFile.Writer out = SequenceFile.createWriter(fs, context.getConfiguration(), outPath,
			ClusterCenter.class, IntWritable.class)) {
		final IntWritable value = new IntWritable(0);
		for (ClusterCenter center : centers) {
			out.append(center, value);
		}
	}
}
 
Example #7
Source File: SeqCombiner.java    From compiler with Apache License 2.0
public static long readAndAppendAst(Configuration conf, FileSystem fileSystem, MapFile.Writer writer, String fileName, long lastKey) throws IOException {
	long newLastKey = lastKey;
	SequenceFile.Reader r = new SequenceFile.Reader(fileSystem, new Path(fileName), conf);
	LongWritable longKey = new LongWritable();
	BytesWritable value = new BytesWritable();
	try {
		while (r.next(longKey, value)) {
			newLastKey = longKey.get() + lastKey;
			writer.append(new LongWritable(newLastKey), value);
		}
	} catch (Exception e) {
		System.err.println(fileName);
		e.printStackTrace();
	} finally {
		r.close();
	}
	return newLastKey;
}
 
Example #8
Source File: ProxiedFileSystemWrapper.java    From incubator-gobblin with Apache License 2.0
/**
 * Get token from the token sequence file.
 * @param authPath
 * @param proxyUserName
 * @return Token for proxyUserName if it exists.
 * @throws IOException
 */
private static Optional<Token<?>> getTokenFromSeqFile(String authPath, String proxyUserName) throws IOException {
  try (Closer closer = Closer.create()) {
    FileSystem localFs = FileSystem.getLocal(new Configuration());
    SequenceFile.Reader tokenReader =
        closer.register(new SequenceFile.Reader(localFs, new Path(authPath), localFs.getConf()));
    Text key = new Text();
    Token<?> value = new Token<>();
    while (tokenReader.next(key, value)) {
      LOG.info("Found token for " + key);
      if (key.toString().equals(proxyUserName)) {
        return Optional.<Token<?>> of(value);
      }
    }
  }
  return Optional.absent();
}
 
Example #9
Source File: 1000021_TestCDbwEvaluator_t.java    From coming with MIT License
private void checkRefPoints(int numIterations) throws IOException {
  for (int i = 0; i <= numIterations; i++) {
    Path out = new Path(getTestTempDirPath("output"), "representativePoints-" + i);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus file : fs.listStatus(out)) {
      if (!file.getPath().getName().startsWith(".")) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
        try {
          Writable clusterId = new IntWritable(0);
          VectorWritable point = new VectorWritable();
          while (reader.next(clusterId, point)) {
            System.out.println("\tC-" + clusterId + ": " + AbstractCluster.formatVector(point.get(), null));
          }
        } finally {
          reader.close();
        }
      }
    }
  }
}
 
Example #10
Source File: TestSequenceFileWriter.java    From suro with Apache License 2.0
private int checkFileContents(String filePath, String message) throws IOException {
    SequenceFile.Reader r = new SequenceFile.Reader(
            FileSystem.get(new Configuration()),
            new Path(filePath),
            new Configuration());

    Text routingKey = new Text();
    MessageWritable value = new MessageWritable();
    StringSerDe serde = new StringSerDe();

    int i = 0;
    while (r.next(routingKey, value)) {
        assertEquals(routingKey.toString(), "routingKey");
        assertEquals(serde.deserialize(value.getMessage().getPayload()), message + i);
        ++i;
    }
    r.close();

    return i;
}
 
Example #11
Source File: DistributedFSCheck.java    From big-c with Apache License 2.0
private void createInputFile(String rootName) throws IOException {
  cleanup();  // clean up if previous run failed

  Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
  SequenceFile.Writer writer =
    SequenceFile.createWriter(fs, fsConfig, inputFile, 
                              Text.class, LongWritable.class, CompressionType.NONE);
  
  try {
    nrFiles = 0;
    listSubtree(new Path(rootName), writer);
  } finally {
    writer.close();
  }
  LOG.info("Created map input files.");
}
 
Example #12
Source File: TestMapRed.java    From RDFS with Apache License 2.0
public void reduce(WritableComparable key, Iterator values,
                   OutputCollector output, Reporter reporter
                   ) throws IOException {
  if (first) {
    first = false;
    MapOutputFile mapOutputFile = new MapOutputFile(taskId.getJobID());
    mapOutputFile.setConf(conf);
    Path input = mapOutputFile.getInputFile(0, taskId);
    FileSystem fs = FileSystem.get(conf);
    assertTrue("reduce input exists " + input, fs.exists(input));
    SequenceFile.Reader rdr = 
      new SequenceFile.Reader(fs, input, conf);
    assertEquals("is reduce input compressed " + input, 
                 compressInput, 
                 rdr.isCompressed());
    rdr.close();          
  }
}
 
Example #13
Source File: TensorReaderBinaryBlockParallel.java    From systemds with Apache License 2.0
@Override
public Object call() throws Exception {
	TensorBlock value = new TensorBlock();
	TensorIndexes key = new TensorIndexes();
	//directly read from sequence files (individual partfiles)
	try(SequenceFile.Reader reader = new SequenceFile.Reader(_job, SequenceFile.Reader.file(_path))) {
		//note: next(key, value) does not yet exploit the given serialization classes, 
		//record reader does but is generally slower.
		while (reader.next(key, value)) {
			if( value.isEmpty(false) )
				continue;
			int[] lower = new int[_dims.length];
			int[] upper = new int[lower.length];
			UtilFunctions.getBlockBounds(key, value.getLongDims(), _blen, lower, upper);
			_dest.copy(lower, upper, value);
		}
	}
	
	return null;
}
 
Example #14
Source File: SequenceFileUtil.java    From alchemy with Apache License 2.0
public static void writeSequenceFile(String path) throws Exception{
	Writer.Option filePath = Writer.file(new Path(path));
	Writer.Option keyClass = Writer.keyClass(IntWritable.class);
	Writer.Option valueClass = Writer.valueClass(Text.class);
	Writer.Option compression = Writer.compression(CompressionType.NONE);
	Writer writer = SequenceFile.createWriter(configuration, filePath, keyClass, valueClass, compression);
	IntWritable key = new IntWritable();
	Text value = new Text("");
	for(int i=0;i<100;i++){
		key.set(i);
		value.set("value_"+i);
		writer.append(key, value);
	}
	writer.hflush();
	writer.close();
}
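A matching read path can use the option-based Reader constructor (the same API style as Examples #1 and #13). A minimal sketch, assuming the same configuration field and a path written by the method above:

public static void readSequenceFile(String path) throws Exception {
	// SequenceFile.Reader implements Closeable, so try-with-resources closes it.
	try (SequenceFile.Reader reader = new SequenceFile.Reader(configuration,
			SequenceFile.Reader.file(new Path(path)))) {
		IntWritable key = new IntWritable();
		Text value = new Text();
		while (reader.next(key, value)) {
			System.out.println(key.get() + " = " + value);
		}
	}
}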
 
Example #15
Source File: TestGlobbedCopyListing.java    From big-c with Apache License 2.0
private void verifyContents(Path listingPath) throws Exception {
  SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(),
                                            listingPath, new Configuration());
  Text key   = new Text();
  CopyListingFileStatus value = new CopyListingFileStatus();
  Map<String, String> actualValues = new HashMap<String, String>();
  while (reader.next(key, value)) {
    if (value.isDirectory() && key.toString().equals("")) {
      // ignore root with empty relPath, which is an entry to be 
      // used for preserving root attributes etc.
      continue;
    }
    actualValues.put(value.getPath().toString(), key.toString());
  }

  Assert.assertEquals(expectedValues.size(), actualValues.size());
  for (Map.Entry<String, String> entry : actualValues.entrySet()) {
    Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
  }
}
 
Example #16
Source File: BlurInputFormatTest.java    From incubator-retired-blur with Apache License 2.0
private void walkOutput(Path output, Configuration conf, ResultReader resultReader) throws IOException {
  FileSystem fileSystem = output.getFileSystem(conf);
  FileStatus fileStatus = fileSystem.getFileStatus(output);
  if (fileStatus.isDir()) {
    FileStatus[] listStatus = fileSystem.listStatus(output, new PathFilter() {
      @Override
      public boolean accept(Path path) {
        return !path.getName().startsWith("_");
      }
    });
    for (FileStatus fs : listStatus) {
      walkOutput(fs.getPath(), conf, resultReader);
    }
  } else {
    Reader reader = new SequenceFile.Reader(fileSystem, output, conf);
    Text rowId = new Text();
    TableBlurRecord tableBlurRecord = new TableBlurRecord();
    while (reader.next(rowId, tableBlurRecord)) {
      resultReader.read(rowId, tableBlurRecord);
    }
    reader.close();
  }
}
 
Example #17
Source File: TestCopyListing.java    From hadoop with Apache License 2.0
@Test
public void testFailOnCloseError() throws IOException {
  File inFile = File.createTempFile("TestCopyListingIn", null);
  inFile.deleteOnExit();
  File outFile = File.createTempFile("TestCopyListingOut", null);
  outFile.deleteOnExit();
  List<Path> srcs = new ArrayList<Path>();
  srcs.add(new Path(inFile.toURI()));
  
  Exception expectedEx = new IOException("boom");
  SequenceFile.Writer writer = mock(SequenceFile.Writer.class);
  doThrow(expectedEx).when(writer).close();
  
  SimpleCopyListing listing = new SimpleCopyListing(getConf(), CREDENTIALS);
  DistCpOptions options = new DistCpOptions(srcs, new Path(outFile.toURI()));
  Exception actualEx = null;
  try {
    listing.doBuildListing(writer, options);
  } catch (Exception e) {
    actualEx = e;
  }
  Assert.assertNotNull("close writer didn't fail", actualEx);
  Assert.assertEquals(expectedEx, actualEx);
}
 
Example #18
Source File: HadoopSequenceFileFormatter.java    From CloverETL-Engine with GNU Lesser General Public License v2.1
@Override
public void setDataTarget(Object outputDataTarget) throws IOException {
	if (outputDataTarget instanceof SequenceFile.Writer) {
		writer = (SequenceFile.Writer) outputDataTarget;
		return;
	}

	if (outputDataTarget instanceof URI) {
		URI targetURI = (URI) outputDataTarget;
		
		targetURI = HadoopSequenceFileParser.sandboxToFileURI(targetURI);
		
		ClassLoader formerContextClassloader = Thread.currentThread().getContextClassLoader();
		Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
		try {
			if (fs == null) {
				fs = HadoopSequenceFileParser.getFileSystem(targetURI, graph, user, config, this);
			}
			writer = SequenceFile.createWriter(fs, config,
					new Path(targetURI.getPath()), // Path to new file on fileSystem
					keyCopy.getValueClass(), // Key Data Type
					valCopy.getValueClass(), // Value Data Type
					SequenceFile.CompressionType.NONE);
		} catch (ComponentNotReadyException e) {
			throw new IOException("Failed to create Hadoop sequence file writer", e);
		} finally {
			Thread.currentThread().setContextClassLoader(formerContextClassloader);
		}

	} else {
		throw new IOException("Unsupported data target type: " + outputDataTarget.getClass().getName());
	}

}
 
Example #19
Source File: SeqFileReader.java    From aliyun-maxcompute-data-collectors with Apache License 2.0
public static Object getFirstValue(String filename) throws IOException {
  Reader r = null;
  try {
    // read from local filesystem
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
      conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);
    r = new SequenceFile.Reader(fs, new Path(filename), conf);
    Object key = ReflectionUtils.newInstance(r.getKeyClass(), conf);
    Object val = ReflectionUtils.newInstance(r.getValueClass(), conf);
    LOG.info("Reading value of type " + r.getValueClassName()
        + " from SequenceFile " + filename);
    r.next(key);
    r.getCurrentValue(val);
    LOG.info("Value as string: " + val.toString());
    return val;
  } finally {
    if (null != r) {
      try {
        r.close();
      } catch (IOException ioe) {
        LOG.warn("IOException during close: " + ioe.toString());
      }
    }
  }
}
 
Example #20
Source File: DistBlockIntegrityMonitor.java    From RDFS with Apache License 2.0
/**
 * Determines which files have failed for a given job.
 */
private HashMap<String, String> getFailedFiles(Job job) throws IOException {
  HashMap<String, String> failedFiles = new HashMap<String, String>();

  Path outDir = SequenceFileOutputFormat.getOutputPath(job);
  FileSystem fs  = outDir.getFileSystem(getConf());
  if (!fs.getFileStatus(outDir).isDir()) {
    throw new IOException(outDir.toString() + " is not a directory");
  }

  FileStatus[] files = fs.listStatus(outDir);

  for (FileStatus f: files) {
    Path fPath = f.getPath();
    if ((!f.isDir()) && (fPath.getName().startsWith(PART_PREFIX))) {
      LOG.info("opening " + fPath.toString());
      SequenceFile.Reader reader = 
        new SequenceFile.Reader(fs, fPath, getConf());

      Text key = new Text();
      Text value = new Text();
      while (reader.next(key, value)) {
        LOG.info("key: " + key.toString() + " , value: " + value.toString());
        failedFiles.put(key.toString(), value.toString());
      }
      reader.close();
    }
  }
  return failedFiles;
}
 
Example #21
Source File: SequenceFileOutputFormat.java    From hadoop with Apache License 2.0
/** Open the output generated by this format. */
public static SequenceFile.Reader[] getReaders(Configuration conf, Path dir)
  throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir));
  
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  
  SequenceFile.Reader[] parts = new SequenceFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    parts[i] = new SequenceFile.Reader(fs, names[i], conf);
  }
  return parts;
}
 
Example #22
Source File: TestDataJoin.java    From hadoop-gpu with Apache License 2.0
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
    int srcs) throws IOException {
  SequenceFile.Writer out[] = null;
  Path[] src = new Path[srcs];
  try {
    out = createWriters(testdir, conf, srcs, src);
    final int capacity = srcs * 2 + 1;
    Text key = new Text();
    key.set("ignored");
    Text val = new Text();
    for (int k = 0; k < capacity; ++k) {
      for (int i = 0; i < srcs; ++i) {
        val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i) +
            "\t" + Integer.toString(10 * k + i));
        out[i].append(key, val);
        if (i == k) {
          // add duplicate key
          out[i].append(key, val);
        }
      }
    }
  } finally {
    if (out != null) {
      for (int i = 0; i < srcs; ++i) {
        if (out[i] != null)
          out[i].close();
      }
    }
  }
  return src;
}
 
Example #23
Source File: MapTask.java    From big-c with Apache License 2.0
@SuppressWarnings("unchecked")
private void writeSkippedRec(K key, V value) throws IOException{
  if(skipWriter==null) {
    Path skipDir = SkipBadRecords.getSkipOutputPath(conf);
    Path skipFile = new Path(skipDir, getTaskID().toString());
    skipWriter = 
      SequenceFile.createWriter(
          skipFile.getFileSystem(conf), conf, skipFile,
          (Class<K>) createKey().getClass(),
          (Class<V>) createValue().getClass(), 
          CompressionType.BLOCK, getTaskReporter());
  }
  skipWriter.append(key, value);
}
 
Example #24
Source File: SimpleCopyListing.java    From big-c with Apache License 2.0
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  if (fs.exists(pathToListFile)) {
    fs.delete(pathToListFile, false);
  }
  return SequenceFile.createWriter(getConf(),
          SequenceFile.Writer.file(pathToListFile),
          SequenceFile.Writer.keyClass(Text.class),
          SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
          SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
 
Example #25
Source File: HdfsExtractor.java    From sqoop-on-spark with Apache License 2.0
/**
 * Returns true if the given file is a SequenceFile.
 * @param file path to check
 * @return true if the file can be opened as a SequenceFile
 */
@SuppressWarnings("deprecation")
private boolean isSequenceFile(Path file) {
  SequenceFile.Reader filereader = null;
  try {
    filereader = new SequenceFile.Reader(file.getFileSystem(conf), file, conf);
    filereader.close();
  } catch (IOException e) {
    return false;
  }
  return true;
}
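The probe works because the Reader constructor reads and validates the SequenceFile header (the "SEQ" magic bytes) on open, so any non-SequenceFile input fails fast with an IOException.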
 
Example #26
Source File: CubeStatsWriter.java    From kylin-on-parquet-v2 with Apache License 2.0
private static void writeCuboidStatisticsInner(Configuration conf, Path outputFilePath, //
        Map<Long, HLLCounter> cuboidHLLMap, int samplingPercentage, int mapperNumber, double mapperOverlapRatio,
        long sourceRecordCount) throws IOException {
    List<Long> allCuboids = Lists.newArrayList();
    allCuboids.addAll(cuboidHLLMap.keySet());
    Collections.sort(allCuboids);

    ByteBuffer valueBuf = ByteBuffer.allocate(BufferedMeasureCodec.DEFAULT_BUFFER_SIZE);
    SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(outputFilePath),
            SequenceFile.Writer.keyClass(LongWritable.class), SequenceFile.Writer.valueClass(BytesWritable.class));
    try {
        // mapper overlap ratio at key -1
        writer.append(new LongWritable(-1), new BytesWritable(Bytes.toBytes(mapperOverlapRatio)));

        // mapper number at key -2
        writer.append(new LongWritable(-2), new BytesWritable(Bytes.toBytes(mapperNumber)));

        // sampling percentage at key 0
        writer.append(new LongWritable(0L), new BytesWritable(Bytes.toBytes(samplingPercentage)));

        // flat table source_count at key -3
        writer.append(new LongWritable(-3), new BytesWritable(Bytes.toBytes(sourceRecordCount)));

        for (long i : allCuboids) {
            valueBuf.clear();
            cuboidHLLMap.get(i).writeRegisters(valueBuf);
            valueBuf.flip();
            writer.append(new LongWritable(i), new BytesWritable(valueBuf.array(), valueBuf.limit()));
        }
    } finally {
        IOUtils.closeQuietly(writer);
    }
}
 
Example #27
Source File: TestMapRed.java    From hadoop with Apache License 2.0
private static void printSequenceFile(FileSystem fs, Path p, 
                                      Configuration conf) throws IOException {
  SequenceFile.Reader r = new SequenceFile.Reader(fs, p, conf);
  Object key = null;
  Object value = null;
  while ((key = r.next(key)) != null) {
    value = r.getCurrentValue(value);
    System.out.println("  Row: " + key + ", " + value);
  }
  r.close();    
}
 
Example #28
Source File: HdfsMrsPyramidOutputFormatTest.java    From mrgeo with Apache License 2.0
@Before
public void setup() throws Exception
{
  defaultCodec = DefaultCodec.class.newInstance();
  defaultCompressionType = SequenceFile.CompressionType.BLOCK;
  mockWriter = mock(MapFile.Writer.class);
  outputPath = new Path(outputPathString);
}
 
Example #29
Source File: HadoopUtil.java    From kylin-on-parquet-v2 with Apache License 2.0
@SuppressWarnings("deprecation")
public static List<String> readDistinctColumnValues(Configuration conf, String inputPath) throws IOException {
    try (SequenceFile.Reader reader = new SequenceFile.Reader(HadoopUtil.getWorkingFileSystem(conf), new Path(inputPath), conf)) {
        List<String> values = Lists.newArrayList();

        NullWritable key = (NullWritable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

        while (reader.next(key, value)) {
            values.add(value.toString());
        }

        return values;
    }
}
 
Example #30
Source File: HdfsSequenceWriter.java    From sqoop-on-spark with Apache License 2.0
@SuppressWarnings("deprecation")
public void initialize(Path filepath, Configuration conf, CompressionCodec codec) throws IOException {
  if (codec != null) {
    filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf),
            conf, filepath, Text.class, NullWritable.class,
            SequenceFile.CompressionType.BLOCK, codec);
  } else {
    filewriter = SequenceFile.createWriter(filepath.getFileSystem(conf),
            conf, filepath, Text.class, NullWritable.class, SequenceFile.CompressionType.NONE);
  }

  text = new Text();
}