Java Code Examples for org.apache.beam.sdk.io.FileSystems#open()

The following examples show how to use org.apache.beam.sdk.io.FileSystems#open(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: GeoCityLookup.java    From gcp-ingestion with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Returns a singleton object for reading from the GeoCity database.
 *
 * <p>We copy the configured database file to a static temp location so that the MaxMind API can
 * save on heap usage by using memory mapping. The reader is threadsafe and this singleton pattern
 * allows multiple worker threads on the same machine to share a single reader instance.
 *
 * <p>Note that we do not clean up the temp mmdb file, but it's a static path, so running locally
 * will overwrite the existing path every time rather than creating an unbounded number of copies.
 * This also assumes that only one JVM per machine is running this code. In the production case
 * where this is running on Cloud Dataflow, we should always have a clean environment and the temp
 * state will be cleaned up along with the workers once the job finishes. However, behavior is
 * undefined if you run multiple local jobs concurrently.
 *
 * <p>The channel and stream used to read the configured file are closed as soon as the copy to
 * the temp location has finished (or failed).
 *
 * @param geoCityDatabase provider of the file spec of the configured database file
 * @return the shared reader, created on the first call
 * @throws IOException if the configured file path is not a valid .mmdb file
 */
private static synchronized DatabaseReader getOrCreateSingletonGeoCityReader(
    ValueProvider<String> geoCityDatabase) throws IOException {
  if (singletonGeoCityReader == null) {
    File mmdb;
    try {
      Metadata metadata = FileSystems.matchSingleFileSpec(geoCityDatabase.get());
      Path mmdbPath = Paths.get(System.getProperty("java.io.tmpdir"), "GeoCityLookup.mmdb");
      // try-with-resources so the source channel is released even when the copy fails.
      try (ReadableByteChannel channel = FileSystems.open(metadata.resourceId());
          InputStream inputStream = Channels.newInputStream(channel)) {
        Files.copy(inputStream, mmdbPath, StandardCopyOption.REPLACE_EXISTING);
      }
      mmdb = mmdbPath.toFile();
    } catch (IOException e) {
      throw new IOException("Exception thrown while fetching configured geoCityDatabase", e);
    }
    singletonGeoCityReader = new DatabaseReader.Builder(mmdb).withCache(new CHMCache()).build();
  }
  return singletonGeoCityReader;
}
 
Example 2
Source File: FileUtils.java    From deployment-examples with MIT License 6 votes vote down vote up
/**
 * Copies the bytes of {@code sourceFile} into {@code destinationFile}, which is created with the
 * "text/plain" MIME type.
 *
 * @param sourceFile the resource to read from
 * @param destinationFile the resource to create and write to
 * @return the string form of {@code destinationFile}
 * @throws IOException if opening, reading, writing or closing either channel fails
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {
  // The writer is opened first and, being declared first, is closed last.
  try (WritableByteChannel sink = FileSystems.create(destinationFile, "text/plain");
      ReadableByteChannel source = FileSystems.open(sourceFile)) {
    final ByteBuffer chunk = ByteBuffer.allocateDirect(16 * 1024);

    // Pump until end-of-stream; compact() keeps any bytes a partial write left unconsumed.
    while (source.read(chunk) != -1) {
      chunk.flip();
      sink.write(chunk);
      chunk.compact();
    }

    // Drain whatever is still buffered after the final read.
    chunk.flip();
    while (chunk.hasRemaining()) {
      sink.write(chunk);
    }
  }

  return destinationFile.toString();
}
 
Example 3
Source File: StreamingDataGenerator.java    From DataflowTemplates with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the JSON data generator and loads the schema file at {@code schemaLocation} into
 * {@code schema}.
 *
 * @throws IOException if the schema file cannot be matched or read
 */
@Setup
public void setup() throws IOException {
  dataGenerator = new JsonDataGeneratorImpl();

  Metadata metadata = FileSystems.matchSingleFileSpec(schemaLocation);

  // Copy the schema file into a string which can be used for generation.
  try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
    try (ReadableByteChannel readerChannel = FileSystems.open(metadata.resourceId())) {
      try (WritableByteChannel writerChannel = Channels.newChannel(byteArrayOutputStream)) {
        ByteStreams.copy(readerChannel, writerChannel);
      }
    }

    // Decode explicitly as UTF-8 instead of the worker's platform default charset, so the
    // schema text is the same on every machine. NOTE(review): assumes schema files are UTF-8.
    schema = byteArrayOutputStream.toString("UTF-8");
  }
}
 
Example 4
Source File: FileUtils.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Streams {@code sourceFile} into a newly created {@code destinationFile} ("text/plain") through
 * a 16 KiB direct buffer.
 *
 * @param sourceFile the resource whose bytes are read
 * @param destinationFile the resource that is created and written
 * @return {@code destinationFile.toString()}
 * @throws IOException if any channel operation fails
 */
public static String copyFile(ResourceId sourceFile, ResourceId destinationFile)
    throws IOException {

  try (WritableByteChannel out = FileSystems.create(destinationFile, "text/plain")) {
    try (ReadableByteChannel in = FileSystems.open(sourceFile)) {

      final ByteBuffer buf = ByteBuffer.allocateDirect(16 * 1024);

      // Fill, flip, write, compact - repeated until the source reports end-of-stream.
      while (true) {
        if (in.read(buf) == -1) {
          break;
        }
        buf.flip();
        out.write(buf);
        buf.compact();
      }

      // Bytes kept by the last compact() still have to be flushed out.
      buf.flip();
      while (buf.hasRemaining()) {
        out.write(buf);
      }
    }
  }

  return destinationFile.toString();
}
 
Example 5
Source File: ReadData.java    From java-docs-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Download the image data as a grid of weights and store them in a 2D array.
 *
 * <p>Every line of the resource at {@code artUrl} is split on commas, each field is parsed with
 * {@link Float#valueOf(String)}, and the resulting row is appended to {@code imageData}. Any
 * failure is printed to standard error and not propagated, so rows read before the failure stay
 * in {@code imageData}.
 *
 * @param artUrl location of the comma-separated weights file
 */
private void downloadImageData(String artUrl) {
  // try-with-resources: the channel and the reader are released on success and on failure.
  try (ReadableByteChannel chan =
          FileSystems.open(
              FileSystems.matchNewResource(artUrl, false /* is_directory */));
      BufferedReader br =
          new BufferedReader(new InputStreamReader(Channels.newInputStream(chan)))) {

    String line;
    while ((line = br.readLine()) != null) {
      imageData.add(
          Arrays.stream(line.split(","))
              .map(Float::valueOf)
              .collect(Collectors.toList()));
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
 
Example 6
Source File: CsvToAvro.java    From java-docs-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the resource at {@code schemaPath} as UTF-8 text and returns all of its lines
 * concatenated, with the line terminators dropped.
 *
 * @param schemaPath location of the schema file
 * @return the file content joined into a single line
 * @throws IOException if the resource cannot be opened or read
 */
public static String getSchema(String schemaPath) throws IOException {
  ReadableByteChannel schemaChannel =
      FileSystems.open(FileSystems.matchNewResource(schemaPath, false));

  try (InputStream in = Channels.newInputStream(schemaChannel)) {
    BufferedReader lines = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    StringBuilder joined = new StringBuilder();

    for (String current = lines.readLine(); current != null; current = lines.readLine()) {
      joined.append(current);
    }

    return joined.toString();
  }
}
 
Example 7
Source File: AvroToCsv.java    From java-docs-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the schema stored at {@code schemaPath}: the resource is decoded as UTF-8 and its lines
 * are appended to one another without separators.
 *
 * @param schemaPath location of the schema file
 * @return the concatenated lines of the file
 * @throws IOException if the resource cannot be opened or read
 */
private static String getSchema(String schemaPath) throws IOException {
  ReadableByteChannel source =
      FileSystems.open(FileSystems.matchNewResource(schemaPath, false));

  try (InputStream rawStream = Channels.newInputStream(source)) {
    InputStreamReader decoder = new InputStreamReader(rawStream, "UTF-8");
    BufferedReader lineReader = new BufferedReader(decoder);
    StringBuilder text = new StringBuilder();

    while (true) {
      String next = lineReader.readLine();
      if (next == null) {
        return text.toString();
      }
      text.append(next);
    }
  }
}
 
Example 8
Source File: HashClientInfo.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Reads the first 32 bytes of the single file matched by {@code uri}.
 *
 * <p>A single {@code read} call may legally return fewer bytes than requested before the end of
 * the stream, so the buffer is filled in a loop; only a file that really holds fewer than 32
 * bytes is rejected. Content beyond the first 32 bytes is not inspected.
 *
 * @param uri file spec of the key file
 * @return the 32 bytes read
 * @throws IOException if the file cannot be matched, opened or read
 * @throws KeyLengthMismatchException if the file holds fewer than 32 bytes; the exception is
 *     given the number of bytes that could be read (0 for an empty file)
 */
@VisibleForTesting
byte[] readBytes(String uri) throws IOException {
  Metadata metadata = FileSystems.matchSingleFileSpec(uri);
  try (ReadableByteChannel inputChannel = FileSystems.open(metadata.resourceId());
      InputStream inputStream = Channels.newInputStream(inputChannel)) {
    byte[] key = new byte[32];
    int bytesRead = 0;
    // Keep reading until the key buffer is full or the stream ends.
    while (bytesRead < key.length) {
      int count = inputStream.read(key, bytesRead, key.length - bytesRead);
      if (count < 0) {
        break;
      }
      bytesRead += count;
    }
    if (bytesRead != key.length) {
      throw new KeyLengthMismatchException(bytesRead);
    }
    return key;
  }
}
 
Example 9
Source File: GeoCityLookup.java    From gcp-ingestion with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Returns a singleton object describing allowed cities.
 *
 * <p>The file is read line by line until end of stream; each line must match
 * {@code GEO_NAME_PATTERN}, whose first group is parsed as the integer geoName ID. The set is
 * only published to the singleton once the whole file has been parsed, so a failed load leaves
 * the singleton unset and a later call retries instead of returning a partial set.
 *
 * @param geoCityFilter provider of the file spec of the allowed-cities file
 * @return the shared set of allowed geoName IDs
 * @throws IOException if the configured file path does not exist or cannot be read
 * @throws IllegalStateException if a line does not match {@code GEO_NAME_PATTERN}
 */
private static synchronized Set<Integer> getOrCreateSingletonAllowedCities(
    ValueProvider<String> geoCityFilter) throws IOException {
  if (singletonAllowedCities == null) {
    Set<Integer> allowedCities = new HashSet<>();
    try {
      Metadata metadata = FileSystems.matchSingleFileSpec(geoCityFilter.get());
      try (ReadableByteChannel channel = FileSystems.open(metadata.resourceId());
          BufferedReader reader =
              new BufferedReader(new InputStreamReader(Channels.newInputStream(channel)))) {
        // readLine() returning null is the end-of-stream signal; ready() only reports whether
        // a read would block and can be false before the file has been fully consumed.
        String line;
        while ((line = reader.readLine()) != null) {
          Matcher matcher = GEO_NAME_PATTERN.matcher(line);
          if (matcher.find()) {
            Integer geoNameId = Integer.valueOf(matcher.group(1));
            allowedCities.add(geoNameId);
          } else {
            throw new IllegalStateException(
                "Line of geoCityFilter file does not begin with a geoName integer ID: " + line);
          }
        }
      }
    } catch (IOException e) {
      throw new IOException("Exception thrown while fetching configured geoCityFilter", e);
    }
    // Publish only after a complete, successful parse.
    singletonAllowedCities = allowedCities;
  }
  return singletonAllowedCities;
}
 
Example 10
Source File: SchemaParser.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a JSON file and returns a JSONObject containing the necessary source, sink, and schema
 * information.
 *
 * <p>The file is decoded as UTF-8, and the channel opened to read it is closed before this
 * method returns, whether parsing succeeds or fails.
 *
 * @param pathToJSON the JSON file location so we can download and parse it
 * @return the parsed JSONObject
 * @throws RuntimeException wrapping any failure to open, read or parse the file
 */
public JSONObject parseSchema(String pathToJSON) throws Exception {

  // The byte helper deliberately leaves the stream open, so the channel is closed here.
  try (ReadableByteChannel readableByteChannel =
      FileSystems.open(FileSystems.matchNewResource(pathToJSON, false))) {

    // Decode explicitly as UTF-8 rather than with the platform default charset.
    String json = new String(
        StreamUtils.getBytesWithoutClosing(Channels.newInputStream(readableByteChannel)),
        "UTF-8");

    return new JSONObject(json);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example 11
Source File: BulkCompressor.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Compresses the input element's file into the destination directory.
 *
 * <p>The data is first written to a "temp-"-prefixed file and then renamed to its final name.
 * On success the path of the compressed output file is emitted; on an {@link IOException} the
 * error is logged and the input path together with the exception message is emitted to
 * {@code DEADLETTER_TAG}.
 *
 * @param context the process context supplying the input file and receiving the outputs
 */
@ProcessElement
public void processElement(ProcessContext context) {
  ResourceId source = context.element().resourceId();
  Compression codec = compressionValue.get();

  // The output name is the input name plus the codec's suffix, e.g. demo.txt -> demo.txt.gz
  String compressedName = source.getFilename() + codec.getSuggestedSuffix();

  // Work out the destination directory, the final file and the staging file inside it.
  ResourceId targetDir = FileSystems.matchNewResource(destinationLocation.get(), true);
  ResourceId finalFile = targetDir.resolve(compressedName, StandardResolveOptions.RESOLVE_FILE);
  ResourceId stagingFile =
      targetDir.resolve("temp-" + compressedName, StandardResolveOptions.RESOLVE_FILE);

  try (ReadableByteChannel in = FileSystems.open(source)) {
    // Stream the input through the compressing channel into the staging file.
    try (WritableByteChannel out =
        codec.writeCompressed(FileSystems.create(stagingFile, MimeTypes.BINARY))) {
      ByteStreams.copy(in, out);
    }

    // The writer is closed at this point, so the staging file can be moved into place.
    FileSystems.rename(ImmutableList.of(stagingFile), ImmutableList.of(finalFile));

    // Emit the path of the compressed output file.
    context.output(finalFile.toString());
  } catch (IOException e) {
    LOG.error("Error occurred during compression of {}", source.toString(), e);
    context.output(DEADLETTER_TAG, KV.of(source.toString(), e.getMessage()));
  }
}
 
Example 12
Source File: DynamicJdbcIO.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Utility method that copies binary (jar file) data from {@code source} to {@code dest}.
 *
 * @param source the resource to read
 * @param dest the resource to create with the binary MIME type and write to
 * @throws IOException if either channel cannot be opened, copied or closed
 */
private static void copy(ResourceId source, ResourceId dest) throws IOException {
  // The reader is opened first; the writer, declared second, is closed first.
  try (ReadableByteChannel in = FileSystems.open(source);
      WritableByteChannel out = FileSystems.create(dest, MimeTypes.BINARY)) {
    ByteStreams.copy(in, out);
  }
}
 
Example 13
Source File: SpannerConverterTest.java    From DataflowTemplates with Apache License 2.0 5 votes vote down vote up
/**
 * Unit test for export transform.
 *
 * <p>Runs the export transform against mocked Spanner objects that yield a single column of type
 * INT64, then reads back the schema file written under the text write prefix and checks its
 * content.
 */
@Test
@Category(NeedsRunner.class)
public void testSchemaSave() throws IOException {

  ValueProvider<String> table = ValueProvider.StaticValueProvider.of(TABLE);
  SpannerConfig spannerConfig = SpannerConfig.create();
  DatabaseClient databaseClient = mock(DatabaseClient.class, withSettings().serializable());
  ReadOnlyTransaction readOnlyTransaction =
      mock(ReadOnlyTransaction.class, withSettings().serializable());
  ResultSet resultSet = mock(ResultSet.class, withSettings().serializable());
  Struct struct = mock(Struct.class, withSettings().serializable());

  // The mocked query returns exactly one row: (COLUMN_NAME, "INT64").
  when(databaseClient.readOnlyTransaction()).thenReturn(readOnlyTransaction);
  when(readOnlyTransaction.executeQuery(any(Statement.class))).thenReturn(resultSet);
  when(resultSet.next()).thenReturn(true).thenReturn(false);
  when(resultSet.getCurrentRowAsStruct()).thenReturn(struct);
  when(struct.getString(0)).thenReturn(COLUMN_NAME);
  when(struct.getString(1)).thenReturn("INT64");

  String schemaPath = "/tmp/" + UUID.randomUUID().toString();
  ValueProvider<String> textWritePrefix = ValueProvider.StaticValueProvider.of(schemaPath);
  SpannerConverters.ExportTransform exportTransform =
      SpannerConverters.ExportTransformFactory.create(table, spannerConfig, textWritePrefix);
  exportTransform.setDatabaseClient(databaseClient);

  PCollection<ReadOperation> results = pipeline.apply("Create", exportTransform);
  ReadOperation readOperation =
      ReadOperation.create().withColumns(ImmutableList.of(COLUMN_NAME)).withTable(TABLE);
  PAssert.that(results).containsInAnyOrder(readOperation);
  pipeline.run();

  // Release the channel and scanner once the schema file has been read back.
  try (ReadableByteChannel channel =
          FileSystems.open(
              FileSystems.matchNewResource(
                  schemaPath + SpannerConverters.ExportTransform.ExportFn.SCHEMA_SUFFIX,
                  false));
      // The "\\A" delimiter makes next() return the whole file content as one token.
      java.util.Scanner scanner = new java.util.Scanner(channel).useDelimiter("\\A")) {
    assertEquals("{\"id\":\"INT64\"}", scanner.next());
  }
}
 
Example 14
Source File: IsmReaderImpl.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Opens a new channel on {@code resourceId}.
 *
 * <p>If the opened channel is not seekable it is closed again before the precondition failure
 * is raised, so a rejected channel is not leaked.
 *
 * @return the opened channel, cast to {@link SeekableByteChannel}
 * @throws IOException if the resource cannot be opened
 * @throws IllegalArgumentException if the opened channel is not a {@link SeekableByteChannel}
 */
private SeekableByteChannel open() throws IOException {
  ReadableByteChannel channel = FileSystems.open(resourceId);
  boolean seekable = channel instanceof SeekableByteChannel;
  if (!seekable) {
    try {
      channel.close();
    } catch (IOException ignored) {
      // Best effort: the precondition failure below is the error worth reporting.
    }
  }
  Preconditions.checkArgument(
      seekable,
      "IsmReaderImpl requires a SeekableByteChannel for path %s but received %s.",
      resourceId,
      channel);
  return (SeekableByteChannel) channel;
}