org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy Java Examples

The following examples show how to use org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AvroIO.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the {@link DynamicAvroDestinations} to use.
 *
 * <p>Returns the value of {@code getDynamicDestinations()} when it is non-null. Otherwise wraps a
 * single {@link FilenamePolicy} in constant destinations: the policy from
 * {@code getFilenamePolicy()} if set, else a {@link DefaultFilenamePolicy} built from the
 * filename prefix, shard template, filename suffix and windowed-writes setting.
 */
DynamicAvroDestinations<UserT, DestinationT, OutputT> resolveDynamicDestinations() {
  DynamicAvroDestinations<UserT, DestinationT, OutputT> configured = getDynamicDestinations();
  if (configured != null) {
    return configured;
  }

  // No dynamic destinations were supplied, so DestinationT is Void from here on.
  FilenamePolicy policy = getFilenamePolicy();
  if (policy == null) {
    policy =
        DefaultFilenamePolicy.fromStandardParameters(
            getFilenamePrefix(), getShardTemplate(), getFilenameSuffix(), getWindowedWrites());
  }
  return (DynamicAvroDestinations<UserT, DestinationT, OutputT>)
      constantDestinations(
          policy, getSchema(), getMetadata(), getCodec().getCodec(), getFormatFunction());
}
 
Example #2
Source File: PTransformMatchersTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code PTransformMatchers.writeWithRunnerDeterminedSharding()} matches a
 * {@link WriteFiles} created without any sharding call, and no longer matches once
 * {@code withNumShards} or {@code withSharding} has been applied.
 */
@Test
public void writeWithRunnerDeterminedSharding() {
  ResourceId outputDir = LocalResources.fromString("/foo/bar", true /* isDirectory */);
  FilenamePolicy filenamePolicy =
      DefaultFilenamePolicy.fromStandardParameters(
          StaticValueProvider.of(outputDir),
          DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE,
          "",
          false);

  // Stub sink: createWriteOperation() is overridden to return null.
  FileBasedSink<Integer, Void, Integer> stubSink =
      new FileBasedSink<Integer, Void, Integer>(
          StaticValueProvider.of(outputDir), DynamicFileDestinations.constant(filenamePolicy)) {
        @Override
        public WriteOperation<Void, Integer> createWriteOperation() {
          return null;
        }
      };

  // No sharding call has been made on this write: the matcher is expected to match.
  WriteFiles<Integer, Void, Integer> unsharded = WriteFiles.to(stubSink);
  assertThat(
      PTransformMatchers.writeWithRunnerDeterminedSharding().matches(appliedWrite(unsharded)),
      is(true));

  // A fixed shard count: the matcher is expected not to match.
  WriteFiles<Integer, Void, Integer> fixedShardCount = unsharded.withNumShards(3);
  assertThat(
      PTransformMatchers.writeWithRunnerDeterminedSharding()
          .matches(appliedWrite(fixedShardCount)),
      is(false));

  // Sharding supplied through a singleton view: the matcher is expected not to match.
  WriteFiles<Integer, Void, Integer> viewSharded =
      unsharded.withSharding(Sum.integersGlobally().asSingletonView());
  assertThat(
      PTransformMatchers.writeWithRunnerDeterminedSharding().matches(appliedWrite(viewSharded)),
      is(false));
}
 
Example #3
Source File: AvroIOTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link DefaultFilenamePolicy} for {@code destination}. The file name
 * {@code "file_" + destination} is resolved against {@code baseDir}, with shard template
 * {@code "-SSSSS-of-NNNNN"} and suffix {@code ".avro"}; the final argument is {@code false}.
 */
@Override
public FilenamePolicy getFilenamePolicy(String destination) {
  String fileName = "file_" + destination;
  return DefaultFilenamePolicy.fromStandardParameters(
      StaticValueProvider.of(baseDir.resolve(fileName, RESOLVE_FILE)),
      "-SSSSS-of-NNNNN",
      ".avro",
      false);
}
 
Example #4
Source File: FileBasedSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the names produced for 3, 1 and 0 files when the sink is created with an empty
 * extension and the {@code "-SSSSS-of-NNNNN"} shard template.
 */
@Test
public void testGenerateOutputFilenamesWithoutExtension() {
  ResourceId outputDir = getBaseOutputDirectory();
  SimpleSink<Void> sink =
      SimpleSink.makeSimpleSink(
          outputDir, "file", "-SSSSS-of-NNNNN", "", Compression.UNCOMPRESSED);
  FilenamePolicy policy = sink.getDynamicDestinations().getFilenamePolicy(null);

  // Three files: one name per shard index, each carrying the total count.
  List<ResourceId> threeFiles =
      Arrays.asList(
          outputDir.resolve("file-00000-of-00003", StandardResolveOptions.RESOLVE_FILE),
          outputDir.resolve("file-00001-of-00003", StandardResolveOptions.RESOLVE_FILE),
          outputDir.resolve("file-00002-of-00003", StandardResolveOptions.RESOLVE_FILE));
  List<ResourceId> generatedForThree = generateDestinationFilenames(policy, 3);
  assertEquals(threeFiles, generatedForThree);

  // A single file.
  List<ResourceId> oneFile =
      Collections.singletonList(
          outputDir.resolve("file-00000-of-00001", StandardResolveOptions.RESOLVE_FILE));
  List<ResourceId> generatedForOne = generateDestinationFilenames(policy, 1);
  assertEquals(oneFile, generatedForOne);

  // No files at all.
  List<ResourceId> noFiles = new ArrayList<>();
  List<ResourceId> generatedForNone = generateDestinationFilenames(policy, 0);
  assertEquals(noFiles, generatedForNone);
}
 
Example #5
Source File: AvroIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link DynamicAvroDestinations} that always returns the same {@link FilenamePolicy},
 * schema, metadata, and codec.
 *
 * <p>All five arguments are forwarded unchanged to the {@code ConstantAvroDestination}
 * constructor.
 *
 * @param filenamePolicy the filename policy to use
 * @param schema the schema to use
 * @param metadata the metadata map to use
 * @param codec the codec factory to use
 * @param formatFunction function from {@code UserT} to {@code OutputT}
 * @return a new {@code ConstantAvroDestination} whose destination type is {@code Void}
 */
public static <UserT, OutputT> DynamicAvroDestinations<UserT, Void, OutputT> constantDestinations(
    FilenamePolicy filenamePolicy,
    Schema schema,
    Map<String, Object> metadata,
    CodecFactory codec,
    SerializableFunction<UserT, OutputT> formatFunction) {
  return new ConstantAvroDestination<>(filenamePolicy, schema, metadata, codec, formatFunction);
}
 
Example #6
Source File: FileBasedSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies the names produced for 3, 1 and 0 files when the sink is created with the
 * {@code ".test"} extension and the {@code ".SSSSS.of.NNNNN"} shard template.
 */
@Test
public void testGenerateOutputFilenames() {
  ResourceId outputDir = getBaseOutputDirectory();
  SimpleSink<Void> sink =
      SimpleSink.makeSimpleSink(
          outputDir, "file", ".SSSSS.of.NNNNN", ".test", Compression.UNCOMPRESSED);
  FilenamePolicy policy = sink.getDynamicDestinations().getFilenamePolicy(null);

  // Three files: one name per shard index, each carrying the total count and the extension.
  List<ResourceId> threeFiles =
      Arrays.asList(
          outputDir.resolve("file.00000.of.00003.test", StandardResolveOptions.RESOLVE_FILE),
          outputDir.resolve("file.00001.of.00003.test", StandardResolveOptions.RESOLVE_FILE),
          outputDir.resolve("file.00002.of.00003.test", StandardResolveOptions.RESOLVE_FILE));
  List<ResourceId> generatedForThree = generateDestinationFilenames(policy, 3);
  assertEquals(threeFiles, generatedForThree);

  // A single file.
  List<ResourceId> oneFile =
      Collections.singletonList(
          outputDir.resolve("file.00000.of.00001.test", StandardResolveOptions.RESOLVE_FILE));
  List<ResourceId> generatedForOne = generateDestinationFilenames(policy, 1);
  assertEquals(oneFile, generatedForOne);

  // No files at all.
  List<ResourceId> noFiles = new ArrayList<>();
  List<ResourceId> generatedForNone = generateDestinationFilenames(policy, 0);
  assertEquals(noFiles, generatedForNone);
}
 
Example #7
Source File: FileBasedSinkTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the unwindowed filename that {@code policy} produces for each shard index from
 * {@code 0} up to (but excluding) {@code numFiles}, in index order.
 *
 * @param policy the filename policy to query
 * @param numFiles the shard count passed to the policy and the number of names to generate
 * @return the generated filenames; empty when {@code numFiles} is not positive
 */
public List<ResourceId> generateDestinationFilenames(FilenamePolicy policy, int numFiles) {
  List<ResourceId> generated = new ArrayList<>();
  int shard = 0;
  while (shard < numFiles) {
    ResourceId shardFile =
        policy.unwindowedFilename(shard, numFiles, CompressionType.UNCOMPRESSED);
    generated.add(shardFile);
    shard++;
  }
  return generated;
}
 
Example #8
Source File: TextIO.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the {@link DynamicDestinations} to use, trying in order:
 *
 * <ol>
 *   <li>the value of {@code getDynamicDestinations()}, when non-null;
 *   <li>default-policy destinations built from {@code getDestinationFunction()}, when that is
 *       non-null;
 *   <li>constant destinations around {@code getFilenamePolicy()}, or around a
 *       {@link DefaultFilenamePolicy} built from the prefix, shard template, suffix and
 *       windowed-writes setting when no policy is set.
 * </ol>
 */
private DynamicDestinations<UserT, DestinationT, String> resolveDynamicDestinations() {
  DynamicDestinations<UserT, DestinationT, String> configured = getDynamicDestinations();
  if (configured != null) {
    return configured;
  }

  if (getDestinationFunction() != null) {
    // A destination function is present, so DestinationT == Params.
    return (DynamicDestinations)
        DynamicFileDestinations.toDefaultPolicies(
            getDestinationFunction(), getEmptyDestination(), getFormatFunction());
  }

  // Neither dynamic destinations nor a destination function: DestinationT == Void.
  FilenamePolicy policy = getFilenamePolicy();
  if (policy == null) {
    policy =
        DefaultFilenamePolicy.fromStandardParameters(
            getFilenamePrefix(), getShardTemplate(), getFilenameSuffix(), getWindowedWrites());
  }
  return (DynamicDestinations) DynamicFileDestinations.constant(policy, getFormatFunction());
}
 
Example #9
Source File: WriteFilesTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link SimpleSink} from the base output directory and a {@link PerWindowFiles}
 * filename policy.
 */
private SimpleSink<Void> makeSimpleSink() {
  // The policy receives the "file" resource resolved against the base output directory, plus the
  // string "simple" (presumably a filename suffix; confirm against PerWindowFiles).
  FilenamePolicy filenamePolicy =
      new PerWindowFiles(
          getBaseOutputDirectory().resolve("file", StandardResolveOptions.RESOLVE_FILE),
          "simple");
  return SimpleSink.makeSimpleSink(getBaseOutputDirectory(), filenamePolicy);
}
 
Example #10
Source File: ConstantAvroDestination.java    From beam with Apache License 2.0 5 votes vote down vote up
public ConstantAvroDestination(
    FilenamePolicy filenamePolicy,
    Schema schema,
    Map<String, Object> metadata,
    CodecFactory codec,
    SerializableFunction<UserT, OutputT> formatFunction) {
  this.filenamePolicy = filenamePolicy;
  this.schema = Suppliers.compose(new SchemaFunction(), Suppliers.ofInstance(schema.toString()));
  this.metadata = metadata;
  this.codec = new SerializableAvroCodecFactory(codec);
  this.formatFunction = formatFunction;
}
 
Example #11
Source File: ConstantAvroDestination.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the stored {@code filenamePolicy}; the {@code destination} argument is not consulted.
 */
@Override
public FilenamePolicy getFilenamePolicy(Void destination) {
  return filenamePolicy;
}
 
Example #12
Source File: WriteFilesTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@link PerWindowFiles} policy for {@code destination}: the file name
 * {@code "file_" + destination} is resolved against {@code baseOutputDirectory} and passed
 * together with the string {@code "simple"}.
 */
@Override
public FilenamePolicy getFilenamePolicy(Integer destination) {
  String fileName = "file_" + destination;
  return new PerWindowFiles(
      baseOutputDirectory.resolve(fileName, StandardResolveOptions.RESOLVE_FILE), "simple");
}
 
Example #13
Source File: DynamicFileDestinations.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * A specialization of {@link #constant(FilenamePolicy, SerializableFunction)} for the case where
 * UserT and OutputT are the same type and the format function is the identity.
 *
 * @param filenamePolicy the policy handed to the new {@code ConstantFilenamePolicy}
 * @return a new {@code ConstantFilenamePolicy} built from {@code filenamePolicy} and
 *     {@code SerializableFunctions.identity()}
 */
public static <UserT> DynamicDestinations<UserT, Void, UserT> constant(
    FilenamePolicy filenamePolicy) {
  return new ConstantFilenamePolicy<>(filenamePolicy, SerializableFunctions.<UserT>identity());
}
 
Example #14
Source File: DynamicFileDestinations.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a {@link DynamicDestinations} that always returns the same {@link FilenamePolicy}.
 *
 * @param filenamePolicy the policy handed to the new {@code ConstantFilenamePolicy}
 * @param formatFunction function from {@code UserT} to {@code OutputT}, also handed to the new
 *     {@code ConstantFilenamePolicy}
 * @return a new {@code ConstantFilenamePolicy} whose destination type is {@code Void}
 */
public static <UserT, OutputT> DynamicDestinations<UserT, Void, OutputT> constant(
    FilenamePolicy filenamePolicy, SerializableFunction<UserT, OutputT> formatFunction) {
  return new ConstantFilenamePolicy<>(filenamePolicy, formatFunction);
}
 
Example #15
Source File: DynamicFileDestinations.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the policy for {@code params} by delegating to
 * {@code DefaultFilenamePolicy.fromParams(params)}.
 */
@Override
public FilenamePolicy getFilenamePolicy(DefaultFilenamePolicy.Params params) {
  return DefaultFilenamePolicy.fromParams(params);
}
 
Example #16
Source File: DynamicFileDestinations.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the stored {@code filenamePolicy}; the {@code destination} argument is not consulted.
 */
@Override
public FilenamePolicy getFilenamePolicy(Void destination) {
  return filenamePolicy;
}
 
Example #17
Source File: DynamicFileDestinations.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Stores the given policy and format function in the corresponding fields without copying or
 * validating them.
 *
 * @param filenamePolicy the filename policy to store
 * @param formatFunction function from {@code UserT} to {@code OutputT} to store
 */
public ConstantFilenamePolicy(
    FilenamePolicy filenamePolicy, SerializableFunction<UserT, OutputT> formatFunction) {
  this.filenamePolicy = filenamePolicy;
  this.formatFunction = formatFunction;
}
 
Example #18
Source File: AvroIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Returns the configured {@link FilenamePolicy}; may be {@code null}, as annotated. */
@Nullable
abstract FilenamePolicy getFilenamePolicy();
 
Example #19
Source File: TextIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * See {@link TypedWrite#to(FilenamePolicy)}.
 *
 * <p>Applies {@code to(filenamePolicy)} to {@code inner}, sets the identity format function on
 * the result, and wraps it in a new {@code Write}.
 */
@Experimental(Kind.FILESYSTEM)
public Write to(FilenamePolicy filenamePolicy) {
  return new Write(
      inner.to(filenamePolicy).withFormatFunction(SerializableFunctions.identity()));
}
 
Example #20
Source File: TextIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/** Builder setter for the filename policy; the argument is annotated as nullable. */
abstract Builder<UserT, DestinationT> setFilenamePolicy(
@Nullable FilenamePolicy filenamePolicy);
 
Example #21
Source File: TextIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/** A policy for naming output files. May be {@code null}, as annotated. */
@Nullable
abstract FilenamePolicy getFilenamePolicy();
 
Example #22
Source File: AvroIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * See {@link TypedWrite#to(FilenamePolicy)}.
 *
 * <p>Applies {@code to(filenamePolicy)} to {@code inner}, sets the identity format function on
 * the result, and wraps it in a new {@code Write}.
 */
public Write<T> to(FilenamePolicy filenamePolicy) {
  return new Write<>(
      inner.to(filenamePolicy).withFormatFunction(SerializableFunctions.identity()));
}
 
Example #23
Source File: AvroIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
 * directory for temporary files must be specified using {@link #withTempDirectory}.
 *
 * @param filenamePolicy the policy set on the builder obtained from {@code toBuilder()}
 * @return the {@code TypedWrite} built from that builder
 */
@Experimental(Kind.FILESYSTEM)
public TypedWrite<UserT, DestinationT, OutputT> to(FilenamePolicy filenamePolicy) {
  return toBuilder().setFilenamePolicy(filenamePolicy).build();
}
 
Example #24
Source File: AvroIO.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Builder setter for the filename policy.
 *
 * <p>NOTE(review): the parameter is not annotated {@code @Nullable} here; confirm whether a
 * {@code null} policy is meant to be accepted by this setter.
 */
abstract Builder<UserT, DestinationT, OutputT> setFilenamePolicy(
FilenamePolicy filenamePolicy);
 
Example #25
Source File: TextIO.java    From beam with Apache License 2.0 2 votes vote down vote up
/**
 * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
 * directory for temporary files must be specified using {@link #withTempDirectory}.
 *
 * @param filenamePolicy the policy set on the builder obtained from {@code toBuilder()}
 * @return the {@code TypedWrite} built from that builder
 */
public TypedWrite<UserT, DestinationT> to(FilenamePolicy filenamePolicy) {
  return toBuilder().setFilenamePolicy(filenamePolicy).build();
}