org.kitesdk.data.DatasetWriter Java Examples

The following examples show how to use org.kitesdk.data.DatasetWriter. They are taken from the kite and kite-examples open source projects; the original source file and license are noted above each example.
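
Before the individual examples, here is a minimal sketch of the pattern most of them share: build a DatasetDescriptor, create or load a dataset, open a DatasetWriter, write records, and close the writer in a finally block. The WriterPatternSketch class, the MyEvent record type, and the dataset URI below are placeholders invented for illustration, not taken from any of the examples.

import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetWriter;
import org.kitesdk.data.Datasets;

public class WriterPatternSketch {

  // Placeholder record type; Avro reflection derives a schema from its fields.
  public static class MyEvent {
    private String id;
    private long timestamp;

    public MyEvent() {}  // no-arg constructor for reflection

    public MyEvent(String id, long timestamp) {
      this.id = id;
      this.timestamp = timestamp;
    }
  }

  public static void main(String[] args) {
    // Describe the dataset with a schema reflected from the placeholder class.
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(MyEvent.class)
        .build();

    // Create the dataset at a placeholder HDFS URI (assumes it does not exist yet);
    // Datasets.load(uri, MyEvent.class) would open an existing dataset instead.
    Dataset<MyEvent> events = Datasets.create(
        "dataset:hdfs:/tmp/data/events", descriptor, MyEvent.class);

    // Open a writer, write a few records, and always close it so buffered data is flushed.
    DatasetWriter<MyEvent> writer = null;
    try {
      writer = events.newWriter();
      for (int i = 0; i < 10; i++) {
        writer.write(new MyEvent("event-" + i, System.currentTimeMillis()));
      }
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}

Several of the test examples below create their datasets through a DatasetRepository (for example FileSystemDatasetRepository) instead of a dataset URI, but the writer lifecycle is the same.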
Example #1
Source File: TestWriteReflectReadGeneric.java    From kite with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(),
      testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test", new DatasetDescriptor.Builder()
                                 .schema(MyRecord.class)
                                 .build(), MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  for (int i = 0; i < totalRecords; i++) {
    writer.write(new MyRecord(String.valueOf(i), i));
  }
  writer.close();

  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
 
Example #2
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(), StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example #3
Source File: CreateEvents.java    From kite-examples with Apache License 2.0
@Override
public int run(List<String> args) throws Exception {

  Preconditions.checkState(!Datasets.exists(uri),
      "events dataset already exists");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class).build();

  View<StandardEvent> events = Datasets.create(uri, descriptor, StandardEvent.class);
  DatasetWriter<StandardEvent> writer = events.newWriter();
  try {
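    // keep writing random events until roughly 36 seconds (36000 ms) have elapsed since baseTimestamp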
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      writer.write(generateRandomEvent());
    }
  } finally {
    writer.close();
  }

  System.out.println("Generated " + counter + " events");

  return 0;
}
 
Example #4
Source File: TestReadCustomGeneric.java    From kite with Apache License 2.0
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo = new FileSystemDatasetRepository(fs.getConf(),
      testDirectory);
  Dataset<MyRecord> writerDataset = repo.create("ns", "test", new DatasetDescriptor.Builder()
                                 .schema(MyRecord.class)
                                 .build(), MyRecord.class);
  DatasetWriter<MyRecord> writer = writerDataset.newWriter();
  for (int i = 0; i < totalRecords; i++) {
    writer.write(new MyRecord(String.valueOf(i), i));
  }
  writer.close();

  readerDataset = repo.load("ns", "test", TestGenericRecord.class);
}
 
Example #5
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testSpecificProjectionLoad() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<SmallEvent> dataset = repo.load(
      "ns", unbounded.getDataset().getName(),
      SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(toSmallEvent(sepEvent),
      toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example #6
Source File: PartitionedDatasetWriter.java    From kite with Apache License 2.0
@Override
public void write(E entity) {
  Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to write to a writer in state:%s", state);

  accessor.keyFor(entity, provided, reusedKey);

  DatasetWriter<E> writer = cachedWriters.getIfPresent(reusedKey);
  if (writer == null) {
    // avoid checking on every write whether the entity belongs in the view by only
    // checking when a new writer is created
    Preconditions.checkArgument(view.includes(entity),
        "View %s does not include entity %s", view, entity);
    // get a new key because it is stored in the cache
    StorageKey key = StorageKey.copy(reusedKey);
    try {
      writer = cachedWriters.getUnchecked(key);
    } catch (UncheckedExecutionException ex) {
      throw new IllegalArgumentException(
          "Problem creating view for entity: " + entity, ex.getCause());
    }
  }

  writer.write(entity);
}
 
Example #7
Source File: TestFileSystemPartitionView.java    From kite with Apache License 2.0
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test/-" + i;
      writer.write(record);
    }

  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #8
Source File: TestPartitionReplacement.java    From kite with Apache License 2.0
private static void writeTestRecords(View<TestRecord> view) {
  DatasetWriter<TestRecord> writer = null;
  try {
    writer = view.newWriter();
    for (int i = 0; i < 10; i += 1) {
      TestRecord record = new TestRecord();
      record.id = i;
      record.data = "test-" + i;
      writer.write(record);
    }

  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #9
Source File: DatasetTestUtilities.java    From kite with Apache License 2.0
public static void writeTestUsers(View<GenericData.Record> view, int count, int start, String... fields) {
  DatasetWriter<GenericData.Record> writer = null;
  try {
    writer = view.newWriter();
    for (int i = start; i < count + start; i++) {
      GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
          view.getDataset().getDescriptor().getSchema()).set("username", "test-" + i);
      for (String field : fields) {
        recordBuilder.set(field, field + "-" + i);
      }
      writer.write(recordBuilder.build());
    }
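    // flush if this writer implementation supports it (not every DatasetWriter is Flushable)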
    if (writer instanceof Flushable) {
      ((Flushable) writer).flush();
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #10
Source File: TestFileSystemView.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testUnboundedMoveToTrash() throws Exception {
  // NOTE: this is an unrestricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.moveToTrash());
  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
 
Example #11
Source File: TestFileSystemView.java    From kite with Apache License 2.0
@Test
@SuppressWarnings("unchecked")
public void testUnboundedDelete() throws Exception {
  // NOTE: this is an unrestricted write so all should succeed
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, oct, oct12, nov, nov11);

  Assert.assertTrue("Delete should return true to indicate data was deleted.",
      unbounded.deleteAll());
  assertDirectoriesDoNotExist(fs, y2013, sep12, sep, oct12, oct, nov11, nov);
  assertDirectoriesExist(fs, root);
}
 
Example #12
Source File: TestFileSystemView.java    From kite with Apache License 2.0
@Test
public void testRangeWithEmptyDirectory() throws Exception {
  long start = new DateTime(2013, 9, 1, 0, 0, DateTimeZone.UTC).getMillis();
  long end = new DateTime(2013, 11, 14, 0, 0, DateTimeZone.UTC).getMillis();
  final RefinableView<StandardEvent> range = unbounded
          .from("timestamp", start).toBefore("timestamp", end);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = range.newWriter();
    writer.write(sepEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  // All expected datasets and partitions are present
  final Path root = new Path("target/data/ns/test");
  final Path y2013 = new Path("target/data/ns/test/year=2013");
  final Path sep = new Path("target/data/ns/test/year=2013/month=09");
  final Path sep12 = new Path("target/data/ns/test/year=2013/month=09/day=12");
  final Path nov = new Path("target/data/ns/test/year=2013/month=11");
  final Path nov11 = new Path("target/data/ns/test/year=2013/month=11/day=11");
  assertDirectoriesExist(fs, root, y2013, sep, sep12, nov, nov11);

  // Recreate an empty directory that represents a valid partition for October 12.
  final Path oct = new Path("target/data/ns/test/year=2013/month=10");
  final Path oct12 = new Path("target/data/ns/test/year=2013/month=10/day=12");
  assertTrue("mkdir should return true to indicate FS changed.", fs.mkdirs(oct12));
  assertDirectoriesExist(fs, oct, oct12);

  // Test reading from September through November. The range includes the empty directory.
  assertContentEquals(Sets.newHashSet(sepEvent, novEvent), range);

}
 
Example #13
Source File: TestSimpleView.java    From kite with Apache License 2.0
@Test
public void testRefineIdentity() throws Exception {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
            .identity("user_id")
            .build();

    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schemaUri("resource:standard_event.avsc")
            .partitionStrategy(strategy)
            .build();

    // Create a separate dataset to avoid conflicts with the above.
    Dataset<StandardEvent> identityDataset = repo.create(
        "ns", "test_identity", descriptor);

    DatasetWriter<StandardEvent> writer = null;

    try {
        writer = identityDataset.newWriter();
        writer.write(sepEvent);
        writer.write(octEvent);
        writer.write(novEvent);
    } finally {
        Closeables.close(writer, false);
    }

    assertContentEquals(Sets.newHashSet(sepEvent, novEvent),
            identityDataset.with("user_id", 0L));
}
 
Example #14
Source File: TestPartitionedDatasetWriter.java    From kite with Apache License 2.0
private static <E> void writeToView(View<E> view, E... entities) {
  DatasetWriter<E> writer = null;
  try {
    writer = view.newWriter();
    for (E entity : entities) {
      writer.write(entity);
    }
    writer.close();
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #15
Source File: TestFileSystemUtil.java    From kite with Apache License 2.0
public void writeUserToView(View<GenericRecord> dataset) {
  DatasetWriter<GenericRecord> writer = null;
  try {
    writer = dataset.newWriter();
    writer.write(USER);
  } finally {
    if (writer != null) {
      writer.close();
    }
  }
}
 
Example #16
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testGenericProjectionAsSchema() throws IOException {
  Dataset<StandardEvent> original = Datasets.load(
      unbounded.getUri(), StandardEvent.class);
  Schema standardEvent = Schemas.fromAvsc(conf,
      URI.create("resource:standard_event.avsc"));
  final Schema smallEvent = Schemas.fromAvsc(conf,
      URI.create("resource:small_event.avsc"));

  DatasetWriter<GenericRecord> writer = null;
  try {
    writer = original.asSchema(standardEvent).newWriter();
    writer.write(toGeneric(sepEvent, standardEvent));
    writer.write(toGeneric(octEvent, standardEvent));
    writer.write(toGeneric(novEvent, standardEvent));
  } finally {
    Closeables.close(writer, false);
  }

  final View<GenericRecord> smallEvents = original.asSchema(smallEvent);

  Set<GenericRecord> expected = Sets.newHashSet(
      toGeneric(toSmallEvent(sepEvent), smallEvent),
      toGeneric(toSmallEvent(octEvent), smallEvent),
      toGeneric(toSmallEvent(novEvent), smallEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
 
Example #17
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testSpecificProjectionAsType() throws IOException {
  Dataset<GenericRecord> original = Datasets.load(unbounded.getUri());

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.asType(StandardEvent.class).newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  final View<SmallEvent> smallEvents = original.asType(SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(toSmallEvent(sepEvent),
      toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
 
Example #18
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testReflectProjectionAsType() throws IOException {
  Dataset<StandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(),
      StandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.asType(ReflectStandardEvent.class).newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  final View<ReflectSmallEvent> smallEvents = original.asType(ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, smallEvents);

  TestHelpers.assertThrows("Should not be able to write small events",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          smallEvents.newWriter();
        }
      });
}
 
Example #19
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(),
      ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  View<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
 
Example #20
Source File: TestProjection.java    From kite with Apache License 2.0
@Test
public void testIncompatibleProjection() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  TestHelpers.assertThrows(
      "Should not load a dataset with an incompatible class",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          repo.load("ns", unbounded.getDataset().getName(),
              IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.asType(IncompatibleEvent.class);
        }
      });

  TestHelpers.assertThrows("Should reject a schema that can't read or write",
      IncompatibleSchemaException.class, new Runnable() {
        @Override
        public void run() {
          unbounded.getDataset().asType(IncompatibleEvent.class);
        }
      });
}
 
Example #21
Source File: DaoViewTest.java    From kite with Apache License 2.0
@Test
public void testLimitedWriter() {
  final View<TestEntity> range = ds
      .fromAfter(NAMES[0], "1").to(NAMES[0], "5")
      .fromAfter(NAMES[1], "1").to(NAMES[1], "5");
  DatasetWriter<TestEntity> writer = range.newWriter();
  try {
    writer.write(newTestEntity("3", "3"));
    writer.write(newTestEntity("5", "5"));
  } finally {
    writer.close();
  }
}
 
Example #22
Source File: TestHiveExternalDatasetRepository.java    From kite with Apache License 2.0
private void writeRecord(Dataset<GenericRecord> dataset, int partition) {
  PartitionKey key = new PartitionKey(partition);
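  // getPartition(key, true): the boolean asks Kite to create the partition if it does not already exist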
  DatasetWriter<GenericRecord> writer =
      ((PartitionedDataset<GenericRecord>) dataset).getPartition(key, true).newWriter();
  try {
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(
        dataset.getDescriptor().getSchema())
        .set("username", partition + "").set("email", partition + "@example.com");
    writer.write(recordBuilder.build());
  } finally {
    writer.close();
  }

}
 
Example #23
Source File: TestCrunchDatasetsHBase.java    From kite with Apache License 2.0
private void writeRecords(Dataset<GenericRecord> dataset, int count) {
  DatasetWriter<GenericRecord> writer = dataset.newWriter();
  try {
    for (int i = 0; i < count; ++i) {
      GenericRecord entity = HBaseDatasetRepositoryTest.createGenericEntity(i);
      writer.write(entity);
    }
  } finally {
    writer.close();
  }
}
 
Example #24
Source File: CreateUserDatasetGenericPartitioned.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a partition strategy that partitions records by their favoriteColor value
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .identity("favoriteColor", "favorite_color")
      .build();

  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .partitionStrategy(partitionStrategy)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example #25
Source File: CreateUserDatasetGeneric.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example #26
Source File: CreateUserDatasetGenericParquet.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .format(Formats.PARQUET)
      .build();
  Dataset<Record> users = Datasets.create(
      "dataset:hdfs:/tmp/data/users", descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example #27
Source File: CreateProductDatasetPojo.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {

  // Create a dataset of products with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(Product.class)
      .build();
  Dataset<Product> products = Datasets.create(
      "dataset:hdfs:/tmp/data/products", descriptor, Product.class);

  // Get a writer for the dataset and write some products to it
  DatasetWriter<Product> writer = null;
  try {
    writer = products.newWriter();
    int i = 0;
    for (String name : names) {
      Product product = new Product();
      product.setName(name);
      product.setId(i++);
      writer.write(product);
    }
  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example #28
Source File: CreateHiveUserDatasetGeneric.java    From kite-examples with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
  // Create a dataset of users with the Avro schema
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:user.avsc")
      .build();
  Dataset<Record> users = Datasets.create("dataset:hive?dataset=users",
      descriptor, Record.class);

  // Get a writer for the dataset and write some users to it
  DatasetWriter<Record> writer = null;
  try {
    writer = users.newWriter();
    Random rand = new Random();
    GenericRecordBuilder builder = new GenericRecordBuilder(descriptor.getSchema());
    for (int i = 0; i < 100; i++) {
      Record record = builder.set("username", "user-" + i)
          .set("creationDate", System.currentTimeMillis())
          .set("favoriteColor", colors[rand.nextInt(colors.length)]).build();
      writer.write(record);
    }

  } finally {
    if (writer != null) {
      writer.close();
    }
  }

  return 0;
}
 
Example #29
Source File: DatasetSink.java    From kite with Apache License 2.0
private DatasetWriter<GenericRecord> newWriter(
    final UserGroupInformation login, final URI uri) {
  View<GenericRecord> view = KerberosUtil.runPrivileged(login,
      new PrivilegedExceptionAction<Dataset<GenericRecord>>() {
        @Override
        public Dataset<GenericRecord> run() {
          return Datasets.load(uri);
        }
      });

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  String formatName = descriptor.getFormat().getName();
  Preconditions.checkArgument(allowedFormats().contains(formatName),
      "Unsupported format: " + formatName);

  Schema newSchema = descriptor.getSchema();
  if (targetSchema == null || !newSchema.equals(targetSchema)) {
    this.targetSchema = descriptor.getSchema();
    // target dataset schema has changed, invalidate all readers based on it
    readers.invalidateAll();
  }

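  // Parquet writers hold references to written records, so the datum object cannot be reused for that format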
  this.reuseDatum = !("parquet".equals(formatName));
  this.datasetName = view.getDataset().getName();

  return view.newWriter();
}
 
Example #30
Source File: FileSystemView.java    From kite with Apache License 2.0
@Override
public DatasetWriter<E> newWriter() {
  checkSchemaForWrite();
  AbstractDatasetWriter<E> writer;
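  // partitioned datasets need a writer that routes each record to its partition (see PartitionedDatasetWriter above)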
  if (dataset.getDescriptor().isPartitioned()) {
    writer = PartitionedDatasetWriter.newWriter(this);
  } else {
    writer = FileSystemWriter.newWriter(
        fs, root, -1, -1 /* get from descriptor */,
        dataset.getDescriptor(), this.getAccessor().getWriteSchema());
  }
  writer.initialize();
  return writer;
}