/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.iceberg; import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import org.apache.hadoop.conf.Configuration; import org.apache.iceberg.ManifestFile.PartitionFieldSummary; import org.apache.iceberg.hadoop.HadoopFileIO; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.types.Types; import org.apache.spark.SparkConf; import org.apache.spark.serializer.KryoSerializer; import org.junit.Assert; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import static org.apache.iceberg.types.Types.NestedField.optional; import static org.apache.iceberg.types.Types.NestedField.required; public class TestManifestFileSerialization { private static final Schema SCHEMA = new Schema( required(1, "id", Types.LongType.get()), optional(2, "data", Types.StringType.get()), required(3, "date", Types.StringType.get())); private static final PartitionSpec SPEC = PartitionSpec .builderFor(SCHEMA) .identity("date") .build(); private static final DataFile FILE_A = DataFiles.builder(SPEC) .withPath("/path/to/data-1.parquet") .withFileSizeInBytes(0) .withPartitionPath("date=2018-06-08") .withMetrics(new Metrics(5L, null, // no column sizes ImmutableMap.of(1, 5L, 2, 3L), // value count ImmutableMap.of(1, 0L, 2, 2L), // null count ImmutableMap.of(1, longToBuffer(0L)), // lower bounds ImmutableMap.of(1, longToBuffer(4L)) // upper bounds )) .build(); private static final FileIO FILE_IO = new HadoopFileIO(new Configuration()); @Rule public TemporaryFolder temp = new TemporaryFolder(); @Test public void testManifestFileKryoSerialization() throws IOException { File data = temp.newFile(); Assert.assertTrue(data.delete()); Kryo kryo = new KryoSerializer(new SparkConf()).newKryo(); ManifestFile manifest = writeManifest(FILE_A); try (Output out = new Output(new FileOutputStream(data))) { kryo.writeClassAndObject(out, manifest); kryo.writeClassAndObject(out, manifest.copy()); kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build()); } try (Input in = new Input(new FileInputStream(data))) { for (int i = 0; i < 3; i += 1) { Object obj = kryo.readClassAndObject(in); Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile); checkManifestFile(manifest, (ManifestFile) obj); } } } @Test public void testManifestFileJavaSerialization() throws Exception { ByteArrayOutputStream bytes = new ByteArrayOutputStream(); ManifestFile manifest = writeManifest(FILE_A); try (ObjectOutputStream out = new ObjectOutputStream(bytes)) { out.writeObject(manifest); out.writeObject(manifest.copy()); out.writeObject(GenericManifestFile.copyOf(manifest).build()); } try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()))) { for (int i = 0; i < 3; i += 1) { Object obj = in.readObject(); Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile); checkManifestFile(manifest, (ManifestFile) obj); } } } private void checkManifestFile(ManifestFile expected, ManifestFile actual) { Assert.assertEquals("Path must match", expected.path(), actual.path()); Assert.assertEquals("Length must match", expected.length(), actual.length()); Assert.assertEquals("Spec id must match", expected.partitionSpecId(), actual.partitionSpecId()); Assert.assertEquals("Snapshot id must match", expected.snapshotId(), actual.snapshotId()); Assert.assertEquals("Added files flag must match", expected.hasAddedFiles(), actual.hasAddedFiles()); Assert.assertEquals("Added files count must match", expected.addedFilesCount(), actual.addedFilesCount()); Assert.assertEquals("Added rows count must match", expected.addedRowsCount(), actual.addedRowsCount()); Assert.assertEquals("Existing files flag must match", expected.hasExistingFiles(), actual.hasExistingFiles()); Assert.assertEquals("Existing files count must match", expected.existingFilesCount(), actual.existingFilesCount()); Assert.assertEquals("Existing rows count must match", expected.existingRowsCount(), actual.existingRowsCount()); Assert.assertEquals("Deleted files flag must match", expected.hasDeletedFiles(), actual.hasDeletedFiles()); Assert.assertEquals("Deleted files count must match", expected.deletedFilesCount(), actual.deletedFilesCount()); Assert.assertEquals("Deleted rows count must match", expected.deletedRowsCount(), actual.deletedRowsCount()); PartitionFieldSummary expectedPartition = expected.partitions().get(0); PartitionFieldSummary actualPartition = actual.partitions().get(0); Assert.assertEquals("Null flag in partition must match", expectedPartition.containsNull(), actualPartition.containsNull()); Assert.assertEquals("Lower bounds in partition must match", expectedPartition.lowerBound(), actualPartition.lowerBound()); Assert.assertEquals("Upper bounds in partition must match", expectedPartition.upperBound(), actualPartition.upperBound()); } private ManifestFile writeManifest(DataFile... files) throws IOException { File manifestFile = temp.newFile("input.m0.avro"); Assert.assertTrue(manifestFile.delete()); OutputFile outputFile = FILE_IO.newOutputFile(manifestFile.getCanonicalPath()); ManifestWriter writer = ManifestFiles.write(SPEC, outputFile); try { for (DataFile file : files) { writer.add(file); } } finally { writer.close(); } return writer.toManifestFile(); } private static ByteBuffer longToBuffer(long value) { return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, value); } }