org.apache.spark.serializer.KryoSerializer Java Examples

The following examples show how to use org.apache.spark.serializer.KryoSerializer. They are drawn from open source projects; each example notes its original source file, project, and license.
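Before the project examples, here is a minimal, self-contained sketch (the class name and configuration values are illustrative, not taken from any project below). It shows the two common ways the serializer appears in the examples: registered as spark.serializer on a SparkConf, and instantiated directly to obtain a SerializerInstance for a manual round trip.

import java.nio.ByteBuffer;

import org.apache.spark.SparkConf;
import org.apache.spark.serializer.KryoSerializer;
import org.apache.spark.serializer.SerializerInstance;

import scala.reflect.ClassTag;
import scala.reflect.ClassTag$;

public class KryoSerializerSketch {
  public static void main(String[] args) {
    // Register Kryo as Spark's serializer (the app name and master are placeholder values).
    SparkConf conf = new SparkConf()
        .setAppName("kryo-sketch")
        .setMaster("local[2]")
        .set("spark.serializer", KryoSerializer.class.getName());

    // Standalone use, as in the test examples below: round-trip a value through Kryo.
    SerializerInstance instance = new KryoSerializer(conf).newInstance();
    ClassTag<String> tag = ClassTag$.MODULE$.apply(String.class);
    ByteBuffer bytes = instance.serialize("hello kryo", tag);
    String copy = instance.deserialize(bytes, tag);
    System.out.println(copy); // prints "hello kryo"
  }
}
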
Example #1
Source File: TestDataFileSerialization.java    From iceberg with Apache License 2.0
@Test
public void testDataFileKryoSerialization() throws Exception {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());
  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, DATA_FILE);
    kryo.writeClassAndObject(out, DATA_FILE.copy());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 2; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a DataFile", obj instanceof DataFile);
      checkDataFile(DATA_FILE, (DataFile) obj);
    }
  }
}
 
Example #2
Source File: TestDataFileSerialization.java    From iceberg with Apache License 2.0
@Test
public void testParquetWriterSplitOffsets() throws IOException {
  Iterable<InternalRow> records = RandomData.generateSpark(DATE_SCHEMA, 1, 33L);
  File parquetFile = new File(
      temp.getRoot(),
      FileFormat.PARQUET.addExtension(UUID.randomUUID().toString()));
  FileAppender<InternalRow> writer =
      Parquet.write(Files.localOutput(parquetFile))
          .schema(DATE_SCHEMA)
          .createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(DATE_SCHEMA), msgType))
          .build();
  try {
    writer.addAll(records);
  } finally {
    writer.close();
  }

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();
  File dataFile = temp.newFile();
  try (Output out = new Output(new FileOutputStream(dataFile))) {
    kryo.writeClassAndObject(out, writer.splitOffsets());
  }
  try (Input in = new Input(new FileInputStream(dataFile))) {
    kryo.readClassAndObject(in);
  }
}
 
Example #3
Source File: TestManifestFileSerialization.java    From iceberg with Apache License 2.0
@Test
public void testManifestFileKryoSerialization() throws IOException {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  ManifestFile manifest = writeManifest(FILE_A);

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, manifest);
    kryo.writeClassAndObject(out, manifest.copy());
    kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 3; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile);
      checkManifestFile(manifest, (ManifestFile) obj);
    }
  }
}
 
Example #4
Source File: LensAPI.java    From cognition with Apache License 2.0
/**
 * Helper method for creating the Spark context from the given cognition configuration.
 *
 * @return a new, configured Spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();

  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }

  return sc;
}
 
Example #5
Source File: UnshadedKryoShimService.java    From tinkerpop with Apache License 2.0
private LinkedBlockingQueue<Kryo> initialize(final Configuration configuration) {
    // DCL is safe in this case due to volatility
    if (!INITIALIZED) {
        synchronized (UnshadedKryoShimService.class) {
            if (!INITIALIZED) {
                // so we don't get a WARN that a new configuration is being created within an active context
                final SparkConf sparkConf = null == Spark.getContext() ? new SparkConf() : Spark.getContext().getConf().clone();
                configuration.getKeys().forEachRemaining(key -> sparkConf.set(key, configuration.getProperty(key).toString()));
                final KryoSerializer serializer = new KryoSerializer(sparkConf);
                // Set up a pool backed by our spark.serializer instance
                // Reuse the Gryo pool size for the Kryo pool size (no need to copy this to SparkConf)
                KRYOS.clear();
                final int poolSize = configuration.getInt(GryoPool.CONFIG_IO_GRYO_POOL_SIZE, GryoPool.CONFIG_IO_GRYO_POOL_SIZE_DEFAULT);
                for (int i = 0; i < poolSize; i++) {
                    KRYOS.add(serializer.newKryo());
                }
                INITIALIZED = true;
            }
        }
    }

    return KRYOS;
}
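The initializer above only fills the KRYOS queue; a hypothetical caller (not part of UnshadedKryoShimService, shown only to illustrate the pooling pattern) would borrow an instance, use it, and return it so the pool is never drained:

// Hypothetical sketch: borrow a pooled Kryo instance, serialize a payload, and give it back.
private byte[] writeWithPooledKryo(final Object payload) throws InterruptedException {
    final Kryo kryo = KRYOS.take();              // blocks until an instance is available
    try {
        final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (final Output out = new Output(bytes)) {
            kryo.writeClassAndObject(out, payload);
        }
        return bytes.toByteArray();
    } finally {
        KRYOS.put(kryo);                         // always return the instance to the pool
    }
}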
 
Example #6
Source File: SparkFrontendUtils.java    From incubator-nemo with Apache License 2.0
/**
 * Derive Spark serializer from a spark context.
 *
 * @param sparkContext spark context to derive the serializer from.
 * @return the serializer.
 */
public static Serializer deriveSerializerFrom(final org.apache.spark.SparkContext sparkContext) {
  if (sparkContext.conf().get("spark.serializer", "")
    .equals("org.apache.spark.serializer.KryoSerializer")) {
    return new KryoSerializer(sparkContext.conf());
  } else {
    return new JavaSerializer(sparkContext.conf());
  }
}
 
Example #7
Source File: SparkFrontendUtils.java    From nemo with Apache License 2.0
/**
 * Derive Spark serializer from a spark context.
 * @param sparkContext spark context to derive the serializer from.
 * @return the serializer.
 */
public static Serializer deriveSerializerFrom(final SparkContext sparkContext) {
  if (sparkContext.conf().get("spark.serializer", "")
      .equals("org.apache.spark.serializer.KryoSerializer")) {
    return new KryoSerializer(sparkContext.conf());
  } else {
    return new JavaSerializer(sparkContext.conf());
  }
}
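Examples #6 and #7 only select the serializer implementation; a hypothetical caller (the array payload below is illustrative) would take a per-thread SerializerInstance from the result and round-trip data with it:

// Hypothetical usage sketch: the Serializer can be shared, but newInstance() is per thread.
final Serializer serializer = SparkFrontendUtils.deriveSerializerFrom(sparkContext);
final SerializerInstance instance = serializer.newInstance();
final ClassTag<long[]> tag = ClassTag$.MODULE$.apply(long[].class);
final ByteBuffer encoded = instance.serialize(new long[]{1L, 2L, 3L}, tag);
final long[] decoded = instance.deserialize(encoded, tag);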
 
Example #8
Source File: SparkBatchProcessingTest.java    From OSTMap with Apache License 2.0
private static StreamingContext createSparkStreamingContext(){
    SparkConf conf = new SparkConf()
            .setAppName("Spark Batch Processing Test")
            .set("spark.serializer", KryoSerializer.class.getCanonicalName())
            .set("spark.eventLog.enabled", "true");
    return new StreamingContext(conf, Durations.seconds(15));
}
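A hypothetical driver using the helper above (a sketch, not OSTMap code) would attach its DStreams and then start the context:

// Hypothetical driver sketch: build the context, define the streaming job, then block.
StreamingContext ssc = createSparkStreamingContext();
// ... define input DStreams and output operations on ssc here ...
ssc.start();
ssc.awaitTermination();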
 
Example #9
Source File: LensTest.java    From cognition with Apache License 2.0
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException, TableExistsException, TableNotFoundException {

/*Connector conn = instance.getConnector("root", new PasswordToken());
Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
for(Map.Entry<Key, Value> entry : scan){
	System.out.println(entry);
}*/

  SparkConf conf = new SparkConf();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName("test");
  conf.setMaster("local[2]");

  SparkContext sc = new SparkContext(conf);

  CognitionConfiguration pip = new CognitionConfiguration(new AccumuloConfiguration(instance, user, password, true));
  LensAPI lens = new LensAPI(sc, pip);
  Criteria criteria = new Criteria();
  criteria.addKeyword("test");
  criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));
  SchemaAdapter s = new SchemaAdapter();
  s.loadJson("moreover-schema.json");
  criteria.setSchema(s);
  criteria.setAccumuloTable("moreover");
  String json = lens.query(criteria);
  assertEquals("[moreover json]", json);
}
 
Example #10
Source File: SparkHadoopGraphProvider.java    From tinkerpop with Apache License 2.0
@Override
public Map<String, Object> getBaseConfiguration(final String graphName, final Class<?> test, final String testMethodName, final LoadGraphWith.GraphData loadGraphWith) {
    this.graphSONInput = RANDOM.nextBoolean();
    if (this.getClass().equals(SparkHadoopGraphProvider.class) && !SparkHadoopGraphProvider.class.getCanonicalName().equals(System.getProperty(PREVIOUS_SPARK_PROVIDER, null))) {
        Spark.close();
        HadoopPools.close();
        KryoShimServiceLoader.close();
        System.setProperty(PREVIOUS_SPARK_PROVIDER, SparkHadoopGraphProvider.class.getCanonicalName());
    }

    final Map<String,Object> config = new HashMap<String, Object>() {{
        put(Graph.GRAPH, HadoopGraph.class.getName());
        put(Constants.GREMLIN_HADOOP_GRAPH_READER, graphSONInput ? GraphSONInputFormat.class.getCanonicalName() : GryoInputFormat.class.getCanonicalName());
        put(Constants.GREMLIN_HADOOP_GRAPH_WRITER, GryoOutputFormat.class.getCanonicalName());
        put(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, getWorkingDirectory());
        put(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);

        put(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);  // this makes the test suite go really fast
    }};

    // toy graph inputRDD does not have corresponding outputRDD so where jobs chain, it fails (failing makes sense)
    if (null != loadGraphWith &&
            !test.equals(ProgramTest.Traversals.class) &&
            !test.equals(PageRankTest.Traversals.class) &&
            !test.equals(ConnectedComponentTest.Traversals.class) &&
            !test.equals(ShortestPathTest.Traversals.class) &&
            !test.equals(PeerPressureTest.Traversals.class) &&
            !test.equals(FileSystemStorageCheck.class) &&
            !testMethodName.equals("shouldSupportJobChaining") &&  // GraphComputerTest.shouldSupportJobChaining
            RANDOM.nextBoolean()) {
        config.put(Constants.GREMLIN_HADOOP_GRAPH_READER, ToyGraphInputRDD.class.getCanonicalName());
    }

    // tests persisted RDDs
    if (test.equals(SparkContextStorageCheck.class)) {
        config.put(Constants.GREMLIN_HADOOP_GRAPH_READER, ToyGraphInputRDD.class.getCanonicalName());
        config.put(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    }

    config.put(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName());
    config.put(SparkLauncher.SPARK_MASTER, "local[" + AVAILABLE_PROCESSORS + "]");
    config.put(Constants.SPARK_SERIALIZER, KryoSerializer.class.getCanonicalName());
    config.put(Constants.SPARK_KRYO_REGISTRATOR, GryoRegistrator.class.getCanonicalName());
    config.put(Constants.SPARK_KRYO_REGISTRATION_REQUIRED, true);
    return config;
}
 
Example #11
Source File: SpliceKryoSerializerInstance.java    From spliceengine with GNU Affero General Public License v3.0
public SpliceKryoSerializerInstance(KryoSerializer ks) {
    super(ks, false);
}
 
Example #12
Source File: SparkTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * Takes an input object and returns the value of the object after it has been serialized and then deserialized in Kryo.
 * Requires the class of the input object as a parameter because it's not generally possible to get the class of a
 * generified method parameter with reflection.
 *
 * @param input instance of inputClazz.  Never {@code null}
 * @param inputClazz class of the input object, used to build the Kryo ClassTag
 * @param conf Spark configuration to test
 * @param <T> type of the input; the same as, or a subclass of, inputClazz
 * @return serialized and deserialized instance of input.  Throws exception if serialization round trip fails.
 */
public static <T> T roundTripInKryo(final T input, final Class<?> inputClazz, final SparkConf conf) {
    Utils.nonNull(input);
    final KryoSerializer kryoSerializer = new KryoSerializer(conf);
    final SerializerInstance sparkSerializer = kryoSerializer.newInstance();
    final ClassTag<T> tag = ClassTag$.MODULE$.apply(inputClazz);
    return sparkSerializer.deserialize(sparkSerializer.serialize(input, tag), tag);
}
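A hedged usage sketch (the configuration and payload are assumptions for illustration, not GATK test code): select Kryo on a plain SparkConf, round-trip a primitive array, and assert that the copy matches the original.

@Test
public void roundTripSketch() {
    // Assumed configuration: Kryo as the serializer, default registration behaviour.
    final SparkConf conf = new SparkConf()
            .set("spark.serializer", KryoSerializer.class.getName());
    final double[] original = {1.0, 2.5, Double.NaN};
    final double[] copy = SparkTestUtils.roundTripInKryo(original, double[].class, conf);
    Assert.assertArrayEquals(original, copy, 0.0);
}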