org.apache.spark.SparkException Java Examples

The following examples show how to use org.apache.spark.SparkException. They are drawn from open source projects; the source file, project, and license are noted above each example.
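Most of these examples exercise the same behavior: an exception thrown inside a Spark task reaches the driver wrapped in a SparkException when an action runs, and because Scala-implemented APIs do not declare the checked SparkException in their Java-visible signatures, callers either assert on it with a testing utility or catch a plain Exception and narrow it with instanceof. The sketch below is not taken from any of the projects on this page; it is a minimal JUnit 5 test, assuming a spark-core dependency and local execution, that illustrates the pattern before the project-specific examples.

import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkException;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.jupiter.api.Test;

public class SparkExceptionPatternTest {

    @Test
    public void taskFailureSurfacesAsSparkException() {
        SparkConf conf = new SparkConf()
            .setMaster("local[1]")
            .setAppName("spark-exception-pattern");
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // Any exception thrown inside a task comes back to the driver wrapped
            // in a SparkException once the action (collect) is invoked.
            SparkException thrown = assertThrows(SparkException.class, () ->
                jsc.parallelize(Arrays.asList(1, 2, 3))
                   .map(i -> {
                       if (i > 0) {
                           throw new IllegalStateException("boom");
                       }
                       return i;
                   })
                   .collect());

            // In local mode the original task failure is generally preserved as the
            // cause, which is what several tests below (e.g. the gatk ones) assert on.
            assertTrue(thrown.getCause() instanceof IllegalStateException);
        }
    }
}
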
Example #1
Source File: BatchProfilerIntegrationTest.java    From metron with Apache License 2.0
@Test
public void testBatchProfilerWithInvalidProfile() {
  profilerProperties.put(TELEMETRY_INPUT_READER.getKey(), JSON.toString());
  profilerProperties.put(TELEMETRY_INPUT_PATH.getKey(), "src/test/resources/telemetry.json");

  // the batch profiler should error out if there is a bug in *any* of the profiles
  BatchProfiler profiler = new BatchProfiler();
  assertThrows(
      SparkException.class,
      () ->
          profiler.run(
              spark,
              profilerProperties,
              getGlobals(),
              readerProperties,
              fromJSON(invalidProfileJson)));
}
 
Example #2
Source File: SMOutputFormatTest.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void writeExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<RowLocation, ExecRow> dataset = new SparkPairDataSet<>(SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new ToRowLocationFunction()));
    JavaPairRDD<RowLocation, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(true));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");
    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());
    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }
    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
 
Example #3
Source File: SMOutputFormatTest.java    From spliceengine with GNU Affero General Public License v3.0
@Test
public void readExceptionsCauseAbort() throws StandardException, IOException {
    SparkPairDataSet<ExecRow, ExecRow> dataset = new SparkPairDataSet<>(SpliceSpark.getContextUnsafe().parallelizePairs(tenRows).mapToPair(new FailFunction()));
    JavaPairRDD<ExecRow, Either<Exception, ExecRow>> rdd = dataset.wrapExceptions();

    final Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    TableWriterUtils.serializeInsertTableWriterBuilder(conf, new FakeTableWriterBuilder(false));
    conf.setClass(JobContext.OUTPUT_FORMAT_CLASS_ATTR, FakeOutputFormat.class, FakeOutputFormat.class);
    // workaround for SPARK-21549 on spark-2.2.0
    conf.set("mapreduce.output.fileoutputformat.outputdir", "/tmp");
    File file = File.createTempFile(SMOutputFormatTest.class.getName(), "exception");
    file.delete();
    file.mkdir();
    conf.set("abort.directory", file.getAbsolutePath());
    try {
        rdd.saveAsNewAPIHadoopDataset(conf);
        Assert.fail("Expected exception");
    } catch (Exception se) {
        Assert.assertTrue("Unexpected exception", se instanceof SparkException);
    }
    File[] files = file.listFiles();
    Assert.assertTrue("Abort() not called", files.length > 0);
}
 
Example #4
Source File: MarkDuplicatesSparkUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testReadsMissingReadGroups() {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();

    SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname,
            true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
    samRecordSetBuilder.addFrag("READ", 0, 10000, false);

    JavaRDD<GATKRead> reads = ctx.parallelize(Lists.newArrayList(samRecordSetBuilder.getRecords()), 2).map(SAMRecordToGATKReadAdapter::new);
    reads = reads.map(r -> {r.setReadGroup(null); return r;});
    SAMFileHeader header = samRecordSetBuilder.getHeader();

    try {
        MarkDuplicatesSparkUtils.transformToDuplicateNames(header, MarkDuplicatesScoringStrategy.SUM_OF_BASE_QUALITIES, null, reads, 2, false).collect();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e){
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
 
Example #5
Source File: QuicksqlServerMeta.java    From Quicksql with MIT License
public QueryResult getSparkQueryResult(Entry<List<Attribute>, List<GenericRowWithSchema>> sparkData)
    throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    if (CollectionUtils.isEmpty(sparkData.getKey())) {
        throw new SparkException("collect data error");
    }
    List<Attribute> attributes = sparkData.getKey();
    List<GenericRowWithSchema> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        data.add(column.values());
    });
    for (int index = 0; index < sparkData.getKey().size(); index++) {
        Attribute attribute = sparkData.getKey().get(index);
        ScalarType columnType = getColumnType(attribute.dataType());
        meta.add(new ColumnMetaData(index, false, true, false, false,
            attribute.nullable() ? 1 : 0, true, -1, attribute.name(), attribute.name(), null, -1, -1, null, null,
            columnType, true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}
 
Example #6
Source File: SqoopSparkClientFactory.java    From sqoop-on-spark with Apache License 2.0
public static LocalSqoopSparkClient createSqoopSparkClient(SqoopConf sqoopConf)
    throws IOException, SparkException {

  Map<String, String> sparkConf = prepareSparkConfMapFromSqoopConfig(sqoopConf);
  // Submit the Spark job through a local Spark context when the Spark master is in
  // local mode; otherwise submit it through a remote Spark context.
  String master = sparkConf.get("spark.master");
  if (master.equals("local") || master.startsWith("local[")) {
    // With local spark context, all user sessions share the same spark
    // context.
    return LocalSqoopSparkClient.getInstance(generateSparkConf(sparkConf));
  } else {
    LOG.info("Using yarn submitter");
    //TODO: hook up yarn submitter
    return null;
  }
}
 
Example #7
Source File: SparkPipelineResult.java    From beam with Apache License 2.0
private static RuntimeException beamExceptionFrom(final Throwable e) {
  // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler
  // won't let you catch something that is not declared, so we can't catch
  // SparkException directly; instead we do an instanceof check.

  if (e instanceof SparkException) {
    if (e.getCause() != null && e.getCause() instanceof UserCodeException) {
      UserCodeException userException = (UserCodeException) e.getCause();
      return new Pipeline.PipelineExecutionException(userException.getCause());
    } else if (e.getCause() != null) {
      return new Pipeline.PipelineExecutionException(e.getCause());
    }
  }

  return runtimeExceptionFrom(e);
}
 
Example #8
Source File: SparkStructuredStreamingPipelineResult.java    From beam with Apache License 2.0
private static RuntimeException beamExceptionFrom(final Throwable e) {
  // Scala doesn't declare checked exceptions in the bytecode, and the Java compiler
  // won't let you catch something that is not declared, so we can't catch
  // SparkException directly; instead we do an instanceof check.

  if (e instanceof SparkException) {
    if (e.getCause() != null && e.getCause() instanceof UserCodeException) {
      UserCodeException userException = (UserCodeException) e.getCause();
      return new Pipeline.PipelineExecutionException(userException.getCause());
    } else if (e.getCause() != null) {
      return new Pipeline.PipelineExecutionException(e.getCause());
    }
  }

  return runtimeExceptionFrom(e);
}
 
Example #9
Source File: HaplotypeCallerSparkIntegrationTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * Test that in VCF mode we're >= 99% concordant with GATK3.5 results
 * THIS TEST explodes with an exception because Allele-Specific annotations are not supported in vcf mode yet.
 * It's included to parallel the matching (also exploding) test for the non-spark HaplotypeCaller
 * {@link org.broadinstitute.hellbender.tools.walkers.haplotypecaller.HaplotypeCallerIntegrationTest#testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations()}
 */
@Test(expectedExceptions = SparkException.class) //this should be a UserException, but spark exceptions are not unwrapped yet
public void testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations() throws Exception {
    Utils.resetRandomGenerator();

    final File output = createTempFile("testVCFModeIsConcordantWithGATK3_5ResultsAlleleSpecificAnnotations", ".vcf");

    //Created by running
    //java -jar ~/bin/GenomeAnalysisTK-3.5.0/GenomeAnalysisTK.jar -T HaplotypeCaller \
    // -I ./src/test/resources/large/CEUTrio.HiSeq.WGS.b37.NA12878.20.21.bam \
    // -R src/test/resources/large/human_g1k_v37.20.21.fasta -L 20:10000000-10100000 \
    // --out as.gatk3.5.noDownsample.vcf -G StandardHC -G Standard -G AS_Standard \
    // --disableDithering --no_cmdline_in_header  -dt NONE --maxReadsInRegionPerSample 100000000 --minReadsPerAlignmentStart 100000
    final File gatk3Output = new File(TEST_FILES_DIR + "expected.testVCFMode.gatk3.5.alleleSpecific.vcf");

    final String[] args = {
            "-I", NA12878_20_21_WGS_bam,
            "-R", b37_2bit_reference_20_21,
            "-L", "20:10000000-10100000",
            "-O", output.getAbsolutePath(),
            "-G", "StandardAnnotation",
            "-G", "AS_StandardAnnotation",
            "-pairHMM", "AVX_LOGLESS_CACHING",
            "-stand_call_conf", "30.0"
    };

    runCommandLine(args);

    final double concordance = HaplotypeCallerIntegrationTest.calculateConcordance(output, gatk3Output);
    Assert.assertTrue(concordance >= 0.99, "Concordance with GATK 3.5 in AS VCF mode is < 99% (" +  concordance + ")");
}
 
Example #10
Source File: RDDConverterUtilsExtTest.java    From systemds with Apache License 2.0
@Test(expected = SparkException.class)
public void testStringDataFrameToVectorDataFrameNonNumbers() {
	List<String> list = new ArrayList<>();
	list.add("[cheeseburger,fries]");
	JavaRDD<String> javaRddString = sc.parallelize(list);
	JavaRDD<Row> javaRddRow = javaRddString.map(new StringToRow());
	SparkSession sparkSession = SparkSession.builder().sparkContext(sc.sc()).getOrCreate();
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
	StructType schema = DataTypes.createStructType(fields);
	Dataset<Row> inDF = sparkSession.createDataFrame(javaRddRow, schema);
	Dataset<Row> outDF = RDDConverterUtilsExt.stringDataFrameToVectorDataFrame(sparkSession, inDF);
	// trigger evaluation to throw exception
	outDF.collectAsList();
}
 
Example #11
Source File: AbstractMarkDuplicatesCommandLineProgramTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testNonExistantReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292",  19, 19, 485253, 485253, false, false, true, true, "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, "NotADuplicateGroup");
    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e){
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.HeaderMissingReadGroup);
    }
}
 
Example #12
Source File: AbstractMarkDuplicatesCommandLineProgramTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testNoReadGroupInRead() {
    final MarkDuplicatesSparkTester tester = new MarkDuplicatesSparkTester(true);
    tester.addMatePair("RUNID:7:1203:2886:82292",  19, 19, 485253, 485253, false, false, true, true, "42M59S", "59S42M", true, false, false, false, false, DEFAULT_BASE_QUALITY, null);

    try {
        tester.runTest();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e){
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
 
Example #13
Source File: TestFileSystemInput.java    From envelope with Apache License 2.0
@Test (expected = SparkException.class)
public void readInputFormatMismatchTranslator() throws Exception {
  Map<String, Object> paramMap = new HashMap<>();
  paramMap.put(FileSystemInput.FORMAT_CONFIG, "input-format");
  paramMap.put(FileSystemInput.PATH_CONFIG, FileSystemInput.class.getResource(CSV_DATA).getPath());
  paramMap.put(FileSystemInput.INPUT_FORMAT_TYPE_CONFIG, KeyValueTextInputFormat.class.getCanonicalName());
  paramMap.put("translator.type", DummyInputFormatTranslator.class.getCanonicalName());
  config = ConfigFactory.parseMap(paramMap);

  FileSystemInput formatInput = new FileSystemInput();
  assertNoValidationFailures(formatInput, config);
  formatInput.configure(config);
  formatInput.read().show();
}
 
Example #14
Source File: StreamingService.java    From cxf with Apache License 2.0
private void processStream(AsyncResponse async, List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
            .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
        SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
        jssc.addStreamingListener(sparkListener);

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new OutputFunction(streamOut));
        jssc.start();

        executor.execute(new SparkJob(async, sparkListener));
    } catch (Exception ex) {
        // the compiler does not allow catching SparkException directly
        if (ex instanceof SparkException) {
            async.cancel(60);
        } else {
            async.resume(new WebApplicationException(ex));
        }
    }
}
 
Example #15
Source File: HExceptionFactory.java    From spliceengine with GNU Affero General Public License v3.0
private IOException parseSparkException(SparkException e){
    String errMessage = e.getMessage();
    if(errMessage==null)
        return new IOException("Unknown Spark exception");
    else if(SPARK_CANCELLATION_PATTERN.matcher(errMessage).find())
        return new OperationCancelledException();
    else return new IOException(e);
}
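
A call site for a translator like this typically catches a plain Exception (SparkException is not declared on Java-visible signatures, as the beam and cxf examples note) and narrows it with instanceof before translating. The sketch below is illustrative only and not spliceengine code; runAndTranslate is a hypothetical helper assumed to live on the same factory as parseSparkException.

public void runAndTranslate(Runnable sparkAction) throws IOException {
    try {
        sparkAction.run();
    } catch (Exception e) {
        // Narrow with instanceof rather than catching SparkException directly,
        // for the reason described in the beam and cxf examples above.
        if (e instanceof SparkException) {
            throw parseSparkException((SparkException) e);
        }
        throw new IOException(e);
    }
}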
 
Example #16
Source File: QuicksqlServerMeta.java    From Quicksql with MIT License
public QueryResult getFlinkQueryResult(Entry<TableSchema, List<Row>> sparkData) throws Exception {
    if (sparkData == null) {
        return new QueryResult(new ArrayList<>(), new ArrayList<>());
    }
    TableSchema tableSchema = sparkData.getKey();
    if (tableSchema == null || tableSchema.getFieldDataTypes().length != tableSchema.getFieldNames().length) {
        throw new SparkException("collect data error");
    }
    org.apache.flink.table.types.DataType[] fieldDataTypes = tableSchema.getFieldDataTypes();
    String[] fieldNames = tableSchema.getFieldNames();
    List<Row> value = sparkData.getValue();
    List<Object> data = new ArrayList<>();
    List<ColumnMetaData> meta = new ArrayList<>();
    value.stream().forEach(column -> {
        Object[] objects = new Object[column.getArity()];
        for (int i = 0; i < column.getArity(); i++) {
            objects[i] = column.getField(i);
        }
        data.add(Arrays.asList(objects));
    });
    for (int index = 0; index < fieldNames.length; index++) {
        ScalarType columnType = getColumnType2(fieldDataTypes[index]);
        meta.add(new ColumnMetaData(index, false, true, false, false,
            1, true, -1, fieldNames[index], fieldNames[index], null, -1, -1, null, null,
            columnType, true, false, false, columnType.columnClassName()));
    }
    return new QueryResult(meta, data);
}