Java Code Examples for org.apache.spark.TaskContext#get()

The following examples show how to use org.apache.spark.TaskContext#get(). They are taken from open-source projects; the source file, originating project, and license are noted above each example.
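Before the project-specific examples, here is a minimal, self-contained sketch of the basic usage pattern: TaskContext.get() is a static method that returns the TaskContext of the task running on the current thread, and it returns null when called outside a task (for example, on the driver), so callers usually guard it with a null check. The class name TaskContextGetExample, the local[2] master, and the sample data are illustrative assumptions, not part of any project listed below.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.api.java.JavaSparkContext;

public class TaskContextGetExample {

    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("TaskContextGetExample")
                .setMaster("local[2]");

        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            sc.parallelize(Arrays.asList(1, 2, 3, 4), 2).foreach(value -> {
                // Inside a task: TaskContext.get() returns the context for this thread.
                TaskContext ctx = TaskContext.get();
                if (ctx != null) {
                    System.out.printf("value=%d stage=%d partition=%d attempt=%d%n",
                            value, ctx.stageId(), ctx.partitionId(), ctx.attemptNumber());
                }
            });

            // On the driver, outside any task, TaskContext.get() returns null.
            System.out.println("Driver-side TaskContext: " + TaskContext.get());
        }
    }
}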
Example 1
Source File: SparkTableUtil.java    From iceberg with Apache License 2.0
private static Iterator<ManifestFile> buildManifest(SerializableConfiguration conf, PartitionSpec spec,
                                                    String basePath, Iterator<Tuple2<String, DataFile>> fileTuples) {
  if (fileTuples.hasNext()) {
    FileIO io = new HadoopFileIO(conf.get());
    TaskContext ctx = TaskContext.get();
    String suffix = String.format("stage-%d-task-%d-manifest", ctx.stageId(), ctx.taskAttemptId());
    Path location = new Path(basePath, suffix);
    String outputPath = FileFormat.AVRO.addExtension(location.toString());
    OutputFile outputFile = io.newOutputFile(outputPath);
    ManifestWriter<DataFile> writer = ManifestFiles.write(spec, outputFile);

    try (ManifestWriter<DataFile> writerRef = writer) {
      fileTuples.forEachRemaining(fileTuple -> writerRef.add(fileTuple._2));
    } catch (IOException e) {
      throw SparkExceptionUtil.toUncheckedException(e, "Unable to close the manifest writer: %s", outputPath);
    }

    ManifestFile manifestFile = writer.toManifestFile();
    return ImmutableList.of(manifestFile).iterator();
  } else {
    return Collections.emptyIterator();
  }
}
 
Example 2
Source File: SparkAMDSI.java    From deeplearning4j with Apache License 2.0
public SparkAMDSI(MultiDataSetIterator iterator, int queueSize, BlockingQueue<MultiDataSet> queue,
                boolean useWorkspace, DataSetCallback callback, Integer deviceId) {
    this();

    if (queueSize < 2)
        queueSize = 2;

    this.callback = callback;
    this.buffer = queue;
    this.backedIterator = iterator;
    this.useWorkspaces = useWorkspace;
    this.prefetchSize = queueSize;
    this.workspaceId = "SAMDSI_ITER-" + java.util.UUID.randomUUID().toString();
    this.deviceId = deviceId;

    if (iterator.resetSupported())
        this.backedIterator.reset();

    this.thread = new SparkPrefetchThread(buffer, iterator, terminator, Nd4j.getAffinityManager().getDeviceForCurrentThread());

    context = TaskContext.get();

    thread.setDaemon(true);
    thread.start();
}
 
Example 3
Source File: NLJoinFunction.java    From spliceengine with GNU Affero General Public License v3.0
protected void init(Iterator<ExecRow> from) throws StandardException {
    checkInit();
    taskContext = TaskContext.get();
    if (taskContext != null) {
        taskContext.addTaskCompletionListener((TaskCompletionListener) (t) -> close());
    }
    operationContext.getOperation().registerCloseable(this);
    SConfiguration configuration= EngineDriver.driver().getConfiguration();
    batchSize = configuration.getNestedLoopJoinBatchSize();
    nLeftRows = 0;
    leftSideIterator = from;
    executorService = SIDriver.driver().getExecutorService();
    firstBatch = new ArrayDeque<>(batchSize);

    initOperationContexts();
    loadBatch();
}
 
Example 4
Source File: SpliceOutputCommitter.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {

    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG,"setupTask");
    // Create child additive transaction so we don't read rows inserted by ourselves in this operation
    TaskContext sparkTaskContext = TaskContext.get();
    TaskId taskId = null;
    if (sparkTaskContext != null) {
        int stageId = sparkTaskContext.stageId();
        int partitionId = sparkTaskContext.partitionId();
        int attemptNumber = sparkTaskContext.attemptNumber();
        taskId = new TaskId(stageId, partitionId, attemptNumber);
    }
    TxnView txn = SIDriver.driver().lifecycleManager().beginChildTransaction(parentTxn, parentTxn.getIsolationLevel(),
            true, destinationTable, false, taskId);
    ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true, parentTxn.getIsolationLevel(), taskId);
    currentTxn.set(childTxn);
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG,"beginTxn=%s and destinationTable=%s",childTxn,destinationTable);

}
 
Example 5
Source File: SparkADSI.java    From deeplearning4j with Apache License 2.0
public SparkADSI(DataSetIterator iterator, int queueSize, BlockingQueue<DataSet> queue, boolean useWorkspace,
                DataSetCallback callback, Integer deviceId) {
    this();

    if (queueSize < 2)
        queueSize = 2;

    this.deviceId = deviceId;
    this.callback = callback;
    this.useWorkspace = useWorkspace;
    this.buffer = queue;
    this.prefetchSize = queueSize;
    this.backedIterator = iterator;
    this.workspaceId = "SADSI_ITER-" + java.util.UUID.randomUUID().toString();

    if (iterator.resetSupported())
        this.backedIterator.reset();

    context = TaskContext.get();

    // Ensure the background prefetch thread gets the same thread->device affinity as the master thread.
    this.thread = new SparkPrefetchThread(buffer, iterator, terminator, null, Nd4j.getAffinityManager().getDeviceForCurrentThread());

    thread.setDaemon(true);
    thread.start();
}
 
Example 6
Source File: IteratorUtils.java    From spliceengine with GNU Affero General Public License v3.0
public static <E> Iterator<E> asInterruptibleIterator(Iterator<E> it) {
    TaskContext context = TaskContext.get();
    if (context != null) {
        return (Iterator<E>) JavaConverters.asJavaIteratorConverter(new InterruptibleIterator(context, JavaConverters.asScalaIteratorConverter(it).asScala())).asJava();
    } else
        return it;
}
 
Example 7
Source File: SparkLeanOperationContext.java    From spliceengine with GNU Affero General Public License v3.0
@Override
@SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "intended")
public void readExternal(ObjectInput in)
        throws IOException, ClassNotFoundException{
    if (in.readBoolean()) {
        SpliceClient.connectionString = in.readUTF();
        SpliceClient.setClient(HConfiguration.getConfiguration().getAuthenticationTokenEnabled(), SpliceClient.Mode.EXECUTOR);
    }
    badRecordsSeen = in.readLong();
    badRecordThreshold = in.readLong();
    permissive=in.readBoolean();
    SpliceSpark.setupSpliceStaticComponents();
    boolean isOp=in.readBoolean();
    if(isOp){
        broadcastedActivation = (BroadcastedActivation)in.readObject();
        ActivationHolder ah = broadcastedActivation.getActivationHolder();
        op=(Op)ah.getOperationsMap().get(in.readInt());
        activation = ah.getActivation();
        TaskContext taskContext = TaskContext.get();
        if (taskContext != null) {
            taskContext.addTaskCompletionListener((TaskCompletionListener)(ctx) -> ah.close());
        }
    }
    badRecordsAccumulator = (Accumulable<BadRecordsRecorder,String>) in.readObject();
    importFileName= (String) in.readObject();
    rowsWritten=(LongAccumulator)in.readObject();
}
 
Example 8
Source File: KafkaStreamer.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public Iterator<String> call(Integer partition, Iterator<T> locatedRowIterator) throws Exception {
    taskContext = TaskContext.get();

    if (taskContext != null && taskContext.attemptNumber() > 0) {
        LOG.trace("KS.c attempts "+taskContext.attemptNumber());
        long entriesInKafka = KafkaUtils.messageCount(bootstrapServers, topicName, partition);
        LOG.trace("KS.c entries "+entriesInKafka);
        for (long i = 0; i < entriesInKafka; ++i) {
            locatedRowIterator.next();
        }
    }

    Properties props = new Properties();
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    props.put(ProducerConfig.CLIENT_ID_CONFIG, "spark-producer-dss-ks-"+UUID.randomUUID() );
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class.getName());
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ExternalizableSerializer.class.getName());
    KafkaProducer<Integer, Externalizable> producer = new KafkaProducer<>(props);
    int count = 0 ;
    while (locatedRowIterator.hasNext()) {
        T lr = locatedRowIterator.next();

        ProducerRecord<Integer, Externalizable> record = new ProducerRecord(topicName, count++, lr);
        producer.send(record);
        LOG.trace("KS.c sent "+partition.intValue()+" "+count+" "+lr);
    }
    LOG.trace("KS.c count "+partition.intValue()+" "+count);

    producer.close();
    // TODO Clean up
    return Arrays.asList("OK").iterator();
}
 
Example 9
Source File: RowDataRewriter.java    From iceberg with Apache License 2.0
private TaskResult rewriteDataForTask(CombinedScanTask task) throws Exception {
  TaskContext context = TaskContext.get();
  int partitionId = context.partitionId();
  long taskId = context.taskAttemptId();

  RowDataReader dataReader = new RowDataReader(
      task, schema, schema, nameMapping, io.value(), encryptionManager.value(), caseSensitive);

  SparkAppenderFactory appenderFactory = new SparkAppenderFactory(
      properties, schema, SparkSchemaUtil.convert(schema));
  OutputFileFactory fileFactory = new OutputFileFactory(
      spec, format, locations, io.value(), encryptionManager.value(), partitionId, taskId);

  BaseWriter writer;
  if (spec.fields().isEmpty()) {
    writer = new UnpartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE);
  } else {
    writer = new PartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE, schema);
  }

  try {
    while (dataReader.next()) {
      InternalRow row = dataReader.get();
      writer.write(row);
    }

    dataReader.close();
    dataReader = null;
    return writer.complete();

  } catch (Throwable originalThrowable) {
    try {
      LOG.error("Aborting task", originalThrowable);
      context.markTaskFailed(originalThrowable);

      LOG.error("Aborting commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.attemptNumber(), context.stageId(), context.stageAttemptNumber());
      if (dataReader != null) {
        dataReader.close();
      }
      writer.abort();
      LOG.error("Aborted commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.taskAttemptId(), context.stageId(), context.stageAttemptNumber());

    } catch (Throwable inner) {
      if (originalThrowable != inner) {
        originalThrowable.addSuppressed(inner);
        LOG.warn("Suppressing exception in catch: {}", inner.getMessage(), inner);
      }
    }

    if (originalThrowable instanceof Exception) {
      throw originalThrowable;
    } else {
      throw new RuntimeException(originalThrowable);
    }
  }
}
 
Example 10
Source File: SparkBoundedInMemoryExecutor.java    From hudi with Apache License 2.0
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Option.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
 
Example 11
Source File: SMRecordReaderImpl.java    From spliceengine with GNU Affero General Public License v3.0
public void init(Configuration config, InputSplit split) throws IOException, InterruptedException {
    if (LOG.isDebugEnabled())
        SpliceLogUtils.debug(LOG, "init");
    if (TaskContext.get() != null) {
        TaskContext.get().addTaskFailureListener(this);
    }
    String tableScannerAsString = config.get(MRConstants.SPLICE_SCAN_INFO);
    if (tableScannerAsString == null)
        throw new IOException("splice scan info was not serialized to task, failing");
    byte[] scanStartKey = null;
    byte[] scanStopKey = null;
    // declared outside the try block so it stays in scope for the error log in the catch clause
    DataScan scan = null;
    try {
        builder = TableScannerBuilder.getTableScannerBuilderFromBase64String(tableScannerAsString);
        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "config loaded builder=%s", builder);
        TableSplit tSplit = ((SMSplit) split).getSplit();
        token = builder.getToken();
        scan = builder.getScan();
        scanStartKey = scan.getStartKey();
        scanStopKey = scan.getStopKey();
        if (Bytes.startComparator.compare(scanStartKey, tSplit.getStartRow()) < 0) {
            // the split itself is more restrictive
            scan.startKey(tSplit.getStartRow());
        }
        if (Bytes.endComparator.compare(scanStopKey, tSplit.getEndRow()) > 0) {
            // the split itself is more restrictive
            scan.stopKey(tSplit.getEndRow());
        }
        setScan(((HScan) scan).unwrapDelegate());
        // TODO (wjk): this seems weird (added with DB-4483)
        this.statisticsRun = AbstractSMInputFormat.oneSplitPerRegion(config);
        Double sampling = AbstractSMInputFormat.sampling(config);
        if (sampling != null) {
            this.sampling = true;
            this.samplingRate = sampling;
        }
        restart(scan.getStartKey());
    } catch (IOException ioe) {
        LOG.error(String.format("Received exception with scan %s, original start key %s, original stop key %s, split %s",
                scan, Bytes.toStringBinary(scanStartKey), Bytes.toStringBinary(scanStopKey), split), ioe);
        throw ioe;
    } catch (StandardException e) {
        throw new IOException(e);
    }
}