Java Code Examples for org.apache.storm.task.TopologyContext#getThisTaskIndex()

The following examples show how to use org.apache.storm.task.TopologyContext#getThisTaskIndex(). The method returns the index of this task within its component, i.e. a value between 0 and the number of tasks for the component minus one, which makes it the natural handle for splitting work across the parallel tasks of a spout or bolt (getThisTaskId(), by contrast, returns a topology-wide task ID). You can go to the original project or source file by following the links above each example.
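Before the project examples, here is a minimal sketch of the pattern almost all of them follow: pair getThisTaskIndex() with getComponentTasks() to work out which slice of the work belongs to the current task. The NUM_PARTITIONS constant and myPartitions field are hypothetical, introduced only for illustration:

@Override
public void open(Map<String, Object> conf, TopologyContext context,
        SpoutOutputCollector collector) {
    // How many tasks run this component, and which one is this?
    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    int taskIndex = context.getThisTaskIndex(); // always in [0, totalTasks)

    // Round-robin assignment: task i claims every totalTasks-th partition.
    // NUM_PARTITIONS and myPartitions are hypothetical fields, not Storm API.
    for (int partition = taskIndex; partition < NUM_PARTITIONS; partition += totalTasks) {
        myPartitions.add(partition);
    }
}
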
Example 1
Source File: StormRecorder.java    From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
@Override
public void open(final Map<String, Object> spoutConfig, final TopologyContext topologyContext) {
    // Load configuration items.

    // Determine our time bucket window, in seconds, defaulted to 60.
    int timeBucketSeconds = 60;
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_TIME_BUCKET)) {
        final Object timeBucketCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_TIME_BUCKET);
        if (timeBucketCfgValue instanceof Number) {
            timeBucketSeconds = ((Number) timeBucketCfgValue).intValue();
        }
    }

    // Conditionally enable prefixing with taskId
    if (spoutConfig.containsKey(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX)) {
        final Object taskIdCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX);
        if (taskIdCfgValue instanceof Boolean && (Boolean) taskIdCfgValue) {
            this.metricPrefix = "task-" + topologyContext.getThisTaskIndex();
        }
    }

    this.keyBuilder = new KeyBuilder(this.metricPrefix);

    // Log how we got configured.
    logger.info("Configured with time window of {} seconds and using taskId prefixes?: {}",
        timeBucketSeconds, Boolean.toString(metricPrefix.isEmpty()));

    // Register the top level metrics.
    assignedValues = topologyContext.registerMetric("GAUGES", new MultiAssignableMetric(), timeBucketSeconds);
    timers = topologyContext.registerMetric("TIMERS", new MultiReducedMetric(new MeanReducer()), timeBucketSeconds);
    counters = topologyContext.registerMetric("COUNTERS", new MultiCountMetric(), timeBucketSeconds);
}
 
Example 2
Source File: EsSpout.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    this.collector = collector;

    LinkedHashMap copy = new LinkedHashMap(conf);
    copy.putAll(spoutConfig);

    StormSettings settings = new StormSettings(copy);

    InitializationUtils.setValueReaderIfNotSet(settings, JdkValueReader.class, log);
    InitializationUtils.setUserProviderIfNotSet(settings, JdkUserProvider.class, log);

    EsClusterInfoSelector.populate(settings);

    ackReads = settings.getStormSpoutReliable();

    if (ackReads) {
        inTransitQueue = new LinkedHashMap<Object, Object>();
        replayQueue = new LinkedList<Object[]>();
        retries = new HashMap<Object, Integer>();
        queueSize = settings.getStormSpoutReliableQueueSize();
        tupleRetries = settings.getStormSpoutReliableRetriesPerTuple();
        tupleFailure = settings.getStormSpoutReliableTupleFailureHandling();
    }

    int totalTasks = context.getComponentTasks(context.getThisComponentId()).size();
    int currentTask = context.getThisTaskIndex();

    // match the partitions based on the current topology
    List<PartitionDefinition> partitions = RestService.findPartitions(settings, log);
    List<PartitionDefinition> assigned = RestService.assignPartitions(partitions, currentTask, totalTasks);
    iterator = RestService.multiReader(settings, assigned, log);
}
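RestService.findPartitions() and RestService.assignPartitions() belong to elasticsearch-hadoop; the assignment step boils down to dividing a fixed partition list among totalTasks peers. A round-robin version of the same idea might look like this sketch (an illustration of the concept, not the library's actual implementation):

static <T> List<T> assignRoundRobin(List<T> partitions, int currentTask, int totalTasks) {
    // Task i takes partitions i, i + totalTasks, i + 2 * totalTasks, ...
    List<T> assigned = new ArrayList<>();
    for (int i = currentTask; i < partitions.size(); i += totalTasks) {
        assigned.add(partitions.get(i));
    }
    return assigned;
}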
 
Example 3
Source File: WARCFileNameFormat.java    From storm-crawler with Apache License 2.0
@Override
public void prepare(Map conf, TopologyContext topologyContext) {
    this.taskIndex = topologyContext.getThisTaskIndex();
    int totalTasks = topologyContext.getComponentTasks(
            topologyContext.getThisComponentId()).size();
    // single task? let's not bother with the task index in the file name
    if (totalTasks == 1) {
        this.taskIndex = -1;
    }
}
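Setting taskIndex to -1 is a sentinel meaning "leave the task index out of the file name"; with a single task there is nothing to disambiguate. A consumer of the field could look like the following sketch (the real getName() in WARCFileNameFormat and its prefix field may differ):

public String getName(long rotation, long timeStamp) {
    // Sketch only: prefix is a hypothetical field, and the actual
    // WARCFileNameFormat naming scheme may differ.
    StringBuilder name = new StringBuilder(prefix);
    if (taskIndex >= 0) {
        name.append("-").append(taskIndex); // disambiguate parallel tasks
    }
    return name.append("-").append(timeStamp).append(".warc.gz").toString();
}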
 
Example 4
Source File: SQLSpout.java    From storm-crawler with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void open(Map conf, TopologyContext context,
        SpoutOutputCollector collector) {

    super.open(conf, context, collector);

    maxDocsPerBucket = ConfUtils.getInt(conf,
            Constants.SQL_MAX_DOCS_BUCKET_PARAM_NAME, 5);

    tableName = ConfUtils.getString(conf,
            Constants.SQL_STATUS_TABLE_PARAM_NAME, "urls");

    maxNumResults = ConfUtils.getInt(conf,
            Constants.SQL_MAXRESULTS_PARAM_NAME, 100);

    try {
        connection = SQLUtil.getConnection(conf);
    } catch (SQLException ex) {
        LOG.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    }

    // determine bucket this spout instance will be in charge of
    int totalTasks = context
            .getComponentTasks(context.getThisComponentId()).size();
    if (totalTasks > 1) {
        logIdprefix = "[" + context.getThisComponentId() + " #"
                + context.getThisTaskIndex() + "] ";
        bucketNum = context.getThisTaskIndex();
    }
}
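bucketNum lets each task fetch only its own slice of the status table. Assuming the table has a numeric bucket column and that bucketNum defaults to a negative value when there is a single task (both assumptions here, not quoted from SQLSpout), the restriction could be applied along these lines:

// Sketch: restrict the query to this task's bucket. The column name,
// default value and query shape are assumptions, not SQLSpout's real SQL.
String query = "SELECT * FROM " + tableName
        + (bucketNum >= 0 ? " WHERE bucket = " + bucketNum : "")
        + " LIMIT " + maxNumResults;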
 
Example 5
Source File: SidelineSpoutHandler.java    From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Handler called when the dynamic spout opens, this method is responsible for creating and setting triggers for
 * handling the spinning up and down of sidelines.
 * @param spout Dynamic spout instance.
 * @param topologyConfig Topology configuration.
 * @param topologyContext Topology context.
 */
@Override
public void onSpoutOpen(
    final DynamicSpout spout,
    final Map topologyConfig,
    final TopologyContext topologyContext
) {
    this.spout = spout;

    createSidelineTriggers();

    Preconditions.checkArgument(
        spoutConfig.containsKey(SidelineConfig.REFRESH_INTERVAL_SECONDS)
        && spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS) != null,
        "Configuration value for " + SidelineConfig.REFRESH_INTERVAL_SECONDS + " is required."
    );

    final long refreshIntervalSeconds = ((Number) spoutConfig.get(SidelineConfig.REFRESH_INTERVAL_SECONDS)).longValue();

    final long refreshIntervalMillis = TimeUnit.SECONDS.toMillis(refreshIntervalSeconds);

    // Why not just start the timer at 0? Because we want to block onSpoutOpen() until the first run of loadSidelines()
    loadSidelines();

    // Repeat our sidelines check periodically
    final String threadName = "[" + DynamicSpout.class.getSimpleName() + ":" + getClass().getSimpleName() + "] Timer on "
        + topologyContext.getThisComponentId() + ":" + topologyContext.getThisTaskIndex();

    timer = new Timer(threadName);
    timer.scheduleAtFixedRate(new TimerTask() {
        @Override
        public void run() {
            // Catch this so that it doesn't kill the recurring task
            try {
                loadSidelines();
            } catch (Exception ex) {
                logger.error("Attempting to loadSidelines() failed", ex);
            }
        }
    }, refreshIntervalMillis, refreshIntervalMillis);

    for (final SidelineTrigger sidelineTrigger : sidelineTriggers) {
        sidelineTrigger.open(getSpoutConfig());
    }
}
 
Example 6
Source File: DynamicSpout.java    From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License
/**
 * Open is called once the spout instance has been deployed to the Storm cluster
 * and is ready to get to work.
 *
 * @param topologyConfig The Storm Topology configuration.
 * @param topologyContext The Storm Topology context.
 * @param spoutOutputCollector The output collector to emit tuples via.
 * @throws IllegalStateException if you attempt to open the spout multiple times.
 */
@Override
public void open(Map topologyConfig, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
    if (isOpen) {
        throw new IllegalStateException("This spout has already been opened.");
    }

    // Save references.
    this.topologyContext = topologyContext;
    this.outputCollector = spoutOutputCollector;

    // Ensure a consumer id prefix has been correctly set.
    if (Strings.isNullOrEmpty((String) getSpoutConfigItem(SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX))) {
        throw new IllegalStateException("Missing required configuration: " + SpoutConfig.VIRTUAL_SPOUT_ID_PREFIX);
    }

    // We do not use the getters for things like the metricsRecorder and coordinator here,
    // because each of those getters checks whether the spout is open, and it is not open
    // yet until we have finished setting all of these things up.

    // Initialize Metric Recorder
    this.metricsRecorder = getFactoryManager().createNewMetricsRecorder();
    this.metricsRecorder.open(getSpoutConfig(), getTopologyContext());

    // Create MessageBuffer
    final MessageBuffer messageBuffer = getFactoryManager().createNewMessageBufferInstance();
    messageBuffer.open(getSpoutConfig());

    // Create MessageBus instance and store into SpoutMessageBus reference reducing accessible scope.
    final MessageBus messageBus = new MessageBus(messageBuffer);
    this.messageBus = messageBus;

    // Define thread context, this allows us to use contextually relevant thread names.
    final ThreadContext threadContext = new ThreadContext(
        topologyContext.getThisComponentId(),
        topologyContext.getThisTaskIndex()
    );

    // Create Coordinator instance and call open.
    spoutCoordinator = new SpoutCoordinator(
        getSpoutConfig(),
        threadContext,
        messageBus,
        metricsRecorder
    );
    spoutCoordinator.open();

    // Define consumer cohort definition.
    final ConsumerPeerContext consumerPeerContext = new ConsumerPeerContext(
        topologyContext.getComponentTasks(topologyContext.getThisComponentId()).size(),
        topologyContext.getThisTaskIndex()
    );

    // TODO: This should be configurable and created dynamically, the problem is that right now we are still tightly
    // coupled to the VirtualSpout implementation.
    this.virtualSpoutFactory = new VirtualSpoutFactory(
        spoutConfig,
        consumerPeerContext,
        factoryManager,
        metricsRecorder
    );

    // Our spout is open; it is not dependent upon the handler finishing its own open for us to be 'opened'.
    // This is important, because if we waited, most of our getters that check the opened state
    // of the spout would throw an exception and be unusable.
    isOpen = true;

    this.spoutHandler = getFactoryManager().createSpoutHandler();
    this.spoutHandler.open(spoutConfig, virtualSpoutFactory);
    this.spoutHandler.onSpoutOpen(this, topologyConfig, topologyContext);
}
 
Example 7
Source File: InOrderDeliveryTest.java    From storm-net-adapter with Apache License 2.0
@Override
public void open(Map<String, Object> conf, TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;
    _base = context.getThisTaskIndex();
}
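Here the task index becomes a per-task base value: each task emits an independent, strictly increasing sequence keyed by that base, so a downstream bolt can verify in-order delivery per task. The emitting side might look like this sketch (the _i counter field is assumed from the test's purpose, not quoted from it):

@Override
public void nextTuple() {
    // Every task emits (base, 0), (base, 1), ... where base is its task index.
    _collector.emit(new Values(_base, _i));
    _i++;
}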
 
Example 8
Source File: AbstractSpout.java    From storm-crawler with Apache License 2.0
@Override
public void open(Map stormConf, TopologyContext context,
        SpoutOutputCollector collector) {

    super.open(stormConf, context, collector);

    indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName,
            "status");

    // one ES client per JVM
    synchronized (AbstractSpout.class) {
        try {
            if (client == null) {
                client = ElasticSearchConnection.getClient(stormConf,
                        ESBoltType);
            }
        } catch (Exception e1) {
            LOG.error("Can't connect to ElasticSearch", e1);
            throw new RuntimeException(e1);
        }
    }

    // if more than one instance is used we expect their number to be the
    // same as the number of shards
    int totalTasks = context.getComponentTasks(context.getThisComponentId())
            .size();
    if (totalTasks > 1) {
        logIdprefix = "[" + context.getThisComponentId() + " #"
                + context.getThisTaskIndex() + "] ";

        // determine the number of shards so that we can restrict the search

        // TODO use the admin API when it gets available
        // TODO or the low level one with
        // https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-shards-stores.html
        // TODO identify local shards and use those if possible

        // ClusterSearchShardsRequest request = new ClusterSearchShardsRequest(indexName);
        // ClusterSearchShardsResponse shardresponse = client.admin()
        //         .cluster().searchShards(request).actionGet();
        // ClusterSearchShardsGroup[] shardgroups = shardresponse.getGroups();
        // if (totalTasks != shardgroups.length) {
        //     throw new RuntimeException("Number of ES spout instances should be the same"
        //             + " as number of shards (" + shardgroups.length + ") but is " + totalTasks);
        // }
        // shardID = shardgroups[context.getThisTaskIndex()].getShardId().getId();

        // TEMPORARY simply use the task index as shard index
        shardID = context.getThisTaskIndex();
        LOG.info("{} assigned shard ID {}", logIdprefix, shardID);
    }

    partitionField = ConfUtils.getString(stormConf,
            ESStatusBucketFieldParamName, "key");

    bucketSortField = ConfUtils.loadListFromConf(
            ESStatusBucketSortFieldParamName, stormConf);

    totalSortField = ConfUtils.getString(stormConf,
            ESStatusGlobalSortFieldParamName);

    maxURLsPerBucket = ConfUtils.getInt(stormConf, ESStatusMaxURLsParamName,
            1);
    maxBucketNum = ConfUtils.getInt(stormConf, ESStatusMaxBucketParamName,
            10);

    queryTimeout = ConfUtils.getInt(stormConf,
            ESStatusQueryTimeoutParamName, -1);

    filterQueries = ConfUtils.loadListFromConf(ESStatusFilterParamName, stormConf);
}