Java Code Examples for org.apache.storm.task.TopologyContext#registerMetric()

The following examples show how to use org.apache.storm.task.TopologyContext#registerMetric(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: PulsarBolt.java    From pulsar with Apache License 2.0 6 votes vote down vote up
/**
 * Prepares this bolt for execution: records the component and bolt
 * identifiers, obtains the shared Pulsar producer and registers the bolt
 * itself as a metric on the topology context.
 *
 * @param conf      topology configuration (not read by this method)
 * @param context   source of the component id, task id and task index
 * @param collector collector kept for later use
 * @throws IllegalStateException if the Pulsar producer cannot be created
 */
@SuppressWarnings({ "rawtypes" })
@Override
public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
    this.componentId = context.getThisComponentId();
    this.boltId = String.format("%s-%s", componentId, context.getThisTaskId());
    this.collector = collector;

    try {
        sharedPulsarClient = SharedPulsarClient.get(componentId, clientConf);
        producer = sharedPulsarClient.getSharedProducer(producerConf);
        LOG.info("[{}] Created a pulsar producer on topic {} to send messages", boltId, pulsarBoltConf.getTopic());
    } catch (PulsarClientException e) {
        LOG.error("[{}] Error initializing pulsar producer on topic {}", boltId, pulsarBoltConf.getTopic(), e);
        final String failure = format("Failed to initialize producer for %s : %s", pulsarBoltConf.getTopic(),
                e.getMessage());
        // keep the original exception as the cause so the stack trace is not lost
        throw new IllegalStateException(failure, e);
    }

    // the metric name combines the component id and the task index
    final String metricName = String.format("PulsarBoltMetrics-%s-%s", componentId, context.getThisTaskIndex());
    context.registerMetric(metricName, this, pulsarBoltConf.getMetricsTimeIntervalInSecs());
}
 
Example 2
Source File: MySqlBinLogSpout.java    From storm-mysql with Apache License 2.0 6 votes vote down vote up
/**
 * Creates every metric tracked by this spout and registers each one with the
 * topology context under its {@code SpoutConstants} name.
 *
 * @param context        topology context the metrics are registered with
 * @param timeBucketSize time bucket size passed to every registration
 */
private void initializeAndRegisterAllMetrics(TopologyContext context, int timeBucketSize) {
    // registerMetric hands back the metric it was given, so each field is
    // assigned straight from the registration call
    this.failureMetric = context.registerMetric(
            SpoutConstants.METRIC_FAILURECOUNT, new CombinedMetric(new MaxMetric()), timeBucketSize);
    this.successCountMetric = context.registerMetric(
            SpoutConstants.METRIC_SUCCESSCOUNT, new CountMetric(), timeBucketSize);
    this.sidelineCountMetric = context.registerMetric(
            SpoutConstants.METRIC_SIDELINECOUNT, new CountMetric(), timeBucketSize);
    this.internalBufferSize = context.registerMetric(
            SpoutConstants.METRIC_BUFFER_SIZE, new CombinedMetric(new MaxMetric()), timeBucketSize);
    this.pendingMessageSize = context.registerMetric(
            SpoutConstants.METRIC_PENDING_MESSAGES, new CombinedMetric(new MaxMetric()), timeBucketSize);
    this.txEventProcessTime = context.registerMetric(
            SpoutConstants.METRIC_TXPROCESSTIME, new ReducedMetric(new MeanReducer()), timeBucketSize);
    this.currentBinLogFileNumber = context.registerMetric(
            SpoutConstants.METRIC_BINLOG_FILE_NUM, new CombinedMetric(new MaxMetric()), timeBucketSize);
    this.currentBinLogFilePosition = context.registerMetric(
            SpoutConstants.METRIC_BIN_LOG_FILE_POS, new CombinedMetric(new MaxMetric()), timeBucketSize);
    this.txEventFailTimeInTopology = context.registerMetric(
            SpoutConstants.METRIC_FAIL_MSG_IN_TOPOLOGY, new ReducedMetric(new MeanReducer()), timeBucketSize);
}
 
Example 3
Source File: MemorySpout.java    From storm-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the spout: refuses to run with more than one task, loads the
 * starting URLs through the scheme and registers a gauge reporting the
 * queue size.
 *
 * @param conf      topology configuration (not read by this method)
 * @param context   used to count the tasks of this component and to register the gauge
 * @param collector collector kept for later use
 * @throws RuntimeException if the component has more than one task
 */
@Override
public void open(@SuppressWarnings("rawtypes") Map conf,
        TopologyContext context, SpoutOutputCollector collector) {
    _collector = collector;

    // only a single task of this spout is allowed
    final int taskCount = context.getComponentTasks(context.getThisComponentId()).size();
    if (taskCount > 1) {
        throw new RuntimeException("Can't have more than one instance of the MemorySpout");
    }

    // every starting URL is added with the same timestamp
    final Date addedAt = new Date();
    for (String url : startingURLs) {
        LOG.debug("About to deserialize {} ", url);
        final byte[] utf8 = url.getBytes(StandardCharsets.UTF_8);
        final List<Object> fields = scheme.deserialize(ByteBuffer.wrap(utf8));
        add((String) fields.get(0), (Metadata) fields.get(1), addedAt);
    }

    context.registerMetric("queue_size", () -> queue.size(), 10);
}
 
Example 4
Source File: S3Cacher.java    From storm-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, resolves the bucket name from
 * the configuration, checks that the bucket exists and registers the event
 * counter.
 *
 * @param conf      topology configuration holding the bucket name
 * @param context   topology context the counter is registered with
 * @param collector collector handed to the parent class
 * @throws RuntimeException if the configured bucket does not exist
 */
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);

    bucketName = ConfUtils.getString(conf, BUCKET);
    if (!client.doesBucketExist(bucketName)) {
        throw new RuntimeException("Bucket " + bucketName + " does not exist");
    }

    final String counterName = getMetricPrefix() + "s3cache_counter";
    this.eventCounter = context.registerMetric(counterName, new MultiCountMetric(), 10);
}
 
Example 5
Source File: IndexerBolt.java    From storm-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, opens the Solr connection and
 * registers the event counter for this bolt.
 *
 * @param conf      topology configuration used to build the Solr connection
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 * @throws RuntimeException wrapping any failure to obtain the Solr connection
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);

    _collector = collector;

    try {
        connection = SolrConnection.getConnection(conf, BOLT_TYPE);
    } catch (Exception e) {
        // Pass the exception as the trailing argument with no placeholder:
        // SLF4J treats a trailing Throwable as the exception to log, so a
        // "{}" in the message would be left unfilled.
        LOG.error("Can't connect to Solr", e);
        throw new RuntimeException(e);
    }

    this.eventCounter = context.registerMetric("SolrIndexerBolt",
            new MultiCountMetric(), 10);
}
 
Example 6
Source File: WARCSpout.java    From storm-crawler with Apache License 2.0 6 votes vote down vote up
/**
 * Opens the spout: resets the current record, reads the content limit,
 * header-storage flag and protocol metadata prefix from the configuration,
 * and registers the event counter.
 *
 * @param conf      topology configuration
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void open(Map conf, TopologyContext context,
        SpoutOutputCollector collector) {
    _collector = collector;
    record = Optional.empty();

    maxContentSize = ConfUtils.getInt(conf, "http.content.limit", -1);
    // Only shrink the buffer when a real (non-negative) limit is configured.
    // The default of -1 is a sentinel - presumably "no limit", confirm where
    // maxContentSize is consumed - and must not become the buffer size.
    if (maxContentSize >= 0 && contentBufferSize > maxContentSize) {
        // no need to buffer more content than max. used
        contentBufferSize = maxContentSize;
    }
    storeHTTPHeaders = ConfUtils.getBoolean(conf, "http.store.headers",
            false);
    protocolMDprefix = ConfUtils.getString(conf,
            ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM, protocolMDprefix);

    int metricsTimeBucketSecs = ConfUtils.getInt(conf,
            "fetcher.metrics.time.bucket.secs", 10);
    eventCounter = context.registerMetric("warc_spout_counter",
            new MultiCountMetric(), metricsTimeBucketSecs);
}
 
Example 7
Source File: StormRecorder.java    From storm-dynamic-spout with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Configures the recorder from the spout configuration and registers the
 * three top level metrics (gauges, timers and counters).
 *
 * @param spoutConfig     spout configuration; the time bucket and task id prefix settings are optional
 * @param topologyContext topology context the metrics are registered with
 */
@Override
public void open(final Map<String, Object> spoutConfig, final TopologyContext topologyContext) {
    // Determine our time bucket window, in seconds, defaulted to 60.
    // A missing key yields null, which fails the instanceof check and keeps the default.
    int timeBucketSeconds = 60;
    final Object timeBucketCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_TIME_BUCKET);
    if (timeBucketCfgValue instanceof Number) {
        timeBucketSeconds = ((Number) timeBucketCfgValue).intValue();
    }

    // Conditionally enable prefixing with the task index; the prefix is left unchanged otherwise.
    final Object taskIdCfgValue = spoutConfig.get(SpoutConfig.METRICS_RECORDER_ENABLE_TASK_ID_PREFIX);
    if (taskIdCfgValue instanceof Boolean && (Boolean) taskIdCfgValue) {
        this.metricPrefix = "task-" + topologyContext.getThisTaskIndex();
    }

    this.keyBuilder = new KeyBuilder(this.metricPrefix);

    // Log how we got configured. Prefixes are in use when the prefix is NOT empty.
    logger.info("Configured with time window of {} seconds and using taskId prefixes?: {}",
        timeBucketSeconds, !metricPrefix.isEmpty());

    // Register the top level metrics.
    assignedValues = topologyContext.registerMetric("GAUGES", new MultiAssignableMetric(), timeBucketSeconds);
    timers = topologyContext.registerMetric("TIMERS", new MultiReducedMetric(new MeanReducer()), timeBucketSeconds);
    counters = topologyContext.registerMetric("COUNTERS", new MultiCountMetric(), timeBucketSeconds);
}
 
Example 8
Source File: URLPartitionerBolt.java    From storm-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Reads and validates the partition mode, registers the event counter and
 * creates a synchronized, size-bounded, access-ordered cache.
 *
 * @param stormConf topology configuration holding the partition mode
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 */
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {

    mode = ConfUtils.getString(stormConf, Constants.PARTITION_MODEParamName,
            Constants.PARTITION_MODE_HOST);

    // fall back to partitioning by host when the configured mode is unknown
    final boolean knownMode = mode.equals(Constants.PARTITION_MODE_IP)
            || mode.equals(Constants.PARTITION_MODE_DOMAIN)
            || mode.equals(Constants.PARTITION_MODE_HOST);
    if (!knownMode) {
        LOG.error("Unknown partition mode : {} - forcing to byHost", mode);
        mode = Constants.PARTITION_MODE_HOST;
    }

    LOG.info("Using partition mode : {}", mode);

    _collector = collector;

    // MultiCountMetric counting the different events seen by this bolt;
    // Storm emits the counts every n seconds on a system stream, where a
    // MetricConsumer registered in the topology can read them
    this.eventCounter = context.registerMetric("URLPartitioner",
            new MultiCountMetric(), 10);

    final int maxEntries = 500;
    // access-ordered map that drops its eldest entry once it holds more
    // than maxEntries elements
    final Map bounded = new LinkedHashMap(maxEntries + 1, .75F, true) {
        @Override
        public boolean removeEldestEntry(Map.Entry eldest) {
            return size() > maxEntries;
        }
    };

    // the map may be used by several threads, hence the synchronized wrapper
    cache = Collections.synchronizedMap(bounded);
}
 
Example 9
Source File: IndexerBolt.java    From storm-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, reads the index settings, opens
 * the Elasticsearch connection and registers the metrics of this bolt.
 *
 * @param conf      topology configuration
 * @param context   topology context the metrics are registered with
 * @param collector collector kept for later use
 * @throws RuntimeException wrapping any failure to obtain the connection
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);
    _collector = collector;

    // an index name that is already set wins over the configuration
    if (indexName == null) {
        indexName = ConfUtils.getString(conf, IndexerBolt.ESIndexNameParamName, "content");
    }
    create = ConfUtils.getBoolean(conf, IndexerBolt.ESCreateParamName, false);
    pipeline = ConfUtils.getString(conf, IndexerBolt.ESIndexPipelineParamName);

    try {
        connection = ElasticSearchConnection.getConnection(conf, ESBoltType, this);
    } catch (Exception connectFailure) {
        LOG.error("Can't connect to ElasticSearch", connectFailure);
        throw new RuntimeException(connectFailure);
    }

    this.eventCounter = context.registerMetric(
            "ElasticSearchIndexer", new MultiCountMetric(), 10);
    this.perSecMetrics = context.registerMetric(
            "Indexer_average_persec", new MultiReducedMetric(new PerSecondReducer()), 10);

    // entries expire 60 seconds after being written; this bolt is notified of removals
    waitAck = CacheBuilder.newBuilder()
            .expireAfterWrite(60, TimeUnit.SECONDS)
            .removalListener(this)
            .build();

    // gauge exposing the current size of the waitAck cache
    context.registerMetric("waitAck", () -> waitAck.size(), 10);
}
 
Example 10
Source File: IndexerBolt.java    From storm-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, registers the event counter and
 * remembers the target table name and the configuration.
 *
 * @param conf      topology configuration holding the table name
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);
    _collector = collector;

    final MultiCountMetric counter = new MultiCountMetric();
    this.eventCounter = context.registerMetric("SQLIndexer", counter, 10);

    this.tableName = ConfUtils.getString(conf, SQL_INDEX_TABLE_PARAM_NAME);
    // the configuration is kept for later use
    this.conf = conf;
}
 
Example 11
Source File: StatusMetricsBolt.java    From storm-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the status index name, opens the Elasticsearch connection,
 * registers the {@code status.count} metric and creates one listener per
 * tracked status.
 *
 * @param stormConf topology configuration
 * @param context   topology context the metric is registered with
 * @param collector collector kept for later use
 * @throws RuntimeException wrapping any failure to obtain the connection
 */
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {
    _collector = collector;
    indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status");

    try {
        connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType);
    } catch (Exception connectFailure) {
        LOG.error("Can't connect to ElasticSearch", connectFailure);
        throw new RuntimeException(connectFailure);
    }

    // reports whatever latestStatusCounts holds when the metric is read
    context.registerMetric("status.count", () -> latestStatusCounts, freqStats);

    // one listener per status label, in this fixed order
    final String[] statusLabels = { "DISCOVERED", "FETCHED", "FETCH_ERROR",
            "REDIRECTION", "ERROR", "TOTAL" };
    listeners = new StatusActionListener[statusLabels.length];
    for (int i = 0; i < statusLabels.length; i++) {
        listeners[i] = new StatusActionListener(statusLabels[i]);
    }
}
 
Example 12
Source File: FetcherBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the fetcher bolt: validates the configuration, registers the
 * counters and gauges, builds the protocol factory and fetch queues, and
 * finally starts the fetcher threads.
 * <p>
 * The task id is assigned before it is logged, and the threads are started
 * only after every field set by this method has been initialised.
 *
 * @param stormConf topology configuration
 * @param context   topology context used for metrics, the task id and the worker port
 * @param collector collector handed to the parent class
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {

    super.prepare(stormConf, context, collector);

    Config conf = new Config();
    conf.putAll(stormConf);

    checkConfiguration(conf);

    // assign the task id first so that the start-up log line reports it
    this.taskID = context.getThisTaskId();

    LOG.info("[Fetcher #{}] : starting at {}", taskID, Instant.now());

    int metricsTimeBucketSecs = ConfUtils.getInt(conf,
            "fetcher.metrics.time.bucket.secs", 10);

    // Register a "MultiCountMetric" to count different events in this bolt
    // Storm will emit the counts every n seconds to a special bolt via a
    // system stream
    // The data can be accessed by registering a "MetricConsumer" in the
    // topology
    this.eventCounter = context.registerMetric("fetcher_counter",
            new MultiCountMetric(), metricsTimeBucketSecs);

    // create gauges; the lambdas read their fields lazily, so fetchQueues
    // may be assigned after the registration
    context.registerMetric("activethreads", () -> {
        return activeThreads.get();
    }, metricsTimeBucketSecs);

    context.registerMetric("in_queues", () -> {
        return fetchQueues.inQueues.get();
    }, metricsTimeBucketSecs);

    context.registerMetric("num_queues", () -> {
        return fetchQueues.queues.size();
    }, metricsTimeBucketSecs);

    this.averagedMetrics = context.registerMetric("fetcher_average_perdoc",
            new MultiReducedMetric(new MeanReducer()),
            metricsTimeBucketSecs);

    this.perSecMetrics = context.registerMetric("fetcher_average_persec",
            new MultiReducedMetric(new PerSecondReducer()),
            metricsTimeBucketSecs);

    protocolFactory = new ProtocolFactory(conf);

    this.fetchQueues = new FetchItemQueues(conf);

    sitemapsAutoDiscovery = ConfUtils.getBoolean(stormConf,
            SITEMAP_DISCOVERY_PARAM_KEY, false);

    maxNumberURLsInQueues = ConfUtils.getInt(conf,
            "fetcher.max.urls.in.queues", -1);

    // If set to a valid path e.g. /tmp/fetcher-dump-{port} on a worker
    // node, the content of the queues will be dumped to the logs for
    // debugging. The port number needs to match the one used by the
    // FetcherBolt instance.
    String debugfiletriggerpattern = ConfUtils.getString(conf,
            "fetcherbolt.queue.debug.filepath");

    if (StringUtils.isNotBlank(debugfiletriggerpattern)) {
        debugfiletrigger = new File(
                debugfiletriggerpattern.replaceAll("\\{port\\}",
                        Integer.toString(context.getThisWorkerPort())));
    }

    int threadCount = ConfUtils.getInt(conf, "fetcher.threads.number", 10);

    // keep track of the URLs in fetching; allocated before any thread runs
    beingFetched = new String[threadCount];
    Arrays.fill(beingFetched, "");

    // Spawn the threads last so that none of them can run against state
    // this method has not finished initialising (FetcherThread is not
    // visible here; presumably it reads these fields - confirm).
    for (int i = 0; i < threadCount; i++) {
        new FetcherThread(conf, i).start();
    }
}
 
Example 13
Source File: AbstractStatusUpdaterBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Initialises the scheduler, the metadata transfer, the optional cache
 * with its metric, the maximum number of fetch errors and the date
 * rounding unit.
 *
 * @param stormConf topology configuration
 * @param context   topology context the cache metric is registered with
 * @param collector collector kept for later use
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {
    _collector = collector;

    scheduler = Scheduler.getInstance(stormConf);
    mdTransfer = MetadataTransfer.getInstance(stormConf);

    useCache = ConfUtils.getBoolean(stormConf, useCacheParamName, true);
    if (useCache) {
        final String cacheSpec = ConfUtils.getString(stormConf, cacheConfigParamName);
        cache = CacheBuilder.from(cacheSpec).build();

        // reports hits, misses and size; the hit and miss counters are
        // reset each time the metric is read
        context.registerMetric("cache", () -> {
            Map<String, Long> snapshot = new HashMap<>();
            snapshot.put("hits", cacheHits);
            snapshot.put("misses", cacheMisses);
            snapshot.put("size", cache.size());
            cacheHits = 0;
            cacheMisses = 0;
            return snapshot;
        }, 30);
    }

    maxFetchErrors = ConfUtils.getInt(stormConf, maxFetchErrorsParamName, 3);

    // only MINUTE and HOUR change the rounding unit; any other value leaves
    // it as it was
    final String rounding = ConfUtils.getString(stormConf, roundDateParamName, "SECOND");
    if (rounding.equalsIgnoreCase("MINUTE")) {
        roundDateUnit = Calendar.MINUTE;
    } else if (rounding.equalsIgnoreCase("HOUR")) {
        roundDateUnit = Calendar.HOUR;
    }
}
 
Example 14
Source File: StatusUpdaterBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, reads the index and routing
 * settings, configures the URL partitioner, creates the waitAck cache with
 * its gauge, opens the Elasticsearch connection and registers the counters.
 *
 * @param stormConf topology configuration
 * @param context   topology context the metrics are registered with
 * @param collector collector handed to the parent class
 * @throws RuntimeException wrapping any failure to obtain the connection
 */
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {

    super.prepare(stormConf, context, collector);

    // the parameter names are format patterns taking the bolt type
    final String indexNameKey = String.format(
            StatusUpdaterBolt.ESStatusIndexNameParamName, ESBoltType);
    indexName = ConfUtils.getString(stormConf, indexNameKey, "status");

    final String routingKey = String.format(
            StatusUpdaterBolt.ESStatusRoutingParamName, ESBoltType);
    doRouting = ConfUtils.getBoolean(stormConf, routingKey, false);

    partitioner = new URLPartitioner();
    partitioner.configure(stormConf);

    final String routingFieldKey = String.format(
            StatusUpdaterBolt.ESStatusRoutingFieldParamName, ESBoltType);
    String routingField = ConfUtils.getString(stormConf, routingFieldKey);
    if (StringUtils.isNotBlank(routingField)) {
        final String metadataPrefix = "metadata.";
        if (routingField.startsWith(metadataPrefix)) {
            routingFieldNameInMetadata = true;
            routingField = routingField.substring(metadataPrefix.length());
        }
        // periods are not allowed in ES2 - replace with %2E
        routingField = routingField.replace(".", "%2E");
    }
    fieldNameForRoutingKey = routingField;

    // entries expire 60 seconds after being written; this bolt is notified of removals
    waitAck = CacheBuilder.newBuilder()
            .expireAfterWrite(60, TimeUnit.SECONDS)
            .removalListener(this)
            .build();

    // gauge exposing the current size of the waitAck cache
    context.registerMetric("waitAck", () -> waitAck.size(), 10);

    try {
        connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType, this);
    } catch (Exception connectFailure) {
        LOG.error("Can't connect to ElasticSearch", connectFailure);
        throw new RuntimeException(connectFailure);
    }

    this.eventCounter = context.registerMetric("counters", new MultiCountMetric(), 30);
}
 
Example 15
Source File: ParserBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the parser settings, loads and validates the configured HTML
 * mapper class, instantiates Tika and registers the event counter.
 *
 * @param conf      topology configuration
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 * @throws RuntimeException if the HTML mapper class cannot be loaded (the
 *                          {@link ClassNotFoundException} is kept as the
 *                          cause) or does not implement {@code HtmlMapper}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {

    emitOutlinks = ConfUtils.getBoolean(conf, "parser.emitOutlinks", true);

    urlFilters = URLFilters.fromConf(conf);

    parseFilters = ParseFilters.fromConf(conf);

    upperCaseElementNames = ConfUtils.getBoolean(conf,
            "parser.uppercase.element.names", true);

    extractEmbedded = ConfUtils.getBoolean(conf, "parser.extract.embedded",
            false);

    String htmlmapperClassName = ConfUtils.getString(conf,
            "parser.htmlmapper.classname",
            "org.apache.tika.parser.html.IdentityHtmlMapper");

    try {
        HTMLMapperClass = Class.forName(htmlmapperClassName);
        boolean interfaceOK = HtmlMapper.class
                .isAssignableFrom(HTMLMapperClass);
        if (!interfaceOK) {
            throw new RuntimeException("Class " + htmlmapperClassName
                    + " does not implement HtmlMapper");
        }
    } catch (ClassNotFoundException e) {
        // keep the original exception in both the log and the rethrown
        // exception so the root cause is not lost
        LOG.error("Can't load class {}", htmlmapperClassName, e);
        throw new RuntimeException("Can't load class "
                + htmlmapperClassName, e);
    }

    mimeTypeWhiteList = ConfUtils.loadListFromConf(
            "parser.mimetype.whitelist", conf);

    protocolMDprefix = ConfUtils.getString(conf,
            ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM, "");

    // instantiate Tika and log how long it took
    long start = System.currentTimeMillis();
    tika = new Tika();
    long end = System.currentTimeMillis();

    LOG.debug("Tika loaded in {} msec", end - start);

    this.collector = collector;

    // the counter is named after the simple name of the runtime class
    this.eventCounter = context.registerMetric(this.getClass()
            .getSimpleName(), new MultiCountMetric(), 10);

    this.metadataTransfer = MetadataTransfer.getInstance(conf);
}
 
Example 16
Source File: CloudSearchIndexerBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the CloudSearch indexer: registers the event counter, reads the
 * batching settings and, unless batches are only dumped to local files,
 * resolves the CloudSearch domain for the configured endpoint, loads its
 * index fields and creates the domain client.
 *
 * @param conf      topology configuration
 * @param context   topology context the counter is registered with
 * @param collector collector kept for later use
 * @throws RuntimeException if the endpoint is missing or no domain matches it
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);
    _collector = collector;

    this.eventCounter = context.registerMetric(
            "CloudSearchIndexer", new MultiCountMetric(), 10);

    maxTimeBuffered = ConfUtils.getInt(conf, CloudSearchConstants.MAX_TIME_BUFFERED, 10);
    maxDocsInBatch = ConfUtils.getInt(conf, CloudSearchConstants.MAX_DOCS_BATCH, -1);

    // the batch buffer starts with the opening bracket
    buffer = new StringBuffer(MAX_SIZE_BATCH_BYTES).append('[');

    dumpBatchFilesToTemp = ConfUtils.getBoolean(conf, "cloudsearch.batch.dump", false);
    if (dumpBatchFilesToTemp) {
        // batches only go to local files: nothing else to configure
        return;
    }

    final String endpoint = ConfUtils.getString(conf, "cloudsearch.endpoint");
    if (StringUtils.isBlank(endpoint)) {
        final String message = "Missing CloudSearch endpoint";
        LOG.error(message);
        throw new RuntimeException(message);
    }

    final String regionName = ConfUtils.getString(conf, CloudSearchConstants.REGION);

    final AmazonCloudSearchClient cl = new AmazonCloudSearchClient();
    if (StringUtils.isNotBlank(regionName)) {
        cl.setRegion(RegionUtils.getRegion(regionName));
    }

    // find the domain whose document service endpoint is the configured one
    String domainName = null;
    final DescribeDomainsResult domains = cl.describeDomains(new DescribeDomainsRequest());
    for (DomainStatus status : domains.getDomainStatusList()) {
        if (status.getDocService().getEndpoint().equals(endpoint)) {
            domainName = status.getDomainName();
            break;
        }
    }
    if (StringUtils.isBlank(domainName)) {
        throw new RuntimeException("No domain name found for CloudSearch endpoint");
    }

    // record the type of every index field of the domain
    final DescribeIndexFieldsRequest fieldsRequest =
            new DescribeIndexFieldsRequest().withDomainName(domainName);
    final DescribeIndexFieldsResult indexDescription = cl.describeIndexFields(fieldsRequest);
    for (IndexFieldStatus field : indexDescription.getIndexFields()) {
        final String fieldName = field.getOptions().getIndexFieldName();
        final String fieldType = field.getOptions().getIndexFieldType();
        LOG.info("CloudSearch index name {} of type {}", fieldName, fieldType);
        csfields.put(fieldName, fieldType);
    }

    client = new AmazonCloudSearchDomainClient();
    client.setEndpoint(endpoint);
}
 
Example 17
Source File: SimpleFetcherBolt.java    From storm-crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Prepares the simple fetcher bolt: copies and checks the configuration,
 * registers the counters and gauges, builds the protocol factory and reads
 * the queue mode and crawl delay settings.
 *
 * @param stormConf topology configuration
 * @param context   topology context used for the task id and the metrics
 * @param collector collector handed to the parent class
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepare(Map stormConf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(stormConf, context, collector);

    this.conf = new Config();
    this.conf.putAll(stormConf);
    checkConfiguration();

    this.taskID = context.getThisTaskId();

    final SimpleDateFormat timestampFormat = new SimpleDateFormat(
            "yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
    final long startedAt = System.currentTimeMillis();
    LOG.info("[Fetcher #{}] : starting at {}", taskID, timestampFormat.format(startedAt));

    // every metric below shares the same time bucket
    final int metricsTimeBucketSecs = ConfUtils.getInt(conf,
            "fetcher.metrics.time.bucket.secs", 10);

    // MultiCountMetric counting the different events seen by this bolt;
    // Storm emits the counts every n seconds on a system stream, where a
    // MetricConsumer registered in the topology can read them
    this.eventCounter = context.registerMetric(
            "fetcher_counter", new MultiCountMetric(), metricsTimeBucketSecs);
    this.averagedMetrics = context.registerMetric(
            "fetcher_average", new MultiReducedMetric(new MeanReducer()), metricsTimeBucketSecs);
    this.perSecMetrics = context.registerMetric(
            "fetcher_average_persec", new MultiReducedMetric(new PerSecondReducer()),
            metricsTimeBucketSecs);

    // gauges
    context.registerMetric("activethreads", () -> activeThreads.get(), metricsTimeBucketSecs);
    context.registerMetric("throttler_size", () -> throttler.size(), metricsTimeBucketSecs);

    protocolFactory = new ProtocolFactory(conf);

    sitemapsAutoDiscovery = ConfUtils.getBoolean(stormConf, SITEMAP_DISCOVERY_PARAM_KEY, false);

    queueMode = ConfUtils.getString(conf, "fetcher.queue.mode", QUEUE_MODE_HOST);
    // fall back to queueing by host when the configured mode is unknown
    final boolean knownMode = queueMode.equals(QUEUE_MODE_IP)
            || queueMode.equals(QUEUE_MODE_DOMAIN)
            || queueMode.equals(QUEUE_MODE_HOST);
    if (!knownMode) {
        LOG.error("Unknown partition mode : {} - forcing to byHost", queueMode);
        queueMode = QUEUE_MODE_HOST;
    }
    LOG.info("Using queue mode : {}", queueMode);

    // both delays are multiplied by 1000 before being stored
    this.crawlDelay = (long) (ConfUtils.getFloat(conf, "fetcher.server.delay", 1.0f) * 1000);
    this.maxCrawlDelay = ConfUtils.getInt(conf, "fetcher.max.crawl.delay", 30) * 1000L;

    this.maxCrawlDelayForce = ConfUtils.getBoolean(conf, "fetcher.max.crawl.delay.force", false);
    this.crawlDelayForce = ConfUtils.getBoolean(conf, "fetcher.server.delay.force", false);

    this.maxThrottleSleepMSec = ConfUtils.getLong(conf, "fetcher.max.throttle.sleep", -1);

    this.protocolMDprefix = ConfUtils.getString(conf,
            ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM, protocolMDprefix);
}
 
Example 18
Source File: QueryBolt.java    From bullet-storm with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a metric with the topology context, using the interval mapped
 * to its name or, when the name has no mapping, the interval stored under
 * the default built-in key.
 *
 * @param metric  the metric to register
 * @param name    name the metric is registered under and looked up by
 * @param context topology context the metric is registered with
 * @param <T>     concrete metric type
 * @return whatever {@code context.registerMetric} returns for the metric
 */
private <T extends IMetric> T registerMetric(T metric, String name, TopologyContext context) {
    final Number interval = metricsIntervalMapping.getOrDefault(
            name, metricsIntervalMapping.get(DEFAULT_BUILT_IN_METRICS_INTERVAL_KEY));
    // the interval is logged before the registration call is made
    log.info("Registered metric: {} with interval {}", name, interval);
    final int intervalValue = interval.intValue();
    return context.registerMetric(name, metric, intervalValue);
}
 
Example 19
Source File: YammerFacadeMetric.java    From storm-metrics-reporter with Apache License 2.0 3 votes vote down vote up
/**
 * Registers a facade metric with the topology represented by the given {@link TopologyContext}.
 * <p>
 * Multiple registrations might cause metric duplications and problems in the reporting flow.
 *
 * @param stormConf       Storm configuration settings; the time bucket entry is read from it
 *                        and its string form is parsed as an integer.
 * @param context         TopologyContext for the topology the facade metric is to report metrics for.
 * @param metricsRegistry A metric registry instance where underlying metrics are to be stored.
 */
public static void register(final Map stormConf,
                            final TopologyContext context,
                            final MetricsRegistry metricsRegistry) {

  final YammerFacadeMetric facade = new YammerFacadeMetric(metricsRegistry);
  final String timeBucketSetting = stormConf.get(FACADE_METRIC_TIME_BUCKET_IN_SEC).toString();
  final int timeBucketSecs = Integer.parseInt(timeBucketSetting);

  context.registerMetric(FACADE_METRIC_NAME, facade, timeBucketSecs);
}
 
Example 20
Source File: JSoupParserBolt.java    From storm-crawler with Apache License 2.0 3 votes vote down vote up
/**
 * Delegates to the parent {@code prepare}, registers the event counter and
 * loads the parser settings from the configuration.
 *
 * @param conf      topology configuration
 * @param context   topology context the counter is registered with
 * @param collector collector handed to the parent class
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Override
public void prepare(Map conf, TopologyContext context,
        OutputCollector collector) {
    super.prepare(conf, context, collector);

    // the counter is named after the simple name of the runtime class
    final String counterName = this.getClass().getSimpleName();
    eventCounter = context.registerMetric(counterName, new MultiCountMetric(), 10);

    parseFilters = ParseFilters.fromConf(conf);

    // boolean switches, all enabled by default
    emitOutlinks = ConfUtils.getBoolean(conf, "parser.emitOutlinks", true);
    trackAnchors = ConfUtils.getBoolean(conf, "track.anchors", true);
    robots_noFollow_strict = ConfUtils.getBoolean(conf, RobotsTags.ROBOTS_NO_FOLLOW_STRICT, true);
    treat_non_html_as_error = ConfUtils.getBoolean(conf, "jsoup.treat.non.html.as.error", true);
    detectMimeType = ConfUtils.getBoolean(conf, "detect.mimetype", true);

    // numeric settings, both defaulting to -1
    maxLengthCharsetDetection = ConfUtils.getInt(conf, "detect.charset.maxlength", -1);
    maxOutlinksPerPage = ConfUtils.getInt(conf, "parser.emitOutlinks.max.per.page", -1);

    protocolMDprefix = ConfUtils.getString(conf, ProtocolResponse.PROTOCOL_MD_PREFIX_PARAM, "");

    textExtractor = new TextExtractor(conf);
}