Python elasticsearch.ElasticsearchException() Examples

The following are 30 code examples of elasticsearch.ElasticsearchException(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module elasticsearch, or try the search function.
Example #1
Source File: tracker.py    From rally with Apache License 2.0 6 votes vote down vote up
def extract_mappings_and_corpora(client, output_path, indices_to_extract):
    """Extract index metadata first, then one corpus per extracted index.

    Metadata extraction is cheap, so it runs for every requested index before
    any data is pulled. That way an invalid index name fails early rather than
    late in the process. Indices whose extraction raises an
    ``ElasticsearchException`` are logged and left out.

    :param client: Client object handed through to the extractors.
    :param output_path: Output location handed through to the extractors.
    :param indices_to_extract: Iterable of index names to extract.
    :return: A tuple ``(indices, corpora)``; corpora that come back falsy
        are omitted.
    """
    extracted_indices = []
    for name in indices_to_extract:
        try:
            extracted_indices.extend(index.extract(client, output_path, name))
        except ElasticsearchException:
            logging.getLogger(__name__).exception("Failed to extract index [%s]", name)

    # Only valid indices are left at this point (index patterns already resolved).
    candidates = (
        corpus.extract(client, output_path, entry["name"])
        for entry in extracted_indices
    )
    extracted_corpora = [candidate for candidate in candidates if candidate]

    return extracted_indices, extracted_corpora
Example #2
Source File: indexer.py    From cccatalog-api with MIT License 6 votes vote down vote up
def _bulk_upload(self, es_batch):
        """Bulk-index ``es_batch``, retrying with exponential backoff.

        The first retry waits 5 seconds and every further retry doubles the
        wait. Once the initial attempt and ``max_attempts`` retries have all
        been rejected, the upload is abandoned immediately (no final sleep).

        :param es_batch: Bulk actions handed to ``helpers.parallel_bulk``.
        :raises ValueError: If every attempt was rejected by Elasticsearch;
            the last Elasticsearch error is attached as the cause.
        """
        max_attempts = 4
        attempts = 0
        # Initial time to wait between indexing attempts
        # Grows exponentially
        cooloff = 5
        while True:
            try:
                # The result of parallel_bulk is drained into a deque so the
                # whole batch is consumed here.
                deque(helpers.parallel_bulk(self.es, es_batch, chunk_size=400))
            except elasticsearch.ElasticsearchException as err:
                if attempts >= max_attempts:
                    # Out of retries: fail right away rather than announcing
                    # (and sleeping for) a retry that will never happen.
                    raise ValueError('Exceeded maximum bulk index retries') from err
                # Something went wrong during indexing.
                log.warning(
                    f"Elasticsearch rejected bulk query. We will retry in"
                    f" {cooloff}s. Attempt {attempts}. Details: ",
                    exc_info=True
                )
                time.sleep(cooloff)
                cooloff *= 2
                attempts += 1
            else:
                break
Example #3
Source File: lambdas.py    From quickstart-datalake-47lining with Apache License 2.0 6 votes vote down vote up
def register_metadata_dashboard(event, context):
    """Install the Kibana metadata visualizations and report the outcome.

    Requests whose ``RequestType`` is not ``Create`` are answered with
    success without doing any work. Otherwise the visualizations JSON is
    downloaded from the quickstart bucket to ``TMP_KIBANA_JSON_PATH`` and
    ``create_metadata_visualizations`` is called for the Elasticsearch
    endpoint. A ``ClientError`` or ``ElasticsearchException`` is printed and
    reported as a failure.

    :param event: Request dict with ``RequestType`` and ``ResourceProperties``.
    :param context: Passed through unchanged to ``send_cfnresponse``.
    :return: Whatever ``send_cfnresponse`` returns.
    """
    if event['RequestType'] != 'Create':
        return send_cfnresponse(event, context, CFN_SUCCESS, {})

    properties = event['ResourceProperties']
    bucket = s3_resource.Bucket(properties['QSS3BucketName'])
    dashboards_key = os.path.join(
        properties['QSS3KeyPrefix'],
        'assets/kibana/kibana_metadata_visualizations.json'
    )
    endpoint = properties['ElasticsearchEndpoint']

    try:
        bucket.download_file(dashboards_key, TMP_KIBANA_JSON_PATH)
        create_metadata_visualizations(endpoint)
        return send_cfnresponse(event, context, CFN_SUCCESS, {})
    except (ClientError, ElasticsearchException) as error:
        print(error)
        return send_cfnresponse(event, context, CFN_FAILED, {})
Example #4
Source File: es_doc_publisher.py    From homeassistant-elasticsearch with MIT License 6 votes vote down vote up
def bulk_sync_wrapper(self, actions):
        """
        Publish ``actions`` through the synchronous bulk helper.

        Workaround for elasticsearch_async not supporting bulk operations.
        An ``ElasticsearchException`` is logged (with traceback) and not
        re-raised.
        """
        from elasticsearch import ElasticsearchException
        from elasticsearch.helpers import bulk

        try:
            outcome = bulk(self._gateway.get_sync_client(), actions)
        except ElasticsearchException as err:
            LOGGER.exception(
                "Error publishing documents to Elasticsearch: %s", err)
        else:
            LOGGER.debug("Elasticsearch bulk response: %s", str(outcome))
            LOGGER.info("Publish Succeeded")
Example #5
Source File: elasticsearch_target.py    From edx-analytics-pipeline with GNU Affero General Public License v3.0 5 votes vote down vote up
def exists(self):
        """Check if this task has already run successfully in the past.

        Looks up the marker document in the marker index.

        :return: ``True`` when the lookup succeeds. ``False`` when the
            document is not found, or when the lookup fails with any other
            Elasticsearch error (that error is logged as a warning).
        """
        try:
            self.elasticsearch_client.get(
                index=self.marker_index,
                doc_type=self.marker_doc_type,
                id=self.marker_index_document_id()
            )
        except elasticsearch.NotFoundError:
            log.debug('Marker document not found.')
        except elasticsearch.ElasticsearchException as err:
            # warning() rather than the deprecated warn() alias, which newer
            # Python versions no longer provide on Logger objects.
            log.warning(err)
        else:
            return True
        return False
Example #6
Source File: lambdas.py    From quickstart-datalake-47lining with Apache License 2.0 5 votes vote down vote up
def handle_bucket_event(event, context):
    """Index the metadata of the S3 object named in an SNS-wrapped event.

    The first SNS record's message is parsed as JSON; its first record
    supplies the bucket name and the (URL-decoded) object key. The object's
    head metadata is fetched, printed and indexed into ``es_index`` with the
    bucket name as document type.

    :param event: Event dict carrying the SNS record.
    :param context: Not used by the body.
    :raises ClientError: Re-raised when the object head cannot be fetched.
    :raises ElasticsearchException: Re-raised when indexing fails.
    """
    s3_record = json.loads(event["Records"][0]["Sns"]["Message"])["Records"][0]["s3"]
    bucket = s3_record["bucket"]["name"]
    key = urllib.parse.unquote_plus(s3_record["object"]["key"])
    print(bucket, key)

    try:
        head = s3.head_object(Bucket=bucket, Key=key)
    except ClientError as err:
        print(err)
        print('Error getting object {} from bucket {}. Make sure they exist, your bucket is in the same region as this function and necessary permissions have been granted.'.format(key, bucket))
        raise err

    content_length = head['ContentLength']
    metadata = {
        'key': key,
        'ContentLength': content_length,
        'SizeMiB': content_length / 1024**2,
        'LastModified': head['LastModified'].isoformat(),
        'ContentType': head['ContentType'],
        'ETag': head['ETag'],
        # the first path segment of the key is used as the dataset name
        'Dataset': key.split('/')[0]
    }
    print("METADATA: " + str(metadata))

    es_client = make_elasticsearch_client(os.environ['ELASTICSEARCH_ENDPOINT'])
    document = json.dumps(metadata)

    try:
        es_client.index(index=es_index, doc_type=bucket, body=document)
    except ElasticsearchException as err:
        print(err)
        print("Could not index in Elasticsearch")
        raise err
Example #7
Source File: analysis.py    From PEBA with GNU General Public License v3.0 5 votes vote down vote up
def getHPStats(peerIdent):
    """Print the per-peer term buckets for each of the ``days`` indices.

    For every offset in ``range(days)`` the index returned by
    ``getRelevantIndex`` is searched with a fixed aggregation query and the
    ``peers`` buckets are printed. Elasticsearch errors are printed and the
    loop carries on with the next index.

    :param peerIdent: Not used by the body.
    """
    # The request body is the same for every index, so define it once.
    esquery = """
           {
             "query": {
               "bool": 
                   {
                       "must":
                       [
                           {"term":
                               {"clientDomain" : "false" }
                           }
                       ]
                   }
             },
             "from": 0,
             "size": 0,
             "sort": [],
             "aggs": {
               "peers": {
                 "terms": {
                   "field": "peerIdent",
                   "size": 10000
                 }
               }
               }}
               """

    for day_offset in range(days):
        try:
            response = es.search(index=getRelevantIndex(day_offset), body=esquery)
            print(response["aggregations"]["peers"]["buckets"])
        except ElasticsearchException as err:
            print('ElasticSearch error: %s' % err)
Example #8
Source File: es_manager.py    From texta with GNU General Public License v3.0 5 votes vote down vote up
def more_like_this(elastic_url, fields: list, like: list, size: int, filters: list, aggregations: list, include: bool, if_agg_only: bool, dataset: Dataset, return_fields=None):
        """Run a More-Like-This search, optionally with filters and aggregations.

        :param elastic_url: URL handed to the ``Elasticsearch`` client.
        :param fields: Fields the MLT query compares on.
        :param like: Documents to find similar ones for; ``like[0]["_index"]``
            is used when checking whether an aggregated field is a text field.
        :param size: Number of documents to return.
        :param filters: Filter dicts, each wrapped in ``Q`` and applied in order.
        :param aggregations: Aggregation dicts; ``agg_type == "composite"`` is
            delegated to ``ES_Manager.handle_composition_aggregation``.
        :param include: Passed to ``MoreLikeThis`` as ``include``.
        :param if_agg_only: When true, return only the aggregations dict.
        :param dataset: Supplies ``index`` and ``mapping`` for the search.
        :param return_fields: Optional source fields to limit the hits to.
        :return: The aggregations dict when ``if_agg_only`` is set; otherwise
            ``{"hits": [...]}`` plus ``"aggregations"`` when there are any, or
            ``{"elasticsearch": [message]}`` if the search raised an
            ``ElasticsearchException``.
        """
        # Base search bound to the ES gateway, then the MLT part of the query.
        base_search = Search(using=Elasticsearch(elastic_url)).index(dataset.index).doc_type(dataset.mapping)
        mlt_query = MoreLikeThis(like=like, fields=fields, min_term_freq=1, max_query_terms=12, include=include)

        query = base_search[0:size]  # how many documents to return
        if return_fields:
            query = query.source(return_fields)  # which fields to return
        query = query.query(mlt_query)

        # User-set filters; an empty list simply skips the loop.
        for filter_spec in filters:
            query = query.filter(Q(filter_spec))

        # User-set aggregations; an empty list simply skips the loop.
        for agg_spec in aggregations:
            if agg_spec["agg_type"] != "composite":
                field_name = agg_spec["field"]
                index = like[0]["_index"]
                is_text = ES_Manager.is_field_text_field(field_name=field_name, index_name=index)
                field = "{}.keyword".format(field_name) if is_text else field_name
                # aggs.bucket() changes the search in place instead of returning a new one.
                query.aggs.bucket(name=agg_spec["bucket_name"], agg_type=agg_spec["agg_type"], field=field)
                continue

            after = agg_spec.get("after_key")
            # NOTE(review): presumably this returns a Search-like object, since
            # it is filtered/executed afterwards - confirm against the helper.
            query = ES_Manager.handle_composition_aggregation(query.to_dict(), agg_spec, after)

        # Aggregations only, in {"bucket_name": {results...}} format.
        if if_agg_only:
            return query.params(size=0).execute().aggs.to_dict()

        try:
            response = query.execute()
            # Throw out all metadata and keep only the documents.
            result = {"hits": [hit.to_dict() for hit in response]}
            if response.aggs:
                # Only add the "aggregations" key when the query returned any.
                result["aggregations"] = response.aggs.to_dict()
            return result

        except ElasticsearchException as e:
            logging.getLogger(ERROR_LOGGER).exception(e)
            return {"elasticsearch": [str(e)]}
Example #9
Source File: analysis.py    From PEBA with GNU General Public License v3.0 5 votes vote down vote up
def getNumberAlerts(timeframe, clientDomain):
    '''Return the number of alerts in the index within ``timeframe`` minutes.

    The search matches ``clientDomain`` and filters on ``createTime`` newer
    than ``now-<timeframe>m``.

    Returns ``res['hits']['total']``, or ``False`` when Elasticsearch raises
    (the error is printed).
    '''
    domain_clause = {"match": {"clientDomain": clientDomain}}
    age_clause = {
        "range": {"createTime": {"gte": "now-" + str(timeframe) + "m"}}
    }
    request_body = {
        "query": {
            "bool": {
                "must": [domain_clause],
                "filter": [age_clause]
            }
        },
        # only the hit count is needed, not the documents themselves
        "size": 0
    }

    try:
        response = es.search(index=esindex, body=request_body)
    except ElasticsearchException as err:
        print('ElasticSearch error: %s' % err)
        return False

    return response['hits']['total']
Example #10
Source File: data.py    From georef-ar-api with MIT License 5 votes vote down vote up
def elasticsearch_connection(hosts, sniff=False, sniffer_timeout=60):
    """Create a connection to Elasticsearch.

    Args:
        hosts (list): List of Elasticsearch nodes to connect to.
        sniff (bool): Enables sniffing, which allows discovering new nodes
            in a cluster and connecting to them.
        sniffer_timeout (int): Value passed as the ``sniffer_timeout`` option
            when sniffing is enabled.

    Raises:
        DataConnectionException: if the connection could not be established.

    Returns:
        Elasticsearch: Connection to Elasticsearch.

    """
    sniffing_options = {}
    if sniff:
        sniffing_options.update(
            sniff_on_start=True,
            sniff_on_connection_fail=True,
            sniffer_timeout=sniffer_timeout,
        )

    try:
        return elasticsearch.Elasticsearch(hosts=hosts, **sniffing_options)
    except elasticsearch.ElasticsearchException as e:
        raise DataConnectionException from e
Example #11
Source File: data.py    From georef-ar-api with MIT License 5 votes vote down vote up
def _run_multisearch(es, searches):
    """Run a list of Elasticsearch searches using MultiSearch.

    The number of searches sent at once is configurable through the
    ES_MULTISEARCH_MAX_LEN constant.

    Args:
        es (Elasticsearch): Connection to Elasticsearch.
        searches (list): List of elasticsearch_dsl.Search.

    Raises:
        DataConnectionException: If an error occurred while running the
            searches.

    Returns:
        list: List of responses, one per search.

    """
    batch_size = constants.ES_MULTISEARCH_MAX_LEN
    collected = []

    # Split the searches into several batches if necessary.
    for start in range(0, len(searches), batch_size):
        multi = MultiSearch(using=es)
        for search in searches[start:start + batch_size]:
            multi = multi.add(search)

        try:
            collected.extend(multi.execute(raise_on_error=True))
        except elasticsearch.ElasticsearchException as e:
            raise DataConnectionException() from e

    return collected
Example #12
Source File: test_mock_normalizer.py    From georef-ar-api with MIT License 5 votes vote down vote up
def test_elasticsearch_connection_error(self):
        """A 500 error should be returned when the connection to
        Elasticsearch fails."""
        connection_failure = elasticsearch.ElasticsearchException()
        self.es.side_effect = connection_failure
        endpoint = random.choice(ENDPOINTS)
        self.assert_500_error(endpoint)
Example #13
Source File: test_mock_normalizer.py    From georef-ar-api with MIT License 5 votes vote down vote up
def test_elasticsearch_msearch_error(self):
        """A 500 error should be returned when the MultiSearch query
        fails."""
        msearch_failure = elasticsearch.ElasticsearchException()
        self.es.return_value.msearch.side_effect = msearch_failure
        endpoint = random.choice(ENDPOINTS)
        self.assert_500_error(endpoint)
Example #14
Source File: question_answerer.py    From mindmeld with Apache License 2.0 5 votes vote down vote up
def execute(self, size=10):
        """Run the knowledge base search with the provided criteria.

        Args:
            size (int): The maximum number of records to fetch, default to 10.

        Returns:
            a list of matching documents (the ``_source`` of every hit).

        Raises:
            KnowledgeBaseConnectionError: on an Elasticsearch connection error.
            KnowledgeBaseError: on any other Elasticsearch error.
        """
        try:
            # TODO: move the ES API call logic to ES helper
            response = self.client.search(
                index=self.index, body=self._build_es_query(size=size)
            )
            return [hit["_source"] for hit in response["hits"]["hits"]]
        except EsConnectionError as e:
            logger.error(
                "Unable to connect to Elasticsearch: %s details: %s", e.error, e.info
            )
            raise KnowledgeBaseConnectionError(es_host=self.client.transport.hosts)
        except TransportError as e:
            logger.error(
                "Unexpected error occurred when sending requests to Elasticsearch: %s Status code: %s details: %s",
                e.error,
                e.status_code,
                e.info,
            )
            raise KnowledgeBaseError
        except ElasticsearchException:
            raise KnowledgeBaseError
Example #15
Source File: es_doc_publisher.py    From homeassistant-elasticsearch with MIT License 5 votes vote down vote up
async def async_do_publish(self):
        """Publish all queued documents to the Elasticsearch cluster.

        Drains ``publish_queue`` into bulk actions. Unless
        ``_only_publish_changed`` is set, the current state of every entity
        that was not in the queue is added as well, skipping excluded
        domains and entities. The bulk upload runs in an executor job through
        ``bulk_sync_wrapper``; Elasticsearch errors are logged, not raised.

        Declared ``async`` because the body awaits the executor job; a plain
        ``def`` containing ``await`` does not compile.
        """
        from elasticsearch import ElasticsearchException

        if self.publish_queue.empty():
            LOGGER.debug("Skipping publish because queue is empty")
            return

        LOGGER.debug("Collecting queued documents for publish")
        actions = []
        # Number of queued entries seen per entity id.
        entity_counts = {}
        self._last_publish_time = datetime.now()

        while not self.publish_queue.empty():
            entry = self.publish_queue.get()

            key = entry["state"].entity_id

            entity_counts[key] = entity_counts.get(key, 0) + 1
            actions.append(self._state_to_bulk_action(
                entry["state"], entry["event"].time_fired))

        if not self._only_publish_changed:
            all_states = self._hass.states.async_all()
            for state in all_states:
                if (state.domain in self._excluded_domains
                        or state.entity_id in self._excluded_entities):
                    continue

                # Entities without a queued change are published with the
                # time of this publish run.
                if state.entity_id not in entity_counts:
                    actions.append(self._state_to_bulk_action(
                        state, self._last_publish_time))

        LOGGER.info("Publishing %i documents to Elasticsearch", len(actions))

        try:
            await self._hass.async_add_executor_job(self.bulk_sync_wrapper, actions)
        except ElasticsearchException as err:
            LOGGER.exception(
                "Error publishing documents to Elasticsearch: %s", err)
Example #16
Source File: peba.py    From PEBA with GNU General Public License v3.0 5 votes vote down vote up
def queryAlertsWithoutIP(maxAlerts, clientDomain, relevantIndex):
    """ Get the newest alerts (no IP address fields) from elasticsearch

    ``clientDomain`` is inserted verbatim into the ``terms`` list of the
    query and ``maxAlerts`` into its ``size``; results are sorted by
    ``recievedTime`` descending. Returns the list of hits, or False when
    Elasticsearch raises (the error is logged).
    """

    query_template = """
    {
            "query": {
                "terms": {
                    "clientDomain": [ %s ]
                }
            },
            "sort": {
                "recievedTime": {
                    "order": "desc"
                    }
                },
            "size": %s,
            "_source": [
                "createTime",
                "peerType",
                "country",
                "originalRequestString",
                "location",
                "targetCountry",
                "countryName",
                "locationDestination",
                "recievedTime",
                "username",
                "password",
                "login",
                "clientDomain"
                ]
            }"""
    esquery = query_template % (clientDomain, maxAlerts)

    try:
        hits = es.search(index=relevantIndex, body=esquery)["hits"]["hits"]
    except ElasticsearchException as err:
        app.logger.error('ElasticSearch error: %s' % err)
        return False

    return hits
Example #17
Source File: peba.py    From PEBA with GNU General Public License v3.0 5 votes vote down vote up
def queryAlerts(maxAlerts, clientDomain, relevantIndex):
    """ Get the newest alerts, including the source IP, from elasticsearch

    ``clientDomain`` is inserted verbatim into the ``terms`` list of the
    query and ``maxAlerts`` into its ``size``; results are sorted by
    ``recievedTime`` descending. Returns the list of hits, or False when
    Elasticsearch raises (the error is logged).
    """

    query_template = """{
            "query": {
                "terms": {
                    "clientDomain": [ %s ]
                }
            },
            "sort": {
                "recievedTime": {
                    "order": "desc"
                    }
                },
            "size": %s,
            "_source": [
                "createTime",
                "recievedTime",
                "peerIdent",
                "peerType",
                "country",
                "targetCountry",
                "originalRequestString",
                "location",
                "sourceEntryIp"
                ]
            }"""
    esquery = query_template % (clientDomain, maxAlerts)

    try:
        hits = es.search(index=relevantIndex, body=esquery)["hits"]["hits"]
    except ElasticsearchException as err:
        app.logger.error('ElasticSearch error: %s' %  err)
        return False

    return hits
Example #18
Source File: question_answerer.py    From mindmeld with Apache License 2.0 4 votes vote down vote up
def _load_field_info(self, index):
        """Load knowledge base field metadata for the specified index.

        Nothing happens when the local cache already holds a non-empty entry
        for the index. Otherwise the index mapping is fetched and one
        ``FieldInfo`` per mapped field is stored in ``self._es_field_info``.

        Args:
            index (str): index name.

        Raises:
            KnowledgeBaseConnectionError: on an Elasticsearch connection error.
            KnowledgeBaseError: on any other Elasticsearch error.
        """

        # A populated cache entry means the metadata was loaded before.
        if self._es_field_info.get(index, {}):
            return

        try:
            # TODO: move the ES API call logic to ES helper
            self._es_field_info[index] = {}
            res = self._es_client.indices.get(index=index)
            typeless_mapping = is_es_version_7(self._es_client)
            mappings = res[index]["mappings"]
            if not typeless_mapping:
                # For non-7 versions the properties sit under the doc type.
                mappings = mappings[DOC_TYPE]
            all_field_info = mappings["properties"]
            for field_name, field_spec in all_field_info.items():
                self._es_field_info[index][field_name] = FieldInfo(
                    field_name, field_spec.get("type")
                )
        except EsConnectionError as e:
            logger.error(
                "Unable to connect to Elasticsearch: %s details: %s",
                e.error,
                e.info,
            )
            raise KnowledgeBaseConnectionError(
                es_host=self._es_client.transport.hosts
            )
        except TransportError as e:
            logger.error(
                "Unexpected error occurred when sending requests to Elasticsearch: %s Status code: %s details: %s",
                e.error,
                e.status_code,
                e.info,
            )
            raise KnowledgeBaseError
        except ElasticsearchException:
            raise KnowledgeBaseError
Example #19
Source File: base.py    From core with MIT License 4 votes vote down vote up
def handle_exception(self, exception, debug, return_json=False): # pylint: disable=arguments-differ
        """
        Send JSON response for exception

        For HTTP and other known exceptions, use its error code
        For all others use a generic 500 error code and log the stack trace

        :param exception: The exception being handled; its string form is the
            default response message.
        :param debug: Not used by this implementation.
        :param return_json: When true, return the response built by
            ``util.create_json_http_exception_response`` instead of sending
            it on ``self.response``.
        """

        request_id = self.request.id
        custom_errors = None
        message = str(exception)
        if isinstance(exception, webapp2.HTTPException):
            code = exception.code
        elif isinstance(exception, errors.InputValidationException):
            code = 400
        elif isinstance(exception, errors.APIAuthProviderException):
            code = 401
        elif isinstance(exception, errors.APIRefreshTokenException):
            code = 401
            custom_errors = exception.errors
        elif isinstance(exception, errors.APIUnknownUserException):
            code = 402
        elif isinstance(exception, errors.APIConsistencyException):
            code = 400
        elif isinstance(exception, errors.APIPermissionException):
            custom_errors = exception.errors
            code = 403
        elif isinstance(exception, errors.APINotFoundException):
            code = 404
        elif isinstance(exception, errors.APIConflictException):
            code = 409
        elif isinstance(exception, errors.APIValidationException):
            code = 422
            custom_errors = exception.errors
        elif isinstance(exception, (errors.FileStoreException, errors.FileFormException)):
            # Both file errors map to 400; the former second FileFormException
            # branch could never be reached and has been removed.
            code = 400
        elif isinstance(exception, ElasticsearchException):
            code = 503
            # Hide the search backend details from the client, but keep the
            # stack trace in the log.
            message = "Search is currently down. Try again later."
            self.request.logger.error(traceback.format_exc())
        elif isinstance(exception, KeyError):
            code = 500
            message = "Key {} was not found".format(str(exception))
        else:
            code = 500

        if code == 500:
            tb = traceback.format_exc()
            self.request.logger.error(tb)

        if return_json:
            return util.create_json_http_exception_response(message, code, request_id, custom=custom_errors)

        util.send_json_http_exception(self.response, message, code, request_id, custom=custom_errors)
Example #20
Source File: test_superelasticsearch.py    From superelasticsearch with MIT License 4 votes vote down vote up
def test_itersearch_raises_assertion_error_when_less_docs_fetched(self):
        """itersearch must raise when fewer docs arrive than ``hits.total``.

        The mocked search reports 13 hits in total but the search page (10
        docs) and the first scroll page (2 docs) deliver only 12, after which
        the scroll comes back empty.
        """
        mocked_value_template = {
            "took": 27,
            "timed_out": False,
            "_scroll_id": 123213,
            "_shards": {
                "total": 2,
                "successful": 2,
                "failed": 0
            },
            "hits": {
                "total": 13,
                "max_score": None,
                "hits": [
                    {"some_doc": "with_some_val"} for _ in xrange(10)
                ]
            }
        }

        ss = SuperElasticsearch(hosts=['localhost:9200'])

        def run_scenario(chunked):
            # mock the client's search method: first page of 10 docs
            ss.search = Mock(return_value=deepcopy(mocked_value_template))

            # mock the client's scroll method: second page of only 2 docs
            short_page = deepcopy(mocked_value_template)
            short_page['_scroll_id'] = 456456
            short_page['hits']['hits'] = [
                {"some_doc": "with_some_val"} for _ in xrange(2)
            ]
            ss.scroll = Mock(return_value=short_page)

            search_generator = ss.itersearch(index=self._index,
                                             doc_type=self._doc_type,
                                             body=dict(query=dict(
                                                 match_all={})),
                                             scroll='10m',
                                             chunked=chunked)
            # chunked mode yields one item per page, otherwise one per doc
            iterate_times = 2 if chunked else 12

            for _ in range(iterate_times):
                search_generator.next()

            # the following scroll returns nothing although a doc is missing
            empty_page = deepcopy(mocked_value_template)
            empty_page['_scroll_id'] = 789789
            empty_page['hits']['hits'] = []
            ss.scroll = Mock(return_value=empty_page)
            search_generator.next()

        self.assertRaises(ElasticsearchException, run_scenario, True)
        self.assertRaises(ElasticsearchException, run_scenario, False)
Example #21
Source File: dataexplorerhandler.py    From core with MIT License 4 votes vote down vote up
def index_field_names(self):
        """(Re)build the ``data_explorer_fields`` index from the
        ``data_explorer`` mappings.

        Aborts with 404 when the ``data_explorer`` index (or its mappings) is
        not available, and with 500 when the fields index cannot be deleted
        or created. With the ``hard-reset`` flag set, an existing fields
        index is removed first.
        """

        try:
            if not config.es.indices.exists('data_explorer'):
                self.abort(404, 'data_explorer index not yet available')
        except TransportError as e:
            self.abort(404, 'elastic search not available: {}'.format(e))

        # Sometimes we might want to clear out what is there...
        if self.is_true('hard-reset') and config.es.indices.exists('data_explorer_fields'):
            config.log.debug('Removing existing data explorer fields index...')
            try:
                config.es.indices.delete(index='data_explorer_fields')
            except ElasticsearchException as e:
                self.abort(500, 'Unable to clear data_explorer_fields index: {}'.format(e))

        # Check to see if fields index exists, if not - create it:
        if not config.es.indices.exists('data_explorer_fields'):
            request = {
                'settings': {
                    'number_of_shards': 1,
                    'number_of_replicas': 0,
                    'analysis' : ANALYSIS
                },
                'mappings': {
                    '_default_' : {
                        '_all' : {'enabled' : True},
                        'dynamic_templates': DYNAMIC_TEMPLATES
                    },
                    'flywheel': {}
                }
            }

            config.log.debug('creating data_explorer_fields index ...')
            try:
                config.es.indices.create(index='data_explorer_fields', body=request)
            except ElasticsearchException as e:
                # The exception must be bound here: the message below formats
                # it, and without "as e" the handler itself failed with a
                # NameError instead of aborting with the intended 500.
                self.abort(500, 'Unable to create data_explorer_fields index: {}'.format(e))

        try:
            mappings = config.es.indices.get_mapping(index='data_explorer', doc_type='flywheel')
            fw_mappings = mappings['data_explorer']['mappings']['flywheel']['properties']
        except (TransportError, KeyError):
            self.abort(404, 'Could not find mappings, exiting ...')

        self._handle_properties(fw_mappings, '')
Example #22
Source File: ElasticSearch.py    From grease with MIT License 4 votes vote down vote up
def parse_source(self, configuration):
        """This will make a ElasticSearch connection & query to the configured server

        When ``hour`` and/or ``minute`` are configured and do not match the
        current UTC time, no query is made and True is returned. Otherwise
        ``server``, ``query``, ``index`` and ``doc_type`` must all be set; the
        raw search response is stored in ``self._data``.

        Args:
            configuration (dict): Configuration of Source. See Class Documentation above for more info

        Returns:
            bool: If True data will be scheduled for ingestion after deduplication. If False the engine will bail out

        """
        if configuration.get('hour'):
            if datetime.datetime.utcnow().hour != int(configuration.get('hour')):
                # it is not the correct hour
                return True
        if configuration.get('minute'):
            if datetime.datetime.utcnow().minute != int(configuration.get('minute')):
                # it is not the correct minute
                return True

        required_keys = ('server', 'query', 'index', 'doc_type')
        if not all(configuration.get(key) for key in required_keys):
            # Invalid parameters
            return False

        try:
            es = elasticsearch.Elasticsearch(
                "".join(configuration.get('server')),
                timeout=30,
                max_retries=2,
                retry_on_timeout=True
            )
        except Exception:
            # Failed to connect to ES. Exception rather than BaseException so
            # that KeyboardInterrupt / SystemExit are not swallowed here.
            return False

        try:
            self._data = es.search(
                index=''.join(configuration.get('index')),
                doc_type=''.join(configuration.get('doc_type')),
                body=configuration.get('query')
            )
        except (elasticsearch.ImproperlyConfigured, elasticsearch.ElasticsearchException):
            # Improperly configured request, or a generic Elasticsearch error
            return False

        # The query succeeded: report True explicitly. Previously this path
        # fell off the end of the function and returned None, which is falsy.
        return True
Example #23
Source File: analysis.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def getHoneypotCount(days):
    """Return the daily histogram buckets with per-day peer terms.

    Searches the indices returned by ``getRelevantIndices(days)`` with a
    fixed aggregation query.

    Returns the ``range`` aggregation buckets, or False when Elasticsearch
    raises (the error is printed).
    """
    daily_peers_query = """
    {
      "query": {
        "bool": {
          "must": [
            {
              "term": {
                "clientDomain": "false"
              }
            }
          ]
        }
      },
      "from": 0,
      "size": 0,
      "sort": [],
      "aggs": {
        "range": {
          "date_histogram": {
            "field": "recievedTime",
            "interval": "day"
          },
          "aggs": {
            "peers": {
              "terms": {
                "field": "peerIdent",
                "size": 100000
              }
            }
          }
        }
      }
    }
    
    """

    try:
        response = es.search(index=getRelevantIndices(days), body=daily_peers_query)
    except ElasticsearchException as err:
        print('ElasticSearch error: %s' % err)
        return False

    return response["aggregations"]["range"]["buckets"]
Example #24
Source File: analysis.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def getAlertsPerHoneypotType(timeframe, clientDomain):
    """Count alerts per honeypot type over the last *timeframe* minutes.

    Searches the module-level ``esindex`` for documents whose
    ``recievedTime`` is at or after ``now-<timeframe>m`` and whose
    ``clientDomain`` matches the lower-cased string form of *clientDomain*.
    Results are aggregated by ``peerType`` (up to 1000 buckets) and no hits
    are returned (``size: 0``).

    :param timeframe: look-back window in minutes, interpolated into the
        range filter.
    :param clientDomain: value placed inside the ``clientDomain`` terms
        list after ``str(...).lower()``.
    :return: the raw search response on success; ``True`` after printing
        the error when an ``ElasticsearchException`` is raised.
        NOTE(review): ``True`` on failure is truthy just like a successful
        response - confirm callers really expect this.
    """
    domain_terms = str(clientDomain).lower()
    type_query = """
    {
          "query": {
            "bool": {
              "must": [
                {
                  "range": {
                    "recievedTime": {
                        "gte": "now-%sm" 
                    }
                  }
                },
                {
                  "terms": {
                      "clientDomain": [ %s ]
                    }
                }
              ]
            }
          },
          "aggs": {
            "peerType": {
              "terms": {
                "field": "peerType",
                "size": 1000
              }
            }
          },
          "size": 0
        }""" % (timeframe, domain_terms)

    try:
        return es.search(index=esindex, body=type_query)
    except ElasticsearchException as es_error:
        print('ElasticSearch error: %s' % es_error)
        return True
Example #25
Source File: fillcache.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def queryAlertsCountWithType(timeframe, clientDomain, relevantIndex):
    """Fetch alert counts per honeypot type since a given point in time.

    The search selects documents with ``recievedTime`` at or after the
    computed lower bound, filters them on ``clientDomain`` inside the
    ``communityfilter`` aggregation and groups the remainder by
    ``peerType``. No hits are returned (``size: 0``).

    :param timeframe: the string ``"day"`` (lower bound ``now/d``) or a
        decimal string of minutes (lower bound ``now-<minutes>m``).
    :param clientDomain: text interpolated verbatim into the
        ``clientDomain`` terms list.
    :param relevantIndex: index (or index expression) to search.
    :return: the raw search response on success; ``False`` when
        *timeframe* is invalid or when an ``ElasticsearchException`` is
        raised (a message is printed in both cases).
    """
    # Reject anything that is neither "day" nor a decimal minute count.
    if timeframe != "day" and not timeframe.isdecimal():
        print('Non numeric value in retrieveAlertsCountWithType timespan. Must be decimal number (in minutes) or string "day"')
        return False

    lower_bound = "now/d" if timeframe == "day" else "now-%sm" % timeframe

    count_query = """ 
    {
          "query": {
            "range": {
              "recievedTime": {
                  "gte": "%s"
              }
            }
          },
          "aggs": {
            "communityfilter": {
              "filter": {
                "terms": {
                  "clientDomain": [ %s ] 
                }
              },
              "aggs": {
                "honeypotTypes": {
                  "terms": {
                    "field": "peerType"
                  }
                }
              }
            }
          },
          "size": 0
        }
    """ % (lower_bound, clientDomain)

    try:
        return es.search(index=relevantIndex, body=count_query)
    except ElasticsearchException as es_error:
        print('ElasticSearch error: %s' %  es_error)
        return False
Example #26
Source File: fillcache.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def queryAlertStats(clientDomain,relevantIndex):
    """Get combined alert statistics from Elasticsearch.

    Filters documents on ``clientDomain`` inside the ``communityfilter``
    aggregation and counts them in four ``recievedTime`` ranges keyed
    ``1d`` (from ``now-1440m``), ``1h`` (``now-60m``), ``5m`` (``now-5m``)
    and ``1m`` (``now-1m``). No hits are returned (``size: 0``).

    :param clientDomain: text interpolated verbatim into the
        ``clientDomain`` terms list.
    :param relevantIndex: index (or index expression) to search.
    :return: the ``ctr`` range buckets on success; ``False`` when the
        response carries no ``aggregations`` section or when an
        ``ElasticsearchException`` is raised (the error is printed).
    """
    esquery = """{
               "aggs": {
                   "communityfilter": {
                       "filter": {
                           "terms": {
                               "clientDomain": [ %s ]
                           }
                       },
               "aggs": {
               "ctr": {
                 "range": {
                   "field": "recievedTime",
                   "ranges": [
                     {
                       "key": "1d",
                       "from": "now-1440m"
                     },
                     {
                       "key": "1h",
                       "from": "now-60m"
                     },
                     {
                       "key": "5m",
                       "from": "now-5m"
                     },
                     {
                       "key": "1m",
                       "from": "now-1m"
                     }
                   ]
                 }
               }}}
             },
             "size": 0
           }""" % clientDomain

    try:
        res = es.search(index=relevantIndex, body=esquery)
    except ElasticsearchException as err:
        print('ElasticSearch error: %s' % err)
        return False

    # A response without an "aggregations" section would otherwise raise an
    # uncaught KeyError; report it as the same False the error path returns.
    if 'aggregations' not in res:
        return False
    return res['aggregations']['communityfilter']['ctr']['buckets']
Example #27
Source File: tpotstats.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def getAlertsPerHoneypotType(app,clientDomain, es, esindex, utcTimeFrom,utcTimeTo):
    """Count alerts per honeypot type inside an explicit time window.

    Searches *esindex* through the supplied *es* client for documents whose
    ``recievedTime`` lies between *utcTimeFrom* and *utcTimeTo* (both
    inclusive: ``gte`` / ``lte``) and whose ``clientDomain`` matches the
    lower-cased string form of *clientDomain*. Results are aggregated by
    ``peerType`` (up to 1000 buckets) and no hits are returned (``size: 0``).

    :param app: object whose ``logger.error`` receives search failures.
    :param clientDomain: value placed inside the ``clientDomain`` terms
        list after ``str(...).lower()``.
    :param es: client object exposing ``search(index=..., body=...)``.
    :param esindex: index (or index expression) to search.
    :param utcTimeFrom: lower bound of the ``recievedTime`` range.
    :param utcTimeTo: upper bound of the ``recievedTime`` range.
    :return: the raw search response on success; ``True`` after logging
        the error when an ``ElasticsearchException`` is raised.
        NOTE(review): ``True`` on failure is truthy just like a successful
        response - confirm callers really expect this.
    """
    domain_terms = str(clientDomain).lower()
    window_query = """
    {
          "query": {
            "bool": {
              "must": [
                {
                  "range": {
                    "recievedTime": {
                        "gte": "%s",
                        "lte": "%s"
                    }
                  }
                },
                {
                  "terms": {
                      "clientDomain":  [ %s ]
                    }
                }
              ]
            }
          },
          "aggs": {
            "peerType": {
              "terms": {
                "field": "peerType",
                "size": 1000
              }
            }
          },
          "size": 0
        }"""% (utcTimeFrom, utcTimeTo, domain_terms)

    try:
        return es.search(index=esindex, body=window_query)
    except ElasticsearchException as es_error:
        app.logger.error('ElasticSearch error: %s' % es_error)
        return True
Example #28
Source File: peba.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def queryForSingleIP(maxAlerts, ip, clientDomain, relevantIndex):
    """Get the most recent alerts for one global IPv4 source address.

    *ip* must parse as an IPv4 address and be globally routable; otherwise
    nothing is queried. The search matches ``sourceEntryIp`` exactly,
    filters on ``clientDomain``, sorts by ``createTime`` descending and
    returns only the ``createTime``, ``peerType``, ``targetCountry`` and
    ``originalRequestString`` source fields.

    :param maxAlerts: maximum number of hits, interpolated into ``size``.
    :param ip: IPv4 address to look up.
    :param clientDomain: text interpolated verbatim into the
        ``clientDomain`` terms list.
    :param relevantIndex: index (or index expression) to search.
    :return: the list under ``res["hits"]["hits"]`` on success; ``False``
        when *ip* is not a valid global IPv4 address or when an
        ``ElasticsearchException`` is raised (logged via ``app.logger``).
    """
    # Validate before the value is interpolated into the query. Only parsing
    # errors are treated as "invalid IP"; a bare ``except:`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated programming errors.
    try:
        address = ipaddress.IPv4Address(ip)
    except (ValueError, TypeError):
        # ipaddress.AddressValueError is a ValueError subclass.
        app.logger.debug('No valid IP given on /querySingleIP: %s', ip)
        return False

    if not address.is_global:
        # Log the argument that was actually checked rather than re-reading
        # it from the request.
        app.logger.debug('No global IP address given on /querySingleIP: %s', ip)
        return False

    esquery="""{
          "query": {
            "bool": {
              "must": [
                {
                  "term": {
                    "sourceEntryIp": "%s"
                  }
                },
                {
                  "terms": {
                    "clientDomain": [ %s ]
                  }
                }
              ]
            }
          },
          "size": %s,
          "sort": {
            "createTime": {
              "order": "desc"
            }
          },
          "_source": [
            "createTime",
            "peerType",
            "targetCountry",
            "originalRequestString"
          ]
        }""" % (ip, clientDomain, maxAlerts)

    try:
        res = es.search(index=relevantIndex, body=esquery)
        return res["hits"]["hits"]
    except ElasticsearchException as err:
        app.logger.error('ElasticSearch error: %s' %  err)

    return False

# Formatting functions 
Example #29
Source File: peba.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def queryAlertStats(clientDomain, relevantIndex):
    """Fetch combined alert counters from Elasticsearch.

    Filters documents on ``clientDomain`` inside the ``communityfilter``
    aggregation and counts them in four ``recievedTime`` ranges keyed
    ``1d`` (from ``now-1440m``), ``1h`` (``now-60m``), ``5m`` (``now-5m``)
    and ``1m`` (``now-1m``). No hits are returned (``size: 0``).

    :param clientDomain: text interpolated verbatim into the
        ``clientDomain`` terms list.
    :param relevantIndex: index (or index expression) to search.
    :return: the ``ctr`` range buckets on success; ``False`` when the
        response has no ``aggregations`` key or when an
        ``ElasticsearchException`` is raised (logged via ``app.logger``).
    """
    stats_query="""{
            "aggs": {
                "communityfilter": {
                    "filter": {
                        "terms": {
                            "clientDomain": [ %s ]
                        }
                    },
            "aggs": {
            "ctr": {
              "range": {
                "field": "recievedTime",
                "ranges": [
                  {
                    "key": "1d",
                    "from": "now-1440m"
                  },
                  {
                    "key": "1h",
                    "from": "now-60m"
                  },
                  {
                    "key": "5m",
                    "from": "now-5m"
                  },
                  {
                    "key": "1m",
                    "from": "now-1m"
                  }
                ]
              }
            }}}
          },
          "size": 0
        }""" % clientDomain

    try:
        response = es.search(index=relevantIndex, body=stats_query)
    except ElasticsearchException as es_error:
        app.logger.error('ElasticSearch error: %s' % es_error)
        return False

    # Responses without an aggregation section are reported as failure.
    if 'aggregations' not in response:
        return False
    return response['aggregations']['communityfilter']['ctr']['buckets']
Example #30
Source File: peba.py    From PEBA with GNU General Public License v3.0 4 votes vote down vote up
def queryDatasetAlertTypesPerMonth(days, clientDomain, relevantIndex):
    """Fetch a per-day histogram of alert types for a recent time span.

    Selects documents with ``createTime`` at or after the computed lower
    bound, filters them on ``clientDomain`` inside the ``communityfilter``
    aggregation, buckets them per day on ``createTime`` and splits each day
    by ``peerType.keyword``. No hits are returned (``size: 0``).

    :param days: ``None`` for the default span (``now-1M/d``) or a decimal
        string giving the number of days (``now-<days>d/d``).
    :param clientDomain: text interpolated verbatim into the
        ``clientDomain`` terms list.
    :param relevantIndex: index (or index expression) to search.
    :return: ``res["aggregations"]["communityfilter"]["range"]`` on
        success; ``False`` when *days* is not decimal or when an
        ``ElasticsearchException`` is raised (logged via ``app.logger``).
    """
    # Anything other than None or a decimal string is rejected up front.
    if days is not None and not days.isdecimal():
        app.logger.error('Non numeric value in datasetAlertsTypesPerMonth timespan. Must be decimal number in days')
        return False

    lower_bound = "now-1M/d" if days is None else "now-%sd/d" % days

    histogram_query="""
    {
          "query": {
            "range": {
              "createTime": {
                "gte": "%s"
              }
            }
          },
         "aggs": {
                "communityfilter": {
                    "filter": {
                        "terms": {
                            "clientDomain": [ %s ]
                        }
            },
          "aggs": {
            "range": {
              "date_histogram": {
                "field": "createTime",
                "interval": "day"
              },
              "aggs": {
                "nested_terms_agg": {
                  "terms": {
                    "field": "peerType.keyword"
                  }}}
                }
              }
            }
          },
          "size": 0
        }
    """ % (lower_bound, clientDomain )

    try:
        response = es.search(index=relevantIndex, body=histogram_query)
    except ElasticsearchException as es_error:
        app.logger.error('ElasticSearch error: %s' %  es_error)
        return False

    return response["aggregations"]["communityfilter"]["range"]