diff --git a/api_app/queryset.py b/api_app/queryset.py index 0a416db339..29fabbe359 100644 --- a/api_app/queryset.py +++ b/api_app/queryset.py @@ -1,6 +1,5 @@ import datetime import json -import logging import uuid from typing import TYPE_CHECKING, Generator, Type @@ -13,6 +12,8 @@ from api_app.models import PythonConfig from api_app.serializers import AbstractBIInterface +import logging + from celery.canvas import Signature from django.db import models from django.db.models import ( @@ -38,6 +39,8 @@ from certego_saas.apps.organization.membership import Membership from certego_saas.apps.user.models import User +logger = logging.getLogger(__name__) + class SendToBiQuerySet(models.QuerySet): @classmethod @@ -54,10 +57,14 @@ def _create_index_template(): settings.ELASTICSEARCH_CLIENT.indices.put_template( name=settings.ELASTICSEARCH_BI_INDEX, body=body ) + logger.info( + f"created template for Elastic named {settings.ELASTICSEARCH_BI_INDEX}" + ) def send_to_elastic_as_bi(self, max_timeout: int = 60) -> bool: from elasticsearch.helpers import bulk + logger.info("BI start") self._create_index_template() BULK_MAX_SIZE = 1000 found_errors = False @@ -74,13 +81,13 @@ def send_to_elastic_as_bi(self, max_timeout: int = 60) -> bool: request_timeout=max_timeout, ) if errors: - logging.error( + logger.error( f"Errors on sending to elastic: {errors}." " We are not marking objects as sent." ) found_errors |= errors else: - logging.info("BI sent") + logger.info("BI sent") self.model.objects.filter( pk__in=objects.values_list("pk", flat=True) ).update(sent_to_bi=True) diff --git a/api_app/serializers/__init__.py b/api_app/serializers/__init__.py index 44d8a6cef9..8c7419bb01 100644 --- a/api_app/serializers/__init__.py +++ b/api_app/serializers/__init__.py @@ -51,7 +51,6 @@ def get_environment(instance): def to_elastic_dict(data): return { "_source": data, - "_type": "_doc", "_index": settings.ELASTICSEARCH_BI_INDEX + "-" + now().strftime("%Y.%m"), "_op_type": "index", } diff --git a/configuration/elastic_search_mappings/intel_owl_bi.json b/configuration/elastic_search_mappings/intel_owl_bi.json index fca1ec09ce..7cb978c5c3 100644 --- a/configuration/elastic_search_mappings/intel_owl_bi.json +++ b/configuration/elastic_search_mappings/intel_owl_bi.json @@ -1,47 +1,45 @@ { "settings" : { - "number_of_shards" : 3 + "number_of_shards" : 1 }, "mappings": { - "_doc": { - "dynamic": false, - "properties": { - "timestamp": { - "type": "date" - }, - "application": { - "type": "keyword" - }, - "username": { - "type": "keyword" - }, - "environment": { - "type": "keyword" - }, - "name": { - "type": "keyword" - }, - "process_time": { - "type": "integer" - }, - "status": { - "type": "keyword" - }, - "end_time": { - "type": "date" - }, - "parameters": { - "type": "object", - "dynamic": true - }, - "playbook": { - "type": "keyword" - }, - "class_instance": { - "type": "keyword" - } - - } + "dynamic": false, + "properties": { + "timestamp": { + "type": "date" + }, + "application": { + "type": "keyword" + }, + "username": { + "type": "keyword" + }, + "environment": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "process_time": { + "type": "integer" + }, + "status": { + "type": "keyword" + }, + "end_time": { + "type": "date" + }, + "parameters": { + "type": "object", + "dynamic": true + }, + "playbook": { + "type": "keyword" + }, + "class_instance": { + "type": "keyword" } + + } } } \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index bbdd7449a5..6b89107035 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -22,7 +22,6 @@ ENV LOG_PATH /var/log/intel_owl ARG REPO_DOWNLOADER_ENABLED=true ARG WATCHMAN=false ENV watch_logs_cmd "watch -n1 tail -n10 /var/log/intel_owl/django/api_app.log" -ARG PYELASTIC_VERSION=7.4.1 ARG PYCTI_VERSION=5.12.29 RUN mkdir -p ${LOG_PATH} \ @@ -51,8 +50,6 @@ COPY requirements/certego-requirements.txt $PYTHONPATH/certego-requirements.txt WORKDIR $PYTHONPATH RUN pip3 install --no-cache-dir --compile -r project-requirements.txt \ - # install elasticsearch-dsl's appropriate version as specified by user - && pip3 install --no-cache-dir elasticsearch-dsl==${PYELASTIC_VERSION} \ && pip3 install --no-cache-dir pycti==${PYCTI_VERSION} \ && pip3 install --no-cache-dir --compile -r certego-requirements.txt diff --git a/docker/env_file_app_ci b/docker/env_file_app_ci index dc9b7a732c..acb8e204b0 100644 --- a/docker/env_file_app_ci +++ b/docker/env_file_app_ci @@ -35,7 +35,6 @@ ELASTICSEARCH_DSL_NO_OF_REPLICAS=0 ELASTICSEARCH_BI_ENABLED=False ELASTICSEARCH_BI_HOST= -ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME=elastisearch.crt ELASTICSEARCH_BI_INDEX=intelowl-bi diff --git a/docker/env_file_app_template b/docker/env_file_app_template index 946e12156a..ec56bc5c23 100644 --- a/docker/env_file_app_template +++ b/docker/env_file_app_template @@ -61,7 +61,6 @@ ELASTICSEARCH_DSL_NO_OF_REPLICAS=0 ELASTICSEARCH_BI_ENABLED=False ELASTICSEARCH_BI_HOST= -ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME=elastisearch.crt ELASTICSEARCH_BI_INDEX=intelowl-bi # Test tokens diff --git a/docker/test.override.yml b/docker/test.override.yml index 104a45af98..0590bf95a2 100644 --- a/docker/test.override.yml +++ b/docker/test.override.yml @@ -8,7 +8,6 @@ services: args: REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED} WATCHMAN: "true" - PYELASTIC_VERSION: ${PYELASTIC_VERSION:-7.4.1} PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0} image: intelowlproject/intelowl:test volumes: diff --git a/docs/source/Advanced-Configuration.md b/docs/source/Advanced-Configuration.md index 8defdc37a3..727671d28f 100644 --- a/docs/source/Advanced-Configuration.md +++ b/docs/source/Advanced-Configuration.md @@ -25,6 +25,7 @@ This page includes details about some advanced features that Intel Owl provides - [Manual usage](#manual-usage) ## ElasticSearch +Right now only ElasticSearch v8 is supported. ### DSL IntelOwl makes use of [django-elasticsearch-dsl](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html) to index Job results into elasticsearch. The `save` and `delete` operations are auto-synced so you always have the latest data in ES. @@ -40,9 +41,7 @@ Intel Owl provides a Kibana's "Saved Object" configuration (with example dashboa 1. Setup [Elastic Search and Kibana](https://hub.docker.com/r/nshou/elasticsearch-kibana/) and say it is running in a docker service with name `elasticsearch` on port `9200` which is exposed to the shared docker network. (Alternatively, you can spin up a local Elastic Search instance, by appending `--elastic` to the `./start` command. Note that the local Elastic Search instance consumes large amount of memory, and hence having >=16GB is recommended.)) 2. In the `env_file_app`, we set `ELASTICSEARCH_DSL_ENABLED` to `True` and `ELASTICSEARCH_DSL_HOST` to `elasticsearch:9200`. -3. Configure the version of the ElasticSearch Library used [depending on the version](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html#features) of our Elasticsearch server. This is required for compatibility. To do that, you can leverage the option `--pyelastic-version` of the `./start` script. The default value of that parameter indicates the version that would be installed by default. -4. Rebuild the docker images with `./start test build --pyelastic-version x.x.x` (required only if you changed the default value of `--pyelastic-version`) -5. Now start the docker containers and execute, +3. Now start the docker containers and execute ```bash docker exec -ti intelowl_uwsgi python manage.py search_index --rebuild @@ -52,7 +51,7 @@ This will build and populate all existing job objects into the `jobs` index. ### Business Intelligence -IntelOwl makes use of [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/7.x/index.html) to store data that can be used for Business Intelligence purpose. +IntelOwl makes use of [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/8.x/index.html) to store data that can be used for Business Intelligence purpose. Since plugin reports are deleted periodically, this feature allows to save indefinitely small amount of data to keep track of how analyzers perform and user usage. At the moment, the following information are sent to elastic: - application name @@ -65,16 +64,13 @@ At the moment, the following information are sent to elastic: - parameters Documents are saved in the `ELEASTICSEARCH_BI_INDEX-%YEAR-%MONTH`, allowing to manage the retention accordingly. -To activate this feature, it is necessary to set `ELASTICSEARCH_BI_ENABLED` -to `True` in the `env_file_app` and +To activate this feature, it is necessary to set `ELASTICSEARCH_BI_ENABLED` to `True` in the `env_file_app` and `ELASTICSEARCH_BI_HOST` to `elasticsearch:9200` or your elasticsearch server. -At last, you have to copy your ssl certificate in the `configuration` folder -and set `ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME` to your certificate file name. An [index template](https://github.com/intelowlproject/IntelOwl/configuration/elastic_search_mappings/intel_owl_bi.json) is created after the first bulk submission of reports. If you want to use kibana to visualize your data/make dashboard, you must create an index pattern: -Go to Kibana -> Management -> Index Patterns -> search for your index and use as time field `timestamp` +Go to Kibana -> Discover -> Stack Management -> Index Patterns -> search for your index and use as time field `timestamp` ## Authentication options diff --git a/intel_owl/settings/elasticsearch.py b/intel_owl/settings/elasticsearch.py index c50155b779..d231b9f98f 100644 --- a/intel_owl/settings/elasticsearch.py +++ b/intel_owl/settings/elasticsearch.py @@ -1,56 +1,32 @@ # This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl # See the file 'LICENSE' for copying permission. -# Elastic Search Configuration -from ssl import create_default_context - from elasticsearch import Elasticsearch from intel_owl import secrets -from intel_owl.settings import CONFIG_ROOT ELASTICSEARCH_BI_ENABLED = ( secrets.get_secret("ELASTICSEARCH_BI_ENABLED", False) == "True" ) if ELASTICSEARCH_BI_ENABLED: ELASTICSEARCH_BI_HOST = secrets.get_secret("ELASTICSEARCH_BI_HOST").split(",") - ELASTICSEARCH_SSL_CERTIFICATE_PATH = CONFIG_ROOT / secrets.get_secret( - "ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME", "elasticsearch.crt" - ) ELASTICSEARCH_BI_INDEX = secrets.get_secret("ELASTICSEARCH_BI_INDEX") - if ( - not ELASTICSEARCH_BI_HOST - or not ELASTICSEARCH_SSL_CERTIFICATE_PATH - or not ELASTICSEARCH_BI_INDEX - ): + + if not ELASTICSEARCH_BI_HOST or not ELASTICSEARCH_BI_INDEX: print("Elasticsearch not correctly configured") - elif not ELASTICSEARCH_SSL_CERTIFICATE_PATH.exists(): - print( - f"Elasticsearch certificate {ELASTICSEARCH_SSL_CERTIFICATE_PATH}" - " not found" - ) + else: - elastic_ssl_context = create_default_context( - cafile=str(ELASTICSEARCH_SSL_CERTIFICATE_PATH) + ELASTICSEARCH_CLIENT = Elasticsearch( + ELASTICSEARCH_BI_HOST, + maxsize=20, + max_retries=10, + retry_on_timeout=True, + timeout=30, + sniff_on_connection_fail=True, + sniff_timeout=30, ) - from importlib.metadata import version - - v = version("elasticsearch") - major_version = int(v.split(".")[0]) - if major_version < 8: - ELASTICSEARCH_CLIENT = Elasticsearch( - ELASTICSEARCH_BI_HOST, - ssl_context=elastic_ssl_context, - scheme="https", - maxsize=20, - max_retries=10, - retry_on_timeout=True, - timeout=30, - sniff_on_connection_fail=True, - sniff_timeout=30, - ) - else: - raise RuntimeError(f"Elastic version {v} is not supported at the moment.") + if not ELASTICSEARCH_CLIENT.ping(): + print("ELASTICSEARCH client configuration did not connect correctly") ELASTICSEARCH_DSL_ENABLED = ( secrets.get_secret("ELASTICSEARCH_DSL_ENABLED", False) == "True" diff --git a/requirements/project-requirements.txt b/requirements/project-requirements.txt index 1bdee536fe..9a9be37017 100644 --- a/requirements/project-requirements.txt +++ b/requirements/project-requirements.txt @@ -22,10 +22,6 @@ djangorestframework-filters==1.0.0.dev2 drf-spectacular==0.27.1 django-rest-email-auth==4.0.0 -uWSGI==2.0.22 -uwsgitop==0.12 -whitenoise==6.6.0 - # infra boto3==1.26.143 celery[sqs,redis]==5.3.0 @@ -33,6 +29,13 @@ dataclasses==0.6 # https://github.com/advisories/GHSA-q4qm-xhf9-4p8f # unpatched CVE: noproblem, we just use this for debugging purposes flower==2.0.0 +uWSGI==2.0.22 +uwsgitop==0.12 +whitenoise==6.6.0 +daphne==4.1.0 +channels==4.0.0 +channels-redis==4.2.0 +elasticsearch-dsl==8.13.0 # plugins GitPython==3.1.41 @@ -76,7 +79,6 @@ querycontacts==2.0.0 pyxlsb2==0.0.8 xlrd2==1.3.4 defusedxml==0.7.1 -dateparser==1.2.0 -daphne==4.1.0 -channels==4.0.0 -channels-redis==4.2.0 + +#others +dateparser==1.2.0 \ No newline at end of file diff --git a/start b/start index c0ce703f83..c5df2cec53 100755 --- a/start +++ b/start @@ -2,7 +2,6 @@ generic_version_regex="^[0-9]{1,2}\.[0-9]{1,2}.[0-9]{1,2}$" version_regex="^[3-9]\.[0-9]{1,2}.[0-9]{1,2}$" -pyelastic_default_version="7.4.1" pycti_default_version="5.10.0" # little trick for exact matching in arrays @@ -55,9 +54,6 @@ print_help () { echo " --pycti-version The pycti version to choose. This must match the" echo " OpenCTI server version you are connecting to." echo " Default is ${pycti_default_version}." - echo " --pyelastic-version The py-elasticsearch version to choose. This" - echo " must match the server version you are" - echo " connecting to. Default is ${pyelastic_default_version}." } check_parameters () { @@ -87,7 +83,6 @@ check_parameters () { set_defaults_values () { project_name="intel_owl" version=$current_version - export PYELASTIC_VERSION=$pyelastic_default_version export PYCTI_VERSION=$pycti_default_version } @@ -213,14 +208,6 @@ while [[ $# -gt 0 ]]; do params["elastic"]=true shift 1 ;; - --pyelastic-version) - if ! [[ $2 =~ $generic_version_regex ]]; then - echo "Error! Wrong pyelastic version format." >&2 - exit 1 - fi - export PYELASTIC_VERSION=$2 - shift 2 - ;; --pycti-version) if ! [[ $2 =~ $generic_version_regex ]]; then echo "Error! Wrong pycti version format." >&2 @@ -248,9 +235,8 @@ done # here all variables should be parsed and ready for use cmd_py_version=("up" "build") if [[ ( ! $env_argument == "test" || ! ${cmd_py_version[*]} =~ $cmd_argument ) && \ - ( $PYELASTIC_VERSION != "$pyelastic_default_version" || \ - $PYCTI_VERSION != "$pycti_default_version" ) ]]; then - echo "pycti_version and pyelastic_version options are valid only while running in" >&2 + ( $PYCTI_VERSION != "$pycti_default_version" ) ]]; then + echo "pycti_version options are valid only while running in" >&2 echo "'test' mode and while building a new image. This is because they can change" >&2 echo "the version of those library only during the build of a new Docker Image." >&2 exit 11