Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for Elastic8 and removal of support for older versions #2262

Merged
merged 3 commits into from
Apr 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions api_app/queryset.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import datetime
import json
import logging
import uuid
from typing import TYPE_CHECKING, Generator, Type

Expand All @@ -13,6 +12,8 @@
from api_app.models import PythonConfig
from api_app.serializers import AbstractBIInterface

import logging

from celery.canvas import Signature
from django.db import models
from django.db.models import (
Expand All @@ -38,6 +39,8 @@
from certego_saas.apps.organization.membership import Membership
from certego_saas.apps.user.models import User

logger = logging.getLogger(__name__)


class SendToBiQuerySet(models.QuerySet):
@classmethod
Expand All @@ -54,10 +57,14 @@ def _create_index_template():
settings.ELASTICSEARCH_CLIENT.indices.put_template(
name=settings.ELASTICSEARCH_BI_INDEX, body=body
)
logger.info(
f"created template for Elastic named {settings.ELASTICSEARCH_BI_INDEX}"
)

def send_to_elastic_as_bi(self, max_timeout: int = 60) -> bool:
from elasticsearch.helpers import bulk

logger.info("BI start")
self._create_index_template()
BULK_MAX_SIZE = 1000
found_errors = False
Expand All @@ -74,13 +81,13 @@ def send_to_elastic_as_bi(self, max_timeout: int = 60) -> bool:
request_timeout=max_timeout,
)
if errors:
logging.error(
logger.error(
f"Errors on sending to elastic: {errors}."
" We are not marking objects as sent."
)
found_errors |= errors
else:
logging.info("BI sent")
logger.info("BI sent")
self.model.objects.filter(
pk__in=objects.values_list("pk", flat=True)
).update(sent_to_bi=True)
Expand Down
1 change: 0 additions & 1 deletion api_app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def get_environment(instance):
def to_elastic_dict(data):
return {
"_source": data,
"_type": "_doc",
"_index": settings.ELASTICSEARCH_BI_INDEX + "-" + now().strftime("%Y.%m"),
"_op_type": "index",
}
Expand Down
78 changes: 38 additions & 40 deletions configuration/elastic_search_mappings/intel_owl_bi.json
Original file line number Diff line number Diff line change
@@ -1,47 +1,45 @@
{
"settings" : {
"number_of_shards" : 3
"number_of_shards" : 1
},
"mappings": {
"_doc": {
"dynamic": false,
"properties": {
"timestamp": {
"type": "date"
},
"application": {
"type": "keyword"
},
"username": {
"type": "keyword"
},
"environment": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"process_time": {
"type": "integer"
},
"status": {
"type": "keyword"
},
"end_time": {
"type": "date"
},
"parameters": {
"type": "object",
"dynamic": true
},
"playbook": {
"type": "keyword"
},
"class_instance": {
"type": "keyword"
}

}
"dynamic": false,
"properties": {
"timestamp": {
"type": "date"
},
"application": {
"type": "keyword"
},
"username": {
"type": "keyword"
},
"environment": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"process_time": {
"type": "integer"
},
"status": {
"type": "keyword"
},
"end_time": {
"type": "date"
},
"parameters": {
"type": "object",
"dynamic": true
},
"playbook": {
"type": "keyword"
},
"class_instance": {
"type": "keyword"
}

}
}
}
3 changes: 0 additions & 3 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ ENV LOG_PATH /var/log/intel_owl
ARG REPO_DOWNLOADER_ENABLED=true
ARG WATCHMAN=false
ENV watch_logs_cmd "watch -n1 tail -n10 /var/log/intel_owl/django/api_app.log"
ARG PYELASTIC_VERSION=7.4.1
ARG PYCTI_VERSION=5.12.29

RUN mkdir -p ${LOG_PATH} \
Expand Down Expand Up @@ -51,8 +50,6 @@ COPY requirements/certego-requirements.txt $PYTHONPATH/certego-requirements.txt
WORKDIR $PYTHONPATH

RUN pip3 install --no-cache-dir --compile -r project-requirements.txt \
# install elasticsearch-dsl's appropriate version as specified by user
&& pip3 install --no-cache-dir elasticsearch-dsl==${PYELASTIC_VERSION} \
&& pip3 install --no-cache-dir pycti==${PYCTI_VERSION} \
&& pip3 install --no-cache-dir --compile -r certego-requirements.txt

Expand Down
1 change: 0 additions & 1 deletion docker/env_file_app_ci
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ ELASTICSEARCH_DSL_NO_OF_REPLICAS=0

ELASTICSEARCH_BI_ENABLED=False
ELASTICSEARCH_BI_HOST=
ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME=elastisearch.crt
ELASTICSEARCH_BI_INDEX=intelowl-bi


Expand Down
1 change: 0 additions & 1 deletion docker/env_file_app_template
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ ELASTICSEARCH_DSL_NO_OF_REPLICAS=0

ELASTICSEARCH_BI_ENABLED=False
ELASTICSEARCH_BI_HOST=
ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME=elastisearch.crt
ELASTICSEARCH_BI_INDEX=intelowl-bi

# Test tokens
Expand Down
1 change: 0 additions & 1 deletion docker/test.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ services:
args:
REPO_DOWNLOADER_ENABLED: ${REPO_DOWNLOADER_ENABLED}
WATCHMAN: "true"
PYELASTIC_VERSION: ${PYELASTIC_VERSION:-7.4.1}
PYCTI_VERSION: ${PYCTI_VERSION:-5.10.0}
image: intelowlproject/intelowl:test
volumes:
Expand Down
14 changes: 5 additions & 9 deletions docs/source/Advanced-Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ This page includes details about some advanced features that Intel Owl provides
- [Manual usage](#manual-usage)

## ElasticSearch
Right now only ElasticSearch v8 is supported.

### DSL
IntelOwl makes use of [django-elasticsearch-dsl](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html) to index Job results into elasticsearch. The `save` and `delete` operations are auto-synced so you always have the latest data in ES.
Expand All @@ -40,9 +41,7 @@ Intel Owl provides a Kibana's "Saved Object" configuration (with example dashboa
1. Set up [Elastic Search and Kibana](https://hub.docker.com/r/nshou/elasticsearch-kibana/) and assume it is running in a docker service with name `elasticsearch` on port `9200`, which is exposed to the shared docker network.
(Alternatively, you can spin up a local Elastic Search instance by appending `--elastic` to the `./start` command. Note that the local Elastic Search instance consumes a large amount of memory, so having >=16GB is recommended.)
2. In the `env_file_app`, we set `ELASTICSEARCH_DSL_ENABLED` to `True` and `ELASTICSEARCH_DSL_HOST` to `elasticsearch:9200`.
3. Configure the version of the ElasticSearch Library used [depending on the version](https://django-elasticsearch-dsl.readthedocs.io/en/latest/about.html#features) of our Elasticsearch server. This is required for compatibility. To do that, you can leverage the option `--pyelastic-version` of the `./start` script. The default value of that parameter indicates the version that would be installed by default.
4. Rebuild the docker images with `./start test build --pyelastic-version x.x.x` (required only if you changed the default value of `--pyelastic-version`)
5. Now start the docker containers and execute,
3. Now start the docker containers and execute

```bash
docker exec -ti intelowl_uwsgi python manage.py search_index --rebuild
Expand All @@ -52,7 +51,7 @@ This will build and populate all existing job objects into the `jobs` index.


### Business Intelligence
IntelOwl makes use of [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/7.x/index.html) to store data that can be used for Business Intelligence purpose.
IntelOwl makes use of [elasticsearch-py](https://elasticsearch-py.readthedocs.io/en/8.x/index.html) to store data that can be used for Business Intelligence purpose.
Since plugin reports are deleted periodically, this feature allows a small amount of data to be saved indefinitely, to keep track of how analyzers perform and how users use the platform.
At the moment, the following information is sent to Elastic:
- application name
Expand All @@ -65,16 +64,13 @@ At the moment, the following information are sent to elastic:
- parameters

Documents are saved in the `ELASTICSEARCH_BI_INDEX-%YEAR.%MONTH` index (for example `intelowl-bi-2024.04`), which allows you to manage retention accordingly.
To activate this feature, it is necessary to set `ELASTICSEARCH_BI_ENABLED`
to `True` in the `env_file_app` and
To activate this feature, it is necessary to set `ELASTICSEARCH_BI_ENABLED` to `True` in the `env_file_app` and
`ELASTICSEARCH_BI_HOST` to `elasticsearch:9200`
or your elasticsearch server.
At last, you have to copy your ssl certificate in the `configuration` folder
and set `ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME` to your certificate file name.

An [index template](https://github.com/intelowlproject/IntelOwl/configuration/elastic_search_mappings/intel_owl_bi.json) is created after the first bulk submission of reports.
If you want to use Kibana to visualize your data or build dashboards, you must create an index pattern:
Go to Kibana -> Management -> Index Patterns -> search for your index and use as time field `timestamp`
Go to Kibana -> Discover -> Stack Management -> Index Patterns -> search for your index and use as time field `timestamp`

## Authentication options

Expand Down
50 changes: 13 additions & 37 deletions intel_owl/settings/elasticsearch.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,32 @@
# This file is a part of IntelOwl https://github.com/intelowlproject/IntelOwl
# See the file 'LICENSE' for copying permission.

# Elastic Search Configuration
from ssl import create_default_context

from elasticsearch import Elasticsearch

from intel_owl import secrets
from intel_owl.settings import CONFIG_ROOT

ELASTICSEARCH_BI_ENABLED = (
secrets.get_secret("ELASTICSEARCH_BI_ENABLED", False) == "True"
)
if ELASTICSEARCH_BI_ENABLED:
ELASTICSEARCH_BI_HOST = secrets.get_secret("ELASTICSEARCH_BI_HOST").split(",")
ELASTICSEARCH_SSL_CERTIFICATE_PATH = CONFIG_ROOT / secrets.get_secret(
"ELASTICSEARCH_SSL_CERTIFICATE_FILE_NAME", "elasticsearch.crt"
)
ELASTICSEARCH_BI_INDEX = secrets.get_secret("ELASTICSEARCH_BI_INDEX")
if (
not ELASTICSEARCH_BI_HOST
or not ELASTICSEARCH_SSL_CERTIFICATE_PATH
or not ELASTICSEARCH_BI_INDEX
):

if not ELASTICSEARCH_BI_HOST or not ELASTICSEARCH_BI_INDEX:
print("Elasticsearch not correctly configured")
elif not ELASTICSEARCH_SSL_CERTIFICATE_PATH.exists():
print(
f"Elasticsearch certificate {ELASTICSEARCH_SSL_CERTIFICATE_PATH}"
" not found"
)

else:
elastic_ssl_context = create_default_context(
cafile=str(ELASTICSEARCH_SSL_CERTIFICATE_PATH)
ELASTICSEARCH_CLIENT = Elasticsearch(
ELASTICSEARCH_BI_HOST,
maxsize=20,
max_retries=10,
retry_on_timeout=True,
timeout=30,
sniff_on_connection_fail=True,
sniff_timeout=30,
)
from importlib.metadata import version

v = version("elasticsearch")
major_version = int(v.split(".")[0])
if major_version < 8:
ELASTICSEARCH_CLIENT = Elasticsearch(
ELASTICSEARCH_BI_HOST,
ssl_context=elastic_ssl_context,
scheme="https",
maxsize=20,
max_retries=10,
retry_on_timeout=True,
timeout=30,
sniff_on_connection_fail=True,
sniff_timeout=30,
)
else:
raise RuntimeError(f"Elastic version {v} is not supported at the moment.")
if not ELASTICSEARCH_CLIENT.ping():
print("ELASTICSEARCH client configuration did not connect correctly")

ELASTICSEARCH_DSL_ENABLED = (
secrets.get_secret("ELASTICSEARCH_DSL_ENABLED", False) == "True"
Expand Down
18 changes: 10 additions & 8 deletions requirements/project-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,20 @@ djangorestframework-filters==1.0.0.dev2
drf-spectacular==0.27.1
django-rest-email-auth==4.0.0

uWSGI==2.0.22
uwsgitop==0.12
whitenoise==6.6.0

# infra
boto3==1.26.143
celery[sqs,redis]==5.3.0
dataclasses==0.6
# https://github.com/advisories/GHSA-q4qm-xhf9-4p8f
# unpatched CVE: noproblem, we just use this for debugging purposes
flower==2.0.0
uWSGI==2.0.22
uwsgitop==0.12
whitenoise==6.6.0
daphne==4.1.0
channels==4.0.0
channels-redis==4.2.0
elasticsearch-dsl==8.13.0

# plugins
GitPython==3.1.41
Expand Down Expand Up @@ -76,7 +79,6 @@ querycontacts==2.0.0
pyxlsb2==0.0.8
xlrd2==1.3.4
defusedxml==0.7.1
dateparser==1.2.0
daphne==4.1.0
channels==4.0.0
channels-redis==4.2.0

#others
dateparser==1.2.0
18 changes: 2 additions & 16 deletions start
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

generic_version_regex="^[0-9]{1,2}\.[0-9]{1,2}.[0-9]{1,2}$"
version_regex="^[3-9]\.[0-9]{1,2}.[0-9]{1,2}$"
pyelastic_default_version="7.4.1"
pycti_default_version="5.10.0"

# little trick for exact matching in arrays
Expand Down Expand Up @@ -55,9 +54,6 @@ print_help () {
echo " --pycti-version <value> The pycti version to choose. This must match the"
echo " OpenCTI server version you are connecting to."
echo " Default is ${pycti_default_version}."
echo " --pyelastic-version <value> The py-elasticsearch version to choose. This"
echo " must match the server version you are"
echo " connecting to. Default is ${pyelastic_default_version}."
}

check_parameters () {
Expand Down Expand Up @@ -87,7 +83,6 @@ check_parameters () {
set_defaults_values () {
project_name="intel_owl"
version=$current_version
export PYELASTIC_VERSION=$pyelastic_default_version
export PYCTI_VERSION=$pycti_default_version
}

Expand Down Expand Up @@ -213,14 +208,6 @@ while [[ $# -gt 0 ]]; do
params["elastic"]=true
shift 1
;;
--pyelastic-version)
if ! [[ $2 =~ $generic_version_regex ]]; then
echo "Error! Wrong pyelastic version format." >&2
exit 1
fi
export PYELASTIC_VERSION=$2
shift 2
;;
--pycti-version)
if ! [[ $2 =~ $generic_version_regex ]]; then
echo "Error! Wrong pycti version format." >&2
Expand Down Expand Up @@ -248,9 +235,8 @@ done
# here all variables should be parsed and ready for use
cmd_py_version=("up" "build")
if [[ ( ! $env_argument == "test" || ! ${cmd_py_version[*]} =~ $cmd_argument ) && \
( $PYELASTIC_VERSION != "$pyelastic_default_version" || \
$PYCTI_VERSION != "$pycti_default_version" ) ]]; then
echo "pycti_version and pyelastic_version options are valid only while running in" >&2
( $PYCTI_VERSION != "$pycti_default_version" ) ]]; then
echo "pycti_version options are valid only while running in" >&2
echo "'test' mode and while building a new image. This is because they can change" >&2
echo "the version of those library only during the build of a new Docker Image." >&2
exit 11
Expand Down
Loading