diff --git a/dj_backend_server/Dockerfile.nginx b/dj_backend_server/Dockerfile.nginx
new file mode 100644
index 00000000..34821296
--- /dev/null
+++ b/dj_backend_server/Dockerfile.nginx
@@ -0,0 +1,7 @@
+# Use the official Nginx image
+FROM nginx
+
+COPY ./nginx/nginx.conf /etc/nginx/nginx.conf.template
+COPY ./entrypoint-nginx.sh /entrypoint-nginx.sh
+RUN chmod +x /entrypoint-nginx.sh
+ENTRYPOINT ["/entrypoint-nginx.sh"]
\ No newline at end of file
diff --git a/dj_backend_server/api/middleware/cors_middleware.py b/dj_backend_server/api/middleware/cors_middleware.py
new file mode 100644
index 00000000..a423ca64
--- /dev/null
+++ b/dj_backend_server/api/middleware/cors_middleware.py
@@ -0,0 +1,42 @@
+import os
+
+from django.utils.cache import patch_vary_headers
+from django.utils.deprecation import MiddlewareMixin
+
+from web.models.chatbot import Chatbot
+
+
+class CorsMiddleware(MiddlewareMixin):
+    """Add CORS response headers for allowed request origins.
+
+    An origin is allowed when it equals the ``APP_URL`` environment variable
+    or matches the ``website`` field of a ``Chatbot`` row.
+    """
+
+    def process_response(self, request, response):
+        """Add CORS headers to ``response`` if the origin is allowed."""
+        # The CORS headers depend on the request's Origin header, so caches
+        # must not reuse this response for a different origin.
+        patch_vary_headers(response, ['Origin'])
+
+        origin = request.META.get('HTTP_ORIGIN')
+
+        # No Origin header: skip the database lookup and never echo back a
+        # bogus "None" origin (None == None when APP_URL is unset).
+        if not origin:
+            return response
+
+        app_url = os.getenv('APP_URL')
+        origin_allowed = (
+            origin == app_url
+            or Chatbot.objects.filter(website=origin).exists()
+        )
+
+        if origin_allowed:
+            response['Access-Control-Allow-Origin'] = origin
+            response['Access-Control-Allow-Methods'] = 'GET, POST, OPTIONS'
+            response['Access-Control-Allow-Headers'] = (
+                'X-Requested-With, Content-Type, X-Bot-Token'
+            )
+
+        return response
diff --git a/dj_backend_server/dj_backend_server/settings.py
b/dj_backend_server/dj_backend_server/settings.py
index 324358ee..a330e9a9 100644
--- a/dj_backend_server/dj_backend_server/settings.py
+++ b/dj_backend_server/dj_backend_server/settings.py
@@ -62,13 +62,14 @@ MIDDLEWARE = [
 
     'django.middleware.locale.LocaleMiddleware',
     'django.middleware.security.SecurityMiddleware',
+    'api.middleware.cors_middleware.CorsMiddleware',
     'django.contrib.sessions.middleware.SessionMiddleware',
     'django.middleware.common.CommonMiddleware',
     'django.middleware.csrf.CsrfViewMiddleware',
     'django.contrib.auth.middleware.AuthenticationMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'django.middleware.clickjacking.XFrameOptionsMiddleware',
-    'corsheaders.middleware.CorsMiddleware',
+    #'corsheaders.middleware.CorsMiddleware',
 ]
 
 ROOT_URLCONF = 'dj_backend_server.urls'
@@ -183,13 +184,17 @@ SESSION_ENGINE = 'django.contrib.sessions.backends.db' # You can choose other engines as well
 
 
 
-#ALLOWED_HOSTS = [
-#    'localhost',
-#    '0.0.0.0',
-#]
 ALLOWED_HOSTS = os.environ.get('ALLOWED_HOSTS', '0.0.0.0').split(',')
 APP_URL = os.environ.get('APP_URL', 'http://0.0.0.0:8000')
 
 CORS_ALLOWED_ORIGINS = [
     APP_URL,
-]
\ No newline at end of file
+]
+
+CSRF_TRUSTED_ORIGINS = [
+    APP_URL,
+]
+
+# CSRF_COOKIE_DOMAIN must be a single bare domain string (or None), never a
+# list of URLs. Leave it unset unless the deployment provides one explicitly.
+CSRF_COOKIE_DOMAIN = os.environ.get('CSRF_COOKIE_DOMAIN') or None
diff --git a/dj_backend_server/docker-compose.linux.yaml b/dj_backend_server/docker-compose.linux.yaml
index 430aa2de..7dc23c44 100644
--- a/dj_backend_server/docker-compose.linux.yaml
+++ b/dj_backend_server/docker-compose.linux.yaml
@@ -35,6 +35,9 @@ services:
   nginx:
-    image: nginx
+    image: oc_nginx
     container_name: oc_nginx
+    build:
+      context: .
+      dockerfile: Dockerfile.nginx
     restart: unless-stopped
     ports:
       - "80:80"
@@ -46,6 +49,9 @@ services:
       - ./static:/app/web/static/
     networks:
       - openchat_network
+    env_file:
+      - .env.docker
+    #entrypoint: ["/entrypoint-nginx.sh"]
     depends_on:
       - qdrant
       - mysql
diff --git a/dj_backend_server/entrypoint-nginx.sh b/dj_backend_server/entrypoint-nginx.sh
new file mode 100644
index 00000000..b7088b72
--- /dev/null
+++ b/dj_backend_server/entrypoint-nginx.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+set -e
+
+# Remove 'http://' or 'https://' prefix from APP_URL
+CLEANED_APP_URL=${APP_URL#http://}
+CLEANED_APP_URL=${CLEANED_APP_URL#https://}
+
+echo "Replacing APP_URL with $CLEANED_APP_URL"
+
+# Dockerfile.nginx installs the config at the template path; render it to the
+# main nginx configuration path with the real host name filled in.
+NGINX_TEMPLATE="/etc/nginx/nginx.conf.template"
+NGINX_CONF="/etc/nginx/nginx.conf"
+
+sed "s|yourdomain.com|$CLEANED_APP_URL|g" "$NGINX_TEMPLATE" > "$NGINX_CONF"
+
+# Run nginx in the foreground as the container's main process
+exec nginx -g "daemon off;"
diff --git a/dj_backend_server/nginx/nginx.conf b/dj_backend_server/nginx/nginx.conf
index 5252aa86..d2e755a9 100644
--- a/dj_backend_server/nginx/nginx.conf
+++ b/dj_backend_server/nginx/nginx.conf
@@ -9,6 +9,34 @@ http {
         listen 80;
         server_name yourdomain.com; # Replace with your domain name or IP address
 
+        # Connection, upload-size, compression and file-cache settings for this server block
+        charset utf-8;
+
+        keepalive_timeout 500;
+        keepalive_requests 5000;
+
+        client_max_body_size 64m;
+        client_body_buffer_size 64m;
+
+        sendfile on;
+        server_tokens off;
+
+        tcp_nopush on;
+        tcp_nodelay on;
+        reset_timedout_connection on;
+
+        gzip on;
+        gzip_comp_level 5;
+        gzip_min_length 256;
+        gzip_proxied any;
+        gzip_types application/javascript application/json application/xml text/css text/plain text/xml;
+        gzip_vary on;
+
+        open_file_cache max=1000 inactive=20s;
+        open_file_cache_valid 30s;
+        open_file_cache_min_uses 2;
+        open_file_cache_errors on;
+
         location /static {
             proxy_pass https://web:8000;
             expires -1; #dev env
@@ -73,6 +101,34 @@ http {
                 text/html html;
             }
 
+        # Connection, upload-size, compression and file-cache settings for this server block
+        charset utf-8;
+
+        keepalive_timeout 500;
+        keepalive_requests 5000;
+
+        client_max_body_size 64m;
+        client_body_buffer_size 64m;
+
+        sendfile on;
+        server_tokens off;
+
+        tcp_nopush on;
+        tcp_nodelay on;
+        reset_timedout_connection on;
+
+        gzip on;
+        gzip_comp_level 5;
+        gzip_min_length 256;
+        gzip_proxied any;
+        gzip_types application/javascript application/json application/xml text/css text/plain text/xml;
+        gzip_vary on;
+
+        open_file_cache max=1000 inactive=20s;
+        open_file_cache_valid 30s;
+        open_file_cache_min_uses 2;
+        open_file_cache_errors on;
+
         # location /static/ {
         #     alias /app/web/static/; # The trailing slash is important
         #     # proxy_set_header Host $host;
@@ -110,7 +166,7 @@ http {
             proxy_set_header X-Real-IP $remote_addr;
             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
             proxy_set_header X-Forwarded-Proto $scheme; # Forward the original scheme (HTTP or HTTPS)
-            proxy_set_header Origin ""; # Optionally forward the Origin header
+            proxy_set_header Origin $http_origin; # Optionally forward the Origin header
             proxy_ssl_protocols TLSv1 TLSv1.1 TLSv1.2 TLSv1.3;
             add_header Cache-Control "public, max-age=2592000";
             add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; # HSTS header
diff --git a/dj_backend_server/web/migrations/0006_crawledpages_content_file.py b/dj_backend_server/web/migrations/0006_crawledpages_content_file.py
new file mode 100644
index 00000000..189f67cd
--- /dev/null
+++ b/dj_backend_server/web/migrations/0006_crawledpages_content_file.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.3 on 2023-10-27 17:32
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('web', '0005_pdfdatasourceerrorlog'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='crawledpages',
+            name='content_file',
+            field=models.CharField(max_length=255, null=True),
+        ),
+    ]
diff --git a/dj_backend_server/web/migrations/0007_alter_crawledpages_id.py
b/dj_backend_server/web/migrations/0007_alter_crawledpages_id.py
new file mode 100644
index 00000000..2952029b
--- /dev/null
+++ b/dj_backend_server/web/migrations/0007_alter_crawledpages_id.py
@@ -0,0 +1,21 @@
+# Generated by Django 4.2.3 on 2023-10-27 17:49
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('web', '0006_crawledpages_content_file'),
+    ]
+
+    operations = [
+        # NOTE(review): this switches the primary key from
+        # CharField(max_length=36) to AutoField; confirm that existing rows
+        # (whose ids are strings) convert cleanly on the target database.
+        migrations.AlterField(
+            model_name='crawledpages',
+            name='id',
+            field=models.AutoField(primary_key=True, serialize=False),
+        ),
+    ]
diff --git a/dj_backend_server/web/models/crawled_pages.py b/dj_backend_server/web/models/crawled_pages.py
index fd9069a3..70a0d64e 100644
--- a/dj_backend_server/web/models/crawled_pages.py
+++ b/dj_backend_server/web/models/crawled_pages.py
@@ -4,7 +4,7 @@ from web.models.chatbot import Chatbot
 
 
 class CrawledPages(models.Model):
-    id = models.CharField(max_length=36, primary_key=True)
+    id = models.AutoField(primary_key=True)
     chatbot_id = models.CharField(max_length=36, null=True)
     website_data_source = models.ForeignKey(WebsiteDataSource, on_delete=models.CASCADE, related_name='crawled_pages')
     url = models.CharField(max_length=255)
@@ -13,7 +13,7 @@ class CrawledPages(models.Model):
     aws_url = models.TextField(null=True)
     created_at = models.DateTimeField(auto_now_add=True)
     updated_at = models.DateTimeField(auto_now=True)
-    # content_file= models.CharField(max_length=100)
+    content_file = models.CharField(max_length=255, null=True)
 
     def get_id(self):
         return self.id
diff --git a/dj_backend_server/web/templates/onboarding/step-2-pdf.html b/dj_backend_server/web/templates/onboarding/step-2-pdf.html
index b7273295..7d54b0d4 100644
--- a/dj_backend_server/web/templates/onboarding/step-2-pdf.html
+++ b/dj_backend_server/web/templates/onboarding/step-2-pdf.html
@@ -110,7 +110,14 @@

Upload PDF files as sources
- +
+
+ + +
+
Make sure that your files are scannable (text not images) 🫶