Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a new field files_info and a new table for error_handling #175

Merged
merged 3 commits into from
Oct 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions dj_backend_server/web/migrations/0004_pdfdatasource_files_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-10-21 05:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``files_info`` JSON field to the ``pdfdatasource`` model."""

    # Applied after the preceding migration of the ``web`` app.
    dependencies = [('web', '0003_alter_pdfdatasource_id')]

    operations = [
        # null=True lets rows that already exist keep an empty value.
        migrations.AddField(
            'pdfdatasource',
            'files_info',
            field=models.JSONField(null=True),
        ),
    ]
23 changes: 23 additions & 0 deletions dj_backend_server/web/migrations/0005_pdfdatasourceerrorlog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.3 on 2023-10-21 05:52

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the ``PdfDataSourceErrorLog`` model."""

    # Applied after the migration that added ``files_info``.
    dependencies = [('web', '0004_pdfdatasource_files_info')]

    operations = [
        migrations.CreateModel(
            name='PdfDataSourceErrorLog',
            fields=[
                (
                    'id',
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                ('error_message', models.TextField()),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                (
                    'pdf_data_source',
                    # CASCADE: deleting the data source deletes its error logs.
                    models.ForeignKey(
                        to='web.pdfdatasource',
                        related_name='error_logs',
                        on_delete=django.db.models.deletion.CASCADE,
                    ),
                ),
            ],
        ),
    ]
22 changes: 16 additions & 6 deletions dj_backend_server/web/models/pdf_data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import uuid

class PdfDataSource(models.Model):
#id = models.CharField(max_length=36, primary_key=True)
id = models.AutoField(primary_key=True)
chatbot = models.ForeignKey(Chatbot, related_name='pdf_data_sources', db_column='chatbot_id', on_delete=models.SET_NULL, null=True)
#chatbot_id = models.CharField(max_length=36, null=True)
files = models.JSONField()
files_info = models.JSONField(null=True)
folder_name = models.CharField(max_length=255, null=True)
created_at = models.DateTimeField(auto_now_add=True, null=True)
updated_at = models.DateTimeField(auto_now=True, null=True)
Expand All @@ -19,11 +18,11 @@ def set_id(self, _id):
def get_id(self):
return self.id

def set_chatbot_id(self, chatbot_id):
self.chatbot_id = chatbot_id
def set_chatbot(self, chatbot):
self.chatbot = chatbot

def get_chatbot_id(self):
return self.chatbot_id
def get_chatbot(self):
return self.chatbot

def set_files(self, files):
self.files = files
Expand All @@ -46,5 +45,16 @@ def set_status(self, status):
def get_status(self):
return self.ingest_status

def set_files_info(self, files_info):
self.files_info = files_info

def get_files_info(self):
return self.files_info

class Meta:
db_table = 'pdf_data_sources' # Replace 'pdf_data_source' with the actual table name in the database

class PdfDataSourceErrorLog(models.Model):
    """An error message recorded against a ``PdfDataSource``.

    Entries are reachable from the parent through the ``error_logs`` reverse
    relation and are removed together with it.
    """

    # Parent data source; CASCADE deletes the log rows with the parent.
    pdf_data_source = models.ForeignKey(
        PdfDataSource,
        on_delete=models.CASCADE,
        related_name='error_logs',
    )
    # Text of the error that was raised.
    error_message = models.TextField()
    # Filled in automatically when the row is first saved.
    created_at = models.DateTimeField(auto_now_add=True)
31 changes: 24 additions & 7 deletions dj_backend_server/web/services/handle_pdf_datasource.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# services.py
import os
import hashlib
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from web.models.chatbot import Chatbot
from web.models.pdf_data_sources import PdfDataSource
from web.models.pdf_data_sources import PdfDataSource, PdfDataSourceErrorLog
from uuid import uuid4
import secrets

Expand All @@ -20,29 +21,45 @@ def handle(self) -> PdfDataSource:
folder_path = f"website_data_sources/{folder_name}"

files_urls = []
files_info_list = []
for file in self.files:
try:
# Validate file types or other conditions if necessary
# For example: if not file.name.endswith('.pdf'): raise ValidationError('Invalid file type')

# Generate a unique file name using UUID
file_extension = os.path.splitext(file.name)[1]
file_name = str(uuid4()) + file_extension
file_path = os.path.join(folder_path, file_name)

file_uuid_name = str(uuid4()) + file_extension
file_path = os.path.join(folder_path, file_uuid_name)

# Generate hash of the file content
file_hash = hashlib.md5(file.read()).hexdigest()
file.seek(0) # Reset file pointer to beginning

# Save the file to the storage system
default_storage.save(file_path, file)

# Save file info
files_info = {
'original_name': file.name,
'uuid_name': file_uuid_name,
'hash': file_hash
}
files_urls.append(file_path)
files_info_list.append(files_info)

except Exception as e:
# Log the exception for debugging purposes
print(f"Error while uploading file: {file.name}, Error: {str(e)}")
# You can log the exception to a file or use a proper logging framework
# For example: logger.error(f"Error while uploading file: {file.name}, Error: {str(e)}")
error_log = PdfDataSourceErrorLog(pdf_data_source=data_source, error_message=str(e))
error_log.save()
# You can also raise a more specific custom exception if needed
raise ValidationError(f"Error while uploading file: {file.name}, Error: {str(e)}")

data_source.chatbot_id = self.bot.id
data_source.files = files_urls
data_source.files_info = files_info_list
data_source.folder_name = folder_name

data_source.save()
Expand Down
5 changes: 3 additions & 2 deletions dj_backend_server/web/templates/layout/app.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
<meta charset="utf-8">
<title>OpenChat - Dashboard</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<link href="{% static 'dashboard/css/vendors/flatpickr.min.css' %}" rel="stylesheet">
<link href="{% static 'dashboard/style.css' %}" rel="stylesheet">
<link href="{% static 'dashboard/css/vendors/flatpickr.min.css' %}" rel="stylesheet" />
<link href="{% static 'dashboard/style.css' %}" rel="stylesheet" />
<link href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@20..48,100..700,0..1,-50..200" rel="stylesheet" />
</head>

<body class="font-inter antialiased bg-slate-100 text-slate-600" {% if sidebarExpanded %} class="sidebar-expanded" {% endif %} x-data="{ sidebarOpen: false, sidebarExpanded: localStorage.getItem('sidebar-expanded') == 'true' }"
Expand Down
6 changes: 4 additions & 2 deletions dj_backend_server/web/templates/settings-data.html
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ <h3 class="text-xl leading-snug text-slate-800 font-bold mb-1">Data sources</h3>
<tbody
class="text-sm divide-y divide-slate-200 border-b border-slate-200">

{% for file in source.files %}
{% for file in source.merged_files %}
<tr>
<td
class="px-2 first:pl-5 last:pr-5 py-3 whitespace-nowrap md:w-1/2">
Expand All @@ -432,7 +432,9 @@ <h3 class="text-xl leading-snug text-slate-800 font-bold mb-1">Data sources</h3>
<td
class="px-2 first:pl-5 last:pr-5 py-3 whitespace-nowrap">
<div class="text-left">
<a href="{{ file.url }}">download</a>
<a href="{{ file.url }}"><span class="material-symbols-outlined">
download
</span></a>
</div>
</td>

Expand Down
1 change: 1 addition & 0 deletions dj_backend_server/web/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,5 @@
# Chat URL
path('chat/<str:token>/', views_chatbot.get_chat_view, name='chat'),
path('chat/<str:token>/send-message/', views_chatbot.send_message, name='sendMessage'),
path('website_data_sources/<path:file_path>', views_chatbot_settings.serve_website_data_source_file, name='website_data_source_file'),
]
44 changes: 32 additions & 12 deletions dj_backend_server/web/views/views_chatbot_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def delete_bot(request, id):
bot.delete()
return redirect('index')

def serve_website_data_source_file(request, file_path):
    """Serve a file stored under the ``website_data_sources`` directory.

    ``file_path`` is taken straight from the URL, so it is resolved and
    verified to stay inside the base directory before anything is opened.
    Requests that try to climb out (for example ``../settings.py``) get a 404
    instead of the contents of an arbitrary file.

    Args:
        request: The incoming HTTP request (not used).
        file_path: Path of the requested file, relative to
            ``website_data_sources``.

    Returns:
        A ``FileResponse`` streaming the file, or ``HttpResponseNotFound`` when
        the path escapes the base directory, does not name a regular file, or
        cannot be opened.
    """
    # The base directory is relative to the process working directory, exactly
    # as in the original join; realpath also collapses ".." and symlinks.
    base_dir = os.path.realpath('website_data_sources')
    try:
        target = os.path.realpath(os.path.join(base_dir, file_path))
        is_inside = os.path.commonpath([base_dir, target]) == base_dir
    except ValueError:
        # Embedded NUL bytes or a path on another drive: never a valid target.
        return HttpResponseNotFound()

    # isfile() rather than exists(): a directory must not reach open().
    if not is_inside or not os.path.isfile(target):
        return HttpResponseNotFound()

    try:
        # No context manager here: FileResponse closes the handle itself once
        # the response has been streamed.
        stream = open(target, 'rb')
    except OSError:
        # The file vanished or is unreadable between the check and the open.
        return HttpResponseNotFound()
    return FileResponse(stream)

def general_settings_update(request, id):
bot = get_object_or_404(Chatbot, id=id)
Expand Down Expand Up @@ -64,21 +70,35 @@ def get_history_by_session_id(request, id, session_id):
def data_settings(request, id):
    """Render the data-sources settings page for one chatbot.

    Loads the chatbot (404 when it does not exist) with its website, PDF and
    codebase data sources. Every PDF data source receives a transient
    ``merged_files`` attribute, a list of ``{'name': ..., 'url': ...}`` dicts
    that the ``settings-data.html`` template iterates to build download links.

    Args:
        request: The incoming HTTP request.
        id: Identifier of the chatbot (parameter name kept for callers).

    Returns:
        The rendered ``settings-data.html`` response.
    """
    bot = get_object_or_404(Chatbot, id=id)
    website_data_sources = WebsiteDataSource.objects.filter(chatbot_id=id).prefetch_related('crawled_pages')
    pdf_data_sources = PdfDataSource.objects.filter(chatbot_id=id)
    codebase_data_sources = CodebaseDataSource.objects.filter(chatbot_id=id)

    # APP_URL may be unset; fall back to an empty prefix so links become
    # root-relative instead of failing on None + str. Trailing slashes are
    # stripped to avoid "//" in the generated URL.
    base_url = (os.environ.get('APP_URL') or '').rstrip('/')

    # Iterating evaluates and caches the queryset, so the attribute set on each
    # instance is still there when the template loops over the same queryset.
    for source in pdf_data_sources:
        file_urls = source.get_files() or []
        # files_info is nullable: rows stored before the column existed hold
        # None, which must not break the page.
        file_infos = source.get_files_info() or []

        merged_files = []
        for index, file_url in enumerate(file_urls):
            file_info = file_infos[index] if index < len(file_infos) else {}
            merged_files.append({
                # Without a recorded original name, show the stored file name.
                'name': file_info.get('original_name') or os.path.basename(file_url),
                'url': f"{base_url}/{file_url}",
            })

        source.merged_files = merged_files

    return render(request, 'settings-data.html', {'bot': bot, 'website_data_sources': website_data_sources, 'pdf_data_sources': pdf_data_sources, 'codebase_data_sources': codebase_data_sources})

Expand Down
Loading