diff --git a/dj_backend_server/web/migrations/0004_pdfdatasource_files_info.py b/dj_backend_server/web/migrations/0004_pdfdatasource_files_info.py
new file mode 100644
index 00000000..6b02715c
--- /dev/null
+++ b/dj_backend_server/web/migrations/0004_pdfdatasource_files_info.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.3 on 2023-10-21 05:37
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('web', '0003_alter_pdfdatasource_id'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='pdfdatasource',
+            name='files_info',
+            field=models.JSONField(null=True),
+        ),
+    ]
diff --git a/dj_backend_server/web/migrations/0005_pdfdatasourceerrorlog.py b/dj_backend_server/web/migrations/0005_pdfdatasourceerrorlog.py
new file mode 100644
index 00000000..0dc2a6a9
--- /dev/null
+++ b/dj_backend_server/web/migrations/0005_pdfdatasourceerrorlog.py
@@ -0,0 +1,23 @@
+# Generated by Django 4.2.3 on 2023-10-21 05:52
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('web', '0004_pdfdatasource_files_info'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='PdfDataSourceErrorLog',
+            fields=[
+                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+                ('error_message', models.TextField()),
+                ('created_at', models.DateTimeField(auto_now_add=True)),
+                ('pdf_data_source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='error_logs', to='web.pdfdatasource')),
+            ],
+        ),
+    ]
diff --git a/dj_backend_server/web/models/pdf_data_sources.py b/dj_backend_server/web/models/pdf_data_sources.py
index fd417253..a928b406 100644
--- a/dj_backend_server/web/models/pdf_data_sources.py
+++ b/dj_backend_server/web/models/pdf_data_sources.py
@@ -3,11 +3,10 @@ import uuid
 
 
 class PdfDataSource(models.Model):
-    #id = models.CharField(max_length=36, primary_key=True)
     id = models.AutoField(primary_key=True)
     chatbot = models.ForeignKey(Chatbot, related_name='pdf_data_sources', db_column='chatbot_id', on_delete=models.SET_NULL, null=True)
-    #chatbot_id = models.CharField(max_length=36, null=True)
     files = models.JSONField()
+    files_info = models.JSONField(null=True)
     folder_name = models.CharField(max_length=255, null=True)
     created_at = models.DateTimeField(auto_now_add=True, null=True)
     updated_at = models.DateTimeField(auto_now=True, null=True)
@@ -19,11 +18,11 @@ def set_id(self, _id):
     def get_id(self):
         return self.id
 
-    def set_chatbot_id(self, chatbot_id):
-        self.chatbot_id = chatbot_id
+    def set_chatbot(self, chatbot):
+        self.chatbot = chatbot
 
-    def get_chatbot_id(self):
-        return self.chatbot_id
+    def get_chatbot(self):
+        return self.chatbot
 
     def set_files(self, files):
         self.files = files
@@ -48,3 +47,8 @@ def get_status(self):
 
     class Meta:
         db_table = 'pdf_data_sources'  # Replace 'pdf_data_source' with the actual table name in the database
+
+class PdfDataSourceErrorLog(models.Model):
+    pdf_data_source = models.ForeignKey(PdfDataSource, related_name='error_logs', on_delete=models.CASCADE)
+    error_message = models.TextField()
+    created_at = models.DateTimeField(auto_now_add=True)
diff --git a/dj_backend_server/web/services/handle_pdf_datasource.py b/dj_backend_server/web/services/handle_pdf_datasource.py
index 322b31ed..96ee27da 100644
--- a/dj_backend_server/web/services/handle_pdf_datasource.py
+++ b/dj_backend_server/web/services/handle_pdf_datasource.py
@@ -1,9 +1,10 @@
 # services.py
 import os
+import hashlib
 from django.core.exceptions import ValidationError
 from django.core.files.storage import default_storage
 from web.models.chatbot import Chatbot
-from web.models.pdf_data_sources import PdfDataSource
+from web.models.pdf_data_sources import PdfDataSource, PdfDataSourceErrorLog
 from uuid import uuid4
 import secrets
 
@@ -20,29 +21,54 @@ def handle(self) -> PdfDataSource:
         folder_path = f"website_data_sources/{folder_name}"
 
         files_urls = []
+        files_info_list = []
+
+        # Bind every field up front (the two lists are filled in place below) so the
+        # data source can be persisted from the error path as well as the success path.
+        data_source.chatbot_id = self.bot.id
+        data_source.files = files_urls
+        data_source.files_info = files_info_list
+        data_source.folder_name = folder_name
+
         for file in self.files:
             try:
                 # Validate file types or other conditions if necessary
                 # For example: if not file.name.endswith('.pdf'): raise ValidationError('Invalid file type')
-                
+
                 # Generate a unique file name using UUID
                 file_extension = os.path.splitext(file.name)[1]
-                file_name = str(uuid4()) + file_extension
-                file_path = os.path.join(folder_path, file_name)
-                
+                file_uuid_name = str(uuid4()) + file_extension
+                file_path = os.path.join(folder_path, file_uuid_name)
+
+                # Generate hash of the file content
+                file_hash = hashlib.md5(file.read()).hexdigest()
+                file.seek(0)  # Reset file pointer to beginning
+
                 # Save the file to the storage system
                 default_storage.save(file_path, file)
+
+                # Save file info
+                files_info = {
+                    'original_name': file.name,
+                    'uuid_name': file_uuid_name,
+                    'hash': file_hash
+                }
 
                 files_urls.append(file_path)
+                files_info_list.append(files_info)
+
             except Exception as e:
                 # Log the exception for debugging purposes
                 print(f"Error while uploading file: {file.name}, Error: {str(e)}")
                 # You can log the exception to a file or use a proper logging framework
-                # For example: logger.error(f"Error while uploading file: {file.name}, Error: {str(e)}")
+                # The error log row references the data source through a foreign key, and
+                # Django refuses to save a row whose related object has no primary key yet,
+                # so persist the data source first if that has not happened.
+                if data_source.pk is None:
+                    data_source.save()
+                error_log = PdfDataSourceErrorLog(pdf_data_source=data_source, error_message=str(e))
+                error_log.save()
                 # You can also raise a more specific custom exception if needed
-                raise ValidationError(f"Error while uploading file: {file.name}, Error: {str(e)}")
-        
-        data_source.chatbot_id = self.bot.id
-        data_source.files = files_urls
-        data_source.folder_name = folder_name
+                raise ValidationError(f"Error while uploading file: {file.name}, Error: {str(e)}") from e
+
         data_source.save()