Skip to content

Commit

Permalink
Added a new field files_info where we keep a hash, uuid_name, and original file name for later record.
Browse files Browse the repository at this point in the history
  • Loading branch information
lvalics committed Oct 21, 2023
1 parent 4faf01e commit e749027
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 13 deletions.
18 changes: 18 additions & 0 deletions dj_backend_server/web/migrations/0004_pdfdatasource_files_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.3 on 2023-10-21 05:37

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``files_info`` JSON field to ``pdfdatasource``."""

    # Must run after the migration that altered the model's ``id`` field.
    dependencies = [("web", "0003_alter_pdfdatasource_id")]

    operations = [
        migrations.AddField(
            model_name="pdfdatasource",
            name="files_info",
            # Declared with null=True, so the field may be left empty.
            field=models.JSONField(null=True),
        ),
    ]
23 changes: 23 additions & 0 deletions dj_backend_server/web/migrations/0005_pdfdatasourceerrorlog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Generated by Django 4.2.3 on 2023-10-21 05:52

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Create the ``PdfDataSourceErrorLog`` model.

    Each row holds an error message and a creation timestamp, and references
    a ``web.pdfdatasource`` row through a cascading foreign key.
    """

    # Must run after the migration that added ``files_info``.
    dependencies = [("web", "0004_pdfdatasource_files_info")]

    operations = [
        migrations.CreateModel(
            name="PdfDataSourceErrorLog",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("error_message", models.TextField()),
                ("created_at", models.DateTimeField(auto_now_add=True)),
                (
                    "pdf_data_source",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="error_logs",
                        to="web.pdfdatasource",
                    ),
                ),
            ],
        ),
    ]
16 changes: 10 additions & 6 deletions dj_backend_server/web/models/pdf_data_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@
import uuid

class PdfDataSource(models.Model):
#id = models.CharField(max_length=36, primary_key=True)
id = models.AutoField(primary_key=True)
chatbot = models.ForeignKey(Chatbot, related_name='pdf_data_sources', db_column='chatbot_id', on_delete=models.SET_NULL, null=True)
#chatbot_id = models.CharField(max_length=36, null=True)
files = models.JSONField()
files_info = models.JSONField(null=True)
folder_name = models.CharField(max_length=255, null=True)
created_at = models.DateTimeField(auto_now_add=True, null=True)
updated_at = models.DateTimeField(auto_now=True, null=True)
Expand All @@ -19,11 +18,11 @@ def set_id(self, _id):
def get_id(self):
return self.id

def set_chatbot_id(self, chatbot_id):
self.chatbot_id = chatbot_id
def set_chatbot(self, chatbot):
self.chatbot = chatbot

def get_chatbot_id(self):
return self.chatbot_id
def get_chatbot(self):
return self.chatbot

def set_files(self, files):
self.files = files
Expand All @@ -48,3 +47,8 @@ def get_status(self):

class Meta:
db_table = 'pdf_data_sources' # Replace 'pdf_data_source' with the actual table name in the database

class PdfDataSourceErrorLog(models.Model):
    """An error message recorded against a single ``PdfDataSource``."""

    # The data source this entry belongs to. The reverse accessor on
    # ``PdfDataSource`` is ``error_logs``; CASCADE removes these rows when
    # the data source itself is deleted.
    pdf_data_source = models.ForeignKey(
        PdfDataSource,
        on_delete=models.CASCADE,
        related_name="error_logs",
    )
    # Text of the error.
    error_message = models.TextField()
    # Filled in automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
31 changes: 24 additions & 7 deletions dj_backend_server/web/services/handle_pdf_datasource.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# services.py
import os
import hashlib
from django.core.exceptions import ValidationError
from django.core.files.storage import default_storage
from web.models.chatbot import Chatbot
from web.models.pdf_data_sources import PdfDataSource
from web.models.pdf_data_sources import PdfDataSource, PdfDataSourceErrorLog
from uuid import uuid4
import secrets

Expand All @@ -20,29 +21,45 @@ def handle(self) -> PdfDataSource:
folder_path = f"website_data_sources/{folder_name}"

files_urls = []
files_info_list = []
for file in self.files:
try:
# Validate file types or other conditions if necessary
# For example: if not file.name.endswith('.pdf'): raise ValidationError('Invalid file type')

# Generate a unique file name using UUID
file_extension = os.path.splitext(file.name)[1]
file_name = str(uuid4()) + file_extension
file_path = os.path.join(folder_path, file_name)

file_uuid_name = str(uuid4()) + file_extension
file_path = os.path.join(folder_path, file_uuid_name)

# Generate hash of the file content
file_hash = hashlib.md5(file.read()).hexdigest()
file.seek(0) # Reset file pointer to beginning

# Save the file to the storage system
default_storage.save(file_path, file)

# Save file info
files_info = {
'original_name': file.name,
'uuid_name': file_uuid_name,
'hash': file_hash
}
files_urls.append(file_path)
files_info_list.append(files_info)

except Exception as e:
# Log the exception for debugging purposes
print(f"Error while uploading file: {file.name}, Error: {str(e)}")
# You can log the exception to a file or use a proper logging framework
# For example: logger.error(f"Error while uploading file: {file.name}, Error: {str(e)}")
error_log = PdfDataSourceErrorLog(pdf_data_source=data_source, error_message=str(e))
error_log.save()
# You can also raise a more specific custom exception if needed
raise ValidationError(f"Error while uploading file: {file.name}, Error: {str(e)}")

data_source.chatbot_id = self.bot.id
data_source.files = files_urls
data_source.files_info = files_info_list
data_source.folder_name = folder_name

data_source.save()
Expand Down

0 comments on commit e749027

Please sign in to comment.