From 1e4eb4bb65f0f5906a84b676d49bc38d3bf36d93 Mon Sep 17 00:00:00 2001 From: Pedro Nascimento Date: Mon, 19 Aug 2024 15:35:21 -0300 Subject: [PATCH] feat: Refactor normalize_case function in utils.py --- app/utils.py | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/app/utils.py b/app/utils.py index 82d2e81..ca0b6ae 100644 --- a/app/utils.py +++ b/app/utils.py @@ -3,7 +3,6 @@ import jwt import hashlib import json -import nltk from typing import Literal from loguru import logger from passlib.context import CryptContext @@ -12,7 +11,6 @@ from app.models import User -nltk.download('punkt_tab') pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") @@ -141,24 +139,5 @@ def read_timestamp(timestamp: int, output_format=Literal['date','datetime']) -> raise ValueError("Invalid format") def normalize_case(text): - if not text: - return None - - processed_lines = [] - for line in text.splitlines(): - pre_symbol = '' - if line.startswith('#'): - pre_symbol = '#' - line = line[1:] - if line.startswith('-'): - pre_symbol = '-' - line = line[1:] - line = line.strip() - - sentences = nltk.tokenize.sent_tokenize(line) - normalized_sentences = [sentence.capitalize() for sentence in sentences] - normalized_text = pre_symbol + ' ' + ' '.join(normalized_sentences) - - processed_lines.append(normalized_text) - - return '\n'.join(processed_lines) + # TODO + return text