-
Notifications
You must be signed in to change notification settings - Fork 0
/
drake_clean_2.py
82 lines (60 loc) · 2.7 KB
/
drake_clean_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import json
def _clean_item(item):
    """Project one raw product record onto the reduced output schema."""
    # `vn_size` may be absent, explicitly null, or contain null entries;
    # normalise to a list of real values so min()/max() cannot raise on None.
    sizes = [size for size in (item.get("vn_size") or []) if size is not None]
    return {
        "price": item.get("price"),
        "gender": item.get("gender"),
        "color": item.get("color"),
        "name": item.get("name"),
        # Output key differs from the input key ("upper_material").
        "material": item.get("upper_material"),
        "min_size(vn_size)": min(sizes, default=None),
        "max_size(vn_size)": max(sizes, default=None),
        "lining": item.get("lining"),
        "sole": item.get("sole"),
        # Output key differs from the input key ("product_features").
        "description": item.get("product_features"),
    }


def clean_data(input_file, output_file):
    """Reduce every record in a JSON file to a fixed set of fields.

    Reads ``input_file`` (UTF-8 JSON; iterated as a sequence of dict-like
    records), keeps only price, gender, color, name, material, lining, sole
    and description, and collapses the ``vn_size`` list into its smallest and
    largest value. Fields missing from a record are written as ``null``; a
    missing, null or empty ``vn_size`` yields ``null`` for both size bounds.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten if present).

    Returns:
        None. The result is written to ``output_file`` as indented JSON with
        non-ASCII characters left unescaped.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)

    cleaned_data = [_clean_item(item) for item in data]

    # ensure_ascii=False keeps non-ASCII text readable in the output file.
    with open(output_file, 'w', encoding='utf-8') as file:
        json.dump(cleaned_data, file, ensure_ascii=False, indent=4)
# Run the cleaning pass over the fixed input path and report where the
# output was written.
input_file, output_file = (
    'data_clean/drake_clean.json',
    'data_clean/drake_clean_2.json',
)
clean_data(input_file, output_file)
print(f"Data cleaned and saved to {output_file}")
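# //////////////////////////////////////////////////////
# Disabled alternative: the same cleaning pass, but with color, name, material,
# lining and sole translated from English to Vietnamese via googletrans.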
# import json
# from googletrans import Translator
# translator = Translator()
# def translate_to_vietnamese(text):
# if text is None:
# return None
# translated = translator.translate(text, src='en', dest='vi')
# return translated.text
# def clean_data(input_file, output_file):
# with open(input_file, 'r', encoding='utf-8') as file:
# data = json.load(file)
# cleaned_data = []
# for item in data:
# cleaned_item = {
# "price": item.get("price"),
# "gender": item.get("gender"),
# "color": translate_to_vietnamese(item.get("color")),
# "name": translate_to_vietnamese(item.get("name")),
# "material": translate_to_vietnamese(item.get("upper_material")),
# "min_size(vn_size)": min(item.get("vn_size", []), default=None),
# "max_size(vn_size)": max(item.get("vn_size", []), default=None),
# "lining": translate_to_vietnamese(item.get("lining")),
# "sole": translate_to_vietnamese(item.get("sole")),
# "description": item.get("product_features")
# }
# cleaned_data.append(cleaned_item)
# with open(output_file, 'w', encoding='utf-8') as file:
# json.dump(cleaned_data, file, ensure_ascii=False, indent=4)
# input_file = 'data_clean/drake_clean.json'
# output_file = 'data_clean/drake_clean_2.json'
# clean_data(input_file, output_file)
# print(f"Data cleaned, translated, and saved to {output_file}")