-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
257 lines (206 loc) · 8.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
from __future__ import annotations

import os
import shutil
import string
import uuid
import xml.etree.ElementTree as ET
from random import SystemRandom, randint
from typing import Callable, NoReturn

import pandas as pd
'''
Write a Python program that does the following:
1. Creates 50 zip archives, each containing 100 xml files with random data of the following structure:
<root>
<var name='id' value='<random unique string value>'/>
<var name='level' value='<random number from 1 to 100>'/>
<objects>
<object name='<random string value>'/>
<object name='<random string value>'/>
...
</objects>
</root>
The objects tag contains a random number (from 1 to 10) of nested object tags.
2. Processes the directory with the resulting zip archives, parses the nested xml files and produces 2 csv files:
First: id, level - one row per xml file
Second: id, object_name - a separate row for each object tag (giving 1 to 10 rows per xml file)
'''
# Archive layout: number of zip archives to build and number of XML files per archive.
ZIP_COUNT, XML_COUNT = 50, 100
# Length of every randomly generated letter string.
LETTER_COUNT = 10
# Inclusive bounds of the random "level" value.
MIN_RANDOM_VALUE, MAX_RANDOM_VALUE = 1, 100
def get_string(length: int | None = None) -> str:
    """
    Build a string of random ASCII letters.

    :param length: number of letters to generate; when omitted, the module-level
        LETTER_COUNT is used
    :return: string of ``length`` characters drawn from ``string.ascii_letters``
    :rtype: str
    """
    if length is None:
        length = LETTER_COUNT
    # A single OS-backed generator serves the whole string; creating a new
    # SystemRandom for every letter only adds overhead.
    return ''.join(SystemRandom().choices(string.ascii_letters, k=length))
def remove_files_in_dir(path: str) -> None:
    """
    Remove every file located directly inside a directory.

    Subdirectories are left untouched: ``os.remove`` cannot delete them and
    would abort the cleanup halfway through.

    :param path: directory whose files should be removed
    :raises FileNotFoundError: if ``path`` does not exist
    """
    # Collect the targets first so the directory is not modified while it is
    # still being scanned.
    with os.scandir(path) as entries:
        targets = [entry.path for entry in entries if not entry.is_dir(follow_symlinks=False)]
    for target in targets:
        os.remove(target)
class XML:
    """A class for working with xml (generate, write to disk, parse, extract attributes)"""

    def __init__(self, xml_folder: str = "xml") -> None:
        """
        :param xml_folder: directory where generated xml files are written
        """
        self.xml_folder = xml_folder

    @staticmethod
    def generate_xml() -> ET.Element:
        """
        xml generation according to the task
        :return: root element holding an ``id`` var, a ``level`` var and 1 to 10 ``object`` tags
        :rtype: xml.etree.ElementTree.Element
        """
        root = ET.Element("root")
        # uuid4 is random, as the task requires; uuid1 is derived from the
        # clock and the host's network address.
        ET.SubElement(root, "var", name="id", value=str(uuid.uuid4()))
        ET.SubElement(root, "var", name="level", value=str(randint(MIN_RANDOM_VALUE, MAX_RANDOM_VALUE)))
        objects = ET.SubElement(root, "objects")
        for _ in range(randint(1, 10)):
            ET.SubElement(objects, "object", name=get_string())
        return root

    def create_xml_file(self, name: str) -> None:
        """
        Generate a random xml document and write it to ``<xml_folder>/<name>.xml``
        :param name: file name without the ``.xml`` extension
        """
        # Make sure the target folder exists so the write cannot fail on a missing directory.
        os.makedirs(self.xml_folder, exist_ok=True)
        tree = ET.ElementTree(XML.generate_xml())
        ET.indent(tree, space="\t", level=0)
        tree.write(os.path.join(self.xml_folder, name + '.xml'), encoding="utf-8", xml_declaration=True)

    @staticmethod
    def get_xml_parse(file: File) -> dict:
        """
        Get parsed dictionary from file
        :param file: object whose ``filepath`` attribute points to the xml file
        :return: dictionary produced by :meth:`get_xml_attributes`
        :rtype: dict
        """
        return XML.get_xml_attributes(XML.get_xml_list(file))

    @staticmethod
    def get_xml_list(file: File) -> list:
        """
        Get elements (tags, attributes) from xml file
        :param file: object whose ``filepath`` attribute points to the xml file
        :return: list of single-key dictionaries ``{tag: attributes}`` in document order
        :rtype: list
        """
        root = ET.parse(file.filepath).getroot()
        return [{elem.tag: elem.attrib} for elem in root.iter()]

    @staticmethod
    def get_xml_attributes(xml_list: list) -> dict:
        """
        Get attributes from list of dictionaries which consists of tags and attributes of xml file
        :param xml_list: list of ``{tag: attributes}`` dictionaries
        :return: dictionary with the keys ``level_values``, ``level_columns``,
            ``object_name_values`` and ``object_name_columns``
        :rtype: dict
        :raises KeyError: if the ``level`` var has no ``value`` or an ``object`` has no ``name``
        """
        entries = [(tag, attrs) for xml_dict in xml_list for tag, attrs in xml_dict.items()]

        # Resolve the document id first so the result does not depend on the
        # ``id`` var appearing before the level and the objects.
        file_id = ""
        for tag, attrs in entries:
            if tag == 'var' and attrs.get('name') == 'id':
                file_id = attrs.get('value', "")
                break

        level_values = []
        object_name_values = []
        for tag, attrs in entries:
            if tag == 'var' and attrs.get('name') == 'level':
                level_values.append((file_id, attrs['value']))
            elif tag == 'object':
                object_name_values.append((file_id, attrs['name']))

        # Column names are fixed; always returning them keeps the CSV header
        # stable even for a document with no level or no objects.
        return {
            'level_values': level_values,
            'level_columns': ['id', 'levels'],
            'object_name_values': object_name_values,
            'object_name_columns': ['id', 'obj_name'],
        }
class CSV:
    """A class with different operations on CSV files (create, write, save)"""

    @staticmethod
    def create_csv(content: list, columns: list) -> pd.DataFrame:
        """
        Build a data frame from rows
        :param content: list of row tuples
        :param columns: list of column names, one per tuple position
        :return: data frame holding the rows
        :rtype: pandas.DataFrame
        """
        return pd.DataFrame(content, columns=columns)

    @staticmethod
    def save_to_csv(obj: pd.DataFrame, filename: str) -> None:
        """
        Append a data frame to a csv file
        :param obj: data frame to append
        :param filename: path of the csv file; created if missing
        """
        # The header is needed when the file is new or exists but is still
        # empty; otherwise appended rows would land in a header-less file.
        needs_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
        obj.to_csv(filename, mode='a', header=needs_header, index=False)

    def write_to_csv(self, **kwargs: dict) -> None:
        """
        Append the levels and object names of one parsed file to their csv files
        :param kwargs: must contain ``content`` (dictionary with the keys
            ``level_values``, ``level_columns``, ``object_name_values``,
            ``object_name_columns``) and ``filenames_path`` (dictionary with
            the keys ``levels`` and ``obj_names`` mapping to csv paths)
        :raises KeyError: if a required key is missing
        """
        content = kwargs['content']
        filenames = kwargs['filenames_path']
        levels = self.create_csv(content=content['level_values'], columns=content['level_columns'])
        self.save_to_csv(levels, filenames['levels'])
        obj_names = self.create_csv(content=content['object_name_values'], columns=content['object_name_columns'])
        self.save_to_csv(obj_names, filenames['obj_names'])
class File:
    """Lightweight holder for the location of the file currently being processed"""

    def __init__(self, filepath: str) -> None:
        """
        :param filepath: path to the file on disk
        """
        self.filepath = filepath
class Parser:
    """Entry point that parses a file with the parser matching its format"""

    def parse(self, file: File, file_format: str) -> dict:
        """
        Look up the parse function for ``file_format`` and apply it to ``file``
        :param file: object describing the file to parse
        :param file_format: format name passed to ``get_parser``
        :return: whatever the selected parse function returns
        :rtype: dict
        """
        return get_parser(file_format)(file)
def get_parser(file_format: str) -> Callable[[File], dict]:
    """
    Creator that selects the parse function for a file format
    :param file_format: format name such as ``'xml'``; matching ignores case
        and a leading dot, so ``'.XML'`` is accepted too
    :type file_format: str
    :return: particular parse function taking a ``File`` and returning a dict
    :rtype: Callable[[File], dict]
    :raises ValueError: if no parser exists for ``file_format``
    """
    normalized = file_format.lower().lstrip('.')
    if normalized == 'xml':
        return XML.get_xml_parse
    raise ValueError(f"Unsupported file format: {file_format!r}")
class Converter:
    """A class to convert files to/from an archive format"""

    def __init__(
        self,
        output_filename: str = "",
        output_dir_path: str = "zip",
        archive_type: str = "zip",
        convert_dir_name: str = "xml"
    ) -> None:
        """
        Constructs all attributes for converting files
        :param output_filename: base name (without extension) of the archive to create
        :type output_filename: str
        :param output_dir_path: directory where archives are written to and read from
        :type output_dir_path: str
        :param archive_type: archive format name passed to ``shutil.make_archive``
        :type archive_type: str
        :param convert_dir_name: directory with the files to archive; also the
            directory archives are unpacked into
        :type convert_dir_name: str
        """
        self.output_dir_path = output_dir_path
        self.archive_type = archive_type
        self.convert_dir_name = convert_dir_name
        # Archive path without extension; make_archive appends the suffix itself.
        self.path = os.path.join(output_dir_path, output_filename)

    def convert_to_zip(self) -> None:
        """Pack the files of ``convert_dir_name`` into one archive, then empty that directory"""
        shutil.make_archive(self.path, self.archive_type, self.convert_dir_name)
        remove_files_in_dir(self.convert_dir_name)

    def convert_from_zip(self) -> None:
        """Unpack every archive found in ``output_dir_path`` into ``convert_dir_name``"""
        # List the archive directory itself: ``self.path`` also contains the
        # archive base name and is a directory only when that name is empty.
        for archive_name in sorted(os.listdir(self.output_dir_path)):
            archive_path = os.path.join(self.output_dir_path, archive_name)
            if not os.path.isfile(archive_path):
                # Subdirectories are not archives; skip them.
                continue
            shutil.unpack_archive(archive_path, self.convert_dir_name)
def run_files_to_zip() -> None:
    """
    Generate ZIP_COUNT archives in ``zip``, each holding XML_COUNT random xml files.

    Files are written to the ``xml`` working directory, archived as
    ``zip/test<i>``, and the working directory is emptied by the converter
    before the next batch. File names are numbered continuously across archives.
    """
    for folder in ("xml", "zip"):
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(folder, exist_ok=True)

    xml_writer = XML()
    for archive_index in range(ZIP_COUNT):
        first_number = archive_index * XML_COUNT
        for number in range(first_number, first_number + XML_COUNT):
            xml_writer.create_xml_file(f"test{number}")
        Converter(output_filename=f"test{archive_index}").convert_to_zip()
def run_files_from_zip() -> None:
    """
    Unpack every archive from ``zip`` into ``xml`` and export the parsed data to csv.

    Each file in ``xml`` is parsed according to its extension and appended to
    ``csv/levels.csv`` and ``csv/obj_names.csv``.

    :raises ValueError: if a file in ``xml`` has an extension with no parser
    """
    Converter(output_dir_path='zip', convert_dir_name='xml').convert_from_zip()
    os.makedirs("csv", exist_ok=True)

    csv_paths = {'levels': 'csv/levels.csv', 'obj_names': 'csv/obj_names.csv'}
    parser = Parser()
    csv_writer = CSV()
    for filename in os.listdir("xml"):
        filepath = os.path.join("xml", filename)
        # Take the extension from the file name alone so dots elsewhere in the
        # path cannot be mistaken for it.
        extension = os.path.splitext(filename)[1].lstrip('.')
        attributes_dict = parser.parse(File(filepath), extension)
        csv_writer.write_to_csv(content=attributes_dict, filenames_path=csv_paths)
# Script entry point: build the archives first, then unpack and export them to csv.
if __name__ == '__main__':
    for pipeline_step in (run_files_to_zip, run_files_from_zip):
        pipeline_step()