Lesson 1 #1

Open · wants to merge 18 commits into master
File renamed without changes.
File renamed without changes.
87 changes: 87 additions & 0 deletions lesson_1/homework/HW_2.py
@@ -0,0 +1,87 @@
from collections import OrderedDict
import csv
import doctest
import hashlib
import os.path
import unittest

import pytest # Run from console 'python -m pytest HW_2.py'


__author__ = "Vasilii Moskvin"


class TestHashSumm(unittest.TestCase):
    def test_hash_summ(self):
        self.assertEqual(get_hash_summ('I love Python', 'sha1'), '9233eac58259dd3a13d6c9c59f8001823b6b1fee')

    def test_hash_summ_exc(self):
        with self.assertRaises(KeyError):
            get_hash_summ('X', 'X')


def tst_hash_summ(in_string, in_code, ans):
    assert get_hash_summ(in_string, in_code) == ans, 'Wrong hash sum'


def get_hash_summ(in_string, in_code):
    """
    Returns the hex digest of in_string.
    :param in_string: input string
    :param in_code: hash algorithm name
    :return: hex digest of in_string

    >>> get_hash_summ('I love Python', 'sha1')  # doctest: +NORMALIZE_WHITESPACE
    '9233eac58259dd3a13d6c9c59f8001823b6b1fee'

    """
    dct_hash = dict(sha1=hashlib.sha1,
                    sha224=hashlib.sha224,
                    sha256=hashlib.sha256,
                    sha384=hashlib.sha384,
                    sha512=hashlib.sha512,
                    md5=hashlib.md5)

    h = dct_hash[in_code]()
    h.update(in_string.encode('utf-8'))

    return h.hexdigest()
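# A shorter alternative to the lookup table (a sketch): hashlib.new() accepts the same algorithm
# names, e.g. hashlib.new(in_code, in_string.encode('utf-8')).hexdigest(); note that it raises
# ValueError rather than KeyError for an unknown algorithm name.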


def main():
    """
    Main function. Reads need_hashes.csv, gets the hex digest for every string in the file
    and writes the computed digests back into need_hashes.csv.
    :return: changed need_hashes.csv

    """
    file_path = os.path.abspath(input('Enter path to need_hashes.csv:\n'))
    my_struct = ('string', 'code', 'hex_digest')

    with open(file_path, 'r') as csv_file:
        csv_file = csv.reader(csv_file, delimiter=';')
        data = [OrderedDict(zip(my_struct, row)) for row in csv_file]

    for src in data:  # Question: how could this loop be written in a single line?
        src['hex_digest'] = get_hash_summ(src['string'], src['code'])
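    # One possible single-line version (a sketch): rebuild each row with OrderedDict(mapping, **kwargs);
    # the key order is preserved because 'hex_digest' already exists in every row:
    #     data = [OrderedDict(src, hex_digest=get_hash_summ(src['string'], src['code'])) for src in data]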

    with open(file_path, 'w', newline='') as csv_file:
        csv_file = csv.DictWriter(csv_file, delimiter=';', fieldnames=my_struct)
        csv_file.writerows(data)
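    # Note: writerows() writes only the data rows; need_hashes.csv has no header line,
    # so none is written here (csv_file.writeheader() would add one if it were needed).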


def run_test():
    """
    Runs the tests for get_hash_summ(in_string, in_code):
    assert, doctest and unittest.
    :return: results of the tests for get_hash_summ(in_string, in_code)

    """
    tst_hash_summ('I love Python', 'sha1', '9233eac58259dd3a13d6c9c59f8001823b6b1fee')
    doctest.testmod()
    unittest.main()
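    # Note: unittest.main() calls sys.exit() once the tests finish, so nothing placed after it
    # in this function would run; unittest.main(exit=False) would keep the process alive.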


if __name__ == '__main__':
    # run_test()
    main()
205 changes: 205 additions & 0 deletions lesson_1/homework/HW_3.py
@@ -0,0 +1,205 @@
from collections import OrderedDict
import os
import re
import hashlib


__author__ = "Vasilii Moskvin"


def get_hash_summ(in_byte, in_code):
    """
    Returns the hex digest of in_byte.
    :param in_byte: input byte string
    :param in_code: hash algorithm name
    :return: hex digest of in_byte

    >>> get_hash_summ(b'I love Python', 'sha1')  # doctest: +NORMALIZE_WHITESPACE
    '9233eac58259dd3a13d6c9c59f8001823b6b1fee'

    """
    dct_hash = dict(sha1=hashlib.sha1,
                    sha224=hashlib.sha224,
                    sha256=hashlib.sha256,
                    sha384=hashlib.sha384,
                    sha512=hashlib.sha512,
                    md5=hashlib.md5)

    h = dct_hash[in_code]()
    h.update(in_byte)

    return h.hexdigest()


# ------------------------------------------------------3.1------------------------------------------------------------

'''
Implement a function that splits a file into chunks of a given length.

Input: the name of the source file and the chunk size in bytes
Output: the number of fragment files produced

'''


def cut_file(file_path, size):
    """
    Cuts the file 'file_path' into several parts of size 'size'.
    :param file_path: path to the file
    :param size: size of the parts in bytes
    :return: the number of parts created

    """
    dir_path = os.sep.join(file_path.split(os.sep)[:-1])
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    reg = '.{{1,{}}}'.format(size)
    data = re.findall(bytes(reg, encoding='utf-8'), raw_data, re.DOTALL)

    for index, src in enumerate(data):
        with open(os.path.join(dir_path, 'cut_{}'.format(str(index))), 'wb') as f:
            f.write(src)

    return str(len(data))
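# A regex-free alternative for the chunking step (a sketch; equivalent for any size > 0):
#     data = [raw_data[i:i + size] for i in range(0, len(raw_data), size)]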


def tst_cut_file(file_path, dir_path):
    """
    Tests the function cut_file(file_path, size).
    :param file_path: path to the original file that was cut
    :param dir_path: path to the directory with the cut parts
    :return: raises AssertionError if the file was not cut properly

    """
    lst_dir = [os.path.join(dir_path, filename) for filename in os.listdir(dir_path) if filename.startswith('cut_')]

    log_path = os.path.join(dir_path, 'log.jpg')
    with open(log_path, 'wb') as f:
        for src in sorted(lst_dir, key=lambda x: float(re.findall(r'.*_(\d+)$', x)[0])):
            with open(src, 'rb') as temp_f:
                temp_data = temp_f.read()
            f.write(temp_data)

    with open(log_path, 'rb') as f:
        log_data = f.read()
        log_hash = get_hash_summ(log_data, 'md5')

    with open(file_path, 'rb') as f:
        file_data = f.read()
        file_hash = get_hash_summ(file_data, 'md5')

    os.remove(log_path)

    assert log_hash == file_hash, 'Cut file error'


# ------------------------------------------------------3.2------------------------------------------------------------

'''
Implement a function that builds a text file with the md5 hashes of the files in a given directory.

Input: the name of the directory with files and the name of the file for the results
Output: the number of files examined

'''


def get_hash_in_dir(dir_path, res_path):
    """
    Writes a file with the list of hash sums of the files in dir_path.
    :param dir_path: path to the directory with files
    :param res_path: path to the result file
    :return: the number of files processed

    """
    lst_files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)
                 if not file_name.endswith('.md5')]

    lst_hash = []
    for file_path in lst_files:
        with open(file_path, 'rb') as f:
            data = f.read()
            lst_hash.append(get_hash_summ(data, 'md5'))

    with open(res_path, 'w') as f:
        for hash_line in lst_hash:
            f.write('{}\n'.format(hash_line))

    return len(lst_files)


# ------------------------------------------------------3.3------------------------------------------------------------

'''
Implement a function that "glues" a file back together based on an ordered list of hash sums.

Input: the directory with the chunk files, the file with the hash sums, and the name of the output file
Output: the size of the resulting file

'''


def get_full_file(dir_path, md5_file_path, res_path):
    """
    Creates the full file from its parts.
    :param dir_path: path to the directory with the parts of the full file
    :param md5_file_path: path to the file with the ordered list of hash sums
    :param res_path: path to the result file
    :return: the size of the resulting file in bytes

    """
    lst_files = [os.path.join(dir_path, file_name) for file_name in os.listdir(dir_path)
                 if not file_name.endswith('.md5')]

    with open(md5_file_path, 'r') as f:
        hash_summ = OrderedDict(map(lambda line: (line.strip(), ''), f))

    for file_path in lst_files:
        with open(file_path, 'rb') as f:
            data = f.read()
            h = hashlib.md5()
            h.update(data)
            hash_summ[h.hexdigest()] = file_path

    with open(res_path, 'wb') as f_res:
        for hash_value, file_path in hash_summ.items():
            with open(file_path, 'rb') as f:
                data = f.read()
                f_res.write(data)

    return os.path.getsize(res_path)


def main():
    # --------------------------------------3.1-----------------------------------------------------------------------

    print('№ 3.1 "Cut file"')
    dir_path = os.path.abspath(input('Enter path to the directory with the file to cut:\n'))
    file_path = os.path.join(dir_path, input('Enter the name of the file to cut:\n'))
    size = int(input('Enter the size of the cut parts (bytes):\n'))

    print('Count of new files: {}'.format(cut_file(file_path, size)))

    tst_cut_file(file_path, dir_path)

    # --------------------------------------3.2-----------------------------------------------------------------------

    print('№ 3.2 "List of hashes in a directory"')
    dir_path = os.path.abspath(input('Enter path to the directory with files:\n'))
    res_path = os.path.abspath(input('Enter path to the result file:\n'))

    print('Viewed {} files'.format(get_hash_in_dir(dir_path, res_path)))

    # --------------------------------------3.3-----------------------------------------------------------------------

    print('№ 3.3 "Create full file from many parts"')
    dir_path = os.path.abspath(input('Enter path to the directory with files:\n'))
    md5_file_path = os.path.abspath(input('Enter path to the file with the list of hashes:\n'))
    res_path = os.path.abspath(input('Enter path to the result file:\n'))

    print('Size of result file: {} bytes'.format(get_full_file(dir_path, md5_file_path, res_path)))


if __name__ == '__main__':
    main()
Binary file added lesson_1/homework/file1.zip
Binary file not shown.
Binary file added lesson_1/homework/file2.zip
Binary file not shown.
8 changes: 4 additions & 4 deletions lesson_1/homework/need_hashes.csv
@@ -1,5 +1,5 @@
 I love Python;sha1;9233eac58259dd3a13d6c9c59f8001823b6b1fee
-Я люблю Питон;md5;
-Guido like beer;sha512;
-Тили-мили-трямзия;md5;
-Spam and eggs and ham;sha1;
+Я люблю Питон;md5;8cda44d3a8ca06e9ebcd9e50fbb060a9
+Guido like beer;sha512;9490ca5a146f482ea40649b832017dbec7279462766ec6e02fac4c7910584c50572b3efd3d257c089431b8b88603226a9d3aa2fcd4185fcc8db9f71114e06d93
+Тили-мили-трямзия;md5;178d4b8a96e116c4462bfed4b6e83988
+Spam and eggs and ham;sha1;c13dd8ebf6f20c890594773c965932336d58277a
49 changes: 26 additions & 23 deletions lesson_1/homework/pep8_me.py
@@ -2,28 +2,31 @@
import string


def create_file(namef,dir,size):
    if(size.isdigit()!=True):
        if size.endswith('KB')==True:
            s1 = size.split('KB')
            size1 = int(s1[0])*1024
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(size1))
        if size.endswith('MB')==True:
            s1 = size.split('MB')
            size1 = int(s1[0])*1048567
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(size1))
        if size.endswith('GB') == True:
            s1 = size.split('GB')
            size1 = int(s1[0]) * 1073741824
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(size1))
    else:
        token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for x in range(int(size)))

    file = open(dir+namef,"w")
def create_file(name_f, dir_path, size):
    if not size.isdigit():
        if size.endswith('KB'):
            s1 = size.split('KB')
            size1 = int(s1[0]) * 1024
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
                            for _ in range(size1))
        elif size.endswith('MB'):
            s1 = size.split('MB')
            size1 = int(s1[0]) * 1048576  # 1 MB = 1024 * 1024 bytes
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
                            for _ in range(size1))
        elif size.endswith('GB'):
            s1 = size.split('GB')
            size1 = int(s1[0]) * 1073741824
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
                            for _ in range(size1))
        elif size.endswith('B'):  # plain byte sizes such as '1B'; without this branch token is never set
            size1 = int(size[:-1])
            token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
                            for _ in range(size1))
    else:
        token = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits)
                        for _ in range(int(size)))
    with open(dir_path + name_f, "w") as file:  # Ideally, os.path.join(dir_path, name_f) should be used here
        file.write(token)
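    # Sketch of the suggestion above (note: with a bare drive such as "E:", os.path.join()
    # produces a drive-relative path on Windows, so a full directory path is safer):
    #     with open(os.path.join(dir_path, name_f.lstrip('/')), "w") as file:
    #         file.write(token)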



create_file("/test1.txt","E:",'10KB')
create_file("/test2.txt","E:",'1024')
create_file("/test11.txt","E:",'2MB')
create_file("/test21.txt","E:",'1B')
create_file("/test1.txt", "E:", '10KB')
create_file("/test2.txt", "E:", '1024')
create_file("/test11.txt", "E:", '2MB')
create_file("/test21.txt", "E:", '1B')