From fecf8be1097292d48a26a344baac58d8e97d25f1 Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Wed, 17 Mar 2021 19:48:30 -0400 Subject: [PATCH 1/6] Commit description --- src/covidify/forecast.py | 109 +++++++++++++++++---------------- src/covidify/list_countries.py | 20 +++--- 2 files changed, 66 insertions(+), 63 deletions(-) diff --git a/src/covidify/forecast.py b/src/covidify/forecast.py index e96ad00..9058ac2 100644 --- a/src/covidify/forecast.py +++ b/src/covidify/forecast.py @@ -60,57 +60,58 @@ print('Creating reports folder...') os.system('mkdir -p ' + image_dir) - -def plot_forecast(tmp_df, train, index_forecast, forecast, confint): - ''' - Plot the values of train and test, the predictions from ARIMA and the shadowing - for the confidence interval. - - ''' - - # For shadowing - lower_series = pd.Series(confint[:, 0], index=index_forecast) - upper_series = pd.Series(confint[:, 1], index=index_forecast) - - print('... saving graph') - fig, ax = plt.subplots(figsize=FIG_SIZE) - plt.title('ARIMA - Prediction for cumalitive case counts {} days in the future'.format(days_in_future)) - plt.plot(tmp_df.cumulative_cases, label='Train',marker='o') - plt.plot(tmp_df.pred, label='Forecast', marker='o') - tmp_df.groupby('date')[['']].sum().plot(ax=ax) - plt.fill_between(index_forecast, - upper_series, - lower_series, - color='k', alpha=.1) - plt.ylabel('Infections') - plt.xlabel('Date') - fig.legend().set_visible(True) - fig = ax.get_figure() - fig.savefig(os.path.join(image_dir, 'cumulative_forecasts.png')) - - -def forecast(tmp_df, train, index_forecast, days_in_future): - - # Fit model with training data - model = auto_arima(train, trace=False, error_action='ignore', suppress_warnings=True) - model_fit = model.fit(train) - - forecast, confint = model_fit.predict(n_periods=len(index_forecast), return_conf_int=True) - - forecast_df = pd.concat([tmp_df, pd.DataFrame(forecast, index = index_forecast, columns=['pred'])], axis=1, sort=False) - date_range = [d.strftime('%Y-%m-%d') for d in pd.date_range(train_start, forecast_end)] - forecast_df['date'] = pd.Series(date_range).astype(str) - forecast_df[''] = None # Dates get messed up, so need to use pandas plotting - - # Save Model and file - print('... saving file:', forecast_file) - forecast_df.to_csv(os.path.join(data_dir, forecast_file)) - - plot_forecast(forecast_df, train, index_forecast, forecast, confint) - -if __name__ == '__main__': - print('Training forecasting model...') - - train = trend_df[trend_df.date.isin(train_period)].cumulative_cases - index_forecast = [x for x in range(train.index[-1]+1, train.index[-1] + days_in_future+1)] - forecast(trend_df, train, index_forecast, days_in_future) +class PlotForcast() + + def plot_forecast(tmp_df, train, index_forecast, forecast, confint): + ''' + Plot the values of train and test, the predictions from ARIMA and the shadowing + for the confidence interval. + + ''' + + # For shadowing + lower_series = pd.Series(confint[:, 0], index=index_forecast) + upper_series = pd.Series(confint[:, 1], index=index_forecast) + + print('... saving graph') + fig, ax = plt.subplots(figsize=FIG_SIZE) + plt.title('ARIMA - Prediction for cumalitive case counts {} days in the future'.format(days_in_future)) + plt.plot(tmp_df.cumulative_cases, label='Train',marker='o') + plt.plot(tmp_df.pred, label='Forecast', marker='o') + tmp_df.groupby('date')[['']].sum().plot(ax=ax) + plt.fill_between(index_forecast, + upper_series, + lower_series, + color='k', alpha=.1) + plt.ylabel('Infections') + plt.xlabel('Date') + fig.legend().set_visible(True) + fig = ax.get_figure() + fig.savefig(os.path.join(image_dir, 'cumulative_forecasts.png')) + +class Forcast: + def forecast(tmp_df, train, index_forecast, days_in_future): + + # Fit model with training data + model = auto_arima(train, trace=False, error_action='ignore', suppress_warnings=True) + model_fit = model.fit(train) + + forecast, confint = model_fit.predict(n_periods=len(index_forecast), return_conf_int=True) + + forecast_df = pd.concat([tmp_df, pd.DataFrame(forecast, index = index_forecast, columns=['pred'])], axis=1, sort=False) + date_range = [d.strftime('%Y-%m-%d') for d in pd.date_range(train_start, forecast_end)] + forecast_df['date'] = pd.Series(date_range).astype(str) + forecast_df[''] = None # Dates get messed up, so need to use pandas plotting + + # Save Model and file + print('... saving file:', forecast_file) + forecast_df.to_csv(os.path.join(data_dir, forecast_file)) + + plot_forecast(forecast_df, train, index_forecast, forecast, confint) + + if __name__ == '__main__': + print('Training forecasting model...') + + train = trend_df[trend_df.date.isin(train_period)].cumulative_cases + index_forecast = [x for x in range(train.index[-1]+1, train.index[-1] + days_in_future+1)] + forecast(trend_df, train, index_forecast, days_in_future) \ No newline at end of file diff --git a/src/covidify/list_countries.py b/src/covidify/list_countries.py index 8637489..01c2c7b 100644 --- a/src/covidify/list_countries.py +++ b/src/covidify/list_countries.py @@ -12,14 +12,16 @@ from covidify.sources import github from covidify.config import SCRIPT -def get_countries(): - print('Getting available countries...') - df = github.get() - df = df[df.confirmed > 0] +class TopCountries: - countries = sorted(list(set(df.country.values))) + def get_countries(): + print('Getting available countries...') + df = github.get() + df = df[df.confirmed > 0] - for a,b,c in zip(countries[::3],countries[1::3],countries[2::3]): - print('{:<30}{:<30}{:<}'.format(a,b,c)) - - print('\n\033[1;31mNUMBER OF COUNTRIES/AREAS INFECTED:\033[0;0m', len(countries)) \ No newline at end of file + countries = sorted(list(set(df.country.values))) + + for a,b,c in zip(countries[::3],countries[1::3],countries[2::3]): + print('{:<30}{:<30}{:<}'.format(a,b,c)) + + print('\n\033[1;31mNUMBER OF COUNTRIES/AREAS INFECTED:\033[0;0m', len(countries)) \ No newline at end of file From 4787deedc1b12a6288417f17d27ccdf3bad0e420 Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Wed, 17 Mar 2021 20:17:51 -0400 Subject: [PATCH 2/6] Commit description --- src/covidify/cli.py | 149 ++++++++++++++++--------------- src/covidify/data_prep.py | 181 ++++++++++++++++++++------------------ 2 files changed, 172 insertions(+), 158 deletions(-) diff --git a/src/covidify/cli.py b/src/covidify/cli.py index 9c15ce6..712881f 100644 --- a/src/covidify/cli.py +++ b/src/covidify/cli.py @@ -11,76 +11,82 @@ #get the path of covidify in site-packages env = covidify.__path__[0] -def check_output_folder(var, country_str, msg): - ''' - Check if the output folder is valid, if not - just default to dekstop - ''' - - if not var: - print('%sMESSAGE: %s' % (' '*5, msg)) - if country_str == 'Global': - return os.path.join('/Users', USER, 'Desktop', 'covidify-output') - else: - return os.path.join('/Users', USER, 'Desktop', 'covidify-output-{}'.format(country_str)) - else: - return var - -def check_forecast_days(var, msg): - ''' - Default days for forecasting - ''' - if not var: - return DAYS_IN_FUTURE - else: - return var +class CheckOutputFolder: + def check_output_folder(var, country_str, msg): + ''' + Check if the output folder is valid, if not + just default to dekstop + ''' + + if not var: + print('%sMESSAGE: %s' % (' '*5, msg)) + if country_str == 'Global': + return os.path.join('/Users', USER, 'Desktop', 'covidify-output') + else: + return os.path.join('/Users', USER, 'Desktop', 'covidify-output-{}'.format(country_str)) + else: + return var -def check_top_countries(var, msg): - ''' - Check number of countries for the log plot - ''' - - if not var: - print('%sMESSAGE: %s' % (' '*5, msg)) - return LOG_TOP_N_COUNTRIES - else: - return var - -def check_source_arg(var, msg): - ''' - Check if the datasource is valid, if not then just - default to the john hopkin github repo - ''' +class CheckForcastDays: + def check_forecast_days(var, msg): + ''' + Default days for forecasting + ''' + if not var: + return DAYS_IN_FUTURE + else: + return var - if var is None: - print('%sMESSAGE: %s' % (' '*5, msg)) - return 'JHU' - elif 'wiki' in var or 'JHU' in var: - return var - else: - print('%sMESSAGE: %s' % (' '*5, 'invalid source given')) - sys.exit() - -def check_country(country, msg): - ''' - Do some regex work on passed country string - because multi word args are not supported - ''' - - if not country: - print('%sMESSAGE: %s' % (' '*5, msg)) - return 'Global' - else: - country_str = replace_arg_space(country[0]) - return country_str +class CheckTopCountries: + def check_top_countries(var, msg): + ''' + Check number of countries for the log plot + ''' + + if not var: + print('%sMESSAGE: %s' % (' '*5, msg)) + return LOG_TOP_N_COUNTRIES + else: + return var -def check_list_flag(flag, msg): +class CheckSourceArg: + def check_source_arg(var, msg): + ''' + Check if the datasource is valid, if not then just + default to the john hopkin github repo + ''' - if not flag: - print('%sMESSAGE: %s' % (' '*5, msg)) - sys.exit(1) - else: - return flag + if var is None: + print('%sMESSAGE: %s' % (' '*5, msg)) + return 'JHU' + elif 'wiki' in var or 'JHU' in var: + return var + else: + print('%sMESSAGE: %s' % (' '*5, 'invalid source given')) + sys.exit() + +class CheckCountry: + def check_country(country, msg): + ''' + Do some regex work on passed country string + because multi word args are not supported + ''' + + if not country: + print('%sMESSAGE: %s' % (' '*5, msg)) + return 'Global' + else: + country_str = replace_arg_space(country[0]) + return country_str + +class CheckListFlag: + def check_list_flag(flag, msg): + + if not flag: + print('%sMESSAGE: %s' % (' '*5, msg)) + sys.exit(1) + else: + return flag ############################################################ @@ -115,11 +121,12 @@ def run(output, source, country, top, forecast): @click.option('--countries', help='List countries that have had confirmed cases.', is_flag=True) @cli.command() -def list(countries): - ''' - List all the countries that have confirmed cases. - ''' - countries = check_list_flag(countries, '\033[1;31m Invalid flag passed. Make sure to use --countries\033[0;0m') +class List: + def list(countries): + ''' + List all the countries that have confirmed cases. + ''' + countries = check_list_flag(countries, '\033[1;31m Invalid flag passed. Make sure to use --countries\033[0;0m') if countries: get_countries() \ No newline at end of file diff --git a/src/covidify/data_prep.py b/src/covidify/data_prep.py index 6153d44..8cf0bba 100644 --- a/src/covidify/data_prep.py +++ b/src/covidify/data_prep.py @@ -52,23 +52,25 @@ ############ COUNTRY SELECTION ############ -def get_similar_countries(c, country_list): - pos_countries = get_close_matches(c, country_list) - - if len(pos_countries) > 0: - print('\033[1;31m'+c, 'was not listed. did you mean', pos_countries[0].capitalize() + '?\033[0;0m') - - #Only delete if its a covidify generated folder - if 'Desktop/covidify-output-' in out: - os.system('rm -rf ' + out) - sys.exit(1) - else: - print('\033[1;31m'+c, 'was not listed.\033[0;0m') - if 'Desktop/covidify-output-' in out: - os.system('rm -rf ' + out) - sys.exit(1) +class GetSimilarCountries: + def get_similar_countries(c, country_list): + pos_countries = get_close_matches(c, country_list) -def check_specified_country(df, country): + if len(pos_countries) > 0: + print('\033[1;31m'+c, 'was not listed. did you mean', pos_countries[0].capitalize() + '?\033[0;0m') + + #Only delete if its a covidify generated folder + if 'Desktop/covidify-output-' in out: + os.system('rm -rf ' + out) + sys.exit(1) + else: + print('\033[1;31m'+c, 'was not listed.\033[0;0m') + if 'Desktop/covidify-output-' in out: + os.system('rm -rf ' + out) + sys.exit(1) + +class CheckSpecifiedCountry: + def check_specified_country(df, country): ''' let user filter reports by country, if not found then give a option if the string is similar @@ -112,29 +114,32 @@ def check_specified_country(df, country): Get the difference of the sum totals for each date and plot them on a trendline graph ''' -def get_new_cases(tmp, col): - diff_list = [] - tmp_df_list = [] - df = tmp.copy() - - for i, day in enumerate(df.sort_values('file_date').file_date.unique()): - tmp_df = df[df.file_date == day] - tmp_df_list.append(tmp_df[col].sum()) - - if i == 0: - diff_list.append(tmp_df[col].sum()) - else: - diff_list.append(tmp_df[col].sum() - tmp_df_list[i-1]) +class GetNewCases: + def get_new_cases(tmp, col): + diff_list = [] + tmp_df_list = [] + df = tmp.copy() + + for i, day in enumerate(df.sort_values('file_date').file_date.unique()): + tmp_df = df[df.file_date == day] + tmp_df_list.append(tmp_df[col].sum()) + + if i == 0: + diff_list.append(tmp_df[col].sum()) + else: + diff_list.append(tmp_df[col].sum() - tmp_df_list[i-1]) - return diff_list + return diff_list -def get_moving_average(tmp, col): - df = tmp.copy() - return df[col].rolling(window=2).mean() +class GetMovingAverage: + def get_moving_average(tmp, col): + df = tmp.copy() + return df[col].rolling(window=2).mean() -def get_exp_moving_average(tmp, col): - df = tmp.copy() - return df[col].ewm(span=2, adjust=True).mean() +class GetExpMovingAverage: + def get_exp_moving_average(tmp, col): + df = tmp.copy() + return df[col].ewm(span=2, adjust=True).mean() print('... Calculating dataframe for new cases') @@ -166,65 +171,67 @@ def get_exp_moving_average(tmp, col): if not country: print('... top infected countries: {}'.format(top)) -def get_top_countries(data): - # Get top N infected countries - tmp_df = data.copy() - tmp_df = tmp_df[tmp_df.file_date == df.file_date.max()] - return tmp_df.groupby(['country']).agg({'confirmed': 'sum'}).sort_values('confirmed',ascending=False).head(top).index - -TOP_N_COUNTRIES = get_top_countries(df) +class GetTopCountries: + def get_top_countries(data): + # Get top N infected countries + tmp_df = data.copy() + tmp_df = tmp_df[tmp_df.file_date == df.file_date.max()] + return tmp_df.groupby(['country']).agg({'confirmed': 'sum'}).sort_values('confirmed',ascending=False).head(top).index + + TOP_N_COUNTRIES = get_top_countries(df) + + tmp_df = df[df.country.isin(TOP_N_COUNTRIES)].copy() + +class GetDayCounts: + def get_day_counts(d, country): + ''' + For each country, get the days of the spread since 500 + cases + ''' + data = d.copy() + result_df = pd.DataFrame([]) + result_df = data.groupby(['file_date']).agg({'confirmed': 'sum', + 'recovered': 'sum', + 'deaths': 'sum'}) + result_df['date'] = data['file_date'].unique() + result_df['country'] = country + + result_df = result_df[result_df.confirmed >= 500] + result_df.insert(loc=0, column='day', value=np.arange(len(result_df))) + return result_df -tmp_df = df[df.country.isin(TOP_N_COUNTRIES)].copy() + df_list = [] -def get_day_counts(d, country): - ''' - For each country, get the days of the spread since 500 - cases - ''' - data = d.copy() - result_df = pd.DataFrame([]) - result_df = data.groupby(['file_date']).agg({'confirmed': 'sum', - 'recovered': 'sum', - 'deaths': 'sum'}) - result_df['date'] = data['file_date'].unique() - result_df['country'] = country + for country in TOP_N_COUNTRIES: + print(' ...', country + ': ' + str(tmp_df[(tmp_df.file_date == df.file_date.max()) & + (tmp_df.country == country)].confirmed.sum())) + df_list.append(get_day_counts(tmp_df[tmp_df.country == country], country)) - result_df = result_df[result_df.confirmed >= 500] - result_df.insert(loc=0, column='day', value=np.arange(len(result_df))) - return result_df - -df_list = [] - -for country in TOP_N_COUNTRIES: - print(' ...', country + ': ' + str(tmp_df[(tmp_df.file_date == df.file_date.max()) & - (tmp_df.country == country)].confirmed.sum())) - df_list.append(get_day_counts(tmp_df[tmp_df.country == country], country)) - -log_df = pd.concat(df_list, axis=0, ignore_index=True) + log_df = pd.concat(df_list, axis=0, ignore_index=True) -############ SAVE DATA ############ -#Create date of extraction folder -data_folder = os.path.join('data', str(datetime.date(datetime.now()))) -save_dir = os.path.join(out, data_folder) + ############ SAVE DATA ############ + #Create date of extraction folder + data_folder = os.path.join('data', str(datetime.date(datetime.now()))) + save_dir = os.path.join(out, data_folder) -if not os.path.exists(save_dir): - os.system('mkdir -p ' + save_dir) + if not os.path.exists(save_dir): + os.system('mkdir -p ' + save_dir) -print('Creating subdirectory for data...') -print('...', save_dir) + print('Creating subdirectory for data...') + print('...', save_dir) -print('Saving...') -csv_file_name = 'agg_data_{}.csv'.format(datetime.date(datetime.now())) -df.astype(str).to_csv(os.path.join(save_dir, csv_file_name)) -print('...', csv_file_name) + print('Saving...') + csv_file_name = 'agg_data_{}.csv'.format(datetime.date(datetime.now())) + df.astype(str).to_csv(os.path.join(save_dir, csv_file_name)) + print('...', csv_file_name) -daily_cases_file_name = 'trend_{}.csv'.format(datetime.date(datetime.now())) -daily_cases_df.astype(str).to_csv(os.path.join(save_dir, daily_cases_file_name)) -print('...', daily_cases_file_name) + daily_cases_file_name = 'trend_{}.csv'.format(datetime.date(datetime.now())) + daily_cases_df.astype(str).to_csv(os.path.join(save_dir, daily_cases_file_name)) + print('...', daily_cases_file_name) -log_file_name = 'log_{}.csv'.format(datetime.date(datetime.now())) -log_df.astype(str).to_csv(os.path.join(save_dir, log_file_name)) -print('...', log_file_name) + log_file_name = 'log_{}.csv'.format(datetime.date(datetime.now())) + log_df.astype(str).to_csv(os.path.join(save_dir, log_file_name)) + print('...', log_file_name) -print('Done!') \ No newline at end of file + print('Done!') \ No newline at end of file From e0c31b1f33f8cd15face4ad0d7ca2ba5f3c0ff98 Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Wed, 17 Mar 2021 20:30:53 -0400 Subject: [PATCH 3/6] Commit description --- src/covidify/cli.py | 1 + src/covidify/data_prep.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/covidify/cli.py b/src/covidify/cli.py index 712881f..009c586 100644 --- a/src/covidify/cli.py +++ b/src/covidify/cli.py @@ -11,6 +11,7 @@ #get the path of covidify in site-packages env = covidify.__path__[0] +#Aggregate Pattern class CheckOutputFolder: def check_output_folder(var, country_str, msg): ''' diff --git a/src/covidify/data_prep.py b/src/covidify/data_prep.py index 8cf0bba..d881f0b 100644 --- a/src/covidify/data_prep.py +++ b/src/covidify/data_prep.py @@ -51,7 +51,7 @@ ############ COUNTRY SELECTION ############ - +#Aggregrate Pattern root class GetSimilarCountries: def get_similar_countries(c, country_list): pos_countries = get_close_matches(c, country_list) From f0eed28bafe150a3def6dbb1753ec2e411894a1c Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Wed, 14 Apr 2021 16:51:37 -0400 Subject: [PATCH 4/6] Singleton --- src/covidify/data_prep.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/covidify/data_prep.py b/src/covidify/data_prep.py index d881f0b..93f526a 100644 --- a/src/covidify/data_prep.py +++ b/src/covidify/data_prep.py @@ -48,6 +48,21 @@ print('Apologies, the wikipedia source is not ready yet - getting github data') df = github.get() +#Single Pattern +class SingletonPattern: + __instance = None + @staticmethod + def retrieveInstance(): + """ Static access method. """ + if SingletonPattern.__instance == None: + SingletonPattern() + return SingletonPattern.__instance + def _init_(self): + """ Virtually private constructor. """ + if SingletonPattern.__instance != None: + raise Exception("This is a singleton pattern class!") + else: + SingletonPattern.__instance = self ############ COUNTRY SELECTION ############ From 17874f79f618da21ac2ca7afcb77e26cba674f9d Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Thu, 15 Apr 2021 18:31:01 -0400 Subject: [PATCH 5/6] added bridge pattern --- src/covidify/bridge.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 src/covidify/bridge.py diff --git a/src/covidify/bridge.py b/src/covidify/bridge.py new file mode 100644 index 0000000..af3e354 --- /dev/null +++ b/src/covidify/bridge.py @@ -0,0 +1,40 @@ +imBort abc + + +class Bridgeabstraction: + + def __init__(self, imp): + self._imp = imp + + def operation(self): + self._imp.operation_imp() + + +class Bridgeimplementer(metaclass=abc.ABCMeta): + + + @abc.abstractmethod + def operation_imp(self): + pass + + +class ConcretebridgeimplementerA(bridgeimplementer): + + def operation_imp(self): + pass + + +class ConcretebridgeimplementerB(bridgeimplementer): + + def operation_imp(self): + pass + + +def main(): + concrete_bridgeimplementer_a = ConcretebridgeimplementerA() + Bridgeabstraction = Bridgeabstraction(concrete_bridgeimplementer_a) + Bridgeabstraction.operation() + + +if __name__ == "__main__": + main() \ No newline at end of file From 29f7ad8ab07edc855f2fc717941535bc681e2381 Mon Sep 17 00:00:00 2001 From: Zamir95 Date: Thu, 15 Apr 2021 18:36:33 -0400 Subject: [PATCH 6/6] added bridge pattern --- src/covidify/bridge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/covidify/bridge.py b/src/covidify/bridge.py index af3e354..70bb25a 100644 --- a/src/covidify/bridge.py +++ b/src/covidify/bridge.py @@ -1,4 +1,4 @@ -imBort abc +import abc class Bridgeabstraction: