-
Notifications
You must be signed in to change notification settings - Fork 0
/
misc.py
144 lines (133 loc) · 4.69 KB
/
misc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
Miscellaneous useful functions.
To import these functions (useful_code is not an installed package), add its
directory to sys.path first, using whichever of these locations applies:
    import sys
    sys.path.insert(0, 'N:\\Documents\\Code\\useful_code')
    sys.path.insert(0, 'C:\\Users\\shann\\OneDrive\\Documents\\Research\\Workspace\\Code\\useful_code')
    import misc
"""
import os
import glob
import numpy as np
import pandas as pd
import astropy.units as u
from datetime import datetime, timedelta
# def get_project_dirs():
# """
# A function to load in dictionary of project directories stored in a
# project_directories.txt file stored in the working directory.
# :return proj_dirs: Dictionary of project directories.
# """
# files = glob.glob(os.path.join(os.getcwd(), 'project_directories.txt'))
# # Open file and extract
# with open(files[0], "r") as f:
# lines = f.read().splitlines()
# proj_dirs = {l.split(',')[0]: l.split(',')[1] for l in lines}
# # Check the directories exist.
# # for val in iter(proj_dirs.items()):
# # if not os.path.exists(val[1]):
# # print('Error, invalid path, check config: ' + val[1])
# return proj_dirs
def find_nearest(array, value):
    """
    Locate the element of an array that lies closest to a target value.

    Both inputs are multiplied by the dimensionless unit from astropy.units
    so that unit-less inputs gain a unit; the target is then converted to
    the array's unit and the comparison is done on the bare numbers.

    :param array: values to search; a pandas Series is reduced to .values
    :param value: target value whose nearest neighbour is wanted
    :return: index of the element of array closest to value
    """
    # A Series is swapped for its underlying values before any arithmetic.
    if isinstance(array, pd.Series):
        array = array.values

    # Attach units where there are none, then express the target in the
    # array's unit so the two are directly comparable.
    haystack = array * u.dimensionless_unscaled
    target = (value * u.dimensionless_unscaled).to(haystack.unit).value
    haystack = haystack.value

    offsets = [element - target for element in haystack]
    return np.abs(offsets).argmin()
def reshape_df(df, x_list, y_list, x, y, c):
    """Reshape column c of df into a 2-D grid for use with plt.pcolormesh.

    The result has one row per entry of y_list and one column per entry of
    x_list, i.e. shape len(y_list) x len(x_list).

    df: DataFrame holding the data
    x_list: x values, one per output column
    y_list: y values, one per output row
    x, y, c: names of the df columns holding the x value, the y value and
        the cell value

    Returns a list of lists. A cell is NaN unless exactly one row of df
    matches its (x, y) pair (so both missing and duplicated pairs give NaN).
    """
    grid = []
    for y_val in y_list:
        # Filter on y once per row rather than once per cell.
        at_y = df[df[y] == y_val]
        row = []
        for x_val in x_list:
            matches = at_y[at_y[x] == x_val]
            if len(matches) == 1:
                row.append(matches[c].values[0])
            else:
                # np.nan, not np.NaN: the capitalised alias was removed in
                # NumPy 2.0.
                row.append(np.nan)
        grid.append(row)
    return grid
def _add_months(year, month, n_months):
    """Return the (year, month) that lies n_months after the given one."""
    years_over, month_index = divmod(month - 1 + n_months, 12)
    return year + years_over, month_index + 1


def find_time_means(val_list, time_list, t, start_month=1, err='std'):
    """Find means of time data over consecutive windows of t months.

    Windows start on the first day of start_month in the year of the
    earliest time and follow one another without gaps until a window would
    start in a year after the latest time. Each window is half-open: it
    includes its start date and excludes its end date.

    val_list: data to find means of
    time_list: times linked to each of the values in val_list
    t: int, time length in months of data to average (must be >= 1)
    start_month: int, month of year to start the means on (for running means)
    err: 'std' for standard deviation or 'sem' for standard error

    Returns (dates, means, errs): dates is a numpy array with one date per
    window, means and errs are lists of the same length. NaN values are
    ignored in both the mean and the error; a window with no non-NaN values
    gives NaN for both.

    Raises ValueError if val_list and time_list differ in length or if
    t is less than 1.
    """
    if len(time_list) != len(val_list):
        raise ValueError('val_list and time_list must have same length')
    if t < 1:
        # A non-positive window never advances and would loop forever.
        raise ValueError('t must be a positive number of months')

    # Arrays for outputs
    dates = []
    means = []
    errs = []

    last_year = np.max(time_list).year
    year = np.min(time_list).year
    month = start_month
    while year <= last_year:
        start_date = datetime(year, month, 1)
        # The next window's start is this window's (exclusive) end; the
        # helper handles spans of more than twelve months.
        next_year, next_month = _add_months(year, month, t)
        end_date = datetime(next_year, next_month, 1)

        # Pair values with times positionally so an indexed container
        # (e.g. a pandas Series) is not looked up by label.
        points = [
            val for val, when in zip(val_list, time_list)
            if start_date <= when < end_date
        ]

        # NOTE(review): the reported date is always window start + 26 weeks,
        # which is the window midpoint only when t is 12 - confirm intended.
        dates.append(start_date + timedelta(weeks=26))

        n_valid = np.count_nonzero(~np.isnan(points)) if points else 0
        if n_valid:
            means.append(np.nanmean(points))
            # nanstd so one NaN does not wipe out the error of a window
            # whose mean is still valid.
            spread = np.nanstd(points)
            if err == 'sem':
                spread = spread / np.sqrt(n_valid)
            errs.append(spread)
        else:
            means.append(np.nan)
            errs.append(np.nan)

        year, month = next_year, next_month

    dates = np.unique(dates)
    return dates, means, errs
def find_time_running_means(val_list, time_list, t, err='std'):
    """Finds running means from time data.

    Calls find_time_means twelve times, once for each possible starting
    month of the year, and joins the results.

    val_list: data to find means of
    time_list: times linked to each of the values in val_list
    t: int, time length in months of data to average.
    err: 'std' for standard deviation or 'sem' for standard error

    Returns (dates, means, errs) as numpy arrays of equal length. Entries
    are grouped by starting month (1 to 12) in the order find_time_means
    returned them; they are not re-sorted into one chronological sequence.
    """
    date_chunks = []
    mean_chunks = []
    err_chunks = []
    # need to find means starting with each month of the year
    for first_month in range(1, 13):
        # The callee's keyword is start_month; there is no `month` argument.
        chunk_dates, chunk_means, chunk_errs = find_time_means(
            val_list, time_list, t, start_month=first_month, err=err)
        date_chunks.append(chunk_dates)
        mean_chunks.append(chunk_means)
        err_chunks.append(chunk_errs)
    # Join once at the end so every start month's results are kept.
    dates = np.concatenate(date_chunks)
    means = np.concatenate(mean_chunks)
    errs = np.concatenate(err_chunks)
    return dates, means, errs