Skip to content

Latest commit

 

History

History
1068 lines (824 loc) · 35.1 KB

lob.org

File metadata and controls

1068 lines (824 loc) · 35.1 KB

Environment Initializations

Python

Packages

import numpy as np
import pandas as pd
import xarray as xr
import datetime

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)

Graphics

import matplotlib.pyplot as plt

from matplotlib import rc
rc('font', size=12)
rc('text', usetex=False)
# matplotlib.pyplot.xkcd()

C_MAR = "#000000" # Belgian flag black
C_RACMO = "#af1523" # Netherland flag red

C_lightblue = np.array((166, 206, 227))/255
C_darkblue = np.array((31, 120, 180))/255
C_lightgreen = np.array((127, 223, 138))/255
C_darkgreen = np.array((51, 160, 44))/255

C_darkblue = "#1f7ab7"

from adjust_spines import adjust_spines
import matplotlib.patheffects as pe

import seaborn as sns

Data Dir

  • I set DATADIR as a bash environment variable in my login scripts.
  • This is so that Python babel blocks can also easily get that property.
import os
DATADIR = os.environ['DATADIR']

Example:

<<get_DATADIR>>
print(DATADIR)

Bash

Init

set -o nounset
set -o pipefail

# set -o errexit

### uncomment the above line when doing initial run. When rerunning and
### counting on GRASS failing w/ overwrite issues (speed increase), the
### line above must be commented

red='\033[0;31m'; orange='\033[0;33m'; green='\033[0;32m'; nc='\033[0m' # No Color
log_info() { echo -e "${green}[$(date --iso-8601=seconds)] [INFO] ${@}${nc}"; }
log_warn() { echo -e "${orange}[$(date --iso-8601=seconds)] [WARN] ${@}${nc}"; }
log_err() { echo -e "${red}[$(date --iso-8601=seconds)] [ERR] ${@}${nc}" >&2; }

trap ctrl_c INT # trap ctrl-c and call ctrl_c()
ctrl_c() { log_err "CTRL-C. Cleaning up"; }

debug() { if [[ debug:- == 1 ]]; then log_warn "debug:"; echo $@; fi; }

<<GRASS_config>>

GRASS config

Config

https://grass.osgeo.org/grass74/manuals/variables.html

GRASS_VERBOSE
-1complete silence (also errors and warnings are discarded)
0only errors and warnings are printed
1progress and important messages are printed (percent complete)
2all module messages are printed
3additional verbose messages are printed
export GRASS_VERBOSE=3
# export GRASS_MESSAGE_FORMAT=silent

if [ -z ${DATADIR+x} ]; then
    echo "DATADIR environment varible is unset."
    echo "Fix with: \"export DATADIR=/path/to/data\""
    exit 255
fi

set -x # print commands to STDOUT before running them

Observations

Obs to standard format at each obs

W: Watson

<<py_init>>

w = pd.read_csv("/home/kdm/data/van_As_2018/Watson_discharge_day_v03.txt", sep="\s+",
                parse_dates=[[0,1,2]],
                index_col=0)\
      .drop(["DayOfYear", "DayOfCentury"], axis='columns')\
      .rename({"WaterFluxDiversOnly(m3/s)"         : "divers",
               "Uncertainty(m3/s)"                 : "divers_err",
               "WaterFluxDivers&Temperature(m3/s)" : "divers_t",
               "Uncertainty(m3/s).1"               : "divers_t_err",
               "WaterFluxCumulative(km3)"          : "cum",
               "Uncertainty(km3)"                  : "cum_err"}, 
              axis='columns')

obs = w[['divers_t','divers_t_err']].rename({'divers_t':'Observed',
                                             'divers_t_err':'Observed error'}, axis='columns')
obs.index.name = 'time'
obs.to_csv("./dat/runoff/obs_W.csv")

Q: Qaanaaq

<<py_init>>

obs = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2017.txt", index_col=0, parse_dates=True)
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2018.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2019.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
obs = obs.resample('1D')\
         .mean()\
         .rename({'Discharge':'Observed'}, axis='columns')

obs.index.name = "time"
obs.to_csv("./dat/runoff/obs_Q.csv")

L: Leverett

<<py_init>>

root="/home/kdm/data/Tedstone_2017"
# for y in np.arange(2009,2012+1):
csv = []
for y in np.arange(2009,2012+1):
    df = pd.read_csv(root + "/leverett_Q_" + str(y) + "_UTC.csv", 
                     comment="#", index_col=0)\
        .rename({"Discharge m3 s-1": "Observed"}, axis="columns")
    df.index = datetime.datetime(y,1,1) + np.array([datetime.timedelta(_-1) for _ in df.index])
    csv.append(df)
obs = pd.concat(csv, axis='index')\
    .resample('1D').mean()
obs.index.name = "time"

obs.to_csv("./dat/runoff/obs_L.csv")

N: Narsarsuaq

<<py_init>>

<<get_DATADIR>>
root=DATADIR+"/Hawkings_2016"
print(root)

obs = pd.read_excel(root+"/NarsarsuaqDischarge2013.xlsx")\
        .rename({"Q (m3 sec-1)" : "Observed"}, axis="columns")
obs.index = datetime.datetime(2013,1,1) + np.array([datetime.timedelta(_-1) for _ in obs['DecDay']])
obs.index.name = "time"
obs.drop('DecDay', inplace=True, axis='columns')
obs = obs.resample('1D').mean().dropna()

obs.to_csv("./dat/runoff/obs_Ks.csv")

GEM

<<py_init>>

obs = pd.read_csv("/home/kdm/data/GEM/GEM.csv", parse_dates=True, index_col=0)
obs.index.name = 'time'

# name, abbreviation
nloc = [['Kobbefjord', "Kb"],
        ['Oriartorfik', "O"],
        ['Teqinngalip', "T"],
        ['Kingigtorssuaq', "K"],
        ['Røde_Elv', "R"],
        ['Zackenberg', "Z"]]

for nl in nloc:
    obs[nl[0]].to_csv("./dat/runoff/obs_" + nl[1] + ".csv")

Load all observations

names = ['Kb Kobbefjord','K Kingigtorssuaq','L Leverett','Ks Kiattuut Sermiat','O Oriartorfik','Q Qaanaaq','R Røde Elv','T Teqinngalip','W Watson', 'Z Zackenberg']
name = [' '.join(_.split(" ")[1:]) for _ in names]
loc = [_.split(" ")[0] for _ in names]

obs = {} # store all in dict of dataarrays
for i,l in enumerate(loc):
    df_obs = pd.read_csv("./dat/runoff/obs_" + l + ".csv", index_col=0, parse_dates=True)
    df_obs.columns = ['obs'] if l != 'W' else ['obs','err']
    df_RCM = pd.read_csv("./dat/runoff/" + l + ".csv", index_col=0, parse_dates=True)
    df = df_obs.merge(df_RCM, left_index=True, right_index=True)

    # add upstream ice to all basins where it exists (not O or K)
    df['MAR'] = df['MAR_land'] + df['MAR_ice_upstream'] if 'MAR_ice_upstream' in df.columns else df['MAR_land']
    # Leverett should be just upstream ice, no land runoff
    if l == 'L': df['MAR'] = df['MAR_ice']
    # Same for RACMO
    df['RACMO'] = df['RACMO_land'] + df['RACMO_ice_upstream'] if 'RACMO_ice_upstream' in df.columns else df['RACMO_land']
    if l == 'L': df['RACMO'] = df['RACMO_ice']

    df['MAR'] = df['MAR'].rolling('7D', min_periods=5).mean()
    df['RACMO'] = df['RACMO'].rolling('7D', min_periods=5).mean()

    df.attrs['name'] = name[i]
    obs[l] = df

# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in loc:
    o = np.append(o, obs[k]['obs'])
    MAR = np.append(MAR, obs[k]['MAR'])
    RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "all"
obs_all = df

# same as above but without GEM basins
o,MAR,RACMO = [],[],[]
for k in loc:
    if k in ['Kb','K','O','T']: continue
    o = np.append(o, obs[k]['obs'])
    MAR = np.append(MAR, obs[k]['MAR'])
    RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "noGEM"
obs_noGEM = df


Scatter - Daily w/ PI

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>

# Plot all basins alone
for k in obs.keys():

    df = obs[k]
    df = df.replace(0, np.nan).dropna()
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1, 
                label=df.attrs['name'], edgecolor='none', clip_on=False)
    ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1, 
                label=df.attrs['name'], edgecolor='none', clip_on=False)



# fit to all basins together
df = obs_all
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# # drop 5/95 outliers
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')





# repeat but without GEM basins
df = obs_noGEM

df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# # # drop 5/95 outliers
# df['diff'] = df['obs'] - df['MAR']
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05])]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax1.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left', color='red')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax2.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left', color='red')






# coords = np.array((ax1.get_xlim(),ax1.get_ylim(),ax2.get_xlim(),ax2.get_ylim())).flatten()
coords = np.log10([1E-3, 1E4])

for ax in [ax1,ax2]:
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.set_xlim(2E-4,1E3)
    # ax.set_ylim(ax.get_xlim())
    ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
    
    kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)

    ax.set_ylim([-3,4])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
    ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

    # locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12) 
    # ax.xaxis.set_major_locator(locmaj)
    # ax.yaxis.set_major_locator(locmaj)

    # kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
    # if ax == ax1:
    #     loc=4E-3
    #     ax.text(loc, (loc/2)*0.4, "RCM = 1/2 * Obs", **kwargs)
    #     # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
    #     loc=1.5E-3
    #     ax.text(loc, (loc*2)*1.6, "RCM = 2 * Obs", **kwargs)

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.18), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100,1000]]).ravel()
for ax in [ax1,ax2]:
    ax.set_xticks(mticks, minor=True)
    ax.set_yticks(mticks, minor=True)

plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

NOTDONE Tukey - all daily data

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>
df = obs_noGEM
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]

# kw = {'alpha': 0.2, 'marker':'.', 'edgecolor':'none', 'clip_on':False, 'color':'orange'}
# sm.graphics.mean_diff_plot(x, y_MAR, ax=ax1, scatter_kwds=kw)
# sm.graphics.mean_diff_plot(x, y_RACMO, ax=ax2, scatter_kwds=kw)

# Tukey parameters
tx_MAR = (df['obs']+df['MAR'])/2;     ty_MAR = df['obs']-df['MAR']
tx_RACMO = (df['obs']+df['RACMO'])/2; ty_RACMO = df['obs']-df['RACMO']
    
kw = {'mincnt':1, 'bins':'log', 'clip_on':True, 'gridsize':20, 'extent':[-3,3,-3,3], 'cmap':cm.cividis}
# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, alpha=0, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
    
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, vmax=hmax, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, vmax=hmax, **kw)


kwtext = {'path_effects':[pe.withStroke(linewidth=4, foreground="white")], 'color':'k'}
kwtext['horizontalalignment'] = 'left'
kwline = {'color':'k'}
xpos = -3

for ty,ax in [[ty_MAR,ax1],[ty_RACMO,ax2]]:
    y = ty.mean()
    _ = ax.axhline(y=y, **kwline)
    _ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='center', **kwtext)

    y = ty.mean() + 1.96 * ty.std()
    _ = ax.axhline(y=y, linestyle='--', **kwline)
    _ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='bottom', **kwtext)

    y = ty.mean() - 1.96 * ty.std()
    _ = ax.axhline(y=y, linestyle='--', **kwline)
    _ = ax.text(xpos, y-0.15, str(round(10**y,2)), verticalalignment='top', **kwtext)


ax1.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{MAR}}{2}$ [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel(r'$\mathrm{Observed} - \mathrm{MAR}$ [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{RACMO}}{2}$ [m$^{3}$ s$^{-1}$]')
ax2.set_ylabel(r'$\mathrm{Observed} - \mathrm{RACMO}$ [m$^{3}$ s$^{-1}$]')

lims = [np.min([ax1.get_xlim()[0], ax1.get_ylim()[0], ax2.get_xlim()[0], ax2.get_ylim()[0]]),
        np.max([ax1.get_xlim()[1], ax1.get_ylim()[1], ax2.get_xlim()[1], ax2.get_ylim()[1]])]
ticks = np.arange(round(lims[0]), round(lims[1])+1)
# ax.set_ylim(lims[0], lims[1])
# ax.set_xlim(lims[0], lims[1])
for ax in [ax1,ax2]:
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
    ax.set_yticklabels(labels)
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())
    
cax = fig.add_axes([0.40, 0.39, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')
# cb.set_label('N')

# _ = adjust_spines(ax1, ['left','bottom'])
# _ = adjust_spines(ax2, ['right','bottom'])
_ = adjust_spines(ax1, ['left','bottom'])
_ = adjust_spines(ax2, ['right','bottom'])

_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

plt.savefig("./fig/tukey_daily.png", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Modified Tukey & all daily data by discharge thirds

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)


<<load_all_obs>>
df = obs_noGEM

df['x'] = df['obs']
df['y_MAR'] = df['MAR'] / df['obs']
df['y_RACMO'] = df['RACMO'] / df['obs']
df = df.replace(0,np.nan).dropna()
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

THRESH=3
df['y_MAR'] = df['y_MAR'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)
df['y_RACMO'] = df['y_RACMO'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)

kw = {'mincnt':1,
      'bins':'log',
      'clip_on':True,
      'gridsize':20,
      'extent':[-THRESH,THRESH,-THRESH,THRESH],
      'cmap':plt.cm.cividis}

# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], alpha=0, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
    
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], vmax=hmax, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], vmax=hmax, **kw)

df_top = df[df['obs'] > df['obs'].quantile(0.33)]
df_bot = df[df['obs'] < df['obs'].quantile(0.33)]

kwline = {'color':'k'}
kwtext = {'path_effects':[pe.withStroke(linewidth=2, foreground="white")], 
          'color':'k',
          'fontsize':10,
          'verticalalignment':'center'}
for d in [df_top, df_bot]:
    
    for ax in [ax2,ax1]:
        if d is df_top:
            xpos = 3.2
            kwtext['horizontalalignment'] = 'left'
        elif d is df_bot:
            xpos = -2
            kwtext['horizontalalignment'] = 'right'

        if ax is ax1: yy = d['y_MAR']
        if ax is ax2: yy = d['y_RACMO']
        y = yy.mean()
        ax.plot([d['x'].min(),d['x'].max()], [y,y], **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

        # y = yy.mean() + 1.96 * yy.std()
        y = yy.quantile(0.95)
        ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

        # y = yy.mean() - 1.96 * yy.std()
        y = yy.quantile(0.05)
        ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

ax1.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel('MAR / Observed')
ax2.set_ylabel('RACMO / Observed')

lims = [-3.5,3.5]
ticks = np.arange(-3,3+1)
for ax in [ax1,ax2]:
    ax.set_xlim(lims[0], lims[1])
    ax.set_xticks(ticks)
    labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)

    ax.set_ylim(lims[0], lims[1])
    ax.set_yticks(ticks)
    ax.set_yticklabels(labels)

cax = fig.add_axes([0.37, 0.85, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])

_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100]]).ravel()
for ax in [ax1,ax2]:
    ax.set_xticks(mticks, minor=True)
    ax.set_yticks(mticks, minor=True)

plt.savefig("./fig/tukey_daily3.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Merge Tukey

convert ./fig/tukey_daily.png ./fig/tukey_daily3.png -gravity center -append fig/tukey.png
o ./fig/tukey.png

NOTDONE Scatter - Daily w/ weighted PI

=> instead of giving a range +500%/-80%, I suggest you to rather ompute the mean error in % for each measurement you have in Fig4 by removing 5% of highest model-obs differences (by keep only percentile 95) and by weighting the mean by the measurement values to not give the same weight to the very low runoff value which are not representative for me when the errors is given in %.

<<py_init>>
<<py_init_graphics>>

<<load_all_obs>>


# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

for k in obs.keys():

    df = obs[k]
    name = df.attrs['name']
    df = df.replace(0, np.nan).dropna()
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1, 
                label=name, edgecolor='none', clip_on=False)
    ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1, 
                label=name, edgecolor='none', clip_on=False)



df = obs_all

df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df.sort_values(by='obs', inplace=True)

df['diff'] = df['obs'] - df['MAR']
df['diff %'] = df['obs'] / 10**df['diff'] * 100

# drop 5/95 outliers
q = df['diff %'].quantile([0.05, 0.5, 0.95])
# df = df[(df['diff %'] > q[0.05]) & (df['diff %'] < q[0.95])]
df = df[(df['diff %'] > q[0.5])]


x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

weights = 10**x.values; weights = weights - np.min(weights)+1
weights = x.values * 0 + 1
# weights = x.values; weights = weights - np.min(weights)+1
# weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights))+0.01

X = sm.add_constant(x)

model = sm.OLS(y_MAR, X)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)

model = sm.OLS(y_RACMO, X, weights=weights)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)




coords = np.log10([1E-3, 1E4])
for ax in [ax1,ax2]:
    ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
    
    # kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)

    ax.set_ylim([-3,4])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
    ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.8,0), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)



# plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Scatter - Yearly sum w/ PI

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>
for k in obs.keys():

    df = obs[k]
    name = df.attrs['name']
    df = df.replace(0, np.nan).dropna()
    # m^3/s summed by year -> km^3/yr
    df = df.resample('A').sum() * 86400
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='$\mathrm{'+k+'}$', alpha=0.9, 
                label=name, clip_on=False, zorder=99)
    ax2.scatter(df['obs'], df['RACMO'], marker='$\mathrm{'+k+'}$', alpha=0.9, 
                clip_on=False, zorder=99)


# combine all into one for confidence intervals
# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in obs.keys():
    o = np.append(o, obs[k]['obs'].resample('A').sum())
    MAR = np.append(MAR, obs[k]['MAR'].resample('A').sum())
    RACMO = np.append(RACMO, obs[k]['RACMO'].resample('A').sum())

# m^3/s -> m^3/yr
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T * 86400


df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')




for ax in [ax1,ax2]:
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.set_xlim(1E1,1E5)
    # ax.set_ylim(ax.get_xlim())
    ax.set_xlabel('Observed [m$^{3}$]')

    kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6,1E10]), **kw)
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6/2,1E10/2]), **kw)
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6*2,1E10*2]), **kw)

    ax.set_ylim([6,10])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([6,7,8,9,10])
    ax.set_yticklabels(['10$^{6}$','10$^{7}$','10$^{8}$','10$^{9}$','10$^{10}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

    # coords = ax.get_xlim()
    # kw = {'alpha':0.5, 'linewidth':1}
    # ax.plot([0,np.max(coords)],[0,np.max(coords)], 'k-', **kw)
    # ax.plot([0,np.max(coords)],[0,np.max(coords)*2], 'k--', **kw)
    # ax.plot([0,np.max(coords)],[0,np.max(coords)*0.5], 'k--', **kw)

    # locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12) 
    # ax.xaxis.set_major_locator(locmaj)
    # ax.yaxis.set_major_locator(locmaj)

    # kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
    # if ax == ax1:
    #     loc=1200
    #     ax.text(loc, (loc/2)*0.6, "RCM = 1/2 * Obs", **kwargs)
    #     # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
    #     loc=100
    #     ax.text(loc, (loc*2)*1.4, "RCM = 2 * Obs", **kwargs)

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$]')
ax2.set_ylabel('RACMO [m$^{^3}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.1), loc='lower left', mode="expand")
ax2.set_zorder(-2)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

for i,l in enumerate(leg.texts):
    l.set_y(-1.5)
#     l.set_x(-i*18+20)
# for i,l in enumerate(leg.legendHandles):
#     l.set_offsets([[-i*12.5+10+20,4],[-i*12.5+10+20,4]])

# plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
# plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

plt.savefig("./fig/scatter_yearsum.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.svg", bbox_inches='tight', dpi=300)

Nash-Sutcliff

# <<load_all_obs>>

for k in obs.keys():
    df = obs[k].dropna()
    if 'MAR_ice_upstream' not in df.columns: continue
    df['model'] = df['MAR_ice_upstream'] + df['MAR_land']
    NSE_MAR = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
    df['model'] = df['RACMO_ice_upstream'] + df['RACMO_land']
    NSE_RACMO = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
    print(k, NSE_MAR, NSE_RACMO)