Environment Initializations

Python

Packages

import numpy as np
import pandas as pd
import xarray as xr
import datetime

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std
from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)

Graphics

import matplotlib.pyplot as plt

from matplotlib import rc
rc('font', size=12)
rc('text', usetex=False)
# matplotlib.pyplot.xkcd()

C_MAR = "#000000" # Belgian flag black
C_RACMO = "#af1523" # Netherland flag red

C_lightblue = np.array((166, 206, 227))/255
C_darkblue = np.array((31, 120, 180))/255
C_lightgreen = np.array((127, 223, 138))/255
C_darkgreen = np.array((51, 160, 44))/255

C_darkblue = "#1f7ab7"

from adjust_spines import adjust_spines
import matplotlib.patheffects as pe

import seaborn as sns

Data Dir

I set DATADIR as a bash environment variable in my login scripts.
This is so that Python babel blocks can also easily get that property.

import os
DATADIR = os.environ['DATADIR']

Example:

<<get_DATADIR>>
print(DATADIR)

Bash

Init

set -o nounset
set -o pipefail

# set -o errexit

### uncomment the above line when doing initial run. When rerunning and
### counting on GRASS failing w/ overwrite issues (speed increase), the
### line above must be commented

red='\033[0;31m'; orange='\033[0;33m'; green='\033[0;32m'; nc='\033[0m' # No Color
log_info() { echo -e "${green}[$(date --iso-8601=seconds)] [INFO] ${@}${nc}"; }
log_warn() { echo -e "${orange}[$(date --iso-8601=seconds)] [WARN] ${@}${nc}"; }
log_err() { echo -e "${red}[$(date --iso-8601=seconds)] [ERR] ${@}${nc}" >&2; }

trap ctrl_c INT # trap ctrl-c and call ctrl_c()
ctrl_c() { log_err "CTRL-C. Cleaning up"; }

debug() { if [[ debug:- == 1 ]]; then log_warn "debug:"; echo $@; fi; }

<<GRASS_config>>

GRASS config

Config

https://grass.osgeo.org/grass74/manuals/variables.html

GRASS_VERBOSE
-1	complete silence (also errors and warnings are discarded)
0	only errors and warnings are printed
1	progress and important messages are printed (percent complete)
2	all module messages are printed
3	additional verbose messages are printed

export GRASS_VERBOSE=3
# export GRASS_MESSAGE_FORMAT=silent

if [ -z ${DATADIR+x} ]; then
    echo "DATADIR environment varible is unset."
    echo "Fix with: \"export DATADIR=/path/to/data\""
    exit 255
fi

set -x # print commands to STDOUT before running them

Observations

Obs to standard format at each obs

W: Watson

<<py_init>>

w = pd.read_csv("/home/kdm/data/van_As_2018/Watson_discharge_day_v03.txt", sep="\s+",
                parse_dates=[[0,1,2]],
                index_col=0)\
      .drop(["DayOfYear", "DayOfCentury"], axis='columns')\
      .rename({"WaterFluxDiversOnly(m3/s)"         : "divers",
               "Uncertainty(m3/s)"                 : "divers_err",
               "WaterFluxDivers&Temperature(m3/s)" : "divers_t",
               "Uncertainty(m3/s).1"               : "divers_t_err",
               "WaterFluxCumulative(km3)"          : "cum",
               "Uncertainty(km3)"                  : "cum_err"}, 
              axis='columns')

obs = w[['divers_t','divers_t_err']].rename({'divers_t':'Observed',
                                             'divers_t_err':'Observed error'}, axis='columns')
obs.index.name = 'time'
obs.to_csv("./dat/runoff/obs_W.csv")

Q: Qaanaaq

<<py_init>>

obs = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2017.txt", index_col=0, parse_dates=True)
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2018.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
tmp = pd.read_csv("/home/kdm/data.me/qaanaaq/discharge2019.txt", index_col=0, parse_dates=True)
obs = pd.concat((obs,tmp))
obs = obs.resample('1D')\
         .mean()\
         .rename({'Discharge':'Observed'}, axis='columns')

obs.index.name = "time"
obs.to_csv("./dat/runoff/obs_Q.csv")

L: Leverett

<<py_init>>

root="/home/kdm/data/Tedstone_2017"
# for y in np.arange(2009,2012+1):
csv = []
for y in np.arange(2009,2012+1):
    df = pd.read_csv(root + "/leverett_Q_" + str(y) + "_UTC.csv", 
                     comment="#", index_col=0)\
        .rename({"Discharge m3 s-1": "Observed"}, axis="columns")
    df.index = datetime.datetime(y,1,1) + np.array([datetime.timedelta(_-1) for _ in df.index])
    csv.append(df)
obs = pd.concat(csv, axis='index')\
    .resample('1D').mean()
obs.index.name = "time"

obs.to_csv("./dat/runoff/obs_L.csv")

N: Narsarsuaq

<<py_init>>

<<get_DATADIR>>
root=DATADIR+"/Hawkings_2016"
print(root)

obs = pd.read_excel(root+"/NarsarsuaqDischarge2013.xlsx")\
        .rename({"Q (m3 sec-1)" : "Observed"}, axis="columns")
obs.index = datetime.datetime(2013,1,1) + np.array([datetime.timedelta(_-1) for _ in obs['DecDay']])
obs.index.name = "time"
obs.drop('DecDay', inplace=True, axis='columns')
obs = obs.resample('1D').mean().dropna()

obs.to_csv("./dat/runoff/obs_Ks.csv")

GEM

<<py_init>>

obs = pd.read_csv("/home/kdm/data/GEM/GEM.csv", parse_dates=True, index_col=0)
obs.index.name = 'time'

# name, abbreviation
nloc = [['Kobbefjord', "Kb"],
        ['Oriartorfik', "O"],
        ['Teqinngalip', "T"],
        ['Kingigtorssuaq', "K"],
        ['Røde_Elv', "R"],
        ['Zackenberg', "Z"]]

for nl in nloc:
    obs[nl[0]].to_csv("./dat/runoff/obs_" + nl[1] + ".csv")

Load all observations

names = ['Kb Kobbefjord','K Kingigtorssuaq','L Leverett','Ks Kiattuut Sermiat','O Oriartorfik','Q Qaanaaq','R Røde Elv','T Teqinngalip','W Watson', 'Z Zackenberg']
name = [' '.join(_.split(" ")[1:]) for _ in names]
loc = [_.split(" ")[0] for _ in names]

obs = {} # store all in dict of dataarrays
for i,l in enumerate(loc):
    df_obs = pd.read_csv("./dat/runoff/obs_" + l + ".csv", index_col=0, parse_dates=True)
    df_obs.columns = ['obs'] if l != 'W' else ['obs','err']
    df_RCM = pd.read_csv("./dat/runoff/" + l + ".csv", index_col=0, parse_dates=True)
    df = df_obs.merge(df_RCM, left_index=True, right_index=True)

    # add upstream ice to all basins where it exists (not O or K)
    df['MAR'] = df['MAR_land'] + df['MAR_ice_upstream'] if 'MAR_ice_upstream' in df.columns else df['MAR_land']
    # Leverett should be just upstream ice, no land runoff
    if l == 'L': df['MAR'] = df['MAR_ice']
    # Same for RACMO
    df['RACMO'] = df['RACMO_land'] + df['RACMO_ice_upstream'] if 'RACMO_ice_upstream' in df.columns else df['RACMO_land']
    if l == 'L': df['RACMO'] = df['RACMO_ice']

    df['MAR'] = df['MAR'].rolling('7D', min_periods=5).mean()
    df['RACMO'] = df['RACMO'].rolling('7D', min_periods=5).mean()

    df.attrs['name'] = name[i]
    obs[l] = df

# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in loc:
    o = np.append(o, obs[k]['obs'])
    MAR = np.append(MAR, obs[k]['MAR'])
    RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "all"
obs_all = df

# same as above but without GEM basins
o,MAR,RACMO = [],[],[]
for k in loc:
    if k in ['Kb','K','O','T']: continue
    o = np.append(o, obs[k]['obs'])
    MAR = np.append(MAR, obs[k]['MAR'])
    RACMO = np.append(RACMO, obs[k]['RACMO'])
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T
df.attrs['name'] = "noGEM"
obs_noGEM = df

Scatter - Daily w/ PI

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>

# Plot all basins alone
for k in obs.keys():

    df = obs[k]
    df = df.replace(0, np.nan).dropna()
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1, 
                label=df.attrs['name'], edgecolor='none', clip_on=False)
    ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1, 
                label=df.attrs['name'], edgecolor='none', clip_on=False)



# fit to all basins together
df = obs_all
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# # drop 5/95 outliers
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')





# repeat but without GEM basins
df = obs_noGEM

df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# # # drop 5/95 outliers
# df['diff'] = df['obs'] - df['MAR']
# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05])]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax1.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left', color='red')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="red", alpha=0.1)
ax2.text(0.6, 0.13, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left', color='red')






# coords = np.array((ax1.get_xlim(),ax1.get_ylim(),ax2.get_xlim(),ax2.get_ylim())).flatten()
coords = np.log10([1E-3, 1E4])

for ax in [ax1,ax2]:
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.set_xlim(2E-4,1E3)
    # ax.set_ylim(ax.get_xlim())
    ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
    
    kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
    ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)

    ax.set_ylim([-3,4])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
    ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

    # locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12) 
    # ax.xaxis.set_major_locator(locmaj)
    # ax.yaxis.set_major_locator(locmaj)

    # kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
    # if ax == ax1:
    #     loc=4E-3
    #     ax.text(loc, (loc/2)*0.4, "RCM = 1/2 * Obs", **kwargs)
    #     # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
    #     loc=1.5E-3
    #     ax.text(loc, (loc*2)*1.6, "RCM = 2 * Obs", **kwargs)

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.18), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100,1000]]).ravel()
for ax in [ax1,ax2]:
    ax.set_xticks(mticks, minor=True)
    ax.set_yticks(mticks, minor=True)

plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

NOTDONE Tukey - all daily data

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>
df = obs_noGEM
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

# q = df['obs'].quantile([0.05, 0.95])
# df = df[(df['obs'] > q[0.05]) & (df['obs'] < q[0.95])]

# kw = {'alpha': 0.2, 'marker':'.', 'edgecolor':'none', 'clip_on':False, 'color':'orange'}
# sm.graphics.mean_diff_plot(x, y_MAR, ax=ax1, scatter_kwds=kw)
# sm.graphics.mean_diff_plot(x, y_RACMO, ax=ax2, scatter_kwds=kw)

# Tukey parameters
tx_MAR = (df['obs']+df['MAR'])/2;     ty_MAR = df['obs']-df['MAR']
tx_RACMO = (df['obs']+df['RACMO'])/2; ty_RACMO = df['obs']-df['RACMO']
    
kw = {'mincnt':1, 'bins':'log', 'clip_on':True, 'gridsize':20, 'extent':[-3,3,-3,3], 'cmap':cm.cividis}
# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, alpha=0, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
    
h_MAR = ax1.hexbin(tx_MAR, ty_MAR, vmax=hmax, **kw)
h_RACMO = ax2.hexbin(tx_RACMO, ty_RACMO, vmax=hmax, **kw)


kwtext = {'path_effects':[pe.withStroke(linewidth=4, foreground="white")], 'color':'k'}
kwtext['horizontalalignment'] = 'left'
kwline = {'color':'k'}
xpos = -3

for ty,ax in [[ty_MAR,ax1],[ty_RACMO,ax2]]:
    y = ty.mean()
    _ = ax.axhline(y=y, **kwline)
    _ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='center', **kwtext)

    y = ty.mean() + 1.96 * ty.std()
    _ = ax.axhline(y=y, linestyle='--', **kwline)
    _ = ax.text(xpos, y, str(round(10**y,2)), verticalalignment='bottom', **kwtext)

    y = ty.mean() - 1.96 * ty.std()
    _ = ax.axhline(y=y, linestyle='--', **kwline)
    _ = ax.text(xpos, y-0.15, str(round(10**y,2)), verticalalignment='top', **kwtext)


ax1.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{MAR}}{2}$ [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel(r'$\mathrm{Observed} - \mathrm{MAR}$ [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel(r'$\frac{\mathrm{Observed} + \mathrm{RACMO}}{2}$ [m$^{3}$ s$^{-1}$]')
ax2.set_ylabel(r'$\mathrm{Observed} - \mathrm{RACMO}$ [m$^{3}$ s$^{-1}$]')

lims = [np.min([ax1.get_xlim()[0], ax1.get_ylim()[0], ax2.get_xlim()[0], ax2.get_ylim()[0]]),
        np.max([ax1.get_xlim()[1], ax1.get_ylim()[1], ax2.get_xlim()[1], ax2.get_ylim()[1]])]
ticks = np.arange(round(lims[0]), round(lims[1])+1)
# ax.set_ylim(lims[0], lims[1])
# ax.set_xlim(lims[0], lims[1])
for ax in [ax1,ax2]:
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
    ax.set_yticklabels(labels)
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())
    
cax = fig.add_axes([0.40, 0.39, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')
# cb.set_label('N')

# _ = adjust_spines(ax1, ['left','bottom'])
# _ = adjust_spines(ax2, ['right','bottom'])
_ = adjust_spines(ax1, ['left','bottom'])
_ = adjust_spines(ax2, ['right','bottom'])

_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

plt.savefig("./fig/tukey_daily.png", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# # plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Modified Tukey & all daily data by discharge thirds

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)


<<load_all_obs>>
df = obs_noGEM

df['x'] = df['obs']
df['y_MAR'] = df['MAR'] / df['obs']
df['y_RACMO'] = df['RACMO'] / df['obs']
df = df.replace(0,np.nan).dropna()
df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

THRESH=3
df['y_MAR'] = df['y_MAR'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)
df['y_RACMO'] = df['y_RACMO'].apply(lambda x: x if abs(x) < THRESH else np.sign(x)*THRESH)

kw = {'mincnt':1,
      'bins':'log',
      'clip_on':True,
      'gridsize':20,
      'extent':[-THRESH,THRESH,-THRESH,THRESH],
      'cmap':plt.cm.cividis}

# plot all to get max of both for colorbar range
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], alpha=0, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], alpha=0, **kw)
hmax = max([h_MAR.get_array().max(),h_RACMO.get_array().max()])
    
h_MAR = ax1.hexbin(df['x'], df['y_MAR'], vmax=hmax, **kw)
h_RACMO = ax2.hexbin(df['x'], df['y_RACMO'], vmax=hmax, **kw)

df_top = df[df['obs'] > df['obs'].quantile(0.33)]
df_bot = df[df['obs'] < df['obs'].quantile(0.33)]

kwline = {'color':'k'}
kwtext = {'path_effects':[pe.withStroke(linewidth=2, foreground="white")], 
          'color':'k',
          'fontsize':10,
          'verticalalignment':'center'}
for d in [df_top, df_bot]:
    
    for ax in [ax2,ax1]:
        if d is df_top:
            xpos = 3.2
            kwtext['horizontalalignment'] = 'left'
        elif d is df_bot:
            xpos = -2
            kwtext['horizontalalignment'] = 'right'

        if ax is ax1: yy = d['y_MAR']
        if ax is ax2: yy = d['y_RACMO']
        y = yy.mean()
        ax.plot([d['x'].min(),d['x'].max()], [y,y], **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

        # y = yy.mean() + 1.96 * yy.std()
        y = yy.quantile(0.95)
        ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

        # y = yy.mean() - 1.96 * yy.std()
        y = yy.quantile(0.05)
        ax.plot([d['x'].min(),d['x'].max()], [y,y], linestyle='--', **kwline)
        ax.text(xpos, y, str(round(10**y,2)), **kwtext)

ax1.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax2.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
ax1.set_ylabel('MAR / Observed')
ax2.set_ylabel('RACMO / Observed')

lims = [-3.5,3.5]
ticks = np.arange(-3,3+1)
for ax in [ax1,ax2]:
    ax.set_xlim(lims[0], lims[1])
    ax.set_xticks(ticks)
    labels = ['10$^{' + str(int(_)) + '}$' for _ in ticks]
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)

    ax.set_ylim(lims[0], lims[1])
    ax.set_yticks(ticks)
    ax.set_yticklabels(labels)

cax = fig.add_axes([0.37, 0.85, 0.2, 0.04])
cb = fig.colorbar(h_MAR, cax=cax, orientation='horizontal')

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])

_ = plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
_ = plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

mticks = np.array([np.log10(np.linspace(2*_, 9*_, num=8)) for _ in [0.001, 0.01, 0.1,1,10,100]]).ravel()
for ax in [ax1,ax2]:
    ax.set_xticks(mticks, minor=True)
    ax.set_yticks(mticks, minor=True)

plt.savefig("./fig/tukey_daily3.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Merge Tukey

convert ./fig/tukey_daily.png ./fig/tukey_daily3.png -gravity center -append fig/tukey.png
o ./fig/tukey.png

NOTDONE Scatter - Daily w/ weighted PI

=> instead of giving a range +500%/-80%, I suggest you to rather ompute the mean error in % for each measurement you have in Fig4 by removing 5% of highest model-obs differences (by keep only percentile 95) and by weighting the mean by the measurement values to not give the same weight to the very low runoff value which are not representative for me when the errors is given in %.

<<py_init>>
<<py_init_graphics>>

<<load_all_obs>>


# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

for k in obs.keys():

    df = obs[k]
    name = df.attrs['name']
    df = df.replace(0, np.nan).dropna()
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='.', alpha=0.1, 
                label=name, edgecolor='none', clip_on=False)
    ax2.scatter(df['obs'], df['RACMO'], marker='.', alpha=0.1, 
                label=name, edgecolor='none', clip_on=False)



df = obs_all

df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
df.sort_values(by='obs', inplace=True)

df['diff'] = df['obs'] - df['MAR']
df['diff %'] = df['obs'] / 10**df['diff'] * 100

# drop 5/95 outliers
q = df['diff %'].quantile([0.05, 0.5, 0.95])
# df = df[(df['diff %'] > q[0.05]) & (df['diff %'] < q[0.95])]
df = df[(df['diff %'] > q[0.5])]


x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

weights = 10**x.values; weights = weights - np.min(weights)+1
weights = x.values * 0 + 1
# weights = x.values; weights = weights - np.min(weights)+1
# weights = (weights - np.min(weights)) / (np.max(weights) - np.min(weights))+0.01

X = sm.add_constant(x)

model = sm.OLS(y_MAR, X)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)

model = sm.OLS(y_RACMO, X, weights=weights)
results = model.fit()
print(results.summary())
prstd, iv_l, iv_u = wls_prediction_std(results, weights=weights)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)




coords = np.log10([1E-3, 1E4])
for ax in [ax1,ax2]:
    ax.set_xlabel('Observed [m$^{3}$ s$^{-1}$]')
    
    # kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3,1E4]), **kw)
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3/5,1E4/5]), **kw)
    # ax.plot(np.log10([1E-3,1E4]), np.log10([1E-3*5,1E4*5]), **kw)

    ax.set_ylim([-3,4])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([-3,-2,-1, 0, 1,2,3,4])
    ax.set_yticklabels(['10$^{-3}$','10$^{-2}$','10$^{-1}$','10$^{0}$','10$^{1}$','10$^{2}$','10$^{3}$','10$^{4}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$ s$^{-1}$]')
ax2.set_ylabel('RACMO [m$^{^3}$ s$^{-1}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.8,0), loc='lower left', mode="expand")
ax2.set_zorder(-1)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)



# plt.savefig("./fig/scatter_daily.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_daily.svg", bbox_inches='tight', dpi=300)

Scatter - Yearly sum w/ PI

<<py_init>>
<<py_init_graphics>>

# plt.close(1)
fig = plt.figure(1, figsize=(8,3.5)) # w,h
# get_current_fig_manager().window.move(0,0)
fig.clf()
fig.set_tight_layout(True)
ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

<<load_all_obs>>
for k in obs.keys():

    df = obs[k]
    name = df.attrs['name']
    df = df.replace(0, np.nan).dropna()
    # m^3/s summed by year -> km^3/yr
    df = df.resample('A').sum() * 86400
    df = np.log10(df)
    ax1.scatter(df['obs'], df['MAR'], marker='$\mathrm{'+k+'}$', alpha=0.9, 
                label=name, clip_on=False, zorder=99)
    ax2.scatter(df['obs'], df['RACMO'], marker='$\mathrm{'+k+'}$', alpha=0.9, 
                clip_on=False, zorder=99)


# combine all into one for confidence intervals
# one entry with everything, no time index, just all observation and model points
o,MAR,RACMO = [],[],[]
for k in obs.keys():
    o = np.append(o, obs[k]['obs'].resample('A').sum())
    MAR = np.append(MAR, obs[k]['MAR'].resample('A').sum())
    RACMO = np.append(RACMO, obs[k]['RACMO'].resample('A').sum())

# m^3/s -> m^3/yr
df = pd.DataFrame((o,MAR,RACMO), index=['obs','MAR','RACMO']).T * 86400


df = np.log10(df)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]


df.sort_values(by='obs', inplace=True)
x = df['obs']
y_MAR = df['MAR']
y_RACMO = df['RACMO']

X = sm.add_constant(x)
# X = x
model = sm.OLS(y_MAR, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax1.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax1.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax1.transAxes, horizontalalignment='left')

model = sm.OLS(y_RACMO, X)
results = model.fit()
prstd, iv_l, iv_u = wls_prediction_std(results)
ax2.fill_between(x, iv_u, iv_l, color="grey", alpha=0.25)
ax2.text(0.6, 0.05, 'r$^{2}$:' + str(round(results.rsquared,2)), transform=ax2.transAxes, horizontalalignment='left')




for ax in [ax1,ax2]:
    # ax.set_yscale('log')
    # ax.set_xscale('log')
    # ax.set_xlim(1E1,1E5)
    # ax.set_ylim(ax.get_xlim())
    ax.set_xlabel('Observed [m$^{3}$]')

    kw = {'alpha':0.5, 'linewidth':1, 'color':'k', 'linestyle':'-'}
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6,1E10]), **kw)
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6/2,1E10/2]), **kw)
    ax.plot(np.log10([1E6,1E10]), np.log10([1E6*2,1E10*2]), **kw)

    ax.set_ylim([6,10])
    ax.set_xlim(ax.get_ylim())
    ax.set_yticks([6,7,8,9,10])
    ax.set_yticklabels(['10$^{6}$','10$^{7}$','10$^{8}$','10$^{9}$','10$^{10}$'])
    ax.set_xticks(ax.get_yticks())
    ax.set_xticklabels(ax.get_yticklabels())

    # coords = ax.get_xlim()
    # kw = {'alpha':0.5, 'linewidth':1}
    # ax.plot([0,np.max(coords)],[0,np.max(coords)], 'k-', **kw)
    # ax.plot([0,np.max(coords)],[0,np.max(coords)*2], 'k--', **kw)
    # ax.plot([0,np.max(coords)],[0,np.max(coords)*0.5], 'k--', **kw)

    # locmaj = matplotlib.ticker.LogLocator(base=10,numticks=12) 
    # ax.xaxis.set_major_locator(locmaj)
    # ax.yaxis.set_major_locator(locmaj)

    # kwargs = {'rotation':40, 'horizontalalignment':'center', 'fontsize':8, 'verticalalignment':'center'}
    # if ax == ax1:
    #     loc=1200
    #     ax.text(loc, (loc/2)*0.6, "RCM = 1/2 * Obs", **kwargs)
    #     # ax.text(loc, loc*1.3, "RCM = Obs", **kwargs)
    #     loc=100
    #     ax.text(loc, (loc*2)*1.4, "RCM = 2 * Obs", **kwargs)

adjust_spines(ax1, ['left','bottom'])
adjust_spines(ax2, ['right','bottom'])


ax1.set_ylabel('MAR [m$^{^3}$]')
ax2.set_ylabel('RACMO [m$^{^3}$]')

leg = ax1.legend(fontsize=8, frameon=False, bbox_to_anchor=(0.9,0.1), loc='lower left', mode="expand")
ax2.set_zorder(-2)
for lh in leg.legendHandles: 
    lh.set_alpha(1)

for i,l in enumerate(leg.texts):
    l.set_y(-1.5)
#     l.set_x(-i*18+20)
# for i,l in enumerate(leg.legendHandles):
#     l.set_offsets([[-i*12.5+10+20,4],[-i*12.5+10+20,4]])

# plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45)
# plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45)

plt.savefig("./fig/scatter_yearsum.png", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.pdf", bbox_inches='tight', dpi=300)
# plt.savefig("./fig/scatter_yearsum.svg", bbox_inches='tight', dpi=300)

Nash-Sutcliff

# <<load_all_obs>>

for k in obs.keys():
    df = obs[k].dropna()
    if 'MAR_ice_upstream' not in df.columns: continue
    df['model'] = df['MAR_ice_upstream'] + df['MAR_land']
    NSE_MAR = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
    df['model'] = df['RACMO_ice_upstream'] + df['RACMO_land']
    NSE_RACMO = 1 - (np.sum((df['model'] - df['obs'])**2) / np.sum((df['obs'] - df['obs'].mean())**2))
    print(k, NSE_MAR, NSE_RACMO)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

lob.org

lob.org

Environment Initializations

Python

Packages

Graphics

Data Dir

Bash

Init

GRASS config

Config

Observations

Obs to standard format at each obs

W: Watson

Q: Qaanaaq

L: Leverett

N: Narsarsuaq

GEM

Load all observations

Scatter - Daily w/ PI

NOTDONE Tukey - all daily data

Modified Tukey & all daily data by discharge thirds

Merge Tukey

NOTDONE Scatter - Daily w/ weighted PI

Scatter - Yearly sum w/ PI

Nash-Sutcliff

Files

lob.org

Latest commit

History

lob.org

File metadata and controls

Environment Initializations

Python

Packages

Graphics

Data Dir

Bash

Init

GRASS config

Config

Observations

Obs to standard format at each obs

W: Watson

Q: Qaanaaq

L: Leverett

N: Narsarsuaq

GEM

Load all observations

Scatter - Daily w/ PI

NOTDONE Tukey - all daily data

Modified Tukey & all daily data by discharge thirds

Merge Tukey

NOTDONE Scatter - Daily w/ weighted PI

Scatter - Yearly sum w/ PI

Nash-Sutcliff