From 79a0e557a1925e2004330a5dd4840960cff0cdeb Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 2 May 2024 18:16:49 +0100 Subject: [PATCH 01/12] Initial support for OTP26 --- .github/workflows/erlang.yml | 27 +++++++------------ priv/tracers/tracer_accumulating_time.erl | 2 +- priv/tracers/tracer_backend_latency.erl | 2 +- priv/tracers/tracer_eleveldb_put_size.erl | 2 +- priv/tracers/tracer_fsm_init.erl | 2 +- priv/tracers/tracer_func_args.erl | 2 +- priv/tracers/tracer_gc_latency.erl | 2 +- priv/tracers/tracer_latency_histogram.erl | 2 +- priv/tracers/tracer_merge_and_and_handoff.erl | 2 +- priv/tracers/tracer_timeit.erl | 2 +- rebar.config | 4 +-- src/riak_kv_bitcask_backend.erl | 2 +- src/riak_kv_eleveldb_backend.erl | 2 +- src/riak_kv_leveled_backend.erl | 15 +++++------ src/riak_kv_memory_backend.erl | 2 +- src/riak_kv_multi_backend.erl | 2 +- src/riak_kv_multi_prefix_backend.erl | 2 +- src/riak_kv_test_util.erl | 2 +- src/riak_kv_yessir_backend.erl | 2 +- src/riak_object.erl | 6 +++-- 20 files changed, 38 insertions(+), 46 deletions(-) diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index 037271a9ad..aaaa205c1d 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -2,9 +2,15 @@ name: Erlang CI on: push: - branches: [ nhse-develop ] + branches: + - nhse-develop + - nhse-develop-3.2 + - nhse-develop-3.4 pull_request: - branches: [ nhse-develop ] + branches: + - nhse-develop + - nhse-develop-3.2 + - nhse-develop-3.4 jobs: @@ -17,25 +23,12 @@ jobs: strategy: fail-fast: false matrix: - otp: [22, 24, 25] + otp: [24, 26] os: [ubuntu-latest] - # OTP lower than 23 does not run on ubuntu-latest (22.04), see - # https://github.com/erlef/setup-beam#compatibility-between-operating-system-and-erlangotp - exclude: - - otp: 22 - os: ubuntu-latest - include: - - otp: 22 - os: ubuntu-20.04 steps: - uses: lukka/get-cmake@latest - - uses: actions/checkout@v2 - - name: Install dependencies (Ubuntu) - if: ${{ startsWith(matrix.os, 'ubuntu') }} - run: | - sudo apt-get -qq update - sudo apt-get -qq install libsnappy-dev libc6-dev + - uses: actions/checkout@v4 - name: Install Erlang/OTP uses: erlef/setup-beam@v1 with: diff --git a/priv/tracers/tracer_accumulating_time.erl b/priv/tracers/tracer_accumulating_time.erl index f66acda005..cd80818382 100644 --- a/priv/tracers/tracer_accumulating_time.erl +++ b/priv/tracers/tracer_accumulating_time.erl @@ -35,7 +35,7 @@ start(Pid_list, MFA_list, IntervalMS) -> {started, TPid}. stop() -> - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), stopped. diff --git a/priv/tracers/tracer_backend_latency.erl b/priv/tracers/tracer_backend_latency.erl index 408df78f2b..ef5429f46d 100644 --- a/priv/tracers/tracer_backend_latency.erl +++ b/priv/tracers/tracer_backend_latency.erl @@ -129,7 +129,7 @@ stop() -> %% io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), %% catch folsom_metrics:delete_metric(foo), - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), stopped. diff --git a/priv/tracers/tracer_eleveldb_put_size.erl b/priv/tracers/tracer_eleveldb_put_size.erl index c776e7a47d..fe8602ab58 100644 --- a/priv/tracers/tracer_eleveldb_put_size.erl +++ b/priv/tracers/tracer_eleveldb_put_size.erl @@ -40,7 +40,7 @@ start(Interval) -> {started, TPid}. stop() -> - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), stopped. diff --git a/priv/tracers/tracer_fsm_init.erl b/priv/tracers/tracer_fsm_init.erl index ba9c236472..f8fc552aec 100644 --- a/priv/tracers/tracer_fsm_init.erl +++ b/priv/tracers/tracer_fsm_init.erl @@ -44,7 +44,7 @@ start(Interval) -> {started, TPid}. stop() -> - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), stopped. diff --git a/priv/tracers/tracer_func_args.erl b/priv/tracers/tracer_func_args.erl index c4d51bcb8b..2f78bc6f89 100644 --- a/priv/tracers/tracer_func_args.erl +++ b/priv/tracers/tracer_func_args.erl @@ -78,7 +78,7 @@ stop() -> TotalCalls = lists:sum([Count || {_Arg, Count} <- Res]), io:format("Total calls: ~p\n", [TotalCalls]), io:format("Call stats:\n~p\n", [catch lists:sort(Sort, Res)]), - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), timer:sleep(100), stopped. diff --git a/priv/tracers/tracer_gc_latency.erl b/priv/tracers/tracer_gc_latency.erl index 680d17aa55..e170997041 100644 --- a/priv/tracers/tracer_gc_latency.erl +++ b/priv/tracers/tracer_gc_latency.erl @@ -35,7 +35,7 @@ start(LatencyMS) -> {started, TPid}. stop() -> - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), timer:sleep(100), catch folsom_metrics:delete_metric(foo), diff --git a/priv/tracers/tracer_latency_histogram.erl b/priv/tracers/tracer_latency_histogram.erl index d7f513b580..034d73f0fe 100644 --- a/priv/tracers/tracer_latency_histogram.erl +++ b/priv/tracers/tracer_latency_histogram.erl @@ -95,7 +95,7 @@ start(Mod, Func, Arity, RunSeconds) -> stop() -> io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), timer:sleep(100), catch folsom_metrics:delete_metric(foo), diff --git a/priv/tracers/tracer_merge_and_and_handoff.erl b/priv/tracers/tracer_merge_and_and_handoff.erl index d4b57f3752..edb492e99b 100644 --- a/priv/tracers/tracer_merge_and_and_handoff.erl +++ b/priv/tracers/tracer_merge_and_and_handoff.erl @@ -43,7 +43,7 @@ start(Interval) -> {started, TPid}. stop() -> - dbg:stop_clear(), + riak_core_tracer:stop_and_clear(), catch exit(element(2,dbg:get_tracer()), kill), stopped. diff --git a/priv/tracers/tracer_timeit.erl b/priv/tracers/tracer_timeit.erl index 6ddcf0fa0e..7d21bc9da6 100644 --- a/priv/tracers/tracer_timeit.erl +++ b/priv/tracers/tracer_timeit.erl @@ -42,7 +42,7 @@ timeit(Mod, Fun, Arity, Type) -> dbg:p(all, call), dbg:tpl(Mod, Fun, Arity, [{'_', [], [{return_trace}]}]). -stop() -> dbg:stop_clear(). +stop() -> riak_core_tracer:stop_and_clear(). trace({trace, Pid, call, {Mod, Fun, _}}, {D, {all, {Count, Max}}}) -> D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), diff --git a/rebar.config b/rebar.config index d39d80a53e..5bf0da9f12 100644 --- a/rebar.config +++ b/rebar.config @@ -42,11 +42,11 @@ ]}. {deps, [ - {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-develop"}}}, + {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-d32-nhscore.i5-tracer"}}}, {sidejob, {git, "https://github.com/nhs-riak/sidejob.git", {branch, "nhse-develop"}}}, {bitcask, {git, "https://github.com/nhs-riak/bitcask.git", {branch, "nhse-develop"}}}, {redbug, {git, "https://github.com/massemanet/redbug", {branch, "master"}}}, - {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.2"}}}, + {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.5"}}}, {sext, {git, "https://github.com/uwiger/sext.git", {tag, "1.8.0"}}}, {riak_pipe, {git, "https://github.com/nhs-riak/riak_pipe.git", {branch, "nhse-develop"}}}, {riak_dt, {git, "https://github.com/nhs-riak/riak_dt.git", {branch, "nhse-develop"}}}, diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 0820838a11..e95a054812 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -230,7 +230,7 @@ get(Bucket, Key, #state{ref=Ref, key_vsn=KVers}=State) -> %% NOTE: The bitcask backend does not currently support %% secondary indexing and the_IndexSpecs parameter %% is ignored. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index ee927be454..9cdb12b28f 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -177,7 +177,7 @@ get(Bucket, Key, #state{read_opts=ReadOpts, {error, Reason, State} end. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). %% @doc Normal put, use existing option, do not modify write options -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> diff --git a/src/riak_kv_leveled_backend.erl b/src/riak_kv_leveled_backend.erl index c50b109fb3..aade73f399 100644 --- a/src/riak_kv_leveled_backend.erl +++ b/src/riak_kv_leveled_backend.erl @@ -234,12 +234,9 @@ head(Bucket, Key, #state{bookie=Bookie}=State) -> end. %% @doc Insert an object into the leveled backend. --type index_spec() :: {add, Index, SecondaryKey} | - {remove, Index, SecondaryKey}. - -spec flush_put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), state()) -> {ok, state()} | @@ -250,7 +247,7 @@ flush_put(Bucket, Key, IndexSpecs, Val, State) -> -spec put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), state()) -> {ok, state()} | @@ -262,7 +259,7 @@ put(Bucket, Key, IndexSpecs, Val, State) -> %% @doc Delete an object from the leveled backend -spec delete(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], state()) -> {ok, state()} | {error, term(), state()}. @@ -671,10 +668,10 @@ callback(Ref, UnexpectedCallback, State) -> %% =================================================================== %% Internal functions %% =================================================================== - +-type regex() :: {re_pattern, term(), term(), term(), term()}|undefined. -spec dollarkey_foldfun( - riak_kv_backend:fold_keys_fun(), boolean(), re:mp()|undefined) + riak_kv_backend:fold_keys_fun(), boolean(), regex()) -> riak_kv_backend:fold_objects_fun(). dollarkey_foldfun(FoldKeysFun, ReadTombs, TermRegex) -> FilteredFoldKeysFun = @@ -738,7 +735,7 @@ log_fragmentation(Allocator) -> %% flush_put or put has been called -spec do_put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), boolean(), state()) -> diff --git a/src/riak_kv_memory_backend.erl b/src/riak_kv_memory_backend.erl index 193aecb524..84b73f8cd4 100644 --- a/src/riak_kv_memory_backend.erl +++ b/src/riak_kv_memory_backend.erl @@ -208,7 +208,7 @@ get(Bucket, Key, State=#state{data_ref=DataRef, end. %% @doc Insert an object into the memory backend. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()}. put(Bucket, PrimaryKey, IndexSpecs, Val, State=#state{data_ref=DataRef, diff --git a/src/riak_kv_multi_backend.erl b/src/riak_kv_multi_backend.erl index b82df3d8d5..f6e1031329 100644 --- a/src/riak_kv_multi_backend.erl +++ b/src/riak_kv_multi_backend.erl @@ -257,7 +257,7 @@ head(Bucket, Key, State) -> %% @doc Insert an object with secondary index %% information into the kv backend --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. diff --git a/src/riak_kv_multi_prefix_backend.erl b/src/riak_kv_multi_prefix_backend.erl index b195b1af20..c6ce4f2840 100644 --- a/src/riak_kv_multi_prefix_backend.erl +++ b/src/riak_kv_multi_prefix_backend.erl @@ -294,7 +294,7 @@ head(Bucket, Key, State) -> %% @doc Insert an object with secondary index %% information into the kv backend --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. diff --git a/src/riak_kv_test_util.erl b/src/riak_kv_test_util.erl index 1fadbb89d8..6de3931c9f 100644 --- a/src/riak_kv_test_util.erl +++ b/src/riak_kv_test_util.erl @@ -294,7 +294,7 @@ dep_apps(Test, Extra) -> (_) -> ok end, - [sasl, Silencer, folsom, exometer_core, runtime_tools, + [sasl, Silencer, exometer_core, runtime_tools, mochiweb, webmachine, sidejob, poolboy, basho_stats, bitcask, eleveldb, riak_core, riak_pipe, riak_api, riak_dt, riak_pb, riak_kv, DefaultSetupFun, Extra]. diff --git a/src/riak_kv_yessir_backend.erl b/src/riak_kv_yessir_backend.erl index 1f8a39e6ae..63f089da75 100644 --- a/src/riak_kv_yessir_backend.erl +++ b/src/riak_kv_yessir_backend.erl @@ -272,7 +272,7 @@ make_get_return_val(RObj, false = _WantsBinary, #state{op_get = Gets} = S) -> {ok, RObj, S#state{op_get = Gets + 1}}. %% @doc Store an object, yes, sir! --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()}. put(_Bucket, _PKey, _IndexSpecs, _Val, #state{op_put = Puts} = S) -> diff --git a/src/riak_object.erl b/src/riak_object.erl index 6b04dcfcd3..9cc5e797ec 100644 --- a/src/riak_object.erl +++ b/src/riak_object.erl @@ -73,8 +73,11 @@ -type r_content() :: #r_content{}. -type index_op() :: add | remove. -type index_value() :: integer() | binary(). +-type index_spec() :: {index_op(), binary(), index_value()}. -type binary_version() :: v0 | v1. +-export_type([index_spec/0]). + -define(MAX_KEY_SIZE, 65536). -define(LASTMOD_LEN, 29). %% static length of rfc1123_date() type. Hard-coded in Erlang. @@ -984,8 +987,7 @@ dvv_enabled(Bucket) -> %% the case where there is no existing object %% stored for a key and therefore no existing %% index data. --spec index_specs(riak_object()) -> - [{index_op(), binary(), index_value()}]. +-spec index_specs(riak_object()) -> [{index_op(), binary(), index_value()}]. index_specs(Obj) -> Indexes = index_data(Obj), assemble_index_specs(Indexes, add). From 246774ac8fc204e87c8d7d84709a451fb5c2e6a6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 2 May 2024 18:23:12 +0100 Subject: [PATCH 02/12] Remove tracers --- priv/tracers/tracer_accumulating_time.erl | 79 ------- priv/tracers/tracer_backend_latency.erl | 199 ------------------ priv/tracers/tracer_eleveldb_put_size.erl | 77 ------- priv/tracers/tracer_fsm_init.erl | 92 -------- priv/tracers/tracer_func_args.erl | 104 --------- priv/tracers/tracer_gc_latency.erl | 82 -------- priv/tracers/tracer_large4.erl | 107 ---------- priv/tracers/tracer_latency_histogram.erl | 127 ----------- priv/tracers/tracer_merge_and_and_handoff.erl | 95 --------- priv/tracers/tracer_read_bin_trace_file.erl | 70 ------ priv/tracers/tracer_timeit.erl | 118 ----------- rebar.config | 1 - 12 files changed, 1151 deletions(-) delete mode 100644 priv/tracers/tracer_accumulating_time.erl delete mode 100644 priv/tracers/tracer_backend_latency.erl delete mode 100644 priv/tracers/tracer_eleveldb_put_size.erl delete mode 100644 priv/tracers/tracer_fsm_init.erl delete mode 100644 priv/tracers/tracer_func_args.erl delete mode 100644 priv/tracers/tracer_gc_latency.erl delete mode 100644 priv/tracers/tracer_large4.erl delete mode 100644 priv/tracers/tracer_latency_histogram.erl delete mode 100644 priv/tracers/tracer_merge_and_and_handoff.erl delete mode 100644 priv/tracers/tracer_read_bin_trace_file.erl delete mode 100644 priv/tracers/tracer_timeit.erl diff --git a/priv/tracers/tracer_accumulating_time.erl b/priv/tracers/tracer_accumulating_time.erl deleted file mode 100644 index cd80818382..0000000000 --- a/priv/tracers/tracer_accumulating_time.erl +++ /dev/null @@ -1,79 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_accumulating_time). - --compile(export_all). --compile(nowarn_export_all). - -start(Pid_list, MFA_list, IntervalMS) -> - dbg:tracer(process, {fun trace/2, new_stats()}), - [dbg:p(Pid, [call, timestamp, arity]) || Pid <- Pid_list], - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - {Mod, Func, Arity} <- MFA_list], - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(IntervalMS, TPid, print_report), - {started, TPid}. - -stop() -> - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace_ts, Pid, call, {Mod, Func, Arity}, TS}, {Dict}) -> - MFA = {Mod, Func, Arity}, - DKey = {Pid, MFA}, - {dict:store(DKey, TS, Dict)}; -trace({trace_ts, Pid, return_from, {Mod, Func, Arity}, _Res, TS}, {Dict}) -> - MFA = {Mod, Func, Arity}, - DKey = {Pid, MFA}, - Start = case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - Elapsed = timer:now_diff(TS, Start), - SumKey = {sum, MFA}, - {OldCount, OldTime} = case dict:find(SumKey, Dict) of - error -> - {0, 0}; - {ok, Else} -> - Else - end, - Dict2 = dict:erase(DKey, Dict), - {dict:store(SumKey, {OldCount+1, OldTime+Elapsed}, Dict2)}; -trace(print_report, {Dict}) -> - print_stats(Dict), - {dict:from_list([X || {K, _V} = X <- dict:to_list(Dict), - element(1, K) /= sum])}; -trace(Unknown, {Dict}) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - {Dict}. - -new_stats() -> - {dict:new()}. - -print_stats(Dict) -> - Reports = lists:sort([{MFA, X} || {{sum, MFA}, X} <- dict:to_list(Dict)]), - [io:format("~p MFA ~p count ~p elapsed_msec ~p\n", - [time(), MFA, Count, Sum div 1000]) || - {MFA, {Count, Sum}} <- Reports]. diff --git a/priv/tracers/tracer_backend_latency.erl b/priv/tracers/tracer_backend_latency.erl deleted file mode 100644 index ef5429f46d..0000000000 --- a/priv/tracers/tracer_backend_latency.erl +++ /dev/null @@ -1,199 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_backend_latency). - --compile(export_all). --compile(nowarn_export_all). - -start() -> - start(500). - -start(LatencyMS) -> - start(LatencyMS, [get_fsm, put_fsm, - bitcask, eleveldb, file, prim_file, riak_kv_fs2_backend]). - -start(LatencyMS, Modules) -> - %% catch folsom_metrics:delete_metric(foo), - %% folsom_metrics:new_histogram(foo, uniform, 9981239823), - - dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}), - dbg:p(all, [call, timestamp, arity]), - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) || - lists:member(put_fsm, Modules), - Mod <- [riak_kv_put_fsm], - {Func, Arity} <- [{init, 1}, {finish, 2}]], - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) || - lists:member(get_fsm, Modules), - Mod <- [riak_kv_get_fsm], - {Func, Arity} <- [{init, 1}, {finalize, 1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(bitcask, Modules), - Mod <- [bitcask], - {Func, Arity} <- [ - {open,1}, {open,2}, - {close,1}, - {close_write_file,1}, - {get,2}, - {put,3}, - {delete,2}, - {sync,1}, - {iterator,3}, {iterator_next,1}, {iterator_release,1}, - {needs_merge,1}, - {is_empty_estimate,1}, - {status,1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(eleveldb, Modules), - Mod <- [eleveldb], - {Func, Arity} <- [ - {open,2}, - {close,1}, - {get,3}, - {put,4}, - {delete,3}, - {write,3}, - {status,2}, - {destroy,2}, - {is_empty,1}, - {iterator,2}, - {iterator,3}, - {iterator_move,2}, - {iterator_close,1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(file, Modules), - Mod <- [file], - {Func, Arity} <- [ - {open,2}, - {close,1}, - {pread,2}, - {pread,3}, - {read,2}, - {write,2}, - {pwrite,2}, - {pwrite,3}, - {truncate,1}, - {delete,1}, - {position,2}, - {sync,1} - ]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(prim_file, Modules), - Mod <- [prim_file], - {Func, Arity} <- [ - {list_dir,2}, - {read_file_info,1}, - {write_file_info,1} - ]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(riak_kv_fs2_backend, Modules), - Mod <- [riak_kv_fs2_backend], - {Func, Arity} <- [ - {get_object,4}, - {put_object,5}, - {delete,4}]], - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - %% timer:send_interval(Interval, TPid, print_report), - io:format("Not using timer:send_interval...\n"), - {started, TPid}. - -stop() -> - %% io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), - %% catch folsom_metrics:delete_metric(foo), - - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace_ts, Pid, call, {riak_kv_put_fsm, init, _}, TS}, {Dict, LMS}) -> - {dict:store({put, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, call, {riak_kv_put_fsm, finish, _}, TS}, {Dict, LatencyMS}) -> - Start = case dict:find({put, Pid}, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: put_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]); - _Elapsed -> - ok - end, - {dict:erase(Pid, Dict), LatencyMS}; -trace({trace_ts, Pid, call, {riak_kv_get_fsm, init, _}, TS}, {Dict, LMS}) -> - {dict:store({get, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, call, {riak_kv_get_fsm, finalize, _}, TS}, {Dict, LatencyMS}) -> - Start = case dict:find({get, Pid}, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: get_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]); - _Elapsed -> - ok - end, - {dict:erase(Pid, Dict), LatencyMS}; -trace({trace_ts, Pid, call, {Mod, _, _}, TS}, {Dict, LMS}) -> - {dict:store({Mod, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, return_from, {Mod, Func, _}, _Res, TS}, {Dict, LatencyMS}) -> - DKey = {Mod, Pid}, - Start = case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: ~p ~p: ~p msec\n", [date(), time(), Mod, - Func, Elapsed]); - _Elapsed -> - ok - end, - %% if Mod == file, Func == pread -> - %% folsom_metrics_histogram:update(foo, Elapsed); - %% true -> - %% ok - %% end, - {dict:erase(DKey, Dict), LatencyMS}; -trace(print_report, DictStats) -> - %% print_stats(DictStats), - %% new_stats(); - DictStats; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_eleveldb_put_size.erl b/priv/tracers/tracer_eleveldb_put_size.erl deleted file mode 100644 index fe8602ab58..0000000000 --- a/priv/tracers/tracer_eleveldb_put_size.erl +++ /dev/null @@ -1,77 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_eleveldb_put_size). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(10*1000). - -start(Interval) -> - Stats = {StatName, _} = new_stats(), - reset_metric(StatName), - - dbg:tracer(process, {fun trace/2, Stats}), - dbg:p(all, [call]), - dbg:tpl(eleveldb, write, 3, [{'_', [], []}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace, _Pid, call, {eleveldb, write, [_, PutList, _]}}, - {StatName, SumBytes}) -> - Bs = [begin - Bs = size(K) + size(V), - folsom_metrics_histogram:update(StatName, Bs), - Bs - end || {put, K, V} <- PutList], - {StatName, SumBytes + lists:sum(Bs)}; -trace(print_report, Stats = {StatName, _}) -> - print_stats(Stats), - reset_metric(StatName), - new_stats(); -trace(_Unknown, Stats) -> - erlang:display(wha), - %% io:format("Unknown! ~P\n", [Unknown, 20]), - Stats. - -new_stats() -> - {foo, 0}. - -print_stats({StatName, SumBytes}) -> - if SumBytes == 0 -> - io:format("~p ~p: 0 bytes\n", [date(), time()]); - true -> - Ps = folsom_metrics:get_histogram_statistics(StatName), - io:format("~p ~p: ~p bytes\n ~p\n", [date(), time(), SumBytes, Ps]) - end. - -reset_metric(Stats) -> - catch folsom_metrics:delete_metric(Stats), - folsom_metrics:new_histogram(Stats, uniform, 9981239823). diff --git a/priv/tracers/tracer_fsm_init.erl b/priv/tracers/tracer_fsm_init.erl deleted file mode 100644 index f8fc552aec..0000000000 --- a/priv/tracers/tracer_fsm_init.erl +++ /dev/null @@ -1,92 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_fsm_init). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(1*1000). - -start(Interval) -> - %%% Count the get, put, buckets, keys, exchange, and index FSM init() calls - dbg:tracer(process, {fun trace/2, new_stats()}), - dbg:p(all, call), - [dbg:tpl(Mod, init, 1, [{'_', [], []}]) || - Mod <- [riak_kv_buckets_fsm, riak_kv_exchange_fsm, riak_kv_get_fsm, riak_kv_index_fsm, riak_kv_keys_fsm, riak_kv_put_fsm]], - dbg:tpl(riak_kv_put_fsm, start_link, 3, [{'_', [], []}]), - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace, _Pid, call, {riak_kv_put_fsm, start_link, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link+1, R, P, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_get_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R+1, P, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_put_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P+1, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_buckets_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B+1, E, I, K}; -trace({trace, _Pid, call, {riak_kv_exchange_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E+1, I, K}; -trace({trace, _Pid, call, {riak_kv_index_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E, I+1, K}; -trace({trace, _Pid, call, {riak_kv_keys_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E, I, K+1}; -trace(print_report, Stats) -> - print_stats(Stats), - new_stats(); -trace(Unknown, Stats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - Stats. - -new_stats() -> - {0, - 0, 0, 0, 0, 0, 0}. - -print_stats({Pstart_link, Get, Put, Buckets, Exchange, Index, Keys}) -> - Stats = [{put_start, Pstart_link}, - {get, Get}, - {put, Put}, - {buckets, Buckets}, - {exchange, Exchange}, - {index, Index}, - {keys, Keys}], - io:format("~p ~p: ~p\n", [date(), time(), Stats]). diff --git a/priv/tracers/tracer_func_args.erl b/priv/tracers/tracer_func_args.erl deleted file mode 100644 index 2f78bc6f89..0000000000 --- a/priv/tracers/tracer_func_args.erl +++ /dev/null @@ -1,104 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% For example: what ETS tables are being called the most by ets:lookup/2? -%% The 1st arg of ets:lookup/2 is the table name. -%% Watch for 10 seconds. -%% -%% > func_args_tracer:start(ets, lookup, 2, 10, fun(Args) -> hd(Args) end). -%% -%% Tracer pid: <0.16102.15>, use func_args_tracer:stop() to stop -%% Otherwise, tracing stops in 10 seconds -%% Current date & time: {2013,9,19} {18,5,48} -%% {started,<0.16102.15>} -%% Total calls: 373476 -%% Call stats: -%% [{folsom_histograms,114065}, -%% {ac_tab,69689}, -%% {ets_riak_core_ring_manager,67147}, -%% {folsom_spirals,57076}, -%% {riak_capability_ets,48862}, -%% {riak_core_node_watcher,8149}, -%% {riak_api_pb_registrations,8144}, -%% {folsom,243}, -%% {folsom_meters,43}, -%% {folsom_durations,20}, -%% {timer_tab,18}, -%% {folsom_gauges,8}, -%% {riak_core_stat_cache,5}, -%% {sys_dist,3}, -%% {inet_db,1}, -%% {21495958,1}, -%% {3145765,1}, -%% {3407910,1}] -%% - --module(tracer_func_args). - --compile(nowarn_export_all). --compile(export_all). - -start(Mod, Func, Arity, RunSeconds) -> - start(Mod, Func, Arity, RunSeconds, fun(Args) -> Args end). - -start(Mod, Func, Arity, RunSeconds, ArgMangler) -> - catch ets:delete(foo), - ets:new(foo, [named_table, public, set]), - dbg:tracer(process, {fun trace/2, new_stats({foo, ArgMangler})}), - dbg:p(all, call), - dbg:tpl(Mod, Func, Arity, [{'_', [], []}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]), - io:format("Current date & time: ~p ~p\n", [date(), time()]), - spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end), - {started, TPid}. - -stop() -> - Sort = fun({_,A}, {_, B}) -> A > B end, - Res = ets:tab2list(foo), - TotalCalls = lists:sum([Count || {_Arg, Count} <- Res]), - io:format("Total calls: ~p\n", [TotalCalls]), - io:format("Call stats:\n~p\n", [catch lists:sort(Sort, Res)]), - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - stopped. - -trace({trace, _Pid, call, {_, _, Args}}, {Tab, ArgMangler} = Acc) -> - Args2 = ArgMangler(Args), - try - ets:update_counter(Tab, Args2, {2, 1}) - catch _:_ -> - ets:insert(Tab, {Args2, 1}) - end, - Acc; -trace(Unknown, DictStats) -> - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats({Tab, _ArgMangler} = Acc) -> - ets:delete_all_objects(Tab), - Acc. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_gc_latency.erl b/priv/tracers/tracer_gc_latency.erl deleted file mode 100644 index e170997041..0000000000 --- a/priv/tracers/tracer_gc_latency.erl +++ /dev/null @@ -1,82 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_gc_latency). - --compile(nowarn_export_all). --compile(export_all). - -start(LatencyMS) -> - catch folsom_metrics:delete_metric(foo), - folsom_metrics:new_histogram(foo, uniform, 50*1000*1000), - dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}), - {ok, _} = dbg:p(all, [timestamp, garbage_collection, running]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Current date & time: ~p ~p local time\n", [date(), time()]), - {started, TPid}. - -stop() -> - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - catch folsom_metrics:delete_metric(foo), - stopped. - -trace({trace_ts, Pid, gc_start, _Stats, TS}, {Dict, LMS}) -> - {dict:store(Pid, TS, Dict), LMS}; -trace({trace_ts, Pid, gc_end, _Stats, TS}, {Dict, LMS}=Acc) -> - DKey = Pid, - case dict:find(DKey, Dict) of - {ok, GcStart} -> - Elapsed = erlang:max(-1, (timer:now_diff(TS, GcStart) div 1000)), - if Elapsed > LMS -> - io:format("~p: GC of ~p elapsed time ~p > threshold ~p\n", - [time(), Pid, Elapsed, LMS]), - io:format(" ~w,~w\n", [process_info(Pid, message_queue_len), _Stats]); - true -> - ok - end, - {dict:erase(DKey, Dict), LMS}; - error -> - Acc - end; -trace({trace_ts, Pid, InOrOut, _MFA, TS}, {Dict, _LMS}=Acc) -> - DKey = Pid, - case dict:find(DKey, Dict) of - {ok, GcStart} -> - io:format("Hey, pid ~p scheduled ~p but started GC ~p msec ago\n", - [Pid, InOrOut, timer:now_diff(TS, GcStart)]); - _ -> - ok - end, - Acc; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n\t~P", [Unknown, 20, DictStats,7]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_large4.erl b/priv/tracers/tracer_large4.erl deleted file mode 100644 index 93eb301c00..0000000000 --- a/priv/tracers/tracer_large4.erl +++ /dev/null @@ -1,107 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_large4). - --compile(nowarn_export_all). --compile(export_all). - --record(r_object, {bucket = '_', - key = '_', - contents = '_', - vclock = '_', - updatemetadata = '_', - updatevalue = '_' - }). - -go(Time, Count, Size) -> - ss(), - %% gets - GetMS = [{['_', - #r_object{bucket='$1', - key='$2'}], - [], - [{message,{{'$1','$2'}}}, {return_trace}]}], - erlang:trace_pattern({riak_kv_get_fsm, calculate_objsize, 2}, GetMS, [local]), - - %% puts - PutMS = [{['$1','$2','_','$3','_'], - [{'>',{size,'$3'},Size}], - [{message,{{'$1','$2',{size,'$3'}}}}]}], - erlang:trace_pattern({riak_kv_eleveldb_backend, put, 5}, PutMS, [local]), - erlang:trace_pattern({riak_kv_bitcask_backend, put, 5}, PutMS, [local]), - erlang:trace_pattern({riak_kv_memory_backend, put, 5}, PutMS, [local]), - - {Tracer, _} = spawn_monitor(?MODULE, tracer, [0, Count, Size, dict:new()]), - erlang:trace(all, true, [call, arity, {tracer, Tracer}]), - receive - {'DOWN', _, process, Tracer, _} -> - ok - after Time -> - exit(Tracer, kill), - receive - {'DOWN', _, process, Tracer, _} -> - ok - end - end, - ss(), - io:format("object trace stopped~n"). - -tracer(Limit, Limit, _, _) -> - ok; -tracer(Count, Limit, Threshold, Objs) -> - receive - {trace,Pid,call,{riak_kv_get_fsm,calculate_objsize,2},{Bucket,Key}} -> - Objs2 = dict:store(Pid, {Bucket,Key}, Objs), - tracer(Count+1, Limit, Threshold, Objs2); - {trace,Pid,return_from,{riak_kv_get_fsm,calculate_objsize,2},Size} -> - case Size >= Threshold of - true -> - case dict:find(Pid, Objs) of - {ok, {Bucket, Key}} -> - io:format("~p: get: ~p~n", [ts(), {Bucket, Key, Size}]); - _ -> - ok - end; - false -> - ok - end, - Objs2 = dict:erase(Pid, Objs), - tracer(Count+1, Limit, Threshold, Objs2); - {trace,_Pid,call,{riak_kv_eleveldb_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(l): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - {trace,_Pid,call,{riak_kv_bitcask_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(b): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - {trace,_Pid,call,{riak_kv_memory_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(m): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - Msg -> - io:format("tracer: ~p~n", [Msg]), - tracer(Count+1, Limit, Threshold, Objs) - end. - -ts() -> - calendar:now_to_datetime(os:timestamp()). - -ss() -> - erlang:trace_pattern({'_','_','_'}, false, [local]), - erlang:trace(all, false, [call, arity]). diff --git a/priv/tracers/tracer_latency_histogram.erl b/priv/tracers/tracer_latency_histogram.erl deleted file mode 100644 index 034d73f0fe..0000000000 --- a/priv/tracers/tracer_latency_histogram.erl +++ /dev/null @@ -1,127 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% For example: create a histogram of call latencies for bitcask:put/3. -%% Watch for 10 seconds. -%% -%% > latency_histogram_tracer:start(bitcask, put, 3, 10). -%% -%% Tracer pid: <0.2108.18>, use latency_histogram_tracer:stop() to stop -%% Otherwise, tracing stops in 10 seconds -%% Current date & time: {2013,9,19} {18,14,13} -%% {started,<0.2108.18>} -%% Histogram stats: -%% [{min,0}, -%% {max,48}, -%% {arithmetic_mean,2.765411819271055}, -%% {geometric_mean,2.527103493663478}, -%% {harmonic_mean,2.2674039086593973}, -%% {median,3}, -%% {variance,3.5629207473971585}, -%% {standard_deviation,1.8875700642352746}, -%% {skewness,2.0360354571500774}, -%% {kurtosis,18.529695846728423}, -%% {percentile,[{50,3},{75,4},{90,5},{95,6},{99,8},{999,14}]}, -%% {histogram,[{1,13436}, -%% {2,12304}, -%% {3,10789}, -%% {4,7397}, -%% {5,4191}, -%% {6,1929}, -%% {7,873}, -%% {8,420}, -%% {9,163}, -%% {10,79}, -%% {11,42}, -%% {12,47}, -%% {13,11}, -%% {14,16}, -%% {15,7}, -%% {16,5}, -%% {17,3}, -%% {18,4}, -%% {19,2}, -%% {20,4}, -%% {21,1}, -%% {22,11}, -%% {23,2}, -%% {24,1}, -%% {25,2}, -%% {26,1}, -%% {27,0}, -%% {28,1}, -%% {29,2}, -%% {30,0}, -%% {31,0}, -%% {40,2}, -%% {50,1}]}, -%% {n,51746}] - --module(tracer_latency_histogram). - --compile(nowarn_export_all). --compile(export_all). - -start(Mod, Func, Arity, RunSeconds) -> - catch folsom_metrics:delete_metric(foo), - folsom_metrics:new_histogram(foo, uniform, 50*1000*1000), - dbg:tracer(process, {fun trace/2, new_stats(0)}), - dbg:p(all, [call, timestamp, arity]), - dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]), - io:format("Current date & time: ~p ~p\n", [date(), time()]), - spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end), - {started, TPid}. - -stop() -> - io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - catch folsom_metrics:delete_metric(foo), - stopped. - -trace({trace_ts, Pid, call, {_, _, _}, TS}, {Dict, LMS}) -> - {dict:store(Pid, TS, Dict), LMS}; -trace({trace_ts, Pid, return_from, {_, _, _}, _Res, TS}, {Dict, LatencyMS}) -> - DKey = Pid, - Start = case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - Elapsed = timer:now_diff(TS, Start) div 1000, - folsom_metrics_histogram:update(foo, Elapsed), - {dict:erase(DKey, Dict), LatencyMS}; -trace(print_report, DictStats) -> - DictStats; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_merge_and_and_handoff.erl b/priv/tracers/tracer_merge_and_and_handoff.erl deleted file mode 100644 index edb492e99b..0000000000 --- a/priv/tracers/tracer_merge_and_and_handoff.erl +++ /dev/null @@ -1,95 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_merge_and_and_handoff). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(1*1000). - -start(Interval) -> - dbg:tracer(process, {fun trace/2, {orddict:new(), orddict:new()}}), - dbg:p(all, call), - dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], []}]), - dbg:tpl(riak_kv_vnode, encode_handoff_item, 2, [{'_', [], []}]), - dbg:tpl(riak_core_handoff_receiver, process_message, 3, [{'_', [], []}]), - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - riak_core_tracer:stop_and_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace, _Pid, call, {bitcask, merge_single_entry, - [K, V, _TS, _FId, {File,_,_,_}, _State]}}, - {MDict, HDict}) -> - Dir = re:replace(File, "/[^/]*\$", "", [{return, binary}]), - Bytes = size(K) + size(V), - MDict2 = increment_cbdict(MDict, Dir, Bytes), - {MDict2, HDict}; -trace({trace, _Pid, call, {riak_kv_vnode, encode_handoff_item, - [{B, K}, V]}}, - {MDict, HDict}) -> - Bytes = size(B) + size(K) + size(V), - Key = "all-sending-handoff", - HDict2 = increment_cbdict(HDict, Key, Bytes), - {MDict, HDict2}; -trace({trace, _Pid, call, {riak_core_handoff_receiver, process_message, - [_Type, Msg, State]}}, - {MDict, HDict}) -> - Bytes = size(Msg), - Partition = element(5, State), % ugly hack - Key = Partition, - HDict2 = increment_cbdict(HDict, Key, Bytes), - {MDict, HDict2}; -trace(print_report, {MDict, HDict}) -> - print_stats(MDict, merge), - print_stats(HDict, handoff), - {orddict:new(), orddict:new()}. - -%% "cb" = count + bytes -increment_cbdict(Dict, Key, Bytes) -> - orddict:update(Key, fun({Count, Bs}) -> {Count + 1, Bs + Bytes} end, - {1, Bytes}, Dict). - -print_stats(Dict, Type) -> - F = fun(Key, {Count, Bytes}, {SumC, SumB}) when Count > 0 -> - io:format("~p ~p: ~p items ~p bytes ~p avg-size ~p\n", - [date(), time(), Count, Bytes, Bytes div Count, Key]), - {SumC + Count, SumB + Bytes}; - (_, _, Acc) -> - Acc - end, - {Count, Bytes} = orddict:fold(F, {0, 0}, Dict), - Avg = if Count > 0 -> Bytes div Count; - true -> 0 - end, - io:format("~p ~p: ~p total: ~p items ~p bytes ~p avg-size\n", - [date(), time(), Type, Count, Bytes, Avg]). diff --git a/priv/tracers/tracer_read_bin_trace_file.erl b/priv/tracers/tracer_read_bin_trace_file.erl deleted file mode 100644 index cfe3a2efeb..0000000000 --- a/priv/tracers/tracer_read_bin_trace_file.erl +++ /dev/null @@ -1,70 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_read_bin_trace_file). - --compile(nowarn_export_all). --compile(export_all). - -read(Path) -> - read(Path, 1). - -read(Path, LatencyMS) -> - {ok, FH} = file:open(Path, [read, binary, raw]), - read(file:read(FH, 5), FH, LatencyMS, []). - -read(eof, _FH, _, _) -> - ok; -read({ok, <>}, FH, LatencyMS, Hist) -> - {ok, Bin} = file:read(FH, Size), - case binary_to_term(Bin) of - {trace_ts, _, call, {M,F,A}, Time} -> - %%io:format("call MFA = ~p:~p/~p, ", [M, F, length(A)]), - read(file:read(FH, 5), FH, LatencyMS, [{{M,F,length(A)}, Time, A}|Hist]); - {trace_ts, _, return_from, MFA, Res, EndTime} -> - %%io:format("MFA ~p Hist ~p\n", [MFA, Hist]), - try - {value, {_, StartTime, A}, NewHist} = lists:keytake(MFA, 1, Hist), - MSec = timer:now_diff(EndTime, StartTime)/1000, - if MSec > LatencyMS -> - io:format("~p ~p msec\nArgs: (~p/~p) ~P\nRes: ~P\n\n", - [MFA, MSec, - erts_debug:flat_size(A), erts_debug:size(A), - A, 20, Res, 20]); - true -> - ok - end, - read(file:read(FH, 5), FH, LatencyMS, NewHist) - catch - error:{badmatch,false} -> - read(file:read(FH, 5), FH, LatencyMS, Hist); - Class:Reason:Stacktrace -> - io:format("ERR ~p ~p @ ~p\n", [Class, Reason, Stacktrace]), - read(file:read(FH, 5), FH, LatencyMS, Hist) - end - end. - -%% read(eof, _FH) -> -%% ok; -%% read({ok, <>}, FH) -> -%% {ok, Bin} = file:read(FH, Size), -%% io:format("~P\n", [binary_to_term(Bin), 15]), -%% read(file:read(FH, 5), FH). - diff --git a/priv/tracers/tracer_timeit.erl b/priv/tracers/tracer_timeit.erl deleted file mode 100644 index 7d21bc9da6..0000000000 --- a/priv/tracers/tracer_timeit.erl +++ /dev/null @@ -1,118 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_timeit). - --compile(nowarn_export_all). --compile(export_all). - -%% @doc Dynamically add timing to MFA. There are various types of -%% timing. -%% -%% all - time latency of all calls to MFA -%% -%% {sample, N, Max} - sample every N calls and stop sampling after Max -%% -%% {threshold, Millis, Max} - count # of calls where latency is > Millis -%% and count # of calls total, thus percentage of calls over threshold -timeit(Mod, Fun, Arity, Type) -> - Type2 = case Type of - {sample, N, Max} -> {sample, {N, Max}, {0, 0, 0}}; - {threshold, Millis, Max} -> {threshold, {Millis, Max}, {0, 0}}; - {all, Max} -> {all, {0, Max}} - end, - dbg:tracer(process, {fun trace/2, {orddict:new(), Type2}}), - dbg:p(all, call), - dbg:tpl(Mod, Fun, Arity, [{'_', [], [{return_trace}]}]). - -stop() -> riak_core_tracer:stop_and_clear(). - -trace({trace, Pid, call, {Mod, Fun, _}}, {D, {all, {Count, Max}}}) -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {all, {Count, Max}}}; -trace({trace, Pid, call, {Mod, Fun, _}}, - {D, {sample, {N, Max}, {M, K, Total}}}) -> - M2 = M+1, - Total2 = Total+1, - if N == M2 -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {sample, {N, Max}, {0, K, Total2}}}; - true -> - {D, {sample, {N, Max}, {M2, K, Total2}}} - end; -trace({trace, Pid, call, {Mod, Fun, _}}, - {D, {threshold, {Millis, Max}, {Over, Total}}}) -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {threshold, {Millis, Max}, {Over, Total+1}}}; - -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {all, {Count, Max}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - Count2 = Count+1, - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs/1000, - io:format(user, "~p:~p:~p: ~p ms\n", [Pid, Mod, Fun, ElapsedMs]), - if Count2 == Max -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {all, {Count2, Max}}} - end; - error -> Acc - end; -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {sample, {N, Max}, {M, K, Total}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - K2 = K+1, - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs/1000, - io:format(user, "[sample ~p/~p] ~p:~p:~p: ~p ms\n", - [K2, Total, Pid, Mod, Fun, ElapsedMs]), - if K2 == Max -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {sample, {N, Max}, {M, K2, Total}}} - end; - error -> Acc - end; -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {threshold, {Millis, Max}, {Over, Total}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs / 1000, - if ElapsedMs > Millis -> - Over2 = Over+1, - io:format(user, "[over threshold ~p, ~p/~p] ~p:~p:~p: ~p ms\n", - [Millis, Over2, Total, Pid, Mod, Fun, ElapsedMs]); - true -> - Over2 = Over - end, - if Max == Over -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {threshold, {Millis, Max}, {Over2, Total}}} - end; - error -> Acc - end. diff --git a/rebar.config b/rebar.config index 5bf0da9f12..e3b2862a24 100644 --- a/rebar.config +++ b/rebar.config @@ -1,6 +1,5 @@ {minimum_otp_vsn, "22.0"}. -{src_dirs, ["./priv/tracers", "./src"]}. {cover_enabled, false}. {edoc_opts, [{preprocess, true}]}. {erl_opts, [warnings_as_errors, From 6c46b9ddeae3178548657fce2d9c2414a3d9c856 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Sat, 11 May 2024 20:22:25 +0200 Subject: [PATCH 03/12] Update rebar.config --- rebar.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rebar.config b/rebar.config index e3b2862a24..2ed3cb8089 100644 --- a/rebar.config +++ b/rebar.config @@ -41,7 +41,7 @@ ]}. {deps, [ - {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-d32-nhscore.i5-tracer"}}}, + {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-develop-3.4"}}}, {sidejob, {git, "https://github.com/nhs-riak/sidejob.git", {branch, "nhse-develop"}}}, {bitcask, {git, "https://github.com/nhs-riak/bitcask.git", {branch, "nhse-develop"}}}, {redbug, {git, "https://github.com/massemanet/redbug", {branch, "master"}}}, From 1d04e595fc446dad5754865f1b639c8988670ff4 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 May 2024 11:06:24 +0200 Subject: [PATCH 04/12] Update rebar.config --- rebar.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rebar.config b/rebar.config index 2ed3cb8089..212880c0ed 100644 --- a/rebar.config +++ b/rebar.config @@ -49,8 +49,8 @@ {sext, {git, "https://github.com/uwiger/sext.git", {tag, "1.8.0"}}}, {riak_pipe, {git, "https://github.com/nhs-riak/riak_pipe.git", {branch, "nhse-develop"}}}, {riak_dt, {git, "https://github.com/nhs-riak/riak_dt.git", {branch, "nhse-develop"}}}, - {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-develop"}}}, + {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-d34-otp26"}}}, {hyper, {git, "https://github.com/nhs-riak/hyper", {branch, "nhse-develop"}}}, {kv_index_tictactree, {git, "https://github.com/nhs-riak/kv_index_tictactree.git", {branch, "nhse-develop"}}}, - {rhc, {git, "https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-develop"}}} + {rhc, {git, "https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-d34-otp26"}}} ]}. From 123558bf14a58d7db0acb7454bd4f8cfe246ebb6 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 May 2024 13:01:15 +0200 Subject: [PATCH 05/12] Cache compiled regex as persistent_term --- src/riak_kv_wm_object.erl | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/riak_kv_wm_object.erl b/src/riak_kv_wm_object.erl index dfc66d43be..3659fc7421 100644 --- a/src/riak_kv_wm_object.erl +++ b/src/riak_kv_wm_object.erl @@ -601,7 +601,7 @@ malformed_index_headers(RD, Ctx) -> %% client's PUT request, to be indexed at write time. extract_index_fields(RD) -> PrefixSize = length(?HEAD_INDEX_PREFIX), - {ok, RE} = re:compile(",\\s"), + RE = get_compiled_index_regex(), F = fun({K,V}, Acc) -> KList = riak_kv_wm_utils:any_to_list(K), @@ -1255,7 +1255,7 @@ get_link_heads(RD, Ctx) -> Bucket = Ctx#ctx.bucket, %% Get a list of link headers... - LinkHeaders1 = + LinkHeaders = case wrq:get_req_header(?HEAD_LINK, RD) of undefined -> []; Heads -> string:tokens(Heads, ",") @@ -1264,21 +1264,14 @@ get_link_heads(RD, Ctx) -> %% Decode the link headers. Throw an exception if we can't %% properly parse any of the headers... {BucketLinks, KeyLinks} = - case APIVersion of - 1 -> - {ok, BucketRegex} = - re:compile("= 2 -> - {ok, BucketRegex} = - re:compile(?V2_BUCKET_REGEX), - {ok, KeyRegex} = - re:compile(?V2_KEY_REGEX), - extract_links(LinkHeaders1, BucketRegex, KeyRegex) - end, + case LinkHeaders of + [] -> + {[], []}; + LinkHeaders -> + {KeyRegex, BucketRegex} = + get_compiled_link_regex(APIVersion, Prefix), + extract_links(LinkHeaders, BucketRegex, KeyRegex) + end, %% Validate that the only bucket header is pointing to the parent %% bucket... @@ -1287,7 +1280,7 @@ get_link_heads(RD, Ctx) -> true -> KeyLinks; false -> - throw({invalid_link_headers, LinkHeaders1}) + throw({invalid_link_headers, LinkHeaders}) end. %% Run each LinkHeader string() through the BucketRegex and From e8da63fccd523b7db7f961ddc0c639a97526d2cb Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Tue, 14 May 2024 13:01:54 +0200 Subject: [PATCH 06/12] Update riak_kv_wm_object.erl --- src/riak_kv_wm_object.erl | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/riak_kv_wm_object.erl b/src/riak_kv_wm_object.erl index 3659fc7421..9fbb84f498 100644 --- a/src/riak_kv_wm_object.erl +++ b/src/riak_kv_wm_object.erl @@ -1310,6 +1310,50 @@ extract_links_1([LinkHeader|Rest], BucketRegex, KeyRegex, BucketAcc, KeyAcc) -> extract_links_1([], _BucketRegex, _KeyRegex, BucketAcc, KeyAcc) -> {BucketAcc, KeyAcc}. +-type mp() :: {re_pattern, _, _, _, _}. + +-spec get_compiled_link_regex(non_neg_integer(), string()) -> {mp(), mp()}. +get_compiled_link_regex(1, Prefix) -> + case persistent_term:get(compiled_link_regex_v1, undefined) of + undefined -> + {ok, KeyRegex} = re:compile(" + PreCompiledExpressions + end; +get_compiled_link_regex(Two, _Prefix) when Two >= 2 -> + case persistent_term:get(compiled_link_regex_v2, undefined) of + undefined -> + {ok, KeyRegex} = re:compile(?V2_KEY_REGEX), + {ok, BucketRegex} = re:compile(?V2_BUCKET_REGEX), + persistent_term:put( + compiled_link_regex_v2, + {KeyRegex, BucketRegex} + ), + {KeyRegex, BucketRegex}; + PreCompiledExpressions -> + PreCompiledExpressions + end. + +-spec get_compiled_index_regex() -> mp(). +get_compiled_index_regex() -> + case persistent_term:get(compiled_index_regex, undefined) of + undefined -> + {ok, IndexRegex} = re:compile(",\\s"), + persistent_term:put( + compiled_index_regex, + IndexRegex + ), + IndexRegex; + PreCompiledIndexRegex -> + PreCompiledIndexRegex + end. + -spec get_ctype(riak_kv_wm_object_dict(), term()) -> string(). %% @doc Work out the content type for this object - use the metadata if provided get_ctype(MD,V) -> From 63d9913456de0861322802ef70ff93ca14b71817 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 16 May 2024 14:03:00 +0100 Subject: [PATCH 07/12] Use compiled regex (#31) Rather than compiling the regex and then discarding, use the compiled regex to improve query performance. Requires some further alignment of PB and HTTP API to check the compiled regular expression before forming the query within each aPI --- src/riak_index.erl | 25 +++- src/riak_kv_pb_index.erl | 73 ++++++----- src/riak_kv_wm_index.erl | 275 +++++++++++++++++++++------------------ 3 files changed, 214 insertions(+), 159 deletions(-) diff --git a/src/riak_index.erl b/src/riak_index.erl index f0c2986c19..2052cbecbf 100644 --- a/src/riak_index.erl +++ b/src/riak_index.erl @@ -40,7 +40,9 @@ upgrade_query/1, object_key_in_range/3, index_key_in_range/3, - add_timeout_opt/2 + add_timeout_opt/2, + compile_re/1, + is_valid_index/1 ]). -include_lib("kernel/include/logger.hrl"). @@ -74,6 +76,8 @@ % -type query_def() :: {ok, term()} | {error, term()} | {term(), {error, term()}}. % -export_type([query_def/0]). -type query_def() :: #riak_kv_index_v3{}|#riak_kv_index_v2{}. +-type pcre_mp() :: {re_pattern, term(), term(), term(), term()}. + -export_type([query_def/0]). -type last_result() :: {value(), key()} | key(). @@ -83,6 +87,16 @@ -type query_version() :: v1 | v2 | v3. +-type index_query() :: ?KV_INDEX_Q{}. + +-spec compile_re( + undefined|iodata()) -> + {ok, pcre_mp()|undefined}|{error, {string(), non_neg_integer()}}. +compile_re(undefined) -> + {ok, undefined}; +compile_re(Regex) -> + re:compile(Regex). + mapred_index(Dest, Args) -> mapred_index(Dest, Args, ?TIMEOUT). @@ -318,6 +332,15 @@ to_index_query(Args) -> Version = riak_core_capability:get({riak_kv, secondary_index_version}, v1), to_index_query(Version, Args). +-spec is_valid_index(index_query()) -> boolean(). +is_valid_index( + ?KV_INDEX_Q{start_term = Start, term_regex = Regex}) + when is_integer(Start), Regex =/= undefined -> + false; +is_valid_index(_Query) -> + true. + + %% @doc upgrade a V1 Query to a v2 Query make_query({eq, ?BUCKETFIELD, _Bucket}, Q) -> {ok, Q?KV_INDEX_Q{filter_field=?BUCKETFIELD, return_terms=false}}; diff --git a/src/riak_kv_pb_index.erl b/src/riak_kv_pb_index.erl index 6c36280a4f..79a710651f 100644 --- a/src/riak_kv_pb_index.erl +++ b/src/riak_kv_pb_index.erl @@ -71,31 +71,23 @@ validate_request(#rpbindexreq{qtype = range, range_min = Min, range_max = Max}) when not is_binary(Min); not is_binary(Max) -> {error, {format, "Invalid range query: ~p -> ~p", [Min, Max]}}; validate_request(#rpbindexreq{term_regex = TermRe} = Req) -> - {ValRe, ValErr} = ensure_compiled_re(TermRe), - case ValRe of - error -> - {error, {format, "Invalid term regular expression ~p : ~p", [TermRe, ValErr]}}; - _ -> - validate_query(Req) + case riak_index:compile_re(TermRe) of + {error, ReError} -> + {error, + {format, + "Invalid term regular expression ~p : ~p", + [TermRe, ReError]}}; + {ok, CompiledRegex} -> + {ok, Query} = + riak_index:to_index_query(query_params(Req, CompiledRegex)), + case riak_index:is_valid_index(Query) of + false -> + {error, "Can not use term regular expression in integer query"}; + _ -> + Query + end end. -validate_query(Req) -> - Query = riak_index:to_index_query(query_params(Req)), - case Query of - {ok, ?KV_INDEX_Q{start_term = Start, term_regex = Re}} when is_integer(Start) - andalso Re =/= undefined -> - {error, "Can not use term regular expression in integer query"}; - _ -> - Query - end. - -ensure_compiled_re(TermRe) -> - case TermRe of - undefined -> - {undefined, undefined}; - _ -> - re:compile(TermRe) - end. %% @doc process/2 callback. Handles an incoming request message. process(#rpbindexreq{stream = S} = Req, State) -> @@ -126,7 +118,7 @@ validate_request_and_maybe_perform_query(Req, State) -> error_accept(Class, State) -> {error, riak_core_util:job_class_disabled_message(binary, Class), State}. -maybe_perform_query({ok, Query}, Req=#rpbindexreq{stream=true}, State) -> +maybe_perform_query(Query, Req=#rpbindexreq{stream=true}, State) -> #rpbindexreq{type=T, bucket=B, max_results=MaxResults, timeout=Timeout, pagination_sort=PgSort0, continuation=Continuation} = Req, #state{client=Client} = State, @@ -139,7 +131,7 @@ maybe_perform_query({ok, Query}, Req=#rpbindexreq{stream=true}, State) -> riak_client:stream_get_index(Bucket, Query, Opts, Client), ReturnTerms = riak_index:return_terms(Req#rpbindexreq.return_terms, Query), {reply, {stream, ReqId}, State#state{req_id=ReqId, req=Req#rpbindexreq{return_terms=ReturnTerms}}}; -maybe_perform_query({ok, Query}, Req, State) -> +maybe_perform_query(Query, Req, State) -> #rpbindexreq{type=T, bucket=B, max_results=MaxResults, return_terms=ReturnTerms0, timeout=Timeout, pagination_sort=PgSort0, continuation=Continuation} = Req, @@ -166,20 +158,33 @@ handle_query_results(ReturnTerms, MaxResults, {ok, Results}, State) -> Resp = encode_results(ReturnTerms, Results, Cont), {reply, Resp, State}. -query_params(#rpbindexreq{index=Index= <<"$bucket">>, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + index=Index= <<"$bucket">>, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {return_terms, false}, {term_regex, Re}, {max_results, MaxResults}, {continuation, Continuation}]; -query_params(#rpbindexreq{qtype=eq, index=Index, key=Value, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + qtype=eq, + index=Index, + key=Value, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {start_term, Value}, {end_term, Value}, {term_regex, Re}, {max_results, MaxResults}, {return_terms, false}, {continuation, Continuation}]; -query_params(#rpbindexreq{index=Index, range_min=Min, range_max=Max, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + index=Index, + range_min=Min, + range_max=Max, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {start_term, Min}, {end_term, Max}, {term_regex, Re}, {max_results, MaxResults}, {continuation, Continuation}]. diff --git a/src/riak_kv_wm_index.erl b/src/riak_kv_wm_index.erl index 1aa54bc60d..3b38566b5a 100644 --- a/src/riak_kv_wm_index.erl +++ b/src/riak_kv_wm_index.erl @@ -186,165 +186,192 @@ request_class(ReqData) -> %% a known type. malformed_request(RD, Ctx) -> %% Pull the params... - Bucket = list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(bucket, RD))), - IndexField = list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(field, RD))), + Bucket = + list_to_binary( + riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(bucket, RD))), + IndexField = + list_to_binary( + riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(field, RD))), Args1 = wrq:path_tokens(RD), - Args2 = [list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, X)) || X <- Args1], + Args2 = + [list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, X)) + || X <- Args1], ReturnTerms0 = wrq:get_qs_value(?Q_2I_RETURNTERMS, "false", RD), ReturnTerms1 = normalize_boolean(string:to_lower(ReturnTerms0)), Continuation = wrq:get_qs_value(?Q_2I_CONTINUATION, RD), PgSort0 = wrq:get_qs_value(?Q_2I_PAGINATION_SORT, RD), - PgSort = case PgSort0 of - undefined -> undefined; - _ -> normalize_boolean(string:to_lower(PgSort0)) - end, + PgSort = + case PgSort0 of + undefined -> + undefined; + _ -> + normalize_boolean(string:to_lower(PgSort0)) + end, MaxVal = validate_max(wrq:get_qs_value(?Q_2I_MAX_RESULTS, RD)), TermRegex = wrq:get_qs_value(?Q_2I_TERM_REGEX, RD), Timeout0 = wrq:get_qs_value("timeout", RD), - {Start, End} = case {IndexField, Args2} of - {<<"$bucket">>, _} -> {undefined, undefined}; - {_, []} -> {undefined, undefined}; - {_, [S]} -> {S, S}; - {_, [S, E]} -> {S, E} - end, + {Start, End} = + case {IndexField, Args2} of + {<<"$bucket">>, _} -> {undefined, undefined}; + {_, []} -> {undefined, undefined}; + {_, [S]} -> {S, S}; + {_, [S, E]} -> {S, E} + end, IsEqualOp = length(Args1) == 1, InternalReturnTerms = not( IsEqualOp orelse IndexField == <<"$field">> ), - QRes = riak_index:to_index_query( - [ - {field, IndexField}, {start_term, Start}, {end_term, End}, - {return_terms, InternalReturnTerms}, - {continuation, Continuation}, - {term_regex, TermRegex} - ] - ++ [{max_results, MaxResults} || {true, MaxResults} <- [MaxVal]] - ), - ValRe = case TermRegex of - undefined -> - ok; - _ -> - re:compile(TermRegex) - end, - Stream0 = wrq:get_qs_value("stream", "false", RD), Stream = normalize_boolean(string:to_lower(Stream0)), - - case {PgSort, ReturnTerms1, validate_timeout(Timeout0), MaxVal, - QRes, - ValRe, Stream} of - {malformed, _, _, _, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a boolean", - [?Q_2I_PAGINATION_SORT, PgSort0]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, malformed, _, _, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a boolean", - [?Q_2I_RETURNTERMS, ReturnTerms0]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, _, - {ok, ?KV_INDEX_Q{start_term=NormStart}}, - {ok, _CompiledRe}, _} - when is_integer(NormStart) -> - {true, - wrq:set_resp_body("Can not use term regular expressions" - " on integer queries", - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, _, - _, - {error, ReError}, _} -> - {true, - wrq:set_resp_body( - io_lib:format("Invalid term regular expression ~p : ~p", - [TermRegex, ReError]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, {true, Timeout}, {true, MaxResults}, - {ok, Query}, - _, _} -> - %% Request is valid. - ReturnTerms2 = riak_index:return_terms(ReturnTerms1, Query), - %% Special case: a continuation implies pagination sort - %% even if no max_results was given. - PgSortFinal = case Continuation of - undefined -> PgSort; - _ -> true - end, - NewCtx = Ctx#ctx{ - bucket = Bucket, - index_query = Query, - max_results = MaxResults, - return_terms = ReturnTerms2, - timeout=Timeout, - pagination_sort = PgSortFinal, - streamed = Stream - }, - {false, RD, NewCtx}; - {_, _, _, _, - {error, Reason}, - _, _} -> + case riak_index:compile_re(TermRegex) of + {error, ReError} -> {true, wrq:set_resp_body( - io_lib:format("Invalid query: ~p~n", [Reason]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, {false, BadVal}, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a positive integer", - [?Q_2I_MAX_RESULTS, BadVal]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, {error, Input}, _, - _, - _, _} -> - {true, wrq:append_to_resp_body(io_lib:format("Bad timeout " - "value ~p. Must be a non-negative integer~n", - [Input]), - wrq:set_resp_header(?HEAD_CTYPE, - "text/plain", RD)), Ctx} + io_lib:format( + "Invalid term regular expression ~p : ~p", + [TermRegex, ReError] + ), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + {ok, CompiledRegex} -> + QueryResult = + riak_index:to_index_query( + [{field, IndexField}, {start_term, Start}, {end_term, End}, + {max_results, MaxVal}, + {continuation, Continuation}, + {return_terms, InternalReturnTerms}, + {term_regex, CompiledRegex} + ] + ), + case QueryResult of + {error, Reason} -> + {true, + wrq:set_resp_body( + io_lib:format( + "Invalid query: ~p~n", [Reason]), + wrq:set_resp_header( + ?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + {ok, Query} -> + case riak_index:is_valid_index(Query) of + false -> + {true, + wrq:set_resp_body( + "Can not use term regular expressions" + " on integer queries", + wrq:set_resp_header( + ?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + _ -> + ValidationResult = + validate_query( + PgSort, + ReturnTerms1, + validate_timeout(Timeout0), + MaxVal, + Stream, + RD + ), + case ValidationResult of + ok -> + PgSortFinal = + case Continuation of + undefined -> + PgSort; + _ -> + true + end, + NewCtx = + Ctx#ctx{ + bucket = Bucket, + index_query = Query, + max_results = MaxVal, + return_terms = + riak_index:return_terms( + ReturnTerms1, Query), + timeout=Timeout0, + pagination_sort = PgSortFinal, + streamed = Stream + }, + {false, RD, NewCtx}; + ErrorMessage -> + {true, ErrorMessage, Ctx} + end + end + end end. + +validate_query({malformed, BadPgSort}, _, _, _, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a boolean", + [?Q_2I_PAGINATION_SORT, BadPgSort]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, {malformed, BadReturnTerms}, _, _, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a boolean", + [?Q_2I_RETURNTERMS, BadReturnTerms]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, {malformed, BadN}, _, _, RD) -> + wrq:append_to_resp_body( + io_lib:format( + "Bad timeout " + "value ~p. Must be a non-negative integer~n", + [BadN]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, {malformed, BadMR}, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a positive integer", + [?Q_2I_MAX_RESULTS, BadMR]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, _, {malformed, BadStreamReq}, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid stream. ~p is not a boolean", + [BadStreamReq]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, _, _, _RD) -> + %% Request is valid. + ok. + +-spec validate_timeout( + undefined|string()) -> + non_neg_integer()|{malformed, string()}. validate_timeout(undefined) -> - {true, undefined}; + undefined; validate_timeout(Str) -> - try - list_to_integer(Str) of + try list_to_integer(Str) of Int when Int >= 0 -> - {true, Int}; + Int; Neg -> - {error, Neg} + {malformed, Neg} catch _:_ -> - {error, Str} + {malformed, Str} end. +-spec validate_max( + undefined|string()) -> non_neg_integer()|all|{malformed, string()}. validate_max(undefined) -> - {true, all}; + all; validate_max(N) when is_list(N) -> - try - list_to_integer(N) of + try list_to_integer(N) of Max when Max > 0 -> - {true, Max}; + Max; ZeroOrLess -> - {false, ZeroOrLess} + {malformed, ZeroOrLess} catch _:_ -> - {false, N} + {malformed, N} end. +-spec normalize_boolean(string()) -> boolean()|{malformed, string()}. normalize_boolean("false") -> false; normalize_boolean("true") -> true; -normalize_boolean(_) -> - malformed. +normalize_boolean(OtherString) -> + {malformed, OtherString}. -spec content_types_provided(#wm_reqdata{}, context()) -> {[{ContentType::string(), Producer::atom()}], #wm_reqdata{}, context()}. From 9e20474eb6fb9faca4d22ae7e9c19c39e1a32fc2 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 16 May 2024 14:40:02 +0100 Subject: [PATCH 08/12] Fix timeout validation i.e. use validated timeout --- src/riak_kv_wm_index.erl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/riak_kv_wm_index.erl b/src/riak_kv_wm_index.erl index 3b38566b5a..3c0ca08b69 100644 --- a/src/riak_kv_wm_index.erl +++ b/src/riak_kv_wm_index.erl @@ -209,7 +209,7 @@ malformed_request(RD, Ctx) -> end, MaxVal = validate_max(wrq:get_qs_value(?Q_2I_MAX_RESULTS, RD)), TermRegex = wrq:get_qs_value(?Q_2I_TERM_REGEX, RD), - Timeout0 = wrq:get_qs_value("timeout", RD), + Timeout = validate_timeout(wrq:get_qs_value("timeout", RD)), {Start, End} = case {IndexField, Args2} of {<<"$bucket">>, _} -> {undefined, undefined}; @@ -265,7 +265,7 @@ malformed_request(RD, Ctx) -> validate_query( PgSort, ReturnTerms1, - validate_timeout(Timeout0), + Timeout, MaxVal, Stream, RD @@ -287,7 +287,7 @@ malformed_request(RD, Ctx) -> return_terms = riak_index:return_terms( ReturnTerms1, Query), - timeout=Timeout0, + timeout=Timeout, pagination_sort = PgSortFinal, streamed = Stream }, From 9c82f926b3ab66b396d2f7f00cefa9fc9064a665 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 6 Jun 2024 11:33:00 +0100 Subject: [PATCH 09/12] Switch to map for HTTP headers Prevents continuous re-processing of strings to extract headers --- src/riak_kv_wm_object.erl | 194 ++++++++++++++++++++++---------------- 1 file changed, 114 insertions(+), 80 deletions(-) diff --git a/src/riak_kv_wm_object.erl b/src/riak_kv_wm_object.erl index 9fbb84f498..c2db707561 100644 --- a/src/riak_kv_wm_object.erl +++ b/src/riak_kv_wm_object.erl @@ -176,21 +176,18 @@ riak, %% local | {node(), atom()} - params for riak client doc, %% {ok, riak_object()}|{error, term()} - the object found vtag, %% string() - vtag the user asked for - bucketprops, %% proplist() - properties of the bucket links, %% [link()] - links of the object index_fields, %% [index_field()] method, %% atom() - HTTP method for the request ctype, %% string() - extracted content-type provided charset, %% string() | undefined - extracted character set provided timeout, %% integer() - passed-in timeout value in ms - security %% security context + security, %% security context + header_map %% map of http headers interesting within this module }). --ifdef(namespaced_types). + -type riak_kv_wm_object_dict() :: dict:dict(). --else. --type riak_kv_wm_object_dict() :: dict(). --endif. -include_lib("webmachine/include/webmachine.hrl"). -include("riak_kv_wm_raw.hrl"). @@ -204,13 +201,82 @@ -type link() :: {{Bucket::binary(), Key::binary()}, Tag::binary()}. --define(DEFAULT_TIMEOUT, 60000). -define(V1_BUCKET_REGEX, "/([^/]+)>; ?rel=\"([^\"]+)\""). -define(V1_KEY_REGEX, "/([^/]+)/([^/]+)>; ?riaktag=\"([^\"]+)\""). -define(V2_BUCKET_REGEX, "; ?rel=\"([^\"]+)\""). -define(V2_KEY_REGEX, "; ?riaktag=\"([^\"]+)\""). +-define(BINHEAD_CTYPE, <<"content-type">>). +-define(BINHEAD_IF_NOT_MODIFIED, <<"x-riak-if-not-modified">>). +-define(BINHEAD_NONE_MATCH, <<"if-none-match">>). +-define(BINHEAD_MATCH, <<"if-match">>). +-define(BINHEAD_UNMODIFIED_SINCE, <<"if-unmodified-since">>). +-define(BINHEAD_ENCODING, <<"content-encoding">>). +-define(BINHEAD_ACCEPT, <<"accept">>). +-define(BINHEAD_LINK, <<"link">>). +-define(BINHEAD_VCLOCK, <<"x-riak-vclock">>). +-define(PREFIX_USERMETA, "x-riak-meta-"). +-define(PREFIX_INDEX, "x-riak-index-"). + +-spec make_reqheader_map(request_data()) -> #{binary() => any()}. +make_reqheader_map(RD) -> + lists:foldl( + fun({HeadKey, HeadVal}, AccMap) -> + accumulate_header_info( + string:lowercase( + list_to_binary( + riak_kv_wm_utils:any_to_list(HeadKey) + ) + ), + HeadVal, + AccMap + ) + end, + maps:new(), + mochiweb_headers:to_list(wrq:req_headers(RD)) + ). + +accumulate_header_info(<>, T, MapAcc) -> + maps:update_with( + ?PREFIX_INDEX, + fun(Indices) -> [{Field, T}|Indices] end, + [{Field, T}], + MapAcc + ); +accumulate_header_info(<>, V, MapAcc) -> + maps:update_with( + ?PREFIX_USERMETA, + fun(Indices) -> + [{<>, V}|Indices] + end, + [{<>, V}], + MapAcc + ); +accumulate_header_info(?BINHEAD_ACCEPT, V, MapAcc) -> + maps:put(?BINHEAD_ACCEPT, V, MapAcc); +accumulate_header_info(?BINHEAD_CTYPE, V, MapAcc) -> + maps:put(?BINHEAD_CTYPE, V, MapAcc); +accumulate_header_info(?BINHEAD_ENCODING, V, MapAcc) -> + maps:put(?BINHEAD_ENCODING, V, MapAcc); +accumulate_header_info(?BINHEAD_VCLOCK, VC, MapAcc) -> + maps:put( + ?BINHEAD_VCLOCK, + riak_object:decode_vclock(base64:decode(VC)), + MapAcc); +accumulate_header_info(?BINHEAD_LINK, V, MapAcc) -> + maps:put(?BINHEAD_LINK, V, MapAcc); +accumulate_header_info(?BINHEAD_IF_NOT_MODIFIED, V, MapAcc) -> + maps:put(?BINHEAD_IF_NOT_MODIFIED, V, MapAcc); +accumulate_header_info(?BINHEAD_UNMODIFIED_SINCE, V, MapAcc) -> + maps:put(?BINHEAD_UNMODIFIED_SINCE, V, MapAcc); +accumulate_header_info(?BINHEAD_MATCH, V, MapAcc) -> + maps:put(?BINHEAD_MATCH, V, MapAcc); +accumulate_header_info(?BINHEAD_NONE_MATCH, V, MapAcc) -> + maps:put(?BINHEAD_NONE_MATCH, V, MapAcc); +accumulate_header_info(_DiscardIdx, _Value, MapAcc) -> + MapAcc. + -spec init(proplists:proplist()) -> {ok, context()}. %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. @@ -248,6 +314,7 @@ service_available(RD, Ctx0=#ctx{riak=RiakProps}) -> list_to_binary( riak_kv_wm_utils:maybe_decode_uri(RD, K)) end, + HeaderMap = make_reqheader_map(RD), {true, RD, Ctx#ctx{ @@ -255,6 +322,7 @@ service_available(RD, Ctx0=#ctx{riak=RiakProps}) -> client=C, bucket=Bucket, key=Key, + header_map = HeaderMap, vtag=wrq:get_qs_value(?Q_VTAG, RD)}}; Error -> {false, @@ -401,7 +469,7 @@ malformed_request([H|T], RD, Ctx) -> %% PUT/POST. %% This should probably result in a 415 using the known_content_type callback malformed_content_type(RD, Ctx) -> - case wrq:get_req_header(?HEAD_CTYPE, RD) of + case maps:get(?BINHEAD_CTYPE, Ctx#ctx.header_map, undefined) of undefined -> {true, missing_content_type(RD), Ctx}; RawCType -> @@ -552,7 +620,7 @@ normalize_rw_param(V) -> list_to_integer(V). %% A link header should be of the form: %% </Prefix/Bucket/Key>; riaktag="Tag",... malformed_link_headers(RD, Ctx) -> - case catch get_link_heads(RD, Ctx) of + case catch get_link_heads(Ctx) of Links when is_list(Links) -> {false, RD, Ctx#ctx{links=Links}}; _Error when Ctx#ctx.api_version == 1-> @@ -581,7 +649,7 @@ malformed_link_headers(RD, Ctx) -> %% An index field should be of the form "index_fieldname_type" malformed_index_headers(RD, Ctx) -> %% Get a list of index_headers... - IndexFields1 = extract_index_fields(RD), + IndexFields1 = extract_index_fields(Ctx), %% Validate the fields. If validation passes, then the index %% headers are correctly formed. @@ -596,34 +664,20 @@ malformed_index_headers(RD, Ctx) -> Ctx} end. --spec extract_index_fields(#wm_reqdata{}) -> proplists:proplist(). +-spec extract_index_fields(context()) -> proplists:proplist(). %% @doc Extract fields from headers prefixed by "x-riak-index-" in the %% client's PUT request, to be indexed at write time. -extract_index_fields(RD) -> - PrefixSize = length(?HEAD_INDEX_PREFIX), +extract_index_fields(Ctx) -> RE = get_compiled_index_regex(), - F = - fun({K,V}, Acc) -> - KList = riak_kv_wm_utils:any_to_list(K), - case lists:prefix(?HEAD_INDEX_PREFIX, string:to_lower(KList)) of - true -> - %% Isolate the name of the index field. - IndexField = - list_to_binary( - element(2, lists:split(PrefixSize, KList))), - - %% HACK ALERT: Split values on comma. The HTTP - %% spec allows for comma separated tokens - %% where the tokens can be quoted strings. We - %% don't currently support quoted strings. - %% (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) - Values = re:split(V, RE, [{return, binary}]), - [{IndexField, X} || X <- Values] ++ Acc; - false -> - Acc - end - end, - lists:foldl(F, [], mochiweb_headers:to_list(wrq:req_headers(RD))). + lists:flatten( + lists:map( + fun({Field, Term}) -> + Values = re:split(Term, RE, [{return, binary}]), + [{Field, X} || X <- Values] + end, + maps:get(?PREFIX_INDEX, Ctx#ctx.header_map, []) + ) + ). -spec content_types_provided(#wm_reqdata{}, context()) -> {[{ContentType::string(), Producer::atom()}], #wm_reqdata{}, context()}. @@ -723,7 +777,7 @@ encodings_provided(RD, Ctx0) -> %% of a key-level PUT request will be accepted by this resource. %% (A key-level put *must* include a Content-Type header.) content_types_accepted(RD, Ctx) -> - case wrq:get_req_header(?HEAD_CTYPE, RD) of + case maps:get(?BINHEAD_CTYPE, Ctx#ctx.header_map, undefined) of undefined -> %% user must specify content type of the data {[], RD, Ctx}; @@ -758,7 +812,7 @@ resource_exists(RD, Ctx0) -> ToFetch = case Method of UpdM when UpdM =:= 'PUT'; UpdM =:= 'POST'; UpdM =:= 'DELETE' -> - conditional_headers_present(RD) == true; + conditional_headers_present(Ctx0) == true; _ -> true end, @@ -800,7 +854,9 @@ resource_exists(RD, Ctx0) -> -spec is_conflict(request_data(), context()) -> {boolean(), request_data(), context()}. is_conflict(RD, Ctx) -> - case {Ctx#ctx.method, wrq:get_req_header(?HEAD_IF_NOT_MODIFIED, RD)} of + NotModified = + maps:get(?BINHEAD_IF_NOT_MODIFIED, Ctx#ctx.header_map, undefined), + case {Ctx#ctx.method, NotModified} of {_ , undefined} -> {false, RD, Ctx}; {UpdM, NotModifiedClock} when UpdM =:= 'PUT'; UpdM =:= 'POST' -> @@ -819,16 +875,12 @@ is_conflict(RD, Ctx) -> {false, RD, Ctx} end. --spec conditional_headers_present(request_data()) -> boolean(). -conditional_headers_present(RD) -> - NoneMatch = - (wrq:get_req_header("If-None-Match", RD) =/= undefined), - Match = - (wrq:get_req_header("If-Match", RD) =/= undefined), - UnModifiedSince = - (wrq:get_req_header("If-Unmodified-Since", RD) =/= undefined), - NotModified = - (wrq:get_req_header(?HEAD_IF_NOT_MODIFIED, RD) =/= undefined), +-spec conditional_headers_present(context()) -> boolean(). +conditional_headers_present(Ctx) -> + NoneMatch = maps:is_key(?BINHEAD_NONE_MATCH, Ctx#ctx.header_map), + Match = maps:is_key(?BINHEAD_MATCH, Ctx#ctx.header_map), + UnModifiedSince = maps:is_key(?BINHEAD_UNMODIFIED_SINCE, Ctx#ctx.header_map), + NotModified = maps:is_key(?BINHEAD_IF_NOT_MODIFIED, Ctx#ctx.header_map), (NoneMatch or Match or UnModifiedSince or NotModified). @@ -875,8 +927,12 @@ accept_doc_body( links=L, ctype=CType, charset=Charset, index_fields=IF}) -> Doc0 = riak_object:new(riak_kv_wm_utils:maybe_bucket_type(T,B), K, <<>>), - VclockDoc = riak_object:set_vclock(Doc0, decode_vclock_header(RD)), - UserMeta = extract_user_meta(RD), + VclockDoc = + riak_object:set_vclock( + Doc0, + maps:get(?BINHEAD_VCLOCK, Ctx#ctx.header_map, vclock:fresh()) + ), + UserMeta = maps:get(?PREFIX_USERMETA, Ctx#ctx.header_map, []), CTypeMD = dict:store(?MD_CTYPE, CType, dict:new()), CharsetMD = if Charset /= undefined -> @@ -885,7 +941,7 @@ accept_doc_body( CTypeMD end, EncMD = - case wrq:get_req_header(?HEAD_ENCODING, RD) of + case maps:get(?BINHEAD_ENCODING, Ctx#ctx.header_map, undefined) of undefined -> CharsetMD; E -> dict:store(?MD_ENCODING, E, CharsetMD) end, @@ -902,7 +958,7 @@ accept_doc_body( _ -> [] end, Options = make_options(Options0, Ctx), - NoneMatch = (wrq:get_req_header("If-None-Match", RD) =/= undefined), + NoneMatch = maps:is_key(?BINHEAD_NONE_MATCH, Ctx#ctx.header_map), Options2 = case riak_kv_util:consistent_object(B) and NoneMatch of true -> [{if_none_match, true}|Options]; @@ -929,7 +985,7 @@ send_returnbody(RD, DocCtx, _HasSiblings = false) -> %% Handle the sibling case. Send either the sibling message body, or a %% multipart body, depending on what the client accepts. send_returnbody(RD, DocCtx, _HasSiblings = true) -> - AcceptHdr = wrq:get_req_header("Accept", RD), + AcceptHdr = maps:get(?BINHEAD_ACCEPT, DocCtx#ctx.header_map, undefined), PossibleTypes = ["multipart/mixed", "text/plain"], case webmachine_util:choose_media_type(PossibleTypes, AcceptHdr) of "multipart/mixed" -> @@ -958,17 +1014,6 @@ add_conditional_headers(RD, Ctx) -> calendar:universal_time_to_local_time(LM)), RD4), {RD5,Ctx3}. --spec extract_user_meta(#wm_reqdata{}) -> proplists:proplist(). -%% @doc Extract headers prefixed by X-Riak-Meta- in the client's PUT request -%% to be returned by subsequent GET requests. -extract_user_meta(RD) -> - lists:filter(fun({K,_V}) -> - lists:prefix( - ?HEAD_USERMETA_PREFIX, - string:to_lower(riak_kv_wm_utils:any_to_list(K))) - end, - mochiweb_headers:to_list(wrq:req_headers(RD))). - -spec multiple_choices(#wm_reqdata{}, context()) -> {boolean(), #wm_reqdata{}, context()}. %% @doc Determine whether a document has siblings. If the user has @@ -1136,16 +1181,6 @@ encode_vclock_header(RD, #ctx{doc={error, {deleted, VClock}}}) -> wrq:set_resp_header( ?HEAD_VCLOCK, binary_to_list(base64:encode(BinVClock)), RD). --spec decode_vclock_header(#wm_reqdata{}) -> vclock:vclock(). -%% @doc Translate the X-Riak-Vclock header value from the request into -%% its Erlang representation. If no vclock header exists, a fresh -%% vclock is returned. -decode_vclock_header(RD) -> - case wrq:get_req_header(?HEAD_VCLOCK, RD) of - undefined -> vclock:fresh(); - Head -> riak_object:decode_vclock(base64:decode(Head)) - end. - -spec ensure_doc(context()) -> context(). %% @doc Ensure that the 'doc' field of the context() has been filled %% with the result of a riak_client:get request. This is a @@ -1177,11 +1212,10 @@ delete_resource(RD, Ctx=#ctx{bucket_type=T, bucket=B, key=K, client=C}) -> Options = make_options([], Ctx), BT = riak_kv_wm_utils:maybe_bucket_type(T,B), Result = - case wrq:get_req_header(?HEAD_VCLOCK, RD) of + case maps:get(?BINHEAD_VCLOCK, Ctx#ctx.header_map, undefined) of undefined -> riak_client:delete(BT, K, Options, C); - _ -> - VC = decode_vclock_header(RD), + VC -> riak_client:delete_vclock(BT, K, VC, Options, C) end, case Result of @@ -1245,18 +1279,18 @@ normalize_last_modified(MD) -> httpd_util:convert_request_date(Rfc1123) end. --spec get_link_heads(#wm_reqdata{}, context()) -> [link()]. +-spec get_link_heads(context()) -> [link()]. %% @doc Extract the list of links from the Link request header. %% This function will die if an invalid link header format %% is found. -get_link_heads(RD, Ctx) -> +get_link_heads(Ctx) -> APIVersion = Ctx#ctx.api_version, Prefix = Ctx#ctx.prefix, Bucket = Ctx#ctx.bucket, %% Get a list of link headers... LinkHeaders = - case wrq:get_req_header(?HEAD_LINK, RD) of + case maps:get(?BINHEAD_LINK, Ctx#ctx.header_map, undefined) of undefined -> []; Heads -> string:tokens(Heads, ",") end, From ef61d8bf0ad75c11c62a7e8c2b5a99f5ab8f4f80 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 6 Jun 2024 11:36:03 +0100 Subject: [PATCH 10/12] Add profiler --- src/riak_kv.app.src | 1 + src/riak_kv_util.erl | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/riak_kv.app.src b/src/riak_kv.app.src index 62763883a1..a9884996f4 100644 --- a/src/riak_kv.app.src +++ b/src/riak_kv.app.src @@ -6,6 +6,7 @@ {applications, [ kernel, stdlib, + tools, sasl, crypto, riak_api, diff --git a/src/riak_kv_util.erl b/src/riak_kv_util.erl index b5ef6d782a..26641f55a6 100644 --- a/src/riak_kv_util.erl +++ b/src/riak_kv_util.erl @@ -52,7 +52,8 @@ is_modfun_allowed/2, shuffle_list/1, kv_ready/0, - ngr_initial_timeout/0 + ngr_initial_timeout/0, + profile_riak/1 ]). -export([report_hashtree_tokens/0, reset_hashtree_tokens/2]). @@ -519,6 +520,26 @@ shuffle_list(L) -> lists:map(fun({_R, X0}) -> X0 end, lists:keysort(1, lists:map(fun(X) -> {rand:uniform(), X} end, L))). +-spec profile_riak(pos_integer()) -> analyzed|failed. +profile_riak(ProfileTime) -> + eprof:start(), + case eprof:start_profiling(erlang:processes()) of + profiling -> + timer:sleep(ProfileTime), + case eprof:stop_profiling() of + profiling_stopped -> + eprof:analyze( + total, [{filter, [{time, 10 * ProfileTime}]}] + ), + stopped = eprof:stop(), + analyzed; + _ -> + stopped = eprof:stop(), + failed_running + end; + _ -> + failed_starting + end. %% =================================================================== %% EUnit tests From 664b6e5abc33e2c794a80f37cfa511c0bd00962d Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 6 Jun 2024 11:46:57 +0100 Subject: [PATCH 11/12] Type fix --- src/riak_kv_util.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_kv_util.erl b/src/riak_kv_util.erl index 26641f55a6..5041ea0e49 100644 --- a/src/riak_kv_util.erl +++ b/src/riak_kv_util.erl @@ -529,7 +529,7 @@ profile_riak(ProfileTime) -> case eprof:stop_profiling() of profiling_stopped -> eprof:analyze( - total, [{filter, [{time, 10 * ProfileTime}]}] + total, [{filter, [{time, float(10 * ProfileTime)}]}] ), stopped = eprof:stop(), analyzed; From 9605fb39053ff2bf205ad9ee5a46f1c7c008cd32 Mon Sep 17 00:00:00 2001 From: Martin Sumner Date: Thu, 6 Jun 2024 17:59:58 +0100 Subject: [PATCH 12/12] Remove more strings .. and use webmachine with support for looking up pre-normalised keys in request headers --- rebar.config | 4 +- src/riak_kv_wm_object.erl | 131 +++++++++++++++++++------------------- 2 files changed, 69 insertions(+), 66 deletions(-) diff --git a/rebar.config b/rebar.config index 212880c0ed..2043d87af4 100644 --- a/rebar.config +++ b/rebar.config @@ -49,8 +49,8 @@ {sext, {git, "https://github.com/uwiger/sext.git", {tag, "1.8.0"}}}, {riak_pipe, {git, "https://github.com/nhs-riak/riak_pipe.git", {branch, "nhse-develop"}}}, {riak_dt, {git, "https://github.com/nhs-riak/riak_dt.git", {branch, "nhse-develop"}}}, - {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-d34-otp26"}}}, + {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-d34-nhskv.i30-lowercase"}}}, {hyper, {git, "https://github.com/nhs-riak/hyper", {branch, "nhse-develop"}}}, {kv_index_tictactree, {git, "https://github.com/nhs-riak/kv_index_tictactree.git", {branch, "nhse-develop"}}}, - {rhc, {git, "https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-d34-otp26"}}} + {rhc, {git, "https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-d34-nhskv.i30-lowercase"}}} ]}. diff --git a/src/riak_kv_wm_object.erl b/src/riak_kv_wm_object.erl index c2db707561..fe98eaa193 100644 --- a/src/riak_kv_wm_object.erl +++ b/src/riak_kv_wm_object.erl @@ -224,17 +224,13 @@ make_reqheader_map(RD) -> lists:foldl( fun({HeadKey, HeadVal}, AccMap) -> accumulate_header_info( - string:lowercase( - list_to_binary( - riak_kv_wm_utils:any_to_list(HeadKey) - ) - ), + list_to_binary(HeadKey), HeadVal, AccMap ) end, maps:new(), - mochiweb_headers:to_list(wrq:req_headers(RD)) + mochiweb_headers:to_normalised_list(wrq:req_headers(RD)) ). accumulate_header_info(<>, T, MapAcc) -> @@ -513,49 +509,54 @@ malformed_rw_params(RD, Ctx) -> Res = lists:foldl(fun malformed_rw_param/2, {false, RD, Ctx}, - [{#ctx.r, "r", "default"}, - {#ctx.w, "w", "default"}, - {#ctx.dw, "dw", "default"}, - {#ctx.rw, "rw", "default"}, - {#ctx.pw, "pw", "default"}, - {#ctx.node_confirms, "node_confirms", "default"}, - {#ctx.pr, "pr", "default"}]), + [{#ctx.r, "r", default}, + {#ctx.w, "w", default}, + {#ctx.dw, "dw", default}, + {#ctx.rw, "rw", default}, + {#ctx.pw, "pw", default}, + {#ctx.node_confirms, "node_confirms", default}, + {#ctx.pr, "pr", default}]), Res2 = lists:foldl(fun malformed_custom_param/2, Res, [{#ctx.sync_on_write, "sync_on_write", - "default", + default, [default, backend, one, all]}]), lists:foldl(fun malformed_boolean_param/2, Res2, - [{#ctx.basic_quorum, "basic_quorum", "default"}, - {#ctx.notfound_ok, "notfound_ok", "default"}, - {#ctx.asis, "asis", "false"}]). + [{#ctx.basic_quorum, "basic_quorum", default}, + {#ctx.notfound_ok, "notfound_ok", default}, + {#ctx.asis, "asis", false}]). --spec malformed_rw_param({Idx::integer(), Name::string(), Default::string()}, +-spec malformed_rw_param({Idx::integer(), Name::string(), Default::atom()}, {boolean(), #wm_reqdata{}, context()}) -> {boolean(), #wm_reqdata{}, context()}. %% @doc Check that a specific r, w, dw, or rw query param is a %% string-encoded integer. Store its result in context() if it %% is, or print an error message in #wm_reqdata{} if it is not. malformed_rw_param({Idx, Name, Default}, {Result, RD, Ctx}) -> - case catch normalize_rw_param(wrq:get_qs_value(Name, Default, RD)) of - P when (is_atom(P) orelse is_integer(P)) -> - {Result, RD, setelement(Idx, Ctx, P)}; - _ -> - {true, - wrq:append_to_resp_body( - io_lib:format("~s query parameter must be an integer or " - "one of the following words: 'one', 'quorum' or 'all'~n", - [Name]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + case catch normalize_rw_param(ExtractedString) of + P when (is_atom(P) orelse is_integer(P)) -> + {Result, RD, setelement(Idx, Ctx, P)}; + _ -> + {true, + wrq:append_to_resp_body( + io_lib:format("~s query parameter must be an integer or " + "one of the following words: 'one', 'quorum' or 'all'~n", + [Name]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. -spec malformed_custom_param({Idx::integer(), Name::string(), - Default::string(), + Default::atom(), AllowedValues::[atom()]}, {boolean(), #wm_reqdata{}, context()}) -> {boolean(), #wm_reqdata{}, context()}. @@ -563,46 +564,48 @@ malformed_rw_param({Idx, Name, Default}, {Result, RD, Ctx}) -> %% Store its result in context() if it is, or print an error message %% in #wm_reqdata{} if it is not. malformed_custom_param({Idx, Name, Default, AllowedValues}, {Result, RD, Ctx}) -> - AllowedValueTuples = [{V} || V <- AllowedValues], - Option= - lists:keyfind( - list_to_atom( - string:to_lower( - wrq:get_qs_value(Name, Default, RD))), - 1, - AllowedValueTuples), - case Option of - false -> - ErrorText = - "~s query parameter must be one of the following words: ~p~n", - {true, - wrq:append_to_resp_body( - io_lib:format(ErrorText, [Name, AllowedValues]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - _ -> - {Value} = Option, - {Result, RD, setelement(Idx, Ctx, Value)} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + UsableValue = list_to_atom(string:lowercase(ExtractedString)), + case lists:member(UsableValue, AllowedValues) of + true -> + {Result, RD, setelement(Idx, Ctx, UsableValue)}; + false -> + ErrorText = + "~s query parameter must be one of the following words: ~p~n", + {true, + wrq:append_to_resp_body( + io_lib:format(ErrorText, [Name, AllowedValues]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. %% @doc Check that a specific query param is a %% string-encoded boolean. Store its result in context() if it %% is, or print an error message in #wm_reqdata{} if it is not. malformed_boolean_param({Idx, Name, Default}, {Result, RD, Ctx}) -> - case string:to_lower(wrq:get_qs_value(Name, Default, RD)) of - "true" -> - {Result, RD, setelement(Idx, Ctx, true)}; - "false" -> - {Result, RD, setelement(Idx, Ctx, false)}; - "default" -> - {Result, RD, setelement(Idx, Ctx, default)}; - _ -> - {true, - wrq:append_to_resp_body( - io_lib:format("~s query parameter must be true or false~n", - [Name]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + case string:lowercase(ExtractedString) of + "true" -> + {Result, RD, setelement(Idx, Ctx, true)}; + "false" -> + {Result, RD, setelement(Idx, Ctx, false)}; + "default" -> + {Result, RD, setelement(Idx, Ctx, default)}; + _ -> + {true, + wrq:append_to_resp_body( + io_lib:format("~s query parameter must be true or false~n", + [Name]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. normalize_rw_param("backend") -> backend;