diff --git a/.github/workflows/erlang.yml b/.github/workflows/erlang.yml index 037271a9a..aaaa205c1 100644 --- a/.github/workflows/erlang.yml +++ b/.github/workflows/erlang.yml @@ -2,9 +2,15 @@ name: Erlang CI on: push: - branches: [ nhse-develop ] + branches: + - nhse-develop + - nhse-develop-3.2 + - nhse-develop-3.4 pull_request: - branches: [ nhse-develop ] + branches: + - nhse-develop + - nhse-develop-3.2 + - nhse-develop-3.4 jobs: @@ -17,25 +23,12 @@ jobs: strategy: fail-fast: false matrix: - otp: [22, 24, 25] + otp: [24, 26] os: [ubuntu-latest] - # OTP lower than 23 does not run on ubuntu-latest (22.04), see - # https://github.com/erlef/setup-beam#compatibility-between-operating-system-and-erlangotp - exclude: - - otp: 22 - os: ubuntu-latest - include: - - otp: 22 - os: ubuntu-20.04 steps: - uses: lukka/get-cmake@latest - - uses: actions/checkout@v2 - - name: Install dependencies (Ubuntu) - if: ${{ startsWith(matrix.os, 'ubuntu') }} - run: | - sudo apt-get -qq update - sudo apt-get -qq install libsnappy-dev libc6-dev + - uses: actions/checkout@v4 - name: Install Erlang/OTP uses: erlef/setup-beam@v1 with: diff --git a/priv/tracers/tracer_accumulating_time.erl b/priv/tracers/tracer_accumulating_time.erl deleted file mode 100644 index f66acda00..000000000 --- a/priv/tracers/tracer_accumulating_time.erl +++ /dev/null @@ -1,79 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. 
See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_accumulating_time). - --compile(export_all). --compile(nowarn_export_all). - -start(Pid_list, MFA_list, IntervalMS) -> - dbg:tracer(process, {fun trace/2, new_stats()}), - [dbg:p(Pid, [call, timestamp, arity]) || Pid <- Pid_list], - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - {Mod, Func, Arity} <- MFA_list], - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(IntervalMS, TPid, print_report), - {started, TPid}. - -stop() -> - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace_ts, Pid, call, {Mod, Func, Arity}, TS}, {Dict}) -> - MFA = {Mod, Func, Arity}, - DKey = {Pid, MFA}, - {dict:store(DKey, TS, Dict)}; -trace({trace_ts, Pid, return_from, {Mod, Func, Arity}, _Res, TS}, {Dict}) -> - MFA = {Mod, Func, Arity}, - DKey = {Pid, MFA}, - Start = case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - Elapsed = timer:now_diff(TS, Start), - SumKey = {sum, MFA}, - {OldCount, OldTime} = case dict:find(SumKey, Dict) of - error -> - {0, 0}; - {ok, Else} -> - Else - end, - Dict2 = dict:erase(DKey, Dict), - {dict:store(SumKey, {OldCount+1, OldTime+Elapsed}, Dict2)}; -trace(print_report, {Dict}) -> - print_stats(Dict), - {dict:from_list([X || {K, _V} = X <- dict:to_list(Dict), - element(1, K) /= sum])}; -trace(Unknown, {Dict}) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - {Dict}. - -new_stats() -> - {dict:new()}. - -print_stats(Dict) -> - Reports = lists:sort([{MFA, X} || {{sum, MFA}, X} <- dict:to_list(Dict)]), - [io:format("~p MFA ~p count ~p elapsed_msec ~p\n", - [time(), MFA, Count, Sum div 1000]) || - {MFA, {Count, Sum}} <- Reports]. 
diff --git a/priv/tracers/tracer_backend_latency.erl b/priv/tracers/tracer_backend_latency.erl deleted file mode 100644 index 408df78f2..000000000 --- a/priv/tracers/tracer_backend_latency.erl +++ /dev/null @@ -1,199 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_backend_latency). - --compile(export_all). --compile(nowarn_export_all). - -start() -> - start(500). - -start(LatencyMS) -> - start(LatencyMS, [get_fsm, put_fsm, - bitcask, eleveldb, file, prim_file, riak_kv_fs2_backend]). 
- -start(LatencyMS, Modules) -> - %% catch folsom_metrics:delete_metric(foo), - %% folsom_metrics:new_histogram(foo, uniform, 9981239823), - - dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}), - dbg:p(all, [call, timestamp, arity]), - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) || - lists:member(put_fsm, Modules), - Mod <- [riak_kv_put_fsm], - {Func, Arity} <- [{init, 1}, {finish, 2}]], - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], []}]) || - lists:member(get_fsm, Modules), - Mod <- [riak_kv_get_fsm], - {Func, Arity} <- [{init, 1}, {finalize, 1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(bitcask, Modules), - Mod <- [bitcask], - {Func, Arity} <- [ - {open,1}, {open,2}, - {close,1}, - {close_write_file,1}, - {get,2}, - {put,3}, - {delete,2}, - {sync,1}, - {iterator,3}, {iterator_next,1}, {iterator_release,1}, - {needs_merge,1}, - {is_empty_estimate,1}, - {status,1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(eleveldb, Modules), - Mod <- [eleveldb], - {Func, Arity} <- [ - {open,2}, - {close,1}, - {get,3}, - {put,4}, - {delete,3}, - {write,3}, - {status,2}, - {destroy,2}, - {is_empty,1}, - {iterator,2}, - {iterator,3}, - {iterator_move,2}, - {iterator_close,1}]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(file, Modules), - Mod <- [file], - {Func, Arity} <- [ - {open,2}, - {close,1}, - {pread,2}, - {pread,3}, - {read,2}, - {write,2}, - {pwrite,2}, - {pwrite,3}, - {truncate,1}, - {delete,1}, - {position,2}, - {sync,1} - ]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(prim_file, Modules), - Mod <- [prim_file], - {Func, Arity} <- [ - {list_dir,2}, - {read_file_info,1}, - {write_file_info,1} - ]], - - [catch dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]) || - lists:member(riak_kv_fs2_backend, Modules), - Mod <- [riak_kv_fs2_backend], - {Func, Arity} <- [ - {get_object,4}, - 
{put_object,5}, - {delete,4}]], - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - %% timer:send_interval(Interval, TPid, print_report), - io:format("Not using timer:send_interval...\n"), - {started, TPid}. - -stop() -> - %% io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), - %% catch folsom_metrics:delete_metric(foo), - - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace_ts, Pid, call, {riak_kv_put_fsm, init, _}, TS}, {Dict, LMS}) -> - {dict:store({put, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, call, {riak_kv_put_fsm, finish, _}, TS}, {Dict, LatencyMS}) -> - Start = case dict:find({put, Pid}, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: put_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]); - _Elapsed -> - ok - end, - {dict:erase(Pid, Dict), LatencyMS}; -trace({trace_ts, Pid, call, {riak_kv_get_fsm, init, _}, TS}, {Dict, LMS}) -> - {dict:store({get, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, call, {riak_kv_get_fsm, finalize, _}, TS}, {Dict, LatencyMS}) -> - Start = case dict:find({get, Pid}, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: get_fsm: ~p msec @ ~p ~p\n", [date(), time(), Elapsed, node(), Pid]); - _Elapsed -> - ok - end, - {dict:erase(Pid, Dict), LatencyMS}; -trace({trace_ts, Pid, call, {Mod, _, _}, TS}, {Dict, LMS}) -> - {dict:store({Mod, Pid}, TS, Dict), LMS}; -trace({trace_ts, Pid, return_from, {Mod, Func, _}, _Res, TS}, {Dict, LatencyMS}) -> - DKey = {Mod, Pid}, - Start = 
case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - case timer:now_diff(TS, Start) div 1000 of - Elapsed when Elapsed > LatencyMS -> - io:format("~p ~p: ~p ~p: ~p msec\n", [date(), time(), Mod, - Func, Elapsed]); - _Elapsed -> - ok - end, - %% if Mod == file, Func == pread -> - %% folsom_metrics_histogram:update(foo, Elapsed); - %% true -> - %% ok - %% end, - {dict:erase(DKey, Dict), LatencyMS}; -trace(print_report, DictStats) -> - %% print_stats(DictStats), - %% new_stats(); - DictStats; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_eleveldb_put_size.erl b/priv/tracers/tracer_eleveldb_put_size.erl deleted file mode 100644 index c776e7a47..000000000 --- a/priv/tracers/tracer_eleveldb_put_size.erl +++ /dev/null @@ -1,77 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_eleveldb_put_size). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(10*1000). 
- -start(Interval) -> - Stats = {StatName, _} = new_stats(), - reset_metric(StatName), - - dbg:tracer(process, {fun trace/2, Stats}), - dbg:p(all, [call]), - dbg:tpl(eleveldb, write, 3, [{'_', [], []}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. - -trace({trace, _Pid, call, {eleveldb, write, [_, PutList, _]}}, - {StatName, SumBytes}) -> - Bs = [begin - Bs = size(K) + size(V), - folsom_metrics_histogram:update(StatName, Bs), - Bs - end || {put, K, V} <- PutList], - {StatName, SumBytes + lists:sum(Bs)}; -trace(print_report, Stats = {StatName, _}) -> - print_stats(Stats), - reset_metric(StatName), - new_stats(); -trace(_Unknown, Stats) -> - erlang:display(wha), - %% io:format("Unknown! ~P\n", [Unknown, 20]), - Stats. - -new_stats() -> - {foo, 0}. - -print_stats({StatName, SumBytes}) -> - if SumBytes == 0 -> - io:format("~p ~p: 0 bytes\n", [date(), time()]); - true -> - Ps = folsom_metrics:get_histogram_statistics(StatName), - io:format("~p ~p: ~p bytes\n ~p\n", [date(), time(), SumBytes, Ps]) - end. - -reset_metric(Stats) -> - catch folsom_metrics:delete_metric(Stats), - folsom_metrics:new_histogram(Stats, uniform, 9981239823). diff --git a/priv/tracers/tracer_fsm_init.erl b/priv/tracers/tracer_fsm_init.erl deleted file mode 100644 index ba9c23647..000000000 --- a/priv/tracers/tracer_fsm_init.erl +++ /dev/null @@ -1,92 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. 
You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_fsm_init). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(1*1000). - -start(Interval) -> - %%% Count the get, put, buckets, keys, exchange, and index FSM init() calls - dbg:tracer(process, {fun trace/2, new_stats()}), - dbg:p(all, call), - [dbg:tpl(Mod, init, 1, [{'_', [], []}]) || - Mod <- [riak_kv_buckets_fsm, riak_kv_exchange_fsm, riak_kv_get_fsm, riak_kv_index_fsm, riak_kv_keys_fsm, riak_kv_put_fsm]], - dbg:tpl(riak_kv_put_fsm, start_link, 3, [{'_', [], []}]), - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. 
- -trace({trace, _Pid, call, {riak_kv_put_fsm, start_link, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link+1, R, P, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_get_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R+1, P, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_put_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P+1, B, E, I, K}; -trace({trace, _Pid, call, {riak_kv_buckets_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B+1, E, I, K}; -trace({trace, _Pid, call, {riak_kv_exchange_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E+1, I, K}; -trace({trace, _Pid, call, {riak_kv_index_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E, I+1, K}; -trace({trace, _Pid, call, {riak_kv_keys_fsm, init, _}}, - {Pstart_link, R, P, B, E, I, K}) -> - {Pstart_link, R, P, B, E, I, K+1}; -trace(print_report, Stats) -> - print_stats(Stats), - new_stats(); -trace(Unknown, Stats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - Stats. - -new_stats() -> - {0, - 0, 0, 0, 0, 0, 0}. - -print_stats({Pstart_link, Get, Put, Buckets, Exchange, Index, Keys}) -> - Stats = [{put_start, Pstart_link}, - {get, Get}, - {put, Put}, - {buckets, Buckets}, - {exchange, Exchange}, - {index, Index}, - {keys, Keys}], - io:format("~p ~p: ~p\n", [date(), time(), Stats]). diff --git a/priv/tracers/tracer_func_args.erl b/priv/tracers/tracer_func_args.erl deleted file mode 100644 index c4d51bcb8..000000000 --- a/priv/tracers/tracer_func_args.erl +++ /dev/null @@ -1,104 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. 
You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% For example: what ETS tables are being called the most by ets:lookup/2? -%% The 1st arg of ets:lookup/2 is the table name. -%% Watch for 10 seconds. -%% -%% > func_args_tracer:start(ets, lookup, 2, 10, fun(Args) -> hd(Args) end). -%% -%% Tracer pid: <0.16102.15>, use func_args_tracer:stop() to stop -%% Otherwise, tracing stops in 10 seconds -%% Current date & time: {2013,9,19} {18,5,48} -%% {started,<0.16102.15>} -%% Total calls: 373476 -%% Call stats: -%% [{folsom_histograms,114065}, -%% {ac_tab,69689}, -%% {ets_riak_core_ring_manager,67147}, -%% {folsom_spirals,57076}, -%% {riak_capability_ets,48862}, -%% {riak_core_node_watcher,8149}, -%% {riak_api_pb_registrations,8144}, -%% {folsom,243}, -%% {folsom_meters,43}, -%% {folsom_durations,20}, -%% {timer_tab,18}, -%% {folsom_gauges,8}, -%% {riak_core_stat_cache,5}, -%% {sys_dist,3}, -%% {inet_db,1}, -%% {21495958,1}, -%% {3145765,1}, -%% {3407910,1}] -%% - --module(tracer_func_args). - --compile(nowarn_export_all). --compile(export_all). - -start(Mod, Func, Arity, RunSeconds) -> - start(Mod, Func, Arity, RunSeconds, fun(Args) -> Args end). 
- -start(Mod, Func, Arity, RunSeconds, ArgMangler) -> - catch ets:delete(foo), - ets:new(foo, [named_table, public, set]), - dbg:tracer(process, {fun trace/2, new_stats({foo, ArgMangler})}), - dbg:p(all, call), - dbg:tpl(Mod, Func, Arity, [{'_', [], []}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]), - io:format("Current date & time: ~p ~p\n", [date(), time()]), - spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end), - {started, TPid}. - -stop() -> - Sort = fun({_,A}, {_, B}) -> A > B end, - Res = ets:tab2list(foo), - TotalCalls = lists:sum([Count || {_Arg, Count} <- Res]), - io:format("Total calls: ~p\n", [TotalCalls]), - io:format("Call stats:\n~p\n", [catch lists:sort(Sort, Res)]), - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - stopped. - -trace({trace, _Pid, call, {_, _, Args}}, {Tab, ArgMangler} = Acc) -> - Args2 = ArgMangler(Args), - try - ets:update_counter(Tab, Args2, {2, 1}) - catch _:_ -> - ets:insert(Tab, {Args2, 1}) - end, - Acc; -trace(Unknown, DictStats) -> - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats({Tab, _ArgMangler} = Acc) -> - ets:delete_all_objects(Tab), - Acc. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_gc_latency.erl b/priv/tracers/tracer_gc_latency.erl deleted file mode 100644 index 680d17aa5..000000000 --- a/priv/tracers/tracer_gc_latency.erl +++ /dev/null @@ -1,82 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. 
You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_gc_latency). - --compile(nowarn_export_all). --compile(export_all). - -start(LatencyMS) -> - catch folsom_metrics:delete_metric(foo), - folsom_metrics:new_histogram(foo, uniform, 50*1000*1000), - dbg:tracer(process, {fun trace/2, new_stats(LatencyMS)}), - {ok, _} = dbg:p(all, [timestamp, garbage_collection, running]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Current date & time: ~p ~p local time\n", [date(), time()]), - {started, TPid}. - -stop() -> - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - catch folsom_metrics:delete_metric(foo), - stopped. 
- -trace({trace_ts, Pid, gc_start, _Stats, TS}, {Dict, LMS}) -> - {dict:store(Pid, TS, Dict), LMS}; -trace({trace_ts, Pid, gc_end, _Stats, TS}, {Dict, LMS}=Acc) -> - DKey = Pid, - case dict:find(DKey, Dict) of - {ok, GcStart} -> - Elapsed = erlang:max(-1, (timer:now_diff(TS, GcStart) div 1000)), - if Elapsed > LMS -> - io:format("~p: GC of ~p elapsed time ~p > threshold ~p\n", - [time(), Pid, Elapsed, LMS]), - io:format(" ~w,~w\n", [process_info(Pid, message_queue_len), _Stats]); - true -> - ok - end, - {dict:erase(DKey, Dict), LMS}; - error -> - Acc - end; -trace({trace_ts, Pid, InOrOut, _MFA, TS}, {Dict, _LMS}=Acc) -> - DKey = Pid, - case dict:find(DKey, Dict) of - {ok, GcStart} -> - io:format("Hey, pid ~p scheduled ~p but started GC ~p msec ago\n", - [Pid, InOrOut, timer:now_diff(TS, GcStart)]); - _ -> - ok - end, - Acc; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n\t~P", [Unknown, 20, DictStats,7]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_large4.erl b/priv/tracers/tracer_large4.erl deleted file mode 100644 index 93eb301c0..000000000 --- a/priv/tracers/tracer_large4.erl +++ /dev/null @@ -1,107 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. 
-%% -%% ------------------------------------------------------------------- - --module(tracer_large4). - --compile(nowarn_export_all). --compile(export_all). - --record(r_object, {bucket = '_', - key = '_', - contents = '_', - vclock = '_', - updatemetadata = '_', - updatevalue = '_' - }). - -go(Time, Count, Size) -> - ss(), - %% gets - GetMS = [{['_', - #r_object{bucket='$1', - key='$2'}], - [], - [{message,{{'$1','$2'}}}, {return_trace}]}], - erlang:trace_pattern({riak_kv_get_fsm, calculate_objsize, 2}, GetMS, [local]), - - %% puts - PutMS = [{['$1','$2','_','$3','_'], - [{'>',{size,'$3'},Size}], - [{message,{{'$1','$2',{size,'$3'}}}}]}], - erlang:trace_pattern({riak_kv_eleveldb_backend, put, 5}, PutMS, [local]), - erlang:trace_pattern({riak_kv_bitcask_backend, put, 5}, PutMS, [local]), - erlang:trace_pattern({riak_kv_memory_backend, put, 5}, PutMS, [local]), - - {Tracer, _} = spawn_monitor(?MODULE, tracer, [0, Count, Size, dict:new()]), - erlang:trace(all, true, [call, arity, {tracer, Tracer}]), - receive - {'DOWN', _, process, Tracer, _} -> - ok - after Time -> - exit(Tracer, kill), - receive - {'DOWN', _, process, Tracer, _} -> - ok - end - end, - ss(), - io:format("object trace stopped~n"). 
- -tracer(Limit, Limit, _, _) -> - ok; -tracer(Count, Limit, Threshold, Objs) -> - receive - {trace,Pid,call,{riak_kv_get_fsm,calculate_objsize,2},{Bucket,Key}} -> - Objs2 = dict:store(Pid, {Bucket,Key}, Objs), - tracer(Count+1, Limit, Threshold, Objs2); - {trace,Pid,return_from,{riak_kv_get_fsm,calculate_objsize,2},Size} -> - case Size >= Threshold of - true -> - case dict:find(Pid, Objs) of - {ok, {Bucket, Key}} -> - io:format("~p: get: ~p~n", [ts(), {Bucket, Key, Size}]); - _ -> - ok - end; - false -> - ok - end, - Objs2 = dict:erase(Pid, Objs), - tracer(Count+1, Limit, Threshold, Objs2); - {trace,_Pid,call,{riak_kv_eleveldb_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(l): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - {trace,_Pid,call,{riak_kv_bitcask_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(b): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - {trace,_Pid,call,{riak_kv_memory_backend,put,5},{Bucket,Key,Size}} -> - io:format("~p: put(m): ~p~n", [ts(), {Bucket, Key, Size}]), - tracer(Count+1, Limit, Threshold, Objs); - Msg -> - io:format("tracer: ~p~n", [Msg]), - tracer(Count+1, Limit, Threshold, Objs) - end. - -ts() -> - calendar:now_to_datetime(os:timestamp()). - -ss() -> - erlang:trace_pattern({'_','_','_'}, false, [local]), - erlang:trace(all, false, [call, arity]). diff --git a/priv/tracers/tracer_latency_histogram.erl b/priv/tracers/tracer_latency_histogram.erl deleted file mode 100644 index d7f513b58..000000000 --- a/priv/tracers/tracer_latency_histogram.erl +++ /dev/null @@ -1,127 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. 
You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - -%% For example: create a histogram of call latencies for bitcask:put/3. -%% Watch for 10 seconds. -%% -%% > latency_histogram_tracer:start(bitcask, put, 3, 10). -%% -%% Tracer pid: <0.2108.18>, use latency_histogram_tracer:stop() to stop -%% Otherwise, tracing stops in 10 seconds -%% Current date & time: {2013,9,19} {18,14,13} -%% {started,<0.2108.18>} -%% Histogram stats: -%% [{min,0}, -%% {max,48}, -%% {arithmetic_mean,2.765411819271055}, -%% {geometric_mean,2.527103493663478}, -%% {harmonic_mean,2.2674039086593973}, -%% {median,3}, -%% {variance,3.5629207473971585}, -%% {standard_deviation,1.8875700642352746}, -%% {skewness,2.0360354571500774}, -%% {kurtosis,18.529695846728423}, -%% {percentile,[{50,3},{75,4},{90,5},{95,6},{99,8},{999,14}]}, -%% {histogram,[{1,13436}, -%% {2,12304}, -%% {3,10789}, -%% {4,7397}, -%% {5,4191}, -%% {6,1929}, -%% {7,873}, -%% {8,420}, -%% {9,163}, -%% {10,79}, -%% {11,42}, -%% {12,47}, -%% {13,11}, -%% {14,16}, -%% {15,7}, -%% {16,5}, -%% {17,3}, -%% {18,4}, -%% {19,2}, -%% {20,4}, -%% {21,1}, -%% {22,11}, -%% {23,2}, -%% {24,1}, -%% {25,2}, -%% {26,1}, -%% {27,0}, -%% {28,1}, -%% {29,2}, -%% {30,0}, -%% {31,0}, -%% {40,2}, -%% {50,1}]}, -%% {n,51746}] - --module(tracer_latency_histogram). - --compile(nowarn_export_all). --compile(export_all). 
- -start(Mod, Func, Arity, RunSeconds) -> - catch folsom_metrics:delete_metric(foo), - folsom_metrics:new_histogram(foo, uniform, 50*1000*1000), - dbg:tracer(process, {fun trace/2, new_stats(0)}), - dbg:p(all, [call, timestamp, arity]), - dbg:tpl(Mod, Func, Arity, [{'_', [], [{return_trace}]}]), - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - io:format("Otherwise, tracing stops in ~p seconds\n", [RunSeconds]), - io:format("Current date & time: ~p ~p\n", [date(), time()]), - spawn(fun() -> timer:sleep(RunSeconds * 1000), stop() end), - {started, TPid}. - -stop() -> - io:format("Histogram stats:\n~p\n", [catch folsom_metrics:get_histogram_statistics(foo)]), - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - timer:sleep(100), - catch folsom_metrics:delete_metric(foo), - stopped. - -trace({trace_ts, Pid, call, {_, _, _}, TS}, {Dict, LMS}) -> - {dict:store(Pid, TS, Dict), LMS}; -trace({trace_ts, Pid, return_from, {_, _, _}, _Res, TS}, {Dict, LatencyMS}) -> - DKey = Pid, - Start = case dict:find(DKey, Dict) of - {ok, StTime} -> StTime; - error -> os:timestamp() - end, - Elapsed = timer:now_diff(TS, Start) div 1000, - folsom_metrics_histogram:update(foo, Elapsed), - {dict:erase(DKey, Dict), LatencyMS}; -trace(print_report, DictStats) -> - DictStats; -trace(Unknown, DictStats) -> - erlang:display(wha), - io:format("Unknown! ~P\n", [Unknown, 20]), - DictStats. - -new_stats(LatencyMS) -> - {dict:new(), LatencyMS}. - -print_stats(_DictStats) -> - ok. - diff --git a/priv/tracers/tracer_merge_and_and_handoff.erl b/priv/tracers/tracer_merge_and_and_handoff.erl deleted file mode 100644 index d4b57f375..000000000 --- a/priv/tracers/tracer_merge_and_and_handoff.erl +++ /dev/null @@ -1,95 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. 
-%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_merge_and_and_handoff). - --compile(nowarn_export_all). --compile(export_all). - -start() -> - start(1*1000). - -start(Interval) -> - dbg:tracer(process, {fun trace/2, {orddict:new(), orddict:new()}}), - dbg:p(all, call), - dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], []}]), - dbg:tpl(riak_kv_vnode, encode_handoff_item, 2, [{'_', [], []}]), - dbg:tpl(riak_core_handoff_receiver, process_message, 3, [{'_', [], []}]), - - %% Don't need return_trace events for this use case, but here's - %% how to do it if needed. - %%dbg:tpl(bitcask, merge_single_entry, 6, [{'_', [], [{return_trace}]}]). - - {ok, TPid} = dbg:get_tracer(), - io:format("Tracer pid: ~p, use ~p:stop() to stop\n", [TPid, ?MODULE]), - timer:send_interval(Interval, TPid, print_report), - {started, TPid}. - -stop() -> - dbg:stop_clear(), - catch exit(element(2,dbg:get_tracer()), kill), - stopped. 
- -trace({trace, _Pid, call, {bitcask, merge_single_entry, - [K, V, _TS, _FId, {File,_,_,_}, _State]}}, - {MDict, HDict}) -> - Dir = re:replace(File, "/[^/]*\$", "", [{return, binary}]), - Bytes = size(K) + size(V), - MDict2 = increment_cbdict(MDict, Dir, Bytes), - {MDict2, HDict}; -trace({trace, _Pid, call, {riak_kv_vnode, encode_handoff_item, - [{B, K}, V]}}, - {MDict, HDict}) -> - Bytes = size(B) + size(K) + size(V), - Key = "all-sending-handoff", - HDict2 = increment_cbdict(HDict, Key, Bytes), - {MDict, HDict2}; -trace({trace, _Pid, call, {riak_core_handoff_receiver, process_message, - [_Type, Msg, State]}}, - {MDict, HDict}) -> - Bytes = size(Msg), - Partition = element(5, State), % ugly hack - Key = Partition, - HDict2 = increment_cbdict(HDict, Key, Bytes), - {MDict, HDict2}; -trace(print_report, {MDict, HDict}) -> - print_stats(MDict, merge), - print_stats(HDict, handoff), - {orddict:new(), orddict:new()}. - -%% "cb" = count + bytes -increment_cbdict(Dict, Key, Bytes) -> - orddict:update(Key, fun({Count, Bs}) -> {Count + 1, Bs + Bytes} end, - {1, Bytes}, Dict). - -print_stats(Dict, Type) -> - F = fun(Key, {Count, Bytes}, {SumC, SumB}) when Count > 0 -> - io:format("~p ~p: ~p items ~p bytes ~p avg-size ~p\n", - [date(), time(), Count, Bytes, Bytes div Count, Key]), - {SumC + Count, SumB + Bytes}; - (_, _, Acc) -> - Acc - end, - {Count, Bytes} = orddict:fold(F, {0, 0}, Dict), - Avg = if Count > 0 -> Bytes div Count; - true -> 0 - end, - io:format("~p ~p: ~p total: ~p items ~p bytes ~p avg-size\n", - [date(), time(), Type, Count, Bytes, Avg]). diff --git a/priv/tracers/tracer_read_bin_trace_file.erl b/priv/tracers/tracer_read_bin_trace_file.erl deleted file mode 100644 index cfe3a2efe..000000000 --- a/priv/tracers/tracer_read_bin_trace_file.erl +++ /dev/null @@ -1,70 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. 
-%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_read_bin_trace_file). - --compile(nowarn_export_all). --compile(export_all). - -read(Path) -> - read(Path, 1). - -read(Path, LatencyMS) -> - {ok, FH} = file:open(Path, [read, binary, raw]), - read(file:read(FH, 5), FH, LatencyMS, []). - -read(eof, _FH, _, _) -> - ok; -read({ok, <>}, FH, LatencyMS, Hist) -> - {ok, Bin} = file:read(FH, Size), - case binary_to_term(Bin) of - {trace_ts, _, call, {M,F,A}, Time} -> - %%io:format("call MFA = ~p:~p/~p, ", [M, F, length(A)]), - read(file:read(FH, 5), FH, LatencyMS, [{{M,F,length(A)}, Time, A}|Hist]); - {trace_ts, _, return_from, MFA, Res, EndTime} -> - %%io:format("MFA ~p Hist ~p\n", [MFA, Hist]), - try - {value, {_, StartTime, A}, NewHist} = lists:keytake(MFA, 1, Hist), - MSec = timer:now_diff(EndTime, StartTime)/1000, - if MSec > LatencyMS -> - io:format("~p ~p msec\nArgs: (~p/~p) ~P\nRes: ~P\n\n", - [MFA, MSec, - erts_debug:flat_size(A), erts_debug:size(A), - A, 20, Res, 20]); - true -> - ok - end, - read(file:read(FH, 5), FH, LatencyMS, NewHist) - catch - error:{badmatch,false} -> - read(file:read(FH, 5), FH, LatencyMS, Hist); - Class:Reason:Stacktrace -> - io:format("ERR ~p ~p @ ~p\n", [Class, Reason, Stacktrace]), - read(file:read(FH, 5), FH, LatencyMS, Hist) - end - end. 
- -%% read(eof, _FH) -> -%% ok; -%% read({ok, <>}, FH) -> -%% {ok, Bin} = file:read(FH, Size), -%% io:format("~P\n", [binary_to_term(Bin), 15]), -%% read(file:read(FH, 5), FH). - diff --git a/priv/tracers/tracer_timeit.erl b/priv/tracers/tracer_timeit.erl deleted file mode 100644 index 6ddcf0fa0..000000000 --- a/priv/tracers/tracer_timeit.erl +++ /dev/null @@ -1,118 +0,0 @@ -%% ------------------------------------------------------------------- -%% -%% Copyright (c) 2014 Basho Technologies, Inc. All Rights Reserved. -%% -%% This file is provided to you under the Apache License, -%% Version 2.0 (the "License"); you may not use this file -%% except in compliance with the License. You may obtain -%% a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, -%% software distributed under the License is distributed on an -%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -%% KIND, either express or implied. See the License for the -%% specific language governing permissions and limitations -%% under the License. -%% -%% ------------------------------------------------------------------- - --module(tracer_timeit). - --compile(nowarn_export_all). --compile(export_all). - -%% @doc Dynamically add timing to MFA. There are various types of -%% timing. -%% -%% all - time latency of all calls to MFA -%% -%% {sample, N, Max} - sample every N calls and stop sampling after Max -%% -%% {threshold, Millis, Max} - count # of calls where latency is > Millis -%% and count # of calls total, thus percentage of calls over threshold -timeit(Mod, Fun, Arity, Type) -> - Type2 = case Type of - {sample, N, Max} -> {sample, {N, Max}, {0, 0, 0}}; - {threshold, Millis, Max} -> {threshold, {Millis, Max}, {0, 0}}; - {all, Max} -> {all, {0, Max}} - end, - dbg:tracer(process, {fun trace/2, {orddict:new(), Type2}}), - dbg:p(all, call), - dbg:tpl(Mod, Fun, Arity, [{'_', [], [{return_trace}]}]). 
- -stop() -> dbg:stop_clear(). - -trace({trace, Pid, call, {Mod, Fun, _}}, {D, {all, {Count, Max}}}) -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {all, {Count, Max}}}; -trace({trace, Pid, call, {Mod, Fun, _}}, - {D, {sample, {N, Max}, {M, K, Total}}}) -> - M2 = M+1, - Total2 = Total+1, - if N == M2 -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {sample, {N, Max}, {0, K, Total2}}}; - true -> - {D, {sample, {N, Max}, {M2, K, Total2}}} - end; -trace({trace, Pid, call, {Mod, Fun, _}}, - {D, {threshold, {Millis, Max}, {Over, Total}}}) -> - D2 = orddict:store({Pid, Mod, Fun}, os:timestamp(), D), - {D2, {threshold, {Millis, Max}, {Over, Total+1}}}; - -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {all, {Count, Max}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - Count2 = Count+1, - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs/1000, - io:format(user, "~p:~p:~p: ~p ms\n", [Pid, Mod, Fun, ElapsedMs]), - if Count2 == Max -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {all, {Count2, Max}}} - end; - error -> Acc - end; -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {sample, {N, Max}, {M, K, Total}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - K2 = K+1, - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs/1000, - io:format(user, "[sample ~p/~p] ~p:~p:~p: ~p ms\n", - [K2, Total, Pid, Mod, Fun, ElapsedMs]), - if K2 == Max -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {sample, {N, Max}, {M, K2, Total}}} - end; - error -> Acc - end; -trace({trace, Pid, return_from, {Mod, Fun, _}, _Result}, - Acc={D, {threshold, {Millis, Max}, {Over, Total}}}) -> - Key = {Pid, Mod, Fun}, - case orddict:find(Key, D) of - {ok, StartTime} -> - ElapsedUs = timer:now_diff(os:timestamp(), StartTime), - ElapsedMs = ElapsedUs / 1000, - if ElapsedMs > Millis -> - 
Over2 = Over+1, - io:format(user, "[over threshold ~p, ~p/~p] ~p:~p:~p: ~p ms\n", - [Millis, Over2, Total, Pid, Mod, Fun, ElapsedMs]); - true -> - Over2 = Over - end, - if Max == Over -> stop(); - true -> - D2 = orddict:erase(Key, D), - {D2, {threshold, {Millis, Max}, {Over2, Total}}} - end; - error -> Acc - end. diff --git a/rebar.config b/rebar.config index d39d80a53..2043d87af 100644 --- a/rebar.config +++ b/rebar.config @@ -1,6 +1,5 @@ {minimum_otp_vsn, "22.0"}. -{src_dirs, ["./priv/tracers", "./src"]}. {cover_enabled, false}. {edoc_opts, [{preprocess, true}]}. {erl_opts, [warnings_as_errors, @@ -42,16 +41,16 @@ ]}. {deps, [ - {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-develop"}}}, + {riak_core, {git, "https://github.com/nhs-riak/riak_core.git", {branch, "nhse-develop-3.4"}}}, {sidejob, {git, "https://github.com/nhs-riak/sidejob.git", {branch, "nhse-develop"}}}, {bitcask, {git, "https://github.com/nhs-riak/bitcask.git", {branch, "nhse-develop"}}}, {redbug, {git, "https://github.com/massemanet/redbug", {branch, "master"}}}, - {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.2"}}}, + {recon, {git, "https://github.com/ferd/recon", {tag, "2.5.5"}}}, {sext, {git, "https://github.com/uwiger/sext.git", {tag, "1.8.0"}}}, {riak_pipe, {git, "https://github.com/nhs-riak/riak_pipe.git", {branch, "nhse-develop"}}}, {riak_dt, {git, "https://github.com/nhs-riak/riak_dt.git", {branch, "nhse-develop"}}}, - {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-develop"}}}, + {riak_api, {git, "https://github.com/nhs-riak/riak_api.git", {branch, "nhse-d34-nhskv.i30-lowercase"}}}, {hyper, {git, "https://github.com/nhs-riak/hyper", {branch, "nhse-develop"}}}, {kv_index_tictactree, {git, "https://github.com/nhs-riak/kv_index_tictactree.git", {branch, "nhse-develop"}}}, - {rhc, {git, "https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-develop"}}} + {rhc, {git, 
"https://github.com/nhs-riak/riak-erlang-http-client", {branch, "nhse-d34-nhskv.i30-lowercase"}}} ]}. diff --git a/src/riak_index.erl b/src/riak_index.erl index f0c2986c1..2052cbecb 100644 --- a/src/riak_index.erl +++ b/src/riak_index.erl @@ -40,7 +40,9 @@ upgrade_query/1, object_key_in_range/3, index_key_in_range/3, - add_timeout_opt/2 + add_timeout_opt/2, + compile_re/1, + is_valid_index/1 ]). -include_lib("kernel/include/logger.hrl"). @@ -74,6 +76,8 @@ % -type query_def() :: {ok, term()} | {error, term()} | {term(), {error, term()}}. % -export_type([query_def/0]). -type query_def() :: #riak_kv_index_v3{}|#riak_kv_index_v2{}. +-type pcre_mp() :: {re_pattern, term(), term(), term(), term()}. + -export_type([query_def/0]). -type last_result() :: {value(), key()} | key(). @@ -83,6 +87,16 @@ -type query_version() :: v1 | v2 | v3. +-type index_query() :: ?KV_INDEX_Q{}. + +-spec compile_re( + undefined|iodata()) -> + {ok, pcre_mp()|undefined}|{error, {string(), non_neg_integer()}}. +compile_re(undefined) -> + {ok, undefined}; +compile_re(Regex) -> + re:compile(Regex). + mapred_index(Dest, Args) -> mapred_index(Dest, Args, ?TIMEOUT). @@ -318,6 +332,15 @@ to_index_query(Args) -> Version = riak_core_capability:get({riak_kv, secondary_index_version}, v1), to_index_query(Version, Args). +-spec is_valid_index(index_query()) -> boolean(). +is_valid_index( + ?KV_INDEX_Q{start_term = Start, term_regex = Regex}) + when is_integer(Start), Regex =/= undefined -> + false; +is_valid_index(_Query) -> + true. 
+ + %% @doc upgrade a V1 Query to a v2 Query make_query({eq, ?BUCKETFIELD, _Bucket}, Q) -> {ok, Q?KV_INDEX_Q{filter_field=?BUCKETFIELD, return_terms=false}}; diff --git a/src/riak_kv.app.src b/src/riak_kv.app.src index 62763883a..a9884996f 100644 --- a/src/riak_kv.app.src +++ b/src/riak_kv.app.src @@ -6,6 +6,7 @@ {applications, [ kernel, stdlib, + tools, sasl, crypto, riak_api, diff --git a/src/riak_kv_bitcask_backend.erl b/src/riak_kv_bitcask_backend.erl index 0820838a1..e95a05481 100644 --- a/src/riak_kv_bitcask_backend.erl +++ b/src/riak_kv_bitcask_backend.erl @@ -230,7 +230,7 @@ get(Bucket, Key, #state{ref=Ref, key_vsn=KVers}=State) -> %% NOTE: The bitcask backend does not currently support %% secondary indexing and the_IndexSpecs parameter %% is ignored. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. diff --git a/src/riak_kv_eleveldb_backend.erl b/src/riak_kv_eleveldb_backend.erl index ee927be45..9cdb12b28 100644 --- a/src/riak_kv_eleveldb_backend.erl +++ b/src/riak_kv_eleveldb_backend.erl @@ -177,7 +177,7 @@ get(Bucket, Key, #state{read_opts=ReadOpts, {error, Reason, State} end. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). %% @doc Normal put, use existing option, do not modify write options -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> diff --git a/src/riak_kv_leveled_backend.erl b/src/riak_kv_leveled_backend.erl index c50b109fb..aade73f39 100644 --- a/src/riak_kv_leveled_backend.erl +++ b/src/riak_kv_leveled_backend.erl @@ -234,12 +234,9 @@ head(Bucket, Key, #state{bookie=Bookie}=State) -> end. %% @doc Insert an object into the leveled backend. 
--type index_spec() :: {add, Index, SecondaryKey} | - {remove, Index, SecondaryKey}. - -spec flush_put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), state()) -> {ok, state()} | @@ -250,7 +247,7 @@ flush_put(Bucket, Key, IndexSpecs, Val, State) -> -spec put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), state()) -> {ok, state()} | @@ -262,7 +259,7 @@ put(Bucket, Key, IndexSpecs, Val, State) -> %% @doc Delete an object from the leveled backend -spec delete(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], state()) -> {ok, state()} | {error, term(), state()}. @@ -671,10 +668,10 @@ callback(Ref, UnexpectedCallback, State) -> %% =================================================================== %% Internal functions %% =================================================================== - +-type regex() :: {re_pattern, term(), term(), term(), term()}|undefined. -spec dollarkey_foldfun( - riak_kv_backend:fold_keys_fun(), boolean(), re:mp()|undefined) + riak_kv_backend:fold_keys_fun(), boolean(), regex()) -> riak_kv_backend:fold_objects_fun(). dollarkey_foldfun(FoldKeysFun, ReadTombs, TermRegex) -> FilteredFoldKeysFun = @@ -738,7 +735,7 @@ log_fragmentation(Allocator) -> %% flush_put or put has been called -spec do_put(riak_object:bucket(), riak_object:key(), - [index_spec()], + [riak_object:index_spec()], binary(), boolean(), state()) -> diff --git a/src/riak_kv_memory_backend.erl b/src/riak_kv_memory_backend.erl index 193aecb52..84b73f8cd 100644 --- a/src/riak_kv_memory_backend.erl +++ b/src/riak_kv_memory_backend.erl @@ -208,7 +208,7 @@ get(Bucket, Key, State=#state{data_ref=DataRef, end. %% @doc Insert an object into the memory backend. --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). 
-spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()}. put(Bucket, PrimaryKey, IndexSpecs, Val, State=#state{data_ref=DataRef, diff --git a/src/riak_kv_multi_backend.erl b/src/riak_kv_multi_backend.erl index b82df3d8d..f6e103132 100644 --- a/src/riak_kv_multi_backend.erl +++ b/src/riak_kv_multi_backend.erl @@ -257,7 +257,7 @@ head(Bucket, Key, State) -> %% @doc Insert an object with secondary index %% information into the kv backend --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. diff --git a/src/riak_kv_multi_prefix_backend.erl b/src/riak_kv_multi_prefix_backend.erl index b195b1af2..c6ce4f284 100644 --- a/src/riak_kv_multi_prefix_backend.erl +++ b/src/riak_kv_multi_prefix_backend.erl @@ -294,7 +294,7 @@ head(Bucket, Key, State) -> %% @doc Insert an object with secondary index %% information into the kv backend --type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}. +-type index_spec() :: riak_object:index_spec(). -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()} | {error, term(), state()}. 
diff --git a/src/riak_kv_pb_index.erl b/src/riak_kv_pb_index.erl index 6c36280a4..79a710651 100644 --- a/src/riak_kv_pb_index.erl +++ b/src/riak_kv_pb_index.erl @@ -71,31 +71,23 @@ validate_request(#rpbindexreq{qtype = range, range_min = Min, range_max = Max}) when not is_binary(Min); not is_binary(Max) -> {error, {format, "Invalid range query: ~p -> ~p", [Min, Max]}}; validate_request(#rpbindexreq{term_regex = TermRe} = Req) -> - {ValRe, ValErr} = ensure_compiled_re(TermRe), - case ValRe of - error -> - {error, {format, "Invalid term regular expression ~p : ~p", [TermRe, ValErr]}}; - _ -> - validate_query(Req) + case riak_index:compile_re(TermRe) of + {error, ReError} -> + {error, + {format, + "Invalid term regular expression ~p : ~p", + [TermRe, ReError]}}; + {ok, CompiledRegex} -> + {ok, Query} = + riak_index:to_index_query(query_params(Req, CompiledRegex)), + case riak_index:is_valid_index(Query) of + false -> + {error, "Can not use term regular expression in integer query"}; + _ -> + Query + end end. -validate_query(Req) -> - Query = riak_index:to_index_query(query_params(Req)), - case Query of - {ok, ?KV_INDEX_Q{start_term = Start, term_regex = Re}} when is_integer(Start) - andalso Re =/= undefined -> - {error, "Can not use term regular expression in integer query"}; - _ -> - Query - end. - -ensure_compiled_re(TermRe) -> - case TermRe of - undefined -> - {undefined, undefined}; - _ -> - re:compile(TermRe) - end. %% @doc process/2 callback. Handles an incoming request message. process(#rpbindexreq{stream = S} = Req, State) -> @@ -126,7 +118,7 @@ validate_request_and_maybe_perform_query(Req, State) -> error_accept(Class, State) -> {error, riak_core_util:job_class_disabled_message(binary, Class), State}. 
-maybe_perform_query({ok, Query}, Req=#rpbindexreq{stream=true}, State) -> +maybe_perform_query(Query, Req=#rpbindexreq{stream=true}, State) -> #rpbindexreq{type=T, bucket=B, max_results=MaxResults, timeout=Timeout, pagination_sort=PgSort0, continuation=Continuation} = Req, #state{client=Client} = State, @@ -139,7 +131,7 @@ maybe_perform_query({ok, Query}, Req=#rpbindexreq{stream=true}, State) -> riak_client:stream_get_index(Bucket, Query, Opts, Client), ReturnTerms = riak_index:return_terms(Req#rpbindexreq.return_terms, Query), {reply, {stream, ReqId}, State#state{req_id=ReqId, req=Req#rpbindexreq{return_terms=ReturnTerms}}}; -maybe_perform_query({ok, Query}, Req, State) -> +maybe_perform_query(Query, Req, State) -> #rpbindexreq{type=T, bucket=B, max_results=MaxResults, return_terms=ReturnTerms0, timeout=Timeout, pagination_sort=PgSort0, continuation=Continuation} = Req, @@ -166,20 +158,33 @@ handle_query_results(ReturnTerms, MaxResults, {ok, Results}, State) -> Resp = encode_results(ReturnTerms, Results, Cont), {reply, Resp, State}. 
-query_params(#rpbindexreq{index=Index= <<"$bucket">>, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + index=Index= <<"$bucket">>, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {return_terms, false}, {term_regex, Re}, {max_results, MaxResults}, {continuation, Continuation}]; -query_params(#rpbindexreq{qtype=eq, index=Index, key=Value, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + qtype=eq, + index=Index, + key=Value, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {start_term, Value}, {end_term, Value}, {term_regex, Re}, {max_results, MaxResults}, {return_terms, false}, {continuation, Continuation}]; -query_params(#rpbindexreq{index=Index, range_min=Min, range_max=Max, - term_regex=Re, max_results=MaxResults, - continuation=Continuation}) -> +query_params( + #rpbindexreq{ + index=Index, + range_min=Min, + range_max=Max, + max_results=MaxResults, + continuation=Continuation}, + Re) -> [{field, Index}, {start_term, Min}, {end_term, Max}, {term_regex, Re}, {max_results, MaxResults}, {continuation, Continuation}]. diff --git a/src/riak_kv_test_util.erl b/src/riak_kv_test_util.erl index 1fadbb89d..6de3931c9 100644 --- a/src/riak_kv_test_util.erl +++ b/src/riak_kv_test_util.erl @@ -294,7 +294,7 @@ dep_apps(Test, Extra) -> (_) -> ok end, - [sasl, Silencer, folsom, exometer_core, runtime_tools, + [sasl, Silencer, exometer_core, runtime_tools, mochiweb, webmachine, sidejob, poolboy, basho_stats, bitcask, eleveldb, riak_core, riak_pipe, riak_api, riak_dt, riak_pb, riak_kv, DefaultSetupFun, Extra]. diff --git a/src/riak_kv_util.erl b/src/riak_kv_util.erl index b5ef6d782..5041ea0e4 100644 --- a/src/riak_kv_util.erl +++ b/src/riak_kv_util.erl @@ -52,7 +52,8 @@ is_modfun_allowed/2, shuffle_list/1, kv_ready/0, - ngr_initial_timeout/0 + ngr_initial_timeout/0, + profile_riak/1 ]). 
-export([report_hashtree_tokens/0, reset_hashtree_tokens/2]). @@ -519,6 +520,26 @@ shuffle_list(L) -> lists:map(fun({_R, X0}) -> X0 end, lists:keysort(1, lists:map(fun(X) -> {rand:uniform(), X} end, L))). +-spec profile_riak(pos_integer()) -> analyzed|failed. +profile_riak(ProfileTime) -> + eprof:start(), + case eprof:start_profiling(erlang:processes()) of + profiling -> + timer:sleep(ProfileTime), + case eprof:stop_profiling() of + profiling_stopped -> + eprof:analyze( + total, [{filter, [{time, float(10 * ProfileTime)}]}] + ), + stopped = eprof:stop(), + analyzed; + _ -> + stopped = eprof:stop(), + failed_running + end; + _ -> + failed_starting + end. %% =================================================================== %% EUnit tests diff --git a/src/riak_kv_wm_index.erl b/src/riak_kv_wm_index.erl index 1aa54bc60..3c0ca08b6 100644 --- a/src/riak_kv_wm_index.erl +++ b/src/riak_kv_wm_index.erl @@ -186,165 +186,192 @@ request_class(ReqData) -> %% a known type. malformed_request(RD, Ctx) -> %% Pull the params... 
- Bucket = list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(bucket, RD))), - IndexField = list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(field, RD))), + Bucket = + list_to_binary( + riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(bucket, RD))), + IndexField = + list_to_binary( + riak_kv_wm_utils:maybe_decode_uri(RD, wrq:path_info(field, RD))), Args1 = wrq:path_tokens(RD), - Args2 = [list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, X)) || X <- Args1], + Args2 = + [list_to_binary(riak_kv_wm_utils:maybe_decode_uri(RD, X)) + || X <- Args1], ReturnTerms0 = wrq:get_qs_value(?Q_2I_RETURNTERMS, "false", RD), ReturnTerms1 = normalize_boolean(string:to_lower(ReturnTerms0)), Continuation = wrq:get_qs_value(?Q_2I_CONTINUATION, RD), PgSort0 = wrq:get_qs_value(?Q_2I_PAGINATION_SORT, RD), - PgSort = case PgSort0 of - undefined -> undefined; - _ -> normalize_boolean(string:to_lower(PgSort0)) - end, + PgSort = + case PgSort0 of + undefined -> + undefined; + _ -> + normalize_boolean(string:to_lower(PgSort0)) + end, MaxVal = validate_max(wrq:get_qs_value(?Q_2I_MAX_RESULTS, RD)), TermRegex = wrq:get_qs_value(?Q_2I_TERM_REGEX, RD), - Timeout0 = wrq:get_qs_value("timeout", RD), - {Start, End} = case {IndexField, Args2} of - {<<"$bucket">>, _} -> {undefined, undefined}; - {_, []} -> {undefined, undefined}; - {_, [S]} -> {S, S}; - {_, [S, E]} -> {S, E} - end, + Timeout = validate_timeout(wrq:get_qs_value("timeout", RD)), + {Start, End} = + case {IndexField, Args2} of + {<<"$bucket">>, _} -> {undefined, undefined}; + {_, []} -> {undefined, undefined}; + {_, [S]} -> {S, S}; + {_, [S, E]} -> {S, E} + end, IsEqualOp = length(Args1) == 1, InternalReturnTerms = not( IsEqualOp orelse IndexField == <<"$field">> ), - QRes = riak_index:to_index_query( - [ - {field, IndexField}, {start_term, Start}, {end_term, End}, - {return_terms, InternalReturnTerms}, - {continuation, Continuation}, - {term_regex, TermRegex} - ] - ++ [{max_results, MaxResults} || 
{true, MaxResults} <- [MaxVal]] - ), - ValRe = case TermRegex of - undefined -> - ok; - _ -> - re:compile(TermRegex) - end, - Stream0 = wrq:get_qs_value("stream", "false", RD), Stream = normalize_boolean(string:to_lower(Stream0)), - - case {PgSort, ReturnTerms1, validate_timeout(Timeout0), MaxVal, - QRes, - ValRe, Stream} of - {malformed, _, _, _, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a boolean", - [?Q_2I_PAGINATION_SORT, PgSort0]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, malformed, _, _, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a boolean", - [?Q_2I_RETURNTERMS, ReturnTerms0]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, _, - {ok, ?KV_INDEX_Q{start_term=NormStart}}, - {ok, _CompiledRe}, _} - when is_integer(NormStart) -> - {true, - wrq:set_resp_body("Can not use term regular expressions" - " on integer queries", - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, _, - _, - {error, ReError}, _} -> - {true, - wrq:set_resp_body( - io_lib:format("Invalid term regular expression ~p : ~p", - [TermRegex, ReError]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, {true, Timeout}, {true, MaxResults}, - {ok, Query}, - _, _} -> - %% Request is valid. - ReturnTerms2 = riak_index:return_terms(ReturnTerms1, Query), - %% Special case: a continuation implies pagination sort - %% even if no max_results was given. 
- PgSortFinal = case Continuation of - undefined -> PgSort; - _ -> true - end, - NewCtx = Ctx#ctx{ - bucket = Bucket, - index_query = Query, - max_results = MaxResults, - return_terms = ReturnTerms2, - timeout=Timeout, - pagination_sort = PgSortFinal, - streamed = Stream - }, - {false, RD, NewCtx}; - {_, _, _, _, - {error, Reason}, - _, _} -> + case riak_index:compile_re(TermRegex) of + {error, ReError} -> {true, wrq:set_resp_body( - io_lib:format("Invalid query: ~p~n", [Reason]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, _, {false, BadVal}, - _, - _, _} -> - {true, - wrq:set_resp_body(io_lib:format("Invalid ~p. ~p is not a positive integer", - [?Q_2I_MAX_RESULTS, BadVal]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - {_, _, {error, Input}, _, - _, - _, _} -> - {true, wrq:append_to_resp_body(io_lib:format("Bad timeout " - "value ~p. Must be a non-negative integer~n", - [Input]), - wrq:set_resp_header(?HEAD_CTYPE, - "text/plain", RD)), Ctx} + io_lib:format( + "Invalid term regular expression ~p : ~p", + [TermRegex, ReError] + ), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + {ok, CompiledRegex} -> + QueryResult = + riak_index:to_index_query( + [{field, IndexField}, {start_term, Start}, {end_term, End}, + {max_results, MaxVal}, + {continuation, Continuation}, + {return_terms, InternalReturnTerms}, + {term_regex, CompiledRegex} + ] + ), + case QueryResult of + {error, Reason} -> + {true, + wrq:set_resp_body( + io_lib:format( + "Invalid query: ~p~n", [Reason]), + wrq:set_resp_header( + ?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + {ok, Query} -> + case riak_index:is_valid_index(Query) of + false -> + {true, + wrq:set_resp_body( + "Can not use term regular expressions" + " on integer queries", + wrq:set_resp_header( + ?HEAD_CTYPE, "text/plain", RD)), + Ctx}; + _ -> + ValidationResult = + validate_query( + PgSort, + ReturnTerms1, + Timeout, + MaxVal, + Stream, + RD + ), + case ValidationResult of + ok -> + 
PgSortFinal = + case Continuation of + undefined -> + PgSort; + _ -> + true + end, + NewCtx = + Ctx#ctx{ + bucket = Bucket, + index_query = Query, + max_results = MaxVal, + return_terms = + riak_index:return_terms( + ReturnTerms1, Query), + timeout=Timeout, + pagination_sort = PgSortFinal, + streamed = Stream + }, + {false, RD, NewCtx}; + ErrorMessage -> + {true, ErrorMessage, Ctx} + end + end + end end. + +validate_query({malformed, BadPgSort}, _, _, _, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a boolean", + [?Q_2I_PAGINATION_SORT, BadPgSort]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, {malformed, BadReturnTerms}, _, _, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a boolean", + [?Q_2I_RETURNTERMS, BadReturnTerms]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, {malformed, BadN}, _, _, RD) -> + wrq:append_to_resp_body( + io_lib:format( + "Bad timeout " + "value ~p. Must be a non-negative integer~n", + [BadN]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, {malformed, BadMR}, _, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid ~p. ~p is not a positive integer", + [?Q_2I_MAX_RESULTS, BadMR]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, _, {malformed, BadStreamReq}, RD) -> + wrq:set_resp_body( + io_lib:format( + "Invalid stream. ~p is not a boolean", + [BadStreamReq]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)); +validate_query(_, _, _, _, _, _RD) -> + %% Request is valid. + ok. + +-spec validate_timeout( + undefined|string()) -> + non_neg_integer()|{malformed, string()}. validate_timeout(undefined) -> - {true, undefined}; + undefined; validate_timeout(Str) -> - try - list_to_integer(Str) of + try list_to_integer(Str) of Int when Int >= 0 -> - {true, Int}; + Int; Neg -> - {error, Neg} + {malformed, Neg} catch _:_ -> - {error, Str} + {malformed, Str} end. 
+-spec validate_max( + undefined|string()) -> non_neg_integer()|all|{malformed, string()}. validate_max(undefined) -> - {true, all}; + all; validate_max(N) when is_list(N) -> - try - list_to_integer(N) of + try list_to_integer(N) of Max when Max > 0 -> - {true, Max}; + Max; ZeroOrLess -> - {false, ZeroOrLess} + {malformed, ZeroOrLess} catch _:_ -> - {false, N} + {malformed, N} end. +-spec normalize_boolean(string()) -> boolean()|{malformed, string()}. normalize_boolean("false") -> false; normalize_boolean("true") -> true; -normalize_boolean(_) -> - malformed. +normalize_boolean(OtherString) -> + {malformed, OtherString}. -spec content_types_provided(#wm_reqdata{}, context()) -> {[{ContentType::string(), Producer::atom()}], #wm_reqdata{}, context()}. diff --git a/src/riak_kv_wm_object.erl b/src/riak_kv_wm_object.erl index dfc66d43b..fe98eaa19 100644 --- a/src/riak_kv_wm_object.erl +++ b/src/riak_kv_wm_object.erl @@ -176,21 +176,18 @@ riak, %% local | {node(), atom()} - params for riak client doc, %% {ok, riak_object()}|{error, term()} - the object found vtag, %% string() - vtag the user asked for - bucketprops, %% proplist() - properties of the bucket links, %% [link()] - links of the object index_fields, %% [index_field()] method, %% atom() - HTTP method for the request ctype, %% string() - extracted content-type provided charset, %% string() | undefined - extracted character set provided timeout, %% integer() - passed-in timeout value in ms - security %% security context + security, %% security context + header_map %% map of http headers interesting within this module }). --ifdef(namespaced_types). + -type riak_kv_wm_object_dict() :: dict:dict(). --else. --type riak_kv_wm_object_dict() :: dict(). --endif. -include_lib("webmachine/include/webmachine.hrl"). -include("riak_kv_wm_raw.hrl"). @@ -204,13 +201,78 @@ -type link() :: {{Bucket::binary(), Key::binary()}, Tag::binary()}. --define(DEFAULT_TIMEOUT, 60000). 
-define(V1_BUCKET_REGEX, "/([^/]+)>; ?rel=\"([^\"]+)\""). -define(V1_KEY_REGEX, "/([^/]+)/([^/]+)>; ?riaktag=\"([^\"]+)\""). -define(V2_BUCKET_REGEX, "; ?rel=\"([^\"]+)\""). -define(V2_KEY_REGEX, "; ?riaktag=\"([^\"]+)\""). +-define(BINHEAD_CTYPE, <<"content-type">>). +-define(BINHEAD_IF_NOT_MODIFIED, <<"x-riak-if-not-modified">>). +-define(BINHEAD_NONE_MATCH, <<"if-none-match">>). +-define(BINHEAD_MATCH, <<"if-match">>). +-define(BINHEAD_UNMODIFIED_SINCE, <<"if-unmodified-since">>). +-define(BINHEAD_ENCODING, <<"content-encoding">>). +-define(BINHEAD_ACCEPT, <<"accept">>). +-define(BINHEAD_LINK, <<"link">>). +-define(BINHEAD_VCLOCK, <<"x-riak-vclock">>). +-define(PREFIX_USERMETA, "x-riak-meta-"). +-define(PREFIX_INDEX, "x-riak-index-"). + +-spec make_reqheader_map(request_data()) -> #{binary() => any()}. +make_reqheader_map(RD) -> + lists:foldl( + fun({HeadKey, HeadVal}, AccMap) -> + accumulate_header_info( + list_to_binary(HeadKey), + HeadVal, + AccMap + ) + end, + maps:new(), + mochiweb_headers:to_normalised_list(wrq:req_headers(RD)) + ). 
+ +accumulate_header_info(<>, T, MapAcc) -> + maps:update_with( + ?PREFIX_INDEX, + fun(Indices) -> [{Field, T}|Indices] end, + [{Field, T}], + MapAcc + ); +accumulate_header_info(<>, V, MapAcc) -> + maps:update_with( + ?PREFIX_USERMETA, + fun(Indices) -> + [{<>, V}|Indices] + end, + [{<>, V}], + MapAcc + ); +accumulate_header_info(?BINHEAD_ACCEPT, V, MapAcc) -> + maps:put(?BINHEAD_ACCEPT, V, MapAcc); +accumulate_header_info(?BINHEAD_CTYPE, V, MapAcc) -> + maps:put(?BINHEAD_CTYPE, V, MapAcc); +accumulate_header_info(?BINHEAD_ENCODING, V, MapAcc) -> + maps:put(?BINHEAD_ENCODING, V, MapAcc); +accumulate_header_info(?BINHEAD_VCLOCK, VC, MapAcc) -> + maps:put( + ?BINHEAD_VCLOCK, + riak_object:decode_vclock(base64:decode(VC)), + MapAcc); +accumulate_header_info(?BINHEAD_LINK, V, MapAcc) -> + maps:put(?BINHEAD_LINK, V, MapAcc); +accumulate_header_info(?BINHEAD_IF_NOT_MODIFIED, V, MapAcc) -> + maps:put(?BINHEAD_IF_NOT_MODIFIED, V, MapAcc); +accumulate_header_info(?BINHEAD_UNMODIFIED_SINCE, V, MapAcc) -> + maps:put(?BINHEAD_UNMODIFIED_SINCE, V, MapAcc); +accumulate_header_info(?BINHEAD_MATCH, V, MapAcc) -> + maps:put(?BINHEAD_MATCH, V, MapAcc); +accumulate_header_info(?BINHEAD_NONE_MATCH, V, MapAcc) -> + maps:put(?BINHEAD_NONE_MATCH, V, MapAcc); +accumulate_header_info(_DiscardIdx, _Value, MapAcc) -> + MapAcc. + -spec init(proplists:proplist()) -> {ok, context()}. %% @doc Initialize this resource. This function extracts the %% 'prefix' and 'riak' properties from the dispatch args. @@ -248,6 +310,7 @@ service_available(RD, Ctx0=#ctx{riak=RiakProps}) -> list_to_binary( riak_kv_wm_utils:maybe_decode_uri(RD, K)) end, + HeaderMap = make_reqheader_map(RD), {true, RD, Ctx#ctx{ @@ -255,6 +318,7 @@ service_available(RD, Ctx0=#ctx{riak=RiakProps}) -> client=C, bucket=Bucket, key=Key, + header_map = HeaderMap, vtag=wrq:get_qs_value(?Q_VTAG, RD)}}; Error -> {false, @@ -401,7 +465,7 @@ malformed_request([H|T], RD, Ctx) -> %% PUT/POST. 
%% This should probably result in a 415 using the known_content_type callback malformed_content_type(RD, Ctx) -> - case wrq:get_req_header(?HEAD_CTYPE, RD) of + case maps:get(?BINHEAD_CTYPE, Ctx#ctx.header_map, undefined) of undefined -> {true, missing_content_type(RD), Ctx}; RawCType -> @@ -445,49 +509,54 @@ malformed_rw_params(RD, Ctx) -> Res = lists:foldl(fun malformed_rw_param/2, {false, RD, Ctx}, - [{#ctx.r, "r", "default"}, - {#ctx.w, "w", "default"}, - {#ctx.dw, "dw", "default"}, - {#ctx.rw, "rw", "default"}, - {#ctx.pw, "pw", "default"}, - {#ctx.node_confirms, "node_confirms", "default"}, - {#ctx.pr, "pr", "default"}]), + [{#ctx.r, "r", default}, + {#ctx.w, "w", default}, + {#ctx.dw, "dw", default}, + {#ctx.rw, "rw", default}, + {#ctx.pw, "pw", default}, + {#ctx.node_confirms, "node_confirms", default}, + {#ctx.pr, "pr", default}]), Res2 = lists:foldl(fun malformed_custom_param/2, Res, [{#ctx.sync_on_write, "sync_on_write", - "default", + default, [default, backend, one, all]}]), lists:foldl(fun malformed_boolean_param/2, Res2, - [{#ctx.basic_quorum, "basic_quorum", "default"}, - {#ctx.notfound_ok, "notfound_ok", "default"}, - {#ctx.asis, "asis", "false"}]). + [{#ctx.basic_quorum, "basic_quorum", default}, + {#ctx.notfound_ok, "notfound_ok", default}, + {#ctx.asis, "asis", false}]). --spec malformed_rw_param({Idx::integer(), Name::string(), Default::string()}, +-spec malformed_rw_param({Idx::integer(), Name::string(), Default::atom()}, {boolean(), #wm_reqdata{}, context()}) -> {boolean(), #wm_reqdata{}, context()}. %% @doc Check that a specific r, w, dw, or rw query param is a %% string-encoded integer. Store its result in context() if it %% is, or print an error message in #wm_reqdata{} if it is not. 
malformed_rw_param({Idx, Name, Default}, {Result, RD, Ctx}) -> - case catch normalize_rw_param(wrq:get_qs_value(Name, Default, RD)) of - P when (is_atom(P) orelse is_integer(P)) -> - {Result, RD, setelement(Idx, Ctx, P)}; - _ -> - {true, - wrq:append_to_resp_body( - io_lib:format("~s query parameter must be an integer or " - "one of the following words: 'one', 'quorum' or 'all'~n", - [Name]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + case catch normalize_rw_param(ExtractedString) of + P when (is_atom(P) orelse is_integer(P)) -> + {Result, RD, setelement(Idx, Ctx, P)}; + _ -> + {true, + wrq:append_to_resp_body( + io_lib:format("~s query parameter must be an integer or " + "one of the following words: 'one', 'quorum' or 'all'~n", + [Name]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. -spec malformed_custom_param({Idx::integer(), Name::string(), - Default::string(), + Default::atom(), AllowedValues::[atom()]}, {boolean(), #wm_reqdata{}, context()}) -> {boolean(), #wm_reqdata{}, context()}. @@ -495,46 +564,48 @@ malformed_rw_param({Idx, Name, Default}, {Result, RD, Ctx}) -> %% Store its result in context() if it is, or print an error message %% in #wm_reqdata{} if it is not. 
malformed_custom_param({Idx, Name, Default, AllowedValues}, {Result, RD, Ctx}) -> - AllowedValueTuples = [{V} || V <- AllowedValues], - Option= - lists:keyfind( - list_to_atom( - string:to_lower( - wrq:get_qs_value(Name, Default, RD))), - 1, - AllowedValueTuples), - case Option of - false -> - ErrorText = - "~s query parameter must be one of the following words: ~p~n", - {true, - wrq:append_to_resp_body( - io_lib:format(ErrorText, [Name, AllowedValues]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx}; - _ -> - {Value} = Option, - {Result, RD, setelement(Idx, Ctx, Value)} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + UsableValue = list_to_atom(string:lowercase(ExtractedString)), + case lists:member(UsableValue, AllowedValues) of + true -> + {Result, RD, setelement(Idx, Ctx, UsableValue)}; + false -> + ErrorText = + "~s query parameter must be one of the following words: ~p~n", + {true, + wrq:append_to_resp_body( + io_lib:format(ErrorText, [Name, AllowedValues]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. %% @doc Check that a specific query param is a %% string-encoded boolean. Store its result in context() if it %% is, or print an error message in #wm_reqdata{} if it is not. 
malformed_boolean_param({Idx, Name, Default}, {Result, RD, Ctx}) -> - case string:to_lower(wrq:get_qs_value(Name, Default, RD)) of - "true" -> - {Result, RD, setelement(Idx, Ctx, true)}; - "false" -> - {Result, RD, setelement(Idx, Ctx, false)}; - "default" -> - {Result, RD, setelement(Idx, Ctx, default)}; - _ -> - {true, - wrq:append_to_resp_body( - io_lib:format("~s query parameter must be true or false~n", - [Name]), - wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), - Ctx} + case wrq:get_qs_value(Name, RD) of + undefined -> + {Result, RD, setelement(Idx, Ctx, Default)}; + ExtractedString -> + case string:lowercase(ExtractedString) of + "true" -> + {Result, RD, setelement(Idx, Ctx, true)}; + "false" -> + {Result, RD, setelement(Idx, Ctx, false)}; + "default" -> + {Result, RD, setelement(Idx, Ctx, default)}; + _ -> + {true, + wrq:append_to_resp_body( + io_lib:format("~s query parameter must be true or false~n", + [Name]), + wrq:set_resp_header(?HEAD_CTYPE, "text/plain", RD)), + Ctx} + end end. normalize_rw_param("backend") -> backend; @@ -552,7 +623,7 @@ normalize_rw_param(V) -> list_to_integer(V). %% A link header should be of the form: %% </Prefix/Bucket/Key>; riaktag="Tag",... malformed_link_headers(RD, Ctx) -> - case catch get_link_heads(RD, Ctx) of + case catch get_link_heads(Ctx) of Links when is_list(Links) -> {false, RD, Ctx#ctx{links=Links}}; _Error when Ctx#ctx.api_version == 1-> @@ -581,7 +652,7 @@ malformed_link_headers(RD, Ctx) -> %% An index field should be of the form "index_fieldname_type" malformed_index_headers(RD, Ctx) -> %% Get a list of index_headers... - IndexFields1 = extract_index_fields(RD), + IndexFields1 = extract_index_fields(Ctx), %% Validate the fields. If validation passes, then the index %% headers are correctly formed. @@ -596,34 +667,20 @@ malformed_index_headers(RD, Ctx) -> Ctx} end. --spec extract_index_fields(#wm_reqdata{}) -> proplists:proplist(). +-spec extract_index_fields(context()) -> proplists:proplist(). 
%% @doc Extract fields from headers prefixed by "x-riak-index-" in the %% client's PUT request, to be indexed at write time. -extract_index_fields(RD) -> - PrefixSize = length(?HEAD_INDEX_PREFIX), - {ok, RE} = re:compile(",\\s"), - F = - fun({K,V}, Acc) -> - KList = riak_kv_wm_utils:any_to_list(K), - case lists:prefix(?HEAD_INDEX_PREFIX, string:to_lower(KList)) of - true -> - %% Isolate the name of the index field. - IndexField = - list_to_binary( - element(2, lists:split(PrefixSize, KList))), - - %% HACK ALERT: Split values on comma. The HTTP - %% spec allows for comma separated tokens - %% where the tokens can be quoted strings. We - %% don't currently support quoted strings. - %% (http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html) - Values = re:split(V, RE, [{return, binary}]), - [{IndexField, X} || X <- Values] ++ Acc; - false -> - Acc - end - end, - lists:foldl(F, [], mochiweb_headers:to_list(wrq:req_headers(RD))). +extract_index_fields(Ctx) -> + RE = get_compiled_index_regex(), + lists:flatten( + lists:map( + fun({Field, Term}) -> + Values = re:split(Term, RE, [{return, binary}]), + [{Field, X} || X <- Values] + end, + maps:get(?PREFIX_INDEX, Ctx#ctx.header_map, []) + ) + ). -spec content_types_provided(#wm_reqdata{}, context()) -> {[{ContentType::string(), Producer::atom()}], #wm_reqdata{}, context()}. @@ -723,7 +780,7 @@ encodings_provided(RD, Ctx0) -> %% of a key-level PUT request will be accepted by this resource. %% (A key-level put *must* include a Content-Type header.) 
content_types_accepted(RD, Ctx) -> - case wrq:get_req_header(?HEAD_CTYPE, RD) of + case maps:get(?BINHEAD_CTYPE, Ctx#ctx.header_map, undefined) of undefined -> %% user must specify content type of the data {[], RD, Ctx}; @@ -758,7 +815,7 @@ resource_exists(RD, Ctx0) -> ToFetch = case Method of UpdM when UpdM =:= 'PUT'; UpdM =:= 'POST'; UpdM =:= 'DELETE' -> - conditional_headers_present(RD) == true; + conditional_headers_present(Ctx0) == true; _ -> true end, @@ -800,7 +857,9 @@ resource_exists(RD, Ctx0) -> -spec is_conflict(request_data(), context()) -> {boolean(), request_data(), context()}. is_conflict(RD, Ctx) -> - case {Ctx#ctx.method, wrq:get_req_header(?HEAD_IF_NOT_MODIFIED, RD)} of + NotModified = + maps:get(?BINHEAD_IF_NOT_MODIFIED, Ctx#ctx.header_map, undefined), + case {Ctx#ctx.method, NotModified} of {_ , undefined} -> {false, RD, Ctx}; {UpdM, NotModifiedClock} when UpdM =:= 'PUT'; UpdM =:= 'POST' -> @@ -819,16 +878,12 @@ is_conflict(RD, Ctx) -> {false, RD, Ctx} end. --spec conditional_headers_present(request_data()) -> boolean(). -conditional_headers_present(RD) -> - NoneMatch = - (wrq:get_req_header("If-None-Match", RD) =/= undefined), - Match = - (wrq:get_req_header("If-Match", RD) =/= undefined), - UnModifiedSince = - (wrq:get_req_header("If-Unmodified-Since", RD) =/= undefined), - NotModified = - (wrq:get_req_header(?HEAD_IF_NOT_MODIFIED, RD) =/= undefined), +-spec conditional_headers_present(context()) -> boolean(). +conditional_headers_present(Ctx) -> + NoneMatch = maps:is_key(?BINHEAD_NONE_MATCH, Ctx#ctx.header_map), + Match = maps:is_key(?BINHEAD_MATCH, Ctx#ctx.header_map), + UnModifiedSince = maps:is_key(?BINHEAD_UNMODIFIED_SINCE, Ctx#ctx.header_map), + NotModified = maps:is_key(?BINHEAD_IF_NOT_MODIFIED, Ctx#ctx.header_map), (NoneMatch or Match or UnModifiedSince or NotModified). 
@@ -875,8 +930,12 @@ accept_doc_body( links=L, ctype=CType, charset=Charset, index_fields=IF}) -> Doc0 = riak_object:new(riak_kv_wm_utils:maybe_bucket_type(T,B), K, <<>>), - VclockDoc = riak_object:set_vclock(Doc0, decode_vclock_header(RD)), - UserMeta = extract_user_meta(RD), + VclockDoc = + riak_object:set_vclock( + Doc0, + maps:get(?BINHEAD_VCLOCK, Ctx#ctx.header_map, vclock:fresh()) + ), + UserMeta = maps:get(?PREFIX_USERMETA, Ctx#ctx.header_map, []), CTypeMD = dict:store(?MD_CTYPE, CType, dict:new()), CharsetMD = if Charset /= undefined -> @@ -885,7 +944,7 @@ accept_doc_body( CTypeMD end, EncMD = - case wrq:get_req_header(?HEAD_ENCODING, RD) of + case maps:get(?BINHEAD_ENCODING, Ctx#ctx.header_map, undefined) of undefined -> CharsetMD; E -> dict:store(?MD_ENCODING, E, CharsetMD) end, @@ -902,7 +961,7 @@ accept_doc_body( _ -> [] end, Options = make_options(Options0, Ctx), - NoneMatch = (wrq:get_req_header("If-None-Match", RD) =/= undefined), + NoneMatch = maps:is_key(?BINHEAD_NONE_MATCH, Ctx#ctx.header_map), Options2 = case riak_kv_util:consistent_object(B) and NoneMatch of true -> [{if_none_match, true}|Options]; @@ -929,7 +988,7 @@ send_returnbody(RD, DocCtx, _HasSiblings = false) -> %% Handle the sibling case. Send either the sibling message body, or a %% multipart body, depending on what the client accepts. send_returnbody(RD, DocCtx, _HasSiblings = true) -> - AcceptHdr = wrq:get_req_header("Accept", RD), + AcceptHdr = maps:get(?BINHEAD_ACCEPT, DocCtx#ctx.header_map, undefined), PossibleTypes = ["multipart/mixed", "text/plain"], case webmachine_util:choose_media_type(PossibleTypes, AcceptHdr) of "multipart/mixed" -> @@ -958,17 +1017,6 @@ add_conditional_headers(RD, Ctx) -> calendar:universal_time_to_local_time(LM)), RD4), {RD5,Ctx3}. --spec extract_user_meta(#wm_reqdata{}) -> proplists:proplist(). -%% @doc Extract headers prefixed by X-Riak-Meta- in the client's PUT request -%% to be returned by subsequent GET requests. 
-extract_user_meta(RD) -> - lists:filter(fun({K,_V}) -> - lists:prefix( - ?HEAD_USERMETA_PREFIX, - string:to_lower(riak_kv_wm_utils:any_to_list(K))) - end, - mochiweb_headers:to_list(wrq:req_headers(RD))). - -spec multiple_choices(#wm_reqdata{}, context()) -> {boolean(), #wm_reqdata{}, context()}. %% @doc Determine whether a document has siblings. If the user has @@ -1136,16 +1184,6 @@ encode_vclock_header(RD, #ctx{doc={error, {deleted, VClock}}}) -> wrq:set_resp_header( ?HEAD_VCLOCK, binary_to_list(base64:encode(BinVClock)), RD). --spec decode_vclock_header(#wm_reqdata{}) -> vclock:vclock(). -%% @doc Translate the X-Riak-Vclock header value from the request into -%% its Erlang representation. If no vclock header exists, a fresh -%% vclock is returned. -decode_vclock_header(RD) -> - case wrq:get_req_header(?HEAD_VCLOCK, RD) of - undefined -> vclock:fresh(); - Head -> riak_object:decode_vclock(base64:decode(Head)) - end. - -spec ensure_doc(context()) -> context(). %% @doc Ensure that the 'doc' field of the context() has been filled %% with the result of a riak_client:get request. This is a @@ -1177,11 +1215,10 @@ delete_resource(RD, Ctx=#ctx{bucket_type=T, bucket=B, key=K, client=C}) -> Options = make_options([], Ctx), BT = riak_kv_wm_utils:maybe_bucket_type(T,B), Result = - case wrq:get_req_header(?HEAD_VCLOCK, RD) of + case maps:get(?BINHEAD_VCLOCK, Ctx#ctx.header_map, undefined) of undefined -> riak_client:delete(BT, K, Options, C); - _ -> - VC = decode_vclock_header(RD), + VC -> riak_client:delete_vclock(BT, K, VC, Options, C) end, case Result of @@ -1245,18 +1282,18 @@ normalize_last_modified(MD) -> httpd_util:convert_request_date(Rfc1123) end. --spec get_link_heads(#wm_reqdata{}, context()) -> [link()]. +-spec get_link_heads(context()) -> [link()]. %% @doc Extract the list of links from the Link request header. %% This function will die if an invalid link header format %% is found. 
-get_link_heads(RD, Ctx) ->
+get_link_heads(Ctx) ->
     APIVersion = Ctx#ctx.api_version,
     Prefix = Ctx#ctx.prefix,
     Bucket = Ctx#ctx.bucket,
 
     %% Get a list of link headers...
-    LinkHeaders1 =
-        case wrq:get_req_header(?HEAD_LINK, RD) of
+    LinkHeaders =
+        case maps:get(?BINHEAD_LINK, Ctx#ctx.header_map, undefined) of
             undefined -> [];
             Heads -> string:tokens(Heads, ",")
         end,
@@ -1264,21 +1301,14 @@
     %% Decode the link headers. Throw an exception if we can't
     %% properly parse any of the headers...
     {BucketLinks, KeyLinks} =
-        case APIVersion of
-            1 ->
-                {ok, BucketRegex} =
-                    re:compile("</" ++ Prefix ++ ?V1_BUCKET_REGEX),
-                {ok, KeyRegex} =
-                    re:compile("</" ++ Prefix ++ ?V1_KEY_REGEX),
-                extract_links(LinkHeaders1, BucketRegex, KeyRegex);
-            Two when Two >= 2 ->
-                {ok, BucketRegex} =
-                    re:compile(?V2_BUCKET_REGEX),
-                {ok, KeyRegex} =
-                    re:compile(?V2_KEY_REGEX),
-                extract_links(LinkHeaders1, BucketRegex, KeyRegex)
-        end,
+        case LinkHeaders of
+            [] ->
+                {[], []};
+            LinkHeaders ->
+                {KeyRegex, BucketRegex} =
+                    get_compiled_link_regex(APIVersion, Prefix),
+                extract_links(LinkHeaders, BucketRegex, KeyRegex)
+        end,
 
     %% Validate that the only bucket header is pointing to the parent
     %% bucket...
@@ -1287,7 +1317,7 @@
         true ->
             KeyLinks;
         false ->
-            throw({invalid_link_headers, LinkHeaders1})
+            throw({invalid_link_headers, LinkHeaders})
     end.
 
 %% Run each LinkHeader string() through the BucketRegex and
@@ -1317,6 +1347,50 @@ extract_links_1([LinkHeader|Rest], BucketRegex, KeyRegex, BucketAcc, KeyAcc) ->
 extract_links_1([], _BucketRegex, _KeyRegex, BucketAcc, KeyAcc) ->
     {BucketAcc, KeyAcc}.
 
+-type mp() :: {re_pattern, _, _, _, _}.
+
+-spec get_compiled_link_regex(non_neg_integer(), string()) -> {mp(), mp()}.
+get_compiled_link_regex(1, Prefix) ->
+    case persistent_term:get(compiled_link_regex_v1, undefined) of
+        undefined ->
+            {ok, KeyRegex} = re:compile("</" ++ Prefix ++ ?V1_KEY_REGEX),
+            {ok, BucketRegex} = re:compile("</" ++ Prefix ++ ?V1_BUCKET_REGEX),
+            persistent_term:put(
+                compiled_link_regex_v1,
+                {KeyRegex, BucketRegex}
+            ),
+            {KeyRegex, BucketRegex};
+        PreCompiledExpressions ->
+            PreCompiledExpressions
+    end;
+get_compiled_link_regex(Two, _Prefix) when Two >= 2 ->
+    case persistent_term:get(compiled_link_regex_v2, undefined) of
+        undefined ->
+            {ok, KeyRegex} = re:compile(?V2_KEY_REGEX),
+            {ok, BucketRegex} = re:compile(?V2_BUCKET_REGEX),
+            persistent_term:put(
+                compiled_link_regex_v2,
+                {KeyRegex, BucketRegex}
+            ),
+            {KeyRegex, BucketRegex};
+        PreCompiledExpressions ->
+            PreCompiledExpressions
+    end.
+
+-spec get_compiled_index_regex() -> mp().
+get_compiled_index_regex() ->
+    case persistent_term:get(compiled_index_regex, undefined) of
+        undefined ->
+            {ok, IndexRegex} = re:compile(",\\s"),
+            persistent_term:put(
+                compiled_index_regex,
+                IndexRegex
+            ),
+            IndexRegex;
+        PreCompiledIndexRegex ->
+            PreCompiledIndexRegex
+    end.
+
 -spec get_ctype(riak_kv_wm_object_dict(), term()) -> string().
 %% @doc Work out the content type for this object - use the metadata if provided
 get_ctype(MD,V) ->
diff --git a/src/riak_kv_yessir_backend.erl b/src/riak_kv_yessir_backend.erl
index 1f8a39e6a..63f089da7 100644
--- a/src/riak_kv_yessir_backend.erl
+++ b/src/riak_kv_yessir_backend.erl
@@ -272,7 +272,7 @@ make_get_return_val(RObj, false = _WantsBinary, #state{op_get = Gets} = S) ->
     {ok, RObj, S#state{op_get = Gets + 1}}.
 
 %% @doc Store an object, yes, sir!
--type index_spec() :: {add, Index, SecondaryKey} | {remove, Index, SecondaryKey}.
+-type index_spec() :: riak_object:index_spec().
 -spec put(riak_object:bucket(), riak_object:key(), [index_spec()], binary(), state()) -> {ok, state()}.
 put(_Bucket, _PKey, _IndexSpecs, _Val, #state{op_put = Puts} = S) ->
diff --git a/src/riak_object.erl b/src/riak_object.erl
index 6b04dcfcd..9cc5e797e 100644
--- a/src/riak_object.erl
+++ b/src/riak_object.erl
@@ -73,8 +73,11 @@
 -type r_content() :: #r_content{}.
 -type index_op() :: add | remove.
-type index_value() :: integer() | binary(). +-type index_spec() :: {index_op(), binary(), index_value()}. -type binary_version() :: v0 | v1. +-export_type([index_spec/0]). + -define(MAX_KEY_SIZE, 65536). -define(LASTMOD_LEN, 29). %% static length of rfc1123_date() type. Hard-coded in Erlang. @@ -984,8 +987,7 @@ dvv_enabled(Bucket) -> %% the case where there is no existing object %% stored for a key and therefore no existing %% index data. --spec index_specs(riak_object()) -> - [{index_op(), binary(), index_value()}]. +-spec index_specs(riak_object()) -> [{index_op(), binary(), index_value()}]. index_specs(Obj) -> Indexes = index_data(Obj), assemble_index_specs(Indexes, add).