Skip to content

Commit 33be0bc

Browse files
committed
Avoid timeouts in ddoc scanner callback
Previously the `ddoc(St, DbName, #doc{})` scanner callback was called from the `fabric:all_docs()` context while it was fetching design documents. That can work when design documents are just filtered or checked quickly, as in the feature detector plugin. However, if a plugin wanted to perform a longer-running operation, the `fabric:all_docs()` call would time out and crash. To allow plugins to spend an arbitrarily long time in the ddoc callback, switch to fetching small batches of design documents and then calling the plugin `ddoc/3` callback outside the `fabric:all_docs()` context. Thanks to Robert Newson (@rnewson) for the original idea of doing batched design doc iteration.
1 parent d33edf7 commit 33be0bc

File tree

4 files changed

+84
-22
lines changed

4 files changed

+84
-22
lines changed

rel/overlay/etc/default.ini

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,11 @@ url = {{nouveau_url}}
10111011
; is shared across all running plugins.
10121012
;doc_rate_limit = 1000
10131013

1014+
; Batch size to use when fetching design documents. For lots of small design
1015+
; documents this value could be increased to 500 or 1000. If design documents
1016+
; are large (100KB+) it could make sense to decrease it a bit to 25 or 10.
1017+
;ddoc_batch_size = 100
1018+
10141019
[couch_scanner_plugins]
10151020
;couch_scanner_plugin_ddoc_features = false
10161021
;couch_scanner_plugin_find = false

src/couch_scanner/src/couch_scanner_plugin.erl

Lines changed: 66 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@
171171

172172
-define(CHECKPOINT_INTERVAL_SEC, 10).
173173
-define(STOP_TIMEOUT_SEC, 5).
174+
-define(DDOC_BATCH_SIZE, 100).
174175

175176
-record(st, {
176177
id,
@@ -326,7 +327,7 @@ scan_db([_ | _] = Shards, #st{} = St) ->
326327
case Go of
327328
ok ->
328329
St2 = rate_limit(St1, db),
329-
St3 = fold_ddocs(fun scan_ddocs_fold/2, St2),
330+
St3 = scan_ddocs(St2),
330331
{Shards1, St4} = shards_callback(St3, Shards),
331332
St5 = scan_shards(Shards1, St4),
332333
{ok, St5};
@@ -339,16 +340,6 @@ scan_db([_ | _] = Shards, #st{} = St) ->
339340
{ok, St}
340341
end.
341342

342-
scan_ddocs_fold({meta, _}, #st{} = Acc) ->
343-
{ok, Acc};
344-
scan_ddocs_fold({row, RowProps}, #st{} = Acc) ->
345-
DDoc = couch_util:get_value(doc, RowProps),
346-
scan_ddoc(ejson_to_doc(DDoc), Acc);
347-
scan_ddocs_fold(complete, #st{} = Acc) ->
348-
{ok, Acc};
349-
scan_ddocs_fold({error, Error}, _Acc) ->
350-
exit({shutdown, {scan_ddocs_fold, Error}}).
351-
352343
scan_shards([], #st{} = St) ->
353344
St;
354345
scan_shards([#shard{} = Shard | Rest], #st{} = St) ->
@@ -654,28 +645,78 @@ shards_by_range(Shards) ->
654645
Dict = lists:foldl(Fun, orddict:new(), Shards),
655646
orddict:to_list(Dict).
656647

657-
% Design doc fetching helper
658-
659-
fold_ddocs(Fun, #st{dbname = DbName, mod = Mod} = Acc) ->
648+
scan_ddocs(#st{mod = Mod} = St) ->
660649
case is_exported(Mod, ddoc, 3) of
661650
true ->
662-
QArgs = #mrargs{
663-
include_docs = true,
664-
extra = [{namespace, <<"_design">>}]
665-
},
666651
try
667-
{ok, Acc1} = fabric:all_docs(DbName, [?ADMIN_CTX], Fun, Acc, QArgs),
668-
Acc1
652+
fold_ddocs_batched(St, <<?DESIGN_DOC_PREFIX>>)
669653
catch
670654
error:database_does_not_exist ->
671-
Acc
655+
St
672656
end;
673657
false ->
674658
% If the plugin doesn't export the ddoc callback, don't bother calling
675659
% fabric:all_docs, as it's expensive
676-
Acc
660+
St
661+
end.
662+
663+
% Fetch design documents in batches and run the plugin callback on each batch
% only after the fabric:all_docs/5 call for that batch has returned, so the
% callback's running time is not spent inside the all_docs fold.
%
% StartKey is the key to start the batch from. Each batch asks for
% max(2, cfg_ddoc_batch_size()) rows. The fold accumulator is
% {Cnt, Id, DDocs}: the number of rows seen, the id of the last row seen, and
% the collected #doc{} records (excluding a row whose id equals StartKey).
%
% Returns the updated #st{}. An {error, Error} row from all_docs exits the
% process with {shutdown, {scan_ddocs_fold, Error}}.
fold_ddocs_batched(#st{dbname = DbName} = St, <<_/binary>> = StartKey) ->
664+
QArgs = #mrargs{
665+
include_docs = true,
666+
start_key = StartKey,
667+
extra = [{namespace, <<?DESIGN_DOC_PREFIX0>>}],
668+
% Need limit > 1 for the algorithm below to work: a continuation batch
% starts at the last id of the previous batch and skips that row, so
% with a limit of 1 the only row returned would be the skipped one and
% the iteration would never advance.
limit = max(2, cfg_ddoc_batch_size())
670+
},
671+
Cbk =
672+
fun
673+
({meta, _}, {Cnt, Id, DDocs}) ->
674+
{ok, {Cnt, Id, DDocs}};
675+
({row, Props}, {Cnt, _Id, DDocs}) ->
676+
EJson = couch_util:get_value(doc, Props),
677+
DDoc = #doc{id = Id} = ejson_to_doc(EJson),
678+
case Id =:= StartKey of
679+
true ->
680+
% We get here if we're continuing batched iteration so
681+
% we skip this ddoc as we already processed it. In the
682+
% first batch StartKey will be <<"_design/">> and
683+
% that's an invalid document ID so will never match.
% The skipped row is still counted in Cnt.
{ok, {Cnt + 1, Id, DDocs}};
685+
false ->
686+
{ok, {Cnt + 1, Id, [DDoc | DDocs]}}
687+
end;
688+
(complete, {Cnt, Id, DDocs}) ->
689+
% Docs were prepended while folding; restore row order.
{ok, {Cnt, Id, lists:reverse(DDocs)}};
690+
({error, Error}, {_Cnt, _Id, _DDocs}) ->
691+
exit({shutdown, {scan_ddocs_fold, Error}})
692+
end,
693+
Acc0 = {0, StartKey, []},
694+
{ok, {Cnt, LastId, DDocs}} = fabric:all_docs(DbName, [?ADMIN_CTX], Cbk, Acc0, QArgs),
695+
case scan_ddoc_batch(DDocs, {ok, St}) of
696+
{ok, #st{} = St1} ->
697+
% NOTE(review): there is no clause for Cnt > limit, which would raise
% if_clause; presumably all_docs never returns more rows than the
% requested limit -- confirm.
if
698+
is_integer(Cnt), Cnt < QArgs#mrargs.limit ->
699+
% We got less than we asked for so we're done
700+
St1;
701+
Cnt == QArgs#mrargs.limit ->
702+
% We got all the docs we asked for, there are probably more docs
703+
% so we recurse and fetch the next batch.
704+
fold_ddocs_batched(St1, LastId)
705+
end;
706+
{stop, #st{} = St1} ->
707+
% Plugin wanted to stop scanning ddocs, so we stop
708+
St1
677709
end.
678710

711+
% Call the plugin ddoc callback, via scan_ddoc/2, for each design doc in the
712+
% batch, in order. These calls may take an arbitrarily long time to process.
%
% The second argument threads the result of the previous call: {ok, St} means
% keep going with the next doc, {stop, St} means skip the rest of the batch.
% Returns {ok, St} when the whole batch was processed or {stop, St} if
% scanning was stopped early.
% NOTE(review): assumes scan_ddoc/2 returns {ok, #st{}} | {stop, #st{}};
% any other shape would fail with function_clause -- confirm.
713+
scan_ddoc_batch(_, {stop, #st{} = St}) ->
714+
{stop, St};
715+
scan_ddoc_batch([], {ok, #st{} = St}) ->
716+
{ok, St};
717+
scan_ddoc_batch([#doc{} = DDoc | Rest], {ok, #st{} = St}) ->
718+
scan_ddoc_batch(Rest, scan_ddoc(DDoc, St)).
719+
679720
% Simple ejson to #doc{} function to avoid all the extra validation in from_json_obj/1.
680721
% We just got these docs from the cluster, they are already saved on disk.
681722
ejson_to_doc({[_ | _] = Props}) ->
@@ -708,6 +749,9 @@ cfg(Mod, Key, Default) when is_list(Key) ->
708749
Section = atom_to_list(Mod),
709750
config:get(Section, Key, Default).
710751

752+
% Read the design doc batch size from the "ddoc_batch_size" key of the
% "couch_scanner" config section as an integer, falling back to
% ?DDOC_BATCH_SIZE when it is not set.
cfg_ddoc_batch_size() ->
753+
config:get_integer("couch_scanner", "ddoc_batch_size", ?DDOC_BATCH_SIZE).
754+
711755
schedule_time(Mod, LastSec, NowSec) ->
712756
After = cfg(Mod, "after", "restart"),
713757
Repeat = cfg(Mod, "repeat", "restart"),

src/couch_scanner/test/eunit/couch_scanner_test.erl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ setup() ->
5353
meck:new(couch_scanner_server, [passthrough]),
5454
meck:new(couch_scanner_util, [passthrough]),
5555
Ctx = test_util:start_couch([fabric, couch_scanner]),
56+
% Run with the smallest batch size to exercise the batched
57+
% ddoc iteration
58+
config:set("couch_scanner", "ddoc_batch_size", "2", false),
5659
DbName1 = <<"dbname1", (?tempdb())/binary>>,
5760
DbName2 = <<"dbname2", (?tempdb())/binary>>,
5861
DbName3 = <<"dbname3", (?tempdb())/binary>>,

src/docs/src/config/scanner.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,16 @@ Scanner Options
8585
[couch_scanner]
8686
doc_rate_limit = 1000
8787

88+
.. config:option:: ddoc_batch_size
89+
90+
Batch size to use when fetching design documents. For lots of small
91+
design documents this value could be increased to 500 or 1000. If
92+
design documents are large (100KB+) it could make sense to decrease it
93+
a bit to 25 or 10. ::
94+
95+
[couch_scanner]
96+
ddoc_batch_size = 100
97+
8898
.. config:section:: couch_scanner_plugins :: Enable Scanner Plugins
8999
90100
.. config:option:: {plugin}

0 commit comments

Comments
 (0)