171171
% NOTE(review): the two constants below are presumably durations in seconds
% (per the _SEC suffix); their uses are outside this view - confirm.
172172-define (CHECKPOINT_INTERVAL_SEC , 10 ).
173173-define (STOP_TIMEOUT_SEC , 5 ).
% Default passed to config:get_integer/3 by cfg_ddoc_batch_size/0 when the
% ddoc_batch_size setting is not configured.
174+ -define (DDOC_BATCH_SIZE , 100 ).
174175
175176-record (st , {
176177 id ,
@@ -326,7 +327,7 @@ scan_db([_ | _] = Shards, #st{} = St) ->
326327 case Go of
327328 ok ->
328329 St2 = rate_limit (St1 , db ),
329- St3 = fold_ddocs ( fun scan_ddocs_fold / 2 , St2 ),
330+ St3 = scan_ddocs ( St2 ),
330331 {Shards1 , St4 } = shards_callback (St3 , Shards ),
331332 St5 = scan_shards (Shards1 , St4 ),
332333 {ok , St5 };
@@ -339,16 +340,6 @@ scan_db([_ | _] = Shards, #st{} = St) ->
339340 {ok , St }
340341 end .
341342
342- scan_ddocs_fold ({meta , _ }, # st {} = Acc ) ->
343- {ok , Acc };
344- scan_ddocs_fold ({row , RowProps }, # st {} = Acc ) ->
345- DDoc = couch_util :get_value (doc , RowProps ),
346- scan_ddoc (ejson_to_doc (DDoc ), Acc );
347- scan_ddocs_fold (complete , # st {} = Acc ) ->
348- {ok , Acc };
349- scan_ddocs_fold ({error , Error }, _Acc ) ->
350- exit ({shutdown , {scan_ddocs_fold , Error }}).
351-
352343scan_shards ([], # st {} = St ) ->
353344 St ;
354345scan_shards ([# shard {} = Shard | Rest ], # st {} = St ) ->
@@ -654,28 +645,78 @@ shards_by_range(Shards) ->
654645 Dict = lists :foldl (Fun , orddict :new (), Shards ),
655646 orddict :to_list (Dict ).
656647
657- % Design doc fetching helper
658-
659- fold_ddocs (Fun , # st {dbname = DbName , mod = Mod } = Acc ) ->
% Feed the database's design docs to the plugin, batch by batch, and return
% the resulting #st{}. The state comes back untouched when the plugin module
% does not export ddoc/3, or when the fold raises
% error:database_does_not_exist.
scan_ddocs(#st{mod = Mod} = St) ->
    case is_exported(Mod, ddoc, 3) of
        false ->
            % No ddoc callback in the plugin: skip the expensive
            % fabric:all_docs call entirely.
            St;
        true ->
            % The bare design doc prefix is the start key of the first batch.
            FirstKey = <<?DESIGN_DOC_PREFIX>>,
            try
                fold_ddocs_batched(St, FirstKey)
            catch
                error:database_does_not_exist ->
                    St
            end
    end.
662+
% Fetch design docs in batches starting at StartKey and hand every batch to
% scan_ddoc_batch/2. Recurses with the last seen doc id as the next start key
% while full batches keep coming back; returns the final #st{} once a short
% batch is seen or the plugin asks to stop.
fold_ddocs_batched(#st{dbname = DbName} = St, <<_/binary>> = StartKey) ->
    % The limit must be > 1: a continuation batch always begins with the
    % already processed StartKey row, so a limit of 1 would never advance.
    Limit = max(2, cfg_ddoc_batch_size()),
    QArgs = #mrargs{
        include_docs = true,
        start_key = StartKey,
        extra = [{namespace, <<?DESIGN_DOC_PREFIX0>>}],
        limit = Limit
    },
    Cbk = fun(Msg, Acc) -> ddoc_batch_row(Msg, Acc, StartKey) end,
    {ok, {Cnt, LastId, DDocs}} =
        fabric:all_docs(DbName, [?ADMIN_CTX], Cbk, {0, StartKey, []}, QArgs),
    case scan_ddoc_batch(DDocs, {ok, St}) of
        {stop, #st{} = St1} ->
            % The plugin asked to stop scanning ddocs.
            St1;
        {ok, #st{} = St1} ->
            if
                is_integer(Cnt), Cnt < Limit ->
                    % Short batch: nothing is left to fetch.
                    St1;
                Cnt == Limit ->
                    % Full batch: there are probably more docs, so continue
                    % from the last id we saw.
                    fold_ddocs_batched(St1, LastId)
            end
    end.

% fabric:all_docs callback for a single batch. The accumulator is
% {RowCount, LastSeenId, DDocs}; DDocs is built in reverse and put back in
% row order on completion.
ddoc_batch_row({meta, _}, {_, _, _} = Acc, _StartKey) ->
    {ok, Acc};
ddoc_batch_row({row, Props}, {Cnt, _PrevId, DDocs}, StartKey) ->
    #doc{id = Id} = DDoc = ejson_to_doc(couch_util:get_value(doc, Props)),
    DDocs1 =
        case Id of
            StartKey ->
                % Continuation batch: this ddoc was already processed as the
                % last row of the previous batch, so it is counted but not
                % collected. In the first batch StartKey is <<"_design/">>,
                % an invalid document id, so it never matches.
                DDocs;
            _ ->
                [DDoc | DDocs]
        end,
    {ok, {Cnt + 1, Id, DDocs1}};
ddoc_batch_row(complete, {Cnt, Id, DDocs}, _StartKey) ->
    {ok, {Cnt, Id, lists:reverse(DDocs)}};
ddoc_batch_row({error, Error}, {_, _, _}, _StartKey) ->
    exit({shutdown, {scan_ddocs_fold, Error}}).
678710
% Run the plugin ddoc callback (through scan_ddoc/2) over one batch of design
% docs, threading the #st{} along. Returns {ok, St} once the batch is
% exhausted, or {stop, St} as soon as a callback result asks to stop; the
% remaining docs are then left untouched. Callbacks may take an arbitrarily
% long time to process.
scan_ddoc_batch([#doc{} = DDoc | Remaining], {ok, #st{} = St0}) ->
    Result = scan_ddoc(DDoc, St0),
    scan_ddoc_batch(Remaining, Result);
scan_ddoc_batch([], {ok, #st{} = St0}) ->
    {ok, St0};
scan_ddoc_batch(_Remaining, {stop, #st{} = St0}) ->
    {stop, St0}.
719+
679720% Simple ejson to #doc{} function to avoid all the extra validation in from_json_obj/1.
680721% We just got these docs from the cluster, they are already saved on disk.
681722ejson_to_doc ({[_ | _ ] = Props }) ->
@@ -708,6 +749,9 @@ cfg(Mod, Key, Default) when is_list(Key) ->
708749 Section = atom_to_list (Mod ),
709750 config :get (Section , Key , Default ).
710751
% Number of design docs to request per batch, read from the
% "ddoc_batch_size" key of the "couch_scanner" config section and falling
% back to ?DDOC_BATCH_SIZE. The section and key literals must not carry
% leading whitespace, otherwise the lookup never matches the real setting.
cfg_ddoc_batch_size() ->
    config:get_integer("couch_scanner", "ddoc_batch_size", ?DDOC_BATCH_SIZE).
754+
711755schedule_time (Mod , LastSec , NowSec ) ->
712756 After = cfg (Mod , " after" , " restart" ),
713757 Repeat = cfg (Mod , " repeat" , " restart" ),
0 commit comments