1010from scrapy .exceptions import ScrapyDeprecationWarning
1111from twisted .internet .error import ConnectionRefusedError , ConnectionDone
1212
13- from scrapy_crawlera .utils import exp_backoff_full_jitter
13+ from scrapy_crawlera .utils import exp_backoff
1414
1515
1616class CrawleraMiddleware (object ):
@@ -24,9 +24,9 @@ class CrawleraMiddleware(object):
2424 preserve_delay = False
2525 header_prefix = 'X-Crawlera-'
2626 conflicting_headers = ('X-Crawlera-Profile' , 'X-Crawlera-UA' )
27- noslaves_attempts = 0
28- noslaves_base_delay = 15
29- noslaves_max_delay = 180
27+ backoff_step = 15
28+ backoff_max = 180
29+ exp_backoff = None
3030
3131 _settings = [
3232 ('apikey' , str ),
@@ -36,6 +36,8 @@ class CrawleraMiddleware(object):
3636 ('maxbans' , int ),
3737 ('download_timeout' , int ),
3838 ('preserve_delay' , bool ),
39+ ('backoff_step' , int ),
40+ ('backoff_max' , int ),
3941 ]
4042
4143 def __init__ (self , crawler ):
@@ -71,6 +73,7 @@ def open_spider(self, spider):
7173 "To avoid this behaviour you can use the CRAWLERA_PRESERVE_DELAY setting but keep in mind that this may slow down the crawl significantly" )
7274
7375 self ._headers = self .crawler .settings .get ('CRAWLERA_DEFAULT_HEADERS' , {}).items ()
76+ self .exp_backoff = exp_backoff (self .backoff_step , self .backoff_max )
7477
7578 def _settings_get (self , type_ , * a , ** kw ):
7679 if type_ is int :
@@ -158,8 +161,7 @@ def process_response(self, request, response, spider):
158161 self ._restore_original_delay (request )
159162
160163 if self ._is_no_available_proxies (response ):
161- after = self ._get_noslaves_delay ()
162- self ._set_custom_delay (request , after )
164+ self ._set_custom_delay (request , next (self .exp_backoff ))
163165 else :
164166 self ._reset_noslaves_delay ()
165167
@@ -207,22 +209,9 @@ def _get_slot(self, request):
207209 key = self ._get_slot_key (request )
208210 return key , self .crawler .engine .downloader .slots .get (key )
209211
210- def _get_noslaves_delay (self ):
211- """
212- Returns the amount of delay to use in case of no available proxies,
213- also increments the number of attempts due to no proxies
214- """
215- delay = exp_backoff_full_jitter (
216- self .noslaves_attempts ,
217- self .noslaves_max_delay ,
218- self .noslaves_base_delay
219- )
220- self .noslaves_attempts += 1
221- return delay
222-
223212 def _reset_noslaves_delay (self ):
224213 """Reset the exponential backoff used when no proxies are available"""
225- self .noslaves_attempts = 0
214+ self .exp_backoff = exp_backoff ( self . backoff_step , self . backoff_max )
226215
227216 def _set_custom_delay (self , request , delay ):
228217 """Set custom delay for slot and save original one."""
0 commit comments