diff --git a/scripts/update_oai-pmh.rb b/scripts/update_oai-pmh.rb index 3ae9ad525d16cf3653fd41ee9b4be27e38b996ea..9d532dea7a5589a167f56958ced3ee7902964465 100644 --- a/scripts/update_oai-pmh.rb +++ b/scripts/update_oai-pmh.rb @@ -5,7 +5,10 @@ require 'yaml' require 'date' require 'nokogiri' -MAX_HTTP_GET_RETRIES = 3 +MAX_HTTP_GET_RETRIES = 10 + +FAILED_REQUEST_INITIAL = 60 +FAILED_REQUEST_BACKOFF = 120 def check_oai_pmh_endpoint(endpoint_url) queryIdentify_url = endpoint_url + "?verb=Identify" @@ -99,6 +102,11 @@ def list_identifiers(url) total = 0 set_counts = {} + # Whether the server has already responded successfully in the past. Using + # the presence of the resumptionToken to determine this is, perhaps, + # questionable. + hasServerResponded = url.include?('resumptionToken=') + response = nil attempts = 0 while !response @@ -116,18 +124,25 @@ def list_identifiers(url) raise StandardError.new "Received status code #{response.code}" end rescue HTTParty::Error, SocketError, Timeout::Error => e - if attempts >= MAX_HTTP_GET_RETRIES - raise StandardError.new "Too many GET requests timed out on #{url}" - end # FIXME output needs to take into account current whether to # insert a new-line. As a quick hack, use the presence of # resumptionToken in the URL to indicate a newline is needed (this # works most of the time, but not always) - if url.include?('resumptionToken=') && attempts == 1 + if hasServerResponded && attempts == 1 print "\n" end print " Attempt #{attempts} of #{MAX_HTTP_GET_RETRIES} failed: #{e.message}\n" - sleep(5); + + if attempts >= MAX_HTTP_GET_RETRIES + raise StandardError.new "Too many GET requests failed for #{url}" + end + + delay = FAILED_REQUEST_INITIAL + if hasServerResponded + delay += attempts * FAILED_REQUEST_BACKOFF + end + + sleep(delay); response = nil end end