diff --git a/Gemfile b/Gemfile
index 3bfb88ee515d828d9dcce9190d796cc6318ca1ad..b2a1231fba812a3af44f10b66c126220c34a6ef4 100644
--- a/Gemfile
+++ b/Gemfile
@@ -5,6 +5,7 @@ gem "jekyll"
 
 # add dependencies for HTTP requests
 gem "httparty"
+gem "persistent_httparty"
 
 # add dependencies to work with yaml and xml files
 gem "yaml"
diff --git a/Gemfile.lock b/Gemfile.lock
index c232de96183627a4fc97a01426c3020e8a180c63..3cc110ae551f5b76444e542e9486ee68b3078b71 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -11,6 +11,8 @@ GEM
     eventmachine (1.2.7)
     ffi (1.15.5)
     forwardable-extended (2.6.0)
+    gene_pool (1.5.0)
+      concurrent-ruby (>= 1.0)
     google-protobuf (3.23.3-x86_64-linux)
     http_parser.rb (0.8.0)
     httparty (0.21.0)
@@ -53,6 +55,11 @@ GEM
       racc (~> 1.4)
     pathutil (0.16.2)
       forwardable-extended (~> 2.6)
+    persistent_http (1.0.6)
+      gene_pool (>= 1.3)
+    persistent_httparty (0.1.2)
+      httparty (~> 0.9)
+      persistent_http (< 2)
     public_suffix (5.0.1)
     racc (1.7.3)
     rake (13.0.6)
@@ -78,6 +85,7 @@ DEPENDENCIES
   httparty
   jekyll
   nokogiri
+  persistent_httparty
   yaml
 
 BUNDLED WITH
diff --git a/scripts/update_oai-pmh.rb b/scripts/update_oai-pmh.rb
index c57fd2fe3649b30bb06eef48d0d01b77470e861a..d94a368c5269107ce684287fd8d1b40b3c3514bc 100644
--- a/scripts/update_oai-pmh.rb
+++ b/scripts/update_oai-pmh.rb
@@ -4,6 +4,7 @@ require 'httparty'
 require 'yaml'
 require 'date'
 require 'nokogiri'
+require 'persistent_httparty'
 
 class FailedOaiPmhRequest < StandardError
   def initialize(type, msg)
@@ -14,11 +15,27 @@ end
 
 class OaiPmhClient
   include HTTParty
+  persistent_connection_adapter
+  # Parentheses are required: `headers {...}` parses the braces as a block, not a Hash.
+  headers("User-Agent" => "LEAPS-WG3-client/0.1")
 
   def initialize(oai_pmh_endpoint, from_id=nil)
     self.class.base_uri oai_pmh_endpoint
+    # TODO: add from_id support as 'From' request header
+
+    # WORK_AROUND: The connection adapter is persistent across multiple
+    # URLs (which is good), but the adapter+connection-caching somehow
+    # results in all requests being sent to the same host.
+    #
+    # To work around this, we clear the adapter's cached persistent
+    # connection, forcing a new one to be set up for this endpoint.
+    #
+    # This is acceptable, as endpoints are processed sequentially and all
+    # requests for one endpoint are made before moving on to the next.
+    conn_adapter = self.class.default_options[:connection_adapter]
+    conn_adapter.persistent_http = nil
   end
 
   def identify()