bes  Updated for version 3.20.10
retriever.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 
5 // Copyright (c) 2016 OPeNDAP, Inc.
6 // Author: Nathan Potter <ndp@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 
#include "config.h"

#include <fcntl.h>

#include <unistd.h>
#include <time.h>

#include <memory>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <cerrno>
#include <exception>
#include <sstream>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <GetOpt.h>

#include <curl/curl.h>


#include <libdap/D4Dimensions.h>
#include <libdap/D4StreamMarshaller.h>

#include "BESInternalError.h"
#include "BESUtil.h"
#include "CurlUtils.h"
#include "TheBESKeys.h"
#include "BESLog.h"
#include "BESDebug.h"
#include "BESStopWatch.h"

#include "awsv4.h"
#include "HttpNames.h"
#include "url_impl.h"
#include "EffectiveUrl.h"
#include "EffectiveUrlCache.h"
#include "RemoteResource.h"

#include "Chunk.h"
#include "CredentialsManager.h"
#include "AccessCredentials.h"
#include "CredentialsManager.h"
#include "CurlHandlePool.h"
#include "DmrppCommon.h"
#include "DmrppRequestHandler.h"
#include "DmrppByte.h"
#include "DmrppArray.h"
#include "DMRpp.h"
#include "DmrppTypeFactory.h"
#include "DmrppD4Group.h"
#include "DmrppParserSax2.h"
76 
77 //#include <memory>
78 //#include <iterator>
79 //#include <algorithm>
80 
81 
// Verbosity switches, set from the command line in main():
//   -D turns on both Debug and debug, -d turns on debug, -b turns on bes_debug.
bool Debug{false};      // Extra detail on stderr (e.g. response headers).
bool debug{false};      // BEGIN/END tracing and progress messages on stderr.
bool bes_debug{false};  // When true, bes_setup() enables BESDebug.

using std::cerr;
using std::endl;
using std::string;

// Message prefix of the form "retriever::<function>() - ". This has to remain a macro so
// that __func__ expands inside the function that uses it.
#define prolog std::string("retriever::").append(__func__).append("() - ")

// Pre-computed SHA-256 hash of a null (empty) message body; sent as the
// x-amz-content-sha256 header when a request is signed for AWS.
#define NULL_BODY_HASH "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
93 
94 
/**
 * @brief Returns the system error message for the current value of errno.
 *
 * @return The text produced by strerror(errno), or "Unknown error." if strerror()
 * returns a null pointer.
 */
std::string get_errno() {
    const char *message = std::strerror(errno);
    return message ? std::string(message) : std::string("Unknown error.");
}
106 
107 
117 dmrpp::DmrppRequestHandler *bes_setup(
118  const string &bes_config_file,
119  const string &bes_log_file,
120  const string &bes_debug_log_file,
121  const string &bes_debug_keys,
122  const string &http_netrc_file,
123  const string &http_cache_dir
124 ) {
125  if (debug) cerr << prolog << "BEGIN" << endl;
126 
127  TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
128  TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
129  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
130  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
131 
132  if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
133 
134 
135  if (!http_netrc_file.empty()) {
136  TheBESKeys::TheKeys()->set_key(HTTP_NETRC_FILE_KEY, http_netrc_file, false); // Set the netrc file
137  }
138 
139  if (!http_cache_dir.empty()) {
140  TheBESKeys::TheKeys()->set_key(HTTP_CACHE_DIR_KEY, http_cache_dir, false); // Set the netrc file
141  }
142 
143  // Initialize the dmr++ goodness.
144  auto foo = new dmrpp::DmrppRequestHandler("Chaos");
145 
146  if (debug) cerr << prolog << "END" << endl;
147  return foo;
148 }
149 
150 curl_slist *aws_sign_request_url(shared_ptr<http::url> &target_url, curl_slist *request_headers) {
151 
152  if (debug) cerr << prolog << "BEGIN" << endl;
153 
154  AccessCredentials *credentials = CredentialsManager::theCM()->get(target_url);
155  if (credentials && credentials->is_s3_cred()) {
156  if (debug)
157  cerr << prolog << "Got AWS S3 AccessCredentials instance: " << endl << credentials->to_json() << endl;
158  // If there are available credentials, and they are S3 credentials then we need to sign
159  // the request
160  const std::time_t request_time = std::time(0);
161 
162  const std::string auth_header =
163  AWSV4::compute_awsv4_signature(
164  target_url,
165  request_time,
166  credentials->get(AccessCredentials::ID_KEY),
167  credentials->get(AccessCredentials::KEY_KEY),
168  credentials->get(AccessCredentials::REGION_KEY),
169  "s3");
170 
171  // passing nullptr for the first call allocates the curl_slist
172  // The following code builds the slist that holds the headers. This slist is freed
173  // once the URL is dereferenced in dmrpp_easy_handle::read_data(). jhrg 11/26/19
174  request_headers = curl::append_http_header(request_headers, "Authorization", auth_header);
175 
176  // We pre-compute the sha256 hash of a null message body
177  request_headers = curl::append_http_header(request_headers, "x-amz-content-sha256", NULL_BODY_HASH);
178  request_headers = curl::append_http_header(request_headers, "x-amz-date", AWSV4::ISO8601_date(request_time));
179  }
180  if (debug) cerr << prolog << "END" << endl;
181  return request_headers;
182 }
183 
189 size_t get_remote_size(shared_ptr<http::url> &target_url, bool aws_signing) {
190  if (debug) cerr << prolog << "BEGIN" << endl;
191 
192  char error_buffer[CURL_ERROR_SIZE];
193  std::vector<std::string> resp_hdrs;
194  curl_slist *request_headers = nullptr;
195 
196  request_headers = curl::add_edl_auth_headers(request_headers);
197 
198  if (aws_signing)
199  request_headers = aws_sign_request_url(target_url, request_headers);
200 
201  CURL *ceh = curl::init(target_url->str(), request_headers, &resp_hdrs);
202  curl::set_error_buffer(ceh, error_buffer);
203 
204  // In cURLville, CURLOPT_NOBODY means a HEAD request i.e. Don't send the response body a.k.a. "NoBody"
205  CURLcode curl_status = curl_easy_setopt(ceh, CURLOPT_NOBODY, 1L);
206  curl::eval_curl_easy_setopt_result(curl_status, prolog, "CURLOPT_NOBODY", error_buffer, __FILE__, __LINE__);
207 
208  if (Debug) cerr << prolog << "cURL HEAD request is configured" << endl;
209 
210  curl::super_easy_perform(ceh);
211 
212  curl::unset_error_buffer(ceh);
213  if (request_headers)
214  curl_slist_free_all(request_headers);
215  if (ceh)
216  curl_easy_cleanup(ceh);
217 
218  bool done = false;
219  size_t how_big_it_is = 0;
220  string content_length_hdr_key("content-length: ");
221  for (size_t i = 0; !done && i < resp_hdrs.size(); i++) {
222  if (Debug) cerr << prolog << "HEADER[" << i << "]: " << resp_hdrs[i] << endl;
223  string lc_header = BESUtil::lowercase(resp_hdrs[i]);
224  size_t index = lc_header.find(content_length_hdr_key);
225  if (index == 0) {
226  string value = lc_header.substr(content_length_hdr_key.size());
227  how_big_it_is = stol(value);
228  done = true;
229  }
230  }
231  if (!done)
232  throw BESInternalError(prolog + "Failed to determine size of target resource: " + target_url->str(), __FILE__, __LINE__);
233 
234  if (debug) cerr << prolog << "END" << endl;
235 
236  return how_big_it_is;
237 }
238 size_t get_max_retrival_size(const size_t &max_target_size, shared_ptr<http::url> &target_url) {
239  size_t target_size = max_target_size;
240  if (max_target_size == 0) {
241  target_size = get_remote_size(target_url, true);
242  if (debug) cerr << prolog << "Remote resource size is " << max_target_size << " bytes. " << endl;
243  }
244  return target_size;
245 }
246 
252 void simple_get(const string target_url_str, const string output_file_base) {
253 
254  string output_file = output_file_base + "_simple_get.out";
255  vector<string> resp_hdrs;
256  mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
257  int fd;
258  if ((fd = open(output_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, mode)) < 0) {
259  throw BESInternalError(get_errno(), __FILE__, __LINE__);
260  }
261  {
262  BESStopWatch sw;
263  sw.start(prolog + "url: " + target_url_str);
264  shared_ptr<http::url> target_url(new http::url(target_url_str));
265  curl::http_get_and_write_resource(target_url, fd,
266  &resp_hdrs); // Throws BESInternalError if there is a curl error.
267  }
268  close(fd);
269 
270  if (Debug) {
271  for (size_t i = 0; i < resp_hdrs.size(); i++) {
272  cerr << prolog << "ResponseHeader[" << i << "]: " << resp_hdrs[i] << endl;
273  }
274  }
275 }
276 
277 
285 void make_chunks(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
286  vector<dmrpp::Chunk *> &chunks) {
287  if (debug) cerr << prolog << "BEGIN" << endl;
288  size_t chunk_size = target_size / chunk_count;
289  size_t chunk_start = 0;
290  size_t chunk_index;
291  for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
292  vector<unsigned long long> position_in_array;
293  position_in_array.push_back(chunk_index);
294  if (debug)
295  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
296  << chunk_size << endl;
297  auto chunk = new dmrpp::Chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
298  chunk_start += chunk_size;
299  chunks.push_back(chunk);
300  }
301  if (target_size % chunk_size) {
302  // So there's a remainder and we should make a final chunk for it too.
303  size_t last_chunk_size = target_size - chunk_start;
304  if (debug)
305  cerr << prolog << "Remainder chunk. chunk[" << chunks.size() << "] last_chunk_size: " << last_chunk_size
306  << endl;
307  if (debug)
308  cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
309  << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
310  if (last_chunk_size > 0) {
311  vector<unsigned long long> position_in_array;
312  position_in_array.push_back(chunk_index);
313  if (debug)
314  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
315  << last_chunk_size << endl;
316  auto last_chunk = new dmrpp::Chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
317  chunks.push_back(last_chunk);
318  }
319  }
320  if (debug) cerr << prolog << "END chunks: " << chunks.size() << endl;
321 }
322 
323 
330 void serial_chunky_get(shared_ptr<http::url> &target_url, const size_t target_size, const unsigned long chunk_count,
331  const string &output_file_base) {
332 
333  shared_ptr<http::url> effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
334  if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl->str() << endl;
335  size_t retrieval_size = get_max_retrival_size(target_size, effectiveUrl);
336 
337  string output_file = output_file_base + "_serial_chunky_get.out";
338  vector<dmrpp::Chunk *> chunks;
339  make_chunks(target_url, retrieval_size, chunk_count, chunks);
340 
341  std::ofstream ofs;
342  ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
343  if (ofs.fail())
344  throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
345 
346  for (size_t i = 0; i < chunks.size(); i++) {
347  stringstream ss;
348  ss << prolog << "chunk={index: " << i << ", offset: " << chunks[i]->get_offset() << ", size: "
349  << chunks[i]->get_size() << "}";
350 
351  {
352  BESStopWatch sw;
353  sw.start(ss.str());
354  chunks[i]->read_chunk();
355  }
356 
357  if (debug) cerr << ss.str() << " retrieval from: " << target_url << " completed, timing finished." << endl;
358  ofs.write(chunks[i]->get_rbuf(), chunks[i]->get_rbuf_size());
359  if (debug) cerr << ss.str() << " has been written to: " << output_file << endl;
360  }
361  auto itr = chunks.begin();
362  while (itr != chunks.end()) {
363  delete *itr;
364  *itr = 0;
365  itr++;
366  }
367 
368 }
369 
370 
371 void parse_dmrpp(const string &dmrpp_filename_url){
372  if(debug) cerr << prolog << "BEGIN" << endl;
373 
374  dmrpp::DmrppParserSax2 parser;
375  string target_file_url = dmrpp_filename_url;
376  string target_file;
377 
378  const string http_protocol("http://");
379  const string https_protocol("https://");
380  const string file_protocol("file://");
381 
382  if(debug) cerr << prolog << "dmrpp_filename_url: " << dmrpp_filename_url << endl;
383 
384  if(target_file_url.empty())
385  throw BESInternalError(prolog + "The dmr++ filename was empty.", __FILE__, __LINE__);
386 
387 
388  if(target_file_url.rfind(http_protocol,0)==0 || target_file_url.rfind(https_protocol,0)==0 ){
389  // Use RemoteResource to get the thing.
390  shared_ptr<http::url> tfile_url(new http::url(target_file_url));
391  http::RemoteResource target_resource(tfile_url,prolog+"Timer");
392  target_resource.retrieveResource();
393  target_file = target_resource.getCacheFileName();
394  }
395  else if(target_file_url.rfind(file_protocol,0)==0){
396  target_file = target_file_url.substr(file_protocol.length());
397  }
398  else {
399  target_file_url = file_protocol + target_file_url;
400  }
401 
402  if(debug) cerr << prolog << " target_file: " << target_file << endl;
403 
404  ifstream ifs(target_file);
405  if(ifs.fail())
406  throw BESInternalError(prolog + "Failed open to dmr++ file: " + dmrpp_filename_url, __FILE__, __LINE__);
407 
408  dmrpp::DmrppTypeFactory factory;
409  dmrpp::DMRpp dmr(&factory);
410  dmr.set_href(target_file_url);
411  stringstream msg;
412  msg << prolog << dmrpp_filename_url;
413  {
414  BESStopWatch sw;
415  sw.start(msg.str());
416  parser.intern(ifs, &dmr);
417  }
418 
419  if (Debug) {
420  cerr << prolog << "Built dataset: " << endl;
422  libdap::XMLWriter xmlWriter;
423  dmr.print_dmrpp(xmlWriter, dmr.get_href());
424  cerr << xmlWriter.get_doc() << endl;
425  }
426  if(debug) cerr << prolog << "END" << endl;
427 
428 
429 }
430 
431 
432 
439 void add_chunks(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
440  dmrpp::DmrppArray *target_array) {
441 
442  if (debug) cerr << prolog << "BEGIN" << endl;
443 
444  size_t chunk_size = target_size / chunk_count;
445  if (chunk_size == 0)
446  throw BESInternalError(prolog + "Chunk size was zero.", __FILE__, __LINE__);
447  stringstream chunk_dim_size;
448  chunk_dim_size << chunk_size;
449  target_array->parse_chunk_dimension_sizes(chunk_dim_size.str());
450 
451  size_t chunk_start = 0;
452  size_t chunk_index;
453  for (chunk_index = 0; chunk_index < chunk_count; chunk_index++) {
454  vector<unsigned long long> position_in_array;
455  position_in_array.push_back(chunk_start);
456  if (debug)
457  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
458  << chunk_size << " chunk_poa: " << position_in_array[0] << endl;
459  target_array->add_chunk(target_url, "LE", chunk_size, chunk_start, position_in_array);
460  chunk_start += chunk_size;
461  }
462  if (target_size % chunk_size) {
463  // So there's a remainder and we should make a final chunk for it too.
464  size_t last_chunk_size = target_size - chunk_start;
465  if (debug)
466  cerr << prolog << "Remainder chunk! target_size: " << target_size << " index: " << chunk_index
467  << " last_chunk_start: " << chunk_start << " last_chunk_size: " << last_chunk_size << endl;
468  if (last_chunk_size > 0) {
469  vector<unsigned long long> position_in_array;
470  position_in_array.push_back(chunk_start);
471  if (debug)
472  cerr << prolog << "chunks[" << chunk_index << "] chunk_start: " << chunk_start << " chunk_size: "
473  << last_chunk_size << " chunk_poa: " << position_in_array[0] << endl;
474  target_array->add_chunk(target_url, "LE", last_chunk_size, chunk_start, position_in_array);
475  }
476  }
477  if (debug) cerr << prolog << "END" << endl;
478 }
479 
480 
481 
489 size_t array_get(shared_ptr<http::url> &target_url, const size_t &target_size, const size_t &chunk_count,
490  const string &output_file_base) {
491 
492  if (debug) cerr << prolog << "BEGIN" << endl;
493  string output_file = output_file_base + "_array_get.out";
494  std::ofstream ofs;
495  ofs.open(output_file, std::fstream::in | std::fstream::out | std::ofstream::trunc | std::ofstream::binary);
496  if (ofs.fail())
497  throw BESInternalError(prolog + "Failed to open file: " + output_file, __FILE__, __LINE__);
498 
499  auto *tmplt = new dmrpp::DmrppByte("data");
500  auto *target_array = new dmrpp::DmrppArray("data", tmplt);
501  delete tmplt; // Because the Vector() constructor made a copy and it's our problem...
502 
503  target_array->append_dim(target_size);
504  add_chunks(target_url, target_size, chunk_count, target_array);
505  target_array->set_send_p(true); // Mark it to be sent so that it will be read.
506 
507  dmrpp::DmrppTypeFactory factory;
508  dmrpp::DMRpp dmr(&factory);
509  dmr.set_href(target_url->str());
510  dmrpp::DmrppD4Group *root = dynamic_cast<dmrpp::DmrppD4Group *>(dmr.root());
511  root->add_var_nocopy(target_array);
512  root->set_in_selection(true);
513 
514  if (debug) {
515  cerr << prolog << "Built dataset: " << endl;
517  libdap::XMLWriter xmlWriter;
518  dmr.print_dmrpp(xmlWriter, dmr.get_href());
519  cerr << xmlWriter.get_doc() << endl;
520  }
521 
522  {
523  stringstream timer_msg;
524  timer_msg << prolog << "DmrppD4Group.intern_data() for " << target_size << " bytes in " << chunk_count <<
525  " chunks, parallel transfers ";
526  if (dmrpp::DmrppRequestHandler::d_use_transfer_threads) {
527  timer_msg << "enabled. (max: " << dmrpp::DmrppRequestHandler::d_max_transfer_threads << ")";
528  } else {
529  timer_msg << "disabled.";
530  }
531  BESStopWatch sw;
532  sw.start(timer_msg.str());
533  root->intern_data();
534  }
535 
536  size_t started = ofs.tellp();
537  libdap::D4StreamMarshaller streamMarshaller(ofs);
538  root->serialize(streamMarshaller, dmr);
539 
540  size_t stopped = ofs.tellp();
541  size_t numberOfBytesWritten = stopped - started;
542  if (debug) cerr << prolog << "target_size: " << target_size << " numberOfBytesWritten: " << numberOfBytesWritten << endl;
543 
544  // delete target_array; // Don't have to delete this because we added it to the DMR using add_var_nocopy()
545  if (debug) cerr << prolog << "END" << endl;
546  return numberOfBytesWritten;
547 
548 }
549 
550 
551 
#if 0
// Parked test plan: for chunk counts 2, 4, ... 2^power_of_two_chunk_count, run array_get()
// 'reps' times without transfer threads, and then 'reps' times for each of 2, 4, ...
// 2^power_of_two_threads_max transfer threads.
//
// NOTE(review): this block is compiled out and is stale with respect to the rest of the
// file. It passes std::string URLs where get_max_retrival_size() and array_get() take
// std::shared_ptr<http::url> &, so it needs updating before it can be re-enabled.
int test_plan_01(const string &target_url,
        const string &output_prefix,
        const unsigned int reps,
        const size_t retrieval_size,
        const unsigned int power_of_two_chunk_count,
        const unsigned int power_of_two_threads_max,
        const string &output_file_base
        ) {
    int result = 0;
    if (debug)
        cerr << prolog << "BEGIN" << endl;

    try {
        string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
        if (debug)
            cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
        size_t target_size = get_max_retrival_size(retrieval_size, effectiveUrl);

        // Outer loop on chunk size
        size_t chunk_count = 2;
        for (size_t chunk_pwr = 1; chunk_pwr <= power_of_two_chunk_count; chunk_pwr++) {

            // We turn off parallel transfers to get a baseline that is the single threaded, serial retrieval of the chunks.
            dmrpp::DmrppRequestHandler::d_use_transfer_threads = false;
            for ( unsigned int rep = 0; rep < reps; rep++) {
                array_get(effectiveUrl, target_size, chunk_count, output_file_base );
            }

            // Now we enable threads and starting with 2 work up to power_of_two_threads_max
            dmrpp::DmrppRequestHandler::d_use_transfer_threads = true;
            unsigned int thread_count = 2;
            for ( unsigned int tpwr = 1; tpwr <= power_of_two_threads_max; tpwr++) {
                dmrpp::DmrppRequestHandler::d_max_transfer_threads = thread_count;
                for ( unsigned int rep = 0; rep < reps; rep++) {
                    array_get(effectiveUrl, target_size, chunk_count, output_file_base);
                }
                thread_count *= 2;
            }
            chunk_count *= 2;
        }
    }
    catch (BESError &e) { // Catch by reference so that the derived exception is not sliced.
        cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file()<< ":" << e. get_line() << endl;
        result = 1;
    }
    catch (...) {
        cerr << prolog << "Caught Unknown Exception." <<
        endl;
        result = 2;
    }
    cerr << prolog << "END" << endl;
    return result;
}
#endif
637 
644 int main(int argc, char *argv[]) {
645 
646  int result = 0;
647  string bes_log_file;
648  string bes_debug_log_file = "cerr";
649  string bes_debug_keys = "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr";
650  shared_ptr<http::url> target_url(new http::url("https://www.opendap.org/pub/binary/hyrax-1.16/centos-7.x/bes-debuginfo-3.20.7-1.static.el7.x86_64.rpm"));
651  string output_file_base("retriever");
652  string http_cache_dir;
653  string prefix;
654  size_t pwr2_number_o_chunks = 18;
655  size_t max_target_size = 0;
656  string http_netrc_file;
657  unsigned int reps=10;
658  unsigned pwr2_parallel_reads = 0;
659  // Unused bool aws_sign_request_url = false;
660 
661  char *prefixCstr = getenv("prefix");
662  if (prefixCstr) {
663  prefix = prefixCstr;
664  } else {
665  prefix = "/";
666  }
667  auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
668 
669 
670  GetOpt getopt(argc, argv, "h:r:n:C:c:o:u:l:S:dbDp:"); // Removed A. Unused jhrg 11/23/21
671  int option_char;
672  while ((option_char = getopt()) != -1) {
673  switch (option_char) {
674  case 'D':
675  Debug = true;
676  debug = true;
677  break;
678  case 'd':
679  debug = true;
680  break;
681  case 'b':
682  bes_debug = true;
683  break;
684 #if 0
685  case 'A':
686  // Unused aws_sign_request_url = true;
687  break;
688 #endif
689  case 'c':
690  bes_config_file = getopt.optarg;
691  break;
692  case 'u':
693  target_url = shared_ptr<http::url>(new http::url(getopt.optarg));
694  break;
695  case 'l':
696  bes_log_file = getopt.optarg;
697  break;
698  case 'n':
699  http_netrc_file = getopt.optarg;
700  break;
701  case 'o':
702  output_file_base = getopt.optarg;
703  break;
704  case 'C':
705  pwr2_number_o_chunks = atol(getopt.optarg);
706  break;
707  case 'S':
708  max_target_size = atol(getopt.optarg);
709  break;
710  case 'p':
711  pwr2_parallel_reads = atol(getopt.optarg);
712  break;
713  case 'r':
714  reps = atol(getopt.optarg);
715  break;
716  case 'h':
717  http_cache_dir = getopt.optarg;
718  break;
719 
720  default:
721  break;
722  }
723  }
724 
725  if (bes_log_file.empty()) {
726  bes_log_file = output_file_base + "_bes.log";
727  }
728 
729  cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
730  cerr << prolog << "debug: " << (debug ? "true" : "false") << endl;
731  cerr << prolog << "Debug: " << (Debug ? "true" : "false") << endl;
732  cerr << prolog << "bes_debug: " << (bes_debug ? "true" : "false") << endl;
733  cerr << prolog << "output_file_base: '" << output_file_base << "'" << endl;
734  cerr << prolog << "bes_config_file: '" << bes_config_file << "'" << endl;
735  cerr << prolog << "bes_log_file: '" << bes_log_file << "'" << endl;
736  cerr << prolog << "bes_debug_log_file: '" << bes_debug_log_file << "'" << endl;
737  cerr << prolog << "bes_debug_keys: '" << bes_debug_keys << "'" << endl;
738  cerr << prolog << "http_netrc_file: '" << http_netrc_file << "'" << endl;
739  cerr << prolog << "target_url: '" << target_url->str() << "'" << endl;
740  cerr << prolog << "max_target_size: '" << max_target_size << "'" << endl;
741  cerr << prolog << "number_o_chunks: 2^" << pwr2_number_o_chunks << endl;
742  cerr << prolog << "reps: " << reps << endl;
743  if (pwr2_parallel_reads)
744  cerr << prolog << "parallel_reads: ENABLED (max: 2^" << pwr2_parallel_reads << ")" << endl;
745  else
746  cerr << prolog << "parallel_reads: DISABLED" << endl;
747  cerr << prolog << "-- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - -- - " << endl;
748 
749 
750  try {
751  if(pwr2_parallel_reads){
752  unsigned long long int max_threads = 1ULL << pwr2_parallel_reads;
753  dmrpp::DmrppRequestHandler::d_use_transfer_threads = true;
754  dmrpp::DmrppRequestHandler::d_max_transfer_threads = max_threads;
755  }
756  else {
757  dmrpp::DmrppRequestHandler::d_use_transfer_threads = false;
758  dmrpp::DmrppRequestHandler::d_max_transfer_threads = 1;
759  }
760 
761  dmrpp::DmrppRequestHandler *dmrppRH = bes_setup(bes_config_file, bes_log_file, bes_debug_log_file,
762  bes_debug_keys, http_netrc_file,http_cache_dir);
763 
764  shared_ptr<http::url> effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
765  if (debug) cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
766  size_t target_size = get_max_retrival_size(max_target_size, effectiveUrl);
767 
768  unsigned long long int chunks = 1ULL << pwr2_number_o_chunks;
769  if (debug) cerr << prolog << "Dividing target into " << chunks << " chunks." << endl;
770 
771 
772 
773  array_get(effectiveUrl, target_size, chunks, output_file_base);
774 
775 
776 #if 0 // these work but are parked a.t.m.
777  result = test_plan_01(
778  target_url,
779  output_file_base,
780  reps,
781  max_target_size,
782  pwr2_number_o_chunks,
783  pwr2_parallel_reads,
784  output_file_base) ;
785 
786  simple_get(effectiveUrl, output_file_base);
787  serial_chunky_get( effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
788 
789  parse_dmrpp(target_url);
790 
791 
792  string effectiveUrl = http::EffectiveUrlCache::TheCache()->get_effective_url(target_url);
793  if (debug)
794  cerr << prolog << "curl::retrieve_effective_url() returned: " << effectiveUrl << endl;
795  target_size = get_max_retrival_size(retrieval_size, effectiveUrl);
796  array_get(effectiveUrl, max_target_size, pwr2_number_o_chunks, output_file_base);
797 #endif
798 
799  curl_global_cleanup();
800  delete dmrppRH;
801  }
802  catch (BESError e) {
803  cerr << prolog << "Caught BESError. Message: " << e.get_message() << " " << e.get_file() << ":" << e.get_line()
804  << endl;
805  result = 1;
806  }
807  catch (...) {
808  cerr << prolog << "Caught Unknown Exception." << endl;
809  result = 2;
810  }
811 
812  return result;
813 }
virtual std::string get(const std::string &key)
virtual bool is_s3_cred()
Do the URL, ID, Key and Region items make up an S3 Credential?
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Definition: BESDebug.cc:98
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
exception thrown if internal error encountered
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
AccessCredentials * get(std::shared_ptr< http::url > &url)
static CredentialsManager * theCM()
Returns the singleton instance of the CredentialsManager.
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:206
static std::string ConfigFile
Definition: TheBESKeys.h:185
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:68
static bool d_print_chunks
if true, print_dap4() prints chunk elements
Definition: DmrppCommon.h:118
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:134
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void intern(std::istream &f, libdap::DMR *dest_dmr)
static EffectiveUrlCache * TheCache()
Get the singleton EffectiveUrlCache instance.
std::shared_ptr< EffectiveUrl > get_effective_url(std::shared_ptr< url > source_url)