bes  Updated for version 3.20.10
HttpCache.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of the BES http package, part of the Hyrax data server.
5 
6 // Copyright (c) 2020 OPeNDAP, Inc.
7 // Author: Nathan Potter <ndp@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 // Authors:
26 // ndp Nathan Potter <ndp@opendap.org>
27 
28 #include <config.h>
29 
30 #include <sys/stat.h>
31 
32 #include <string>
33 #include <fstream>
34 #include <sstream>
35 #include <vector>
36 
37 #include <cstdlib>
38 
39 #include "PicoSHA2/picosha2.h"
40 
41 #include <BESInternalError.h>
42 #include <BESDebug.h>
43 #include <BESUtil.h>
44 #include <TheBESKeys.h>
45 
46 #include "HttpCache.h"
47 #include "HttpUtils.h"
48 #include "HttpNames.h"
49 #include "url_impl.h"
50 
// AT_EXIT(x): registers x with atexit() when the build defines HAVE_ATEXIT;
// otherwise it expands to nothing.
#if defined(HAVE_ATEXIT)
#define AT_EXIT(x) atexit((x))
#else
#define AT_EXIT(x)
#endif

// prolog: message prefix of the form "HttpCache::<enclosing function>() - ".
#define prolog (string("HttpCache::") + __func__ + "() - ")
57 
58 
59 using std::endl;
60 using std::string;
61 using std::vector;
62 using std::stringstream;
63 
64 namespace http {
65 
66  HttpCache *HttpCache::d_instance = 0;
67  bool HttpCache::d_enabled = true;
68 
69  unsigned long HttpCache::getCacheSizeFromConfig() {
70  bool found = false;
71  string size;
72  unsigned long size_in_megabytes = 0;
73  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_SIZE_KEY, size, found);
74 
75  if (found) {
76  std::istringstream iss(size);
77  iss >> size_in_megabytes;
78  } else {
79  stringstream msg;
80  msg << prolog << "The BES Key " << HTTP_CACHE_SIZE_KEY << " is not set.";
81  BESDEBUG(HTTP_MODULE, msg.str() << endl);
82  throw BESInternalError(msg.str(), __FILE__, __LINE__);
83  }
84 
85  return size_in_megabytes;
86  }
87 
88  unsigned long HttpCache::getCacheExpiresTime() {
89  bool found = false;
90  string time;
91  unsigned long time_in_seconds = 0;
92  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_EXPIRES_TIME_KEY, time, found);
93 
94  if (found) {
95  std::istringstream iss(time);
96  iss >> time_in_seconds;
97  } else {
98  time_in_seconds = REMOTE_RESOURCE_DEFAULT_EXPIRED_INTERVAL;
99  }
100 
101  return time_in_seconds;
102  }
103 
104  string HttpCache::getCacheDirFromConfig() {
105  bool found;
106  string subdir = "";
107  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_DIR_KEY, subdir, found);
108 
109  if (!found) {
110  stringstream msg;
111  msg << prolog << "The BES Key " << HTTP_CACHE_DIR_KEY << " is not set.";
112  BESDEBUG(HTTP_MODULE, msg.str() << endl);
113  throw BESInternalError(msg.str(), __FILE__, __LINE__);
114  }
115 
116  return subdir;
117  }
118 
119  string HttpCache::getCachePrefixFromConfig() {
120  bool found;
121  string prefix = "";
122  TheBESKeys::TheKeys()->get_value(HTTP_CACHE_PREFIX_KEY, prefix, found);
123 
124  if (found) {
125  prefix = BESUtil::lowercase(prefix);
126  } else {
127  stringstream msg;
128  msg << prolog << "The BES Key " << HTTP_CACHE_PREFIX_KEY << " is not set.";
129  BESDEBUG(HTTP_MODULE, msg.str() << endl);
130  throw BESInternalError(msg.str(), __FILE__, __LINE__);
131  }
132 
133  return prefix;
134  }
135 
136  HttpCache::HttpCache() {
137  BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
138 
139  string cacheDir = getCacheDirFromConfig();
140  string cachePrefix = getCachePrefixFromConfig();
141  unsigned long cacheSizeMbytes = getCacheSizeFromConfig();
142 
143  BESDEBUG(HTTP_MODULE, prolog << "Cache configuration params: " << cacheDir << ", " << cachePrefix << ", "
144  << cacheSizeMbytes << endl);
145  initialize(cacheDir, cachePrefix, cacheSizeMbytes);
146 
147  BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
148  }
149 
150 #if 1
151  HttpCache::HttpCache(const string &cache_dir, const string &prefix, unsigned long long size) {
152 
153  BESDEBUG(HTTP_MODULE, prolog << "BEGIN" << endl);
154 
155  initialize(cache_dir, prefix, size);
156 
157  BESDEBUG(HTTP_MODULE, prolog << "END" << endl);
158  }
159 #endif
160 #if 0
161  HttpCache *
162  HttpCache::get_instance(const string &cache_dir, const string &cache_file_prefix,
163  unsigned long long max_cache_size) {
164  if (d_enabled && d_instance == 0) {
165  if (dir_exists(cache_dir)) {
166  d_instance = new HttpCache(cache_dir, cache_file_prefix, max_cache_size);
167  d_enabled = d_instance->cache_enabled();
168  if (!d_enabled) {
169  delete d_instance;
170  d_instance = 0;
171  BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is DISABLED" << endl);
172  } else {
173  AT_EXIT(delete_instance);
174 
175  BESDEBUG(HTTP_MODULE, "HttpCache::" << __func__ << "() - " << "Cache is ENABLED" << endl);
176  }
177  }
178  }
179 
180  return d_instance;
181  }
182 #endif
183 
187  HttpCache *
189  if (d_enabled && d_instance == 0) {
190  try {
191  d_instance = new HttpCache();
192  d_enabled = d_instance->cache_enabled();
193  if (!d_enabled) {
194  delete d_instance;
195  d_instance = 0;
196  BESDEBUG(HTTP_MODULE, prolog << "Cache is DISABLED" << endl);
197  } else {
198  AT_EXIT(delete_instance);
199 
200  BESDEBUG(HTTP_MODULE, prolog << "Cache is ENABLED" << endl);
201  }
202  }
203  catch (BESInternalError &bie) {
204  BESDEBUG(HTTP_MODULE,
205  "[ERROR] HttpCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message()
206  << endl);
207  }
208  }
209 
210  return d_instance;
211  }
212 
213 #if HASH_CACHE_FILENAME
214 
215  string
216  HttpCache::get_hash(const string &s)
217  {
218  if (s.empty()){
219  string msg = "You cannot hash the empty string.";
220  BESDEBUG(HTTP_MODULE, prolog << msg << endl);
221  throw BESInternalError(msg, __FILE__, __LINE__);
222  }
223  return picosha2::hash256_hex_string(s[0] == '/' ? s : "/" + s);
224  }
225 
226 
227  bool is_url(const string &candidate){
228  size_t index = candidate.find(HTTP_PROTOCOL);
229  if(index){
230  index = candidate.find(HTTPS_PROTOCOL);
231  if(index){
232  return false;
233  }
234  }
235  return true;
236  }
237 
238 
246  string get_real_name_extension(const string &identifier){
247  string real_name_extension;
248 
249  string path_part;
250 
251  if(is_url(identifier)) {
252  // Since it's a URL it might have a massive query string attached, and since wee
253  // have no idea what the query parameters mean, we'll just punt and look at the path part of the URL.
254  // We make an instance of http::url which will carve up the URL for us.
255  http::url target_url(identifier);
256  path_part = target_url.path();
257  }
258  else {
259  path_part = identifier;
260  }
261 
262  vector<string> path_elements;
263  // Now that we a "path" (none of that query string mess) we can tokenize it.
264  BESUtil::tokenize(path_part,path_elements);
265  if(!path_elements.empty()){
266  string last = path_elements.back();
267  if(last != path_part)
268  real_name_extension = "#" + last; // This utilizes a hack in libdap
269  }
270  return real_name_extension;
271  }
272 
273 
282  string HttpCache::get_cache_file_name(const string &uid, const string &src_id, bool mangle){
283  stringstream cache_filename;
284  string hashed_part;
285  string real_name_extension;
286  string uid_part;
287 
288  if(!uid.empty())
289  uid_part = uid + "_";
290 
291  if(mangle){
292  hashed_part = get_hash(src_id);
293  }
294  else {
295  hashed_part = src_id;
296  }
297  real_name_extension = get_real_name_extension(src_id);
298 
299  cache_filename << get_cache_file_prefix() << uid_part << hashed_part << real_name_extension;
300 
301  string cf_name = BESUtil::assemblePath(this->get_cache_directory(), cache_filename.str() );
302 
303  return cf_name;
304  }
305 
306 
307  string HttpCache::get_cache_file_name( const string &src, bool mangle){
308  string uid;
309  return get_cache_file_name(uid,src, mangle);
310  }
311 
312 
313 #endif
314 
315 } // namespace http
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
void initialize(const std::string &cache_dir, const std::string &prefix, unsigned long long size)
Initialize an instance of FileLockingCache.
const std::string get_cache_directory()
static bool dir_exists(const std::string &dir)
const std::string get_cache_file_prefix()
exception thrown if internal error encountered
static void tokenize(const std::string &str, std::vector< std::string > &tokens, const std::string &delimiters="/")
Definition: BESUtil.cc:1086
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
A cache for content accessed via HTTP.
Definition: HttpCache.h:54
static HttpCache * get_instance()
Definition: HttpCache.cc:188
virtual std::string get_cache_file_name(const std::string &uid, const std::string &src, bool mangle=true)
Definition: HttpCache.cc:282
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55
string get_real_name_extension(const string &identifier)
Definition: HttpCache.cc:246