bes  Updated for version 3.20.10
CmrCatalog.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 //
3 // CMRCatalog.cc
4 //
5 // This file is part of BES cmr_module
6 //
7 // Copyright (c) 2018 OPeNDAP, Inc.
8 // Author: Nathan Potter <ndp@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 // Please read the full copyright statement in the file COPYRIGHT_URI.
26 //
27 
28 #include "config.h"
29 
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <dirent.h>
33 
34 #include <cstring>
35 #include <cerrno>
36 
37 #include <sstream>
38 #include <cassert>
39 
40 #include <memory>
41 #include <algorithm>
42 
43 #include "BESUtil.h"
44 #include "BESCatalogUtils.h"
45 #include "BESCatalogEntry.h"
46 
47 #include "CatalogNode.h"
48 #include "CatalogItem.h"
49 
50 #include "BESInfo.h"
51 #include "BESContainerStorageList.h"
52 #include "BESFileContainerStorage.h"
53 #include "BESLog.h"
54 
55 #include "BESInternalError.h"
56 #include "BESForbiddenError.h"
57 #include "BESNotFoundError.h"
58 #include "BESSyntaxUserError.h"
59 
60 #include "TheBESKeys.h"
61 #include "BESDebug.h"
62 
63 #include "CmrApi.h"
64 #include "CmrNames.h"
65 #include "CmrCatalog.h"
66 
67 using namespace bes;
68 using namespace std;
69 
70 #define prolog std::string("CmrCatalog::").append(__func__).append("() - ")
71 
72 namespace cmr {
73 
85 CmrCatalog::CmrCatalog(const std::string &name /* = “CMR” */) : BESCatalog(name) {
86  bool found = false;
87  TheBESKeys::TheKeys()->get_values(CMR_COLLECTIONS, d_collections, found);
88  if(!found){
89  throw BESInternalError(string("The CMR module must define at least one collection name using the key; '")+CMR_COLLECTIONS
90  +"'", __FILE__, __LINE__);
91  }
92 
93  found = false;
94  TheBESKeys::TheKeys()->get_values(CMR_FACETS, d_facets, found);
95  if(!found){
96  throw BESInternalError(string("The CMR module must define at least one facet name using the key; '")+CMR_COLLECTIONS
97  +"'", __FILE__, __LINE__);
98  }
99 }
100 
101 CmrCatalog::~CmrCatalog()
102 {
103 }
105 CmrCatalog::get_node(const string &path) const
106 {
107  return get_node_NEW(path);
108 }
109 
110 
112 CmrCatalog::get_node_NEW(const string &ppath) const
113 {
114  string path = BESUtil::normalize_path(ppath,true, false);
115  vector<string> path_elements = BESUtil::split(path);
116  BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
117 
118  string epoch_time = BESUtil::get_time(0,false);
119 
120  CmrApi cmrApi;
121  bes::CatalogNode *node;
122 
123  if(path_elements.empty()){
124  node = new CatalogNode("/");
125  node->set_lmt(epoch_time);
126  node->set_catalog_name(CMR_CATALOG_NAME);
127  for(size_t i=0; i<d_collections.size() ; i++){
128  CatalogItem *collection = new CatalogItem();
129  collection->set_name(d_collections[i]);
130  collection->set_type(CatalogItem::node);
131  node->add_node(collection);
132  }
133  }
134  else {
135  for(size_t i=0; i< path_elements.size() ;i++){
136  if(path_elements[i]=="-")
137  path_elements[i] = "";
138  }
139 
140  string collection = path_elements[0];
141  BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_collections.size(): " << d_collections.size() << endl);
142  bool valid_collection = false;
143  for(size_t i=0; i<d_collections.size() && !valid_collection ; i++){
144  if(collection == d_collections[i])
145  valid_collection = true;
146  }
147  if(!valid_collection){
148  throw BESNotFoundError("The CMR catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
149  }
150  BESDEBUG(MODULE, prolog << "Collection " << collection << " is valid." << endl);
151  if(path_elements.size() >1){
152  string facet = path_elements[1];
153  bool valid_facet = false;
154  for(size_t i=0; i<d_facets.size() && !valid_facet ; i++){
155  if(facet == d_facets[i])
156  valid_facet = true;
157  }
158  if(!valid_facet){
159  throw BESNotFoundError("The CMR collection '"+collection+"' does not contain a facet named '"+facet+"'",__FILE__,__LINE__);
160  }
161 
162  if(facet=="temporal"){
163  BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
164  node = new CatalogNode(path);
165  node->set_lmt(epoch_time);
166  node->set_catalog_name(CMR_CATALOG_NAME);
167 
168 
169  switch( path_elements.size()){
170 
171  case 2: // The path ends at temporal facet, so we need the year nodes.
172  {
173  vector<string> years;
174 
175  BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection<< endl);
176  cmrApi.get_years(collection, years);
177  for(size_t i=0; i<years.size() ; i++){
178  CatalogItem *collection = new CatalogItem();
179  collection->set_type(CatalogItem::node);
180  collection->set_name(years[i]);
181  collection->set_is_data(false);
182  collection->set_lmt(epoch_time);
183  collection->set_size(0);
184  node->add_node(collection);
185  }
186  }
187  break;
188 
189  case 3: // The path ends at years facet, so we need the month nodes.
190  {
191  string year = path_elements[2];
192  string month("");
193  string day("");
194  vector<string> months;
195 
196  BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection << " year: " << year << endl);
197  cmrApi.get_months(collection, year, months);
198  for(size_t i=0; i<months.size() ; i++){
199  CatalogItem *collection = new CatalogItem();
200  collection->set_type(CatalogItem::node);
201  collection->set_name(months[i]);
202  collection->set_is_data(false);
203  collection->set_lmt(epoch_time);
204  collection->set_size(0);
205  node->add_node(collection);
206  }
207  }
208  break;
209 
210  case 4: // The path ends at months facet, so we need the day nodes.
211  {
212  string year = path_elements[2];
213  string month = path_elements[3];
214  string day("");
215  vector<string> days;
216 
217  BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection << " year: " << year << " month: " << month << endl);
218  cmrApi.get_days(collection, year, month, days);
219  for(size_t i=0; i<days.size() ; i++){
220  CatalogItem *collection = new CatalogItem();
221  collection->set_type(CatalogItem::node);
222  collection->set_name(days[i]);
223  collection->set_is_data(false);
224  collection->set_lmt(epoch_time);
225  collection->set_size(0);
226  node->add_node(collection);
227  }
228  }
229  break;
230 
231  case 5: // The path ends at the days facet, so we need the granule nodes.
232  {
233  string year = path_elements[2];
234  string month = path_elements[3];
235  string day = path_elements[4];
236  BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection << " year: " << year << " month: " << month << " day: " << day << endl);
237  vector<Granule *> granules;
238  cmrApi.get_granules(collection, year, month, day, granules);
239  for(size_t i=0; i<granules.size() ; i++){
240  node->add_leaf(granules[i]->getCatalogItem(get_catalog_utils()));
241  }
242  }
243  break;
244 
245  case 6: // Looks like they are trying to get a particular granule...
246  {
247  string year = path_elements[2];
248  string month = path_elements[3];
249  string day = path_elements[4];
250  string granule_id = path_elements[5];
251  BESDEBUG(MODULE, prolog << "Request resolved to leaf granule/dataset name, collection: " << collection << " year: " << year
252  << " month: " << month << " day: " << day << " granule: " << granule_id << endl);
253  Granule *granule = cmrApi.get_granule(collection,year,month,day,granule_id);
254  if(granule){
255  CatalogItem *granuleItem = new CatalogItem();
256  granuleItem->set_type(CatalogItem::leaf);
257  granuleItem->set_name(granule->getName());
258  granuleItem->set_is_data(true);
259  granuleItem->set_lmt(granule->getLastModifiedStr());
260  granuleItem->set_size(granule->getSize());
261  node->set_leaf(granuleItem);
262  }
263  else {
264  throw BESNotFoundError("No such resource: "+path,__FILE__,__LINE__);
265  }
266  }
267  break;
268 
269  default:
270  {
271  throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
272  }
273  break;
274  }
275 
276  }
277  else {
278  throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
279  }
280  }
281  else {
282  BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection << endl);
283  node = new CatalogNode(path);
284  node->set_lmt(epoch_time);
285  node->set_catalog_name(CMR_CATALOG_NAME);
286  for(size_t i=0; i<d_facets.size() ; i++){
287  CatalogItem *collection = new CatalogItem();
288  collection->set_name(d_facets[i]);
289  collection->set_type(CatalogItem::node);
290  collection->set_lmt(epoch_time);
291  BESDEBUG(MODULE, prolog << "Adding facet: " << d_facets[i] << endl);
292  node->add_node(collection);
293  }
294  }
295  }
296  return node;
297 }
298 
299 
300 // path must start with a '/'. This class interprets it as
301 // starting at the CatalogDirectory instance's root directory. It may either
302 // end in a '/' or not.
303 //
304 // If it is not a directory - that is an error. (return null or throw?)
305 //
306 // Item names are relative
322 CmrCatalog::get_node_OLD(const string &ppath) const
323 {
324  string path = BESUtil::normalize_path(ppath,true, false);
325  vector<string> path_elements = BESUtil::split(path);
326  BESDEBUG(MODULE, prolog << "path: '" << path << "' path_elements.size(): " << path_elements.size() << endl);
327 
328  string epoch_time = BESUtil::get_time(0,false);
329 
330  CmrApi cmrApi;
331  bes::CatalogNode *node;
332 
333  if(path_elements.empty()){
334  node = new CatalogNode("/");
335  node->set_lmt(epoch_time);
336  node->set_catalog_name(CMR_CATALOG_NAME);
337  for(size_t i=0; i<d_collections.size() ; i++){
338  CatalogItem *collection = new CatalogItem();
339  collection->set_name(d_collections[i]);
340  collection->set_type(CatalogItem::node);
341  node->add_node(collection);
342  }
343  }
344  else {
345  string collection = path_elements[0];
346  BESDEBUG(MODULE, prolog << "Checking for collection: " << collection << " d_collections.size(): " << d_collections.size() << endl);
347  bool valid_collection = false;
348  for(size_t i=0; i<d_collections.size() && !valid_collection ; i++){
349  if(collection == d_collections[i])
350  valid_collection = true;
351  }
352  if(!valid_collection){
353  throw BESNotFoundError("The CMR catalog does not contain a collection named '"+collection+"'",__FILE__,__LINE__);
354  }
355  BESDEBUG(MODULE, prolog << "Collection " << collection << " is valid." << endl);
356  if(path_elements.size() >1){
357  string facet = path_elements[1];
358  bool valid_facet = false;
359  for(size_t i=0; i<d_facets.size() && !valid_facet ; i++){
360  if(facet == d_facets[i])
361  valid_facet = true;
362  }
363  if(!valid_facet){
364  throw BESNotFoundError("The CMR collection '"+collection+"' does not contain a facet named '"+facet+"'",__FILE__,__LINE__);
365  }
366 
367  if(facet=="temporal"){
368  BESDEBUG(MODULE, prolog << "Found Temporal Facet"<< endl);
369  node = new CatalogNode(path);
370  node->set_lmt(epoch_time);
371  node->set_catalog_name(CMR_CATALOG_NAME);
372 
373 
374  switch( path_elements.size()){
375  case 2: // The path ends at temporal facet, so we need the years.
376  {
377  vector<string> years;
378 
379  BESDEBUG(MODULE, prolog << "Getting year nodes for collection: " << collection<< endl);
380  cmrApi.get_years(collection, years);
381  for(size_t i=0; i<years.size() ; i++){
382  CatalogItem *collection = new CatalogItem();
383  collection->set_type(CatalogItem::node);
384  collection->set_name(years[i]);
385  collection->set_is_data(false);
386  collection->set_lmt(epoch_time);
387  collection->set_size(0);
388  node->add_node(collection);
389  }
390  }
391  break;
392  case 3:
393  {
394  string year = path_elements[2];
395  string month("");
396  string day("");
397  vector<string> months;
398 
399  BESDEBUG(MODULE, prolog << "Getting month nodes for collection: " << collection << " year: " << year << endl);
400  cmrApi.get_months(collection, year, months);
401  for(size_t i=0; i<months.size() ; i++){
402  CatalogItem *collection = new CatalogItem();
403  collection->set_type(CatalogItem::node);
404  collection->set_name(months[i]);
405  collection->set_is_data(false);
406  collection->set_lmt(epoch_time);
407  collection->set_size(0);
408  node->add_node(collection);
409  }
410  }
411  break;
412  case 4:
413  {
414  string year = path_elements[2];
415  string month = path_elements[3];
416  string day("");
417  vector<string> days;
418 
419  BESDEBUG(MODULE, prolog << "Getting day nodes for collection: " << collection << " year: " << year << " month: " << month << endl);
420  cmrApi.get_days(collection, year, month, days);
421  for(size_t i=0; i<days.size() ; i++){
422  CatalogItem *collection = new CatalogItem();
423  collection->set_type(CatalogItem::node);
424  collection->set_name(days[i]);
425  collection->set_is_data(false);
426  collection->set_lmt(epoch_time);
427  collection->set_size(0);
428  node->add_node(collection);
429  }
430  }
431  break;
432  case 5:
433  {
434  string year = path_elements[2];
435  string month = path_elements[3];
436  string day = path_elements[4];
437  BESDEBUG(MODULE, prolog << "Getting granule leaves for collection: " << collection << " year: " << year << " month: " << month << " day: " << day << endl);
438  vector<Granule *> granules;
439  cmrApi.get_granules(collection, year, month, day, granules);
440  for(size_t i=0; i<granules.size() ; i++){
441  node->add_leaf(granules[i]->getCatalogItem(get_catalog_utils()));
442  }
443  }
444  break;
445  default:
446  throw BESSyntaxUserError("CmrCatalog: The path '"+path+"' does not describe a valid temporal facet search.",__FILE__,__LINE__);
447  break;
448  }
449  }
450  else {
451  throw BESNotFoundError("The CMR catalog only supports temporal faceting.",__FILE__,__LINE__);
452  }
453  }
454  else {
455  BESDEBUG(MODULE, prolog << "Building facet list for collection: " << collection << endl);
456  node = new CatalogNode(path);
457  node->set_lmt(epoch_time);
458  node->set_catalog_name(CMR_CATALOG_NAME);
459  for(size_t i=0; i<d_facets.size() ; i++){
460  CatalogItem *collection = new CatalogItem();
461  collection->set_name(d_facets[i]);
462  collection->set_type(CatalogItem::node);
463  collection->set_lmt(epoch_time);
464  BESDEBUG(MODULE, prolog << "Adding facet: " << d_facets[i] << endl);
465  node->add_node(collection);
466  }
467  }
468  }
469  return node;
470 }
471 
472 #if 0
// Disabled (compiled out by the enclosing #if 0) filesystem-based version of
// get_node(). It resolves 'path' under the catalog utilities' root directory,
// opens that directory, and builds a CatalogNode whose child nodes are the
// sub-directories that are not excluded and whose leaves are the regular files
// that are included. Symbolic links are skipped when follow_sym_links() is
// false. Both child lists are sorted before the node is returned.
//
// The directory handle is closed on the normal path and, via the catch (...)
// handler, before any exception is re-thrown.
//
// NOTE(review): 'ordering', used by the two sort() calls, is not declared in
// the visible text, and the function's return type is not shown either.
474 CmrCatalog::get_node(const string &path) const
475 {
476 
477  string rootdir = d_utils->get_root_dir();
478 
479  // This will throw the appropriate exception (Forbidden or Not Found).
480  // Checks to make sure the different elements of the path are not
481  // symbolic links if follow_sym_links is set to false, and checks to
482  // make sure have permission to access node and the node exists.
483  BESUtil::check_path(path, rootdir, d_utils->follow_sym_links());
484 
485  string fullpath = rootdir + path;
486 
487  DIR *dip = opendir(fullpath.c_str());
488  if (!dip)
489  throw BESInternalError(
490  "A CMRCatalog can only return nodes for directory. The path '" + path
491  + "' is not a directory for BESCatalog '" + get_catalog_name() + "'.", __FILE__, __LINE__);
492 
493  try {
494  // The node is a directory
495 
496  // Based on other code (show_catalogs()), use BESCatalogUtils::exclude() on
497  // a directory, but BESCatalogUtils::include() on a file.
498  if (d_utils->exclude(path))
499  throw BESForbiddenError(
500  string("The path '") + path + "' is not included in the catalog '" + get_catalog_name() + "'.",
501  __FILE__, __LINE__);
502 
503  CatalogNode *node = new CatalogNode(path);
504 
505  node->set_catalog_name(get_catalog_name());
506  struct stat buf;
507  int statret = stat(fullpath.c_str(), &buf);
508  if (statret == 0 /* && S_ISDIR(buf.st_mode) */)
509  node->set_lmt(get_time(buf.st_mtime));
510 
511  struct dirent *dit;
512  while ((dit = readdir(dip)) != NULL) {
513  string item = dit->d_name;
514  if (item == "." || item == "..") continue;
515 
516  string item_path = fullpath + "/" + item;
517 
518  // TODO add a test in configure for the readdir macro(s) DT_REG, DT_LNK
519  // and DT_DIR and use those, if present, to determine if the name is a
520  // link, directory or regular file. These are not present on all systems.
521  // Also, since we need mtime, this is not a huge time saver. But if we
522  // decide not to use the mtime, using these macros could save lots of system
523  // calls. jhrg 3/9/18
524 
525  // Skip this dir entry if it is a sym link and follow links is false
526  if (d_utils->follow_sym_links() == false) {
527  struct stat lbuf;
528  (void) lstat(item_path.c_str(), &lbuf);
529  if (S_ISLNK(lbuf.st_mode)) continue;
530  }
531 
532  // Is this a directory or a file? Should it be excluded or included?
533  statret = stat(item_path.c_str(), &buf);
534  if (statret == 0 && S_ISDIR(buf.st_mode) && !d_utils->exclude(item)) {
535 #if 0
536  // Add a new node; set the size to zero.
537  node->add_item(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
538 #endif
539  node->add_node(new CatalogItem(item, 0, get_time(buf.st_mtime), CatalogItem::node));
540  }
541  else if (statret == 0 && S_ISREG(buf.st_mode) && d_utils->include(item)) {
542 #if 0
543  // Add a new leaf.
544  node->add_item(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
545  d_utils->is_data(item), CatalogItem::leaf));
546 #endif
547  node->add_leaf(new CatalogItem(item, buf.st_size, get_time(buf.st_mtime),
548  d_utils->is_data(item), CatalogItem::leaf));
549  }
550  else {
551  VERBOSE("Excluded the item '" << item_path << "' from the catalog '" << get_catalog_name() << "' node listing." << endl);
552  }
553  } // end of the while loop
554 
555  closedir(dip);
556 
558 
559  sort(node->nodes_begin(), node->nodes_end(), ordering);
560  sort(node->leaves_begin(), node->leaves_end(), ordering);
561 
562  return node;
563  }
564  catch (...) {
565  closedir(dip);
566  throw;
567  }
568 }
569 #endif
570 
571 
579 void CmrCatalog::dump(ostream &strm) const
580 {
581  strm << BESIndent::LMarg << prolog << "(" << (void *) this << ")" << endl;
582  BESIndent::Indent();
583 
584  strm << BESIndent::LMarg << "catalog utilities: " << endl;
585  BESIndent::Indent();
586  get_catalog_utils()->dump(strm);
587  BESIndent::UnIndent();
588  BESIndent::UnIndent();
589 }
590 
591 } // namespace cmr
virtual void dump(std::ostream &strm) const
dump the contents of this object to the specified ostream
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:103
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
static std::vector< std::string > split(const std::string &s, char delim='/', bool skip_empty=true)
Splits the string s into the return vector of tokens using the delimiter delim and skipping empty val...
Definition: BESUtil.cc:1159
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:260
static std::string normalize_path(const std::string &path, bool leading_separator, bool trailing_separator, std::string separator="/")
Removes duplicate separators and provides leading and trailing separators as directed.
Definition: BESUtil.cc:1040
static std::string get_time(bool use_local_time=false)
Definition: BESUtil.cc:1108
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
void set_name(std::string n)
Set the name of the item.
Definition: CatalogItem.h:135
void set_size(size_t s)
Set the size of the item.
Definition: CatalogItem.h:140
void set_is_data(bool id)
Is this item data that the BES should interpret?
Definition: CatalogItem.h:150
void set_lmt(std::string lmt)
Set the LMT for this item.
Definition: CatalogItem.h:145
void set_type(item_type t)
Set the type for this item.
Definition: CatalogItem.h:155
void get_years(std::string collection_name, std::vector< std::string > &years_result)
Definition: CmrApi.cc:352
void get_days(std::string collection_name, std::string r_year, std::string r_month, std::vector< std::string > &days_result)
Definition: CmrApi.cc:445
void get_granules(std::string collection_name, std::string r_year, std::string r_month, std::string r_day, std::vector< cmr::Granule * > &granules)
Definition: CmrApi.cc:536
void get_months(std::string collection_name, std::string year, std::vector< std::string > &months_result)
Definition: CmrApi.cc:382
virtual bes::CatalogNode * get_node_OLD(const std::string &path) const
Get a CatalogNode for the given path in the current catalog.
Definition: CmrCatalog.cc:322
virtual void dump(std::ostream &strm) const
dumps information about this object
Definition: CmrCatalog.cc:579