bes  Updated for version 3.20.10
DMZ.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 
5 // Copyright (c) 2021 OPeNDAP, Inc.
6 // Author: James Gallagher <jgallagher@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 // #include "config.h"
25 
#include <fstream>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <cstring>
32 
33 #include <libdap/BaseType.h>
34 #include <libdap/Array.h>
35 #include <libdap/Type.h>
36 #include <libdap/D4Dimensions.h>
37 #include <libdap/D4Group.h>
38 #include <libdap/D4BaseTypeFactory.h>
39 #include <libdap/D4Enum.h>
40 #include <libdap/D4EnumDefs.h>
41 #include <libdap/D4Attributes.h>
42 #include <libdap/D4Maps.h>
43 #include <libdap/DMR.h>
44 #include <libdap/util.h> // is_simple_type()
45 
46 // TODO Needed? jhrg 11/23/21
47 #define PUGIXML_NO_XPATH
48 #define PUGIXML_HEADER_ONLY
49 #include <pugixml.hpp>
50 
51 #include "url_impl.h" // see bes/http
52 #include "DMRpp.h"
53 #include "DMZ.h" // this includes the pugixml header
54 #include "DmrppCommon.h"
55 #include "DmrppArray.h"
56 #include "DmrppD4Group.h"
57 #include "Base64.h"
58 #include "DmrppRequestHandler.h"
59 #include "BESInternalError.h"
60 #include "BESDebug.h"
61 
62 using namespace pugi;
63 using namespace std;
64 using namespace libdap;
65 
66 // The pugixml library does not grok namespaces. So, for a tag named 'dmrpp:chunks'
67 // if TREAT_NAMESPACES_AS_LITERALS is '1' the parser matches the whole string. If it
68 // is '0' the parser only matches the characters after the colon. In both cases the
69 // namespace (as XML intends) is not used. Using '1' is a bit more efficient.
70 // jhrg 11/2/21
71 #define TREAT_NAMESPACES_AS_LITERALS 1
72 
73 // The code can either search for a DAP variable's information in the XML, or it can
74 // record that during the parse process. Set this when/if the code does the latter.
75 // using this simplifies the lazy-load process, particularly for the DAP2 dds and
76 // data responses (which have not yet been coded completely). jhrg 11/17/21
77 #define USE_CACHED_XML_NODE 1
78 
79 #define PARSER "dmz"
80 #define prolog std::string("DMZ::").append(__func__).append("() - ")
81 
82 namespace dmrpp {
83 
/// Names of the XML elements that declare DAP4 variables. Group is handled
/// separately and is deliberately not listed here.
const std::set<std::string> variable_elements{
    "Byte", "Int8", "Int16", "Int32", "Int64",
    "UInt8", "UInt16", "UInt32", "UInt64",
    "Float32", "Float64",
    "String", "Structure", "Sequence", "Enum", "Opaque"
};
87 
/**
 * @brief Compare an XML element/attribute name (or value) against a key.
 *
 * When TREAT_NAMESPACES_AS_LITERALS is non-zero the two strings must match
 * exactly. Otherwise a match is also reported when the text that follows the
 * first ':' in \c value equals \c key.
 *
 * @param value The text read from the XML document
 * @param key The text to look for
 * @return True if the strings match under the rule above
 */
static inline bool is_eq(const char *value, const char *key)
{
#if TREAT_NAMESPACES_AS_LITERALS
    return std::strcmp(value, key) == 0;
#else
    if (std::strcmp(value, key) == 0)
        return true;

    // No literal match; retry using only the part after the namespace prefix.
    const char *prefix_end = std::strchr(value, ':');
    if (prefix_end == nullptr)
        return false;
    return std::strcmp(prefix_end + 1, key) == 0;
#endif
}
104 
106 static inline bool has_dim_nodes(const xml_node &var_node)
107 {
108  return var_node.child("Dim"); // just one is enough
109 }
110 
/**
 * @brief Test whether a name is contained in a set of names.
 * @param elements_set The set to search
 * @param element_name The name to look for
 * @return True if \c element_name is a member of \c elements_set
 */
static inline bool member_of(const std::set<std::string> &elements_set, const std::string &element_name)
{
    return elements_set.count(element_name) != 0;
}
116 
118 static inline DmrppCommon *dc(BaseType *btp)
119 {
120  auto *dc = dynamic_cast<DmrppCommon*>(btp);
121  if (!dc)
122  throw BESInternalError(string("Expected a BaseType that was also a DmrppCommon instance (")
123  .append((btp) ? btp->name() : "unknown").append(")."), __FILE__, __LINE__);
124  return dc;
125 }
126 
132 DMZ::DMZ(const string &file_name)
133 {
134  parse_xml_doc(file_name);
135 }
136 
141 void
142 DMZ::parse_xml_doc(const std::string &file_name)
143 {
144  std::ifstream stream(file_name);
145 
146  // Free memory used by a previously parsed document.
147  d_xml_doc.reset();
148 
149  // parse_ws_pcdata_single will include the space when it appears in a <Value> </Value>
150  // DAP Attribute element. jhrg 11/3/21
151  pugi::xml_parse_result result = d_xml_doc.load(stream, pugi::parse_default | pugi::parse_ws_pcdata_single);
152 
153  if (!result)
154  throw BESInternalError(string("DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
155 
156  if (!d_xml_doc.document_element())
157  throw BESInternalError("No DMR++ data present.", __FILE__, __LINE__);
158 }
159 
169 void DMZ::process_dataset(DMR *dmr, const xml_node &xml_root)
170 {
171  // Process the attributes
172  int required_attrs_found = 0; // there are 1
173  string href_attr;
174  bool href_trusted = false;
175  string dmrpp_version; // empty or holds a value if dmrpp::version is present
176  for (xml_attribute attr = xml_root.first_attribute(); attr; attr = attr.next_attribute()) {
177  if (is_eq(attr.name(), "name")) {
178  ++required_attrs_found;
179  dmr->set_name(attr.value());
180  }
181  else if (is_eq(attr.name(), "dapVersion")) {
182  dmr->set_dap_version(attr.value());
183  }
184  else if (is_eq(attr.name(), "dmrVersion")) {
185  dmr->set_dmr_version(attr.value());
186  }
187  else if (is_eq(attr.name(), "base")) {
188  dmr->set_request_xml_base(attr.value());
189  BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << dmr->request_xml_base() << "'" << endl);
190  }
191  // The pugixml library does not use XML namespaces AFAIK. jhrg 11/2/21
192  else if (is_eq(attr.name(), "xmlns")) {
193  dmr->set_namespace(attr.value());
194  }
195  // This code does not use namespaces. By default, we assume the DMR++ elements
196  // all use the namespace prefix 'dmrpp'. jhrg 11/2/21
197  else if (is_eq(attr.name(), "dmrpp:href")) {
198  href_attr = attr.value();
199  }
200  else if (is_eq(attr.name(), "dmrpp:trust")) {
201  href_trusted = is_eq(attr.value(), "true");
202  }
203  else if (is_eq(attr.name(), "dmrpp:version")) {
204  dmrpp_version = attr.value();
205  }
206  // We allow other, non recognized attributes, so there is no 'else' jhrg 10/20/21
207  }
208 
209  if (dmrpp_version.empty()) { // old style DMR++, set enable-kludge flag
210  DmrppRequestHandler::d_emulate_original_filter_order_behavior = true;
211  }
212  else {
213  auto dmrpp = dynamic_cast<DMRpp*>(dmr);
214  if (dmrpp) {
215  dmrpp->set_version(dmrpp_version);
216  }
217  }
218 
219  if (required_attrs_found != 1)
220  throw BESInternalError("DMR++ XML dataset element missing one or more required attributes.", __FILE__, __LINE__);
221 
222  d_dataset_elem_href.reset(new http::url(href_attr, href_trusted));
223 }
224 
230 void DMZ::process_dimension(D4Group *grp, const xml_node &dimension_node)
231 {
232  string name_value;
233  string size_value;
234  for (xml_attribute attr = dimension_node.first_attribute(); attr; attr = attr.next_attribute()) {
235  if (is_eq(attr.name(), "name")) {
236  name_value = attr.value();
237  }
238  else if (is_eq(attr.name(), "size")) {
239  size_value = attr.value();
240  }
241  }
242 
243  if (name_value.empty() || size_value.empty())
244  throw BESInternalError("The required attribute 'name' or 'size' was missing from a Dimension element.", __FILE__, __LINE__);
245 
246  // This getter (dim_def) allocates a new object if needed.
247  try {
248  auto *dimension = new D4Dimension();
249  dimension->set_name(name_value);
250  dimension->set_size(size_value);
251  grp->dims()->add_dim_nocopy(dimension);
252  }
253  catch (Error &e) {
254  throw BESInternalError(e.get_error_message(), __FILE__, __LINE__);
255  }
256 }
257 
265 void DMZ::process_dim(DMR *dmr, D4Group *grp, Array *array, const xml_node &dim_node)
266 {
267  assert(array->is_vector_type());
268 
269  string name_value;
270  string size_value;
271  for (xml_attribute attr = dim_node.first_attribute(); attr; attr = attr.next_attribute()) {
272  if (is_eq(attr.name(), "name")) {
273  name_value = attr.value();
274  }
275  else if (is_eq(attr.name(), "size")) {
276  size_value = attr.value();
277  }
278  }
279 
280  if (name_value.empty() && size_value.empty())
281  throw BESInternalError("Either 'size' or 'name' must be used in a Dim element.", __FILE__, __LINE__);
282  if (!name_value.empty() && !size_value.empty())
283  throw BESInternalError("Only one of 'size' and 'name' are allowed in a Dim element, but both were used.", __FILE__, __LINE__);
284 
285  if (!size_value.empty()) {
286  BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << stoi(size_value) << endl);
287  array->append_dim(stoi(size_value));
288  }
289  else if (!name_value.empty()) {
290  BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name_value << endl);
291 
292  D4Dimension *dim;
293  if (name_value[0] == '/') // lookup the Dimension in the root group
294  dim = dmr->root()->find_dim(name_value);
295  else
296  // get enclosing Group and lookup Dimension there
297  dim = grp->find_dim(name_value);
298 
299  if (!dim)
300  throw BESInternalError("The dimension '" + name_value + "' was not found while parsing the variable '" + array->name() + "'.",__FILE__,__LINE__);
301 
302  array->append_dim(dim);
303  }
304 }
305 
306 void DMZ::process_map(DMR *dmr, D4Group *grp, Array *array, const xml_node &map_node)
307 {
308  assert(array->is_vector_type());
309 
310  string name_value;
311  string size_value;
312  for (xml_attribute attr = map_node.first_attribute(); attr; attr = attr.next_attribute()) {
313  if (is_eq(attr.name(), "name")) {
314  name_value = attr.value();
315  }
316  }
317 
318  // All map names are FQNs. If we get one that isn't, assume it's within the most current group.
319  if (name_value[0] != '/')
320  name_value = grp->FQN() + name_value;
321 
322  // The array variable that holds the data for the Map
323  Array *map_source = dmr->root()->find_map_source(name_value);
324 
325  // In the SAX2 parser, we had 'strict' and 'permissive' modes. For Maps, permissive
326  // allowed the DAP variable for a Map to be missing so that users could request just
327  // the data with the maps. I'm implementing that behavior. Below is the original
328  // comment from DmrppParserSAX2.cc. jhrg 11/3/21
329 
330  // Change: If the parser is in 'strict' mode (the default) and the Array named by
331  // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
332  // mode), then this is not an error. However, the Array referenced by the Map will
333  // be null. This is a change in the parser's behavior to accommodate requests for
334  // Arrays that include Maps that do not also include the Map(s) in the request.
335  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
336 
337  array->maps()->add_map(new D4Map(name_value, map_source));
338 }
339 
354 void DMZ::process_variable(DMR *dmr, D4Group *group, Constructor *parent, const xml_node &var_node)
355 {
356  assert(group);
357 
358  // Variables are declared using nodes with type names (e.g., <Float32...>)
359  // Variables are arrays if they have one or more <Dim...> child nodes.
360  Type t = get_type(var_node.name());
361 
362  assert(t != dods_group_c); // Groups are special and handled elsewhere
363 
364  bool is_array_type = has_dim_nodes(var_node);
365  BaseType *btp;
366  if (is_array_type) {
367  btp = add_array_variable(dmr, group, parent, t, var_node);
368  if (t == dods_structure_c || t == dods_sequence_c) {
369  assert(btp->type() == dods_array_c && btp->var()->type() == t);
370  // NB: For an array of a Constructor, add children to the Constructor, not the array
371  parent = dynamic_cast<Constructor*>(btp->var());
372  assert(parent);
373  for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
374  if (member_of(variable_elements, child.name()))
375  process_variable(dmr, group, parent, child);
376  }
377  }
378  }
379  else {
380  btp = add_scalar_variable(dmr, group, parent, t, var_node);
381  if (t == dods_structure_c || t == dods_sequence_c) {
382  assert(btp->type() == t);
383  parent = dynamic_cast<Constructor*>(btp);
384  assert(parent);
385  for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
386  if (member_of(variable_elements, child.name()))
387  process_variable(dmr, group, parent, child);
388  }
389  }
390  }
391 
392  dc(btp)->set_xml_node(var_node);
393 }
394 
402 BaseType *DMZ::build_variable(DMR *dmr, D4Group *group, Type t, const xml_node &var_node)
403 {
404  assert(dmr->factory());
405 
406  string name_value;
407  string enum_value;
408  for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
409  if (is_eq(attr.name(), "name")) {
410  name_value = attr.value();
411  }
412  if (is_eq(attr.name(), "enum")) {
413  enum_value = attr.value();
414  }
415  }
416 
417  if (name_value.empty())
418  throw BESInternalError("The variable 'name' attribute was missing.", __FILE__, __LINE__);
419 
420  BaseType *btp = dmr->factory()->NewVariable(t, name_value);
421  if (!btp)
422  throw BESInternalError("Could not instantiate the variable ' "+ name_value +"'.", __FILE__, __LINE__);
423 
424  btp->set_is_dap4(true);
425 
426  if (t == dods_enum_c) {
427  if (enum_value.empty())
428  throw BESInternalError("The variable ' " + name_value + "' lacks an 'enum' attribute.", __FILE__, __LINE__);
429 
430  D4EnumDef *enum_def;
431  if (enum_value[0] == '/')
432  enum_def = dmr->root()->find_enum_def(enum_value);
433  else
434  enum_def = group->find_enum_def(enum_value);
435 
436  if (!enum_def)
437  throw BESInternalError("Could not find the Enumeration definition '" + enum_value + "'.", __FILE__, __LINE__);
438 
439  dynamic_cast<D4Enum&>(*btp).set_enumeration(enum_def);
440  }
441 
442  return btp;
443 }
444 
455 BaseType *DMZ::add_scalar_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
456 {
457  assert(group);
458 
459  BaseType *btp = build_variable(dmr, group, t, var_node);
460 
461  // if parent is non-null, the code should add the new var to a constructor,
462  // else add the new var to the group.
463  if (parent)
464  parent->add_var_nocopy(btp);
465  else
466  group->add_var_nocopy(btp);
467 
468  return btp;
469 }
470 
485 BaseType *DMZ::add_array_variable(DMR *dmr, D4Group *group, Constructor *parent, Type t, const xml_node &var_node)
486 {
487  assert(group);
488 
489  BaseType *btp = build_variable(dmr, group, t, var_node);
490 
491  // Transform the scalar to an array
492  auto *array = static_cast<Array*>(dmr->factory()->NewVariable(dods_array_c, btp->name()));
493  array->set_is_dap4(true);
494  array->add_var_nocopy(btp);
495 
496  // The SAX parser set up the parse of attributes here. For the thin DMR, we won't
497  // parse those from the DMR now. jhrg 10/21/21
498 
499  // Now grab the dimension elements
500  for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
501  if (is_eq(child.name(), "Dim")) {
502  process_dim(dmr, group, array, child);
503  }
504  else if (is_eq(child.name(), "Map")) {
505  process_map(dmr, group, array, child);
506  }
507  }
508 
509  if (parent)
510  parent->add_var_nocopy(array);
511  else
512  group->add_var_nocopy(array);
513 
514  return array;
515 }
516 
525 void DMZ::process_group(DMR *dmr, D4Group *parent, const xml_node &var_node)
526 {
527  string name_value;
528  for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
529  if (is_eq(attr.name(), "name")) {
530  name_value = attr.value();
531  }
532  }
533 
534  if (name_value.empty())
535  throw BESInternalError("The required attribute 'name' was missing from a Group element.", __FILE__, __LINE__);
536 
537  BaseType *btp = dmr->factory()->NewVariable(dods_group_c, name_value);
538  if (!btp)
539  throw BESInternalError("Could not instantiate the Group '" + name_value + "'.", __FILE__, __LINE__);
540 
541  auto new_group = dynamic_cast<DmrppD4Group*>(btp);
542 
543  // Need to set this to get the D4Attribute behavior in the type classes
544  // shared between DAP2 and DAP4. jhrg 4/18/13
545  new_group->set_is_dap4(true);
546 
547  // link it up and change the current group
548  new_group->set_parent(parent);
549  parent->add_group_nocopy(new_group);
550 
551  // Save the xml_node so that we can later find unprocessed XML without searching
552  new_group->set_xml_node(var_node);
553 
554  // Now parse all the child nodes of the Group.
555  // NB: this is the same block of code as in build_thin_dmr(); refactor. jhrg 10/21/21
556  for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
557  if (is_eq(child.name(), "Dimension")) {
558  process_dimension(new_group, child);
559  }
560  else if (is_eq(child.name(), "Group")) {
561  process_group(dmr, new_group, child);
562  }
563  else if (member_of(variable_elements, child.name())) {
564  process_variable(dmr, new_group, nullptr, child);
565  }
566  }
567 }
568 
574 void DMZ::build_thin_dmr(DMR *dmr)
575 {
576  auto xml_root_node = d_xml_doc.first_child();
577 
578  process_dataset(dmr, xml_root_node);
579 
580  auto root_group = dmr->root();
581 
582  auto *dg = dynamic_cast<DmrppD4Group*>(root_group);
583  if (!dg)
584  throw BESInternalError("Expected the root group to also be an instance of DmrppD4Group.", __FILE__, __LINE__);
585 
586  dg->set_xml_node(xml_root_node);
587 
588  for (auto child = xml_root_node.first_child(); child; child = child.next_sibling()) {
589  if (is_eq(child.name(), "Dimension")) {
590  process_dimension(dg, child);
591  }
592  else if (is_eq(child.name(), "Group")) {
593  process_group(dmr, dg, child);
594  }
595  // TODO Add EnumDef
596  else if (member_of(variable_elements, child.name())) {
597  process_variable(dmr, dg, nullptr, child);
598  }
599  }
600 }
601 
610 void DMZ::process_attribute(D4Attributes *attributes, const xml_node &dap_attr_node)
611 {
612  string name_value;
613  string type_value;
614  for (xml_attribute attr = dap_attr_node.first_attribute(); attr; attr = attr.next_attribute()) {
615  if (is_eq(attr.name(), "name")) {
616  name_value = attr.value();
617  }
618  if (is_eq(attr.name(), "type")) {
619  type_value = attr.value();
620  }
621  }
622 
623  if (name_value.empty() || type_value.empty())
624  throw BESInternalError("The required attribute 'name' or 'type' was missing from an Attribute element.", __FILE__, __LINE__);
625 
626  if (type_value == "Container") {
627  // Make the new attribute container and add it to current container
628  auto *dap_attr_cont = new D4Attribute(name_value, attr_container_c);
629  attributes->add_attribute_nocopy(dap_attr_cont);
630  // In this call, 'attributes()' will allocate the D4Attributes object
631  // that will hold the container's attributes.
632  // Test to see if there really are child "Attribute" nodes - empty containers
633  // are allowed. jhrg 11/4/21
634  if (dap_attr_node.first_child()) {
635  for (auto attr_node: dap_attr_node.children("Attribute")) {
636  process_attribute(dap_attr_cont->attributes(), attr_node);
637  }
638  }
639  }
640  else if (type_value == "OtherXML") {
641  // TODO Add support for OtherXML
642  }
643  else {
644  // Make the D4Attribute and add it to the D4Attributes attribute container
645  auto *attribute = new D4Attribute(name_value, StringToD4AttributeType(type_value));
646  attributes->add_attribute_nocopy(attribute);
647  // Process one or more Value elements
648  for (auto value_elem = dap_attr_node.first_child(); value_elem; value_elem = value_elem.next_sibling()) {
649  if (is_eq(value_elem.name(), "Value")) {
650  attribute->add_value(value_elem.child_value()); // returns the text of the first data node
651  }
652  }
653  }
654 }
655 
673 void DMZ::build_basetype_chain(BaseType *btp, stack<BaseType*> &bt)
674 {
675  auto parent = btp->get_parent();
676  bt.push(btp);
677 
678  // The parent must be non-null and not the root group (the root group has no parent).
679  if (parent && !(parent->type() == dods_group_c && parent->get_parent() == nullptr))
680  build_basetype_chain(parent, bt);
681 }
682 
683 xml_node DMZ::get_variable_xml_node_helper(const xml_node &/*parent_node*/, stack<BaseType*> &/*bt*/)
684 {
685 #if !USE_CACHED_XML_NODE
686  // When we have an array of Structure or Sequence, both the Array and the
687  // Structure BaseType are pushed on the stack. This happens because, for
688  // constructors, other variables reference them as a parent node (while that's
689  // not the case for the cardinal types held by an array). Here we pop the
690  // Array off the stack. A better solution might be to better control what gets
691  // pushed by build_basetype_chain(). jhrg 10/24/21
692  if (bt.top()->type() == dods_array_c && bt.top()->var()->is_constructor_type())
693  bt.pop();
694 
695  // The DMR XML stores both scalar and array variables using XML elements
696  // named for the cardinal type. For an array, that is the type of the
697  // element, so we use BaseType->var()->type_name() for an Array.
698  string type_name = bt.top()->type() == dods_array_c ? bt.top()->var()->type_name(): bt.top()->type_name();
699  string var_name = bt.top()->name();
700  bt.pop();
701 
702  // Now look for the node with the correct element type and matching name
703  for (auto node = parent_node.child(type_name.c_str()); node; node = node.next_sibling()) {
704  for (xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
705  if (is_eq(attr.name(), "name") && is_eq(attr.value(), var_name.c_str())) {
706  // if this is the last BaseType on the stack, return the node
707  if (bt.empty())
708  return node;
709  else
710  return get_variable_xml_node_helper(node, bt);
711  }
712  }
713  }
714 
715  return xml_node(); // return an empty node
716 #else
717  return xml_node(); // return an empty node
718 #endif
719 }
720 
727 xml_node DMZ::get_variable_xml_node(BaseType *btp) const
728 {
729 #if USE_CACHED_XML_NODE
730  auto node = dc(btp)->get_xml_node();
731  if (node == nullptr)
732  throw BESInternalError(string("The xml_node for '").append(btp->name()).append("' was not recorded."), __FILE__, __LINE__);
733 
734  return node;
735 #else
736  // load the BaseType objects onto a stack, since we start at the leaf and
737  // go backward using its 'parent' pointer, the order of BaseTypes on the
738  // stack will match the order in the hierarchy of the DOM tree.
739  stack<BaseType*> bt;
740  build_basetype_chain(btp, bt);
741 
742  xml_node dataset = d_xml_doc.first_child();
743  if (!dataset || !is_eq(dataset.name(), "Dataset"))
744  throw BESInternalError("No DMR++ has been parsed.", __FILE__, __LINE__);
745 
746  auto node = get_variable_xml_node_helper(dataset, bt);
747  return node;
748 #endif
749 }
750 
756 
768 void
769 DMZ::load_attributes(BaseType *btp)
770 {
771  if (dc(btp)->get_attributes_loaded())
772  return;
773 
774  load_attributes(btp, get_variable_xml_node(btp));
775 
776  // TODO Remove redundant
777  dc(btp)->set_attributes_loaded(true);
778 
779  switch (btp->type()) {
780  // When we load attributes for an Array, the set_send_p() method
781  // is called for its 'template' variable, but that call fails (and
782  // the attributes are already loaded). This block marks the attributes
783  // as loaded so the 'var_node == nullptr' exception above does not
784  // get thrown. Maybe a better fix would be to mark 'child variables'
785  // as having their attributes loaded. jhrg 11/16/21
786  case dods_array_c: {
787  dc(btp->var())->set_attributes_loaded(true);
788  break;
789  }
790 
791  // FIXME There are no tests for this code. The above bock for Array
792  // was needed, so it seems likely that this will be too, but ...
793  // jhrg 11/16/21
794  case dods_structure_c:
795  case dods_sequence_c:
796  case dods_grid_c: {
797  auto *c = dynamic_cast<Constructor*>(btp);
798  if (c) {
799  for (auto i = c->var_begin(), e = c->var_end(); i != e; i++) {
800  dc(btp->var())->set_attributes_loaded(true);
801  }
802  break;
803  }
804  }
805 
806  default:
807  break;
808  }
809 }
810 
816 void
817 DMZ::load_attributes(BaseType *btp, xml_node var_node) const
818 {
819  if (dc(btp)->get_attributes_loaded())
820  return;
821 
822  // Attributes for this node will be held in the var_node siblings.
823  // NB: Make an explict call to the BaseType implementation in case
824  // the attributes() method is specialized for this DMR++ code to
825  // trigger a lazy-load of the variables' attributes. jhrg 10/24/21
826  // Could also use BaseType::set_attributes(). jhrg
827  auto attributes = btp->BaseType::attributes();
828  for (auto child = var_node.first_child(); child; child = child.next_sibling()) {
829  if (is_eq(child.name(), "Attribute")) {
830  process_attribute(attributes, child);
831  }
832  }
833 
834  dc(btp)->set_attributes_loaded(true);
835 }
836 
841 void
842 DMZ::load_attributes(Constructor *constructor)
843 {
844  load_attributes(constructor, get_variable_xml_node(constructor));
845  for (auto i = constructor->var_begin(), e = constructor->var_end(); i != e; ++i) {
846  // Groups are not allowed inside a Constructor
847  assert((*i)->type() != dods_group_c);
848  load_attributes(*i);
849  }
850 }
851 
852 void
853 DMZ::load_attributes(D4Group *group) {
854  // The root group is special; look for its DAP Attributes in the Dataset element
855  if (group->get_parent() == nullptr) {
856  xml_node dataset = d_xml_doc.child("Dataset");
857  if (!dataset)
858  throw BESInternalError("Could not find the 'Dataset' element in the DMR++ XML document.", __FILE__, __LINE__);
859  load_attributes(group, dataset);
860  }
861  else {
862  load_attributes(group, get_variable_xml_node(group));
863  }
864 
865  for (auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
866  // Even though is_constructor_type() returns true for instances of D4Group,
867  // Groups are kept under a separate container from variables because they
868  // have a different function than the Structure and Sequence types (Groups
869  // never hold data).
870  assert((*i)->type() != dods_group_c);
871  load_attributes(*i);
872  }
873 
874  for (auto i = group->grp_begin(), e = group->grp_end(); i != e; ++i) {
875  load_attributes(*i);
876  }
877 }
878 
879 void DMZ::load_all_attributes(libdap::DMR *dmr)
880 {
881  assert(d_xml_doc != nullptr);
882  load_attributes(dmr->root());
883 }
884 
886 
891 
901 void
902 DMZ::process_compact(BaseType *btp, const xml_node &compact)
903 {
904  dc(btp)->set_compact(true);
905 
906  auto char_data = compact.child_value();
907  if (!char_data)
908  throw BESInternalError("The dmrpp::compact is missing data values.",__FILE__,__LINE__);
909 
910  std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
911 
912  if (btp->type() != dods_array_c)
913  throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
914 
915  // We know from the above that this is an Array, so accessing btp->var() is OK.
916  switch (btp->var()->type()) {
917  case dods_array_c:
918  throw BESInternalError("DMR++ document fail: An Array may not be the template for an Array.", __FILE__, __LINE__);
919 
920  case dods_byte_c:
921  case dods_char_c:
922  case dods_int8_c:
923  case dods_uint8_c:
924  case dods_int16_c:
925  case dods_uint16_c:
926  case dods_int32_c:
927  case dods_uint32_c:
928  case dods_int64_c:
929  case dods_uint64_c:
930 
931  case dods_enum_c:
932 
933  case dods_float32_c:
934  case dods_float64_c:
935  btp->val2buf(reinterpret_cast<void *>(&decoded[0]));
936  btp->set_read_p(true);
937  break;
938 
939  case dods_str_c:
940  case dods_url_c: {
941  std::string str(decoded.begin(), decoded.end());
942  auto *st = static_cast<DmrppArray *>(btp);
943  // Although val2buf() takes a void*, for DAP Str and Url types, it casts
944  // that to std::string*. jhrg 11/4/21
945  st->val2buf(&str);
946  st->set_read_p(true);
947  break;
948  }
949 
950  default:
951  throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
952  }
953 }
954 
962 void DMZ::process_chunk(DmrppCommon *dc, const xml_node &chunk) const
963 {
964  string href;
965  string trust;
966  string offset;
967  string size;
968  string chunk_position_in_array;
969 
970  bool href_trusted = false;
971 
972  for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
973  if (is_eq(attr.name(), "href")) {
974  href = attr.value();
975  }
976  else if (is_eq(attr.name(), "trust")) {
977  href_trusted = is_eq(attr.value(), "true");
978  }
979  else if (is_eq(attr.name(), "offset")) {
980  offset = attr.value();
981  }
982  else if (is_eq(attr.name(), "nBytes")) {
983  size = attr.value();
984  }
985  else if (is_eq(attr.name(), "chunkPositionInArray")) {
986  chunk_position_in_array = attr.value();
987  }
988  }
989 
990  if (offset.empty() || size.empty())
991  throw BESInternalError("Both size and offset are required for a chunk node.", __FILE__, __LINE__);
992 
993  if (!href.empty()) {
994  // TODO For many cases, there are many chunks that share a URL. We could store
995  // a hash_map of known URLs and cut down on the total number of shared pointers.
996  // jhrg 11/22/21
997  shared_ptr<http::url> data_url(new http::url(href, href_trusted));
998  dc->add_chunk(data_url, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
999  }
1000  else {
1001  dc->add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
1002  }
1003 }
1004 
1011 void DMZ::process_cds_node(DmrppCommon *dc, const xml_node &chunks)
1012 {
1013  for (auto child = chunks.child("dmrpp:chunkDimensionSizes"); child /*&& !cds_found*/; child = child.next_sibling()) {
1014  if (is_eq(child.name(), "dmrpp:chunkDimensionSizes")) {
1015  string sizes = child.child_value();
1016  dc->parse_chunk_dimension_sizes(sizes);
1017  }
1018  }
1019 }
1020 
1021 // a 'dmrpp:chunks' node has a chunkDimensionSizes node and then one or more chunks
1022 // nodes, and they have to be in that order.
1023 void DMZ::process_chunks(DmrppCommon *dc, const xml_node &chunks)
1024 {
1025  for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
1026  if (is_eq(attr.name(), "compressionType")) {
1027  dc->set_filter(attr.value());
1028  }
1029  }
1030 
1031  // Look for the chunksDimensionSizes element - it will not be present for contiguous data
1032  process_cds_node(dc, chunks);
1033 
1034  // Chunks for this node will be held in the var_node siblings.
1035  for (auto chunk = chunks.child("dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
1036  if (is_eq(chunk.name(), "dmrpp:chunk")) {
1037  process_chunk(dc, chunk);
1038  }
1039  }
1040 }
1041 
1050 void DMZ::load_chunks(BaseType *btp)
1051 {
1052  if (dc(btp)->get_chunks_loaded())
1053  return;
1054 
1055  // goto the DOM tree node for this variable
1056  xml_node var_node = get_variable_xml_node(btp);
1057  if (var_node == nullptr)
1058  throw BESInternalError("Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
1059 
1060  // Chunks for this node will be held in the var_node siblings. For a given BaseType, there should
1061  // be only one chunks node xor one chunk node.
1062  int chunks_found = 0;
1063  int chunk_found = 0;
1064  int compact_found = 0;
1065  auto child = var_node.child("dmrpp:chunks");
1066  if (child) {
1067  chunks_found = 1;
1068  process_chunks(dc(btp), child);
1069  }
1070 
1071  auto chunk = var_node.child("dmrpp:chunk");
1072  if (chunk) {
1073  chunk_found = 1;
1074  process_chunk(dc(btp), chunk);
1075 
1076  }
1077 
1078  auto compact = var_node.child("dmrpp:compact");
1079  if (compact) {
1080  compact_found = 1;
1081  process_compact(btp, compact);
1082  }
1083 
1084  // Here we (optionally) check that exactly one of the three types of node was found
1085  if (DmrppRequestHandler::d_require_chunks) {
1086  int elements_found = chunks_found + chunk_found + compact_found;
1087  if (elements_found != 1) {
1088  ostringstream oss;
1089  oss << "Expected chunk, chunks or compact information in the DMR++ data. Found " << elements_found
1090  << " types of nodes.";
1091  throw BESInternalError(oss.str(), __FILE__, __LINE__);
1092  }
1093  }
1094 
1095  dc(btp)->set_chunks_loaded(true);
1096 }
1097 
1099 
1100 } // namespace dmrpp
exception thrown if internal error encountered
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
Definition: DmrppCommon.cc:134
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void set_filter(const std::string &value)
Set the value of the filters property.
Definition: DmrppCommon.cc:108
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:147
Type
Type of JSON value.
Definition: rapidjson.h:664