bes  Updated for version 3.20.10
DmrppParserSax2.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2012 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 #include <iostream>
28 #include <sstream>
29 
30 #include <cstring>
31 #include <cstdarg>
32 #include <cassert>
33 
34 #include <libxml/parserInternals.h>
35 
36 #include <libdap/DMR.h>
37 
38 #include <libdap/BaseType.h>
39 #include <libdap/Array.h>
40 #include <libdap/D4Group.h>
41 #include <libdap/D4Attributes.h>
42 #include <libdap/D4Maps.h>
43 #include <libdap/D4Enum.h>
44 #include <libdap/D4BaseTypeFactory.h>
45 
46 #include <libdap/DapXmlNamespaces.h>
47 #include <libdap/util.h>
48 
49 #include <BESInternalError.h>
50 #include <BESDebug.h>
51 #include <BESCatalog.h>
52 #include <BESCatalogUtils.h>
53 #include <BESCatalogList.h>
54 #include <BESUtil.h>
55 #include <TheBESKeys.h>
56 #include <BESRegex.h>
57 
58 #include "DmrppRequestHandler.h"
59 #include "DMRpp.h"
60 #include "DmrppParserSax2.h"
61 #include "DmrppCommon.h"
62 #include "DmrppStr.h"
63 #include "DmrppNames.h"
64 #include "DmrppArray.h"
65 
66 #include "CurlUtils.h"
67 #include "HttpNames.h"
68 
69 #include "Base64.h"
70 
71 #define FIVE_12K 524288
72 #define ONE_MB 1048576
73 #define MAX_INPUT_LINE_LENGTH ONE_MB
74 #define INCLUDE_BESDEBUG_ISSET 0
75 
76 #define prolog std::string("DmrppParserSax2::").append(__func__).append("() - ")
77 
78 static const string dmrpp_namespace = "http://xml.opendap.org/dap/dmrpp/1.0.0#";
79 
80 
81 using namespace libdap;
82 using namespace std;
84 
85 namespace dmrpp {
86 
// Printable names for the parser states, used only when building debug
// messages (states[get_state()]).
// NOTE(review): the order presumably mirrors the ParseState enumeration
// declared in the header - confirm before adding or reordering entries.
static const char *states[] = {
    // document / group level
    "parser_start", "inside_dataset",
    "inside_group", // the state just after the start of a Group element was parsed

    // attributes
    "inside_attribute_container", "inside_attribute", "inside_attribute_value",
    "inside_other_xml_attribute",

    // enumeration and dimension definitions
    "inside_enum_def", "inside_enum_const", "inside_dim_def",

    // variables
    "inside_simple_type", // covers Byte, ..., Url, Opaque
    // (there is no "inside_array" entry)
    "inside_dim", "inside_map", "inside_constructor",

    // elements outside the DAP4 namespace, including the dmrpp ones
    "not_dap4_element", "inside_dmrpp_object",
    "inside_dmrpp_chunkDimensionSizes_element", "inside_dmrpp_compact_element",

    // terminal and error states
    "parser_unknown", "parser_error", "parser_fatal_error", "parser_end"
};
115 
/// Tell whether an element name differs from the tag the caller expects.
/// @param name The element name as delivered by the parser.
/// @param tag The tag to compare against.
/// @return true when the two C strings are not equal, false when they match.
static bool is_not(const char *name, const char *tag)
{
    const bool same_tag = (strcmp(name, tag) == 0);
    return !same_tag;
}
120 
121 
130 D4EnumDef *
131 DmrppParserSax2::enum_def()
132 {
133  if (!d_enum_def) d_enum_def = new D4EnumDef;
134 
135  return d_enum_def;
136 }
137 
144 D4Dimension *
145 DmrppParserSax2::dim_def()
146 {
147  if (!d_dim_def) d_dim_def = new D4Dimension;
148 
149  return d_dim_def;
150 }
151 
152 /* Search through the attribute array for a given attribute name.
153  * If the name is found, return the string value for that attribute
154  * @param name: Search for this name
155  * @param attributes: Array that holds the attribute values to search
156  * @param num_attributes: Number of attributes
157  * @return string value of attribute; the empty string if the name was not found
158  */
159 string DmrppParserSax2::get_attribute_val(const string &name, const xmlChar **attributes, int num_attributes)
160 {
161  unsigned int index = 0;
162  for (int i = 0; i < num_attributes; ++i, index += 5) {
163  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
164  return string((const char *)attributes[index+3], (const char *)attributes[index+4]);
165  }
166  }
167  return "";
168 }
169 
170 #if 0
// Compiled out by the enclosing '#if 0'; kept for reference only.
// Cleared xml_attrs and then inserted one entry per XML attribute, keyed by
// the attribute name found at attributes[index] (five pointers per attribute).
176 void DmrppParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
177 {
178  if (!xml_attrs.empty()) xml_attrs.clear(); // erase old attributes
179 
180  // Make a value using the attribute name and the prefix, namespace URI
181  // and the value. The prefix might be null.
182  unsigned int index = 0;
183  for (int i = 0; i < nb_attributes; ++i, index += 5) {
184  xml_attrs.insert(
185  map<string, XMLAttribute>::value_type(string((const char *) attributes[index]),
186  XMLAttribute(attributes + index + 1)));
187 
188  BESDEBUG(PARSER, prolog <<
189  "XML Attribute '" << (const char *)attributes[index] << "': " << xml_attrs[(const char *)attributes[index]].value << endl);
190  }
191 }
192 #endif
193 
200 void DmrppParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
201 {
202  // make a value with the prefix and namespace URI. The prefix might be null.
203  for (int i = 0; i < nb_namespaces; ++i) {
204  namespace_table.insert(
205  map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *) namespaces[i * 2] : "",
206  (const char *) namespaces[i * 2 + 1]));
207  }
208 }
209 
210 #if 0
// Compiled out by the enclosing '#if 0'; kept for reference only.
// Older one-argument form: looked the attribute up in xml_attrs and reported
// an error through dmr_error() when it was absent.
217 bool DmrppParserSax2::check_required_attribute(const string & attr)
218 {
219  if (xml_attrs.find(attr) == xml_attrs.end()) {
220  dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
221  return false;
222  }
223  else
224  return true;
225 }
226 #endif
227 
228 /*
229  * An improved version of the previous check_required_attribute.
230  * Searches for an attribute name within the attribute array.
231  * @param name: The attribute name to search for
232  * @param attributes: The attribute array
233  * @param num_attributes: The number of attributes
234  * @return success: true
235  * failure: false
236  */
237 bool DmrppParserSax2::check_required_attribute(const string &name, const xmlChar **attributes, int num_attributes)
238 {
239  unsigned int index = 0;
240  for (int i = 0; i < num_attributes; ++i, index += 5) {
241  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
242  return true;
243  }
244  }
245 
246  dmr_error(this, "Required attribute '%s' not found.", name.c_str());
247  return false;
248 }
249 
250 #if 0
// Compiled out by the enclosing '#if 0'; kept for reference only.
// Older one-argument form: true when 'attr' is a key of xml_attrs.
257 bool DmrppParserSax2::check_attribute(const string & attr)
258 {
259  return (xml_attrs.find(attr) != xml_attrs.end());
260 }
261 #endif
262 
273 bool DmrppParserSax2::check_attribute(const string &name, const xmlChar **attributes, int num_attributes)
274 {
275  unsigned int index = 0;
276  for (int i = 0; i < num_attributes; ++i, index += 5) {
277  if (strncmp(name.c_str(), (const char *)attributes[index], name.length()) == 0) {
278  return true;
279  }
280  }
281  return false;
282 }
283 
284 bool DmrppParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
285 {
286  if (is_not(name, "Dimension")) return false;
287 
288 #if 0
289  transfer_xml_attrs(attrs, nb_attributes);
290 #endif
291 
292 #if 0
293  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("size", attrs, nb_attributes))) {
294  dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
295  return false;
296  }
297 #endif
298 
299  if (!check_required_attribute("name", attrs, nb_attributes)) {
300  dmr_error(this, "The required attribute 'name' was missing from a Dimension element.");
301  return false;
302  }
303 
304  if (!check_required_attribute("size", attrs, nb_attributes)) {
305  dmr_error(this, "The required attribute 'size' was missing from a Dimension element.");
306  return false;
307  }
308 
309  // This getter (dim_def) allocates a new object if needed.
310  dim_def()->set_name(get_attribute_val("name", attrs, nb_attributes));
311  try {
312  dim_def()->set_size(get_attribute_val("size", attrs, nb_attributes));
313  }
314  catch (Error &e) {
315  dmr_error(this, e.get_error_message().c_str());
316  return false;
317  }
318 
319  return true;
320 }
321 
339 bool DmrppParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
340 {
341  if (is_not(name, "Dim")) return false;
342 
343 #if 0
344  transfer_xml_attrs(attrs, nb_attributes);
345 #endif
346 #if 0
347  if (check_attribute("size", attrs, nb_attributes) && check_attribute("name", attrs, nb_attributes)) {
348  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
349  return false;
350  }
351  if (!(check_attribute("size", attrs, nb_attributes) || check_attribute("name", attrs, nb_attributes))) {
352  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
353  return false;
354  }
355 #endif
356  bool has_size = check_attribute("size", attrs, nb_attributes);
357  bool has_name = check_attribute("name", attrs, nb_attributes);
358  if (has_size && has_name) {
359  dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
360  return false;
361  }
362  if (!has_size && !has_name) {
363  dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
364  return false;
365  }
366 
367 
368  if (!top_basetype()->is_vector_type()) {
369  // Make the top BaseType* an array
370  BaseType *b = top_basetype();
371  pop_basetype();
372 
373  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
374  a->set_is_dap4(true);
375  a->add_var_nocopy(b);
376  a->set_attributes_nocopy(b->attributes());
377  // trick: instead of popping b's attributes, copying them and then pushing
378  // a's copy, just move the pointer (but make sure there's only one object that
379  // references that pointer).
380  b->set_attributes_nocopy(0);
381 
382  push_basetype(a);
383  }
384 
385  assert(top_basetype()->is_vector_type());
386 
387  Array *a = static_cast<Array*>(top_basetype());
388  if (has_size) {
389  size_t dim_size = stoi(get_attribute_val("size", attrs, nb_attributes));
390  BESDEBUG(PARSER, prolog << "Processing nameless Dim of size: " << dim_size << endl);
391  a->append_dim(dim_size); // low budget code for now. jhrg 8/20/13, modified to use new function. kln 9/7/19
392  return true;
393  }
394  else if (has_name) {
395  string name = get_attribute_val("name", attrs, nb_attributes);
396  BESDEBUG(PARSER, prolog << "Processing Dim with named Dimension reference: " << name << endl);
397 
398  D4Dimension *dim = 0;
399  if (name[0] == '/') // lookup the Dimension in the root group
400  dim = dmr()->root()->find_dim(name);
401  else
402  // get enclosing Group and lookup Dimension there
403  dim = top_group()->find_dim(name);
404 
405  if (!dim)
406  throw BESInternalError("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
407  a->append_dim(dim);
408  return true;
409  }
410  return false;
411 }
412 
413 
414 bool DmrppParserSax2::process_dmrpp_compact_start(const char *name){
415  if ( strcmp(name, "compact") == 0) {
416  BESDEBUG(PARSER, prolog << "DMR++ compact element. localname: " << name << endl);
417  BaseType *bt = top_basetype();
418  if (!bt) throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
419  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
420  if (!dc)
421  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
422  dc->set_compact(true);
423  return true;
424  }
425  else {
426  return false;
427  }
428 }
429 
430 
431 void DmrppParserSax2::process_dmrpp_compact_end(const char *localname)
432 {
433  BESDEBUG(PARSER, prolog << "BEGIN DMR++ compact element. localname: " << localname << endl);
434  if (is_not(localname, "compact"))
435  return;
436 
437  BaseType *target = top_basetype();
438  if (!target)
439  throw BESInternalError("Could not locate parent BaseType during parse operation.", __FILE__, __LINE__);
440  BESDEBUG(PARSER, prolog << "BaseType: " << target->type_name() << " " << target->name() << endl);
441 
442  if (target->type() != dods_array_c)
443  throw BESInternalError("The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
444 
445  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(target); // Get the Dmrpp common info
446  if (!dc)
447  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
448 
449  dc->set_compact(true);
450 
451  // DmrppParserSax2::dmr_error(this, "Expected an end value tag; found '%s' instead.", localname);
452 
453  std::string data(char_data);
454  BESDEBUG(PARSER, prolog << "Read compact element text. size: " << data.size() << " length: " << data.length() << " value: '" << data << "'" << endl);
455 
456  std::vector <u_int8_t> decoded = base64::Base64::decode(data);
457 
458  switch (target->var()->type()) {
459  case dods_array_c:
460  throw BESInternalError("Parser state has been corrupted. An Array may not be the template for an Array.", __FILE__, __LINE__);
461  break;
462 
463  case dods_byte_c:
464  case dods_char_c:
465  case dods_int8_c:
466  case dods_uint8_c:
467  case dods_int16_c:
468  case dods_uint16_c:
469  case dods_int32_c:
470  case dods_uint32_c:
471  case dods_int64_c:
472  case dods_uint64_c:
473 
474  case dods_enum_c:
475 
476  case dods_float32_c:
477  case dods_float64_c:
478  target->val2buf(reinterpret_cast<void *>(&decoded[0]));
479  target->set_read_p(true);
480  break;
481 
482  case dods_str_c:
483  case dods_url_c:
484  {
485  std::string str(decoded.begin(), decoded.end());
486  DmrppArray *st = dynamic_cast<DmrppArray *>(target);
487  if(!st){
488  stringstream msg;
489  msg << prolog << "The target BaseType MUST be an array. and it's a " << target->type_name();
490  BESDEBUG(MODULE, msg.str() << endl);
491  throw BESInternalError(msg.str(),__FILE__,__LINE__);
492  }
493  st->val2buf(&str);
494  st->set_read_p(true);
495  }
496  break;
497 
498  default:
499  throw BESInternalError("Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
500  break;
501  }
502  char_data = ""; // Null this after use.
503 
504  BESDEBUG(PARSER, prolog << "END" << endl);
505 }
506 
507 bool DmrppParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
508 {
509  if (is_not(name, "Map")) return false;
510 
511 #if 0
512  transfer_xml_attrs(attrs, nb_attributes);
513 #endif
514 
515  if (!check_attribute("name", attrs, nb_attributes)) {
516  dmr_error(this, "The 'name' attribute must be used in a Map element.");
517  return false;
518  }
519 
520  if (!top_basetype()->is_vector_type()) {
521  // Make the top BaseType* an array
522  BaseType *b = top_basetype();
523  pop_basetype();
524 
525  Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
526  a->set_is_dap4(true);
527  a->add_var_nocopy(b);
528  a->set_attributes_nocopy(b->attributes());
529  // trick: instead of popping b's attributes, copying them and then pushing
530  // a's copy, just move the pointer (but make sure there's only one object that
531  // references that pointer).
532  b->set_attributes_nocopy(0);
533 
534  push_basetype(a);
535  }
536 
537  assert(top_basetype()->is_vector_type());
538 
539  Array *a = static_cast<Array*>(top_basetype());
540 
541  string map_name = get_attribute_val("name", attrs, nb_attributes);
542  if (get_attribute_val("name", attrs, nb_attributes).at(0) != '/') map_name = top_group()->FQN() + map_name;
543 
544  Array *map_source = 0; // The array variable that holds the data for the Map
545 
546  if (map_name[0] == '/') // lookup the Map in the root group
547  map_source = dmr()->root()->find_map_source(map_name);
548  else
549  // get enclosing Group and lookup Map there
550  map_source = top_group()->find_map_source(map_name);
551 
552  // Change: If the parser is in 'strict' mode (the default) and the Array named by
553  // the Map cannot be found, it is an error. If 'strict' mode is false (permissive
554  // mode), then this is not an error. However, the Array referenced by the Map will
555  // be null. This is a change in the parser's behavior to accommodate requests for
556  // Arrays that include Maps that do not also include the Map(s) in the request.
557  // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
558  if (!map_source && d_strict)
559  throw BESInternalError("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.",__FILE__,__LINE__);
560 
561  a->maps()->add_map(new D4Map(map_name, map_source));
562 
563  return true;
564 }
565 
566 bool DmrppParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
567 {
568  if (is_not(name, "Group")) return false;
569 
570 #if 0
571  transfer_xml_attrs(attrs, nb_attributes);
572 #endif
573 
574  if (!check_required_attribute("name", attrs, nb_attributes)) {
575  dmr_error(this, "The required attribute 'name' was missing from a Group element.");
576  return false;
577  }
578 
579  BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, get_attribute_val("name", attrs, nb_attributes));
580  if (!btp) {
581  dmr_fatal_error(this, "Could not instantiate the Group '%s'.", get_attribute_val("name", attrs, nb_attributes).c_str());
582  return false;
583  }
584 
585  D4Group *grp = static_cast<D4Group*>(btp);
586 
587  // Need to set this to get the D4Attribute behavior in the type classes
588  // shared between DAP2 and DAP4. jhrg 4/18/13
589  grp->set_is_dap4(true);
590 
591  // link it up and change the current group
592  D4Group *parent = top_group();
593  if (!parent) {
594  dmr_fatal_error(this, "No Group on the Group stack.");
595  return false;
596  }
597 
598  grp->set_parent(parent);
599  parent->add_group_nocopy(grp);
600 
601  push_group(grp);
602  push_attributes(grp->attributes());
603  return true;
604 }
605 
612 inline bool DmrppParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
613 {
614  if (is_not(name, "Attribute")) return false;
615 
616 #if 0
617  // These methods set the state to parser_error if a problem is found.
618  transfer_xml_attrs(attrs, nb_attributes);
619 #endif
620 
621  // add error
622  if (!(check_required_attribute(string("name"), attrs, nb_attributes) && check_required_attribute(string("type"), attrs, nb_attributes))) {
623  dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
624  return false;
625  }
626 
627  if (get_attribute_val("type", attrs, nb_attributes) == "Container") {
628  push_state(inside_attribute_container);
629 
630  BESDEBUG(PARSER, prolog << "Pushing attribute container " << get_attribute_val("name", attrs, nb_attributes) << endl);
631  D4Attribute *child = new D4Attribute(get_attribute_val("name", attrs, nb_attributes), attr_container_c);
632 
633  D4Attributes *tos = top_attributes();
634  // add return
635  if (!tos) {
636  delete child;
637  dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
638  return false;
639  }
640 
641  tos->add_attribute_nocopy(child);
642  push_attributes(child->attributes());
643  }
644  else if (get_attribute_val("type", attrs, nb_attributes) == "OtherXML") {
645  push_state(inside_other_xml_attribute);
646 
647  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
648  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
649  }
650  else {
651  push_state(inside_attribute);
652 
653  dods_attr_name = get_attribute_val("name", attrs, nb_attributes);
654  dods_attr_type = get_attribute_val("type", attrs, nb_attributes);
655  }
656 
657  return true;
658 }
659 
665 inline bool DmrppParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
666 {
667  if (is_not(name, "Enumeration")) return false;
668 
669 #if 0
670  transfer_xml_attrs(attrs, nb_attributes);
671 #endif
672 
673  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("basetype", attrs, nb_attributes))) {
674  dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
675  return false;
676  }
677 
678  Type t = get_type(get_attribute_val("basetype", attrs, nb_attributes).c_str());
679  if (!is_integer_type(t)) {
680  dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
681  get_attribute_val("name", attrs, nb_attributes).c_str(), get_attribute_val("basetype", attrs, nb_attributes).c_str());
682  return false;
683  }
684 
685  // This getter allocates a new object if needed.
686  string enum_def_path = get_attribute_val("name", attrs, nb_attributes);
687 #if 0
688  // Use FQNs when things are referenced, not when they are defined
689  if (xml_attrs["name"].value[0] != '/')
690  enum_def_path = top_group()->FQN() + enum_def_path;
691 #endif
692  enum_def()->set_name(enum_def_path);
693  enum_def()->set_type(t);
694 
695  return true;
696 }
697 
698 inline bool DmrppParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
699 {
700  if (is_not(name, "EnumConst")) return false;
701 
702 #if 0
703  // These methods set the state to parser_error if a problem is found.
704  transfer_xml_attrs(attrs, nb_attributes);
705 #endif
706 
707  if (!(check_required_attribute("name", attrs, nb_attributes) && check_required_attribute("value", attrs, nb_attributes))) {
708  dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
709  return false;
710  }
711 
712  istringstream iss(get_attribute_val("value", attrs, nb_attributes));
713  long long value = 0;
714  iss >> skipws >> value;
715  if (iss.fail() || iss.bad()) {
716  dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
717  get_attribute_val("value", attrs, nb_attributes).c_str());
718  }
719  else if (!enum_def()->is_valid_enum_value(value)) {
720  dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
721  get_attribute_val("value", attrs, nb_attributes).c_str(), D4type_name(d_enum_def->type()).c_str());
722  }
723  else {
724  // unfortunate choice of names... args are 'label' and 'value'
725  enum_def()->add_value(get_attribute_val("name", attrs, nb_attributes), value);
726  }
727 
728  return true;
729 }
730 
736 inline bool DmrppParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
737 {
738  Type t = get_type(name);
739  if (is_simple_type(t)) {
740  process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
741  return true;
742  }
743  else {
744  switch (t) {
745  case dods_structure_c:
746  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
747  return true;
748 
749  case dods_sequence_c:
750  process_variable_helper(t, inside_constructor, attrs, nb_attributes);
751  return true;
752 
753  default:
754  return false;
755  }
756  }
757 }
758 
766 void DmrppParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
767 {
768 #if 0
769  transfer_xml_attrs(attrs, nb_attributes);
770 #endif
771 
772  if (check_required_attribute("name", attrs, nb_attributes)) {
773  BaseType *btp = dmr()->factory()->NewVariable(t, get_attribute_val("name", attrs, nb_attributes));
774  if (!btp) {
775  dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
776  return;
777  }
778 
779  if ((t == dods_enum_c) && check_required_attribute("enum", attrs, nb_attributes)) {
780  D4EnumDef *enum_def = 0;
781  string enum_path = get_attribute_val("enum", attrs, nb_attributes);
782  if (enum_path[0] == '/')
783  enum_def = dmr()->root()->find_enum_def(enum_path);
784  else
785  enum_def = top_group()->find_enum_def(enum_path);
786 
787  if (!enum_def) dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
788 
789  static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
790  }
791 
792  btp->set_is_dap4(true); // see comment above
793  push_basetype(btp);
794 
795  push_attributes(btp->attributes());
796 
797  push_state(s);
798  }
799 }
800 
811 void DmrppParserSax2::dmr_start_document(void * p)
812 {
813  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
814  parser->error_msg = "";
815  parser->char_data = "";
816 
817  // Set this in intern_helper so that the loop test for the parser_end
818  // state works for the first iteration. It seems like XMLParseChunk calls this
819  // function on it's first run. jhrg 9/16/13
820  // parser->push_state(parser_start);
821 
822  parser->push_attributes(parser->dmr()->root()->attributes());
823 
824  BESDEBUG(PARSER, prolog << "Parser start state: " << states[parser->get_state()] << endl);
825 }
826 
829 void DmrppParserSax2::dmr_end_document(void * p)
830 {
831  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
832 
833  BESDEBUG(PARSER, prolog << "Parser end state: " << states[parser->get_state()] << endl);
834 
835  if (parser->get_state() != parser_end)
836  DmrppParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
837 
838  // If we've found any sort of error, don't make the DMR; intern() will
839  // take care of the error.
840  if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error) return;
841 
842  if (!parser->empty_basetype() || parser->empty_group())
843  DmrppParserSax2::dmr_error(parser,
844  "The document did not contain a valid root Group or contained unbalanced tags.");
845 
846 #if INCLUDE_BESDEBUG_ISSET
847  if(BESDebug::IsSet(PARSER)){
848  ostream *os = BESDebug::GetStrm();
849  *os << prolog << "parser->top_group() BEGIN " << endl;
850  parser->top_group()->dump(*os);
851  *os << endl << prolog << "parser->top_group() END " << endl;
852  }
853 #endif
854 
855  parser->pop_group(); // leave the stack 'clean'
856  parser->pop_attributes();
857 }
858 
859 void DmrppParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
860  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/, const xmlChar **attributes)
861 {
862  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
863  const char *localname = reinterpret_cast<const char *>(l);
864 
865  string this_element_ns_name(URI ? (char *) URI : "null");
866 
867  if (parser->get_state() != parser_error) {
868  string dap4_ns_name = DapXmlNamspaces::getDapNamespaceString(DAP_4_0);
869  BESDEBUG(PARSER, prolog << "dap4_ns_name: " << dap4_ns_name << endl);
870 
871  if (this_element_ns_name == dmrpp_namespace) {
872  if (strcmp(localname, "chunkDimensionSizes") == 0) {
873  BESDEBUG(PARSER, prolog << "Found dmrpp:chunkDimensionSizes element. Pushing state." << endl);
874  parser->push_state(inside_dmrpp_chunkDimensionSizes_element);
875  }
876  else if (strcmp(localname, "compact") == 0) {
877  BESDEBUG(PARSER, prolog << "Found dmrpp:compact element. Pushing state." << endl);
878  parser->push_state(inside_dmrpp_compact_element);
879  }
880  else {
881  BESDEBUG(PARSER,
882  prolog << "Start of element in dmrpp namespace: " << localname << " detected." << endl);
883  parser->push_state(inside_dmrpp_object);
884  }
885  }
886  else if (this_element_ns_name != dap4_ns_name) {
887  BESDEBUG(PARSER, prolog << "Start of non DAP4 element: " << localname << " detected." << endl);
888  parser->push_state(not_dap4_element);
889  }
890  }
891 
892  BESDEBUG(PARSER, prolog << "Start element " << localname << " prefix: " << (prefix ? (char *) prefix : "null") << " ns: "
893  << this_element_ns_name << " (state: " << states[parser->get_state()] << ")" << endl);
894 
895  switch (parser->get_state()) {
896  case parser_start:
897  if (is_not(localname, "Dataset"))
898  DmrppParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.",
899  localname);
900 
901  parser->root_ns = URI ? (const char *) URI : "";
902 
903 #if 0
904  parser->transfer_xml_attrs(attributes, nb_attributes);
905 #endif
906 
907  if (parser->check_required_attribute(string("name"), attributes, nb_attributes))
908  parser->dmr()->set_name(parser->get_attribute_val("name", attributes, nb_attributes));
909 
910  // Record the DMR++ builder version number. For now, if this is present, we have a 'new'
911  // DMR++ and if it is not present, we have an old DMR++. One (the?) important difference
912  // between the two is that the new version has the order of the filters correct and the
913  // current version of the handler code _expects_ this. The old version of the DMR++ had
914  // the order reversed (at least for most - all? - data). So we have this kludge to enable
915  // those old DMR++ files to work. See DmrppCommon::set_filter() for the other half of the
916  // hack. Note that the attribute 'version' is in the dmrpp xml namespace. jhrg 11/9/21
917  if (parser->check_attribute("version", attributes, nb_attributes)) {
918  auto dmrpp = dynamic_cast<DMRpp*>(parser->dmr());
919  if (dmrpp)
920  dmrpp->set_version(parser->get_attribute_val("version", attributes, nb_attributes));
921  DmrppRequestHandler::d_emulate_original_filter_order_behavior = false;
922  }
923  else {
924  DmrppRequestHandler::d_emulate_original_filter_order_behavior = true;
925  }
926 
927  if (parser->check_attribute("dapVersion", attributes, nb_attributes))
928  parser->dmr()->set_dap_version(parser->get_attribute_val("dapVersion", attributes, nb_attributes));
929 
930  if (parser->check_attribute("dmrVersion", attributes, nb_attributes))
931  parser->dmr()->set_dmr_version(parser->get_attribute_val("dmrVersion", attributes, nb_attributes));
932 
933  if (parser->check_attribute("base", attributes, nb_attributes)) {
934  parser->dmr()->set_request_xml_base(parser->get_attribute_val("base", attributes, nb_attributes));
935  }
936  BESDEBUG(PARSER, prolog << "Dataset xml:base is set to '" << parser->dmr()->request_xml_base() << "'" << endl);
937 
938  if (parser->check_attribute("href", attributes, nb_attributes)) {
939  bool trusted = false;
940  if (parser->check_attribute("trust", attributes, nb_attributes)) {
941  string value = parser->get_attribute_val("trust", attributes, nb_attributes);
942  trusted = value == "true";
943  }
944  string href = parser->get_attribute_val("href", attributes, nb_attributes);
945  parser->dmrpp_dataset_href = shared_ptr<http::url>(new http::url(href,trusted));
946  BESDEBUG(PARSER, prolog << "Processed 'href' value into data_url. href: " << parser->dmrpp_dataset_href->str() << (trusted?"(trusted)":"") << endl);
947 
948  //######################################################################################################
949  // Stop parser EffectiveUrl resolution (ndp - 08/27/2021)
950  // I dropped this because:
951  // - The Chunk::get_data_url() method calls EffectiveUrlCache::TheCache()->get_effective_url(data_url)
952  // - EffectiveUrlCache::TheCache()->get_effective_url(data_url) method is thread safe
953  // - By dropping these calls from the parser, which is in a single threaded section of the code we can
954  // resolve the URL during a multithreaded operation (reading the chunks) and reduce the overall
955  // time cost of resolving all of the chunk URLs with concurrency.
956  // -----------------------------------------------------------------------------------------------------
957  //BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Dataset URL: " << parser->dmrpp_dataset_href->str() << endl);
958  //auto effective_url = EffectiveUrlCache::TheCache()->get_effective_url(parser->dmrpp_dataset_href);
959  //BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url->str() << endl);
960  //######################################################################################################
961 
962  }
963  BESDEBUG(PARSER, prolog << "Dataset dmrpp:href is set to '" << parser->dmrpp_dataset_href->str() << "'" << endl);
964 
965  if (!parser->root_ns.empty()) parser->dmr()->set_namespace(parser->root_ns);
966 
967  // Push the root Group on the stack
968  parser->push_group(parser->dmr()->root());
969 
970  parser->push_state(inside_dataset);
971 
972  break;
973 
974  // Both inside dataset and inside group can have the same stuff.
975  // The difference is that the Dataset holds the root group, which
976  // must be present; other groups are optional
977  case inside_dataset:
978  case inside_group:
979  if (parser->process_enum_def(localname, attributes, nb_attributes))
980  parser->push_state(inside_enum_def);
981  else if (parser->process_dimension_def(localname, attributes, nb_attributes))
982  parser->push_state(inside_dim_def);
983  else if (parser->process_group(localname, attributes, nb_attributes))
984  parser->push_state(inside_group);
985  else if (parser->process_variable(localname, attributes, nb_attributes))
986  // This will push either inside_simple_type or inside_structure
987  // onto the parser state stack.
988  break;
989  else if (parser->process_attribute(localname, attributes, nb_attributes))
990  // This will push either inside_attribute, inside_attribute_container
991  // or inside_otherxml_attribute onto the parser state stack
992  break;
993  else
994  DmrppParserSax2::dmr_error(parser,
995  "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.",
996  localname);
997  break;
998 
999  case inside_attribute_container:
1000  if (parser->process_attribute(localname, attributes, nb_attributes))
1001  break;
1002  else
1003  DmrppParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
1004  break;
1005 
1006  case inside_attribute:
1007  if (parser->process_attribute(localname, attributes, nb_attributes))
1008  break;
1009  else if (strcmp(localname, "Value") == 0)
1010  parser->push_state(inside_attribute_value);
1011  else
1012  dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
1013  break;
1014 
1015  case inside_attribute_value:
1016  // Attribute values are processed by the end element code.
1017  break;
1018 
1019  case inside_other_xml_attribute:
1020  parser->other_xml_depth++;
1021 
1022  // Accumulate the elements here
1023  parser->other_xml.append("<");
1024  if (prefix) {
1025  parser->other_xml.append((const char *) prefix);
1026  parser->other_xml.append(":");
1027  }
1028  parser->other_xml.append(localname);
1029 
1030  if (nb_namespaces != 0) {
1031  parser->transfer_xml_ns(namespaces, nb_namespaces);
1032 
1033  for (map<string, string>::iterator i = parser->namespace_table.begin(); i != parser->namespace_table.end();
1034  ++i) {
1035  parser->other_xml.append(" xmlns");
1036  if (!i->first.empty()) {
1037  parser->other_xml.append(":");
1038  parser->other_xml.append(i->first);
1039  }
1040  parser->other_xml.append("=\"");
1041  parser->other_xml.append(i->second);
1042  parser->other_xml.append("\"");
1043  }
1044  }
1045 
1046  if (nb_attributes != 0) {
1047 #if 0
1048  parser->transfer_xml_attrs(attributes, nb_attributes);
1049 #endif
1050  for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
1051  parser->other_xml.append(" ");
1052  if (!i->second.prefix.empty()) {
1053  parser->other_xml.append(i->second.prefix);
1054  parser->other_xml.append(":");
1055  }
1056  parser->other_xml.append(i->first);
1057  parser->other_xml.append("=\"");
1058  parser->other_xml.append(i->second.value);
1059  parser->other_xml.append("\"");
1060  }
1061  }
1062 
1063  parser->other_xml.append(">");
1064  break;
1065 
1066  case inside_enum_def:
1067  // process an EnumConst element
1068  if (parser->process_enum_const(localname, attributes, nb_attributes))
1069  parser->push_state(inside_enum_const);
1070  else
1071  dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
1072  break;
1073 
1074  case inside_enum_const:
1075  // No content; nothing to do
1076  break;
1077 
1078  case inside_dim_def:
1079  // No content; nothing to do
1080  break;
1081 
1082  case inside_dim:
1083  // No content.
1084  break;
1085 
1086  case inside_map:
1087  // No content.
1088  break;
1089 
1090  case inside_simple_type:
1091  if (parser->process_attribute(localname, attributes, nb_attributes))
1092  break;
1093  else if (parser->process_dimension(localname, attributes, nb_attributes))
1094  parser->push_state(inside_dim);
1095  else if (parser->process_map(localname, attributes, nb_attributes))
1096  parser->push_state(inside_map);
1097  else
1098  dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
1099  break;
1100 
1101  case inside_constructor:
1102  if (parser->process_variable(localname, attributes, nb_attributes))
1103  // This will push either inside_simple_type or inside_structure
1104  // onto the parser state stack.
1105  break;
1106  else if (parser->process_attribute(localname, attributes, nb_attributes))
1107  break;
1108  else if (parser->process_dimension(localname, attributes, nb_attributes))
1109  parser->push_state(inside_dim);
1110  else if (parser->process_map(localname, attributes, nb_attributes))
1111  parser->push_state(inside_map);
1112  else
1113  DmrppParserSax2::dmr_error(parser,
1114  "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
1115  break;
1116 
1117  case not_dap4_element:
1118  BESDEBUG(PARSER, prolog << "SKIPPING unexpected element. localname: " << localname << "namespace: "
1119  << this_element_ns_name << endl);
1120  break;
1121 
1122  case inside_dmrpp_compact_element:
1123  if (parser->process_dmrpp_compact_start(localname)) {
1124  BESDEBUG(PARSER, prolog << "Call to parser->process_dmrpp_compact_start() completed." << endl);
1125  }
1126  break;
1127 
1128  case inside_dmrpp_object: {
1129  BESDEBUG(PARSER, prolog << "Inside dmrpp namespaced element. localname: " << localname << endl);
1130  assert(this_element_ns_name == dmrpp_namespace);
1131 
1132 #if 0
1133  parser->transfer_xml_attrs(attributes, nb_attributes); // load up xml_attrs
1134 #endif
1135 
1136  BaseType *bt = parser->top_basetype();
1137  if (!bt) throw BESInternalError("Could locate parent BaseType during parse operation.", __FILE__, __LINE__);
1138 
1139  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(bt); // Get the Dmrpp common info
1140  if (!dc)
1141  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1142 
1143  // Ingest the dmrpp:chunks element and it attributes
1144  if (strcmp(localname, "chunks") == 0) {
1145  BESDEBUG(PARSER, prolog << "DMR++ chunks element. localname: " << localname << endl);
1146 
1147  if (parser->check_attribute("compressionType", attributes, nb_attributes)) {
1148  string compression_type_string(parser->get_attribute_val("compressionType", attributes, nb_attributes));
1149  dc->ingest_compression_type(compression_type_string);
1150 
1151  BESDEBUG(PARSER, prolog << "Processed attribute 'compressionType=\"" <<
1152  compression_type_string << "\"'" << endl);
1153  }
1154  else {
1155  BESDEBUG(PARSER, prolog << "There was no 'compressionType' attribute associated with the variable '"
1156  << bt->type_name() << " " << bt->name() << "'" << endl);
1157  }
1158 
1159  if (parser->check_attribute("byteOrder", attributes, nb_attributes)) {
1160  string byte_order_string(parser->get_attribute_val("byteOrder", attributes, nb_attributes));
1161  dc->ingest_byte_order(byte_order_string);
1162 
1163  BESDEBUG(PARSER, prolog << "Processed attribute 'byteOrder=\"" << byte_order_string << "\"'" << endl);
1164  }
1165  else {
1166  BESDEBUG(PARSER, prolog << "There was no 'byteOrder' attribute associated with the variable '" << bt->type_name()
1167  << " " << bt->name() << "'" << endl);
1168  }
1169  }
1170  // Ingest an dmrpp:chunk element and its attributes
1171  else if (strcmp(localname, "chunk") == 0) {
1172  string data_url_str = "unknown_data_location";
1173  shared_ptr<http::url> data_url;
1174 
1175  if (parser->check_attribute("href", attributes, nb_attributes)) {
1176  bool trusted = false;
1177  if (parser->check_attribute("trust", attributes, nb_attributes)) {
1178  string value = parser->get_attribute_val("trust", attributes, nb_attributes);
1179  trusted = value == "true";
1180  }
1181 
1182  // This is the chunk elements href that we check.
1183  data_url_str = parser->get_attribute_val("href", attributes, nb_attributes);
1184  data_url = shared_ptr<http::url> ( new http::url(data_url_str,trusted));
1185  BESDEBUG(PARSER, prolog << "Processed 'href' value into data_url. href: " << data_url->str() << (trusted?"":"(trusted)") << endl);
1186  //######################################################################################################
1187  // Stop parser EffectiveUrl resolution (ndp - 08/27/2021)
1188  // I dropped this because:
1189  // - The Chunk::get_data_url() method calls EffectiveUrlCache::TheCache()->get_effective_url(data_url)
1190  // - EffectiveUrlCache::TheCache()->get_effective_url(data_url) method is thread safe
1191  // - By dropping these calls from the parser, which is in a single threaded section of the code, we can
1192  // resolve the URL during a multi-threaded operation (reading the chunks) and reduce the overall
1193  // time cost of resolving all of the chunk URLs with concurrency.
1194  // -----------------------------------------------------------------------------------------------------
1195  // We may have to cache the last accessed/redirect URL for data_url here because this URL
1196  // may be unique to this chunk.
1197 
1198  //BESDEBUG(PARSER, prolog << "Attempting to locate and cache the effective URL for Chunk URL: " << data_url->str() << endl);
1199  //auto effective_url = EffectiveUrlCache::TheCache()->get_effective_url(data_url);
1200  //BESDEBUG(PARSER, prolog << "EffectiveUrlCache::get_effective_url() returned: " << effective_url->str() << endl);
1201  //######################################################################################################
1202 
1203  }
1204  else {
1205  BESDEBUG(PARSER, prolog << "No attribute 'href' located. Trying Dataset/@dmrpp:href..." << endl);
1206  // This bit of magic sets the URL used to get the data and it's
1207  // magic in part because it may be a file or an http URL
1208  data_url = parser->dmrpp_dataset_href;
1209  // We don't have to conditionally cache parser->dmrpp_dataset_href here because that was
1210  // done in the evaluation of the parser_start case.
1211  BESDEBUG(PARSER, prolog << "Processing dmrpp:href into data_url. dmrpp:href='" << data_url->str() << "'" << endl);
1212  }
1213 
1214  if (data_url->protocol() != HTTP_PROTOCOL && data_url->protocol() != HTTPS_PROTOCOL && data_url->protocol() != FILE_PROTOCOL) {
1215  BESDEBUG(PARSER, prolog << "data_url does NOT start with 'http://', 'https://' or 'file://'. "
1216  "Retrieving default catalog root directory" << endl);
1217 
1218  // Now we try to find the default catalog. If we can't find it we punt and leave it be.
1220  if (!defcat) {
1221  BESDEBUG(PARSER, prolog << "Not able to find the default catalog." << endl);
1222  }
1223  else {
1224  // Found the catalog so we get the root dir; make a file URL.
1226 
1227  BESDEBUG(PARSER, prolog << "Found default catalog root_dir: '" << utils->get_root_dir() << "'" << endl);
1228 
1229  data_url_str = BESUtil::assemblePath(utils->get_root_dir(), data_url_str, true);
1230  data_url_str = FILE_PROTOCOL + data_url_str;
1231  data_url = shared_ptr<http::url> ( new http::url(data_url_str));
1232  }
1233  }
1234 
1235  BESDEBUG(PARSER, prolog << "Processed data_url: '" << data_url->str() << "'" << endl);
1236 
1237  unsigned long long offset = 0;
1238  unsigned long long size = 0;
1239  string chunk_position_in_array("");
1240  std::string byte_order = dc->get_byte_order();
1241 
1242  if (parser->check_required_attribute("offset", attributes, nb_attributes)) {
1243  istringstream offset_ss(parser->get_attribute_val("offset", attributes, nb_attributes));
1244  offset_ss >> offset;
1245  BESDEBUG(PARSER, prolog << "Processed attribute 'offset=\"" << offset << "\"'" << endl);
1246  }
1247  else {
1248  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'offset'.");
1249  }
1250 
1251  if (parser->check_required_attribute("nBytes", attributes, nb_attributes)) {
1252  istringstream size_ss(parser->get_attribute_val("nBytes", attributes, nb_attributes));
1253  size_ss >> size;
1254  BESDEBUG(PARSER, prolog << "Processed attribute 'nBytes=\"" << size << "\"'" << endl);
1255  }
1256  else {
1257  dmr_error(parser, "The hdf:byteStream element is missing the required attribute 'size'.");
1258  }
1259 
1260  if (parser->check_attribute("chunkPositionInArray", attributes, nb_attributes)) {
1261  istringstream chunk_position_ss(parser->get_attribute_val("chunkPositionInArray", attributes, nb_attributes));
1262  chunk_position_in_array = chunk_position_ss.str();
1263  BESDEBUG(PARSER, prolog << "Found attribute 'chunkPositionInArray' value: " << chunk_position_ss.str() << endl);
1264  }
1265  else {
1266  BESDEBUG(PARSER, prolog << "No attribute 'chunkPositionInArray' located" << endl);
1267  }
1268 
1269  dc->add_chunk(data_url, byte_order, size, offset, chunk_position_in_array);
1270  }
1271  }
1272  break;
1273 
1274  case inside_dmrpp_chunkDimensionSizes_element:
1275  // The dmrpp:chunkDimensionSizes value is processed by the end element code.
1276  break;
1277 
1278  case parser_unknown:
1279  case parser_error:
1280  case parser_fatal_error:
1281  break;
1282 
1283  case parser_end:
1284  // FIXME Error?
1285  break;
1286  }
1287 
1288  BESDEBUG(PARSER, prolog << "Start element exit state: " << states[parser->get_state()] << endl);
1289 }
1290 
1291 void DmrppParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
1292 {
1293  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1294  const char *localname = (const char *) l;
1295 
1296  BESDEBUG(PARSER, prolog << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl);
1297 
1298  switch (parser->get_state()) {
1299  case parser_start:
1300  dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
1301  break;
1302 
1303  case inside_dataset:
1304  if (is_not(localname, "Dataset"))
1305  DmrppParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
1306 
1307  parser->pop_state();
1308  if (parser->get_state() != parser_start)
1309  dmr_fatal_error(parser, "Unexpected state, expected start state.");
1310  else {
1311  parser->pop_state();
1312  parser->push_state(parser_end);
1313  }
1314  break;
1315 
1316  case inside_group: {
1317  if (is_not(localname, "Group"))
1318  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
1319 
1320  if (!parser->empty_basetype() || parser->empty_group())
1321  DmrppParserSax2::dmr_error(parser,
1322  "The document did not contain a valid root Group or contained unbalanced tags.");
1323 
1324  parser->pop_group();
1325  parser->pop_state();
1326  break;
1327  }
1328 
1329  case inside_attribute_container:
1330  if (is_not(localname, "Attribute"))
1331  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1332 
1333  parser->pop_state();
1334  parser->pop_attributes();
1335  break;
1336 
1337  case inside_attribute:
1338  if (is_not(localname, "Attribute"))
1339  DmrppParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
1340 
1341  parser->pop_state();
1342  break;
1343 
1344  case inside_attribute_value: {
1345  if (is_not(localname, "Value"))
1346  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1347 
1348  parser->pop_state();
1349 
1350  // The old code added more values using the name and type as
1351  // indexes to find the correct attribute. Use get() for that
1352  // now. Or fix this code to keep a pointer to the to attribute...
1353  D4Attributes *attrs = parser->top_attributes();
1354  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1355  if (!attr) {
1356  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1357  attrs->add_attribute_nocopy(attr);
1358  }
1359  attr->add_value(parser->char_data);
1360 
1361  parser->char_data = ""; // Null this after use.
1362  break;
1363  }
1364 
1365  case inside_other_xml_attribute: {
1366  if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
1367  parser->pop_state();
1368 
1369  // The old code added more values using the name and type as
1370  // indexes to find the correct attribute. Use get() for that
1371  // now. Or fix this code to keep a pointer to the to attribute...
1372  D4Attributes *attrs = parser->top_attributes();
1373  D4Attribute *attr = attrs->get(parser->dods_attr_name);
1374  if (!attr) {
1375  attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
1376  attrs->add_attribute_nocopy(attr);
1377  }
1378  attr->add_value(parser->other_xml);
1379 
1380  parser->other_xml = ""; // Null this after use.
1381  }
1382  else {
1383  if (parser->other_xml_depth == 0) {
1384  DmrppParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
1385  localname);
1386  break;
1387  }
1388  parser->other_xml_depth--;
1389 
1390  parser->other_xml.append("</");
1391  if (prefix) {
1392  parser->other_xml.append((const char *) prefix);
1393  parser->other_xml.append(":");
1394  }
1395  parser->other_xml.append(localname);
1396  parser->other_xml.append(">");
1397  }
1398  break;
1399  }
1400 
1401  case inside_enum_def:
1402  if (is_not(localname, "Enumeration"))
1403  DmrppParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
1404  if (!parser->top_group())
1405  DmrppParserSax2::dmr_fatal_error(parser,
1406  "Expected a Group to be the current item, while finishing up an Enumeration.");
1407  else {
1408  // copy the pointer; not a deep copy
1409  parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
1410  // Set the enum_def to null; next call to enum_def() will
1411  // allocate a new object
1412  parser->clear_enum_def();
1413  parser->pop_state();
1414  }
1415  break;
1416 
1417  case inside_enum_const:
1418  if (is_not(localname, "EnumConst"))
1419  DmrppParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
1420 
1421  parser->pop_state();
1422  break;
1423 
1424  case inside_dim_def: {
1425  if (is_not(localname, "Dimension"))
1426  DmrppParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
1427 
1428  if (!parser->top_group())
1429  DmrppParserSax2::dmr_error(parser,
1430  "Expected a Group to be the current item, while finishing up an Dimension.");
1431 
1432  parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
1433  // Set the dim_def to null; next call to dim_def() will
1434  // allocate a new object. Calling 'clear' is important because
1435  // the cleanup method will free dim_def if it's not null and
1436  // we just copied the pointer in the add_dim_nocopy() call
1437  // above.
1438  parser->clear_dim_def();
1439  parser->pop_state();
1440  break;
1441  }
1442 
1443  case inside_simple_type:
1444  if (is_simple_type(get_type(localname))) {
1445  BaseType *btp = parser->top_basetype();
1446  parser->pop_basetype();
1447  parser->pop_attributes();
1448  BaseType *parent = 0;
1449  if (!parser->empty_basetype())
1450  parent = parser->top_basetype();
1451  else if (!parser->empty_group())
1452  parent = parser->top_group();
1453  else {
1454  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1455  localname);
1456  delete btp;
1457  parser->pop_state();
1458  break;
1459  }
1460  if (parent->type() == dods_array_c)
1461  static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1462  else
1463  parent->add_var_nocopy(btp);
1464  }
1465  else
1466  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1467 
1468  parser->pop_state();
1469  break;
1470 
1471  case inside_dim:
1472  if (is_not(localname, "Dim"))
1473  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1474 
1475  parser->pop_state();
1476  break;
1477 
1478  case inside_map:
1479  if (is_not(localname, "Map"))
1480  DmrppParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1481 
1482  parser->pop_state();
1483  break;
1484 
1485  case inside_constructor: {
1486  if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1487  DmrppParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1488  return;
1489  }
1490  BaseType *btp = parser->top_basetype();
1491  parser->pop_basetype();
1492  parser->pop_attributes();
1493  BaseType *parent = 0;
1494  if (!parser->empty_basetype())
1495  parent = parser->top_basetype();
1496  else if (!parser->empty_group())
1497  parent = parser->top_group();
1498  else {
1499  dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1500  localname);
1501  delete btp;
1502  parser->pop_state();
1503  break;
1504  }
1505  // TODO Why doesn't this code mirror the simple_var case and test
1506  // for the parent being an array? jhrg 10/13/13
1507  parent->add_var_nocopy(btp);
1508  parser->pop_state();
1509  break;
1510  }
1511 
1512  case not_dap4_element:
1513  BESDEBUG(PARSER, prolog << "End of non DAP4 element: " << localname << endl);
1514  parser->pop_state();
1515  break;
1516 
1517 #if 1
1518  case inside_dmrpp_compact_element: {
1519  parser->process_dmrpp_compact_end(localname);
1520  BESDEBUG(PARSER, prolog << "End of dmrpp compact element: " << localname << endl);
1521  parser->pop_state();
1522  break;
1523  }
1524 #endif
1525 
1526  case inside_dmrpp_object: {
1527  BESDEBUG(PARSER, prolog << "End of dmrpp namespace element: " << localname << endl);
1528  parser->pop_state();
1529  break;
1530  }
1531 
1532  case inside_dmrpp_chunkDimensionSizes_element: {
1533  BESDEBUG(PARSER, prolog << "End of chunkDimensionSizes element. localname: " << localname << endl);
1534 
1535  if (is_not(localname, "chunkDimensionSizes"))
1536  DmrppParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
1537  DmrppCommon *dc = dynamic_cast<DmrppCommon*>(parser->top_basetype()); // Get the Dmrpp common info
1538  if (!dc)
1539  throw BESInternalError("Could not cast BaseType to DmrppType in the drmpp handler.", __FILE__, __LINE__);
1540  string element_text(parser->char_data);
1541  BESDEBUG(PARSER, prolog << "chunkDimensionSizes element_text: '" << element_text << "'" << endl);
1542  dc->parse_chunk_dimension_sizes(element_text);
1543  parser->char_data = ""; // Null this after use.
1544  parser->pop_state();
1545  break;
1546  }
1547 
1548  case parser_unknown:
1549  parser->pop_state();
1550  break;
1551 
1552  case parser_error:
1553  case parser_fatal_error:
1554  break;
1555 
1556  case parser_end:
1557  // FIXME Error?
1558  break;
1559  }
1560 
1561 
1562  BESDEBUG(PARSER, prolog << "End element exit state: " << states[parser->get_state()] <<
1563  " ("<<parser->get_state()<<")"<< endl);
1564 }
1565 
1569 void DmrppParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1570 {
1571  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1572 
1573  switch (parser->get_state()) {
1574  case inside_attribute_value:
1575  case inside_dmrpp_chunkDimensionSizes_element:
1576  case inside_dmrpp_compact_element:
1577  parser->char_data.append((const char *) (ch), len);
1578  BESDEBUG(PARSER, prolog << "Characters[" << parser->char_data.size() << "]" << parser->char_data << "'" << endl);
1579  break;
1580 
1581  case inside_other_xml_attribute:
1582  parser->other_xml.append((const char *) (ch), len);
1583  BESDEBUG(PARSER, prolog << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1584  break;
1585 
1586  default:
1587  break;
1588  }
1589 }
1590 
1595 void DmrppParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1596 {
1597  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1598 
1599  switch (parser->get_state()) {
1600  case inside_other_xml_attribute:
1601  parser->other_xml.append((const char *) (ch), len);
1602  break;
1603 
1604  default:
1605  break;
1606  }
1607 }
1608 
1614 void DmrppParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1615 {
1616  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1617 
1618  switch (parser->get_state()) {
1619  case inside_other_xml_attribute:
1620  parser->other_xml.append((const char *) (value), len);
1621  break;
1622 
1623  case parser_unknown:
1624  break;
1625 
1626  default:
1627  DmrppParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1628 
1629  break;
1630  }
1631 }
1632 
1637 xmlEntityPtr DmrppParserSax2::dmr_get_entity(void *, const xmlChar * name)
1638 {
1639  return xmlGetPredefinedEntity(name);
1640 }
1641 
1652 void DmrppParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1653 {
1654  va_list args;
1655  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1656 
1657  parser->push_state(parser_fatal_error);
1658 
1659  va_start(args, msg);
1660  char str[1024];
1661  vsnprintf(str, 1024, msg, args);
1662  va_end(args);
1663 
1664  int line = xmlSAX2GetLineNumber(parser->context);
1665 
1666  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1667  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1668 }
1669 
1670 void DmrppParserSax2::dmr_error(void *p, const char *msg, ...)
1671 {
1672  va_list args;
1673  DmrppParserSax2 *parser = static_cast<DmrppParserSax2*>(p);
1674 
1675  parser->push_state(parser_error);
1676 
1677  va_start(args, msg);
1678  char str[1024];
1679  vsnprintf(str, 1024, msg, args);
1680  va_end(args);
1681 
1682  int line = xmlSAX2GetLineNumber(parser->context);
1683 
1684  if (!parser->error_msg.empty()) parser->error_msg += "\n";
1685  parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1686 }
1688 
1692 void DmrppParserSax2::cleanup_parse()
1693 {
1694  bool wellFormed = context->wellFormed;
1695  bool valid = context->valid;
1696 
1697  // context->sax = NULL;
1698  // Leak. Removed the above. jhrg 6/19/19
1699  xmlFreeParserCtxt(context);
1700 
1701  delete d_enum_def;
1702  d_enum_def = 0;
1703 
1704  delete d_dim_def;
1705  d_dim_def = 0;
1706 
1707  // If there's an error, there may still be items on the stack at the
1708  // end of the parse.
1709  while (!btp_stack.empty()) {
1710  delete top_basetype();
1711  pop_basetype();
1712  }
1713 
1714  if (!wellFormed)
1715  throw BESInternalError("The DMR was not well formed. " + error_msg,__FILE__,__LINE__);
1716  else if (!valid)
1717  throw BESInternalError("The DMR was not valid." + error_msg,__FILE__,__LINE__);
1718  else if (get_state() == parser_error)
1719  throw BESInternalError(error_msg,__FILE__,__LINE__);
1720  else if (get_state() == parser_fatal_error) throw BESInternalError(error_msg,__FILE__,__LINE__);
1721 }
1722 
1736 void DmrppParserSax2::intern(istream &f, DMR *dest_dmr)
1737 {
1738  // Code example from libxml2 docs re: read from a stream.
1739 
1740  if (!f.good()) throw BESInternalError(prolog + "ERROR - Supplied istream instance not open or read error",__FILE__,__LINE__);
1741  if (!dest_dmr) throw BESInternalError(prolog + "THe supplied DMR object pointer is null", __FILE__, __LINE__);
1742 
1743  d_dmr = dest_dmr; // dump values here
1744 
1745  int line_num = 1;
1746  string line;
1747 
1748  // Get the XML prolog line (looks like: <?xml ... ?> )
1749  getline(f, line);
1750  if (line.length() == 0) throw BESInternalError(prolog + "ERROR - No input found when parsing the DMR++",__FILE__,__LINE__);
1751 
1752  BESDEBUG(PARSER, prolog << "line: (" << line_num << "): " << endl << line << endl << endl);
1753 
1754  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, line.c_str(), line.length(), "stream");
1755  context->validate = true;
1756  push_state(parser_start);
1757 
1758  // Get the first chunk of the stuff
1759  long chunk_count = 0;
1760  long chunk_size = 0;
1761 
1762  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1763  chunk_size=f.gcount();
1764  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1765  BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1766  BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1767 
1768  while(!f.eof() && (get_state() != parser_end)){
1769 
1770  xmlParseChunk(context, d_parse_buffer, chunk_size, 0);
1771 
1772  // There is more to read. Get the next chunk
1773  f.read(d_parse_buffer, D4_PARSE_BUFF_SIZE);
1774  chunk_size=f.gcount();
1775  d_parse_buffer[chunk_size]=0; // null terminate the string. We can do it this way because the buffer is +1 bigger than D4_PARSE_BUFF_SIZE
1776  BESDEBUG(PARSER, prolog << "chunk: (" << chunk_count++ << "): " << endl);
1777  BESDEBUG(PARSER, prolog << "d_parse_buffer: (" << d_parse_buffer << "): " << endl);
1778  }
1779 
1780  // This call ends the parse.
1781  xmlParseChunk(context, d_parse_buffer, chunk_size, 1/*terminate*/); // libxml2 call
1782 
1783  // This checks that the state on the parser stack is parser_end and throws
1784  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1785  cleanup_parse();
1786 }
1787 
1788 
1789 
1798 void DmrppParserSax2::intern(const string &document, DMR *dest_dmr)
1799 {
1800  intern(document.c_str(), document.length(), dest_dmr);
1801 }
1802 
1811 void DmrppParserSax2::intern(const char *buffer, int size, DMR *dest_dmr)
1812 {
1813  if (!(size > 0)) return;
1814 
1815  // Code example from libxml2 docs re: read from a stream.
1816 
1817  if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1818  d_dmr = dest_dmr; // dump values in dest_dmr
1819 
1820  push_state(parser_start);
1821  context = xmlCreatePushParserCtxt(&dmrpp_sax_parser, this, buffer, size, "stream");
1822  context->validate = true;
1823 
1824  // This call ends the parse.
1825  xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1826 
1827  // This checks that the state on the parser stack is parser_end and throws
1828  // an exception if it's not (i.e., the loop exited with gcount() == 0).
1829  cleanup_parse();
1830 }
1831 
1832 } // namespace dmrpp
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
virtual BESCatalog * default_catalog() const
The default catalog.
const std::string & get_root_dir() const
Get the root directory of the catalog.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
static std::ostream * GetStrm()
return the debug stream
Definition: BESDebug.h:187
static bool IsSet(const std::string &flagName)
see if the debug context flagName is set to true
Definition: BESDebug.h:168
exception thrown if internal error encountered
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
Type
Type of JSON value.
Definition: rapidjson.h:664