33 #include <libdap/BaseType.h>
34 #include <libdap/Array.h>
35 #include <libdap/Type.h>
36 #include <libdap/D4Dimensions.h>
37 #include <libdap/D4Group.h>
38 #include <libdap/D4BaseTypeFactory.h>
39 #include <libdap/D4Enum.h>
40 #include <libdap/D4EnumDefs.h>
41 #include <libdap/D4Attributes.h>
42 #include <libdap/D4Maps.h>
43 #include <libdap/DMR.h>
44 #include <libdap/util.h>
47 #define PUGIXML_NO_XPATH
48 #define PUGIXML_HEADER_ONLY
49 #include <pugixml.hpp>
54 #include "DmrppCommon.h"
55 #include "DmrppArray.h"
56 #include "DmrppD4Group.h"
58 #include "DmrppRequestHandler.h"
59 #include "BESInternalError.h"
// When non-zero, is_eq() compares names with an exact strcmp(), so a prefix
// such as "dmrpp:" is simply part of the literal being matched.
71 #define TREAT_NAMESPACES_AS_LITERALS 1
// When non-zero, get_variable_xml_node() uses the xml_node recorded on the
// variable (DmrppCommon::get_xml_node()) instead of searching the document.
77 #define USE_CACHED_XML_NODE 1
// Message prefix of the form "DMZ::<current function>() - ".
80 #define prolog std::string("DMZ::").append(__func__).append("() - ")
// XML element names that are treated as variable declarations. Looked up with
// member_of() in process_variable(), process_group() and build_thin_dmr().
// NOTE(review): the initializer list is not closed in this view; more entries
// may follow "Sequence" in the full file — confirm before editing.
84 const std::set<std::string> variable_elements{
"Byte",
"Int8",
"Int16",
"Int32",
"Int64",
"UInt8",
"UInt16",
"UInt32",
85 "UInt64",
"Float32",
"Float64",
"String",
"Structure",
"Sequence",
/**
 * @brief Compare a C string read from the XML document with a literal key.
 *
 * With TREAT_NAMESPACES_AS_LITERALS enabled the test is an exact strcmp().
 * The other preprocessor branch first tries the exact match and then compares
 * the text following the first ':' in value with key.
 *
 * @param value Name or value taken from an XML node or attribute.
 * @param key The literal to match.
 * @return True when value matches key under the active rule.
 */
89 static inline bool is_eq(
const char *value,
const char *key)
91 #if TREAT_NAMESPACES_AS_LITERALS
92 return strcmp(value, key) == 0;
// Alternative branch (not compiled while the macro above is 1).
// NOTE(review): the statements between 'found' and 'colon' are not visible
// here; presumably an early return when found is true — confirm.
94 bool found = strcmp(value, key) == 0;
99 const char* colon = strchr(value,
':');
100 return colon && strcmp(colon + 1, key) == 0;
/**
 * @brief Test whether a variable element has a child element named "Dim".
 * @param var_node XML element of a variable.
 * @return The xml_node returned by child("Dim"), converted to bool.
 */
106 static inline bool has_dim_nodes(
const xml_node &var_node)
108 return var_node.child(
"Dim");
/**
 * @brief Set-membership test for an element name.
 * @param elements_set The set of names to search.
 * @param element_name The name to look for.
 * @return True when element_name is present in elements_set.
 */
112 static inline bool member_of(
const set<string> &elements_set,
const string &element_name)
114 return elements_set.find(element_name) != elements_set.end();
/**
 * @brief View a BaseType as its DmrppCommon facet.
 *
 * Performs a dynamic_cast<DmrppCommon*>. The BESInternalError below names the
 * variable, or "unknown" when btp is null.
 * NOTE(review): the condition guarding the throw and the final return are not
 * visible in this view; presumably the throw happens only when the cast
 * yields null — confirm.
 *
 * @param btp The variable to cast.
 * @return Pointer to the DmrppCommon part of btp.
 */
118 static inline DmrppCommon *dc(BaseType *btp)
120 auto *dc =
dynamic_cast<DmrppCommon*
>(btp);
122 throw BESInternalError(
string(
"Expected a BaseType that was also a DmrppCommon instance (")
123 .append((btp) ? btp->name() :
"unknown").append(
")."), __FILE__, __LINE__);
/**
 * @brief Build a DMZ by parsing the named DMR++ XML file.
 * @param file_name Path handed to parse_xml_doc().
 */
132 DMZ::DMZ(
const string &file_name)
134 parse_xml_doc(file_name);
/**
 * @brief Read a DMR++ XML file into d_xml_doc.
 *
 * Opens file_name as a std::ifstream and loads it with pugixml using
 * parse_default | parse_ws_pcdata_single. A BESInternalError carrying the
 * parser's description is raised for a parse failure, and the document
 * element is checked afterwards.
 * NOTE(review): the guards around the throw and the action taken when there
 * is no document element are not visible in this view.
 *
 * @param file_name Path of the DMR++ document.
 */
142 DMZ::parse_xml_doc(
const std::string &file_name)
144 std::ifstream stream(file_name);
151 pugi::xml_parse_result result = d_xml_doc.load(stream, pugi::parse_default | pugi::parse_ws_pcdata_single);
154 throw BESInternalError(
string(
"DMR++ parse error: ").append(result.description()), __FILE__, __LINE__);
156 if (!d_xml_doc.document_element())
/**
 * @brief Copy the attributes of the root XML element into the DMR.
 *
 * Walks every attribute of xml_root and stores name, dapVersion, dmrVersion,
 * base and xmlns on dmr; dmrpp:href, dmrpp:trust and dmrpp:version are kept
 * in locals. Only "name" increments the required-attribute counter. Finally
 * d_dataset_elem_href is reset to a new http::url built from the href and
 * trust values.
 *
 * @param dmr The DMR being populated.
 * @param xml_root The root element of the DMR++ document.
 * @exception BESInternalError when the "name" attribute count is not 1.
 */
169 void DMZ::process_dataset(DMR *dmr,
const xml_node &xml_root)
172 int required_attrs_found = 0;
174 bool href_trusted =
false;
175 string dmrpp_version;
176 for (xml_attribute attr = xml_root.first_attribute(); attr; attr = attr.next_attribute()) {
177 if (is_eq(attr.name(),
"name")) {
178 ++required_attrs_found;
179 dmr->set_name(attr.value());
181 else if (is_eq(attr.name(),
"dapVersion")) {
182 dmr->set_dap_version(attr.value());
184 else if (is_eq(attr.name(),
"dmrVersion")) {
185 dmr->set_dmr_version(attr.value());
// NOTE(review): with TREAT_NAMESPACES_AS_LITERALS set, is_eq() is an exact
// strcmp(), so an attribute spelled "xml:base" would not match "base" even
// though the debug text below refers to xml:base — confirm intended spelling.
187 else if (is_eq(attr.name(),
"base")) {
188 dmr->set_request_xml_base(attr.value());
189 BESDEBUG(PARSER, prolog <<
"Dataset xml:base is set to '" << dmr->request_xml_base() <<
"'" << endl);
192 else if (is_eq(attr.name(),
"xmlns")) {
193 dmr->set_namespace(attr.value());
197 else if (is_eq(attr.name(),
"dmrpp:href")) {
198 href_attr = attr.value();
200 else if (is_eq(attr.name(),
"dmrpp:trust")) {
201 href_trusted = is_eq(attr.value(),
"true");
203 else if (is_eq(attr.name(),
"dmrpp:version")) {
204 dmrpp_version = attr.value();
// No dmrpp:version attribute: switch the request handler to its
// "original filter order" emulation.
209 if (dmrpp_version.empty()) {
210 DmrppRequestHandler::d_emulate_original_filter_order_behavior =
true;
// NOTE(review): no null check on the dynamic_cast result is visible before
// set_version() is called — confirm one exists on the elided lines.
213 auto dmrpp =
dynamic_cast<DMRpp*
>(dmr);
215 dmrpp->set_version(dmrpp_version);
219 if (required_attrs_found != 1)
220 throw BESInternalError(
"DMR++ XML dataset element missing one or more required attributes.", __FILE__, __LINE__);
222 d_dataset_elem_href.reset(
new http::url(href_attr, href_trusted));
/**
 * @brief Add the dimension described by a Dimension element to a group.
 *
 * Reads the "name" and "size" attributes, creates a D4Dimension from them
 * and hands it to the group's dimension table with add_dim_nocopy().
 *
 * @param grp Group that receives the new dimension.
 * @param dimension_node The Dimension XML element.
 * @exception BESInternalError when either attribute is absent or empty.
 */
230 void DMZ::process_dimension(D4Group *grp,
const xml_node &dimension_node)
234 for (xml_attribute attr = dimension_node.first_attribute(); attr; attr = attr.next_attribute()) {
235 if (is_eq(attr.name(),
"name")) {
236 name_value = attr.value();
238 else if (is_eq(attr.name(),
"size")) {
239 size_value = attr.value();
243 if (name_value.empty() || size_value.empty())
244 throw BESInternalError(
"The required attribute 'name' or 'size' was missing from a Dimension element.", __FILE__, __LINE__);
// The size is passed on as the attribute's string value.
248 auto *dimension =
new D4Dimension();
249 dimension->set_name(name_value);
250 dimension->set_size(size_value);
251 grp->dims()->add_dim_nocopy(dimension);
/**
 * @brief Append one dimension to an Array from a Dim element.
 *
 * A Dim element must carry exactly one of "size" (anonymous dimension) or
 * "name" (reference to a D4Dimension). A name starting with '/' is resolved
 * from the DMR's root group, any other name from grp.
 *
 * @param dmr Used to reach the root group for absolute names.
 * @param grp Group used to resolve relative names.
 * @param array The Array that gains the dimension.
 * @param dim_node The Dim XML element.
 * @exception BESInternalError when neither or both attributes are present,
 * or when a named dimension is reported as not found.
 */
265 void DMZ::process_dim(DMR *dmr, D4Group *grp, Array *array,
const xml_node &dim_node)
267 assert(array->is_vector_type());
271 for (xml_attribute attr = dim_node.first_attribute(); attr; attr = attr.next_attribute()) {
272 if (is_eq(attr.name(),
"name")) {
273 name_value = attr.value();
275 else if (is_eq(attr.name(),
"size")) {
276 size_value = attr.value();
280 if (name_value.empty() && size_value.empty())
281 throw BESInternalError(
"Either 'size' or 'name' must be used in a Dim element.", __FILE__, __LINE__);
282 if (!name_value.empty() && !size_value.empty())
283 throw BESInternalError(
"Only one of 'size' and 'name' are allowed in a Dim element, but both were used.", __FILE__, __LINE__);
// NOTE(review): stoi() yields an int and throws std::invalid_argument /
// std::out_of_range (not BESInternalError) for bad or oversized text —
// confirm dimension sizes always fit in int.
285 if (!size_value.empty()) {
286 BESDEBUG(PARSER, prolog <<
"Processing nameless Dim of size: " << stoi(size_value) << endl);
287 array->append_dim(stoi(size_value));
289 else if (!name_value.empty()) {
290 BESDEBUG(PARSER, prolog <<
"Processing Dim with named Dimension reference: " << name_value << endl);
// Absolute names are looked up from the root group, others from grp.
293 if (name_value[0] ==
'/')
294 dim = dmr->root()->find_dim(name_value);
297 dim = grp->find_dim(name_value);
300 throw BESInternalError(
"The dimension '" + name_value +
"' was not found while parsing the variable '" + array->name() +
"'.",__FILE__,__LINE__);
302 array->append_dim(dim);
/**
 * @brief Attach a D4Map to an Array from a Map element.
 *
 * Reads the "name" attribute, prefixes it with grp->FQN() when it does not
 * start with '/', looks the source Array up from the root group and adds a
 * new D4Map(name, source) to the array's maps.
 * NOTE(review): no check for a missing/empty "name" is visible before
 * name_value[0] is read, and the handling of a null map_source is on lines
 * not shown here — confirm both.
 *
 * @param dmr Used to reach the root group.
 * @param grp Group whose FQN qualifies a relative map name.
 * @param array The Array that gains the map.
 * @param map_node The Map XML element.
 */
306 void DMZ::process_map(DMR *dmr, D4Group *grp, Array *array,
const xml_node &map_node)
308 assert(array->is_vector_type());
312 for (xml_attribute attr = map_node.first_attribute(); attr; attr = attr.next_attribute()) {
313 if (is_eq(attr.name(),
"name")) {
314 name_value = attr.value();
319 if (name_value[0] !=
'/')
320 name_value = grp->FQN() + name_value;
323 Array *map_source = dmr->root()->find_map_source(name_value);
337 array->maps()->add_map(
new D4Map(name_value, map_source));
/**
 * @brief Build the variable described by an XML element and add it to the
 * group or to a parent constructor, recursing into Structure/Sequence.
 *
 * The element name selects the libdap Type. An element with "Dim" children
 * goes through add_array_variable(); otherwise add_scalar_variable() is
 * used. For Structure and Sequence types every child element whose name is
 * in variable_elements is processed recursively with the new constructor as
 * parent. The XML node is then recorded on the variable via set_xml_node().
 *
 * @param dmr The DMR being built.
 * @param group Group that owns the variable.
 * @param parent Enclosing constructor; callers in this file pass nullptr for
 * variables held directly by a group.
 * @param var_node The variable's XML element.
 */
354 void DMZ::process_variable(DMR *dmr, D4Group *group, Constructor *parent,
const xml_node &var_node)
360 Type t = get_type(var_node.name());
362 assert(t != dods_group_c);
364 bool is_array_type = has_dim_nodes(var_node);
// Array branch: btp is the Array, btp->var() is its template variable.
367 btp = add_array_variable(dmr, group, parent, t, var_node);
368 if (t == dods_structure_c || t == dods_sequence_c) {
369 assert(btp->type() == dods_array_c && btp->var()->type() == t);
371 parent =
dynamic_cast<Constructor*
>(btp->var());
373 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
374 if (member_of(variable_elements, child.name()))
375 process_variable(dmr, group, parent, child);
// Scalar branch.
380 btp = add_scalar_variable(dmr, group, parent, t, var_node);
381 if (t == dods_structure_c || t == dods_sequence_c) {
382 assert(btp->type() == t);
383 parent =
dynamic_cast<Constructor*
>(btp);
385 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
386 if (member_of(variable_elements, child.name()))
387 process_variable(dmr, group, parent, child);
// Remember where this variable lives in the XML document.
392 dc(btp)->set_xml_node(var_node);
/**
 * @brief Instantiate a variable of type t from an XML element.
 *
 * Reads the "name" (required) and "enum" attributes, creates the variable
 * with the DMR's factory and marks it as DAP4. For dods_enum_c the "enum"
 * attribute is required; a value starting with '/' is resolved from the root
 * group, any other value from group, and the definition is bound with
 * set_enumeration().
 *
 * @param dmr Provides the factory and the root group.
 * @param group Group used to resolve a relative enum name.
 * @param t libdap type of the variable to create.
 * @param var_node The variable's XML element.
 * @return The new variable.
 * @exception BESInternalError for a missing name, a missing enum attribute
 * on an Enum, or (per the messages below) a failed instantiation or unknown
 * enumeration definition.
 */
402 BaseType *DMZ::build_variable(DMR *dmr, D4Group *group,
Type t,
const xml_node &var_node)
404 assert(dmr->factory());
408 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
409 if (is_eq(attr.name(),
"name")) {
410 name_value = attr.value();
// Plain 'if' rather than 'else if': "enum" is tested for every attribute.
412 if (is_eq(attr.name(),
"enum")) {
413 enum_value = attr.value();
417 if (name_value.empty())
418 throw BESInternalError(
"The variable 'name' attribute was missing.", __FILE__, __LINE__);
420 BaseType *btp = dmr->factory()->NewVariable(t, name_value);
422 throw BESInternalError(
"Could not instantiate the variable ' "+ name_value +
"'.", __FILE__, __LINE__);
424 btp->set_is_dap4(
true);
426 if (t == dods_enum_c) {
427 if (enum_value.empty())
428 throw BESInternalError(
"The variable ' " + name_value +
"' lacks an 'enum' attribute.", __FILE__, __LINE__);
431 if (enum_value[0] ==
'/')
432 enum_def = dmr->root()->find_enum_def(enum_value);
434 enum_def = group->find_enum_def(enum_value);
437 throw BESInternalError(
"Could not find the Enumeration definition '" + enum_value +
"'.", __FILE__, __LINE__);
// dynamic_cast to a reference: throws std::bad_cast if btp is not a D4Enum.
439 dynamic_cast<D4Enum&
>(*btp).set_enumeration(enum_def);
/**
 * @brief Build a scalar variable and add it to its container.
 *
 * The variable comes from build_variable() and is handed over with
 * add_var_nocopy() to either parent or group.
 * NOTE(review): the test that chooses between parent and group is not
 * visible here; presumably parent is used when it is non-null — confirm.
 *
 * @param dmr The DMR being built.
 * @param group Group that receives the variable when there is no parent.
 * @param parent Enclosing constructor, if any.
 * @param t libdap type of the variable.
 * @param var_node The variable's XML element.
 * @return The new variable.
 */
455 BaseType *DMZ::add_scalar_variable(DMR *dmr, D4Group *group, Constructor *parent,
Type t,
const xml_node &var_node)
459 BaseType *btp = build_variable(dmr, group, t, var_node);
464 parent->add_var_nocopy(btp);
466 group->add_var_nocopy(btp);
/**
 * @brief Build an Array variable and add it to its container.
 *
 * build_variable() creates the template variable; a dods_array_c variable
 * with the same name wraps it (add_var_nocopy). Each "Dim" child goes to
 * process_dim() and each "Map" child to process_map(). The array is then
 * added to parent or group with add_var_nocopy().
 * NOTE(review): the test that chooses between parent and group is not
 * visible here; presumably parent is used when it is non-null — confirm.
 *
 * @param dmr The DMR being built.
 * @param group Group that owns the variable and resolves dimension names.
 * @param parent Enclosing constructor, if any.
 * @param t libdap type of the array's template variable.
 * @param var_node The variable's XML element.
 * @return The new Array.
 */
485 BaseType *DMZ::add_array_variable(DMR *dmr, D4Group *group, Constructor *parent,
Type t,
const xml_node &var_node)
489 BaseType *btp = build_variable(dmr, group, t, var_node);
// static_cast: the factory is trusted to return an Array for dods_array_c.
492 auto *array =
static_cast<Array*
>(dmr->factory()->NewVariable(dods_array_c, btp->name()));
493 array->set_is_dap4(
true);
494 array->add_var_nocopy(btp);
500 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
501 if (is_eq(child.name(),
"Dim")) {
502 process_dim(dmr, group, array, child);
504 else if (is_eq(child.name(),
"Map")) {
505 process_map(dmr, group, array, child);
510 parent->add_var_nocopy(array);
512 group->add_var_nocopy(array);
/**
 * @brief Create a child group from a Group element and process its content.
 *
 * Requires a "name" attribute, creates a dods_group_c variable with the
 * DMR's factory, casts it to DmrppD4Group, links it under parent and records
 * the XML node on it. Child "Dimension" and "Group" elements and elements
 * named in variable_elements are then processed with the new group as owner.
 *
 * @param dmr The DMR being built.
 * @param parent Group that receives the new child group.
 * @param var_node The Group XML element.
 * @exception BESInternalError when "name" is missing or, per the message
 * below, the group cannot be instantiated.
 */
525 void DMZ::process_group(DMR *dmr, D4Group *parent,
const xml_node &var_node)
528 for (xml_attribute attr = var_node.first_attribute(); attr; attr = attr.next_attribute()) {
529 if (is_eq(attr.name(),
"name")) {
530 name_value = attr.value();
534 if (name_value.empty())
535 throw BESInternalError(
"The required attribute 'name' was missing from a Group element.", __FILE__, __LINE__);
537 BaseType *btp = dmr->factory()->NewVariable(dods_group_c, name_value);
539 throw BESInternalError(
"Could not instantiate the Group '" + name_value +
"'.", __FILE__, __LINE__);
541 auto new_group =
dynamic_cast<DmrppD4Group*
>(btp);
545 new_group->set_is_dap4(
true);
548 new_group->set_parent(parent);
549 parent->add_group_nocopy(new_group);
552 new_group->set_xml_node(var_node);
556 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
557 if (is_eq(child.name(),
"Dimension")) {
558 process_dimension(new_group, child);
560 else if (is_eq(child.name(),
"Group")) {
561 process_group(dmr, new_group, child);
// Variables held directly by a group have no constructor parent.
563 else if (member_of(variable_elements, child.name())) {
564 process_variable(dmr, new_group,
nullptr, child);
/**
 * @brief Populate a DMR with the groups, dimensions and variables found in
 * the parsed document.
 *
 * The document's first child is processed as the dataset element, the root
 * group gets that node recorded via set_xml_node(), and each child
 * "Dimension", "Group" or variable element is dispatched to its handler.
 * Attribute and chunk elements are not handled here; see load_attributes()
 * and load_chunks().
 *
 * @param dmr The DMR to fill.
 * @exception BESInternalError when the root group is not a DmrppD4Group
 * (per the message below).
 */
574 void DMZ::build_thin_dmr(DMR *dmr)
576 auto xml_root_node = d_xml_doc.first_child();
578 process_dataset(dmr, xml_root_node);
580 auto root_group = dmr->root();
584 throw BESInternalError(
"Expected the root group to also be an instance of DmrppD4Group.", __FILE__, __LINE__);
586 dg->set_xml_node(xml_root_node);
588 for (
auto child = xml_root_node.first_child(); child; child = child.next_sibling()) {
589 if (is_eq(child.name(),
"Dimension")) {
590 process_dimension(dg, child);
592 else if (is_eq(child.name(),
"Group")) {
593 process_group(dmr, dg, child);
596 else if (member_of(variable_elements, child.name())) {
597 process_variable(dmr, dg,
nullptr, child);
/**
 * @brief Convert one Attribute element into a D4Attribute.
 *
 * "name" and "type" are required. A "Container" attribute is added first and
 * its child "Attribute" elements are processed recursively into its own
 * attribute table. "OtherXML" has its own branch (body not visible here).
 * Any other type becomes a D4Attribute whose values are taken from the text
 * of each child "Value" element.
 *
 * @param attributes Table that receives the new attribute.
 * @param dap_attr_node The Attribute XML element.
 * @exception BESInternalError when "name" or "type" is missing or empty.
 */
610 void DMZ::process_attribute(D4Attributes *attributes,
const xml_node &dap_attr_node)
614 for (xml_attribute attr = dap_attr_node.first_attribute(); attr; attr = attr.next_attribute()) {
615 if (is_eq(attr.name(),
"name")) {
616 name_value = attr.value();
618 if (is_eq(attr.name(),
"type")) {
619 type_value = attr.value();
623 if (name_value.empty() || type_value.empty())
624 throw BESInternalError(
"The required attribute 'name' or 'type' was missing from an Attribute element.", __FILE__, __LINE__);
626 if (type_value ==
"Container") {
628 auto *dap_attr_cont =
new D4Attribute(name_value, attr_container_c);
629 attributes->add_attribute_nocopy(dap_attr_cont);
// Recurse into nested attributes of the container.
634 if (dap_attr_node.first_child()) {
635 for (
auto attr_node: dap_attr_node.children(
"Attribute")) {
636 process_attribute(dap_attr_cont->attributes(), attr_node);
640 else if (type_value ==
"OtherXML") {
// Ordinary typed attribute: one value per child "Value" element.
645 auto *attribute =
new D4Attribute(name_value, StringToD4AttributeType(type_value));
646 attributes->add_attribute_nocopy(attribute);
648 for (
auto value_elem = dap_attr_node.first_child(); value_elem; value_elem = value_elem.next_sibling()) {
649 if (is_eq(value_elem.name(),
"Value")) {
650 attribute->add_value(value_elem.child_value());
/**
 * @brief Walk from a variable up through its parents, collecting them on a
 * stack.
 *
 * Recurses into the parent unless the parent is the root group (a
 * dods_group_c with no parent of its own).
 * NOTE(review): the statement that pushes onto bt is not visible in this
 * view — confirm where btp is pushed relative to the recursion.
 *
 * @param btp The variable to start from.
 * @param bt Stack that receives the chain.
 */
673 void DMZ::build_basetype_chain(BaseType *btp, stack<BaseType*> &bt)
675 auto parent = btp->get_parent();
679 if (parent && !(parent->type() == dods_group_c && parent->get_parent() ==
nullptr))
680 build_basetype_chain(parent, bt);
/**
 * @brief Search-based lookup of a variable's XML node (compiled only when
 * USE_CACHED_XML_NODE is 0).
 *
 * The parameters are unnamed in the signature; the guarded body refers to
 * them as parent_node and bt. For the variable on top of the stack it takes
 * the type name (the template's type name for an Array) and the variable
 * name, scans parent_node's children for an element whose "name" attribute
 * matches, and recurses with that node.
 *
 * @return The xml_node found for the variable.
 */
683 xml_node DMZ::get_variable_xml_node_helper(
const xml_node &, stack<BaseType*> &)
685 #if !USE_CACHED_XML_NODE
692 if (bt.top()->type() == dods_array_c && bt.top()->var()->is_constructor_type())
698 string type_name = bt.top()->type() == dods_array_c ? bt.top()->var()->type_name(): bt.top()->type_name();
699 string var_name = bt.top()->name();
// child(type_name) finds the first candidate; next_sibling() then visits
// every following sibling regardless of its element name.
703 for (
auto node = parent_node.child(type_name.c_str()); node; node = node.next_sibling()) {
704 for (xml_attribute attr = node.first_attribute(); attr; attr = attr.next_attribute()) {
705 if (is_eq(attr.name(),
"name") && is_eq(attr.value(), var_name.c_str())) {
710 return get_variable_xml_node_helper(node, bt);
/**
 * @brief Return the XML node that describes a variable.
 *
 * With USE_CACHED_XML_NODE set, the node is read from the variable's
 * DmrppCommon (recorded earlier with set_xml_node()). The other branch
 * builds the parent chain, checks that the document's first child is
 * "Dataset" and searches with get_variable_xml_node_helper().
 *
 * @param btp The variable to look up.
 * @return The variable's xml_node.
 * @exception BESInternalError when no node was recorded for the variable
 * (per the message below).
 */
727 xml_node DMZ::get_variable_xml_node(BaseType *btp)
const
729 #if USE_CACHED_XML_NODE
730 auto node = dc(btp)->get_xml_node();
732 throw BESInternalError(
string(
"The xml_node for '").append(btp->name()).append(
"' was not recorded."), __FILE__, __LINE__);
// Search-based branch.
740 build_basetype_chain(btp, bt);
742 xml_node dataset = d_xml_doc.first_child();
743 if (!dataset || !is_eq(dataset.name(),
"Dataset"))
746 auto node = get_variable_xml_node_helper(dataset, bt);
/**
 * @brief Load the DAP attributes of a variable from its XML node, once.
 *
 * Checks get_attributes_loaded() first, then loads from the node returned by
 * get_variable_xml_node() and sets the loaded flag. The switch on the
 * variable's type also sets the flag on the template variable (btp->var())
 * and, for Structure/Sequence, inside a loop over the members.
 *
 * @param btp The variable whose attributes are loaded.
 */
769 DMZ::load_attributes(BaseType *btp)
771 if (dc(btp)->get_attributes_loaded())
774 load_attributes(btp, get_variable_xml_node(btp));
777 dc(btp)->set_attributes_loaded(
true);
779 switch (btp->type()) {
787 dc(btp->var())->set_attributes_loaded(
true);
794 case dods_structure_c:
795 case dods_sequence_c:
797 auto *c =
dynamic_cast<Constructor*
>(btp);
// NOTE(review): the loop iterates members with 'i' but flags btp->var() on
// every pass and never dereferences i; this looks like it was meant to be
// dc(*i) — confirm before relying on the members' loaded flags.
799 for (
auto i = c->var_begin(), e = c->var_end(); i != e; i++) {
800 dc(btp->var())->set_attributes_loaded(
true);
/**
 * @brief Load attributes for a variable from a given XML node.
 *
 * Checks get_attributes_loaded() first. Every child "Attribute" element of
 * var_node is passed to process_attribute() together with the variable's
 * attribute table, then the loaded flag is set.
 *
 * @param btp The variable that receives the attributes.
 * @param var_node XML element whose Attribute children are read.
 */
817 DMZ::load_attributes(BaseType *btp, xml_node var_node)
const
819 if (dc(btp)->get_attributes_loaded())
// Qualified call: uses BaseType's attributes() even if a subclass overrides it.
827 auto attributes = btp->BaseType::attributes();
828 for (
auto child = var_node.first_child(); child; child = child.next_sibling()) {
829 if (is_eq(child.name(),
"Attribute")) {
830 process_attribute(attributes, child);
834 dc(btp)->set_attributes_loaded(
true);
/**
 * @brief Load attributes for a constructor and visit its members.
 *
 * Loads the constructor's own attributes from its XML node, then iterates
 * over its member variables; a member is asserted never to be a group.
 * NOTE(review): the per-member call is not visible in this view.
 *
 * @param constructor The Structure or Sequence to process.
 */
842 DMZ::load_attributes(Constructor *constructor)
844 load_attributes(constructor, get_variable_xml_node(constructor));
845 for (
auto i = constructor->var_begin(), e = constructor->var_end(); i != e; ++i) {
847 assert((*i)->type() != dods_group_c);
/**
 * @brief Load attributes for a group, its variables and its child groups.
 *
 * The root group (no parent) takes its attributes from the document's
 * "Dataset" element; any other group uses its recorded XML node. The
 * function then iterates over the group's variables (asserted not to be
 * groups) and over its child groups.
 * NOTE(review): the calls made inside the two loops are not visible in this
 * view.
 *
 * @param group The group to process.
 * @exception BESInternalError when the "Dataset" element cannot be found
 * (per the message below).
 */
853 DMZ::load_attributes(D4Group *group) {
855 if (group->get_parent() ==
nullptr) {
856 xml_node dataset = d_xml_doc.child(
"Dataset");
858 throw BESInternalError(
"Could not find the 'Dataset' element in the DMR++ XML document.", __FILE__, __LINE__);
859 load_attributes(group, dataset);
862 load_attributes(group, get_variable_xml_node(group));
865 for (
auto i = group->var_begin(), e = group->var_end(); i != e; ++i) {
870 assert((*i)->type() != dods_group_c);
874 for (
auto i = group->grp_begin(), e = group->grp_end(); i != e; ++i) {
/**
 * @brief Load attributes for everything in the DMR, starting at the root
 * group.
 * @param dmr The DMR whose variables and groups receive attributes.
 */
879 void DMZ::load_all_attributes(libdap::DMR *dmr)
// NOTE(review): d_xml_doc is used as an object elsewhere in this file
// (d_xml_doc.load(), d_xml_doc.child()); comparing it with nullptr may not
// test what is intended — confirm.
881 assert(d_xml_doc !=
nullptr);
882 load_attributes(dmr->root());
/**
 * @brief Load the values of a variable stored inline in a dmrpp:compact
 * element.
 *
 * The element's character data is base64-decoded into a byte vector. The
 * variable must be an array. Depending on the array's template type the
 * bytes are copied in with val2buf(), or converted to a std::string for the
 * string case; read_p is set afterwards. An Array template or an unsupported
 * type raises an error.
 *
 * @param btp The variable that receives the values.
 * @param compact The dmrpp:compact XML element.
 * @exception BESInternalError for missing data, a non-array variable, or an
 * unsupported template type.
 */
902 DMZ::process_compact(BaseType *btp,
const xml_node &compact)
906 auto char_data = compact.child_value();
908 throw BESInternalError(
"The dmrpp::compact is missing data values.",__FILE__,__LINE__);
910 std::vector <u_int8_t> decoded = base64::Base64::decode(char_data);
912 if (btp->type() != dods_array_c)
913 throw BESInternalError(
"The dmrpp::compact element must be the child of an array variable",__FILE__,__LINE__);
916 switch (btp->var()->type()) {
918 throw BESInternalError(
"DMR++ document fail: An Array may not be the template for an Array.", __FILE__, __LINE__);
// NOTE(review): &decoded[0] is only valid when decoded is non-empty; the
// guard for empty data above is on a line not visible here — confirm it
// also covers an empty decode result.
935 btp->val2buf(
reinterpret_cast<void *
>(&decoded[0]));
936 btp->set_read_p(
true);
// String case: the decoded bytes are treated as one string.
941 std::string str(decoded.begin(), decoded.end());
942 auto *st =
static_cast<DmrppArray *
>(btp);
946 st->set_read_p(
true);
// NOTE(review): the message below spells the handler name "drmpp".
951 throw BESInternalError(
"Unsupported COMPACT storage variable type in the drmpp handler.", __FILE__, __LINE__);
/**
 * @brief Register one dmrpp:chunk element with a variable.
 *
 * Reads the href, trust, offset, nBytes and chunkPositionInArray attributes.
 * offset and size are required. The chunk is added with add_chunk() using
 * either a URL built from this element's href/trust or the dataset-level
 * d_dataset_elem_href.
 * NOTE(review): the test that selects between the two add_chunk() calls is
 * not visible here; presumably the per-chunk URL is used when href is
 * non-empty — confirm.
 *
 * @param dc DmrppCommon facet of the variable that owns the chunk.
 * @param chunk The dmrpp:chunk XML element.
 * @exception BESInternalError when offset or size is missing.
 */
962 void DMZ::process_chunk(DmrppCommon *dc,
const xml_node &chunk)
const
968 string chunk_position_in_array;
970 bool href_trusted =
false;
972 for (xml_attribute attr = chunk.first_attribute(); attr; attr = attr.next_attribute()) {
973 if (is_eq(attr.name(),
"href")) {
976 else if (is_eq(attr.name(),
"trust")) {
977 href_trusted = is_eq(attr.value(),
"true");
979 else if (is_eq(attr.name(),
"offset")) {
980 offset = attr.value();
982 else if (is_eq(attr.name(),
"nBytes")) {
985 else if (is_eq(attr.name(),
"chunkPositionInArray")) {
986 chunk_position_in_array = attr.value();
990 if (offset.empty() || size.empty())
991 throw BESInternalError(
"Both size and offset are required for a chunk node.", __FILE__, __LINE__);
// NOTE(review): add_chunk() is declared with unsigned long long size and
// offset, but stoi() parses into int and throws std::out_of_range for values
// above INT_MAX (e.g. offsets past 2 GiB); std::stoull looks like the
// matching conversion — confirm and fix in both calls.
997 shared_ptr<http::url> data_url(
new http::url(href, href_trusted));
998 dc->
add_chunk(data_url, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
1001 dc->
add_chunk(d_dataset_elem_href, dc->get_byte_order(), stoi(size), stoi(offset), chunk_position_in_array);
/**
 * @brief Find the dmrpp:chunkDimensionSizes children of a dmrpp:chunks
 * element and read their text.
 *
 * Starts at the first child with that name and walks the following
 * siblings, re-checking the name on each one.
 * NOTE(review): what is done with 'sizes' is not visible in this view;
 * presumably it is handed to dc->parse_chunk_dimension_sizes() — confirm.
 *
 * @param dc DmrppCommon facet of the variable being configured.
 * @param chunks The dmrpp:chunks XML element.
 */
1011 void DMZ::process_cds_node(DmrppCommon *dc,
const xml_node &chunks)
1013 for (
auto child = chunks.child(
"dmrpp:chunkDimensionSizes"); child ; child = child.next_sibling()) {
1014 if (is_eq(child.name(),
"dmrpp:chunkDimensionSizes")) {
1015 string sizes = child.child_value();
/**
 * @brief Process a dmrpp:chunks element: its attributes, its chunk
 * dimension sizes and each dmrpp:chunk child.
 *
 * The attribute loop looks for "compressionType" (handling not visible
 * here). process_cds_node() reads the chunk dimension sizes, and every
 * sibling named "dmrpp:chunk" is passed to process_chunk().
 *
 * @param dc DmrppCommon facet of the variable that owns the chunks.
 * @param chunks The dmrpp:chunks XML element.
 */
1023 void DMZ::process_chunks(DmrppCommon *dc,
const xml_node &chunks)
1025 for (xml_attribute attr = chunks.first_attribute(); attr; attr = attr.next_attribute()) {
1026 if (is_eq(attr.name(),
"compressionType")) {
1032 process_cds_node(dc, chunks);
// next_sibling() visits every later sibling, so the name is re-checked.
1035 for (
auto chunk = chunks.child(
"dmrpp:chunk"); chunk; chunk = chunk.next_sibling()) {
1036 if (is_eq(chunk.name(),
"dmrpp:chunk")) {
1037 process_chunk(dc, chunk);
/**
 * @brief Load the chunk (or compact) storage information for a variable,
 * once.
 *
 * Checks get_chunks_loaded() first. The variable's XML node is searched for
 * a "dmrpp:chunks", a "dmrpp:chunk" and a "dmrpp:compact" child, each handed
 * to its processing function. When DmrppRequestHandler::d_require_chunks is
 * set, the three counters must add up to exactly one, otherwise an error
 * message is built. The chunks-loaded flag is set at the end.
 * NOTE(review): the tests around each process_* call, the counter updates
 * and the throw that uses the message are not visible in this view.
 *
 * @param btp The variable whose storage information is loaded.
 * @exception BESInternalError when the variable's XML node cannot be found.
 */
1050 void DMZ::load_chunks(BaseType *btp)
1052 if (dc(btp)->get_chunks_loaded())
1056 xml_node var_node = get_variable_xml_node(btp);
1057 if (var_node ==
nullptr)
1058 throw BESInternalError(
"Could not find location of variable in the DMR++ XML document.", __FILE__, __LINE__);
1062 int chunks_found = 0;
1063 int chunk_found = 0;
1064 int compact_found = 0;
1065 auto child = var_node.child(
"dmrpp:chunks");
1068 process_chunks(dc(btp), child);
1071 auto chunk = var_node.child(
"dmrpp:chunk");
1074 process_chunk(dc(btp), chunk);
1078 auto compact = var_node.child(
"dmrpp:compact");
1081 process_compact(btp, compact);
1085 if (DmrppRequestHandler::d_require_chunks) {
1086 int elements_found = chunks_found + chunk_found + compact_found;
1087 if (elements_found != 1) {
1089 oss <<
"Expected chunk, chunks or compact information in the DMR++ data. Found " << elements_found
1090 <<
" types of nodes.";
1095 dc(btp)->set_chunks_loaded(
true);
Exception thrown if an internal error is encountered.
virtual void parse_chunk_dimension_sizes(const std::string &chunk_dim_sizes_string)
Set the dimension sizes for a chunk.
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
void set_filter(const std::string &value)
Set the value of the filters property.
void set_compact(bool value)
Set the value of the compact property.