37 #include <H5Ppublic.h>
38 #include <H5Dpublic.h>
39 #include <H5Epublic.h>
40 #include <H5Zpublic.h>
41 #include <H5Spublic.h>
45 #include <libdap/Array.h>
46 #include <libdap/util.h>
// Fragment of a chunk record typedef. NOTE(review): it uses H5O_LAYOUT_NDIMS
// before the #define shown at original line 94, and the same typedef header
// appears again at original line 107, so this copy is presumably inside a
// comment or a disabled region - confirm against the complete file.
61 typedef struct H5D_chunk_rec_t {
62 hsize_t scaled[H5O_LAYOUT_NDIMS];
70 #include <libdap/D4Attributes.h>
71 #include <libdap/BaseType.h>
72 #include <libdap/D4ParserSax2.h>
76 #include <TheBESKeys.h>
81 #include <BESNotFoundError.h>
82 #include <BESInternalError.h>
83 #include <BESDataHandlerInterface.h>
86 #include "DmrppTypeFactory.h"
87 #include "DmrppD4Group.h"
88 #include "DmrppMetadataStore.h"
// Local definitions of the rank limit (32) and the layout dimension count
// (rank limit + 1). Presumably these mirror HDF5 internal constants - confirm.
93 #define H5S_MAX_RANK 32
94 #define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1)
// Chunk record: an array of H5O_LAYOUT_NDIMS hsize_t values named scaled and a
// 32-bit filter_mask are visible; other members are not shown in this listing.
107 typedef struct H5D_chunk_rec_t {
108 hsize_t scaled[H5O_LAYOUT_NDIMS];
110 uint32_t filter_mask;
117 using namespace dmrpp;
// Gate for the VERBOSE() macro below; initialised to false.
119 static bool verbose =
false;
// Runs x only when verbose is true. The do/while(false) wrapper lets the macro
// be used as a single statement.
120 #define VERBOSE(x) do { if (verbose) x; } while(false)
// Comma-separated list of debug context names.
122 #define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp"
// Name of the BES configuration key for the catalog root directory.
123 #define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory"
/**
 * Read the values of an HDF5 dataset into a caller-supplied buffer.
 *
 * Visible steps: fetch the dataset datatype and dataspace, derive the native
 * memory type with H5Tget_native_type, call H5Dread passing the dataset
 * dataspace as both the memory space and the file space (no selection is made
 * in the visible code), then close the datatype, the memory type and the
 * dataspace.
 *
 * @param dset Identifier of an open HDF5 dataset.
 * @param buf  Destination buffer. No size is passed or checked here, so the
 *             caller presumably must provide room for the whole dataset.
 * @throws InternalErr when any HDF5 call shown here reports failure.
 *
 * NOTE(review): this listing omits lines, so handle cleanup on the error
 * paths cannot be verified from here.
 */
136 void get_data(hid_t dset,
void *buf)
138 BESDEBUG(
"h5",
">get_data()" << endl);
// Datatype and dataspace handles feed the H5Dread call further down.
141 if ((dtype = H5Dget_type(dset)) < 0) {
142 throw InternalErr(__FILE__, __LINE__,
"Failed to get the datatype of the dataset");
145 if ((dspace = H5Dget_space(dset)) < 0) {
147 throw InternalErr(__FILE__, __LINE__,
"Failed to get the data space of the dataset");
// Translate the stored datatype into the matching native in-memory type.
150 hid_t memtype = H5Tget_native_type(dtype, H5T_DIR_ASCEND);
154 throw InternalErr(__FILE__, __LINE__,
"failed to get memory type");
// The same dataspace is used for the memory and the file side of the read.
157 if (H5Dread(dset, memtype, dspace, dspace, H5P_DEFAULT, buf)
162 throw InternalErr(__FILE__, __LINE__,
"failed to read data");
// Release the three handles obtained above; a failed close is reported too.
165 if (H5Tclose(dtype) < 0){
168 throw InternalErr(__FILE__, __LINE__,
"Unable to release the dtype.");
171 if (H5Tclose(memtype) < 0){
173 throw InternalErr(__FILE__, __LINE__,
"Unable to release the memtype.");
176 if(H5Sclose(dspace)<0) {
177 throw InternalErr(__FILE__, __LINE__,
"Unable to release the data space.");
187 BESDEBUG(
"h5",
"<get_data()" << endl);
/**
 * Read variable-length strings from an HDF5 dataset into finstrval.
 *
 * A dataset whose dataspace is H5S_SCALAR is read with H5S_ALL for both
 * spaces. Otherwise a hyperslab is selected on the file dataspace (the
 * arguments of that call are cut off in this listing) and a memory space is
 * created from hcount. After the read, each ty_size-wide slot of the raw
 * buffer is reinterpreted as a char pointer and copied into finstrval[i];
 * the string memory is then handed back with H5Dvlen_reclaim.
 *
 * @param dsetid    Identifier of an open HDF5 dataset.
 * @param nelms     Number of strings to read; also sizes the raw buffer.
 * @param hoffset   Hyperslab offset - use not visible here; presumably passed
 *                  to H5Sselect_hyperslab.
 * @param hstep     Hyperslab stride - same caveat as hoffset.
 * @param hcount    Per-dimension counts; visibly used to create the memory
 *                  space for the non-scalar case.
 * @param finstrval Output vector, indexed 0..nelms-1. It is not resized in the
 *                  visible code, so the caller presumably sizes it first.
 * @return The return statements are not visible in this listing.
 * @throws InternalErr when an HDF5 call shown here reports failure.
 *
 * NOTE(review): the repeated checks of is_scalar that precede each throw
 * presumably close the memory space for the non-scalar case; the guarded
 * statements are not shown, so confirm against the complete file.
 */
190 bool read_vlen_string(hid_t dsetid,
int nelms, hsize_t *hoffset, hsize_t *hstep, hsize_t *hcount,vector<string> &finstrval)
197 bool is_scalar =
false;
200 if ((dspace = H5Dget_space(dsetid))<0) {
201 throw InternalErr (__FILE__, __LINE__,
"Cannot obtain data space.");
// Scalar datasets skip the hyperslab and memory-space setup below.
204 if(H5S_SCALAR == H5Sget_simple_extent_type(dspace))
208 if (
false == is_scalar) {
209 if (H5Sselect_hyperslab(dspace, H5S_SELECT_SET,
213 throw InternalErr (__FILE__, __LINE__,
"Cannot generate the hyperslab of the HDF5 dataset.");
216 int d_num_dim = H5Sget_simple_extent_ndims(dspace);
219 throw InternalErr (__FILE__, __LINE__,
"Cannot obtain the number of dimensions of the data space.");
// Memory space has the same rank as the file space and hcount elements per
// dimension.
222 mspace = H5Screate_simple(d_num_dim, hcount,NULL);
225 throw InternalErr (__FILE__, __LINE__,
"Cannot create the memory space.");
230 if ((dtypeid = H5Dget_type(dsetid)) < 0) {
232 if (
false == is_scalar)
235 throw InternalErr (__FILE__, __LINE__,
"Cannot obtain the datatype.");
239 if ((memtype = H5Tget_native_type(dtypeid, H5T_DIR_ASCEND))<0) {
241 if (
false == is_scalar)
245 throw InternalErr (__FILE__, __LINE__,
"Fail to obtain memory datatype.");
249 size_t ty_size = H5Tget_size(memtype);
251 if (
false == is_scalar)
256 throw InternalErr (__FILE__, __LINE__,
"Fail to obtain the size of HDF5 string.");
// Raw buffer: nelms slots of ty_size bytes each.
259 vector <char> strval;
260 strval.resize(nelms*ty_size);
262 if (
true == is_scalar)
263 read_ret = H5Dread(dsetid,memtype,H5S_ALL,H5S_ALL,H5P_DEFAULT,(
void*)&strval[0]);
265 read_ret = H5Dread(dsetid,memtype,mspace,dspace,H5P_DEFAULT,(
void*)&strval[0]);
268 if (
false == is_scalar)
273 throw InternalErr (__FILE__, __LINE__,
"Fail to read the HDF5 variable length string dataset.");
// Each slot is read as a char pointer and copied into the output vector.
// NOTE(review): a null check on onestring and the advance of temp_bp are not
// visible in this listing - confirm they exist in the complete file.
277 char*temp_bp = &strval[0];
278 char*onestring = NULL;
279 for (
int i =0;i<nelms;i++) {
280 onestring = *(
char**)temp_bp;
282 finstrval[i] =string(onestring);
// Return the string memory; the scalar case passes the file dataspace and the
// non-scalar case passes the memory space.
288 if (
false == strval.empty()) {
289 herr_t ret_vlen_claim;
290 if (
true == is_scalar)
291 ret_vlen_claim = H5Dvlen_reclaim(memtype,dspace,H5P_DEFAULT,(
void*)&strval[0]);
293 ret_vlen_claim = H5Dvlen_reclaim(memtype,mspace,H5P_DEFAULT,(
void*)&strval[0]);
294 if (ret_vlen_claim < 0){
295 if (
false == is_scalar)
300 throw InternalErr (__FILE__, __LINE__,
"Cannot reclaim the memory buffer of the HDF5 variable length string.");
305 if (
false == is_scalar)
/**
 * Write a diagnostic description of a dataset to cerr: its storage layout
 * and, where it can be read, its fill value.
 *
 * Every layout message says the storage size is 0, so this is presumably
 * called only for datasets with no stored data - confirm at the call site,
 * which is not visible in this listing.
 *
 * For integer and float datatypes the creation property list is queried for
 * the fill value status. An undefined fill value is reported as such. For a
 * defined one, a buffer of 1, 2, 4 or 8 bytes is allocated, filled by
 * H5Pget_fill_value and printed with a type name chosen from the size and
 * sign. Larger types and other datatype classes only get a hint to use
 * H5Pget_fill_value directly.
 *
 * @param dataset     Identifier of an open HDF5 dataset.
 * @param layout_type Numeric layout code; 1, 2 and 3 are printed as
 *                    contiguous, chunking and compact.
 * @throws BESInternalError when an HDF5 query shown here fails.
 *
 * NOTE(review): HDF5 documents H5D_COMPACT as 0 and H5D_VIRTUAL as 3, so the
 * 3-means-compact mapping looks wrong - confirm against H5D_layout_t.
 * NOTE(review): fvalue is assigned from char pointer casts, so streaming
 * *fvalue presumably prints one character rather than the typed fill value
 * for the short, int, long long, float and double cases - confirm.
 * NOTE(review): the H5Pget_fill_value failure reuses the message used for the
 * fill value status query; it looks like a copy of the earlier message.
 */
327 static void print_dataset_type_info(hid_t dataset, uint8_t layout_type) {
328 hid_t dtype_id = H5Dget_type(dataset);
330 throw BESInternalError(
"Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
// Only integer and float classes get the fill value treatment.
333 if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
334 hid_t dcpl_id = H5Dget_create_plist(dataset);
336 throw BESInternalError(
"Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
342 H5D_fill_value_t fvalue_status;
343 if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
345 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Undefined fill value: report the layout and say so.
347 if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
349 if (layout_type == 1)
350 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
351 else if (layout_type == 2)
352 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
353 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
355 cerr <<
" The Fillvalue is undefined ." << endl;
// Defined fill value: report the layout, then fetch and print the value.
357 if (layout_type == 1)
358 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
359 else if (layout_type == 2)
360 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
361 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
// The buffer size follows the datatype size; only 1, 2, 4 and 8 are handled.
364 size_t fv_size = H5Tget_size(dtype_id);
366 fvalue = (
char *) (malloc(1));
367 else if (fv_size == 2)
368 fvalue = (
char *) (malloc(2));
369 else if (fv_size == 4)
370 fvalue = (
char *) (malloc(4));
371 else if (fv_size == 8) fvalue = (
char *) (malloc(8));
374 if (H5Pget_fill_value(dcpl_id, dtype_id, (
void *) (fvalue)) < 0) {
376 throw BESInternalError(
"Cannot obtain the fill value status.", __FILE__, __LINE__);
// Integer class: the printed type name depends on size and signedness.
378 if (H5Tget_class(dtype_id) == H5T_INTEGER) {
379 H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
381 if (fv_sign == H5T_SGN_NONE) {
382 cerr <<
"This dataset's datatype is unsigned char " << endl;
383 cerr <<
"and the fillvalue is " << *fvalue << endl;
385 cerr <<
"This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
387 }
else if (fv_size == 2) {
388 if (fv_sign == H5T_SGN_NONE) {
389 cerr <<
"This dataset's datatype is unsigned short and the fillvalue is " << *fvalue
392 cerr <<
"This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
394 }
else if (fv_size == 4) {
395 if (fv_sign == H5T_SGN_NONE) {
396 cerr <<
"This dataset's datatype is unsigned int and the fillvalue is " << *fvalue
399 cerr <<
"This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
401 }
else if (fv_size == 8) {
402 if (fv_sign == H5T_SGN_NONE) {
403 cerr <<
"This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue
406 cerr <<
"This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
// Float class: the size check for the first branch is not visible here.
410 if (H5Tget_class(dtype_id) == H5T_FLOAT) {
412 cerr <<
"This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
413 }
else if (fv_size == 8) {
414 cerr <<
"This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
418 if (fvalue != NULL) free(fvalue);
421 <<
"The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
// Datatype is neither integer nor float: report the layout and a hint only.
431 if (layout_type == 1)
432 cerr <<
" The storage size is 0 and the storage type is contiguous." << endl;
433 else if (layout_type == 2)
434 cerr <<
" The storage size is 0 and the storage type is chunking." << endl;
435 else if (layout_type == 3) cerr <<
" The storage size is 0 and the storage type is compact." << endl;
438 <<
"The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
/**
 * Build a space-separated list of the filters applied to a dataset.
 *
 * Each filter on the dataset creation property list is mapped to a name:
 * deflate, shuffle or fletcher32. Any other filter leads to an error message
 * that starts with the text Unsupported HDF5 filter; the statement that uses
 * that message is not visible in this listing. The trailing space left by the
 * last append is removed with substr.
 *
 * @param dataset_id Identifier of an open HDF5 dataset.
 * @param dc         Object that receives the filter list. The call that stores
 *                   it is not visible here - presumably dc->set_filter; verify.
 *
 * NOTE(review): no release of plist_id is visible in this listing - confirm
 * that it is closed on all paths.
 */
461 static void set_filter_information(hid_t dataset_id,
DmrppCommon *dc) {
462 hid_t plist_id = H5Dget_create_plist(dataset_id);
465 int numfilt = H5Pget_nfilters(plist_id);
466 VERBOSE(cerr <<
"Number of filters associated with dataset: " << numfilt << endl);
469 for (
int filter = 0; filter < numfilt; filter++) {
471 unsigned int flags, filter_info;
// Only the filter identifier is needed; no client data values or name are
// requested (NULL and 0 are passed for those arguments).
472 H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
473 VERBOSE(cerr <<
"Filter Type: ");
475 switch (filter_type) {
476 case H5Z_FILTER_DEFLATE:
477 VERBOSE(cerr <<
"H5Z_FILTER_DEFLATE" << endl);
479 filters.append(
"deflate ");
481 case H5Z_FILTER_SHUFFLE:
482 VERBOSE(cerr <<
"H5Z_FILTER_SHUFFLE" << endl);
484 filters.append(
"shuffle ");
486 case H5Z_FILTER_FLETCHER32:
487 VERBOSE(cerr <<
"H5Z_FILTER_FLETCHER32" << endl);
489 filters.append(
"fletcher32 ");
492 ostringstream oss(
"Unsupported HDF5 filter: ", std::ios::ate);
// Drop the trailing space. For an empty list, length() - 1 wraps around and
// substr still returns an empty string.
499 filters = filters.substr(0, filters.length() - 1);
/**
 * Collect the storage information of one HDF5 dataset and record it on dc.
 *
 * The byte order comes from H5Tget_order on the dataset datatype; an
 * unrecognised value leads to an error message (the statement that uses it is
 * not visible in this listing). The layout returned by H5Pget_layout then
 * selects one of these paths:
 *  - contiguous: the dataset offset and storage size are added as one chunk
 *    with an empty position string;
 *  - chunked: filter information is recorded, the chunk dimensions are read
 *    with H5Pget_chunk, and each chunk reported by H5Dget_chunk_info is added
 *    with its address, size and coordinates;
 *  - compact: the values are read into memory with get_data or
 *    read_vlen_string and stored directly in the Array obtained from dc.
 * Any other layout leads to an error message about an unsupported layout.
 *
 * @param dataset Identifier of an open HDF5 dataset.
 * @param dc      Target for the chunk information. The contiguous and chunked
 *                paths guard add_chunk with a null test on dc.
 * @throws BESInternalError for the failures shown in the visible code.
 *
 * NOTE(review): layout_type is a uint8_t, so a negative error value from
 * H5Pget_layout cannot be represented, and streaming it into the error
 * message prints a character rather than a number.
 * NOTE(review): chunk_dims is a vector of size_t whose storage is cast to
 * hsize_t pointer; this assumes both types have the same width - confirm.
 * NOTE(review): the datatype is fetched twice (ftype_id and dtypeid) and no
 * close calls are visible in this listing - confirm the handles are released.
 * NOTE(review): the compact path uses the result of the dynamic_cast to Array
 * with no visible null check.
 */
520 static void get_variable_chunk_info(hid_t dataset,
DmrppCommon *dc) {
521 std::string byteOrder =
"";
522 H5T_order_t byte_order = H5T_ORDER_ERROR;
525 hid_t dcpl = H5Dget_create_plist(dataset);
526 uint8_t layout_type = H5Pget_layout(dcpl);
528 hid_t fspace_id = H5Dget_space(dataset);
529 hid_t ftype_id = H5Dget_type(dataset);
531 byte_order = H5Tget_order(ftype_id);
532 switch (byte_order) {
542 ostringstream oss(
"Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
543 oss << byte_order <<
".";
548 unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
550 hid_t dtypeid = H5Dget_type(dataset);
// Element size in bytes; used by the compact path to size the value buffer.
552 size_t dsize = H5Tget_size(dtypeid);
555 switch (layout_type) {
// Contiguous storage: one chunk described by offset and storage size.
557 case H5D_CONTIGUOUS: {
558 haddr_t cont_addr = 0;
559 hsize_t cont_size = 0;
561 VERBOSE(cerr <<
"Storage: contiguous" << endl);
563 cont_addr = H5Dget_offset(dataset);
568 cont_size = H5Dget_storage_size(dataset);
575 VERBOSE(cerr <<
" Addr: " << cont_addr << endl);
576 VERBOSE(cerr <<
" Size: " << cont_size << endl);
577 VERBOSE(cerr <<
"byteOrder: " << byteOrder << endl);
580 if (dc) dc->
add_chunk(byteOrder, cont_size, cont_addr,
"" );
// Chunked storage: record filters, then one entry per chunk.
585 hsize_t num_chunks = 0;
586 herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
592 VERBOSE(cerr <<
"Storage: chunked." << endl);
593 VERBOSE(cerr <<
"Number of chunks is: " << num_chunks << endl);
596 set_filter_information(dataset, dc);
599 vector<size_t> chunk_dims(dataset_rank);
600 unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t *) &chunk_dims[0]);
601 if (chunk_rank != dataset_rank)
603 "Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__,
// NOTE(review): the index is unsigned int while num_chunks is hsize_t.
608 for (
unsigned int i = 0; i < num_chunks; ++i) {
610 vector<hsize_t> temp_coords(dataset_rank);
611 vector<unsigned long long> chunk_coords(dataset_rank);
617 status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
619 VERBOSE(cerr <<
"ERROR" << endl);
620 throw BESInternalError(
"Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
623 VERBOSE(cerr <<
"chk_idk: " << i <<
", addr: " << addr <<
", size: " << size << endl);
// Copy the hsize_t coordinates into the unsigned long long vector that is
// passed to add_chunk.
627 for (
unsigned int j = 0; j < chunk_coords.size(); ++j) {
628 chunk_coords[j] = temp_coords[j];
631 if (dc) dc->
add_chunk(byteOrder, size, addr, chunk_coords);
// Compact storage: the values are read here and stored in the variable.
639 VERBOSE(cerr <<
"Storage: compact" << endl);
641 size_t comp_size = H5Dget_storage_size(dataset);
642 VERBOSE(cerr <<
" Size: " << comp_size << endl);
644 if (comp_size == 0) {
649 vector<uint8_t> values;
651 Array *btp =
dynamic_cast<Array *
>(dc);
// Expected byte count: element count times element size; compared against
// the reported storage size below.
654 size_t memRequired = btp->length() * dsize;
656 if (comp_size != memRequired) {
661 switch (btp->var()->type()) {
// Value types handled by this case: read raw bytes and load them with
// val2buf. Only the last case label of the group is visible in this listing.
673 case dods_uint64_c: {
674 values.resize(memRequired);
675 get_data(dataset,
reinterpret_cast<void *
>(&values[0]));
676 btp->set_read_p(
true);
677 btp->val2buf(
reinterpret_cast<void *
>(&values[0]));
// Variable-length string: read a single element.
683 if (H5Tis_variable_str(dtypeid) > 0) {
684 vector<string> finstrval = {
""};
685 read_vlen_string(dataset, 1, NULL, NULL, NULL, finstrval);
686 btp->set_value(finstrval, finstrval.size());
687 btp->set_read_p(
true);
// Otherwise the raw bytes are read and turned into one string; the assert
// requires a single element.
697 assert(btp->length() == 1);
698 values.resize(memRequired);
699 get_data(dataset,
reinterpret_cast<void *
>(&values[0]));
700 string str(values.begin(), values.end());
701 vector<string> strings = {str};
702 btp->set_value(strings, strings.size());
703 btp->set_read_p(
true);
709 throw BESInternalError(
"Unsupported compact storage variable type.", __FILE__, __LINE__);
720 ostringstream oss(
"Unsupported HDF5 dataset layout type: ", std::ios::ate);
721 oss << layout_type <<
".";
/**
 * Walk every variable of a DAP4 group, open the matching HDF5 dataset and
 * record its chunk information, then recurse into the child groups.
 *
 * The dataset path (FQN) is taken from the first value of the fullnamepath
 * attribute when that attribute has exactly one value. How FQN is set
 * otherwise is not visible in this listing.
 *
 * @param file  Identifier of an open HDF5 file.
 * @param group Group whose variables and child groups are processed.
 * @throws BESInternalError when a variable has no attribute table, or when a
 *         dataset cannot be opened although the fullnamepath attribute exists.
 *
 * NOTE(review): three overlapping variants of the lookup and open sequence
 * are visible (original lines 766-773, 778-781 and 786-803). Presumably the
 * earlier ones are disabled by directives or comments that this listing does
 * not show - confirm against the complete file.
 * NOTE(review): no H5Dclose call is visible in this listing - confirm that
 * each opened dataset is released.
 */
742 static void get_chunks_for_all_variables(hid_t file, D4Group *group) {
744 for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
747 D4Attributes *d4_attrs = (*v)->attributes();
749 throw BESInternalError(
"Expected to find an attribute table for " + (*v)->name() +
" but did not.",
755 D4Attribute *attr = d4_attrs->get(
"fullnamepath");
// First variant: attr is dereferenced without a visible null test.
766 if (attr->num_values() == 1)
767 FQN = attr->value(0);
770 BESDEBUG(
"dmrpp",
"Working on: " << FQN << endl);
771 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
773 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
// Second variant: turns off the automatic HDF5 error printing before the open.
778 H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
780 BESDEBUG(
"dmrpp",
"Working on: " << FQN << endl);
781 dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// Third variant: attr is tested for null before use.
786 if (attr && attr->num_values() == 1)
787 FQN = attr->value(0);
791 VERBOSE(cerr <<
"Working on: " << FQN << endl);
792 hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
// A failed open without the attribute is only reported on cerr; with the
// attribute present it is an error.
796 if (dataset < 0 && attr == 0) {
797 cerr<<
"Unable to open dataset name "<<FQN <<endl;
800 else if (dataset < 0)
801 throw BESInternalError(
"HDF5 dataset '" + FQN +
"' cannot be opened.", __FILE__, __LINE__);
803 get_variable_chunk_info(dataset,
dynamic_cast<DmrppCommon *
>(*v));
// Recurse into each child group; the loop header is not visible here.
807 D4Group::groupsIter g = group->grp_begin();
808 D4Group::groupsIter ge = group->grp_end();
810 get_chunks_for_all_variables(file, *g++);
/**
 * Build a string from the program arguments.
 *
 * The visible code loops over all argc entries; the loop body and the return
 * statement are not shown in this listing. The result is used as the value of
 * the invocation attribute in inject_version_and_configuration, so presumably
 * it is the reconstructed command line - confirm against the complete file.
 *
 * @param argc Number of entries in argv.
 * @param argv Argument strings.
 * @return A string derived from the arguments.
 */
813 string cmdln(
int argc,
char *argv[]){
815 for(
int i=0; i<argc; i++) {
/**
 * Record build and invocation details on a DMR++ document.
 *
 * Sets the document version to CVER, then creates a container attribute named
 * build_dmrpp_metadata with these string children: build_dmrpp, bes, libdap,
 * configuration and invocation. The container is added to the attributes of
 * the root group.
 *
 * @param argc  Argument count, forwarded to cmdln for the invocation value.
 * @param argv  Argument strings, forwarded to cmdln.
 * @param dmrpp Document to annotate; dereferenced without a null test.
 *
 * NOTE(review): the attributes are created with new and handed to
 * add_attribute_nocopy; presumably the receiving table takes ownership -
 * confirm against the libdap documentation.
 * NOTE(review): the bes attribute receives the same CVER value as
 * build_dmrpp - confirm that this is the intended BES version.
 * NOTE(review): the statement that gives the configuration attribute its
 * value is not visible in this listing.
 */
823 void inject_version_and_configuration(
int argc,
char **argv,
DMRpp *dmrpp){
825 dmrpp->set_version(CVER);
// Container that groups all entries below.
828 D4Attribute *version =
new D4Attribute(
"build_dmrpp_metadata", StringToD4AttributeType(
"container"));
830 D4Attribute *build_dmrpp_version =
new D4Attribute(
"build_dmrpp", StringToD4AttributeType(
"string"));
831 build_dmrpp_version->add_value(CVER);
832 version->attributes()->add_attribute_nocopy(build_dmrpp_version);
834 D4Attribute *bes_version =
new D4Attribute(
"bes", StringToD4AttributeType(
"string"));
835 bes_version->add_value(CVER);
836 version->attributes()->add_attribute_nocopy(bes_version);
// libdap entry has the form name-version.
839 ldv << libdap_name() <<
"-" << libdap_version();
840 D4Attribute *libdap4_version =
new D4Attribute(
"libdap", StringToD4AttributeType(
"string"));
841 libdap4_version->add_value(ldv.str());
842 version->attributes()->add_attribute_nocopy(libdap4_version);
846 D4Attribute *config =
new D4Attribute(
"configuration", StringToD4AttributeType(
"string"));
848 version->attributes()->add_attribute_nocopy(config);
852 D4Attribute *invoke =
new D4Attribute(
"invocation", StringToD4AttributeType(
"string"));
853 invoke->add_value(cmdln(argc,argv));
854 version->attributes()->add_attribute_nocopy(invoke);
// Attach the finished container to the root group.
857 D4Attributes *top_level_attrs = dmrpp->root()->attributes();
858 top_level_attrs->add_attribute_nocopy(version);
// Help text for the program, written to cerr by the statement that follows
// the literal. The enclosing function header is not visible in this listing.
// NOTE(review): the text has spelling slips (componets, substitue, configued),
// repeats the program name on the usage line for the -r form, and leaves the
// parenthesis opened before the words so it does not run unclosed. These are
// runtime strings, so they are left untouched here; fix them in the complete
// file.
862 const char *help = R
"(
863 build_dmrpp -h: Show this help
865 build_dmrpp -V: Show build versions for componets that make up the program
867 build_dmrpp -c <bes.conf> -f <data file> [-u <href url>]: Build the DMR++ using the <bes.conf>
868 options to initialize the software for the <data file>. Optionally substitue the <href url>.
869 Builds the DMR using the HDF5 handler as configued using the options in the <bes.conf>.
871 build_dmrpp build_dmrpp -f <data file> -r <dmr file> [-u <href url>]: As above, but uses the DMR
872 read from the given file (so it does not run the HDF5 handler code.
876 -d: Turn on BES software debugging output
877 -M: Add information about the build_dmrpp software, incl versions, to the built DMR++)";
879 cerr << help << endl;
/**
 * Program entry point: build a DMR++ document for an HDF5 file and write it
 * to standard output.
 *
 * Options are parsed with getopt using the option string c:f:r:u:dhvVM. Only
 * two assignments are visible in this listing: h5_file_name is set from an
 * option argument and add_production_metadata is set to true. Which option
 * letter triggers each is not shown. An HDF5 file name is required.
 *
 * Two paths are visible:
 *  - when dmr_name is not empty, the DMR is parsed from that file into a
 *    DMR++ object, the HDF5 file is opened read-only, build metadata is
 *    optionally injected, chunk information is collected for all variables
 *    and the document is printed;
 *  - otherwise the BES root directory key and the Metadata Store are
 *    required, a DMR++ object is taken from the store, the HDF5 file at
 *    h5_file_path is opened, chunk information is collected, the href is set,
 *    the response is added back to the store and the document is printed.
 *
 * Exceptions derived from std::exception are reported on cerr with their
 * what() text; anything else is reported as an unknown error.
 *
 * @param argc Number of entries in argv.
 * @param argv Argument strings.
 * @return The exit status values are not visible in this listing.
 *
 * NOTE(review): no H5Fclose call is visible in this listing - confirm that
 * the opened file handle is released.
 */
882 int main(
int argc,
char *argv[]) {
883 string h5_file_name =
"";
884 string h5_dset_path =
"";
885 string dmr_name =
"";
886 string url_name =
"";
888 bool add_production_metadata =
false;
891 while ((option_char = getopt(argc, argv,
"c:f:r:u:dhvVM")) != -1) {
892 switch (option_char) {
// Version banner: program name, CVER and the libdap name and version.
894 cerr << basename(argv[0]) <<
"-" << CVER <<
" (bes-"<< CVER <<
", " << libdap_name() <<
"-"
895 << libdap_version() <<
")" << endl;
907 h5_file_name = optarg;
923 add_production_metadata =
true;
935 if (h5_file_name.empty()) {
936 cerr <<
"HDF5 file name must be given (-f <input>)." << endl;
// Path 1: the DMR is read from the file named by dmr_name.
948 if (!dmr_name.empty()) {
952 dmrpp.set_factory(&dtf);
954 ifstream in(dmr_name.c_str());
956 parser.intern(in, &dmrpp,
false);
959 file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
961 cerr <<
"Error: HDF5 file '" + h5_file_name +
"' cannot be opened." << endl;
965 if(add_production_metadata) {
966 inject_version_and_configuration(argc, argv, &dmrpp);
970 get_chunks_for_all_variables(file, dmrpp.root());
975 cout << writer.get_doc();
// Path 2: the DMR++ object comes from the Metadata Store.
978 string bes_data_root;
982 cerr <<
"Error: Could not find the BES root directory key." << endl;
994 cerr <<
"The Metadata Store (MDS) must be configured for this command to work." << endl;
1010 cerr <<
"Expected a DMR++ object from the DmrppMetadataStore." << endl;
1015 file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
1017 cerr <<
"Error: HDF5 file '" + h5_file_path +
"' cannot be opened." << endl;
1021 get_chunks_for_all_variables(file, dmrpp->root());
1023 dmrpp->set_href(url_name);
1025 mds->add_dmrpp_response(dmrpp.get(), h5_file_name );
1028 dmrpp->set_print_chunks(
true);
1031 cout << writer.get_doc();
1033 cerr <<
"Error: Could not get a lock on the DMR for '" + h5_file_path +
"'." << endl;
1042 catch (std::exception &e) {
1043 cerr <<
"std::exception: " << e.what() << endl;
1047 cerr <<
"Unknown error." << endl;
static void SetUp(const std::string &values)
Sets up debugging for the bes.
Abstract exception class for the BES with basic string message.
virtual std::string get_message()
get the error message for this exception
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
static TheBESKeys * TheKeys()
static std::string ConfigFile
Provide a way to print the DMR++ response.
virtual void print_dmrpp(libdap::XMLWriter &xml, const std::string &href="", bool constrained=false, bool print_chunks=true)
Print the DMR++ response.
void print_dap4(libdap::XMLWriter &xml, bool constrained=false)
override DMR::print_dap4() so the chunk info will print too.
Size and offset information of data included in DMR++ files.
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
void set_filter(const std::string &value)
Set the value of the filters property.
void set_compact(bool value)
Set the value of the compact property.
void get_data(hid_t dset, void *buf)