bes  Updated for version 3.20.10
build_dmrpp.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the Hyrax data server.
4 
5 // Copyright (c) 2018 OPeNDAP, Inc.
6 // Author: James Gallagher <jgallagher@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 #include "config.h"
25 
26 #include <iostream>
27 #include <fstream>
28 #include <sstream>
29 #include <memory>
30 #include <iterator>
31 #include <algorithm>
32 
33 #include <unistd.h>
34 #include <cstdlib>
35 #include <libgen.h>
36 
37 #include <H5Ppublic.h>
38 #include <H5Dpublic.h>
39 #include <H5Epublic.h>
40 #include <H5Zpublic.h> // Constants for compression filters
41 #include <H5Spublic.h>
42 #include "h5common.h"
43 
44 //#include <libdap/D4Attributes.h>
45 #include <libdap/Array.h>
46 #include <libdap/util.h>
47 
48 
49 #if 0
50 /*
51  * "Generic" chunk record. Each chunk is keyed by the minimum logical
52  * N-dimensional coordinates and the datatype size of the chunk.
53  * The fastest-varying dimension is assumed to reference individual bytes of
54  * the array, so a 100-element 1-D array of 4-byte integers would really be a
55  * 2-D array with the slow varying dimension of size 100 and the fast varying
56  * dimension of size 4 (the storage dimensionality has very little to do with
57  * the real dimensionality).
58  *
59  * The chunk's file address, filter mask and size on disk are not key values.
60  */
61 typedef struct H5D_chunk_rec_t {
62  hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset to start */
63  uint32_t nbytes; /* Size of stored data */
64  uint32_t filter_mask; /* Excluded filters */
65  haddr_t chunk_addr; /* Address of chunk in file */
66 } H5D_chunk_rec_t;
67 #endif
68 
69 //#include <DMRpp.h>
70 #include <libdap/D4Attributes.h>
71 #include <libdap/BaseType.h>
72 #include <libdap/D4ParserSax2.h>
73 //#include <GetOpt.h>
74 
75 //#include <BESDapNames.h>
76 #include <TheBESKeys.h>
77 #include <BESUtil.h>
78 #include <BESDebug.h>
79 
80 #include <BESError.h>
81 #include <BESNotFoundError.h>
82 #include <BESInternalError.h>
83 #include <BESDataHandlerInterface.h>
84 
85 #include "DMRpp.h"
86 #include "DmrppTypeFactory.h"
87 #include "DmrppD4Group.h"
88 #include "DmrppMetadataStore.h"
89 //#include "BESDapNames.h"
90 #if 0
91 //#define H5D_FRIEND // Workaround, needed to use H5D_chunk_rec_t
92 //#include <H5Dpkg.h>
93 #define H5S_MAX_RANK 32
94 #define H5O_LAYOUT_NDIMS (H5S_MAX_RANK+1)
95 
96 /*
97  * "Generic" chunk record. Each chunk is keyed by the minimum logical
98  * N-dimensional coordinates and the datatype size of the chunk.
99  * The fastest-varying dimension is assumed to reference individual bytes of
100  * the array, so a 100-element 1-D array of 4-byte integers would really be a
101  * 2-D array with the slow varying dimension of size 100 and the fast varying
102  * dimension of size 4 (the storage dimensionality has very little to do with
103  * the real dimensionality).
104  *
105  * The chunk's file address, filter mask and size on disk are not key values.
106  */
107 typedef struct H5D_chunk_rec_t {
108  hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset to start */
109  uint32_t nbytes; /* Size of stored data */
110  uint32_t filter_mask; /* Excluded filters */
111  haddr_t chunk_addr; /* Address of chunk in file */
112 } H5D_chunk_rec_t;
113 #endif
114 
115 using namespace std;
116 using namespace libdap;
117 using namespace dmrpp;
118 
119 static bool verbose = false;
120 #define VERBOSE(x) do { if (verbose) x; } while(false)
121 
122 #define DEBUG_KEY "metadata_store,dmrpp_store,dmrpp"
123 #define ROOT_DIRECTORY "BES.Catalog.catalog.RootDirectory"
124 
125 #if 0
135 
136 void get_data(hid_t dset, void *buf)
137 {
138  BESDEBUG("h5", ">get_data()" << endl);
139 
140  hid_t dtype = -1;
141  if ((dtype = H5Dget_type(dset)) < 0) {
142  throw InternalErr(__FILE__, __LINE__, "Failed to get the datatype of the dataset");
143  }
144  hid_t dspace = -1;
145  if ((dspace = H5Dget_space(dset)) < 0) {
146  H5Tclose(dtype);
147  throw InternalErr(__FILE__, __LINE__, "Failed to get the data space of the dataset");
148  }
149  // Use HDF5 H5Tget_native_type API
150  hid_t memtype = H5Tget_native_type(dtype, H5T_DIR_ASCEND);
151  if (memtype < 0) {
152  H5Tclose(dtype);
153  H5Sclose(dspace);
154  throw InternalErr(__FILE__, __LINE__, "failed to get memory type");
155  }
156 
157  if (H5Dread(dset, memtype, dspace, dspace, H5P_DEFAULT, buf)
158  < 0) {
159  H5Tclose(dtype);
160  H5Tclose(memtype);
161  H5Sclose(dspace);
162  throw InternalErr(__FILE__, __LINE__, "failed to read data");
163  }
164 
165  if (H5Tclose(dtype) < 0){
166  H5Tclose(memtype);
167  H5Sclose(dspace);
168  throw InternalErr(__FILE__, __LINE__, "Unable to release the dtype.");
169  }
170 
171  if (H5Tclose(memtype) < 0){
172  H5Sclose(dspace);
173  throw InternalErr(__FILE__, __LINE__, "Unable to release the memtype.");
174  }
175 
176  if(H5Sclose(dspace)<0) {
177  throw InternalErr(__FILE__, __LINE__, "Unable to release the data space.");
178  }
179 #if 0
180  // Supposed to release the resource at the release at the HDF5Array destructor.
181  //if (H5Dclose(dset) < 0){
182  // throw InternalErr(__FILE__, __LINE__, "Unable to close the dataset.");
183  //}
184  }
185 #endif
186 
187  BESDEBUG("h5", "<get_data()" << endl);
188 }
189 
190 bool read_vlen_string(hid_t dsetid, int nelms, hsize_t *hoffset, hsize_t *hstep, hsize_t *hcount,vector<string> &finstrval)
191 {
192 
193  hid_t dspace = -1;
194  hid_t mspace = -1;
195  hid_t dtypeid = -1;
196  hid_t memtype = -1;
197  bool is_scalar = false;
198 
199 
200  if ((dspace = H5Dget_space(dsetid))<0) {
201  throw InternalErr (__FILE__, __LINE__, "Cannot obtain data space.");
202  }
203 
204  if(H5S_SCALAR == H5Sget_simple_extent_type(dspace))
205  is_scalar = true;
206 
207 
208  if (false == is_scalar) {
209  if (H5Sselect_hyperslab(dspace, H5S_SELECT_SET,
210  hoffset, hstep,
211  hcount, NULL) < 0) {
212  H5Sclose(dspace);
213  throw InternalErr (__FILE__, __LINE__, "Cannot generate the hyperslab of the HDF5 dataset.");
214  }
215 
216  int d_num_dim = H5Sget_simple_extent_ndims(dspace);
217  if(d_num_dim < 0) {
218  H5Sclose(dspace);
219  throw InternalErr (__FILE__, __LINE__, "Cannot obtain the number of dimensions of the data space.");
220  }
221 
222  mspace = H5Screate_simple(d_num_dim, hcount,NULL);
223  if (mspace < 0) {
224  H5Sclose(dspace);
225  throw InternalErr (__FILE__, __LINE__, "Cannot create the memory space.");
226  }
227  }
228 
229 
230  if ((dtypeid = H5Dget_type(dsetid)) < 0) {
231 
232  if (false == is_scalar)
233  H5Sclose(mspace);
234  H5Sclose(dspace);
235  throw InternalErr (__FILE__, __LINE__, "Cannot obtain the datatype.");
236 
237  }
238 
239  if ((memtype = H5Tget_native_type(dtypeid, H5T_DIR_ASCEND))<0) {
240 
241  if (false == is_scalar)
242  H5Sclose(mspace);
243  H5Tclose(dtypeid);
244  H5Sclose(dspace);
245  throw InternalErr (__FILE__, __LINE__, "Fail to obtain memory datatype.");
246 
247  }
248 
249  size_t ty_size = H5Tget_size(memtype);
250  if (ty_size == 0) {
251  if (false == is_scalar)
252  H5Sclose(mspace);
253  H5Tclose(memtype);
254  H5Tclose(dtypeid);
255  H5Sclose(dspace);
256  throw InternalErr (__FILE__, __LINE__,"Fail to obtain the size of HDF5 string.");
257  }
258 
259  vector <char> strval;
260  strval.resize(nelms*ty_size);
261  hid_t read_ret = -1;
262  if (true == is_scalar)
263  read_ret = H5Dread(dsetid,memtype,H5S_ALL,H5S_ALL,H5P_DEFAULT,(void*)&strval[0]);
264  else
265  read_ret = H5Dread(dsetid,memtype,mspace,dspace,H5P_DEFAULT,(void*)&strval[0]);
266 
267  if (read_ret < 0) {
268  if (false == is_scalar)
269  H5Sclose(mspace);
270  H5Tclose(memtype);
271  H5Tclose(dtypeid);
272  H5Sclose(dspace);
273  throw InternalErr (__FILE__, __LINE__, "Fail to read the HDF5 variable length string dataset.");
274  }
275 
276  // For scalar, nelms is 1.
277  char*temp_bp = &strval[0];
278  char*onestring = NULL;
279  for (int i =0;i<nelms;i++) {
280  onestring = *(char**)temp_bp;
281  if(onestring!=NULL )
282  finstrval[i] =string(onestring);
283  else // We will add a NULL if onestring is NULL.
284  finstrval[i]="";
285  temp_bp +=ty_size;
286  }
287 
288  if (false == strval.empty()) {
289  herr_t ret_vlen_claim;
290  if (true == is_scalar)
291  ret_vlen_claim = H5Dvlen_reclaim(memtype,dspace,H5P_DEFAULT,(void*)&strval[0]);
292  else
293  ret_vlen_claim = H5Dvlen_reclaim(memtype,mspace,H5P_DEFAULT,(void*)&strval[0]);
294  if (ret_vlen_claim < 0){
295  if (false == is_scalar)
296  H5Sclose(mspace);
297  H5Tclose(memtype);
298  H5Tclose(dtypeid);
299  H5Sclose(dspace);
300  throw InternalErr (__FILE__, __LINE__, "Cannot reclaim the memory buffer of the HDF5 variable length string.");
301 
302  }
303  }
304 
305  if (false == is_scalar)
306  H5Sclose(mspace);
307  H5Tclose(memtype);
308  H5Tclose(dtypeid);
309  H5Sclose(dspace);
310 
311  return true;
312 
313 }
314 #endif
315 
316 #if 0
327 static void print_dataset_type_info(hid_t dataset, uint8_t layout_type) {
328  hid_t dtype_id = H5Dget_type(dataset);
329  if (dtype_id < 0) {
330  throw BESInternalError("Cannot obtain the correct HDF5 datatype.", __FILE__, __LINE__);
331  }
332 
333  if (H5Tget_class(dtype_id) == H5T_INTEGER || H5Tget_class(dtype_id) == H5T_FLOAT) {
334  hid_t dcpl_id = H5Dget_create_plist(dataset);
335  if (dcpl_id < 0) {
336  throw BESInternalError("Cannot obtain the HDF5 dataset creation property list.", __FILE__, __LINE__);
337  }
338 
339  try {
340  // Wrap the resources like dcpl_id in try/catch blocks so that the
341  // calls to H5Pclose(dcpl_id) for each error can be removed. jhrg 5/7/18
342  H5D_fill_value_t fvalue_status;
343  if (H5Pfill_value_defined(dcpl_id, &fvalue_status) < 0) {
344  H5Pclose(dcpl_id);
345  throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
346  }
347  if (fvalue_status == H5D_FILL_VALUE_UNDEFINED) {
348  // Replace with switch(), here and elsewhere. jhrg 5/7/18
349  if (layout_type == 1)
350  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
351  else if (layout_type == 2)
352  cerr << " The storage size is 0 and the storage type is chunking." << endl;
353  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
354 
355  cerr << " The Fillvalue is undefined ." << endl;
356  } else {
357  if (layout_type == 1)
358  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
359  else if (layout_type == 2)
360  cerr << " The storage size is 0 and the storage type is chunking." << endl;
361  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
362 
363  char *fvalue = NULL;
364  size_t fv_size = H5Tget_size(dtype_id);
365  if (fv_size == 1)
366  fvalue = (char *) (malloc(1));
367  else if (fv_size == 2)
368  fvalue = (char *) (malloc(2));
369  else if (fv_size == 4)
370  fvalue = (char *) (malloc(4));
371  else if (fv_size == 8) fvalue = (char *) (malloc(8));
372 
373  if (fv_size <= 8) {
374  if (H5Pget_fill_value(dcpl_id, dtype_id, (void *) (fvalue)) < 0) {
375  H5Pclose(dcpl_id);
376  throw BESInternalError("Cannot obtain the fill value status.", __FILE__, __LINE__);
377  }
378  if (H5Tget_class(dtype_id) == H5T_INTEGER) {
379  H5T_sign_t fv_sign = H5Tget_sign(dtype_id);
380  if (fv_size == 1) {
381  if (fv_sign == H5T_SGN_NONE) {
382  cerr << "This dataset's datatype is unsigned char " << endl;
383  cerr << "and the fillvalue is " << *fvalue << endl;
384  } else {
385  cerr << "This dataset's datatype is char and the fillvalue is " << *fvalue << endl;
386  }
387  } else if (fv_size == 2) {
388  if (fv_sign == H5T_SGN_NONE) {
389  cerr << "This dataset's datatype is unsigned short and the fillvalue is " << *fvalue
390  << endl;
391  } else {
392  cerr << "This dataset's datatype is short and the fillvalue is " << *fvalue << endl;
393  }
394  } else if (fv_size == 4) {
395  if (fv_sign == H5T_SGN_NONE) {
396  cerr << "This dataset's datatype is unsigned int and the fillvalue is " << *fvalue
397  << endl;
398  } else {
399  cerr << "This dataset's datatype is int and the fillvalue is " << *fvalue << endl;
400  }
401  } else if (fv_size == 8) {
402  if (fv_sign == H5T_SGN_NONE) {
403  cerr << "This dataset's datatype is unsigned long long and the fillvalue is " << *fvalue
404  << endl;
405  } else {
406  cerr << "This dataset's datatype is long long and the fillvalue is " << *fvalue << endl;
407  }
408  }
409  }
410  if (H5Tget_class(dtype_id) == H5T_FLOAT) {
411  if (fv_size == 4) {
412  cerr << "This dataset's datatype is float and the fillvalue is " << *fvalue << endl;
413  } else if (fv_size == 8) {
414  cerr << "This dataset's datatype is double and the fillvalue is " << *fvalue << endl;
415  }
416  }
417 
418  if (fvalue != NULL) free(fvalue);
419  } else
420  cerr
421  << "The size of the datatype is greater than 8 bytes, Use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
422  << endl;
423  }
424  }
425  catch (...) {
426  H5Pclose(dcpl_id);
427  throw;
428  }
429  H5Pclose(dcpl_id);
430  } else {
431  if (layout_type == 1)
432  cerr << " The storage size is 0 and the storage type is contiguous." << endl;
433  else if (layout_type == 2)
434  cerr << " The storage size is 0 and the storage type is chunking." << endl;
435  else if (layout_type == 3) cerr << " The storage size is 0 and the storage type is compact." << endl;
436 
437  cerr
438  << "The datatype is neither float nor integer,use HDF5 API H5Pget_fill_value() to retrieve the fill value of this dataset."
439  << endl;
440  }
441 }
442 #endif
443 
444 // FYI: Filter IDs
445 // H5Z_FILTER_ERROR (-1) no filter
446 // H5Z_FILTER_NONE 0 reserved indefinitely
447 // H5Z_FILTER_DEFLATE 1 deflation like gzip
448 // H5Z_FILTER_SHUFFLE 2 shuffle the data
449 // H5Z_FILTER_FLETCHER32 3 fletcher32 checksum of EDC
450 // H5Z_FILTER_SZIP 4 szip compression
451 // H5Z_FILTER_NBIT 5 nbit compression
452 // H5Z_FILTER_SCALEOFFSET 6 scale+offset compression
453 // H5Z_FILTER_RESERVED 256 filter ids below this value are reserved for library use
454 
461 static void set_filter_information(hid_t dataset_id, DmrppCommon *dc) {
462  hid_t plist_id = H5Dget_create_plist(dataset_id);
463 
464  try {
465  int numfilt = H5Pget_nfilters(plist_id);
466  VERBOSE(cerr << "Number of filters associated with dataset: " << numfilt << endl);
467  string filters;
468 
469  for (int filter = 0; filter < numfilt; filter++) {
470  size_t nelmts = 0;
471  unsigned int flags, filter_info;
472  H5Z_filter_t filter_type = H5Pget_filter2(plist_id, filter, &flags, &nelmts, NULL, 0, NULL, &filter_info);
473  VERBOSE(cerr << "Filter Type: ");
474 
475  switch (filter_type) {
476  case H5Z_FILTER_DEFLATE:
477  VERBOSE(cerr << "H5Z_FILTER_DEFLATE" << endl);
478  // dc->set_deflate(true);
479  filters.append("deflate ");
480  break;
481  case H5Z_FILTER_SHUFFLE:
482  VERBOSE(cerr << "H5Z_FILTER_SHUFFLE" << endl);
483  // dc->set_shuffle(true);
484  filters.append("shuffle ");
485  break;
486  case H5Z_FILTER_FLETCHER32:
487  VERBOSE(cerr << "H5Z_FILTER_FLETCHER32" << endl);
488  // dc->set_fletcher32(true);
489  filters.append("fletcher32 ");
490  break;
491  default: {
492  ostringstream oss("Unsupported HDF5 filter: ", std::ios::ate);
493  oss << filter_type;
494  throw BESInternalError(oss.str(), __FILE__, __LINE__);
495  }
496  }
497  }
498  //trimming trailing space from compression (aka filter) string
499  filters = filters.substr(0, filters.length() - 1);
500  dc->set_filter(filters);
501  }
502  catch (...) {
503  H5Pclose(plist_id);
504  throw;
505  }
506 
507  H5Pclose(plist_id);
508 }
509 
520 static void get_variable_chunk_info(hid_t dataset, DmrppCommon *dc) {
521  std::string byteOrder = "";
522  H5T_order_t byte_order = H5T_ORDER_ERROR;
523 
524  try {
525  hid_t dcpl = H5Dget_create_plist(dataset);
526  uint8_t layout_type = H5Pget_layout(dcpl);
527 
528  hid_t fspace_id = H5Dget_space(dataset);
529  hid_t ftype_id = H5Dget_type(dataset);
530 
531  byte_order = H5Tget_order(ftype_id);
532  switch (byte_order) {
533  case H5T_ORDER_LE:
534  byteOrder = "LE";
535  break;
536  case H5T_ORDER_BE:
537  byteOrder = "BE";
538  break;
539  case H5T_ORDER_NONE:
540  break;
541  default:
542  ostringstream oss("Unsupported HDF5 dataset byteOrder: ", std::ios::ate);
543  oss << byte_order << ".";
544  BESInternalError(oss.str(), __FILE__, __LINE__);
545  break; // unsupported enumerations: H5T_ORDER_[ERROR,VAX,MIXED,NONE]
546  }
547 
548  unsigned int dataset_rank = H5Sget_simple_extent_ndims(fspace_id);
549 
550  hid_t dtypeid = H5Dget_type(dataset);
551 
552  size_t dsize = H5Tget_size(dtypeid);
553 
554  /* layout_type: 1 contiguous 2 chunk 3 compact */
555  switch (layout_type) {
556 
557  case H5D_CONTIGUOUS: { /* Contiguous storage */
558  haddr_t cont_addr = 0;
559  hsize_t cont_size = 0;
560 
561  VERBOSE(cerr << "Storage: contiguous" << endl);
562 
563  cont_addr = H5Dget_offset(dataset);
564  /* if statement never less than zero due to cont_addr being unsigned int. SBL 1.29.20
565  if (cont_addr < 0) {
566  throw BESInternalError("Cannot obtain the offset.", __FILE__, __LINE__);
567  }*/
568  cont_size = H5Dget_storage_size(dataset);
569  /* if statement never less than zero due to cont_size being unsigned int. SBL 1.29.20
570  if (cont_size < 0) {
571  throw BESInternalError("Cannot obtain the storage size.", __FILE__, __LINE__);
572  }*/
573 
574 
575  VERBOSE(cerr << " Addr: " << cont_addr << endl);
576  VERBOSE(cerr << " Size: " << cont_size << endl);
577  VERBOSE(cerr << "byteOrder: " << byteOrder << endl);
578 
579  if (cont_size > 0) {
580  if (dc) dc->add_chunk(byteOrder, cont_size, cont_addr, "" /*pos in array*/);
581  }
582  break;
583  }
584  case H5D_CHUNKED: { /*chunking storage */
585  hsize_t num_chunks = 0;
586  herr_t status = H5Dget_num_chunks(dataset, fspace_id, &num_chunks);
587  if (status < 0) {
588  throw BESInternalError("Could not get the number of chunks",
589  __FILE__, __LINE__);
590  }
591 
592  VERBOSE(cerr << "Storage: chunked." << endl);
593  VERBOSE(cerr << "Number of chunks is: " << num_chunks << endl);
594 
595  if (dc)
596  set_filter_information(dataset, dc);
597 
598  // Get chunking information: rank and dimensions
599  vector<size_t> chunk_dims(dataset_rank);
600  unsigned int chunk_rank = H5Pget_chunk(dcpl, dataset_rank, (hsize_t *) &chunk_dims[0]);
601  if (chunk_rank != dataset_rank)
602  throw BESNotFoundError(
603  "Found a chunk with rank different than the dataset's (aka variables's) rank", __FILE__,
604  __LINE__);
605 
606  if (dc) dc->set_chunk_dimension_sizes(chunk_dims);
607 
608  for (unsigned int i = 0; i < num_chunks; ++i) {
609 
610  vector<hsize_t> temp_coords(dataset_rank);
611  vector<unsigned long long> chunk_coords(dataset_rank); //FIXME - see below
612 
613  haddr_t addr = 0;
614  hsize_t size = 0;
615 
616  //H5_DLL herr_t H5Dget_chunk_info(hid_t dset_id, hid_t fspace_id, hsize_t chk_idx, hsize_t *coord, unsigned *filter_mask, haddr_t *addr, hsize_t *size);
617  status = H5Dget_chunk_info(dataset, fspace_id, i, &temp_coords[0], NULL, &addr, &size);
618  if (status < 0) {
619  VERBOSE(cerr << "ERROR" << endl);
620  throw BESInternalError("Cannot get HDF5 dataset storage info.", __FILE__, __LINE__);
621  }
622 
623  VERBOSE(cerr << "chk_idk: " << i << ", addr: " << addr << ", size: " << size << endl);
624 
625  //The coords need to be of type 'unsigned int' when passed into add_chunk()
626  // This loop simply copies the values from the temp_coords to chunk_coords - kln 5/1/19
627  for (unsigned int j = 0; j < chunk_coords.size(); ++j) {
628  chunk_coords[j] = temp_coords[j];
629  }
630 
631  if (dc) dc->add_chunk(byteOrder, size, addr, chunk_coords);
632  }
633 
634  break;
635  }
636 
637  case H5D_COMPACT: { /* Compact storage */
638  //else if (layout_type == 3) {
639  VERBOSE(cerr << "Storage: compact" << endl);
640 
641  size_t comp_size = H5Dget_storage_size(dataset);
642  VERBOSE(cerr << " Size: " << comp_size << endl);
643 
644  if (comp_size == 0) {
645  throw BESInternalError("Cannot obtain the compact storage size.",
646  __FILE__, __LINE__);
647  }
648 
649  vector<uint8_t> values;
650 
651  Array *btp = dynamic_cast<Array *>(dc);
652  if (btp != NULL) {
653  dc->set_compact(true);
654  size_t memRequired = btp->length() * dsize;
655 
656  if (comp_size != memRequired) {
657  throw BESInternalError("Compact storage size does not match D4Array.",
658  __FILE__, __LINE__);
659  }
660 
661  switch (btp->var()->type()) {
662  case dods_byte_c:
663  case dods_char_c:
664  case dods_int8_c:
665  case dods_uint8_c:
666  case dods_int16_c:
667  case dods_uint16_c:
668  case dods_int32_c:
669  case dods_uint32_c:
670  case dods_float32_c:
671  case dods_float64_c:
672  case dods_int64_c:
673  case dods_uint64_c: {
674  values.resize(memRequired);
675  get_data(dataset, reinterpret_cast<void *>(&values[0]));
676  btp->set_read_p(true);
677  btp->val2buf(reinterpret_cast<void *>(&values[0]));
678  break;
679 
680  }
681 
682  case dods_str_c: {
683  if (H5Tis_variable_str(dtypeid) > 0) {
684  vector<string> finstrval = {""}; // passed by reference to read_vlen_string
685  read_vlen_string(dataset, 1, NULL, NULL, NULL, finstrval);
686  btp->set_value(finstrval, finstrval.size());
687  btp->set_read_p(true);
688  } else {
689  // For this case, the Array is really a single string - check for that
690  // with the following assert - but is an Array because the string data
691  // is stored as an array of chars (hello, FORTRAN). Read the chars, make
692  // a string and load that into a vector<string> (which will be a vector
693  // of length one). Set that as the value of the Array. Really, this
694  // value could be stored as a scalar, but that's complicated and client
695  // software might be expecting an array, so better to handle it this way.
696  // jhrg 9/17/20
697  assert(btp->length() == 1);
698  values.resize(memRequired);
699  get_data(dataset, reinterpret_cast<void *>(&values[0]));
700  string str(values.begin(), values.end());
701  vector<string> strings = {str};
702  btp->set_value(strings, strings.size());
703  btp->set_read_p(true);
704  }
705  break;
706  }
707 
708  default:
709  throw BESInternalError("Unsupported compact storage variable type.", __FILE__, __LINE__);
710  }
711 
712  } else {
713  throw BESInternalError("Compact storage variable is not a D4Array.",
714  __FILE__, __LINE__);
715  }
716  break;
717  }
718 
719  default: {
720  ostringstream oss("Unsupported HDF5 dataset layout type: ", std::ios::ate);
721  oss << layout_type << ".";
722  BESInternalError(oss.str(), __FILE__, __LINE__);
723  break;
724  }
725  }
726  }
727  catch (...) {
728  H5Dclose(dataset);
729  throw;
730  }
731 
732  H5Dclose(dataset);
733 }
734 
742 static void get_chunks_for_all_variables(hid_t file, D4Group *group) {
743  // variables in the group
744  for (Constructor::Vars_iter v = group->var_begin(), ve = group->var_end(); v != ve; ++v) {
745  // if this variable has a 'fullnamepath' attribute, use that and not the
746  // FQN value.
747  D4Attributes *d4_attrs = (*v)->attributes();
748  if (!d4_attrs)
749  throw BESInternalError("Expected to find an attribute table for " + (*v)->name() + " but did not.",
750  __FILE__, __LINE__);
751 
752  // Look for the full name path for this variable
753  // If one was not given via an attribute, use BaseType::FQN() which
754  // relies on the variable's position in the DAP dataset hierarchy.
755  D4Attribute *attr = d4_attrs->get("fullnamepath");
756  string FQN;
757  // I believe the logic is more clear in this way:
758  // If fullnamepath exists and the H5Dopen2 fails to open, it should throw an error.
759  // If fullnamepath doesn't exist, we should ignore the error as the reason described below:
760  // (However, we should suppress the HDF5 dataset open error message.) KY 2019-12-02
761  // It's not an error if a DAP variable in a DMR from the hdf5 handler
762  // doesn't exist in the file _if_ there's no 'fullnamepath' because
763  // that variable was synthesized (likely for CF compliance)
764  hid_t dataset = -1;
765  if (attr) {
766  if (attr->num_values() == 1)
767  FQN = attr->value(0);
768  else
769  FQN = (*v)->FQN();
770  BESDEBUG("dmrpp", "Working on: " << FQN << endl);
771  dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
772  if (dataset < 0)
773  throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
774 
775  } else {
776  // The current design seems to still prefer to open the dataset when the fullnamepath doesn't exist
777  // So go ahead to open the dataset. Continue even if the dataset cannot be open. KY 2019-12-02
778  H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
779  FQN = (*v)->FQN();
780  BESDEBUG("dmrpp", "Working on: " << FQN << endl);
781  dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
782  if (dataset < 0)
783  continue;
784  }
785 #if 0
786  if (attr && attr->num_values() == 1)
787  FQN = attr->value(0);
788  else
789  FQN = (*v)->FQN();
790 
791  VERBOSE(cerr << "Working on: " << FQN << endl);
792  hid_t dataset = H5Dopen2(file, FQN.c_str(), H5P_DEFAULT);
793  // It's not an error if a DAP variable in a DMR from the hdf5 handler
794  // doesn't exist in the file _if_ there's no 'fullnamepath' because
795  // that variable was synthesized (likely for CF compliance)
796  if (dataset < 0 && attr == 0) {
797  cerr<<"Unable to open dataset name "<<FQN <<endl;
798  continue;
799  }
800  else if (dataset < 0)
801  throw BESInternalError("HDF5 dataset '" + FQN + "' cannot be opened.", __FILE__, __LINE__);
802 #endif
803  get_variable_chunk_info(dataset, dynamic_cast<DmrppCommon *>(*v));
804  }
805 
806  // all groups in the group
807  D4Group::groupsIter g = group->grp_begin();
808  D4Group::groupsIter ge = group->grp_end();
809  while (g != ge)
810  get_chunks_for_all_variables(file, *g++);
811 }
812 
/**
 * Rebuild the command line as one string.
 *
 * @param argc Number of entries in argv to use.
 * @param argv The argument strings.
 * @return The first argc arguments joined by single spaces; empty when argc is 0.
 */
std::string cmdln(int argc, char *argv[]) {
    std::string line;
    for (int idx = 0; idx < argc; ++idx) {
        // The separator goes between arguments, never before the first one.
        if (idx != 0)
            line += " ";
        line += argv[idx];
    }
    return line;
}
822 
823 void inject_version_and_configuration(int argc, char **argv, DMRpp *dmrpp){
824 
825  dmrpp->set_version(CVER);
826 
827  // Build the version attributes for the DMR++
828  D4Attribute *version = new D4Attribute("build_dmrpp_metadata", StringToD4AttributeType("container"));
829 
830  D4Attribute *build_dmrpp_version = new D4Attribute("build_dmrpp", StringToD4AttributeType("string"));
831  build_dmrpp_version->add_value(CVER);
832  version->attributes()->add_attribute_nocopy(build_dmrpp_version);
833 
834  D4Attribute *bes_version = new D4Attribute("bes", StringToD4AttributeType("string"));
835  bes_version->add_value(CVER);
836  version->attributes()->add_attribute_nocopy(bes_version);
837 
838  stringstream ldv;
839  ldv << libdap_name() << "-" << libdap_version();
840  D4Attribute *libdap4_version = new D4Attribute("libdap", StringToD4AttributeType("string"));
841  libdap4_version->add_value(ldv.str());
842  version->attributes()->add_attribute_nocopy(libdap4_version);
843 
844  if(!TheBESKeys::ConfigFile.empty()) {
845  // What is the BES configuration in play?
846  D4Attribute *config = new D4Attribute("configuration", StringToD4AttributeType("string"));
847  config->add_value(TheBESKeys::TheKeys()->get_as_config());
848  version->attributes()->add_attribute_nocopy(config);
849  }
850 
851  // How was build_dmrpp invoked?
852  D4Attribute *invoke = new D4Attribute("invocation", StringToD4AttributeType("string"));
853  invoke->add_value(cmdln(argc,argv));
854  version->attributes()->add_attribute_nocopy(invoke);
855 
856  // Inject version and configuration attributes into DMR here.
857  D4Attributes *top_level_attrs = dmrpp->root()->attributes();
858  top_level_attrs->add_attribute_nocopy(version);
859 }
860 
861 void usage() {
862  const char *help = R"(
863  build_dmrpp -h: Show this help
864 
865  build_dmrpp -V: Show build versions for componets that make up the program
866 
867  build_dmrpp -c <bes.conf> -f <data file> [-u <href url>]: Build the DMR++ using the <bes.conf>
868  options to initialize the software for the <data file>. Optionally substitue the <href url>.
869  Builds the DMR using the HDF5 handler as configued using the options in the <bes.conf>.
870 
871  build_dmrpp build_dmrpp -f <data file> -r <dmr file> [-u <href url>]: As above, but uses the DMR
872  read from the given file (so it does not run the HDF5 handler code.
873 
874  Other options:
875  -v: Verbose
876  -d: Turn on BES software debugging output
877  -M: Add information about the build_dmrpp software, incl versions, to the built DMR++)";
878 
879  cerr << help << endl;
880 }
881 
882 int main(int argc, char *argv[]) {
883  string h5_file_name = "";
884  string h5_dset_path = "";
885  string dmr_name = "";
886  string url_name = "";
887  int status = 0;
888  bool add_production_metadata = false;
889 
890  int option_char;
891  while ((option_char = getopt(argc, argv, "c:f:r:u:dhvVM")) != -1) {
892  switch (option_char) {
893  case 'V':
894  cerr << basename(argv[0]) << "-" << CVER << " (bes-"<< CVER << ", " << libdap_name() << "-"
895  << libdap_version() << ")" << endl;
896  return 0;
897 
898  case 'v':
899  verbose = true; // verbose hdf5 errors
900  break;
901 
902  case 'd':
903  BESDebug::SetUp(string("cerr,").append(DEBUG_KEY));
904  break;
905 
906  case 'f':
907  h5_file_name = optarg;
908  break;
909 
910  case 'r':
911  dmr_name = optarg;
912  break;
913 
914  case 'u':
915  url_name = optarg;
916  break;
917 
918  case 'c':
919  TheBESKeys::ConfigFile = optarg;
920  break;
921 
922  case 'M':
923  add_production_metadata = true;
924  break;
925 
926  case 'h':
927  usage();
928  exit(1);
929 
930  default:
931  break;
932  }
933  }
934 
935  if (h5_file_name.empty()) {
936  cerr << "HDF5 file name must be given (-f <input>)." << endl;
937  return 1;
938  }
939 
940  hid_t file = 0;
941  try {
942  // Turn off automatic hdf5 error printing.
943  // See: https://support.hdfgroup.org/HDF5/doc1.8/RM/RM_H5E.html#Error-SetAuto2
944  //if (!verbose) H5Eset_auto2(H5E_DEFAULT, NULL, NULL);
945 
946  // For a given HDF5, get info for all the HDF5 datasets in a DMR or for a
947  // given HDF5 dataset
948  if (!dmr_name.empty()) {
949  // Get dmr:
950  DMRpp dmrpp;
951  DmrppTypeFactory dtf;
952  dmrpp.set_factory(&dtf);
953 
954  ifstream in(dmr_name.c_str());
955  D4ParserSax2 parser;
956  parser.intern(in, &dmrpp, false);
957 
958  // Open the hdf5 file
959  file = H5Fopen(h5_file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
960  if (file < 0) {
961  cerr << "Error: HDF5 file '" + h5_file_name + "' cannot be opened." << endl;
962  return 1;
963  }
964 
965  if(add_production_metadata) {
966  inject_version_and_configuration(argc, argv, &dmrpp);
967  }
968 
969  // iterate over all the variables in the DMR
970  get_chunks_for_all_variables(file, dmrpp.root());
971 
972  XMLWriter writer;
973  dmrpp.print_dmrpp(writer, url_name);
974 
975  cout << writer.get_doc();
976  } else {
977  bool found;
978  string bes_data_root;
979  try {
980  TheBESKeys::TheKeys()->get_value(ROOT_DIRECTORY, bes_data_root, found);
981  if (!found) {
982  cerr << "Error: Could not find the BES root directory key." << endl;
983  return 1;
984  }
985  }
986  catch (BESError &e) {
987  cerr << "BESError: " << e.get_message() << endl;
988  return 1;
989  }
990 
991  // Use the values from the bes.conf file... jhrg 5/21/18
993  if (!mds) {
994  cerr << "The Metadata Store (MDS) must be configured for this command to work." << endl;
995  return 1;
996  }
997 
998  // Use the full path to open the file, but use the 'name' (which is the
999  // path relative to the BES Data Root) with the MDS.
1000  // Changed this to utilize assemblePath() because simply concatenating the strings
1001  // is fragile. - ndp 6/6/18
1002  string h5_file_path = BESUtil::assemblePath(bes_data_root, h5_file_name);
1003 
1004  //bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_name /*h5_file_path*/);
1005  bes::DmrppMetadataStore::MDSReadLock lock = mds->is_dmr_available(h5_file_path, h5_file_name, "h5");
1006  if (lock()) {
1007  // parse the DMR into a DMRpp (that uses the DmrppTypes)
1008  unique_ptr<DMRpp> dmrpp(dynamic_cast<DMRpp *>(mds->get_dmr_object(h5_file_name /*h5_file_path*/)));
1009  if (!dmrpp.get()) {
1010  cerr << "Expected a DMR++ object from the DmrppMetadataStore." << endl;
1011  return 1;
1012  }
1013 
1014  // Open the hdf5 file
1015  file = H5Fopen(h5_file_path.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
1016  if (file < 0) {
1017  cerr << "Error: HDF5 file '" + h5_file_path + "' cannot be opened." << endl;
1018  return 1;
1019  }
1020 
1021  get_chunks_for_all_variables(file, dmrpp->root());
1022 
1023  dmrpp->set_href(url_name);
1024 
1025  mds->add_dmrpp_response(dmrpp.get(), h5_file_name /*h5_file_path*/);
1026 
1027  XMLWriter writer;
1028  dmrpp->set_print_chunks(true);
1029  dmrpp->print_dap4(writer);
1030 
1031  cout << writer.get_doc();
1032  } else {
1033  cerr << "Error: Could not get a lock on the DMR for '" + h5_file_path + "'." << endl;
1034  return 1;
1035  }
1036  }
1037  }
1038  catch (BESError &e) {
1039  cerr << "BESError: " << e.get_message() << endl;
1040  status = 1;
1041  }
1042  catch (std::exception &e) {
1043  cerr << "std::exception: " << e.what() << endl;
1044  status = 1;
1045  }
1046  catch (...) {
1047  cerr << "Unknown error." << endl;
1048  status = 1;
1049  }
1050 
1051  H5Fclose(file);
1052 
1053  return status;
1054 }
static void SetUp(const std::string &values)
Sets up debugging for the BES.
Definition: BESDebug.cc:98
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual std::string get_message()
Get the error message for this exception.
Definition: BESError.h:99
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static std::string ConfigFile
Definition: TheBESKeys.h:185
Store the DAP DMR++ metadata responses.
virtual libdap::DMR * get_dmr_object(const string &name)
Use the DMR response to build a DMR with Dmrpp Types.
static DmrppMetadataStore * get_instance()
virtual MDSReadLock is_dmr_available(const std::string &name)
Is the DMR response for name available in the Metadata Store (MDS)?
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
virtual void print_dmrpp(libdap::XMLWriter &xml, const std::string &href="", bool constrained=false, bool print_chunks=true)
Print the DMR++ response.
Definition: DMRpp.cc:71
void print_dap4(libdap::XMLWriter &xml, bool constrained=false)
override DMR::print_dap4() so the chunk info will print too.
Definition: DMRpp.cc:140
Size and offset information of data included in DMR++ files.
Definition: DmrppCommon.h:76
void set_chunk_dimension_sizes(const std::vector< size_t > &chunk_dims)
Set the value of the chunk dimension sizes given a vector of HDF5 hsize_t.
Definition: DmrppCommon.h:202
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
void set_filter(const std::string &value)
Set the value of the filters property.
Definition: DmrppCommon.cc:108
void set_compact(bool value)
Set the value of the compact property.
Definition: DmrppCommon.h:147
void get_data(hid_t dset, void *buf)
Definition: h5common.cc:50
Unlock and close the MDS item when the ReadLock goes out of scope.