bes  Updated for version 3.20.10
HDF5BaseArray.cc
Go to the documentation of this file.
1 // This file is part of hdf5_handler an HDF5 file handler for the OPeNDAP
2 // data server.
3 
4 // Author: Muqun Yang <myang6@hdfgroup.org>
5 
6 // Copyright (c) 2011-2016 The HDF Group, Inc. and OPeNDAP, Inc.
7 //
8 // This is free software; you can redistribute it and/or modify it under the
9 // terms of the GNU Lesser General Public License as published by the Free
10 // Software Foundation; either version 2.1 of the License, or (at your
11 // option) any later version.
12 //
13 // This software is distributed in the hope that it will be useful, but
14 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 // License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 // You can contact The HDF Group, Inc. at 1800 South Oak Street,
24 // Suite 203, Champaign, IL 61820
41 
42 #include <iostream>
43 #include <sstream>
44 #include <cassert>
45 #include <algorithm>
46 #include <BESDebug.h>
47 #include <libdap/InternalErr.h>
48 
49 #include "HDF5BaseArray.h"
50 #include "HDF5RequestHandler.h"
51 #include "ObjMemCache.h"
52 
53 using namespace std;
54 using namespace libdap;
55 #if 0
56 BaseType *HDF5BaseArray::ptr_duplicate()
57 {
58  return new HDF5BaseArray(*this);
59 }
60 
61 // Always return true.
62 // Data will be read from the missing coordinate variable class(HDF5GMCFMissNonLLCVArray etc.)
63 bool HDF5BaseArray::read()
64 {
65  BESDEBUG("h5","Coming to HDF5BaseArray read "<<endl);
66  return true;
67 }
68 
69 #endif
70 
71 // parse constraint expr. and make hdf5 coordinate point location.
72 // return number of elements to read.
73 int
74 HDF5BaseArray::format_constraint (int *offset, int *step, int *count)
75 {
76  long nels = 1;
77  int id = 0;
78 
79  Dim_iter p = dim_begin ();
80 
81  while (p != dim_end ()) {
82 
83  int start = dimension_start (p, true);
84  int stride = dimension_stride (p, true);
85  int stop = dimension_stop (p, true);
86 
87  // Check for illegal constraint
88  if (start > stop) {
89  ostringstream oss;
90  oss << "Array/Grid hyperslab start point "<< start <<
91  " is greater than stop point " << stop <<".";
92  throw Error(malformed_expr, oss.str());
93  }
94 
95  offset[id] = start;
96  step[id] = stride;
97  count[id] = ((stop - start) / stride) + 1; // count of elements
98  nels *= count[id]; // total number of values for variable
99 
100  BESDEBUG ("h5",
101  "=format_constraint():"
102  << "id=" << id << " offset=" << offset[id]
103  << " step=" << step[id]
104  << " count=" << count[id]
105  << endl);
106 
107  id++;
108  p++;
109  }// "while (p != dim_end ())"
110 
111  return nels;
112 }
113 
114 void HDF5BaseArray::write_nature_number_buffer(int rank, int tnumelm) {
115 
116  if (rank != 1)
117  throw InternalErr(__FILE__, __LINE__, "Currently the rank of the missing field should be 1");
118 
119  vector<int>offset;
120  vector<int>count;
121  vector<int>step;
122  offset.resize(rank);
123  count.resize(rank);
124  step.resize(rank);
125 
126 
127  int nelms = format_constraint(&offset[0], &step[0], &count[0]);
128 
129  // Since we always assign the the missing Z dimension as 32-bit
130  // integer, so no need to check the type. The missing Z-dim is always
131  // 1-D with natural number 1,2,3,....
132  vector<int>val;
133  val.resize(nelms);
134 
135  if (nelms == tnumelm) {
136  for (int i = 0; i < nelms; i++)
137  val[i] = i;
138  set_value((dods_int32 *) &val[0], nelms);
139  }
140  else {
141  for (int i = 0; i < count[0]; i++)
142  val[i] = offset[0] + step[0] * i;
143  set_value((dods_int32 *) &val[0], nelms);
144  }
145 }
146 
147 //#if 0
148 void HDF5BaseArray::read_data_from_mem_cache(H5DataType h5type, const vector<size_t> &h5_dimsizes,void* buf,const bool is_dap4){
149 
150  BESDEBUG("h5", "Coming to read_data_from_mem_cache"<<endl);
151  vector<int>offset;
152  vector<int>count;
153  vector<int>step;
154 
155  int ndims = h5_dimsizes.size();
156  if(ndims == 0)
157  throw InternalErr(__FILE__, __LINE__, "Currently we only support array numeric data in the cache, the number of dimension for this file is 0");
158 
159 
160  offset.resize(ndims);
161  count.resize(ndims);
162  step.resize(ndims);
163  int nelms = format_constraint (&offset[0], &step[0], &count[0]);
164 
165  // set the original position to the starting point
166  vector<size_t>pos(ndims,0);
167  for (int i = 0; i< ndims; i++)
168  pos[i] = offset[i];
169 
170 
171  switch (h5type) {
172 
173  case H5UCHAR:
174 
175  {
176  vector<unsigned char> val;
177  subset<unsigned char>(
178  buf,
179  ndims,
180  h5_dimsizes,
181  &offset[0],
182  &step[0],
183  &count[0],
184  &val,
185  pos,
186  0
187  );
188 
189  set_value ((dods_byte *) &val[0], nelms);
190  } // case H5UCHAR
191  break;
192 
193  case H5CHAR:
194  {
195 
196  vector<char>val;
197  subset<char>(
198  buf,
199  ndims,
200  h5_dimsizes,
201  &offset[0],
202  &step[0],
203  &count[0],
204  &val,
205  pos,
206  0
207  );
208 
209  if(false == is_dap4) {
210 
211  vector<short>newval;
212  newval.resize(nelms);
213 
214  for (int counter = 0; counter < nelms; counter++)
215  newval[counter] = (short) (val[counter]);
216  set_value ((dods_int16 *) &val[0], nelms);
217  }
218  else
219  set_value ((dods_int8 *) &val[0], nelms);
220 
221 
222  } // case H5CHAR
223  break;
224 
225  case H5INT16:
226  {
227  vector<short> val;
228  subset<short>(
229  buf,
230  ndims,
231  h5_dimsizes,
232  &offset[0],
233  &step[0],
234  &count[0],
235  &val,
236  pos,
237  0
238  );
239 
240 
241  set_value ((dods_int16 *) &val[0], nelms);
242  }// H5INT16
243  break;
244 
245 
246  case H5UINT16:
247  {
248  vector<unsigned short> val;
249  subset<unsigned short>(
250  buf,
251  ndims,
252  h5_dimsizes,
253  &offset[0],
254  &step[0],
255  &count[0],
256  &val,
257  pos,
258  0
259  );
260 
261 
262  set_value ((dods_uint16 *) &val[0], nelms);
263  } // H5UINT16
264  break;
265 
266  case H5INT32:
267  {
268  vector<int>val;
269  subset<int>(
270  buf,
271  ndims,
272  h5_dimsizes,
273  &offset[0],
274  &step[0],
275  &count[0],
276  &val,
277  pos,
278  0
279  );
280 
281  set_value ((dods_int32 *) &val[0], nelms);
282  } // case H5INT32
283  break;
284 
285  case H5UINT32:
286  {
287  vector<unsigned int>val;
288  subset<unsigned int>(
289  buf,
290  ndims,
291  h5_dimsizes,
292  &offset[0],
293  &step[0],
294  &count[0],
295  &val,
296  pos,
297  0
298  );
299 
300  set_value ((dods_uint32 *) &val[0], nelms);
301  }
302  break;
303  // Add the code for the CF option DAP4 support
304  // For the CF option DAP2 support, the code will
305  // not come here since 64-integer will be ignored
306  // in DAP2.
307  case H5INT64:
308  {
309  vector<long long>val;
310  subset<long long>(
311  buf,
312  ndims,
313  h5_dimsizes,
314  &offset[0],
315  &step[0],
316  &count[0],
317  &val,
318  pos,
319  0
320  );
321 
322  set_value ((dods_int64 *) &val[0], nelms);
323  } // case H5INT64
324  break;
325 
326  case H5UINT64:
327  {
328  vector<unsigned long long>val;
329  subset<unsigned long long>(
330  buf,
331  ndims,
332  h5_dimsizes,
333  &offset[0],
334  &step[0],
335  &count[0],
336  &val,
337  pos,
338  0
339  );
340 
341  set_value ((dods_uint64 *) &val[0], nelms);
342  }
343  break;
344 
345 
346  case H5FLOAT32:
347  {
348  vector<float>val;
349  subset<float>(
350  buf,
351  ndims,
352  h5_dimsizes,
353  &offset[0],
354  &step[0],
355  &count[0],
356  &val,
357  pos,
358  0
359  );
360  set_value ((dods_float32 *) &val[0], nelms);
361  }
362  break;
363 
364 
365  case H5FLOAT64:
366  {
367 
368  vector<double>val;
369  subset<double>(
370  buf,
371  ndims,
372  h5_dimsizes,
373  &offset[0],
374  &step[0],
375  &count[0],
376  &val,
377  pos,
378  0
379  );
380  set_value ((dods_float64 *) &val[0], nelms);
381  } // case H5FLOAT64
382  break;
383 
384  default:
385  throw InternalErr(__FILE__,__LINE__,"Non-supported datatype");
386 
387  }
388 }
389 
391 //
392 // \param input Input variable
393 // \param dim dimension info of the input
394 // \param start start indexes of each dim
395 // \param stride stride of each dim
396 // \param edge count of each dim
397 // \param poutput output variable
398 // \parrm index dimension index
399 // \return 0 if successful. -1 otherwise.
400 //
401 template<typename T>
403  void* input,
404  int rank,
405  const vector<size_t> & dim,
406  int start[],
407  int stride[],
408  int edge[],
409  vector<T> *poutput,
410  vector<size_t>& pos,
411  int index)
412 {
413  for(int k=0; k<edge[index]; k++)
414  {
415  pos[index] = start[index] + k*stride[index];
416  if(index+1<rank)
417  subset(input, rank, dim, start, stride, edge, poutput,pos,index+1);
418  if(index==rank-1)
419  {
420  size_t cur_pos = INDEX_nD_TO_1D( dim, pos);
421  void* tempbuf = (void*)((char*)input+cur_pos*sizeof(T));
422  poutput->push_back(*(static_cast<T*>(tempbuf)));
423  //"poutput->push_back(input[HDF5CFUtil::INDEX_nD_TO_1D( dim, pos)]);"
424  }
425  } // end of for
426  return 0;
427 } // end of template<typename T> static int subset
428 
429 size_t HDF5BaseArray::INDEX_nD_TO_1D (const std::vector < size_t > &dims,
430  const std::vector < size_t > &pos) const {
431  //
432  // "int a[10][20][30] // & a[1][2][3] == a + (20*30+1 + 30*2 + 1 *3)"
433  // "int b[10][2] // &b[1][1] == b + (2*1 + 1)"
434  //
435  if(dims.size () != pos.size ())
436  throw InternalErr(__FILE__,__LINE__,"dimension error in INDEX_nD_TO_1D routine.");
437  size_t sum = 0;
438  size_t start = 1;
439 
440  for (size_t p = 0; p < pos.size (); p++) {
441  size_t m = 1;
442 
443  for (size_t j = start; j < dims.size (); j++)
444  m *= dims[j];
445  sum += m * pos[p];
446  start++;
447  }
448  return sum;
449 }
450 
451 // This routine will check if any section(separated by sep) of string cur_str is inside the vector str_list.
452 // The first found string will be returned or empty string will return if not found in the whole cur_str.
453 string HDF5BaseArray::
454 check_str_sect_in_list(const vector<string>&str_list, const string &cur_str,const char sep) const {
455 
456  string ret_str;
457  string::size_type start = 0;
458  string::size_type end = 0;
459  // Obtain the ret_str value
460  // The cur_str will be chopped into tokens separated by sep.
461  while ((end = cur_str.find(sep, start)) != string::npos) {
462  if(std::find(str_list.begin(),str_list.end(),cur_str.substr(start,end-start))!=
463  str_list.end()) {
464  ret_str = cur_str.substr(start,end-start);
465  break;
466  }
467  start = end + 1;
468  }
469 
470  // We will not include the last sect (rightmost sect) of cur_str.
471 #if 0
472  //if(ret_str != "") {
473  // if(ret_str == cur_str.substr(cur_str.find_last_of(sep)+1))
474  // ret_str ="";
475  //}
476  //
477 #endif
478 
479  return ret_str;
480 
481 }
482 
483 // This routine will check if there is any sub-string of the fullpath(fname+varname) that is exactly the subset of the fullpath with the same ending
484 // of the fullpath is contained in the slist.
485 // Examples: slist contains { /foo1/foovar foovar2 } fname is /temp/myfile/foo1/ varname is foovar. The rotuine will return true.
486 // fname is /myfile/foo2/ varname is foovar. The routine will return false.
487 bool HDF5BaseArray::
488 check_var_cache_files(const vector<string>&slist, const string &fname,const string &varname) const {
489 
490  bool ret_value = false;
491  if(fname=="" || varname=="")
492  return ret_value;
493 
494  string fullpath;
495 
496  if(fname[fname.size()-1] == '/') {
497  if(varname[0]!='/')
498  fullpath = fname+varname;
499  else
500  fullpath = fname.substr(0,fname.size()-1)+varname;
501  }
502  else {
503  if(varname[0]!='/')
504  fullpath = fname+'/'+varname;
505  else
506  fullpath = fname+varname;
507  }
508 
509 
510  for(unsigned int i = 0; i<slist.size();i++) {
511 #if 0
512 //cerr<<"fullpath is "<<fullpath <<endl;
513 //cerr<<"slist[i] is "<<slist[i] <<endl;
514 //cerr<<"fullpath - slist size"<<fullpath.size() -slist[i].size()<<endl;
515 //cerr<<"fullpath.rfind(slist[i] is "<<fullpath.rfind(slist[i]) <<endl;
516 #endif
517  if(fullpath.rfind(slist[i])==(fullpath.size()-slist[i].size())){
518  ret_value = true;
519  break;
520  }
521  }
522  return ret_value;
523 }
524 
525 // Handle data when memory cache is turned on.
526 void HDF5BaseArray::
527 handle_data_with_mem_cache(H5DataType h5_dtype, size_t total_elems,const short cache_flag, const string & cache_key, const bool is_dap4) {
528 
529  //
530  ObjMemCache * mem_data_cache= NULL;
531  if(1 == cache_flag)
532  mem_data_cache = HDF5RequestHandler::get_srdata_mem_cache();
533  else if(cache_flag > 1) {
534  mem_data_cache = HDF5RequestHandler::get_lrdata_mem_cache();
535 
536 #if 0
537 //cerr<<"coming to the large metadata cache "<<endl;
538 //cerr<<"The cache key is "<<cache_key <<endl;
539 
540 // dump the values in the cache,keep this line to check if memory cache works.
541 //mem_data_cache->dump(cerr);
542 #endif
543 
544  }
545 
546 
547  if(mem_data_cache == NULL)
548  throw InternalErr(__FILE__,__LINE__,"The memory data cache should NOT be NULL.");
549 
550  HDF5DataMemCache* mem_cache_ptr = static_cast<HDF5DataMemCache*>(mem_data_cache->get(cache_key));
551  if(mem_cache_ptr) {
552 
553  BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache genenral"
554  <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
555 
556  BESDEBUG("h5","Data Memory Cache hit, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
557 
558  //const string var_name = mem_cache_ptr->get_varname();
559 
560  // Obtain the buffer and do subsetting
561  const size_t var_size = mem_cache_ptr->get_var_buf_size();
562  if(!var_size)
563  throw InternalErr(__FILE__,__LINE__,"The cached data buffer size is 0.");
564  else {
565 
566  void *buf = mem_cache_ptr->get_var_buf();
567 
568  // Obtain dimension size info.
569  vector<size_t> dim_sizes;
570  Dim_iter i_dim = dim_begin();
571  Dim_iter i_enddim = dim_end();
572  while (i_dim != i_enddim) {
573  dim_sizes.push_back(dimension_size(i_dim));
574  ++i_dim;
575  }
576  // read data from the memory cache
577  read_data_from_mem_cache(h5_dtype,dim_sizes,buf,is_dap4);
578  }
579  }
580  else{
581 
582  BESDEBUG("h5","Cache flag: 1 small data cache, 2 large data cache genenral"
583  <<" 3 large data cache common dir, 4 large data cache real var" <<endl);
584 
585  BESDEBUG("h5","Data Memory added to the cache, the variable name is "<<name() <<". The cache flag is "<< cache_flag<<endl);
586 
587  vector <char> buf;
588  if(total_elems == 0)
589  throw InternalErr(__FILE__,__LINE__,"The total number of elements is 0.");
590 
591  buf.resize(total_elems*HDF5CFUtil::H5_numeric_atomic_type_size(h5_dtype));
592 
593  // This routine will read the data, send it to the DAP and save the buf to the cache.
594  read_data_NOT_from_mem_cache(true,&buf[0]);
595 
596  // Create a new cache element.
597 #if 0
598  //HDF5DataMemCache* new_mem_cache = new HDF5DataMemCache(varname);
599 #endif
600  HDF5DataMemCache* new_mem_cache_ele = new HDF5DataMemCache();
601  new_mem_cache_ele->set_databuf(buf);
602 
603  // Add this entry to the cache list
604  mem_data_cache->add(new_mem_cache_ele, cache_key);
605  }
606 
607  return;
608 }
609 
610 BaseType* HDF5BaseArray::h5cfdims_transform_to_dap4(D4Group *grp) {
611 
612  if(grp == NULL)
613  return NULL;
614  Array *dest = static_cast<HDF5BaseArray*>(ptr_duplicate());
615 
616  // If there is just a size, don't make
617  // a D4Dimension (In DAP4 you cannot share a dimension unless it has
618  // a name). jhrg 3/18/14
619 
620  D4Dimensions *grp_dims = grp->dims();
621  for (Array::Dim_iter dap2_dim = dest->dim_begin(), e = dest->dim_end(); dap2_dim != e; ++dap2_dim) {
622  if (!(*dap2_dim).name.empty()) {
623 
624  // If a D4Dimension with the name already exists, use it.
625  D4Dimension *d4_dim = grp_dims->find_dim((*dap2_dim).name);
626  if (!d4_dim) {
627  d4_dim = new D4Dimension((*dap2_dim).name, (*dap2_dim).size);
628  grp_dims->add_dim_nocopy(d4_dim);
629  }
630  // At this point d4_dim's name and size == those of (*d) so just set
631  // the D4Dimension pointer so it matches the one in the D4Group.
632  (*dap2_dim).dim = d4_dim;
633  }
634  }
635 
636  return dest;
637 
638 }
639 
640 
641 
A helper class that aims to reduce code redundancy across the different special CF-derived array classes. For e...
include the entry functions to execute the handlers
int subset(void *input, int rank, const std::vector< size_t > &dim, int start[], int stride[], int edge[], std::vector< T > *poutput, std::vector< size_t > &pos, int index)
Getting a subset of a variable.
An in-memory cache for DapObj (DAS, DDS, ...) objects.
Definition: ObjMemCache.h:84
virtual void add(libdap::DapObj *obj, const std::string &key)
Add an object to the cache and associate it with a key.
Definition: ObjMemCache.cc:63
virtual libdap::DapObj * get(const std::string &key)
Get the cached pointer.
Definition: ObjMemCache.cc:105