bes  Updated for version 3.20.10
check_dmrpp.cc
1 #include <iostream>
2 #include<fstream>
3 #include <string>
4 #include <vector>
5 using namespace std;
6 
7 bool find_var(const string &str, const vector<string>var_type_list,
8  vector<string>&var_type,vector<string>&var_name);
9 bool find_endvar(const string &str,const string vtype);
10 bool find_chunk(const string &str);
11 
12 int main (int argc, char** argv)
13 {
14  // Provide the dmrpp file name and the file name to store the variables that miss values
15  if(argc !=3) {
16  cout<<"Please provide the dmrpp file name to be checked and the output name."<<endl;
17  return -1;
18  }
19 
20  string fname(argv[1]);
21  ifstream dmrpp_fstream;
22  dmrpp_fstream.open(fname.c_str(),ifstream::in);
23  string dmrpp_line;
24 
25  // DAP4 supported atomic datatype
26  vector<string> var_type_list;
27  var_type_list.push_back("Float32");
28  var_type_list.push_back("Int32");
29  var_type_list.push_back("Float64");
30  var_type_list.push_back("Byte");
31  var_type_list.push_back("Int16");
32  var_type_list.push_back("UInt16");
33  var_type_list.push_back("String");
34  var_type_list.push_back("UInt32");
35  var_type_list.push_back("Int8");
36  var_type_list.push_back("Int64");
37  var_type_list.push_back("UInt64");
38  var_type_list.push_back("UInt8");
39  var_type_list.push_back("Char");
40 
41  // var_type and var_name should be var data type and var name in the dmrpp file
42  vector<string>var_type;
43  vector<string>var_name;
44 
45  //The vector to check if chunk block inside this var block(<var ..> </var>)
46  vector<bool>chunk_exist;
47 
48  // The following flags are used to check the variables that miss the values.
49  // In a dmrpp file, an example of variable block may start from
50  // <Float32 name="temperature"> and end with </Float32>
51  // fin_vb_start: flag to find the start of the var block
52  // fin_vb_end: flag to find the end of the var block
53  // chunk_found: flag to find is chunking information is inside the var block
54  bool fin_vb_start = false;
55  bool fin_vb_end = false;
56  bool chunk_found = false;
57 
58  // Check every line of the dmrpp file. This will use less memory.
59  while(getline(dmrpp_fstream,dmrpp_line)) {
60 
61  // If we find the start of the var block(<var..>)
62  if(true == fin_vb_start) {
63 
64  // var data type must exist.
65  if(var_type.empty()) {
66  cout<<"Doesn't have the variable datatype, abort for dmrpp file "<<fname << endl;
67  return -1;
68  }
69  // Not find the end of var block. try to find it.
70  if(false == fin_vb_end)
71  fin_vb_end = find_endvar(dmrpp_line, var_type[var_type.size()-1]);
72 
73  // If find the end of var block, check if the chunk is already found in the var block.
74  if(true == fin_vb_end) {
75  if(false == chunk_found)
76  chunk_exist.push_back(false);
77 
78  // If we find the end of this var block,
79  // reset all bools for the next variable.
80  fin_vb_start = false;
81  fin_vb_end = false;
82  chunk_found = false;
83  }
84  else {// Check if having chunks within this var block.
85  if(false == chunk_found) {
86  chunk_found = find_chunk(dmrpp_line);
87  // When finding the chunk info, update the chunk_exist vector.
88  if(true == chunk_found)
89  chunk_exist.push_back(true);
90  }
91  }
92  }
93  else // Continue finding the var block
94  fin_vb_start = find_var(dmrpp_line,var_type_list,var_type,var_name);
95 
96  }
97 
98  //Sanity check to make sure the chunk_exist vector is the same as var_type vector.
99  //If not, something is wrong with this dmrpp file.
100  if(chunk_exist.size()!=var_type.size()) {
101  cout<<"Number of chunk check is not consistent with the number of var check."<<endl;
102  cout<< "The dmrpp file is "<<fname<<endl;
103  return -1;
104  }
105 
106 #if 0
107 for(size_t i = 0; i<var_type.size(); i++)
108 cout<<"var_type["<<i<<"]= "<<var_type[i]<<endl;
109 for(size_t i = 0; i<var_name.size(); i++) {
110 cout<<"var_name["<<i<<"]= "<<var_name[i]<<endl;
111 cout<<"chunk_exist["<<i<<"]= "<<chunk_exist[i]<<endl;
112 }
113 #endif
114 
115  bool has_missing_info = false;
116  size_t last_missing_chunk_index = 0;
117 
118  // Check if there are any missing variable information.
119  if (!var_type.empty()) {
120  auto ritr = var_type.rbegin();
121  size_t i = var_type.size() - 1;
122  while(ritr != var_type.rend()) {
123  if (!chunk_exist[i]) {
124  has_missing_info = true;
125  last_missing_chunk_index = i;
126  break;
127  }
128  ritr++;
129  i--;
130  }
131  }
132 
133 #if 0
134  size_t j = 0;
135  for (size_t i =0;i<var_type.size();i++) {
136  if(false == chunk_exist[i]){
137  j++;
138  if(j == 1)
139  cout<<"The following variables don't have data value information(datatype + data name): "<<endl;
140  cout<< var_type[i] <<" "<<var_name[i] <<endl;
141  }
142  }
143 #endif
144 
145  // Report the final output.
146  if(true == has_missing_info) {
147 
148  ofstream dmrpp_ofstream;
149  string fname2(argv[2]);
150  dmrpp_ofstream.open(fname2.c_str(),ofstream::out);
151 
152  size_t i = 0;
153  for (auto vt:var_type) {
154  if(!chunk_exist[i]) {
155  if (i!=last_missing_chunk_index)
156  dmrpp_ofstream<<var_name[i] <<",";
157  else
158  dmrpp_ofstream<<var_name[i];
159  }
160  i++;
161  }
162 
163  dmrpp_ofstream.close();
164  }
165 
166 
167  return 0;
168 
169 }
170 
/**
 * Detect the opening tag of a variable block, such as `  <Int16 name="foo">`,
 * and record the variable's type and name.
 *
 * A line is accepted only when all of the following hold:
 *  - it starts with a space and its first non-space character is '<';
 *  - its last character is '>' (maybe this is too strict);
 *  - the text between '<' and the first ` name="` is exactly one entry of
 *    var_type_list;
 *  - the name is non-empty and is closed by a double quote.
 *
 * @param str            One line of the dmrpp file.
 * @param var_type_list  The type names that count as variable types.
 * @param var_type       The matched type name is appended here on success.
 * @param var_name       The variable name is appended here on success.
 * @return true when the line is accepted; false otherwise, in which case
 *         var_type and var_name are left untouched.
 */
bool find_var(const std::string &str, const std::vector<std::string> var_type_list,
              std::vector<std::string> &var_type, std::vector<std::string> &var_name) {

    // Every var block will have spaces before '<'.
    if (str.empty() || str[0] != ' ') {
        return false;
    }

    // Ignore a line of only spaces; the first non-space character must be '<'.
    const std::string::size_type tag_pos = str.find_first_not_of(' ');
    if (tag_pos == std::string::npos || str[tag_pos] != '<') {
        return false;
    }

    // The last character must be '>'. Together with the '<' found above this
    // also guarantees that at least one character follows the '<'.
    if (str[str.size() - 1] != '>') {
        return false;
    }

    // Cheap pre-filter: the character right after '<' must be the first
    // letter of some DAP type name (Float, Int, UInt, Byte, String, Char).
    const std::string type_initials("FIUBSC");
    if (type_initials.find(str[tag_pos + 1]) == std::string::npos) {
        return false;
    }

    // Find ' name="' after the first character of the type, like <Int16 name="d16_1">.
    const std::string sep(" name=\"");
    const std::string::size_type sep_pos = str.find(sep, tag_pos + 2);
    if (sep_pos == std::string::npos) {
        return false;
    }

    // The text between '<' and ' name="' must match a supported type exactly.
    const std::string::size_type type_pos = tag_pos + 1;
    const std::string::size_type type_len = sep_pos - type_pos;
    const std::string *matched_type = nullptr;
    for (const std::string &candidate : var_type_list) {
        if (str.compare(type_pos, type_len, candidate) == 0) {
            matched_type = &candidate;
            break;
        }
    }
    if (matched_type == nullptr) {
        return false;
    }

    // Find the closing quote of the name, searching from the first character
    // of the name itself. Starting one character later would skip the closing
    // quote of an empty name and pick up text from the next attribute.
    const std::string::size_type name_pos = sep_pos + sep.size();
    const std::string::size_type end_name_pos = str.find('"', name_pos);
    if (end_name_pos == std::string::npos || end_name_pos == name_pos) {
        return false;
    }

    // Found both var type and var name. Store them in the vectors.
    var_type.push_back(*matched_type);
    var_name.push_back(str.substr(name_pos, end_name_pos - name_pos));
    return true;
}
255 
/**
 * Tell whether a line carries chunk information.
 *
 * Any chunk info (chunk or contiguous) should include "<dmrpp:chunk "
 * followed, somewhere later on the same line, by "offset".
 *
 * @param str  One line of the dmrpp file.
 * @return true when "<dmrpp:chunk " occurs in str and "offset" occurs after it.
 */
bool find_chunk(const std::string &str) {
    const std::string chunk_tag("<dmrpp:chunk ");

    const std::string::size_type tag_at = str.find(chunk_tag);
    if (tag_at == std::string::npos) {
        return false;
    }

    // Only an "offset" located after the chunk tag counts.
    const std::string::size_type offset_at = str.find("offset", tag_at + chunk_tag.size());
    return offset_at != std::string::npos;
}
270 
/**
 * Tell whether a line ends a variable block, i.e. whether it ends with the
 * closing tag of the given type, such as </Int32>.
 *
 * Any text (typically indentation) may precede the closing tag, but nothing
 * may follow it.
 *
 * @param str    One line of the dmrpp file.
 * @param vtype  The type name of the variable block being closed.
 * @return true when str ends with "</" + vtype + ">".
 */
bool find_endvar(const std::string &str, const std::string vtype) {
    const std::string end_var = "</" + vtype + '>';

    if (str.size() < end_var.size()) {
        return false;
    }

    // Compare the tail of the line directly. Testing only the first
    // occurrence of the tag would reject a line that ends with the tag but
    // also contains the same text earlier.
    return str.compare(str.size() - end_var.size(), end_var.size(), end_var) == 0;
}
283 
284 
285 
286 
287 
288 
289 
290 
291 
292 
293 
294