bes  Updated for version 3.20.10
DirectoryUtil.cc
1 // This file is part of the "NcML Module" project, a BES module designed
3 // to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
25 // Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
30 #include "config.h"
31 #include "DirectoryUtil.h"
32 
33 #include <cstring>
34 #include <cerrno>
35 #include <sstream>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <dirent.h>
39 
40 #include "BESRegex.h"
41 
42 // bes
43 #include "BESDebug.h"
44 #include "BESForbiddenError.h"
45 #include "BESInternalError.h"
46 #include "TheBESKeys.h"
47 #include "BESNotFoundError.h"
48 #include "BESUtil.h"
49 
50 using std::string;
51 using std::vector;
52 using std::endl;
53 
54 namespace agg_util {
/**
 * Local helper that owns a DIR* obtained from opendir() and calls
 * closedir() on it in the destructor, so the directory is closed on every
 * exit path of the scope that declares it (including a throw).
 *
 * The wrapper is deliberately not copyable: a copy would hold the same
 * DIR* and both destructors would call closedir() on it.
 */
struct DirWrapper {
public:

    /**
     * Try to open the directory at fullDirPath.
     * If opendir() fails, fail() returns true and the caller can inspect
     * errno for the reason.
     * @param fullDirPath the path handed to opendir()
     */
    explicit DirWrapper(const std::string& fullDirPath) :
        _pDir(0), _fullPath(fullDirPath)
    {
        // opendir() is called last so that nothing else in this
        // constructor runs between it and the caller's look at errno.
        _pDir = opendir(fullDirPath.c_str());
    }

    /** Close the directory if it was opened successfully. */
    ~DirWrapper()
    {
        if (_pDir) {
            closedir(_pDir);
            _pDir = 0;
        }
    }

    /** @return true if the directory could not be opened. */
    bool fail() const
    {
        return !_pDir;
    }

    /** @return the wrapped DIR*, or null if opendir() failed. */
    DIR*
    get() const
    {
        return _pDir;
    }

    // automatically closedir() if non-null on dtor.
    DIR* _pDir;
    std::string _fullPath;

private:
    // Declared but not defined: copying would lead to a double closedir().
    DirWrapper(const DirWrapper&);
    DirWrapper& operator=(const DirWrapper&);
};
92 
94 FileInfo::FileInfo(const std::string& path, const std::string& basename, bool isDir, time_t modTime) :
95  _path(path), _basename(basename), _fullPath("") // start empty, cached later
96  , _isDir(isDir), _modTime(modTime)
97 {
100 }
101 
102 FileInfo::~FileInfo()
103 {
104 }
105 
106 const std::string&
108 {
109  return _path;
110 }
111 
112 const std::string&
113 FileInfo::basename() const
114 {
115  return _basename;
116 }
117 
118 bool FileInfo::isDir() const
119 {
120  return _isDir;
121 }
122 
123 time_t FileInfo::modTime() const
124 {
125  return _modTime;
126 }
127 
128 std::string FileInfo::getModTimeAsString() const
129 {
130  // we'll just use UTC for the output...
131  struct tm* pTM = gmtime(&_modTime);
132  char buf[128];
133  // this should be "Year-Month-Day Hour:Minute:Second"
134  strftime(buf, 128, "%F %T", pTM);
135  return string(buf);
136 }
137 
138 const std::string&
140 {
141  if (_fullPath.empty()) {
142  _fullPath = _path + "/" + _basename;
143  }
144  return _fullPath;
145 }
146 
147 std::string FileInfo::toString() const
148 {
149  return "{FileInfo fullPath=" + getFullPath() + " isDir=" + ((isDir()) ? ("true") : ("false")) + " modTime=\""
150  + getModTimeAsString() + "\""
151  " }";
152 }
153 
155 
156 const string DirectoryUtil::_sDebugChannel = "agg_util";
157 
158 DirectoryUtil::DirectoryUtil() :
159  _rootDir("/"), _suffix("") // we start with no filter
160  , _pRegExp(0), _filteringModTimes(false), _newestModTime(0L)
161 {
162  // this can throw, but the class is completely constructed by this point.
163  setRootDir("/");
164 }
165 
166 DirectoryUtil::~DirectoryUtil()
167 {
168  clearRegExp();
169 }
170 
172 const std::string&
174 {
175  return _rootDir;
176 }
177 
183 void DirectoryUtil::setRootDir(const std::string& origRootDir, bool allowRelativePaths/*=false*/,
184  bool /*allowSymLinks=false*/)
185 {
186  if (!allowRelativePaths && hasRelativePath(origRootDir)) {
187  throw BESForbiddenError("can't use rootDir=" + origRootDir + " since it has a relative path (../)", __FILE__,
188  __LINE__);
189  }
190 
191  // Get the root without trailing slash, we'll add it.
192  _rootDir = origRootDir;
193  removeTrailingSlashes(_rootDir);
194  // If empty here, that means the actual filesystem root.
195 
196  // Use the BESUtil to test the path
197  // Since it assumes root is valid and strips preceding "/",
198  // we use "/" as the root path and the root path as the path
199  // to validate the root. This will throw if invalid.
200  BESUtil::check_path(_rootDir, "/", false); // not going to allow symlinks by default.
201 
202  // We should be good if we get here.
203 }
204 
205 void DirectoryUtil::setFilterSuffix(const std::string& suffix)
206 {
207  _suffix = suffix;
208 }
209 
210 void DirectoryUtil::setFilterRegExp(const std::string& regexp)
211 {
212  clearRegExp(); // avoid leaks
213  if (!regexp.empty()) {
214  _pRegExp = new BESRegex(regexp.c_str());
215  }
216 }
217 
219 {
220  delete _pRegExp;
221  _pRegExp = 0;
222 }
223 
225 {
226  _newestModTime = newestModTime;
227  _filteringModTimes = true;
228 }
229 
230 void DirectoryUtil::getListingForPath(const std::string& path, std::vector<FileInfo>* pRegularFiles,
231  std::vector<FileInfo>* pDirectories)
232 {
233  string pathToUse(path);
234  removePrecedingSlashes(pathToUse);
235  pathToUse = getRootDir() + "/" + pathToUse;
236  BESDEBUG(_sDebugChannel, "Attempting to get dir listing for path=\"" << pathToUse << "\"" << endl);
237 
238  // RAII, will closedir no matter how we leave function, including a throw
239  DirWrapper pDir(pathToUse);
240  if (pDir.fail()) {
241  throwErrorForOpendirFail(pathToUse);
242  }
243 
244  // Go through each entry and see if it's a directory or regular file and
245  // add it to the list.
246  struct dirent* pDirEnt = 0;
247  while ((pDirEnt = readdir(pDir.get())) != 0) {
248  string entryName = pDirEnt->d_name;
249  // Exclude ".", ".." and any dotfile dirs like ".svn".
250  if (!entryName.empty() && entryName[0] == '.') {
251  continue;
252  }
253 
254  // Figure out if it's a regular file or directory
255  string pathToEntry = pathToUse + "/" + entryName;
256  struct stat statBuf;
257  int statResult = stat(pathToEntry.c_str(), &statBuf);
258  if (statResult != 0) {
259  // If we can't stat the file for some reason, then ignore it
260  continue;
261  }
262 
263  // Use the passed in path for the entry since we
264  // want to make the locations be relative to the root
265  // for loading later.
266  if (pDirectories && S_ISDIR(statBuf.st_mode)) {
267  pDirectories->push_back(FileInfo(path, entryName, true, statBuf.st_mtime));
268  }
269  else if (pRegularFiles && S_ISREG(statBuf.st_mode)) {
270  FileInfo theFile(path, entryName, false, statBuf.st_mtime);
271  // match against the relative passed in path, not root full path
272  if (matchesAllFilters(theFile.getFullPath(), statBuf.st_mtime)) {
273  pRegularFiles->push_back(theFile);
274  }
275  }
276  }
277 }
278 
279 void DirectoryUtil::getListingForPathRecursive(const std::string& path, std::vector<FileInfo>* pRegularFiles,
280  std::vector<FileInfo>* pDirectories)
281 {
282  // Remove trailing slash to make it canonical
283  string canonicalPath = path;
284  removeTrailingSlashes(canonicalPath);
285 
286  // We use our own local vector of directories in order to recurse,
287  // then add them to the end of pDirectories if it exists.
288 
289  // First, get the current path's listing
290  vector<FileInfo> dirs;
291  dirs.reserve(16); // might as well start with a "few" to avoid grows.
292 
293  // Keep adding them to the user specified regular file list if desired,
294  // but keep track of dirs ourself.
295  getListingForPath(canonicalPath, pRegularFiles, &dirs);
296 
297  // If the caller wanted directories, append them all to the return
298  if (pDirectories) {
299  pDirectories->insert(pDirectories->end(), dirs.begin(), dirs.end());
300  }
301 
302  // Finally, recurse on each directory in dirs
303  for (vector<FileInfo>::const_iterator it = dirs.begin(); it != dirs.end(); ++it) {
304  string subPath = canonicalPath + "/" + it->basename();
305  BESDEBUG(_sDebugChannel, "DirectoryUtil: recursing down to directory subtree=\"" << subPath << "\"..." << endl);
306  // Pass down the caller's accumulated vector's to be filled in.
307  getListingForPathRecursive(subPath, pRegularFiles, pDirectories);
308  }
309 
310 }
311 
312 void DirectoryUtil::getListingOfRegularFilesRecursive(const std::string& path, std::vector<FileInfo>& rRegularFiles)
313 {
314  // call the other one, not accumulated the directories, only recursing into them.
315  getListingForPathRecursive(path, &rRegularFiles, 0);
316 }
317 
/**
 * Translate the current errno into a BES exception. Always throws.
 *
 * @param fullPath the path that was being opened; used in the error message
 * @throw BESForbiddenError for EACCES
 * @throw BESNotFoundError for ELOOP, ENAMETOOLONG, ENOENT and ENOTDIR
 * @throw BESInternalError for ENFILE and for any other errno value
 */
void DirectoryUtil::throwErrorForOpendirFail(const string& fullPath)
{
    const int openErr = errno;
    const std::string quotedPathTail = fullPath + "\"";

    switch (openErr) {
    case EACCES:
        throw BESForbiddenError("Permission denied for some directory in path=\"" + quotedPathTail, __FILE__,
            __LINE__);

    case ELOOP:
        // closest I can figure...
        throw BESNotFoundError("A symlink loop was detected in path=\"" + quotedPathTail, __FILE__, __LINE__);

    case ENAMETOOLONG:
        throw BESNotFoundError("A name in the path was too long. path=\"" + quotedPathTail, __FILE__, __LINE__);

    case ENOENT:
        throw BESNotFoundError("Some part of the path was not found. path=\"" + quotedPathTail, __FILE__,
            __LINE__);

    case ENOTDIR:
        throw BESNotFoundError("Some part of the path was not a directory. path=\"" + quotedPathTail, __FILE__,
            __LINE__);

    case ENFILE:
        throw BESInternalError(std::string("Internal Error: Too many files are currently open!"), __FILE__,
            __LINE__);

    default:
        throw BESInternalError(
            "An unknown errno was found after opendir() was called on path=\"" + quotedPathTail, __FILE__,
            __LINE__);
    }
}
363 
364 bool DirectoryUtil::matchesAllFilters(const std::string& path, time_t modTime) const
365 {
366  bool matches = true;
367  // Do the suffix first since it's fast
368  if (!_suffix.empty() && !matchesSuffix(path, _suffix)) {
369  matches = false;
370  }
371 
372  // Suffix matches and we have a regexp, check that
373  if (matches && _pRegExp) {
374  // match the full string, -1 on fail, num chars matching otherwise
375  int numCharsMatching = _pRegExp->match(path.c_str(), path.size(), 0);
376  matches = (numCharsMatching > 0); // TODO do we want to match the size()?
377  }
378 
379  if (matches && _filteringModTimes) {
380  matches = (modTime < _newestModTime);
381  }
382 
383  return matches;
384 }
385 
386 bool DirectoryUtil::hasRelativePath(const std::string& path)
387 {
388  return (path.find("..") != string::npos);
389 }
390 
392 {
393  if (!path.empty()) {
394  string::size_type pos = path.find_last_not_of("/");
395  if (pos != string::npos) {
396  path = path.substr(0, pos + 1);
397  }
398  }
399 }
400 
402 {
403  if (!path.empty()) {
404  string::size_type pos = path.find_first_not_of("/");
405  path = path.substr(pos, string::npos);
406  }
407 }
408 
409 void DirectoryUtil::printFileInfoList(const vector<FileInfo>& listing)
410 {
411  std::ostringstream oss;
412  printFileInfoList(oss, listing);
413  BESDEBUG(_sDebugChannel, oss.str() << endl);
414 }
415 
416 void DirectoryUtil::printFileInfoList(std::ostream& os, const vector<FileInfo>& listing)
417 {
418  for (vector<FileInfo>::const_iterator it = listing.begin(); it != listing.end(); ++it) {
419  os << it->toString() << endl;
420  }
421 }
422 
424 {
425  bool found;
426  string rootDir;
427  TheBESKeys::TheKeys()->get_value("BES.Catalog.catalog.RootDirectory", rootDir, found);
428  if (!found) {
429  TheBESKeys::TheKeys()->get_value("BES.Data.RootDirectory", rootDir, found);
430  }
431  if (!found) {
432  rootDir = "/";
433  }
434  return rootDir;
435 }
436 
437 bool DirectoryUtil::matchesSuffix(const std::string& filename, const std::string& suffix)
438 {
439  // see if the last suffix.size() characters match.
440  bool matches = (filename.find(suffix, filename.size() - suffix.size()) != string::npos);
441  return matches;
442 }
443 }
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
int match(const char *s, int len, int pos=0) const
Does the pattern match.
Definition: BESRegex.cc:127
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:260
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static std::string getBESRootDir()
void setFilterRegExp(const std::string &regexp)
static void removePrecedingSlashes(std::string &path)
static bool hasRelativePath(const std::string &path)
static void printFileInfoList(std::ostream &os, const std::vector< FileInfo > &listing)
void setRootDir(const std::string &rootDir, bool allowRelativePaths=false, bool allowSymLinks=false)
void getListingOfRegularFilesRecursive(const std::string &path, std::vector< FileInfo > &rRegularFiles)
void setFilterSuffix(const std::string &suffix)
static void removeTrailingSlashes(std::string &path)
void getListingForPath(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void getListingForPathRecursive(const std::string &path, std::vector< FileInfo > *pRegularFiles, std::vector< FileInfo > *pDirectories)
void setFilterModTimeOlderThan(time_t newestModTime)
const std::string & getRootDir() const
std::string getModTimeAsString() const
FileInfo(const std::string &path, const std::string &basename, bool isDir, time_t modTime)
const std::string & path() const
const std::string & getFullPath() const
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...