00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00034 #include "blocxx/PosixRegEx.hpp"
00035 #ifdef BLOCXX_HAVE_REGEX
00036 #ifdef BLOCXX_HAVE_REGEX_H
00037
00038 #include "blocxx/ExceptionIds.hpp"
00039 #include "blocxx/Assertion.hpp"
00040 #include "blocxx/Format.hpp"
00041
00042
00043 namespace BLOCXX_NAMESPACE
00044 {
00045
namespace
{
	/**
	 * File-local success code: the value that the results of
	 * regcomp() and regexec() are compared against in this file.
	 */
	const int REG_NOERROR = 0;
} // anonymous namespace
00051
00052
00053 static String
00054 substitute_caps(const PosixRegEx::MatchArray &sub,
00055 const String &str, const String &rep)
00056 {
00057 static const char *cap_refs[] = {
00058 NULL, "\\1", "\\2", "\\3", "\\4",
00059 "\\5", "\\6", "\\7", "\\8", "\\9", NULL
00060 };
00061
00062 String res( rep);
00063 size_t pos;
00064
00065 for(size_t i=1; cap_refs[i] != NULL; i++)
00066 {
00067 String cap;
00068
00069 if( i < sub.size() && sub[i].rm_so >= 0 && sub[i].rm_eo >= 0)
00070 {
00071 cap = str.substring(sub[i].rm_so, sub[i].rm_eo
00072 - sub[i].rm_so);
00073 }
00074
00075 pos = res.indexOf(cap_refs[i]);
00076 while( pos != String::npos)
00077 {
00078 size_t quotes = 0;
00079 size_t at = pos;
00080
00081 while( at > 0 && res.charAt(--at) == '\\')
00082 quotes++;
00083
00084 if( quotes % 2)
00085 {
00086 quotes = (quotes + 1) / 2;
00087
00088 res = res.erase(pos - quotes, quotes);
00089
00090 pos = res.indexOf(cap_refs[i],
00091 pos + 2 - quotes);
00092 }
00093 else
00094 {
00095 quotes = quotes / 2;
00096
00097 res = res.substring(0, pos - quotes) +
00098 cap +
00099 res.substring(pos + 2);
00100
00101 pos = res.indexOf(cap_refs[i],
00102 pos + cap.length() - quotes);
00103 }
00104 }
00105 }
00106 return res;
00107 }
00108
00109
00110
00111 static inline String
00112 getError(const regex_t *preg, const int code)
00113 {
00114 char err[256] = { '\0'};
00115 ::regerror(code, preg, err, sizeof(err));
00116 return String(err);
00117 }
00118
00119
00120
00121 PosixRegEx::PosixRegEx()
00122 : compiled(false)
00123 , m_flags(0)
00124 , m_ecode(REG_NOERROR)
00125 {
00126 }
00127
00128
00129
00130 PosixRegEx::PosixRegEx(const String ®ex, int cflags)
00131 : compiled(false)
00132 , m_flags(0)
00133 , m_ecode(REG_NOERROR)
00134 {
00135 if( !compile(regex, cflags))
00136 {
00137 BLOCXX_THROW_ERR(RegExCompileException,
00138 errorString().c_str(), m_ecode);
00139 }
00140 }
00141
00142
00143
00144 PosixRegEx::PosixRegEx(const PosixRegEx &ref)
00145 : compiled(false)
00146 , m_flags(ref.m_flags)
00147 , m_ecode(REG_NOERROR)
00148 , m_rxstr(ref.m_rxstr)
00149 {
00150 if( ref.compiled && !compile(ref.m_rxstr, ref.m_flags))
00151 {
00152 BLOCXX_THROW_ERR(RegExCompileException,
00153 errorString().c_str(), m_ecode);
00154 }
00155 }
00156
00157
00158
00159 PosixRegEx::~PosixRegEx()
00160 {
00161 if( compiled)
00162 {
00163 regfree(&m_regex);
00164 }
00165 }
00166
00167
00168
00169 PosixRegEx &
00170 PosixRegEx::operator = (const PosixRegEx &ref)
00171 {
00172 if( !ref.compiled)
00173 {
00174 m_ecode = REG_NOERROR;
00175 m_error.erase();
00176 m_flags = ref.m_flags;
00177 m_rxstr = ref.m_rxstr;
00178 if( compiled)
00179 {
00180 regfree(&m_regex);
00181 compiled = false;
00182 }
00183 }
00184 else if( !compile(ref.m_rxstr, ref.m_flags))
00185 {
00186 BLOCXX_THROW_ERR(RegExCompileException,
00187 errorString().c_str(), m_ecode);
00188 }
00189 return *this;
00190 }
00191
00192
00193
00194 bool
00195 PosixRegEx::compile(const String ®ex, int cflags)
00196 {
00197 if( compiled)
00198 {
00199 regfree(&m_regex);
00200 compiled = false;
00201 }
00202
00203 m_rxstr = regex;
00204 m_flags = cflags;
00205 m_ecode = ::regcomp(&m_regex, regex.c_str(), cflags);
00206 if( m_ecode == REG_NOERROR)
00207 {
00208 compiled = true;
00209 m_error.erase();
00210 return true;
00211 }
00212 else
00213 {
00214 m_error = getError(&m_regex, m_ecode);
00215 return false;
00216 }
00217 }
00218
00219
00220
00221 int
00222 PosixRegEx::errorCode()
00223 {
00224 return m_ecode;
00225 }
00226
00227
00228
00229 String
00230 PosixRegEx::errorString() const
00231 {
00232 return m_error;
00233 }
00234
00235
00236
00237 String
00238 PosixRegEx::patternString() const
00239 {
00240 return m_rxstr;
00241 }
00242
00243
00244
00245 int
00246 PosixRegEx::compileFlags() const
00247 {
00248 return m_flags;
00249 }
00250
00251
00252
00253 bool
00254 PosixRegEx::isCompiled() const
00255 {
00256 return compiled;
00257 }
00258
00259
00260
00261 bool
00262 PosixRegEx::execute(MatchArray &sub, const String &str,
00263 size_t index, size_t count, int eflags)
00264 {
00265 if( !compiled)
00266 {
00267 BLOCXX_THROW(RegExCompileException,
00268 "Regular expression is not compiled");
00269 }
00270
00271 if( index > str.length())
00272 {
00273 BLOCXX_THROW(OutOfBoundsException,
00274 Format("String index out of bounds ("
00275 "length = %1, index = %2).",
00276 str.length(), index
00277 ).c_str());
00278 }
00279
00280 if( count == 0)
00281 {
00282 count = m_regex.re_nsub + 1;
00283 }
00284 AutoPtrVec<regmatch_t> rsub(new regmatch_t[count]);
00285 rsub[0].rm_so = -1;
00286 rsub[0].rm_eo = -1;
00287
00288 sub.clear();
00289 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00290 count, rsub.get(), eflags);
00291 if( m_ecode == REG_NOERROR)
00292 {
00293 m_error.erase();
00294 if( m_flags & REG_NOSUB)
00295 {
00296 return true;
00297 }
00298
00299 sub.resize(count);
00300 for(size_t n = 0; n < count; n++)
00301 {
00302 if( rsub[n].rm_so < 0 || rsub[n].rm_eo < 0)
00303 {
00304 sub[n] = rsub[n];
00305 }
00306 else
00307 {
00308 rsub[n].rm_so += index;
00309 rsub[n].rm_eo += index;
00310 sub[n] = rsub[n];
00311 }
00312 }
00313 return true;
00314 }
00315 else
00316 {
00317 m_error = getError(&m_regex, m_ecode);
00318 return false;
00319 }
00320 }
00321
00322
00323
00324 StringArray
00325 PosixRegEx::capture(const String &str, size_t index, size_t count, int eflags)
00326 {
00327 if( !compiled)
00328 {
00329 BLOCXX_THROW(RegExCompileException,
00330 "Regular expression is not compiled");
00331 }
00332
00333 MatchArray rsub;
00334 StringArray ssub;
00335
00336 bool match = execute(rsub, str, index, count, eflags);
00337 if( match)
00338 {
00339 if( rsub.empty())
00340 {
00341 BLOCXX_THROW(RegExCompileException,
00342 "Non-capturing regular expression");
00343 }
00344
00345 MatchArray::const_iterator i=rsub.begin();
00346 for( ; i != rsub.end(); ++i)
00347 {
00348 if( i->rm_so >= 0 && i->rm_eo >= 0)
00349 {
00350 ssub.push_back(str.substring(i->rm_so,
00351 i->rm_eo - i->rm_so));
00352 }
00353 else
00354 {
00355 ssub.push_back(String(""));
00356 }
00357 }
00358 }
00359 else if(m_ecode != REG_NOMATCH)
00360 {
00361 BLOCXX_THROW_ERR(RegExExecuteException,
00362 errorString().c_str(), m_ecode);
00363 }
00364 return ssub;
00365 }
00366
00367
00368
00369 blocxx::String
00370 PosixRegEx::replace(const String &str, const String &rep,
00371 bool global, int eflags)
00372 {
00373 if( !compiled)
00374 {
00375 BLOCXX_THROW(RegExCompileException,
00376 "Regular expression is not compiled");
00377 }
00378
00379 MatchArray rsub;
00380 bool match;
00381 size_t off = 0;
00382 String out = str;
00383
00384 do
00385 {
00386 match = execute(rsub, out, off, 0, eflags);
00387 if( match)
00388 {
00389 if( rsub.empty() ||
00390 rsub[0].rm_so < 0 ||
00391 rsub[0].rm_eo < 0)
00392 {
00393
00394 BLOCXX_THROW(RegExCompileException,
00395 "Non-capturing regular expression");
00396 }
00397
00398 String res = substitute_caps(rsub, out, rep);
00399
00400 out = out.substring(0, rsub[0].rm_so) +
00401 res + out.substring(rsub[0].rm_eo);
00402
00403 off = rsub[0].rm_so + res.length();
00404 }
00405 else if(m_ecode == REG_NOMATCH)
00406 {
00407 m_ecode = REG_NOERROR;
00408 m_error.erase();
00409 }
00410 else
00411 {
00412 BLOCXX_THROW_ERR(RegExExecuteException,
00413 errorString().c_str(), m_ecode);
00414 }
00415 } while(global && match && out.length() > off);
00416
00417 return out;
00418 }
00419
00420
00421 StringArray
00422 PosixRegEx::split(const String &str, bool empty, int eflags)
00423 {
00424 if( !compiled)
00425 {
00426 BLOCXX_THROW(RegExCompileException,
00427 "Regular expression is not compiled");
00428 }
00429
00430 MatchArray rsub;
00431 StringArray ssub;
00432 bool match;
00433 size_t off = 0;
00434 size_t len = str.length();
00435
00436 do
00437 {
00438 match = execute(rsub, str, off, 1, eflags);
00439 if( match)
00440 {
00441 if( rsub.empty() ||
00442 rsub[0].rm_so < 0 ||
00443 rsub[0].rm_eo < 0)
00444 {
00445 BLOCXX_THROW(RegExCompileException,
00446 "Non-capturing regular expression");
00447 }
00448
00449 if( empty || ((size_t)rsub[0].rm_so > off))
00450 {
00451 ssub.push_back(str.substring(off,
00452 rsub[0].rm_so - off));
00453 }
00454 off = rsub[0].rm_eo;
00455 }
00456 else if(m_ecode == REG_NOMATCH)
00457 {
00458 String tmp = str.substring(off);
00459 if( empty || !tmp.empty())
00460 {
00461 ssub.push_back(tmp);
00462 }
00463 m_ecode = REG_NOERROR;
00464 m_error.erase();
00465 }
00466 else
00467 {
00468 BLOCXX_THROW_ERR(RegExExecuteException,
00469 errorString().c_str(), m_ecode);
00470 }
00471 } while(match && len > off);
00472
00473 return ssub;
00474 }
00475
00476
00477
00478 StringArray
00479 PosixRegEx::grep(const StringArray &src, int eflags)
00480 {
00481 if( !compiled)
00482 {
00483 BLOCXX_THROW(RegExCompileException,
00484 "Regular expression is not compiled");
00485 }
00486
00487 m_ecode = REG_NOERROR;
00488 m_error.erase();
00489
00490 StringArray out;
00491 if( !src.empty())
00492 {
00493 StringArray::const_iterator i=src.begin();
00494 for( ; i != src.end(); ++i)
00495 {
00496 int ret = ::regexec(&m_regex, i->c_str(),
00497 0, NULL, eflags);
00498 if( ret == REG_NOERROR)
00499 {
00500 out.push_back(*i);
00501 }
00502 else if(ret != REG_NOMATCH)
00503 {
00504 m_ecode = ret;
00505 m_error = getError(&m_regex, m_ecode);
00506 BLOCXX_THROW_ERR(RegExExecuteException,
00507 errorString().c_str(), m_ecode);
00508 }
00509 }
00510 }
00511
00512 return out;
00513 }
00514
00515
00516
00517 bool
00518 PosixRegEx::match(const String &str, size_t index, int eflags) const
00519 {
00520 if( !compiled)
00521 {
00522 BLOCXX_THROW(RegExCompileException,
00523 "Regular expression is not compiled");
00524 }
00525
00526 if( index > str.length())
00527 {
00528 BLOCXX_THROW(OutOfBoundsException,
00529 Format("String index out of bounds ("
00530 "length = %1, index = %2).",
00531 str.length(), index
00532 ).c_str());
00533 }
00534
00535 m_ecode = ::regexec(&m_regex, str.c_str() + index,
00536 0, NULL, eflags);
00537
00538 if( m_ecode == REG_NOERROR)
00539 {
00540 m_error.erase();
00541 return true;
00542 }
00543 else if(m_ecode == REG_NOMATCH)
00544 {
00545 m_error = getError(&m_regex, m_ecode);
00546 return false;
00547 }
00548 else
00549 {
00550 m_error = getError(&m_regex, m_ecode);
00551 BLOCXX_THROW_ERR(RegExExecuteException,
00552 errorString().c_str(), m_ecode);
00553 }
00554 }
00555
00556
00557
00558 }
00559
00560 #endif // BLOCXX_HAVE_REGEX_H
00561 #endif // BLOCXX_HAVE_REGEX
00562
00563
00564