00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "config.h"
00027
00028
00029
00030
00031
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/stat.h>
00037
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053
00054 #include "util_mit.h"
00055 #include "debug.h"
00056
00057 #ifdef WIN32
00058 #include <direct.h>
00059 #include <time.h>
00060 #include <fcntl.h>
00061 #define MKDIR(a,b) _mkdir((a))
00062 #define REMOVE(a) remove((a))
00063 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE)
00064 #define DIR_SEPARATOR_CHAR '\\'
00065 #define DIR_SEPARATOR_STR "\\"
00066 #else
00067 #define MKDIR(a,b) mkdir((a), (b))
00068 #define REMOVE(a) remove((a))
00069 #define MKSTEMP(a) mkstemp((a))
00070 #define DIR_SEPARATOR_CHAR '/'
00071 #define DIR_SEPARATOR_STR "/"
00072 #endif
00073
00074 #define CACHE_META ".meta"
00075 #define CACHE_INDEX ".index"
00076 #define CACHE_EMPTY_ETAG "@cache@"
// Expiration constants, in seconds. The replacement lists are parenthesized
// so the macros behave as single values inside larger expressions (for
// example as the right-hand operand of '/' or '%').
#define NO_LM_EXPIRATION (24*3600) // 24 hours
#define MAX_LM_EXPIRATION (48*3600) // Max expiration from LM
// If using LM to find the expiration then take 10% and no more than
// MAX_LM_EXPIRATION.
#ifndef LM_EXPIRATION
#define LM_EXPIRATION(t) (min((MAX_LM_EXPIRATION), static_cast<int>((t) / 10)))
#endif
// Number of slots in the cache hash table; get_hash() reduces modulo this.
const int CACHE_TABLE_SIZE = 1499;
using namespace std;
namespace libdap {
00080 int
get_hash(const string &url)
{
int hash = 0;
for (const char *ptr = url.c_str(); *ptr; ptr++)
hash = (int)((hash * 3 + (*(unsigned char *)ptr)) % CACHE_TABLE_SIZE);
return hash;
}
/** Build a cache table rooted at \c cache_root.
    The index file name is the cache root with CACHE_INDEX appended. The
    table is allocated with CACHE_TABLE_SIZE empty slots and then populated
    from the index file by cache_index_read().
    @param cache_root Prefix used for the index file and the hash directories.
    @param block_size Block size used when accounting for an entry's disk
    space. */
HTTPCacheTable::HTTPCacheTable(const string &cache_root, int block_size) :
    d_cache_root(cache_root),
    d_block_size(block_size),
    d_current_size(0),
    d_new_entries(0)
{
    d_cache_index = cache_root + CACHE_INDEX;

    // Every slot starts out null; buckets are created on first insert.
    d_cache_table = new CacheEntries*[CACHE_TABLE_SIZE];
    fill(d_cache_table, d_cache_table + CACHE_TABLE_SIZE,
         static_cast<CacheEntries *>(0));

    cache_index_read();
}
00084 static inline void
delete_cache_entry(HTTPCacheTable::CacheEntry *e)
{
DBG2(cerr << "Deleting CacheEntry: " << e << endl);
00085 #if 0
00086 DESTROY(&e->get_lock());
00087 #endif
00088 delete e;
00089 }
00090
00091 HTTPCacheTable::~HTTPCacheTable() {
00092 for (int i = 0; i < CACHE_TABLE_SIZE; ++i) {
00093 HTTPCacheTable::CacheEntries *cp = get_cache_table()[i];
00094 if (cp) {
00095
00096 for_each(cp->begin(), cp->end(), delete_cache_entry);
00097
00098
00099 delete get_cache_table()[i];
00100 get_cache_table()[i] = 0;
00101 }
00102 }
00103
00104 delete[] d_cache_table;
00105 }
00106
00114 class DeleteExpired : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00115 time_t d_time;
00116 HTTPCacheTable &d_table;
00117
00118 public:
00119 DeleteExpired(HTTPCacheTable &table, time_t t) :
00120 d_time(t), d_table(table) {
00121 if (!t)
00122 d_time = time(0);
00123 }
00124
00125 void operator()(HTTPCacheTable::CacheEntry *&e) {
00126 if (e && !e->readers && (e->freshness_lifetime
00127 < (e->corrected_initial_age + (d_time - e->response_time)))) {
00128 DBG(cerr << "Deleting expired cache entry: " << e->url << endl);
00129 d_table.remove_cache_entry(e);
00130 delete e; e = 0;
00131 }
00132 }
00133 };
00134
00135
00136 void HTTPCacheTable::delete_expired_entries(time_t time) {
00137
00138 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00139 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00140 if (slot) {
00141 for_each(slot->begin(), slot->end(), DeleteExpired(*this, time));
00142 slot->erase(remove(slot->begin(), slot->end(),
00143 static_cast<HTTPCacheTable::CacheEntry *>(0)), slot->end());
00144 }
00145 }
00146 }
00147
00154 class DeleteByHits : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00155 HTTPCacheTable &d_table;
00156 int d_hits;
00157
00158 public:
00159 DeleteByHits(HTTPCacheTable &table, int hits) :
00160 d_table(table), d_hits(hits) {
00161 }
00162
00163 void operator()(HTTPCacheTable::CacheEntry *&e) {
00164 if (e && !e->readers && e->hits <= d_hits) {
00165 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00166 d_table.remove_cache_entry(e);
00167 delete e; e = 0;
00168 }
00169 }
00170 };
00171
00172 void
00173 HTTPCacheTable::delete_by_hits(int hits) {
00174 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00175 if (get_cache_table()[cnt]) {
00176 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00177 for_each(slot->begin(), slot->end(), DeleteByHits(*this, hits));
00178 slot->erase(remove(slot->begin(), slot->end(),
00179 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00180 slot->end());
00181
00182 }
00183 }
00184 }
00185
00190 class DeleteBySize : public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00191 HTTPCacheTable &d_table;
00192 unsigned int d_size;
00193
00194 public:
00195 DeleteBySize(HTTPCacheTable &table, unsigned int size) :
00196 d_table(table), d_size(size) {
00197 }
00198
00199 void operator()(HTTPCacheTable::CacheEntry *&e) {
00200 if (e && !e->readers && e->size > d_size) {
00201 DBG(cerr << "Deleting cache entry: " << e->url << endl);
00202 d_table.remove_cache_entry(e);
00203 delete e; e = 0;
00204 }
00205 }
00206 };
00207
00208 void HTTPCacheTable::delete_by_size(unsigned int size) {
00209 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00210 if (get_cache_table()[cnt]) {
00211 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00212 for_each(slot->begin(), slot->end(), DeleteBySize(*this, size));
00213 slot->erase(remove(slot->begin(), slot->end(),
00214 static_cast<HTTPCacheTable::CacheEntry*>(0)),
00215 slot->end());
00216
00217 }
00218 }
00219 }
00220
00227
00234 bool
00235 HTTPCacheTable::cache_index_delete()
00236 {
00237 d_new_entries = 0;
00238
00239 return (REMOVE(d_cache_index.c_str()) == 0);
00240 }
00241
00250 bool
00251 HTTPCacheTable::cache_index_read()
00252 {
00253 FILE *fp = fopen(d_cache_index.c_str(), "r");
00254
00255
00256 if (!fp) {
00257 return false;
00258 }
00259
00260 char line[1024];
00261 while (!feof(fp) && fgets(line, 1024, fp)) {
00262 add_entry_to_cache_table(cache_index_parse_line(line));
00263 DBG2(cerr << line << endl);
00264 }
00265
00266 int res = fclose(fp) ;
00267 if (res) {
00268 DBG(cerr << "HTTPCache::cache_index_read - Failed to close " << (void *)fp << endl);
00269 }
00270
00271 d_new_entries = 0;
00272
00273 return true;
00274 }
00275
00283 HTTPCacheTable::CacheEntry *
00284 HTTPCacheTable::cache_index_parse_line(const char *line)
00285 {
00286
00287 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry;
00288 #if 0
00289 INIT(&entry->d_lock);
00290 #endif
00291 istringstream iss(line);
00292 iss >> entry->url;
00293 iss >> entry->cachename;
00294
00295 iss >> entry->etag;
00296 if (entry->etag == CACHE_EMPTY_ETAG)
00297 entry->etag = "";
00298
00299 iss >> entry->lm;
00300 iss >> entry->expires;
00301 iss >> entry->size;
00302 iss >> entry->range;
00303
00304 iss >> entry->hash;
00305 iss >> entry->hits;
00306 iss >> entry->freshness_lifetime;
00307 iss >> entry->response_time;
00308 iss >> entry->corrected_initial_age;
00309
00310 iss >> entry->must_revalidate;
00311
00312 return entry;
00313 }
00314
00317 class WriteOneCacheEntry :
00318 public unary_function<HTTPCacheTable::CacheEntry *, void>
00319 {
00320
00321 FILE *d_fp;
00322
00323 public:
00324 WriteOneCacheEntry(FILE *fp) : d_fp(fp)
00325 {}
00326
00327 void operator()(HTTPCacheTable::CacheEntry *e)
00328 {
00329 if (e && fprintf(d_fp,
00330 "%s %s %s %ld %ld %ld %c %d %d %ld %ld %ld %c\r\n",
00331 e->url.c_str(),
00332 e->cachename.c_str(),
00333 e->etag == "" ? CACHE_EMPTY_ETAG : e->etag.c_str(),
00334 (long)(e->lm),
00335 (long)(e->expires),
00336 e->size,
00337 e->range ? '1' : '0',
00338 e->hash,
00339 e->hits,
00340 (long)(e->freshness_lifetime),
00341 (long)(e->response_time),
00342 (long)(e->corrected_initial_age),
00343 e->must_revalidate ? '1' : '0') < 0)
00344 throw Error("Cache Index. Error writing cache index\n");
00345 }
00346 };
00347
00357 void
00358 HTTPCacheTable::cache_index_write()
00359 {
00360 DBG(cerr << "Cache Index. Writing index " << d_cache_index << endl);
00361
00362
00363 FILE * fp = NULL;
00364 if ((fp = fopen(d_cache_index.c_str(), "wb")) == NULL) {
00365 throw Error(string("Cache Index. Can't open `") + d_cache_index
00366 + string("' for writing"));
00367 }
00368
00369
00370
00371
00372 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00373 HTTPCacheTable::CacheEntries *cp = get_cache_table()[cnt];
00374 if (cp)
00375 for_each(cp->begin(), cp->end(), WriteOneCacheEntry(fp));
00376 }
00377
00378
00379 int res = fclose(fp);
00380 if (res) {
00381 DBG(cerr << "HTTPCache::cache_index_write - Failed to close "
00382 << (void *)fp << endl);
00383 }
00384
00385 d_new_entries = 0;
00386 }
00387
00389
00402 string
00403 HTTPCacheTable::create_hash_directory(int hash)
00404 {
00405 struct stat stat_info;
00406 ostringstream path;
00407
00408 path << d_cache_root << hash;
00409 string p = path.str();
00410
00411 if (stat(p.c_str(), &stat_info) == -1) {
00412 DBG2(cerr << "Cache....... Create dir " << p << endl);
00413 if (MKDIR(p.c_str(), 0777) < 0) {
00414 DBG2(cerr << "Cache....... Can't create..." << endl);
00415 throw Error("Could not create cache slot to hold response! Check the write permissions on your disk cache directory. Cache root: " + d_cache_root + ".");
00416 }
00417 }
00418 else {
00419 DBG2(cerr << "Cache....... Directory " << p << " already exists"
00420 << endl);
00421 }
00422
00423 return p;
00424 }
00425
00440 void
00441 HTTPCacheTable::create_location(HTTPCacheTable::CacheEntry *entry)
00442 {
00443 string hash_dir = create_hash_directory(entry->hash);
00444 #ifdef WIN32
00445 hash_dir += "\\dodsXXXXXX";
00446 #else
00447 hash_dir += "/dodsXXXXXX";
00448 #endif
00449
00450
00451 char *templat = new char[hash_dir.size() + 1];
00452 strcpy(templat, hash_dir.c_str());
00453
00454
00455
00456
00457
00458 int fd = MKSTEMP(templat);
00459 if (fd < 0) {
00460 delete[] templat; templat = 0;
00461 close(fd);
00462 throw Error("The HTTP Cache could not create a file to hold the response; it will not be cached.");
00463 }
00464
00465 entry->cachename = templat;
00466 delete[] templat; templat = 0;
00467 close(fd);
00468 }
00469
00470
00472 static inline int
00473 entry_disk_space(int size, unsigned int block_size)
00474 {
00475 unsigned int num_of_blocks = (size + block_size) / block_size;
00476
00477 DBG(cerr << "size: " << size << ", block_size: " << block_size
00478 << ", num_of_blocks: " << num_of_blocks << endl);
00479
00480 return num_of_blocks * block_size;
00481 }
00482
00486
00492 void
00493 HTTPCacheTable::add_entry_to_cache_table(CacheEntry *entry)
00494 {
00495 int hash = entry->hash;
00496
00497 if (!d_cache_table[hash])
00498 d_cache_table[hash] = new CacheEntries;
00499
00500 d_cache_table[hash]->push_back(entry);
00501
00502 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size
00503 << ", entry->size: " << entry->size << ", block size: " << d_block_size
00504 << endl);
00505
00506 d_current_size += entry_disk_space(entry->size, d_block_size);
00507
00508 DBG(cerr << "add_entry_to_cache_table, current_size: " << d_current_size << endl);
00509
00510 increment_new_entries();
00511 }
00512
00516 HTTPCacheTable::CacheEntry *
00517 HTTPCacheTable::get_locked_entry_from_cache_table(const string &url)
00518 {
00519 return get_locked_entry_from_cache_table(get_hash(url), url);
00520 }
00521
00529 HTTPCacheTable::CacheEntry *
00530 HTTPCacheTable::get_locked_entry_from_cache_table(int hash, const string &url)
00531 {
00532 DBG(cerr << "url: " << url << "; hash: " << hash << endl);
00533 DBG(cerr << "d_cache_table: " << hex << d_cache_table << dec << endl);
00534 if (d_cache_table[hash]) {
00535 CacheEntries *cp = d_cache_table[hash];
00536 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00537
00538
00539 if ((*i) && (*i)->url == url) {
00540 (*i)->lock_read_response();
00541 #if 0
00542 (*i)->lock();
00543 #endif
00544 return *i;
00545 }
00546 }
00547 }
00548
00549 return 0;
00550 }
00551
00559 HTTPCacheTable::CacheEntry *
00560 HTTPCacheTable::get_write_locked_entry_from_cache_table(const string &url)
00561 {
00562 int hash = get_hash(url);
00563 if (d_cache_table[hash]) {
00564 CacheEntries *cp = d_cache_table[hash];
00565 for (CacheEntriesIter i = cp->begin(); i != cp->end(); ++i) {
00566
00567
00568 if ((*i) && (*i)->url == url) {
00569 (*i)->lock_write_response();
00570 #if 0
00571 (*i)->lock();
00572 #endif
00573 return *i;
00574 }
00575 }
00576 }
00577
00578 return 0;
00579 }
00580
00588 void
00589 HTTPCacheTable::remove_cache_entry(HTTPCacheTable::CacheEntry *entry)
00590 {
00591
00592
00593 if (entry->readers)
00594 throw InternalErr(__FILE__, __LINE__, "Tried to delete a cache entry that is in use.");
00595
00596 REMOVE(entry->cachename.c_str());
00597 REMOVE(string(entry->cachename + CACHE_META).c_str());
00598
00599 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00600
00601 unsigned int eds = entry_disk_space(entry->size, get_block_size());
00602 set_current_size((eds > get_current_size()) ? 0 : get_current_size() - eds);
00603
00604 DBG(cerr << "remove_cache_entry, current_size: " << get_current_size() << endl);
00605 }
00606
00609 class DeleteCacheEntry: public unary_function<HTTPCacheTable::CacheEntry *&, void>
00610 {
00611 string d_url;
00612 HTTPCacheTable *d_cache_table;
00613
00614 public:
00615 DeleteCacheEntry(HTTPCacheTable *c, const string &url)
00616 : d_url(url), d_cache_table(c)
00617 {}
00618
00619 void operator()(HTTPCacheTable::CacheEntry *&e)
00620 {
00621 if (e && e->url == d_url) {
00622 e->lock_write_response();
00623 d_cache_table->remove_cache_entry(e);
00624 e->unlock_write_response();
00625 delete e; e = 0;
00626 }
00627 }
00628 };
00629
00636 void
00637 HTTPCacheTable::remove_entry_from_cache_table(const string &url)
00638 {
00639 int hash = get_hash(url);
00640 if (d_cache_table[hash]) {
00641 CacheEntries *cp = d_cache_table[hash];
00642 for_each(cp->begin(), cp->end(), DeleteCacheEntry(this, url));
00643 cp->erase(remove(cp->begin(), cp->end(), static_cast<HTTPCacheTable::CacheEntry*>(0)),
00644 cp->end());
00645 }
00646 }
00647
00650 class DeleteUnlockedCacheEntry :
00651 public unary_function<HTTPCacheTable::CacheEntry *&, void> {
00652 HTTPCacheTable &d_table;
00653
00654 public:
00655 DeleteUnlockedCacheEntry(HTTPCacheTable &t) :
00656 d_table(t) {
00657 }
00658 void operator()(HTTPCacheTable::CacheEntry *&e) {
00659 if (e) {
00660 d_table.remove_cache_entry(e);
00661 delete e; e = 0;
00662 }
00663 }
00664 };
00665
00666 void HTTPCacheTable::delete_all_entries() {
00667
00668
00669 for (int cnt = 0; cnt < CACHE_TABLE_SIZE; cnt++) {
00670 HTTPCacheTable::CacheEntries *slot = get_cache_table()[cnt];
00671 if (slot) {
00672 for_each(slot->begin(), slot->end(), DeleteUnlockedCacheEntry(*this));
00673 slot->erase(remove(slot->begin(), slot->end(), static_cast<HTTPCacheTable::CacheEntry *>(0)),
00674 slot->end());
00675 }
00676 }
00677
00678 cache_index_delete();
00679 }
00680
00692 void
00693 HTTPCacheTable::calculate_time(HTTPCacheTable::CacheEntry *entry, int default_expiration, time_t request_time)
00694 {
00695 entry->response_time = time(NULL);
00696 time_t apparent_age = max(0, static_cast<int>(entry->response_time - entry->date));
00697 time_t corrected_received_age = max(apparent_age, entry->age);
00698 time_t response_delay = entry->response_time - request_time;
00699 entry->corrected_initial_age = corrected_received_age + response_delay;
00700
00701
00702
00703
00704 time_t freshness_lifetime = entry->max_age;
00705 if (freshness_lifetime < 0) {
00706 if (entry->expires < 0) {
00707 if (entry->lm < 0) {
00708 freshness_lifetime = default_expiration;
00709 }
00710 else {
00711 freshness_lifetime = LM_EXPIRATION(entry->date - entry->lm);
00712 }
00713 }
00714 else
00715 freshness_lifetime = entry->expires - entry->date;
00716 }
00717
00718 entry->freshness_lifetime = max(0, static_cast<int>(freshness_lifetime));
00719
00720 DBG2(cerr << "Cache....... Received Age " << entry->age
00721 << ", corrected " << entry->corrected_initial_age
00722 << ", freshness lifetime " << entry->freshness_lifetime << endl);
00723 }
00724
00735 void HTTPCacheTable::parse_headers(HTTPCacheTable::CacheEntry *entry,
00736 unsigned long max_entry_size, const vector<string> &headers) {
00737 vector<string>::const_iterator i;
00738 for (i = headers.begin(); i != headers.end(); ++i) {
00739
00740 if ((*i).empty())
00741 continue;
00742
00743 string::size_type colon = (*i).find(':');
00744
00745
00746 if (colon == string::npos)
00747 continue;
00748
00749 string header = (*i).substr(0, (*i).find(':'));
00750 string value = (*i).substr((*i).find(": ") + 2);
00751 DBG2(cerr << "Header: " << header << endl);DBG2(cerr << "Value: " << value << endl);
00752
00753 if (header == "ETag") {
00754 entry->etag = value;
00755 } else if (header == "Last-Modified") {
00756 entry->lm = parse_time(value.c_str());
00757 } else if (header == "Expires") {
00758 entry->expires = parse_time(value.c_str());
00759 } else if (header == "Date") {
00760 entry->date = parse_time(value.c_str());
00761 } else if (header == "Age") {
00762 entry->age = parse_time(value.c_str());
00763 } else if (header == "Content-Length") {
00764 unsigned long clength = strtoul(value.c_str(), 0, 0);
00765 if (clength > max_entry_size)
00766 entry->set_no_cache(true);
00767 } else if (header == "Cache-Control") {
00768
00769
00770
00771 if (value == "no-cache" || value == "no-store")
00772
00773
00774
00775 entry->set_no_cache(true);
00776 else if (value == "must-revalidate")
00777 entry->must_revalidate = true;
00778 else if (value.find("max-age") != string::npos) {
00779 string max_age = value.substr(value.find("=" + 1));
00780 entry->max_age = parse_time(max_age.c_str());
00781 }
00782 }
00783 }
00784 }
00785
00787
00788
00789 void HTTPCacheTable::bind_entry_to_data(HTTPCacheTable::CacheEntry *entry, FILE *body) {
00790 entry->hits++;
00791 d_locked_entries[body] = entry;
00792 #if 0
00793 entry->unlock();
00794 #endif
00795 }
00796
00797 void HTTPCacheTable::uncouple_entry_from_data(FILE *body) {
00798 HTTPCacheTable::CacheEntry *entry = d_locked_entries[body];
00799 if (!entry)
00800 throw InternalErr("There is no cache entry for the response given.");
00801
00802 d_locked_entries.erase(body);
00803 entry->unlock_read_response();
00804
00805 if (entry->readers < 0)
00806 throw InternalErr("An unlocked entry was released");
00807 }
00808
00809 bool HTTPCacheTable::is_locked_read_responses() {
00810 return !d_locked_entries.empty();
00811 }
00812
00813 }
00814