00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
#include "kmimemagic.h"
00019
#include <kdebug.h>
00020
#include <kapplication.h>
00021
#include <qfile.h>
00022
#include <ksimpleconfig.h>
00023
#include <kstandarddirs.h>
00024
#include <kstaticdeleter.h>
00025
#include <klargefile.h>
00026
#include <assert.h>
00027
00028
static int fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb);
00029
static void process(
struct config_rec* conf,
const QString &);
00030
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes);
00031
static int tagmagic(
unsigned char *buf,
int nbytes);
00032
static int textmagic(
struct config_rec* conf,
unsigned char *,
int);
00033
00034
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb);
00035
static int match(
struct config_rec* conf,
unsigned char *,
int);
00036
00037
KMimeMagic* KMimeMagic::s_pSelf;
00038
static KStaticDeleter<KMimeMagic> kmimemagicsd;
00039
00040 KMimeMagic*
KMimeMagic::self()
00041 {
00042
if( !s_pSelf )
00043 initStatic();
00044
return s_pSelf;
00045 }
00046
00047
void KMimeMagic::initStatic()
00048 {
00049 s_pSelf = kmimemagicsd.setObject( s_pSelf,
new KMimeMagic() );
00050 s_pSelf->
setFollowLinks(
true );
00051 }
00052
00053
#include <stdio.h>
00054
#include <unistd.h>
00055
#include <stdlib.h>
00056
#include <sys/wait.h>
00057
#include <sys/types.h>
00058
#include <sys/stat.h>
00059
#include <fcntl.h>
00060
#include <errno.h>
00061
#include <ctype.h>
00062
#include <time.h>
00063
#include <utime.h>
00064
#include <stdarg.h>
00065
#include <qregexp.h>
00066
#include <qstring.h>
00067
00068
00069
00070
00071
00072
00073
00074
00075
#if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00076
#define DEBUG_LINENUMBERS
00077
#endif
00078
00079
00080
00081
00082
#define DECLINED 999
00083
#define ERROR 998
00084
#define OK 0
00085
00086
00087
00088
00089
#define MIME_BINARY_UNKNOWN "application/octet-stream"
00090
#define MIME_BINARY_UNREADABLE "application/x-unreadable"
00091
#define MIME_BINARY_ZEROSIZE "application/x-zerosize"
00092
#define MIME_TEXT_UNKNOWN "text/plain"
00093
#define MIME_TEXT_PLAIN "text/plain"
00094
#define MIME_INODE_DIR "inode/directory"
00095
#define MIME_INODE_CDEV "inode/chardevice"
00096
#define MIME_INODE_BDEV "inode/blockdevice"
00097
#define MIME_INODE_FIFO "inode/fifo"
00098
#define MIME_INODE_LINK "inode/link"
00099
#define MIME_INODE_SOCK "inode/socket"
00100
00101
#define MIME_APPL_TROFF "application/x-troff"
00102
#define MIME_APPL_TAR "application/x-tar"
00103
#define MIME_TEXT_FORTRAN "text/x-fortran"
00104
00105
/* Size limits used by the magic tables and the sniffing buffer. */
#define MAXMIMESTRING 256

#define HOWMANY 1024    /* number of bytes process() reads from a file */
#define MAXDESC 50      /* capacity of magic::desc, terminator included */
#define MAXstring 64    /* capacity of a string value, terminator included */

/*
 * One value of a magic rule, or one value picked out of the inspected
 * data.  The hs/hl members give byte-wise access to the short/long
 * members so that mconvert() can assemble them in a chosen byte order.
 */
typedef union VALUETYPE {
    unsigned char  b;               /* BYTE */
    unsigned short h;               /* SHORT, BESHORT, LESHORT */
    unsigned long  l;               /* LONG, DATE and their BE/LE variants */
    char           s[MAXstring];    /* STRING */
    unsigned char  hs[2];           /* raw bytes of a short */
    unsigned char  hl[4];           /* raw bytes of a long */
} VALUETYPE;
00119
00120
struct magic {
00121
struct magic *
next;
00122
#ifdef DEBUG_LINENUMBERS
00123
int lineno;
00124
#endif
00125
00126
short flag;
00127
#define INDIR 1
00128
#define UNSIGNED 2
00129
short cont_level;
00130
struct {
00131
char type;
00132
long offset;
00133 } in;
00134
long offset;
00135
unsigned char reln;
00136
char type;
00137
char vallen;
00138
#define BYTE 1
00139
#define SHORT 2
00140
#define LONG 4
00141
#define STRING 5
00142
#define DATE 6
00143
#define BESHORT 7
00144
#define BELONG 8
00145
#define BEDATE 9
00146
#define LESHORT 10
00147
#define LELONG 11
00148
#define LEDATE 12
00149
VALUETYPE value;
00150
unsigned long mask;
00151
char nospflag;
00152
00153
00154
char desc[MAXDESC];
00155 };
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
/* Field sizes of the tar header record described below. */
#define RECORDSIZE 512
#define NAMSIZ 100
#define TUNMLEN 32
#define TGNMLEN 32

/*
 * One RECORDSIZE-byte archive record, viewed either as raw bytes or as
 * a header with fixed-width character fields.
 * NOTE(review): field names match the ustar tar header (see TMAGIC and
 * is_tar()); the consumer is outside this part of the file - confirm.
 */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};
00195
00196
00197
#define TMAGIC "ustar "
00198
00199
00200
00201
00202
static int is_tar(
unsigned char *,
int);
00203
static unsigned long signextend(
struct magic *,
unsigned long);
00204
static int getvalue(
struct magic *,
char **);
00205
static int hextoint(
int);
00206
static char *getstr(
char *,
char *,
int,
int *);
00207
static int mget(
union VALUETYPE *,
unsigned char *,
struct magic *,
int);
00208
static int mcheck(
union VALUETYPE *,
struct magic *);
00209
static int mconvert(
union VALUETYPE *,
struct magic *);
00210
static long from_oct(
int,
char *);
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
#define L_HTML 0x001
00227
#define L_C 0x002
00228
#define L_MAKE 0x004
00229
#define L_PLI 0x008
00230
#define L_MACH 0x010
00231
#define L_PAS 0x020
00232
#define L_JAVA 0x040
00233
#define L_CPP 0x080
00234
#define L_MAIL 0x100
00235
#define L_NEWS 0x200
00236
#define L_DIFF 0x400
00237
00238
#define P_HTML 0
00239
#define P_C 1
00240
#define P_MAKE 2
00241
#define P_PLI 3
00242
#define P_MACH 4
00243
#define P_PAS 5
00244
#define P_JAVA 6
00245
#define P_CPP 7
00246
#define P_MAIL 8
00247
#define P_NEWS 9
00248
#define P_DIFF 10
00249
00250
/*
 * Description of one text flavour that keyword scanning can report.
 * NOTE(review): kwords and weight are consumed by the keyword scoring
 * code outside this part of the file - confirm their exact meaning there.
 */
typedef struct asc_type {
    const char *type;   /* mime type reported for this flavour */
    int kwords;
    double weight;
} asc_type;

/* Entry order lines up with the P_* index constants (P_HTML .. P_DIFF). */
static const asc_type types[] = {
    { "text/html",        19, 2   },
    { "text/x-c",          9, 1.3 },
    { "text/x-makefile",   4, 1.9 },
    { "text/x-pli",        1, 3   },
    { "text/x-assembler",  6, 2.1 },
    { "text/x-pascal",     1, 1   },
    { "text/x-java",      14, 1   },
    { "text/x-c++",       14, 1   },
    { "message/rfc822",    4, 1.9 },
    { "message/news",      3, 2   },
    { "text/x-diff",       4, 2   }
};

/* Number of entries in types[]. */
#define NTYPES (sizeof(types)/sizeof(asc_type))
00271
00272
static struct names {
00273
const char *
name;
00274
short type;
00275 }
const names[] = {
00276 {
00277
"<html", L_HTML
00278 },
00279 {
00280
"<HTML", L_HTML
00281 },
00282 {
00283
"<head", L_HTML
00284 },
00285 {
00286
"<HEAD", L_HTML
00287 },
00288 {
00289
"<body", L_HTML
00290 },
00291 {
00292
"<BODY", L_HTML
00293 },
00294 {
00295
"<title", L_HTML
00296 },
00297 {
00298
"<TITLE", L_HTML
00299 },
00300 {
00301
"<h1", L_HTML
00302 },
00303 {
00304
"<H1", L_HTML
00305 },
00306 {
00307
"<a", L_HTML
00308 },
00309 {
00310
"<A", L_HTML
00311 },
00312 {
00313
"<img", L_HTML
00314 },
00315 {
00316
"<IMG", L_HTML
00317 },
00318 {
00319
"<!--", L_HTML
00320 },
00321 {
00322
"<!doctype", L_HTML
00323 },
00324 {
00325
"<!DOCTYPE", L_HTML
00326 },
00327 {
00328
"<div", L_HTML
00329 },
00330 {
00331
"<DIV", L_HTML
00332 },
00333 {
00334
"<frame", L_HTML
00335 },
00336 {
00337
"<FRAME", L_HTML
00338 },
00339 {
00340
"<frameset", L_HTML
00341 },
00342 {
00343
"<FRAMESET", L_HTML
00344 },
00345 {
00346
"<script", L_HTML
00347 },
00348 {
00349
"<SCRIPT", L_HTML
00350 },
00351 {
00352
"/*", L_C|L_CPP|L_JAVA
00353 },
00354 {
00355
"//", L_CPP|L_JAVA
00356 },
00357 {
00358
"#include", L_C|L_CPP
00359 },
00360 {
00361
"char", L_C|L_CPP|L_JAVA
00362 },
00363 {
00364
"double", L_C|L_CPP|L_JAVA
00365 },
00366 {
00367
"extern", L_C|L_CPP
00368 },
00369 {
00370
"float", L_C|L_CPP|L_JAVA
00371 },
00372 {
00373
"real", L_C|L_CPP|L_JAVA
00374 },
00375 {
00376
"struct", L_C|L_CPP
00377 },
00378 {
00379
"union", L_C|L_CPP
00380 },
00381 {
00382
"implements", L_JAVA
00383 },
00384 {
00385
"super", L_JAVA
00386 },
00387 {
00388
"import", L_JAVA
00389 },
00390 {
00391
"class", L_CPP|L_JAVA
00392 },
00393 {
00394
"public", L_CPP|L_JAVA
00395 },
00396 {
00397
"private", L_CPP|L_JAVA
00398 },
00399 {
00400
"CFLAGS", L_MAKE
00401 },
00402 {
00403
"LDFLAGS", L_MAKE
00404 },
00405 {
00406
"all:", L_MAKE
00407 },
00408 {
00409
".PHONY:", L_MAKE
00410 },
00411 {
00412
"srcdir", L_MAKE
00413 },
00414 {
00415
"exec_prefix", L_MAKE
00416 },
00417
00418
00419
00420
00421 {
00422
".ascii", L_MACH
00423 },
00424 {
00425
".asciiz", L_MACH
00426 },
00427 {
00428
".byte", L_MACH
00429 },
00430 {
00431
".even", L_MACH
00432 },
00433 {
00434
".globl", L_MACH
00435 },
00436 {
00437
"clr", L_MACH
00438 },
00439 {
00440
"(input", L_PAS
00441 },
00442 {
00443
"dcl", L_PLI
00444 },
00445 {
00446
"Received:", L_MAIL
00447 },
00448
00449
00450
00451 {
00452
"Return-Path:", L_MAIL
00453 },
00454 {
00455
"Cc:", L_MAIL
00456 },
00457 {
00458
"Newsgroups:", L_NEWS
00459 },
00460 {
00461
"Path:", L_NEWS
00462 },
00463 {
00464
"Organization:", L_NEWS
00465 },
00466 {
00467
"---", L_DIFF
00468 },
00469 {
00470
"+++", L_DIFF
00471 },
00472 {
00473
"***", L_DIFF
00474 },
00475 {
00476
"@@", L_DIFF
00477 },
00478 {
00479 NULL, 0
00480 }
00481 };
00482
00493
class KMimeMagicUtimeConf
00494 {
00495
public:
00496 KMimeMagicUtimeConf()
00497 {
00498 tmpDirs << QString::fromLatin1(
"/tmp");
00499
00500
00501
00502
QStringList confDirs =
KGlobal::dirs()->
resourceDirs(
"config" );
00503
if ( !confDirs.isEmpty() )
00504 {
00505
QString globalConf = confDirs.last() +
"kmimemagicrc";
00506
if ( QFile::exists( globalConf ) )
00507 {
00508
KSimpleConfig cfg( globalConf );
00509 cfg.setGroup(
"Settings" );
00510 tmpDirs = cfg.readListEntry(
"atimeDirs" );
00511 }
00512
if ( confDirs.count() > 1 )
00513 {
00514
QString localConf = confDirs.first() +
"kmimemagicrc";
00515
if ( QFile::exists( localConf ) )
00516 {
00517
KSimpleConfig cfg( localConf );
00518 cfg.setGroup(
"Settings" );
00519 tmpDirs += cfg.readListEntry(
"atimeDirs" );
00520 }
00521 }
00522
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00523 {
00524
QString dir = *it;
00525
if ( !dir.isEmpty() && dir[ dir.length()-1 ] !=
'/' )
00526 (*it) +=
'/';
00527 }
00528 }
00529
#if 0
00530
00531
for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00532
kdDebug(7018) <<
" atimeDir: " << *it <<
endl;
00533
#endif
00534
}
00535
00536
bool restoreAccessTime(
const QString & file )
const
00537
{
00538
QString dir = file.left( file.findRev(
'/' ) );
00539
bool res = tmpDirs.contains( dir );
00540
00541
return res;
00542 }
00543
QStringList tmpDirs;
00544 };
00545
00546
00547
struct config_rec {
00548
bool followLinks;
00549
QString resultBuf;
00550
int accuracy;
00551
00552
struct magic *magic,
00553 *last;
00554 KMimeMagicUtimeConf * utimeConf;
00555 };
00556
00557
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * Debug aid: walks the rule list and reports the first node whose
 * address consists of four printable bytes, which is taken as a sign
 * that a pointer was overwritten with text.
 *
 * NOTE(review): `conf` is not declared in this free function; it must
 * be made available (e.g. passed in) before MIME_MAGIC_DEBUG_TABLE can
 * be enabled.  The signature is left alone because callers use
 * test_table() without arguments.
 */
static void
test_table()
{
    struct magic *prevm = NULL;

    kdDebug(7018) << "test_table : started" << endl;
    for (struct magic *m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        const int b3 = (addr >> 24) & 255;
        const int b2 = (addr >> 16) & 255;
        const int b1 = (addr >> 8) & 255;
        const int b0 = addr & 255;

        if (isprint(b3) && isprint(b2) && isprint(b1) && isprint(b0)) {
            int line = -1;
#ifdef DEBUG_LINENUMBERS
            if (prevm)
                line = prevm->lineno;
#endif
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\""
                          << (char) b3 << (char) b2 << (char) b1 << (char) b0
                          << "\" line=" << line << endl;
            break;
        }
        prevm = m;
    }
}
#endif
00583
00584
#define EATAB {while (isascii((unsigned char) *l) && \
00585
isspace((unsigned char) *l)) ++l;}
00586
00587
int KMimeMagic::parse_line(
char *line,
int *rule,
int lineno)
00588 {
00589
int ws_offset;
00590
00591
00592
if (line[0]) {
00593 line[strlen(line) - 1] =
'\0';
00594 }
00595
00596 ws_offset = 0;
00597
while (line[ws_offset] && isspace(line[ws_offset])) {
00598 ws_offset++;
00599 }
00600
00601
00602
if (line[ws_offset] == 0) {
00603
return 0;
00604 }
00605
00606
if (line[ws_offset] ==
'#')
00607
return 0;
00608
00609
00610 (*rule)++;
00611
00612
00613
return (parse(line + ws_offset, lineno) != 0);
00614 }
00615
00616
00617
00618
00619
int KMimeMagic::apprentice(
const QString& magicfile )
00620 {
00621 FILE *f;
00622
char line[BUFSIZ + 1];
00623
int errs = 0;
00624
int lineno;
00625
int rule = 0;
00626
QCString fname;
00627
00628
if (magicfile.isEmpty())
00629
return -1;
00630 fname = QFile::encodeName(magicfile);
00631 f = fopen(fname,
"r");
00632
if (f == NULL) {
00633
kdError(7018) <<
"can't read magic file " << fname.data() <<
": " << strerror(errno) <<
endl;
00634
return -1;
00635 }
00636
00637
00638
for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00639
if (parse_line(line, &rule, lineno))
00640 errs++;
00641
00642 fclose(f);
00643
00644
#ifdef DEBUG_APPRENTICE
00645
kdDebug(7018) <<
"apprentice: conf=" << conf <<
" file=" << magicfile <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00646
kdDebug(7018) <<
"apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00647
#endif
00648
00649
#ifdef MIME_MAGIC_DEBUG_TABLE
00650
test_table();
00651
#endif
00652
00653
return (errs ? -1 : 0);
00654 }
00655
00656
int KMimeMagic::buff_apprentice(
char *buff)
00657 {
00658
char line[BUFSIZ + 2];
00659
int errs = 0;
00660
int lineno = 1;
00661
char *start = buff;
00662
char *
end;
00663
int count = 0;
00664
int rule = 0;
00665
int len = strlen(buff) + 1;
00666
00667
00668
do {
00669 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00670 strncpy(line, start, count);
00671 line[count] =
'\0';
00672
if ((
end = strchr(line,
'\n'))) {
00673 *(++
end) =
'\0';
00674 count = strlen(line);
00675 }
else
00676 strcat(line,
"\n");
00677 start += count;
00678 len -= count;
00679
if (parse_line(line, &rule, lineno))
00680 errs++;
00681 lineno++;
00682 }
while (len > 0);
00683
00684
#ifdef DEBUG_APPRENTICE
00685
kdDebug(7018) <<
"buff_apprentice: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
00686
kdDebug(7018) <<
"buff_apprentice: read " << lineno <<
" lines, " << rule <<
" rules, " << errs <<
" errors" <<
endl;
00687
#endif
00688
00689
#ifdef MIME_MAGIC_DEBUG_TABLE
00690
test_table();
00691
#endif
00692
00693
return (errs ? -1 : 0);
00694 }
00695
00696
00697
00698
00699
static unsigned long
00700 signextend(
struct magic *m,
unsigned long v)
00701 {
00702
if (!(m->flag & UNSIGNED))
00703
switch (m->type) {
00704
00705
00706
00707
00708
00709
case BYTE:
00710 v = (
char) v;
00711
break;
00712
case SHORT:
00713
case BESHORT:
00714
case LESHORT:
00715 v = (
short) v;
00716
break;
00717
case DATE:
00718
case BEDATE:
00719
case LEDATE:
00720
case LONG:
00721
case BELONG:
00722
case LELONG:
00723 v = (
long) v;
00724
break;
00725
case STRING:
00726
break;
00727
default:
00728
kdError(7018) <<
"" <<
"signextend" <<
": can't happen: m->type=" << m->type <<
endl;
00729
return ERROR;
00730 }
00731
return v;
00732 }
00733
00734
00735
00736
00737
int KMimeMagic::parse(
char *l,
int
00738 #ifdef DEBUG_LINENUMBERS
00739 lineno
00740 #endif
00741 )
00742 {
00743
int i = 0;
00744
struct magic *m;
00745
char *t,
00746 *s;
00747
00748
if ((m = (
struct magic *) calloc(1,
sizeof(
struct magic))) == NULL) {
00749
kdError(7018) <<
"parse: Out of memory." <<
endl;
00750
return -1;
00751 }
00752
00753 m->next = NULL;
00754
if (!conf->magic || !conf->last) {
00755 conf->magic = conf->last = m;
00756 }
else {
00757 conf->last->next = m;
00758 conf->last = m;
00759 }
00760
00761
00762 m->flag = 0;
00763 m->cont_level = 0;
00764
#ifdef DEBUG_LINENUMBERS
00765
m->lineno = lineno;
00766
#endif
00767
00768
while (*l ==
'>') {
00769 ++l;
00770 m->cont_level++;
00771 }
00772
00773
if (m->cont_level != 0 && *l ==
'(') {
00774 ++l;
00775 m->flag |= INDIR;
00776 }
00777
00778 m->offset = (
int) strtol(l, &t, 0);
00779
if (l == t) {
00780
kdError(7018) <<
"parse: offset " << l <<
" invalid" <<
endl;
00781 }
00782 l = t;
00783
00784
if (m->flag & INDIR) {
00785 m->in.type = LONG;
00786 m->in.offset = 0;
00787
00788
00789
00790
if (*l ==
'.') {
00791
switch (*++l) {
00792
case 'l':
00793 m->in.type = LONG;
00794
break;
00795
case 's':
00796 m->in.type = SHORT;
00797
break;
00798
case 'b':
00799 m->in.type = BYTE;
00800
break;
00801
default:
00802
kdError(7018) <<
"parse: indirect offset type " << *l <<
" invalid" <<
endl;
00803
break;
00804 }
00805 l++;
00806 }
00807 s = l;
00808
if (*l ==
'+' || *l ==
'-')
00809 l++;
00810
if (isdigit((
unsigned char) *l)) {
00811 m->in.offset = strtol(l, &t, 0);
00812
if (*s ==
'-')
00813 m->in.offset = -m->in.offset;
00814 }
else
00815 t = l;
00816
if (*t++ !=
')') {
00817
kdError(7018) <<
"parse: missing ')' in indirect offset" <<
endl;
00818 }
00819 l = t;
00820 }
00821
while (isascii((
unsigned char) *l) && isdigit((
unsigned char) *l))
00822 ++l;
00823 EATAB;
00824
00825
#define NBYTE 4
00826
#define NSHORT 5
00827
#define NLONG 4
00828
#define NSTRING 6
00829
#define NDATE 4
00830
#define NBESHORT 7
00831
#define NBELONG 6
00832
#define NBEDATE 6
00833
#define NLESHORT 7
00834
#define NLELONG 6
00835
#define NLEDATE 6
00836
00837
if (*l ==
'u') {
00838 ++l;
00839 m->flag |= UNSIGNED;
00840 }
00841
00842
if (strncmp(l,
"byte", NBYTE) == 0) {
00843 m->type = BYTE;
00844 l += NBYTE;
00845 }
else if (strncmp(l,
"short", NSHORT) == 0) {
00846 m->type = SHORT;
00847 l += NSHORT;
00848 }
else if (strncmp(l,
"long", NLONG) == 0) {
00849 m->type = LONG;
00850 l += NLONG;
00851 }
else if (strncmp(l,
"string", NSTRING) == 0) {
00852 m->type = STRING;
00853 l += NSTRING;
00854 }
else if (strncmp(l,
"date", NDATE) == 0) {
00855 m->type = DATE;
00856 l += NDATE;
00857 }
else if (strncmp(l,
"beshort", NBESHORT) == 0) {
00858 m->type = BESHORT;
00859 l += NBESHORT;
00860 }
else if (strncmp(l,
"belong", NBELONG) == 0) {
00861 m->type = BELONG;
00862 l += NBELONG;
00863 }
else if (strncmp(l,
"bedate", NBEDATE) == 0) {
00864 m->type = BEDATE;
00865 l += NBEDATE;
00866 }
else if (strncmp(l,
"leshort", NLESHORT) == 0) {
00867 m->type = LESHORT;
00868 l += NLESHORT;
00869 }
else if (strncmp(l,
"lelong", NLELONG) == 0) {
00870 m->type = LELONG;
00871 l += NLELONG;
00872 }
else if (strncmp(l,
"ledate", NLEDATE) == 0) {
00873 m->type = LEDATE;
00874 l += NLEDATE;
00875 }
else {
00876
kdError(7018) <<
"parse: type " << l <<
" invalid" <<
endl;
00877
return -1;
00878 }
00879
00880
if (*l ==
'&') {
00881 ++l;
00882 m->mask = signextend(m, strtol(l, &l, 0));
00883 }
else
00884 m->mask = (
unsigned long) ~0L;
00885 EATAB;
00886
00887
switch (*l) {
00888
case '>':
00889
case '<':
00890
00891
case '&':
00892
case '^':
00893
case '=':
00894 m->reln = *l;
00895 ++l;
00896
break;
00897
case '!':
00898
if (m->type != STRING) {
00899 m->reln = *l;
00900 ++l;
00901
break;
00902 }
00903
00904
default:
00905
if (*l ==
'x' && isascii((
unsigned char) l[1]) &&
00906 isspace((
unsigned char) l[1])) {
00907 m->reln = *l;
00908 ++l;
00909
goto GetDesc;
00910 }
00911 m->reln =
'=';
00912
break;
00913 }
00914 EATAB;
00915
00916
if (getvalue(m, &l))
00917
return -1;
00918
00919
00920
00921 GetDesc:
00922 EATAB;
00923
if (l[0] ==
'\b') {
00924 ++l;
00925 m->nospflag = 1;
00926 }
else if ((l[0] ==
'\\') && (l[1] ==
'b')) {
00927 ++l;
00928 ++l;
00929 m->nospflag = 1;
00930 }
else
00931 m->nospflag = 0;
00932
00933
while (*l !=
'\0' && *l !=
'#' && i < MAXDESC-1)
00934 m->desc[i++] = *l++;
00935 m->desc[i] =
'\0';
00936
00937
while (--i>0 && isspace( m->desc[i] ))
00938 m->desc[i] =
'\0';
00939
00940
00941
00942
00943
#ifdef DEBUG_APPRENTICE
00944
kdDebug(7018) <<
"parse: line=" << lineno <<
" m=" << m <<
" next=" << m->next <<
" cont=" << m->cont_level <<
" desc=" << (m->desc ? m->desc :
"NULL") <<
endl;
00945
#endif
00946
return 0;
00947 }
00948
00949
00950
00951
00952
00953
00954
static int
00955 getvalue(
struct magic *m,
char **p)
00956 {
00957
int slen;
00958
00959
if (m->type == STRING) {
00960 *p = getstr(*p, m->value.s,
sizeof(m->value.s), &slen);
00961 m->vallen = slen;
00962 }
else if (m->reln !=
'x')
00963 m->value.l = signextend(m, strtol(*p, p, 0));
00964
return 0;
00965 }
00966
00967
00968
00969
00970
00971
00972
static char *
00973 getstr(
register char *s,
register char *p,
int plen,
int *slen)
00974 {
00975
char *origs = s,
00976 *origp = p;
00977
char *pmax = p + plen - 1;
00978
register int c;
00979
register int val;
00980
00981
while ((c = *s++) !=
'\0') {
00982
if (isspace((
unsigned char) c))
00983
break;
00984
if (p >= pmax) {
00985
kdError(7018) <<
"String too long: " << origs <<
endl;
00986
break;
00987 }
00988
if (c ==
'\\') {
00989
switch (c = *s++) {
00990
00991
case '\0':
00992
goto out;
00993
00994
default:
00995 *p++ = (
char) c;
00996
break;
00997
00998
case 'n':
00999 *p++ =
'\n';
01000
break;
01001
01002
case 'r':
01003 *p++ =
'\r';
01004
break;
01005
01006
case 'b':
01007 *p++ =
'\b';
01008
break;
01009
01010
case 't':
01011 *p++ =
'\t';
01012
break;
01013
01014
case 'f':
01015 *p++ =
'\f';
01016
break;
01017
01018
case 'v':
01019 *p++ =
'\v';
01020
break;
01021
01022
01023
case '0':
01024
case '1':
01025
case '2':
01026
case '3':
01027
case '4':
01028
case '5':
01029
case '6':
01030
case '7':
01031 val = c -
'0';
01032 c = *s++;
01033
if (c >=
'0' && c <=
'7') {
01034 val = (val << 3) | (c -
'0');
01035 c = *s++;
01036
if (c >=
'0' && c <=
'7')
01037 val = (val << 3) | (c -
'0');
01038
else
01039 --s;
01040 }
else
01041 --s;
01042 *p++ = (
char) val;
01043
break;
01044
01045
01046
case 'x':
01047 val =
'x';
01048 c = hextoint(*s++);
01049
if (c >= 0) {
01050 val = c;
01051 c = hextoint(*s++);
01052
if (c >= 0) {
01053 val = (val << 4) + c;
01054 c = hextoint(*s++);
01055
if (c >= 0) {
01056 val = (val << 4) + c;
01057 }
else
01058 --s;
01059 }
else
01060 --s;
01061 }
else
01062 --s;
01063 *p++ = (
char) val;
01064
break;
01065 }
01066 }
else
01067 *p++ = (
char) c;
01068 }
01069 out:
01070 *p =
'\0';
01071 *slen = p - origp;
01072
01073
01074
return s;
01075 }
01076
01077
01078
01079
/*
 * Value of a single hexadecimal digit.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else, including every character outside the 7-bit ASCII range.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    if (uc & 0x80)              /* not a 7-bit ASCII character */
        return -1;
    if (isdigit(uc))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
01092
01093
01094
01095
01096
static int
01097 mconvert(
union VALUETYPE *p,
struct magic *m)
01098 {
01099
switch (m->type) {
01100
case BYTE:
01101
return 1;
01102
case STRING:
01103
01104 p->s[
sizeof(p->s) - 1] =
'\0';
01105
return 1;
01106
#ifndef WORDS_BIGENDIAN
01107
case SHORT:
01108
#endif
01109
case BESHORT:
01110 p->h = (
short) ((p->hs[0] << 8) | (p->hs[1]));
01111
return 1;
01112
#ifndef WORDS_BIGENDIAN
01113
case LONG:
01114
case DATE:
01115
#endif
01116
case BELONG:
01117
case BEDATE:
01118 p->l = (
long)
01119 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01120
return 1;
01121
#ifdef WORDS_BIGENDIAN
01122
case SHORT:
01123
#endif
01124
case LESHORT:
01125 p->h = (
short) ((p->hs[1] << 8) | (p->hs[0]));
01126
return 1;
01127
#ifdef WORDS_BIGENDIAN
01128
case LONG:
01129
case DATE:
01130
#endif
01131
case LELONG:
01132
case LEDATE:
01133 p->l = (
long)
01134 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01135
return 1;
01136
default:
01137
kdError(7018) <<
"mconvert: invalid type " << m->type <<
endl;
01138
return 0;
01139 }
01140 }
01141
01142
01143
static int
01144 mget(
union VALUETYPE *p,
unsigned char *s,
struct magic *m,
01145
int nbytes)
01146 {
01147
long offset = m->offset;
01148
switch ( m->type )
01149 {
01150
case BYTE:
01151
if ( offset + 1 > nbytes-1 )
01152
return 0;
01153
break;
01154
case SHORT:
01155
case BESHORT:
01156
case LESHORT:
01157
if ( offset + 2 > nbytes-1 )
01158
return 0;
01159
break;
01160
case LONG:
01161
case BELONG:
01162
case LELONG:
01163
case DATE:
01164
case BEDATE:
01165
case LEDATE:
01166
if ( offset + 4 > nbytes-1 )
01167
return 0;
01168
break;
01169
case STRING:
01170
break;
01171 }
01172
01173
01174
01175
01176
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01177 {
01178
int have = nbytes - offset;
01179 memset(p, 0,
sizeof(
union VALUETYPE));
01180
if (have > 0)
01181 memcpy(p, s + offset, have);
01182 }
else
01183 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01184
01185
if (!mconvert(p, m))
01186
return 0;
01187
01188
if (m->flag & INDIR) {
01189
01190
switch (m->in.type) {
01191
case BYTE:
01192 offset = p->b + m->in.offset;
01193
break;
01194
case SHORT:
01195 offset = p->h + m->in.offset;
01196
break;
01197
case LONG:
01198 offset = p->l + m->in.offset;
01199
break;
01200 }
01201
01202
if (offset + (
int)
sizeof(
union VALUETYPE) > nbytes)
01203 return 0;
01204
01205 memcpy(p, s + offset,
sizeof(
union VALUETYPE));
01206
01207
if (!mconvert(p, m))
01208
return 0;
01209 }
01210
return 1;
01211 }
01212
01213
static int
01214 mcheck(
union VALUETYPE *p,
struct magic *m)
01215 {
01216
register unsigned long l = m->value.l;
01217
register unsigned long v;
01218
int matched;
01219
01220
if ((m->value.s[0] ==
'x') && (m->value.s[1] ==
'\0')) {
01221
kdError(7018) <<
"BOINK" <<
endl;
01222
return 1;
01223 }
01224
switch (m->type) {
01225
case BYTE:
01226 v = p->b;
01227
break;
01228
01229
case SHORT:
01230
case BESHORT:
01231
case LESHORT:
01232 v = p->h;
01233
break;
01234
01235
case LONG:
01236
case BELONG:
01237
case LELONG:
01238
case DATE:
01239
case BEDATE:
01240
case LEDATE:
01241 v = p->l;
01242
break;
01243
01244
case STRING:
01245 l = 0;
01246
01247
01248
01249
01250
01251 v = 0;
01252 {
01253
register unsigned char *a = (
unsigned char *) m->value.s;
01254
register unsigned char *b = (
unsigned char *) p->s;
01255
register int len = m->vallen;
01256 Q_ASSERT(len);
01257
01258
while (--len >= 0)
01259
if ((v = *b++ - *a++) != 0)
01260
break;
01261 }
01262
break;
01263
default:
01264
kdError(7018) <<
"mcheck: invalid type " << m->type <<
endl;
01265
return 0;
01266 }
01267
#if 0
01268
qDebug(
"Before signextend %08x", v);
01269
#endif
01270
v = signextend(m, v) & m->mask;
01271
#if 0
01272
qDebug(
"After signextend %08x", v);
01273
#endif
01274
01275
switch (m->reln) {
01276
case 'x':
01277 matched = 1;
01278
break;
01279
01280
case '!':
01281 matched = v != l;
01282
break;
01283
01284
case '=':
01285 matched = v == l;
01286
break;
01287
01288
case '>':
01289
if (m->flag & UNSIGNED)
01290 matched = v > l;
01291
else
01292 matched = (
long) v > (
long) l;
01293
break;
01294
01295
case '<':
01296
if (m->flag & UNSIGNED)
01297 matched = v < l;
01298
else
01299 matched = (
long) v < (
long) l;
01300
break;
01301
01302
case '&':
01303 matched = (v & l) == l;
01304
break;
01305
01306
case '^':
01307 matched = (v & l) != l;
01308
break;
01309
01310
default:
01311 matched = 0;
01312
kdError(7018) <<
"mcheck: can't happen: invalid relation " << m->reln <<
"." <<
endl;
01313
break;
01314 }
01315
01316
return matched;
01317 }
01318
01319
01320
01321
01322
01323
01324
void process(
struct config_rec* conf,
const QString & fn)
01325 {
01326
int fd = 0;
01327
unsigned char buf[HOWMANY + 1];
01328 KDE_struct_stat sb;
01329
int nbytes = 0;
01330
int tagbytes = 0;
01331
QCString fileName = QFile::encodeName( fn );
01332
01333
01334
01335
01336
if (fsmagic(conf, fileName, &sb) != 0) {
01337
01338
return;
01339 }
01340
if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01341
01342
01343
01344
01345
01346
01347 conf->resultBuf = MIME_BINARY_UNREADABLE;
01348
return;
01349 }
01350
01351
01352
01353
if ((nbytes = read(fd, (
char *) buf, HOWMANY)) == -1) {
01354
kdError(7018) <<
"" << fn <<
" read failed (" << strerror(errno) <<
")." <<
endl;
01355 conf->resultBuf = MIME_BINARY_UNREADABLE;
01356
return;
01357 }
01358
if ((tagbytes = tagmagic(buf, nbytes))) {
01359
01360 lseek(fd, tagbytes, SEEK_SET);
01361 nbytes = read(fd, (
char*)buf, HOWMANY);
01362
if (nbytes < 0) {
01363 conf->resultBuf = MIME_BINARY_UNREADABLE;
01364
return;
01365 }
01366 }
01367
if (nbytes == 0) {
01368 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01369 }
else {
01370 buf[nbytes++] =
'\0';
01371 tryit(conf, buf, nbytes);
01372 }
01373
01374
if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01375 {
01376
01377
01378
01379
01380
01381
struct utimbuf utbuf;
01382 utbuf.actime = sb.st_atime;
01383 utbuf.modtime = sb.st_mtime;
01384 (
void) utime(fileName, &utbuf);
01385 }
01386 (
void)
close(fd);
01387 }
01388
01389
01390
static void tryit(
struct config_rec* conf,
unsigned char *buf,
int nb)
01391 {
01392
01393
if (match(conf, buf, nb))
01394
return;
01395
01396
01397
if (ascmagic(conf, buf, nb) == 1)
01398
return;
01399
01400
01401
if (textmagic(conf, buf, nb))
01402
return;
01403
01404
01405 conf->resultBuf = MIME_BINARY_UNKNOWN;
01406 conf->accuracy = 0;
01407 }
01408
01409
static int
01410 fsmagic(
struct config_rec* conf,
const char *fn, KDE_struct_stat *sb)
01411 {
01412
int ret = 0;
01413
01414
01415
01416
01417
01418 ret = KDE_lstat(fn, sb);
01419
01420
if (ret) {
01421
return 1;
01422
01423 }
01424
01425
01426
01427
01428
01429
01430
switch (sb->st_mode & S_IFMT) {
01431
case S_IFDIR:
01432 conf->resultBuf = MIME_INODE_DIR;
01433
return 1;
01434
case S_IFCHR:
01435 conf->resultBuf = MIME_INODE_CDEV;
01436
return 1;
01437
case S_IFBLK:
01438 conf->resultBuf = MIME_INODE_BDEV;
01439
return 1;
01440
01441
#ifdef S_IFIFO
01442
case S_IFIFO:
01443 conf->resultBuf = MIME_INODE_FIFO;
01444
return 1;
01445
#endif
01446
#ifdef S_IFLNK
01447
case S_IFLNK:
01448 {
01449
char buf[BUFSIZ + BUFSIZ + 4];
01450
register int nch;
01451 KDE_struct_stat tstatbuf;
01452
01453
if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01454 conf->resultBuf = MIME_INODE_LINK;
01455
01456
return 1;
01457 }
01458 buf[nch] =
'\0';
01459
01460
if (*buf ==
'/') {
01461
if (KDE_stat(buf, &tstatbuf) < 0) {
01462 conf->resultBuf = MIME_INODE_LINK;
01463
01464
return 1;
01465 }
01466 }
else {
01467
char *tmp;
01468
char buf2[BUFSIZ + BUFSIZ + 4];
01469
01470 strncpy(buf2, fn, BUFSIZ);
01471 buf2[BUFSIZ] = 0;
01472
01473
if ((tmp = strrchr(buf2,
'/')) == NULL) {
01474 tmp = buf;
01475 }
else {
01476
01477 *++tmp =
'\0';
01478 strcat(buf2, buf);
01479 tmp = buf2;
01480 }
01481
if (KDE_stat(tmp, &tstatbuf) < 0) {
01482 conf->resultBuf = MIME_INODE_LINK;
01483
01484
return 1;
01485 }
else
01486 strcpy(buf, tmp);
01487 }
01488
if (conf->followLinks)
01489 process( conf, QFile::decodeName( buf ) );
01490
else
01491 conf->resultBuf = MIME_INODE_LINK;
01492
return 1;
01493 }
01494
return 1;
01495
#endif
01496
#ifdef S_IFSOCK
01497
#ifndef __COHERENT__
01498
case S_IFSOCK:
01499 conf->resultBuf = MIME_INODE_SOCK;
01500
return 1;
01501
#endif
01502
#endif
01503
case S_IFREG:
01504
break;
01505
default:
01506
kdError(7018) <<
"KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode <<
"." <<
endl;
01507
01508 }
01509
01510
01511
01512
01513
if (sb->st_size == 0) {
01514 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01515
return 1;
01516 }
01517
return 0;
01518 }
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
static int
01547 match(
struct config_rec* conf,
unsigned char *s,
int nbytes)
01548 {
01549
int cont_level = 0;
01550
union VALUETYPE p;
01551
struct magic *m;
01552
01553
#ifdef DEBUG_MIMEMAGIC
01554
kdDebug(7018) <<
"match: conf=" << conf <<
" m=" << (conf->magic ?
"set" :
"NULL") <<
" m->next=" << ((conf->magic && conf->magic->next) ?
"set" :
"NULL") <<
" last=" << (conf->last ?
"set" :
"NULL") <<
endl;
01555
for (m = conf->magic; m; m = m->next) {
01556
if (isprint((((
unsigned long) m) >> 24) & 255) &&
01557 isprint((((
unsigned long) m) >> 16) & 255) &&
01558 isprint((((
unsigned long) m) >> 8) & 255) &&
01559 isprint(((
unsigned long) m) & 255)) {
01560
kdDebug(7018) <<
"match: POINTER CLOBBERED! " <<
endl;
01561
break;
01562 }
01563 }
01564
#endif
01565
01566
for (m = conf->magic; m; m = m->next) {
01567
#ifdef DEBUG_MIMEMAGIC
01568
kdDebug(7018) <<
"match: line=" << m->lineno <<
" desc=" << m->desc <<
endl;
01569
#endif
01570
memset(&p, 0,
sizeof(
union VALUETYPE));
01571
01572
01573
if (!mget(&p, s, m, nbytes) ||
01574 !mcheck(&p, m)) {
01575
struct magic *m_cont;
01576
01577
01578
01579
01580
if (!m->next || (m->next->cont_level == 0)) {
01581
continue;
01582 }
01583 m_cont = m->next;
01584
while (m_cont && (m_cont->cont_level != 0)) {
01585
#ifdef DEBUG_MIMEMAGIC
01586
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m_cont->cont_level <<
" mc=" << m_cont->lineno <<
" mc->next=" << m_cont <<
" " <<
endl;
01587
#endif
01588
01589
01590
01591
01592 m = m_cont;
01593 m_cont = m_cont->next;
01594 }
01595
continue;
01596 }
01597
01598
01599
#ifdef DEBUG_MIMEMAGIC
01600
kdDebug(7018) <<
"match: rule matched, line=" << m->lineno <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01601
#endif
01602
01603
01604 conf->resultBuf = m->desc;
01605
01606 cont_level++;
01607
01608
01609
01610
01611 m = m->next;
01612
while (m && (m->cont_level != 0)) {
01613
#ifdef DEBUG_MIMEMAGIC
01614
kdDebug(7018) <<
"match: line=" << m->lineno <<
" cont=" << m->cont_level <<
" type=" << m->type <<
" " << ((m->type == STRING) ? m->value.s :
"") <<
endl;
01615
#endif
01616
if (cont_level >= m->cont_level) {
01617
if (cont_level > m->cont_level) {
01618
01619
01620
01621
01622 cont_level = m->cont_level;
01623 }
01624
if (mget(&p, s, m, nbytes) &&
01625 mcheck(&p, m)) {
01626
01627
01628
01629
01630
01631
01632
#ifdef DEBUG_MIMEMAGIC
01633
kdDebug(7018) <<
"continuation matched" <<
endl;
01634
#endif
01635
conf->resultBuf = m->desc;
01636 cont_level++;
01637 }
01638 }
01639
01640 m = m->next;
01641 }
01642
01643
01644
if ( !conf->resultBuf.isEmpty() )
01645 {
01646
#ifdef DEBUG_MIMEMAGIC
01647
kdDebug(7018) <<
"match: matched" <<
endl;
01648
#endif
01649
return 1;
01650 }
01651 }
01652
#ifdef DEBUG_MIMEMAGIC
01653
kdDebug(7018) <<
"match: failed" <<
endl;
01654
#endif
01655
return 0;
01656 }
01657
01658
01659
01660
/*
 * tagmagic - look for a leading "ID3" header and compute a size from it.
 *
 * buf    : buffer to examine.
 * nbytes : number of bytes in buf; anything shorter than 40 is ignored.
 *
 * Returns 0 when the buffer does not start with 'I','D','3', when byte 3
 * is greater than 4, or when any of the low four bits of byte 5 is set.
 * Otherwise returns 10 (20 when bit 0x10 of byte 5 is set) plus the value
 * assembled from bytes 6..9, each contributing 7 bits of shift.
 * (Presumably this is the total length of an ID3v2 tag - confirm against
 * the caller.)
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;

    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    if (buf[3] > 4)
        return 0;

    const unsigned char flags = buf[5];
    if (flags & 0x0F)
        return 0;

    int total = (flags & 0x10) ? 20 : 10;
    total += (buf[6] << 21) + (buf[7] << 14) + (buf[8] << 7) + buf[9];
    return total;
}
01679
01680
01681
01682
/* String equality test: compares the first characters before falling back
 * to strcmp().  Both arguments are evaluated more than once. */
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
01683
01684
static int ascmagic(
struct config_rec* conf,
unsigned char *buf,
int nbytes)
01685 {
01686
int i;
01687
double pct, maxpct, pctsum;
01688
double pcts[NTYPES];
01689
int mostaccurate, tokencount;
01690
int typeset, jonly, conly, jconly, cppcomm, ccomm;
01691
int has_escapes = 0;
01692
unsigned char *s;
01693
char nbuf[HOWMANY + 1];
01694
char *token;
01695
register const struct names *p;
01696
int typecount[NTYPES];
01697
01698
01699 conf->accuracy = 70;
01700
01701
01702
01703
01704
01705
01706
if (*buf ==
'.') {
01707
unsigned char *tp = buf + 1;
01708
01709
while (isascii(*tp) && isspace(*tp))
01710 ++tp;
01711
if ((isascii(*tp) && (isalnum(*tp) || *tp ==
'\\') &&
01712 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp ==
'"'))) {
01713 conf->resultBuf = MIME_APPL_TROFF;
01714
return 1;
01715 }
01716 }
01717
if ((*buf ==
'c' || *buf ==
'C') &&
01718 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01719
01720 conf->resultBuf = MIME_TEXT_FORTRAN;
01721
return 1;
01722 }
01723 assert(nbytes-1 < HOWMANY + 1);
01724
01725
01726 s = (
unsigned char *) memcpy(nbuf, buf, nbytes);
01727 s[nbytes-1] =
'\0';
01728 has_escapes = (memchr(s,
'\033', nbytes) != NULL);
01729
01730
01731
01732
01733 memset(&typecount, 0,
sizeof(typecount));
01734 typeset = 0;
01735 jonly = 0;
01736 conly = 0;
01737 jconly = 0;
01738 cppcomm = 0;
01739 ccomm = 0;
01740 tokencount = 0;
01741
bool foundClass =
false;
01742
01743
01744
while ((token = strtok((
char *) s,
" \t\n\r\f,;>")) != NULL) {
01745 s = NULL;
01746
#ifdef DEBUG_MIMEMAGIC
01747
kdDebug(7018) <<
"KMimeMagic::ascmagic token=" << token <<
endl;
01748
#endif
01749
for (p = names; p->name ; p++) {
01750
if (STREQ(p->name, token)) {
01751
#ifdef DEBUG_MIMEMAGIC
01752
kdDebug(7018) <<
"KMimeMagic::ascmagic token matches ! name=" << p->name <<
" type=" << p->type <<
endl;
01753
#endif
01754
tokencount++;
01755 typeset |= p->type;
01756
if (p->type == L_JAVA)
01757 jonly++;
01758
if ((p->type & (L_C|L_CPP|L_JAVA))
01759 == (L_CPP|L_JAVA)) {
01760 jconly++;
01761
if ( !foundClass && STREQ(
"class", token) )
01762 foundClass =
true;
01763 }
01764
if ((p->type & (L_C|L_CPP|L_JAVA))
01765 == (L_C|L_CPP))
01766 conly++;
01767
if (STREQ(token,
"//"))
01768 cppcomm++;
01769
if (STREQ(token,
"/*"))
01770 ccomm++;
01771
for (i = 0; i < (
int)NTYPES; i++)
01772
if ((1 << i) & p->type)
01773 typecount[i]++;
01774 }
01775 }
01776 }
01777
01778
if (typeset & (L_C|L_CPP|L_JAVA)) {
01779 conf->accuracy = 40;
01780
if (!(typeset & ~(L_C|L_CPP|L_JAVA))) {
01781
#ifdef DEBUG_MIMEMAGIC
01782
kdDebug(7018) <<
"C/C++/Java: jonly=" << jonly <<
" conly=" << conly <<
" jconly=" << jconly <<
" ccomm=" << ccomm <<
endl;
01783
#endif
01784
if (jonly && conly)
01785
01786
if ( jonly > conly )
01787 conly = 0;
01788
else
01789 jonly = 0;
01790
if (jonly > 1 && foundClass) {
01791
01792 conf->resultBuf =
QString(types[P_JAVA].type);
01793
return 1;
01794 }
01795
if (jconly > 1) {
01796
01797
if (typecount[P_JAVA] > typecount[P_CPP])
01798 conf->resultBuf =
QString(types[P_JAVA].type);
01799
else
01800 conf->resultBuf =
QString(types[P_CPP].type);
01801
return 1;
01802 }
01803
if (conly) {
01804
01805
if (cppcomm)
01806 conf->resultBuf =
QString(types[P_CPP].type);
01807
else
01808 conf->resultBuf =
QString(types[P_C].type);
01809
return 1;
01810 }
01811
if (ccomm) {
01812 conf->resultBuf =
QString(types[P_C].type);
01813
return 1;
01814 }
01815 }
01816 }
01817
01818
01819
01820
01821
01822 mostaccurate = -1;
01823 maxpct = pctsum = 0.0;
01824
for (i = 0; i < (
int)NTYPES; i++) {
01825
if (typecount[i] > 1) {
01826 pct = (
double)typecount[i] / (
double)types[i].kwords *
01827 (
double)types[i].weight;
01828 pcts[i] = pct;
01829 pctsum += pct;
01830
if (pct > maxpct) {
01831 maxpct = pct;
01832 mostaccurate = i;
01833 }
01834
#ifdef DEBUG_MIMEMAGIC
01835
kdDebug(7018) <<
"" << types[i].type <<
" has " << typecount[i] <<
" hits, " << types[i].kwords <<
" kw, weight " << types[i].weight <<
", " << pct <<
" -> max = " << maxpct <<
"\n" <<
endl;
01836
#endif
01837
}
01838 }
01839
if (mostaccurate >= 0) {
01840
if ( mostaccurate != P_JAVA || foundClass )
01841 {
01842 conf->accuracy = (
int)(pcts[mostaccurate] / pctsum * 60);
01843
#ifdef DEBUG_MIMEMAGIC
01844
kdDebug(7018) <<
"mostaccurate=" << mostaccurate <<
" pcts=" << pcts[mostaccurate] <<
" pctsum=" << pctsum <<
" accuracy=" << conf->accuracy <<
endl;
01845
#endif
01846
conf->resultBuf =
QString(types[mostaccurate].type);
01847
return 1;
01848 }
01849 }
01850
01851
switch (is_tar(buf, nbytes)) {
01852
case 1:
01853
01854 conf->resultBuf = MIME_APPL_TAR;
01855 conf->accuracy = 90;
01856
return 1;
01857
case 2:
01858
01859 conf->resultBuf = MIME_APPL_TAR;
01860 conf->accuracy = 90;
01861
return 1;
01862 }
01863
01864
for (i = 0; i < nbytes; i++) {
01865
if (!isascii(*(buf + i)))
01866
return 0;
01867 }
01868
01869
01870 conf->accuracy = 90;
01871
if (has_escapes) {
01872
01873
01874 conf->resultBuf = MIME_TEXT_UNKNOWN;
01875 }
else {
01876
01877 conf->resultBuf = MIME_TEXT_PLAIN;
01878 }
01879
return 1;
01880 }
01881
01882
01883
/* Longest line, in bytes, that textmagic() still accepts as text. */
#define TEXT_MAXLINELEN 300
01884
01885
01886
01887
01888
01889
static int textmagic(
struct config_rec* conf,
unsigned char * buf,
int nbytes)
01890 {
01891
int i;
01892
unsigned char *cp;
01893
01894 nbytes--;
01895
01896
01897
for (i = 0, cp = buf; i < nbytes; i++, cp++)
01898
if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
01899
return 0;
01900
01901
01902
01903
01904
for (i = 0; i < nbytes;) {
01905 cp = (
unsigned char *) memchr(buf,
'\n', nbytes - i);
01906
if (cp == NULL) {
01907
01908
if (i + TEXT_MAXLINELEN >= nbytes)
01909
break;
01910
else
01911
return 0;
01912 }
01913
if (cp - buf > TEXT_MAXLINELEN)
01914
return 0;
01915 i += (cp - buf + 1);
01916 buf = cp + 1;
01917 }
01918 conf->resultBuf = MIME_TEXT_PLAIN;
01919
return 1;
01920 }
01921
01922
01923
01924
01925
01926
01927
01928
01929
01930
01931
01932
01933
01934
01935
01936
/* True when c is one of the characters '0' through '7'.  The argument is
 * evaluated twice. */
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
01937
01938
01939
01940
01941
01942
01943
static int
01944 is_tar(
unsigned char *buf,
int nbytes)
01945 {
01946
register union record *header = (
union record *) buf;
01947
register int i;
01948
register long sum,
01949 recsum;
01950
register char *p;
01951
01952
if (nbytes < (
int)
sizeof(
union record))
01953 return 0;
01954
01955 recsum = from_oct(8, header->header.chksum);
01956
01957 sum = 0;
01958 p = header->charptr;
01959
for (i =
sizeof(
union record); --i >= 0;) {
01960
01961
01962
01963
01964 sum += 0xFF & *p++;
01965 }
01966
01967
01968
for (i =
sizeof(header->header.chksum); --i >= 0;)
01969 sum -= 0xFF & header->header.chksum[i];
01970 sum +=
' ' *
sizeof header->header.chksum;
01971
01972
if (sum != recsum)
01973
return 0;
01974
01975
if (0 == strcmp(header->header.magic, TMAGIC))
01976
return 2;
01977
01978
return 1;
01979 }
01980
01981
01982
01983
01984
01985
01986
01987
/*
 * from_oct - parse an octal number out of a fixed-width field.
 *
 * digs  : width of the field, in characters.
 * where : start of the field.
 *
 * Leading white space is skipped, then octal digits are accumulated until
 * the field is exhausted or a non-octal character is met.
 *
 * Returns the parsed value, or -1 when the field holds only white space
 * or when the digits are followed, inside the field, by something other
 * than a NUL or white space.
 */
static long
from_oct(int digs, char *where)
{
    long value;

    /*
     * The field comes from arbitrary file data: go through unsigned char
     * before calling isspace(), which is undefined for negative values.
     */
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;          /* nothing but white space */
    }

    value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;              /* ended on a non-space, non-NUL character */

    return value;
}
02008
02009 KMimeMagic::KMimeMagic()
02010 {
02011
02012
QString mimefile =
locate(
"mime",
"magic" );
02013 init( mimefile );
02014
02015
QStringList snippets =
KGlobal::dirs()->
findAllResources(
"config",
"magic/*.magic",
true );
02016
for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02017
if ( !
mergeConfig( *it ) )
02018
kdWarning() <<
k_funcinfo <<
"Failed to parse " << *it <<
endl;
02019 }
02020
02021 KMimeMagic::KMimeMagic(
const QString & _configfile)
02022 {
02023 init( _configfile );
02024 }
02025
02026
void KMimeMagic::init(
const QString& _configfile )
02027 {
02028
int result;
02029 conf =
new config_rec;
02030
02031
02032 conf->magic = conf->last = NULL;
02033 magicResult = NULL;
02034 conf->followLinks =
false;
02035
02036 conf->utimeConf = 0L;
02037
02038 result = apprentice(_configfile);
02039
if (result == -1)
02040
return;
02041
#ifdef MIME_MAGIC_DEBUG_TABLE
02042
test_table();
02043
#endif
02044
}
02045
02046
02047
02048
02049
02050 KMimeMagic::~KMimeMagic()
02051 {
02052
if (conf) {
02053
struct magic *p = conf->magic;
02054
struct magic *q;
02055
while (p) {
02056 q = p;
02057 p = p->next;
02058 free(q);
02059 }
02060
delete conf->utimeConf;
02061
delete conf;
02062 }
02063
delete magicResult;
02064 }
02065
02066
bool
02067 KMimeMagic::mergeConfig(
const QString & _configfile)
02068 {
02069
kdDebug(7018) <<
k_funcinfo << _configfile <<
endl;
02070
int result;
02071
02072
if (_configfile.isEmpty())
02073
return false;
02074 result = apprentice(_configfile);
02075
if (result == -1) {
02076
return false;
02077 }
02078
#ifdef MIME_MAGIC_DEBUG_TABLE
02079
test_table();
02080
#endif
02081
return true;
02082 }
02083
02084
bool
02085 KMimeMagic::mergeBufConfig(
char * _configbuf)
02086 {
02087
int result;
02088
02089
if (conf) {
02090 result = buff_apprentice(_configbuf);
02091
if (result == -1)
02092
return false;
02093
#ifdef MIME_MAGIC_DEBUG_TABLE
02094
test_table();
02095
#endif
02096
return true;
02097 }
02098
return false;
02099 }
02100
02101
void
02102 KMimeMagic::setFollowLinks(
bool _enable )
02103 {
02104 conf->followLinks = _enable;
02105 }
02106
02107
/*
 * findBufferType - determine the mime type of an in-memory buffer.
 *
 * At most HOWMANY bytes of array are examined.  An empty array yields
 * MIME_BINARY_ZEROSIZE; anything else is copied, NUL-terminated and
 * handed to tryit().
 *
 * Returns the object's own result holder (created on first use), filled
 * with the detected type, stripped of surrounding white space, and the
 * accuracy.  The same holder is reused by every call.
 */
KMimeMagicResult *
KMimeMagic::findBufferType(const QByteArray &array)
{
    unsigned char buf[HOWMANY + 1];

    conf->resultBuf = QString::null;
    if ( !magicResult )
        magicResult = new KMimeMagicResult();
    magicResult->setInvalid();
    conf->accuracy = 100;

    int nbytes = array.size();
    if (nbytes > HOWMANY)
        nbytes = HOWMANY;

    if (nbytes == 0) {
        conf->resultBuf = MIME_BINARY_ZEROSIZE;
    }
    else {
        /*
         * Copy only when there is something to copy: memcpy() must not
         * be given a null source pointer, even for a zero length, and an
         * empty array may have no data pointer at all.
         */
        memcpy(buf, array.data(), nbytes);
        buf[nbytes++] = '\0';
        tryit(conf, buf, nbytes);
    }

    magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
    magicResult->setAccuracy(conf->accuracy);
    return magicResult;
}
02134
02135
static void
02136 refineResult(
KMimeMagicResult *r,
const QString & _filename)
02137 {
02138
QString tmp = r->
mimeType();
02139
if (tmp.isEmpty())
02140
return;
02141
if ( tmp ==
"text/x-c" ||
02142 tmp ==
"text/x-c++" )
02143 {
02144
if ( _filename.right(2) ==
".h" )
02145 tmp +=
"hdr";
02146
else
02147 tmp +=
"src";
02148 r->
setMimeType(tmp);
02149 }
02150 }
02151
02152
/*
 * findBufferFileType - like findBufferType(), with the result then
 * refined by refineResult() using the file name fn.
 */
KMimeMagicResult *
KMimeMagic::findBufferFileType( const QByteArray &data,
                                const QString &fn)
{
    KMimeMagicResult * const found = findBufferType( data );
    refineResult(found, fn);
    return found;
}
02160
02161
02162
02163
02164 KMimeMagicResult*
KMimeMagic::findFileType(
const QString & fn)
02165 {
02166
#ifdef DEBUG_MIMEMAGIC
02167
kdDebug(7018) <<
"KMimeMagic::findFileType " << fn <<
endl;
02168
#endif
02169
conf->resultBuf = QString::null;
02170
02171
if ( !
magicResult )
02172
magicResult =
new KMimeMagicResult();
02173
magicResult->
setInvalid();
02174 conf->accuracy = 100;
02175
02176
if ( !conf->utimeConf )
02177 conf->utimeConf =
new KMimeMagicUtimeConf();
02178
02179
02180 process(conf, fn );
02181
02182
02183
02184
magicResult->
setMimeType(conf->resultBuf.stripWhiteSpace());
02185
magicResult->
setAccuracy(conf->accuracy);
02186 refineResult(
magicResult, fn);
02187
return magicResult;
02188 }