| tfile.c - plan9port - [fork] Plan 9 from user space | |
| git clone git://src.adamsgaard.dk/plan9port | |
| Log | |
| Files | |
| Refs | |
| README | |
| LICENSE | |
| --- | |
| tfile.c (23931B) | |
| --- | |
| 1 #include <u.h> | |
| 2 #include <libc.h> | |
| 3 #include <bio.h> | |
| 4 #include <ctype.h> | |
| 5 #include <mach.h> | |
| 6 | |
| 7 /* | |
| 8 * file - determine type of file | |
| 9 */ | |
| 10 #define LENDIAN(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | … | |
| 11 | |
uchar buf[6001];	/* leading bytes of the file; filetype() reads at most sizeof(buf)-1 and appends a NUL */
short cfreq[140];	/* character histogram built by filetype(): indexed by ASCII code or by a C* class code */
short wfreq[50];	/* word histogram built by wordfreq(): indexed by the class of a dict[] entry */
int nbuf;		/* number of bytes read into buf */
Dir* mbuf;		/* dirfstat() result for the file being examined; freed on the next filetype() call */
int fd;			/* descriptor opened by type(); long0() and istar() seek on it */
char *fname;		/* name of the file being examined, set by type() */
char *slash;		/* rightmost '/' in the file name as found by type(), 0 if none; used by isp9font() */
| 20 | |
/*
 * Word classes (indices into wfreq[]) followed by the character
 * classes that filetype() uses as cfreq[] indices above the ASCII range.
 */
enum
{
	Cword,		/* C keyword (see isc) */
	Fword,		/* presumably Fortran keyword; no visible classifier reads it */
	Aword,		/* assembler keyword (see isas) */
	Alword,		/* alef keyword (see isc) */
	Lword,		/* limbo keyword (see islimbo) */
	I1,		/* the word "include" */
	I2,		/* base name of a common header */
	I3,		/* the "h" of a header suffix */
	Clatin	= 128,
	Cbinary,
	Cnull,
	Ceascii,
	Cutf,
};

/*
 * Keyword dictionary consulted by wordfreq().
 *
 * wordfreq() looks words up with a binary search using strcmp(), so the
 * entries MUST stay in ascending strcmp() order (upper case sorts before
 * lower case).  "bio" used to sit between "extern" and "float", which not
 * only hid "bio" itself but also made "data", "dimension", "double" and
 * "extern" unreachable, because the search was misdirected at that slot.
 */
struct
{
	char*	word;
	int	class;
} dict[] =
{
	{"PATH",	Lword},
	{"TEXT",	Aword},
	{"adt",		Alword},
	{"aggr",	Alword},
	{"alef",	Alword},
	{"array",	Lword},
	{"bio",		I2},
	{"block",	Fword},
	{"chan",	Alword},
	{"char",	Cword},
	{"common",	Fword},
	{"con",		Lword},
	{"data",	Fword},
	{"dimension",	Fword},
	{"double",	Cword},
	{"extern",	Cword},
	{"float",	Cword},
	{"fn",		Lword},
	{"function",	Fword},
	{"h",		I3},
	{"implement",	Lword},
	{"import",	Lword},
	{"include",	I1},
	{"int",		Cword},
	{"integer",	Fword},
	{"iota",	Lword},
	{"libc",	I2},
	{"long",	Cword},
	{"module",	Lword},
	{"real",	Fword},
	{"ref",		Lword},
	{"register",	Cword},
	{"self",	Lword},
	{"short",	Cword},
	{"static",	Cword},
	{"stdio",	I2},
	{"struct",	Cword},
	{"subroutine",	Fword},
	{"u",		I2},
	{"void",	Cword},
};
| 84 | |
/* codes for 'mode' field in language structure */
enum {
	Normal = 0,	/* language described by a single range */
	First,		/* first entry of a language spanning several ranges; counted by utf_count() */
	Multi,		/* later entry of such a language; print_utf() shows it only when the First entry saw no runes */
	Shared,		/* range shared by several languages; neither counted nor printed */
};
| 92 | |
| 93 struct | |
| 94 { | |
| 95 int mode; /* see enum above */ | |
| 96 int count; | |
| 97 int low; | |
| 98 int high; | |
| 99 char *name; | |
| 100 | |
| 101 } language[] = | |
| 102 { | |
| 103 Normal, 0, 0x0080, 0x0080, "Extended Latin", | |
| 104 Normal, 0, 0x0100, 0x01FF, "Extended… | |
| 105 Normal, 0, 0x0370, 0x03FF, "Greek", | |
| 106 Normal, 0, 0x0400, 0x04FF, "Cyrillic… | |
| 107 Normal, 0, 0x0530, 0x058F, "Armenian… | |
| 108 Normal, 0, 0x0590, 0x05FF, "Hebrew", | |
| 109 Normal, 0, 0x0600, 0x06FF, "Arabic", | |
| 110 Normal, 0, 0x0900, 0x097F, "Devanaga… | |
| 111 Normal, 0, 0x0980, 0x09FF, "Bengali", | |
| 112 Normal, 0, 0x0A00, 0x0A7F, "Gurmukhi… | |
| 113 Normal, 0, 0x0A80, 0x0AFF, "Gujarati… | |
| 114 Normal, 0, 0x0B00, 0x0B7F, "Oriya", | |
| 115 Normal, 0, 0x0B80, 0x0BFF, "Tamil", | |
| 116 Normal, 0, 0x0C00, 0x0C7F, "Telugu", | |
| 117 Normal, 0, 0x0C80, 0x0CFF, "Kannada", | |
| 118 Normal, 0, 0x0D00, 0x0D7F, "Malayala… | |
| 119 Normal, 0, 0x0E00, 0x0E7F, "Thai", | |
| 120 Normal, 0, 0x0E80, 0x0EFF, "Lao", | |
| 121 Normal, 0, 0x1000, 0x105F, "Tibetan", | |
| 122 Normal, 0, 0x10A0, 0x10FF, "Georgian… | |
| 123 Normal, 0, 0x3040, 0x30FF, "Japanese… | |
| 124 Normal, 0, 0x3100, 0x312F, "Chinese", | |
| 125 First, 0, 0x3130, 0x318F, "Korean", | |
| 126 Multi, 0, 0x3400, 0x3D2F, "Korean", | |
| 127 Shared, 0, 0x4e00, 0x9fff, "CJK", | |
| 128 Normal, 0, 0, 0, 0, /… | |
| 129 }; | |
| 130 | |
| 131 | |
/* gross classification of the buffer, chosen by filetype() from the cfreq[] histogram */
enum
{
	Fascii,		/* printable ascii */
	Flatin,		/* latin 1 */
	Futf,		/* UTF character set */
	Fbinary,	/* binary */
	Feascii,	/* ASCII with control chars */
	Fnull,		/* NULL in file (never assigned in the code visible here) */
} guess;
| 141 | |
| 142 void bump_utf_count(Rune); | |
| 143 int cistrncmp(char*, char*, int); | |
| 144 void filetype(int); | |
| 145 int getfontnum(uchar*, uchar**); | |
| 146 int isas(void); | |
| 147 int isc(void); | |
| 148 int isenglish(void); | |
| 149 int ishp(void); | |
| 150 int ishtml(void); | |
| 151 int isrfc822(void); | |
| 152 int ismbox(void); | |
| 153 int islimbo(void); | |
| 154 int ismung(void); | |
| 155 int isp9bit(void); | |
| 156 int isp9font(void); | |
| 157 int isrtf(void); | |
| 158 int ismsdos(void); | |
| 159 int iself(void); | |
| 160 int istring(void); | |
| 161 int iff(void); | |
| 162 int long0(void); | |
| 163 int istar(void); | |
| 164 int p9bitnum(uchar*); | |
| 165 int p9subfont(uchar*); | |
| 166 void print_utf(void); | |
| 167 void type(char*, int); | |
| 168 int utf_count(void); | |
| 169 void wordfreq(void); | |
| 170 | |
| 171 int (*call[])(void) = | |
| 172 { | |
| 173 long0, /* recognizable by first 4 bytes */ | |
| 174 istring, /* recognizable by first string */ | |
| 175 iff, /* interchange file format (strings) */ | |
| 176 isrfc822, /* email file */ | |
| 177 ismbox, /* mail box */ | |
| 178 istar, /* recognizable by tar checksum */ | |
| 179 ishtml, /* html keywords */ | |
| 180 /* iscint, /* compiler/assembler intermediate */ | |
| 181 islimbo, /* limbo source */ | |
| 182 isc, /* c & alef compiler key words */ | |
| 183 isas, /* assembler key words */ | |
| 184 ismung, /* entropy compressed/encrypted */ | |
| 185 isp9font, /* plan 9 font */ | |
| 186 isp9bit, /* plan 9 image (as from /dev/window) */ | |
| 187 isenglish, /* char frequency English */ | |
| 188 isrtf, /* rich text format */ | |
| 189 ismsdos, /* msdos exe (virus file attachement) */ | |
| 190 iself, /* ELF (foreign) executable */ | |
| 191 0 | |
| 192 }; | |
| 193 | |
| 194 int mime; | |
| 195 | |
| 196 #define OCTET "application/octet-stream\n" | |
| 197 #define PLAIN "text/plain\n" | |
| 198 | |
| 199 void | |
| 200 main(int argc, char *argv[]) | |
| 201 { | |
| 202 int i, j, maxlen; | |
| 203 char *cp; | |
| 204 Rune r; | |
| 205 | |
| 206 ARGBEGIN{ | |
| 207 case 'm': | |
| 208 mime = 1; | |
| 209 break; | |
| 210 default: | |
| 211 fprint(2, "usage: file [-m] [file...]\n"); | |
| 212 exits("usage"); | |
| 213 }ARGEND; | |
| 214 | |
| 215 maxlen = 0; | |
| 216 if(mime == 0 || argc > 1){ | |
| 217 for(i = 0; i < argc; i++) { | |
| 218 for (j = 0, cp = argv[i]; *cp; j++, cp += charto… | |
| 219 ; | |
| 220 if(j > maxlen) | |
| 221 maxlen = j; | |
| 222 } | |
| 223 } | |
| 224 if (argc <= 0) { | |
| 225 if(!mime) | |
| 226 print ("stdin: "); | |
| 227 filetype(0); | |
| 228 } | |
| 229 else { | |
| 230 for(i = 0; i < argc; i++) | |
| 231 type(argv[i], maxlen); | |
| 232 } | |
| 233 exits(0); | |
| 234 } | |
| 235 | |
| 236 void | |
| 237 type(char *file, int nlen) | |
| 238 { | |
| 239 Rune r; | |
| 240 int i; | |
| 241 char *p; | |
| 242 | |
| 243 if(nlen > 0){ | |
| 244 slash = 0; | |
| 245 for (i = 0, p = file; *p; i++) { | |
| 246 if (*p == '/') /* find ri… | |
| 247 slash = p; | |
| 248 p += chartorune(&r, p); /* count … | |
| 249 } | |
| 250 print("%s:%*s",file, nlen-i+1, ""); | |
| 251 } | |
| 252 fname = file; | |
| 253 if ((fd = open(file, OREAD)) < 0) { | |
| 254 print("cannot open\n"); | |
| 255 return; | |
| 256 } | |
| 257 filetype(fd); | |
| 258 close(fd); | |
| 259 } | |
| 260 | |
/*
 * Examine the open descriptor fd and print one line describing it
 * (a MIME type when the global mime is set).
 *
 * Steps: stat the file, read the first sizeof(buf)-1 bytes into buf,
 * build the character histogram cfreq[] and the per-language rune
 * counts, derive the gross class in guess, run wordfreq() for text,
 * then try each recognizer in call[] until one claims the file.
 * If none does, the gross class itself is printed.
 *
 * Note that the parameter shadows the global fd.
 */
void
filetype(int fd)
{
	Rune r;
	int i, f, n;
	char *p, *eob;

	free(mbuf);		/* release the Dir from the previous file (nil the first time) */
	mbuf = dirfstat(fd);
	if(mbuf == nil){
		print("cannot stat: %r\n");
		return;
	}
	if(mbuf->mode & DMDIR) {
		print(mime ? "text/directory\n" : "directory\n");
		return;
	}
	if(mbuf->type != 'M' && mbuf->type != '|') {
		/* in mime mode the format has no verbs, so the extra arguments are unused */
		print(mime ? OCTET : "special file #%c/%s\n",
			mbuf->type, mbuf->name);
		return;
	}
	nbuf = read(fd, buf, sizeof(buf)-1);	/* keep one byte for the NUL below */

	if(nbuf < 0) {
		print("cannot read\n");
		return;
	}
	if(nbuf == 0) {
		print(mime ? PLAIN : "empty file\n");
		return;
	}
	buf[nbuf] = 0;

	/*
	 * build histogram table
	 */
	memset(cfreq, 0, sizeof(cfreq));
	for (i = 0; language[i].name; i++)
		language[i].count = 0;
	eob = (char *)buf+nbuf;
	/* n counts the runes decoded, for the all-null test below */
	for(n = 0, p = (char *)buf; p < eob; n++) {
		/* stop at a rune cut off by the end of the buffer */
		if (!fullrune(p, eob-p) && eob-p < UTFmax)
			break;
		p += chartorune(&r, p);
		if (r == 0)
			f = Cnull;
		else if (r <= 0x7f) {
			if (!isprint(r) && !isspace(r))
				f = Ceascii;	/* ASCII control char */
			else f = r;
		} else if (r == 0x080) {
			bump_utf_count(r);
			f = Cutf;
		} else if (r < 0xA0)
			f = Cbinary;	/* Invalid Runes */
		else if (r <= 0xff)
			f = Clatin;	/* Latin 1 */
		else {
			bump_utf_count(r);
			f = Cutf;	/* UTF extension */
		}
		cfreq[f]++;	/* printable ASCII is counted under its own code */
	}
	/*
	 * gross classify
	 */
	if (cfreq[Cbinary])
		guess = Fbinary;
	else if (cfreq[Cutf])
		guess = Futf;
	else if (cfreq[Clatin])
		guess = Flatin;
	else if (cfreq[Ceascii])
		guess = Feascii;
	else if (cfreq[Cnull] == n) {
		print(mime ? OCTET : "first block all null bytes\n");
		return;
	}
	else guess = Fascii;
	/*
	 * lookup dictionary words
	 */
	memset(wfreq, 0, sizeof(wfreq));
	if(guess == Fascii || guess == Flatin || guess == Futf)
		wordfreq();
	/*
	 * call individual classify routines
	 */
	for(i=0; call[i]; i++)
		if((*call[i])())
			return;

	/*
	 * if all else fails,
	 * print out gross classification
	 */
	/* mime is known to be 0 here, so the conditional always yields "short " */
	if (nbuf < 100 && !mime)
		print(mime ? PLAIN : "short ");
	if (guess == Fascii)
		print(mime ? PLAIN : "Ascii\n");
	else if (guess == Feascii)
		print(mime ? PLAIN : "extended ascii\n");
	else if (guess == Flatin)
		print(mime ? PLAIN : "latin ascii\n");
	else if (guess == Futf && utf_count() < 4)
		print_utf();
	else print(mime ? OCTET : "binary\n");
}
| 370 | |
| 371 void | |
| 372 bump_utf_count(Rune r) | |
| 373 { | |
| 374 int low, high, mid; | |
| 375 | |
| 376 high = sizeof(language)/sizeof(language[0])-1; | |
| 377 for (low = 0; low < high;) { | |
| 378 mid = (low+high)/2; | |
| 379 if (r >=language[mid].low) { | |
| 380 if (r <= language[mid].high) { | |
| 381 language[mid].count++; | |
| 382 break; | |
| 383 } else low = mid+1; | |
| 384 } else high = mid; | |
| 385 } | |
| 386 } | |
| 387 | |
| 388 int | |
| 389 utf_count(void) | |
| 390 { | |
| 391 int i, count; | |
| 392 | |
| 393 count = 0; | |
| 394 for (i = 0; language[i].name; i++) | |
| 395 if (language[i].count > 0) | |
| 396 switch (language[i].mode) { | |
| 397 case Normal: | |
| 398 case First: | |
| 399 count++; | |
| 400 break; | |
| 401 default: | |
| 402 break; | |
| 403 } | |
| 404 return count; | |
| 405 } | |
| 406 | |
| 407 int | |
| 408 chkascii(void) | |
| 409 { | |
| 410 int i; | |
| 411 | |
| 412 for (i = 'a'; i < 'z'; i++) | |
| 413 if (cfreq[i]) | |
| 414 return 1; | |
| 415 for (i = 'A'; i < 'Z'; i++) | |
| 416 if (cfreq[i]) | |
| 417 return 1; | |
| 418 return 0; | |
| 419 } | |
| 420 | |
| 421 int | |
| 422 find_first(char *name) | |
| 423 { | |
| 424 int i; | |
| 425 | |
| 426 for (i = 0; language[i].name != 0; i++) | |
| 427 if (language[i].mode == First | |
| 428 && strcmp(language[i].name, name) == 0) | |
| 429 return i; | |
| 430 return -1; | |
| 431 } | |
| 432 | |
| 433 void | |
| 434 print_utf(void) | |
| 435 { | |
| 436 int i, printed, j; | |
| 437 | |
| 438 if(mime){ | |
| 439 print(PLAIN); | |
| 440 return; | |
| 441 } | |
| 442 if (chkascii()) { | |
| 443 printed = 1; | |
| 444 print("Ascii"); | |
| 445 } else | |
| 446 printed = 0; | |
| 447 for (i = 0; language[i].name; i++) | |
| 448 if (language[i].count) { | |
| 449 switch(language[i].mode) { | |
| 450 case Multi: | |
| 451 j = find_first(language[i].name); | |
| 452 if (j < 0) | |
| 453 break; | |
| 454 if (language[j].count > 0) | |
| 455 break; | |
| 456 /* Fall through */ | |
| 457 case Normal: | |
| 458 case First: | |
| 459 if (printed) | |
| 460 print(" & "); | |
| 461 else printed = 1; | |
| 462 print("%s", language[i].name); | |
| 463 break; | |
| 464 case Shared: | |
| 465 default: | |
| 466 break; | |
| 467 } | |
| 468 } | |
| 469 if(!printed) | |
| 470 print("UTF"); | |
| 471 print(" text\n"); | |
| 472 } | |
| 473 | |
| 474 void | |
| 475 wordfreq(void) | |
| 476 { | |
| 477 int low, high, mid, r; | |
| 478 uchar *p, *p2, c; | |
| 479 | |
| 480 p = buf; | |
| 481 for(;;) { | |
| 482 while (p < buf+nbuf && !isalpha(*p)) | |
| 483 p++; | |
| 484 if (p >= buf+nbuf) | |
| 485 return; | |
| 486 p2 = p; | |
| 487 while(p < buf+nbuf && isalpha(*p)) | |
| 488 p++; | |
| 489 c = *p; | |
| 490 *p = 0; | |
| 491 high = sizeof(dict)/sizeof(dict[0]); | |
| 492 for(low = 0;low < high;) { | |
| 493 mid = (low+high)/2; | |
| 494 r = strcmp(dict[mid].word, (char*)p2); | |
| 495 if(r == 0) { | |
| 496 wfreq[dict[mid].class]++; | |
| 497 break; | |
| 498 } | |
| 499 if(r < 0) | |
| 500 low = mid+1; | |
| 501 else | |
| 502 high = mid; | |
| 503 } | |
| 504 *p++ = c; | |
| 505 } | |
| 506 } | |
| 507 | |
/* one magic-number rule; filemagic() accepts a value v when (v & mask) == x */
typedef struct Filemagic Filemagic;
struct Filemagic {
	ulong x;	/* expected value after masking */
	ulong mask;	/* bits of the candidate value that take part in the comparison */
	char *desc;	/* text printed in normal mode */
	char *mime;	/* text printed in mime mode */
};
| 515 | |
| 516 Filemagic long0tab[] = { | |
| 517 0xF16DF16D, 0xFFFFFFFF, "pac1 audio file\n", … | |
| 518 0x31636170, 0xFFFFFFFF, "pac3 audio file\n", … | |
| 519 0x32636170, 0xFFFF00FF, "pac4 audio file\n", … | |
| 520 0xBA010000, 0xFFFFFFFF, "mpeg system stream\n", … | |
| 521 0x30800CC0, 0xFFFFFFFF, "inferno .dis executable\n… | |
| 522 0x04034B50, 0xFFFFFFFF, "zip archive\n", "applicat… | |
| 523 070707, 0xFFFF, "cpio archive\n", … | |
| 524 0x2F7, 0xFFFF, "tex dvi\n", "appli… | |
| 525 0xfffa0000, 0xfffe0000, "mp3 audio\n", "aud… | |
| 526 0xcafebabe, 0xFFFFFFFF, "Mach-O fat executable\n",… | |
| 527 0xfeedface, 0xFFFFFFFE, "Mach-O executable\n", … | |
| 528 0xbebafeca, 0xFFFFFFFF, "Java class\n", "ap… | |
| 529 }; | |
| 530 | |
| 531 int | |
| 532 filemagic(Filemagic *tab, int ntab, ulong x) | |
| 533 { | |
| 534 int i; | |
| 535 | |
| 536 for(i=0; i<ntab; i++) | |
| 537 if((x&tab[i].mask) == tab[i].x){ | |
| 538 print(mime ? tab[i].mime : tab[i].desc); | |
| 539 return 1; | |
| 540 } | |
| 541 return 0; | |
| 542 } | |
| 543 | |
| 544 int | |
| 545 long0(void) | |
| 546 { | |
| 547 /* Fhdr *f; */ | |
| 548 long x; | |
| 549 | |
| 550 seek(fd, 0, 0); /* reposition to start of file */ | |
| 551 /* | |
| 552 if(crackhdr(fd, &f)) { | |
| 553 print(mime ? OCTET : "%s\n", f.name); | |
| 554 return 1; | |
| 555 } | |
| 556 */ | |
| 557 x = LENDIAN(buf); | |
| 558 if(filemagic(long0tab, nelem(long0tab), x)) | |
| 559 return 1; | |
| 560 return 0; | |
| 561 } | |
| 562 | |
/* from tar.c */
enum { NAMSIZ = 100, TBLOCK = 512 };

/*
 * One tar header block, viewable either as raw bytes (dummy, summed
 * by checksum()) or as the header fields (dbuf).
 */
union hblock
{
	char dummy[TBLOCK];
	struct header
	{
		char name[NAMSIZ];
		char mode[8];
		char uid[8];
		char gid[8];
		char size[12];
		char mtime[12];
		char chksum[8];		/* parsed as octal text by istar() */
		char linkflag;
		char linkname[NAMSIZ];
		/* the remaining fields are defined by POSIX's ustar format */
		char magic[6];		/* "ustar" */
		char version[2];
		char uname[32];
		char gname[32];
		char devmajor[8];
		char devminor[8];
		char prefix[155];	/* if non-null, the path is prefix followed by name */
	} dbuf;
};
| 590 | |
| 591 int | |
| 592 checksum(union hblock *hp) | |
| 593 { | |
| 594 int i; | |
| 595 char *cp; | |
| 596 struct header *hdr = &hp->dbuf; | |
| 597 | |
| 598 for (cp = hdr->chksum; cp < &hdr->chksum[sizeof hdr->chksum]; cp… | |
| 599 *cp = ' '; | |
| 600 i = 0; | |
| 601 for (cp = hp->dummy; cp < &hp->dummy[TBLOCK]; cp++) | |
| 602 i += *cp & 0xff; | |
| 603 return i; | |
| 604 } | |
| 605 | |
| 606 int | |
| 607 istar(void) | |
| 608 { | |
| 609 int chksum; | |
| 610 char tblock[TBLOCK]; | |
| 611 union hblock *hp = (union hblock *)tblock; | |
| 612 struct header *hdr = &hp->dbuf; | |
| 613 | |
| 614 seek(fd, 0, 0); /* reposition to start of file */ | |
| 615 if (readn(fd, tblock, sizeof tblock) != sizeof tblock) | |
| 616 return 0; | |
| 617 chksum = strtol(hdr->chksum, 0, 8); | |
| 618 if (hdr->name[0] != '\0' && checksum(hp) == chksum) { | |
| 619 if (strcmp(hdr->magic, "ustar") == 0) | |
| 620 print(mime? "application/x-ustar\n": | |
| 621 "posix tar archive\n"); | |
| 622 else | |
| 623 print(mime? "application/x-tar\n": "tar archive\… | |
| 624 return 1; | |
| 625 } | |
| 626 return 0; | |
| 627 } | |
| 628 | |
| 629 /* | |
| 630 * initial words to classify file | |
| 631 */ | |
| 632 struct FILE_STRING | |
| 633 { | |
| 634 char *key; | |
| 635 char *filetype; | |
| 636 int length; | |
| 637 char *mime; | |
| 638 } file_string[] = | |
| 639 { | |
| 640 "!<arch>\n__.SYMDEF", "archive random library", 16… | |
| 641 "!<arch>\n", "archive", 8,… | |
| 642 "070707", "cpio archive - ascii header", 6… | |
| 643 "%!", "postscript", … | |
| 644 "\004%!", "postscript", 3,… | |
| 645 "x T post", "troff output for post", 8, … | |
| 646 "x T Latin1", "troff output for Latin1", 1… | |
| 647 "x T utf", "troff output for UTF", … | |
| 648 "x T 202", "troff output for 202", … | |
| 649 "x T aps", "troff output for aps", … | |
| 650 "GIF", "GIF image", … | |
| 651 "\0PC Research, Inc\0", "ghostscript fax file", … | |
| 652 "%PDF", "PDF", … | |
| 653 "<html>\n", "HTML file", 7… | |
| 654 "<HTML>\n", "HTML file", 7… | |
| 655 "compressed\n", "Compressed image or subfont", … | |
| 656 "\111\111\052\000", "tiff", … | |
| 657 "\115\115\000\052", "tiff", … | |
| 658 "\377\330\377\340", "jpeg", … | |
| 659 "\377\330\377\341", "jpeg", … | |
| 660 "\377\330\377\333", "jpeg", … | |
| 661 "\106\117\126\142", "x3f", … | |
| 662 "BM", "bmp", … | |
| 663 "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", "microsoft office doc… | |
| 664 "<MakerFile ", "FrameMaker file", … | |
| 665 "\033%-12345X", "HPJCL file", 9, "a… | |
| 666 "ID3", "mp3 audio with id3", 3, … | |
| 667 0,0,0,0 | |
| 668 }; | |
| 669 | |
| 670 int | |
| 671 istring(void) | |
| 672 { | |
| 673 int i, j; | |
| 674 struct FILE_STRING *p; | |
| 675 | |
| 676 for(p = file_string; p->key; p++) { | |
| 677 if(nbuf >= p->length && !memcmp(buf, p->key, p->length))… | |
| 678 if(mime) | |
| 679 print("%s\n", p->mime); | |
| 680 else | |
| 681 print("%s\n", p->filetype); | |
| 682 return 1; | |
| 683 } | |
| 684 } | |
| 685 if(strncmp((char*)buf, "TYPE=", 5) == 0) { /* td */ | |
| 686 for(i = 5; i < nbuf; i++) | |
| 687 if(buf[i] == '\n') | |
| 688 break; | |
| 689 if(mime) | |
| 690 print(OCTET); | |
| 691 else | |
| 692 print("%.*s picture\n", utfnlen((char*)buf+5, i-… | |
| 693 return 1; | |
| 694 } | |
| 695 if(buf[0]=='#' && buf[1]=='!'){ | |
| 696 i=2; | |
| 697 for(j=2; j < nbuf && buf[j] != ' ' && buf[j] != '\n' && … | |
| 698 if(buf[j] == '/') | |
| 699 i = j+1; | |
| 700 if(mime) | |
| 701 print(PLAIN); | |
| 702 else | |
| 703 print("%.*s executable file script\n", utfnlen((… | |
| 704 return 1; | |
| 705 } | |
| 706 return 0; | |
| 707 } | |
| 708 | |
| 709 int | |
| 710 iff(void) | |
| 711 { | |
| 712 if (strncmp((char*)buf, "FORM", 4) == 0 && | |
| 713 strncmp((char*)buf+8, "AIFF", 4) == 0) { | |
| 714 print("%s\n", mime? "audio/x-aiff": "aiff audio"); | |
| 715 return 1; | |
| 716 } | |
| 717 return 0; | |
| 718 } | |
| 719 | |
/*
 * Tag names that ishtml() compares, ignoring case, with the text found
 * between '<' (optionally followed by '/') and '>'.  The list ends
 * with a 0 entry.
 */
char* html_string[] =
{
	"title",
	"body",
	"head",
	"strong",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"ul",
	"li",
	"dl",
	"br",
	"em",
	0,
};
| 739 | |
| 740 int | |
| 741 ishtml(void) | |
| 742 { | |
| 743 uchar *p, *q; | |
| 744 int i, count; | |
| 745 | |
| 746 /* compare strings between '<' and '>' to html table */ | |
| 747 count = 0; | |
| 748 p = buf; | |
| 749 for(;;) { | |
| 750 while (p < buf+nbuf && *p != '<') | |
| 751 p++; | |
| 752 p++; | |
| 753 if (p >= buf+nbuf) | |
| 754 break; | |
| 755 if(*p == '/') | |
| 756 p++; | |
| 757 q = p; | |
| 758 while(p < buf+nbuf && *p != '>') | |
| 759 p++; | |
| 760 if (p >= buf+nbuf) | |
| 761 break; | |
| 762 for(i = 0; html_string[i]; i++) { | |
| 763 if(cistrncmp(html_string[i], (char*)q, p-q) == 0… | |
| 764 if(count++ > 4) { | |
| 765 print(mime ? "text/html\n" : "HT… | |
| 766 return 1; | |
| 767 } | |
| 768 break; | |
| 769 } | |
| 770 } | |
| 771 p++; | |
| 772 } | |
| 773 return 0; | |
| 774 } | |
| 775 | |
/*
 * Mail header field names, in lower case and including the colon, that
 * isrfc822() compares case-insensitively with the start of each header
 * line.  The list ends with a 0 entry.
 *
 * Field names cannot contain blanks; the reply field is spelled
 * "Reply-To:", so the earlier "reply to:" entry could never match.
 */
char* rfc822_string[] =
{
	"from:",
	"date:",
	"to:",
	"subject:",
	"received:",
	"reply-to:",
	"sender:",
	0,
};
| 787 | |
| 788 int | |
| 789 isrfc822(void) | |
| 790 { | |
| 791 | |
| 792 char *p, *q, *r; | |
| 793 int i, count; | |
| 794 | |
| 795 count = 0; | |
| 796 p = (char*)buf; | |
| 797 for(;;) { | |
| 798 q = strchr(p, '\n'); | |
| 799 if(q == nil) | |
| 800 break; | |
| 801 *q = 0; | |
| 802 if(p == (char*)buf && strncmp(p, "From ", 5) == 0 && str… | |
| 803 count++; | |
| 804 *q = '\n'; | |
| 805 p = q+1; | |
| 806 continue; | |
| 807 } | |
| 808 *q = '\n'; | |
| 809 if(*p != '\t' && *p != ' '){ | |
| 810 r = strchr(p, ':'); | |
| 811 if(r == 0 || r > q) | |
| 812 break; | |
| 813 for(i = 0; rfc822_string[i]; i++) { | |
| 814 if(cistrncmp(p, rfc822_string[i], strlen… | |
| 815 count++; | |
| 816 break; | |
| 817 } | |
| 818 } | |
| 819 } | |
| 820 p = q+1; | |
| 821 } | |
| 822 if(count >= 3){ | |
| 823 print(mime ? "message/rfc822\n" : "email file\n"); | |
| 824 return 1; | |
| 825 } | |
| 826 return 0; | |
| 827 } | |
| 828 | |
| 829 int | |
| 830 ismbox(void) | |
| 831 { | |
| 832 char *p, *q; | |
| 833 | |
| 834 p = (char*)buf; | |
| 835 q = strchr(p, '\n'); | |
| 836 if(q == nil) | |
| 837 return 0; | |
| 838 *q = 0; | |
| 839 if(strncmp(p, "From ", 5) == 0 && strstr(p, " remote from ") == … | |
| 840 print(mime ? "text/plain\n" : "mail box\n"); | |
| 841 return 1; | |
| 842 } | |
| 843 *q = '\n'; | |
| 844 return 0; | |
| 845 } | |
| 846 | |
| 847 int | |
| 848 isc(void) | |
| 849 { | |
| 850 int n; | |
| 851 | |
| 852 n = wfreq[I1]; | |
| 853 /* | |
| 854 * includes | |
| 855 */ | |
| 856 if(n >= 2 && wfreq[I2] >= n && wfreq[I3] >= n && cfreq['.'] >= n) | |
| 857 goto yes; | |
| 858 if(n >= 1 && wfreq[Alword] >= n && wfreq[I3] >= n && cfreq['.'] … | |
| 859 goto yes; | |
| 860 /* | |
| 861 * declarations | |
| 862 */ | |
| 863 if(wfreq[Cword] >= 5 && cfreq[';'] >= 5) | |
| 864 goto yes; | |
| 865 /* | |
| 866 * assignments | |
| 867 */ | |
| 868 if(cfreq[';'] >= 10 && cfreq['='] >= 10 && wfreq[Cword] >= 1) | |
| 869 goto yes; | |
| 870 return 0; | |
| 871 | |
| 872 yes: | |
| 873 if(mime){ | |
| 874 print(PLAIN); | |
| 875 return 1; | |
| 876 } | |
| 877 if(wfreq[Alword] > 0) | |
| 878 print("alef program\n"); | |
| 879 else | |
| 880 print("c program\n"); | |
| 881 return 1; | |
| 882 } | |
| 883 | |
| 884 int | |
| 885 islimbo(void) | |
| 886 { | |
| 887 | |
| 888 /* | |
| 889 * includes | |
| 890 */ | |
| 891 if(wfreq[Lword] < 4) | |
| 892 return 0; | |
| 893 print(mime ? PLAIN : "limbo program\n"); | |
| 894 return 1; | |
| 895 } | |
| 896 | |
| 897 int | |
| 898 isas(void) | |
| 899 { | |
| 900 | |
| 901 /* | |
| 902 * includes | |
| 903 */ | |
| 904 if(wfreq[Aword] < 2) | |
| 905 return 0; | |
| 906 print(mime ? PLAIN : "as program\n"); | |
| 907 return 1; | |
| 908 } | |
| 909 | |
| 910 /* | |
| 911 * low entropy means encrypted | |
| 912 */ | |
| 913 int | |
| 914 ismung(void) | |
| 915 { | |
| 916 int i, bucket[8]; | |
| 917 float cs; | |
| 918 | |
| 919 if(nbuf < 64) | |
| 920 return 0; | |
| 921 memset(bucket, 0, sizeof(bucket)); | |
| 922 for(i=0; i<64; i++) | |
| 923 bucket[(buf[i]>>5)&07] += 1; | |
| 924 | |
| 925 cs = 0.; | |
| 926 for(i=0; i<8; i++) | |
| 927 cs += (bucket[i]-8)*(bucket[i]-8); | |
| 928 cs /= 8.; | |
| 929 if(cs <= 24.322) { | |
| 930 if(buf[0]==0x1f && (buf[1]==0x8b || buf[1]==0x9d)) | |
| 931 print(mime ? OCTET : "compressed\n"); | |
| 932 else | |
| 933 print(mime ? OCTET : "encrypted\n"); | |
| 934 return 1; | |
| 935 } | |
| 936 return 0; | |
| 937 } | |
| 938 | |
| 939 /* | |
| 940 * english by punctuation and frequencies | |
| 941 */ | |
| 942 int | |
| 943 isenglish(void) | |
| 944 { | |
| 945 int vow, comm, rare, badpun, punct; | |
| 946 char *p; | |
| 947 | |
| 948 if(guess != Fascii && guess != Feascii) | |
| 949 return 0; | |
| 950 badpun = 0; | |
| 951 punct = 0; | |
| 952 for(p = (char *)buf; p < (char *)buf+nbuf-1; p++) | |
| 953 switch(*p) { | |
| 954 case '.': | |
| 955 case ',': | |
| 956 case ')': | |
| 957 case '%': | |
| 958 case ';': | |
| 959 case ':': | |
| 960 case '?': | |
| 961 punct++; | |
| 962 if(p[1] != ' ' && p[1] != '\n') | |
| 963 badpun++; | |
| 964 } | |
| 965 if(badpun*5 > punct) | |
| 966 return 0; | |
| 967 if(cfreq['>']+cfreq['<']+cfreq['/'] > cfreq['e']) /* shel… | |
| 968 return 0; | |
| 969 if(2*cfreq[';'] > cfreq['e']) | |
| 970 return 0; | |
| 971 | |
| 972 vow = 0; | |
| 973 for(p="AEIOU"; *p; p++) { | |
| 974 vow += cfreq[(uchar)*p]; | |
| 975 vow += cfreq[tolower((uchar)*p)]; | |
| 976 } | |
| 977 comm = 0; | |
| 978 for(p="ETAION"; *p; p++) { | |
| 979 comm += cfreq[(uchar)*p]; | |
| 980 comm += cfreq[tolower((uchar)*p)]; | |
| 981 } | |
| 982 rare = 0; | |
| 983 for(p="VJKQXZ"; *p; p++) { | |
| 984 rare += cfreq[(uchar)*p]; | |
| 985 rare += cfreq[tolower((uchar)*p)]; | |
| 986 } | |
| 987 if(vow*5 >= nbuf-cfreq[' '] && comm >= 10*rare) { | |
| 988 print(mime ? PLAIN : "English text\n"); | |
| 989 return 1; | |
| 990 } | |
| 991 return 0; | |
| 992 } | |
| 993 | |
| 994 /* | |
| 995 * pick up a number with | |
| 996 * syntax _*[0-9]+_ | |
| 997 */ | |
| 998 #define P9BITLEN 12 | |
| 999 int | |
| 1000 p9bitnum(uchar *bp) | |
| 1001 { | |
| 1002 int n, c, len; | |
| 1003 | |
| 1004 len = P9BITLEN; | |
| 1005 while(*bp == ' ') { | |
| 1006 bp++; | |
| 1007 len--; | |
| 1008 if(len <= 0) | |
| 1009 return -1; | |
| 1010 } | |
| 1011 n = 0; | |
| 1012 while(len > 1) { | |
| 1013 c = *bp++; | |
| 1014 if(!isdigit(c)) | |
| 1015 return -1; | |
| 1016 n = n*10 + c-'0'; | |
| 1017 len--; | |
| 1018 } | |
| 1019 if(*bp != ' ') | |
| 1020 return -1; | |
| 1021 return n; | |
| 1022 } | |
| 1023 | |
/*
 * Work out the pixel depth from the first 12-byte field of an image
 * header at s.
 *
 * A field starting with a digit is taken as a log2 depth and yields
 * 1<<value with *newp = 0.  Otherwise the field is read as a channel
 * descriptor (letter/number pairs such as r8g8b8): the numbers are
 * summed, *newp is set to 1, and only totals of 8, 16, 24 or 32 are
 * accepted.  Returns the depth in bits, or -1 if it cannot be parsed.
 */
int
depthof(char *s, int *newp)
{
	char *es;
	int d;
	ulong ldepth;

	*newp = 0;
	es = s+12;
	while(s<es && *s==' ')
		s++;
	if(s == es)
		return -1;
	if('0'<=*s && *s<='9'){
		/* shifting an int by 31 or more is undefined; reject such values */
		ldepth = strtoul(s, 0, 10);
		if(ldepth > 30)
			return -1;
		return 1<<ldepth;
	}

	*newp = 1;
	d = 0;
	while(s<es && *s!=' '){
		s++;			/* skip letter */
		d += strtoul(s, &s, 10);
	}

	switch(d){
	case 32:
	case 24:
	case 16:
	case 8:
		return d;
	}
	return -1;
}
| 1055 | |
| 1056 int | |
| 1057 isp9bit(void) | |
| 1058 { | |
| 1059 int dep, lox, loy, hix, hiy, px, new; | |
| 1060 ulong t; | |
| 1061 long len; | |
| 1062 char *newlabel; | |
| 1063 | |
| 1064 newlabel = "old "; | |
| 1065 | |
| 1066 dep = depthof((char*)buf + 0*P9BITLEN, &new); | |
| 1067 if(new) | |
| 1068 newlabel = ""; | |
| 1069 lox = p9bitnum(buf + 1*P9BITLEN); | |
| 1070 loy = p9bitnum(buf + 2*P9BITLEN); | |
| 1071 hix = p9bitnum(buf + 3*P9BITLEN); | |
| 1072 hiy = p9bitnum(buf + 4*P9BITLEN); | |
| 1073 if(dep < 0 || lox < 0 || loy < 0 || hix < 0 || hiy < 0) | |
| 1074 return 0; | |
| 1075 | |
| 1076 if(dep < 8){ | |
| 1077 px = 8/dep; /* pixels per byte */ | |
| 1078 /* set l to number of bytes of data per scan line */ | |
| 1079 if(lox >= 0) | |
| 1080 len = (hix+px-1)/px - lox/px; | |
| 1081 else{ /* make positive before divide */ | |
| 1082 t = (-lox)+px-1; | |
| 1083 t = (t/px)*px; | |
| 1084 len = (t+hix+px-1)/px; | |
| 1085 } | |
| 1086 }else | |
| 1087 len = (hix-lox)*dep/8; | |
| 1088 len *= (hiy-loy); /* col length */ | |
| 1089 len += 5*P9BITLEN; /* size of initial ascii */ | |
| 1090 | |
| 1091 /* | |
| 1092 * for image file, length is non-zero and must match calculation… | |
| 1093 * for /dev/window and /dev/screen the length is always zero | |
| 1094 * for subfont, the subfont header should follow immediately. | |
| 1095 */ | |
| 1096 if (len != 0 && mbuf->length == 0) { | |
| 1097 print("%splan 9 image\n", newlabel); | |
| 1098 return 1; | |
| 1099 } | |
| 1100 if (mbuf->length == len) { | |
| 1101 print("%splan 9 image\n", newlabel); | |
| 1102 return 1; | |
| 1103 } | |
| 1104 /* Ghostscript sometimes produces a little extra on the end */ | |
| 1105 if (mbuf->length < len+P9BITLEN) { | |
| 1106 print("%splan 9 image\n", newlabel); | |
| 1107 return 1; | |
| 1108 } | |
| 1109 if (p9subfont(buf+len)) { | |
| 1110 print("%ssubfont file\n", newlabel); | |
| 1111 return 1; | |
| 1112 } | |
| 1113 return 0; | |
| 1114 } | |
| 1115 | |
| 1116 int | |
| 1117 p9subfont(uchar *p) | |
| 1118 { | |
| 1119 int n, h, a; | |
| 1120 | |
| 1121 /* if image too big, assume it's a subfont */ | |
| 1122 if (p+3*P9BITLEN > buf+sizeof(buf)) | |
| 1123 return 1; | |
| 1124 | |
| 1125 n = p9bitnum(p + 0*P9BITLEN); /* char count */ | |
| 1126 if (n < 0) | |
| 1127 return 0; | |
| 1128 h = p9bitnum(p + 1*P9BITLEN); /* height */ | |
| 1129 if (h < 0) | |
| 1130 return 0; | |
| 1131 a = p9bitnum(p + 2*P9BITLEN); /* ascent */ | |
| 1132 if (a < 0) | |
| 1133 return 0; | |
| 1134 return 1; | |
| 1135 } | |
| 1136 | |
| 1137 #define WHITESPACE(c) ((c) == ' ' || (c) == '\t' |… | |
| 1138 | |
/*
 * Recognize a Plan 9 font file: two leading numbers (height, ascent)
 * followed by one or more entries of the form "min max name", where
 * each name must refer to an existing file.  A name not starting with
 * '/' is looked up relative to the directory part of fname when slash
 * is set.  Returns 1 when a description was printed.
 */
int
isp9font(void)
{
	uchar *cp, *p;
	int i, n;
	char pathname[1024];

	cp = buf;
	if (!getfontnum(cp, &cp))	/* height */
		return 0;
	if (!getfontnum(cp, &cp))	/* ascent */
		return 0;
	/* i counts complete entries; the loop ends at the first non-number */
	for (i = 0; 1; i++) {
		if (!getfontnum(cp, &cp))	/* min */
			break;
		if (!getfontnum(cp, &cp))	/* max */
			return 0;
		while (WHITESPACE(*cp))
			cp++;
		/* p..cp delimits the file name */
		for (p = cp; *cp && !WHITESPACE(*cp); cp++)
			;
			/* construct a path name, if needed */
		n = 0;
		if (*p != '/' && slash) {
			n = slash-fname+1;	/* directory prefix, including the '/' */
			if (n < sizeof(pathname))
				memcpy(pathname, fname, n);
			else n = 0;
		}
		/* names too long for pathname are not checked at all */
		if (n+cp-p < sizeof(pathname)) {
			memcpy(pathname+n, p, cp-p);
			n += cp-p;
			pathname[n] = 0;
			if (access(pathname, AEXIST) < 0)
				return 0;
		}
	}
	if (i) {
		print(mime ? "text/plain\n" : "font file\n");
		return 1;
	}
	return 0;
}
| 1182 | |
| 1183 int | |
| 1184 getfontnum(uchar *cp, uchar **rp) | |
| 1185 { | |
| 1186 while (WHITESPACE(*cp)) /* extract ulong delimite… | |
| 1187 cp++; | |
| 1188 if (*cp < '0' || *cp > '9') | |
| 1189 return 0; | |
| 1190 strtoul((char *)cp, (char **)rp, 0); | |
| 1191 if (!WHITESPACE(**rp)) | |
| 1192 return 0; | |
| 1193 return 1; | |
| 1194 } | |
| 1195 | |
| 1196 int | |
| 1197 isrtf(void) | |
| 1198 { | |
| 1199 if(strstr((char *)buf, "\\rtf1")){ | |
| 1200 print(mime ? "application/rtf\n" : "rich text format\n"); | |
| 1201 return 1; | |
| 1202 } | |
| 1203 return 0; | |
| 1204 } | |
| 1205 | |
| 1206 int | |
| 1207 ismsdos(void) | |
| 1208 { | |
| 1209 if (buf[0] == 0x4d && buf[1] == 0x5a){ | |
| 1210 print(mime ? "application/x-msdownload\n" : "MSDOS execu… | |
| 1211 return 1; | |
| 1212 } | |
| 1213 return 0; | |
| 1214 } | |
| 1215 | |
| 1216 int | |
| 1217 iself(void) | |
| 1218 { | |
| 1219 static char *cpu[] = { /* NB: incomplete and arbi… | |
| 1220 nil, | |
| 1221 /*1*/ "WE32100", | |
| 1222 /*2*/ "SPARC", | |
| 1223 /*3*/ "i386", | |
| 1224 /*4*/ "M68000", | |
| 1225 /*5*/ "M88000", | |
| 1226 /*6*/ "i486", | |
| 1227 /*7*/ "i860", | |
| 1228 /*8*/ "R3000", | |
| 1229 /*9*/ "S370", | |
| 1230 /*10*/ "R4000", | |
| 1231 nil, nil, nil, nil, | |
| 1232 /*15*/ "HP-PA", | |
| 1233 nil, | |
| 1234 nil, | |
| 1235 /*18*/ "sparc v8+", | |
| 1236 /*19*/ "i960", | |
| 1237 /*20*/ "PPC-32", | |
| 1238 /*21*/ "PPC-64", | |
| 1239 nil, nil, nil, nil, | |
| 1240 nil, nil, nil, nil, nil, | |
| 1241 nil, nil, nil, nil, nil, | |
| 1242 nil, nil, nil, nil, | |
| 1243 /*40*/ "ARM", | |
| 1244 /*41*/ "Alpha", | |
| 1245 nil, | |
| 1246 /*43*/ "sparc v9", | |
| 1247 nil, nil, | |
| 1248 nil, nil, nil, nil, | |
| 1249 /*50*/ "IA-64", | |
| 1250 nil, nil, nil, nil, nil, | |
| 1251 nil, nil, nil, nil, nil, | |
| 1252 nil, | |
| 1253 /*62*/ "AMD64", | |
| 1254 nil, nil, nil, | |
| 1255 nil, nil, nil, nil, nil, | |
| 1256 nil, nil, nil, nil, | |
| 1257 /*75*/ "VAX", | |
| 1258 }; | |
| 1259 | |
| 1260 | |
| 1261 if (memcmp(buf, "\177ELF", 4) == 0){ | |
| 1262 /* gcc misparses \x7FELF as \x7FE L F */ | |
| 1263 if (!mime){ | |
| 1264 int n = (buf[19] << 8) | buf[18]; | |
| 1265 char *p = "unknown"; | |
| 1266 | |
| 1267 if (n > 0 && n < nelem(cpu) && cpu[n]) | |
| 1268 p = cpu[n]; | |
| 1269 else { | |
| 1270 /* try the other byte order */ | |
| 1271 n = (buf[18] << 8) | buf[19]; | |
| 1272 if (n > 0 && n < nelem(cpu) && cpu[n]) | |
| 1273 p = cpu[n]; | |
| 1274 } | |
| 1275 print("%s ELF executable\n", p); | |
| 1276 } | |
| 1277 else | |
| 1278 print("application/x-elf-executable"); | |
| 1279 return 1; | |
| 1280 } | |
| 1281 | |
| 1282 return 0; | |
| 1283 } |