#------------------------------------------------------------------------------
# $File: scientific,v 1.14 2023/04/29 17:28:09 christos Exp $
# scientific:  file(1) magic for scientific formats
#
# From: Joe Krahn <[email protected]>

########################################################
# CCP4 data and plot files:
# MTZ reflection data: the file begins with "MTZ" followed by one space (\040).
0	string	MTZ\040	MTZ reflection file

# Plot84 plot file: the signature string is tested at offset 92.
92   string  PLOT%%84   Plot84 plotting file
# A byte equal to 1 at offset 52, respectively offset 55, adds the byte-order suffix.
>52  byte    1          , Little-endian
>55  byte    1          , Big-endian

########################################################
# Electron density MAP/MASK formats

# New-style EZD map: signature "EZD_MAP" at the very start of the file.
0    string  EZD_MAP    NEWEZD Electron Density Map
# Old-style EZD map: signature "MAP (" at offset 109.
109  string  MAP\040(   Old EZD Electron Density Map

# BRIX map: header starts with ":-) Origin", compared using the /c string flag.
0     string/c  :-)\040Origin  BRIX Electron Density Map
# When the string test at offset 170 succeeds, append at most 12 characters
# of it after "Sigma:".
>170  string    >0             , Sigma:%.12s
# Disabled alternatives that would print up to 178 characters from offset 4:
#>4     string          >0      %.178s
#>4     addr            x       %.178s

# X-PLOR ASCII map: "18 !NTITLE" at offset 7.
7  string  18\040!NTITLE               XPLOR ASCII Electron Density Map
# CNS ASCII map: " !NTITLE", a line feed (\012), then " REMARK", at offset 9.
9  string  \040!NTITLE\012\040REMARK   CNS ASCII electron density map

# CCP4 map: "MAP " at offset 208; the byte at offset 212 selects one of four
# machine-format suffixes.
208   string  MAP\040  CCP4 Electron Density Map
# Assumes same stamp for float and double (normal case)
>212  byte    17       \b, Big-endian
>212  byte    34       \b, VAX format
>212  byte    68       \b, Little-endian
>212  byte    85       \b, Convex native

############################################################
# X-Ray Area Detector images
# R-Axis IV image: "R-AXIS4" padded with three spaces at offset 0.
0      string  R-AXIS4\ \ \     R-Axis Area Detector Image:
# The 32-bit value at offset 796 is read in both byte orders; whichever
# reading is below 20 is printed as the IP number, and the values at offsets
# 768 and 772 are then printed as the size in that same byte order.
>796   lelong  <20   Little-endian, IP #%d,
>>768  lelong  >0    Size=%dx
>>772  lelong  >0    \b%d
>796   belong  <20   Big-endian, IP #%d,
>>768  belong  >0    Size=%dx
>>772  belong  >0    \b%d

# R-Axis image, Win32 variant: "RAXIS" padded with five spaces at offset 0.
0      string  RAXIS\ \ \ \ \     R-Axis Area Detector Image, Win32:
# Same layout as the R-AXIS4 entry: IP number at offset 796, size at offsets
# 768 and 772, each tried as little-endian and as big-endian.
>796   lelong  <20   Little-endian, IP #%d,
>>768  lelong  >0    Size=%dx
>>772  lelong  >0    \b%d
>796   belong  <20   Big-endian, IP #%d,
>>768  belong  >0    Size=%dx
>>772  belong  >0    \b%d


# MAR image: "MMX" followed by a run of NUL bytes, tested at offset 1028.
1028   string  MMX\000\000\000\000\000\000\000\000\000\000\000\000\000   MAR Area Detector Image,
# Header fields are read with the ulong type; each one is printed only when
# it exceeds the threshold in its test.
>1072  ulong   >1   Compressed(%d),
>1100  ulong   >1   %d headers,
>1104  ulong   >0   %d x
>1108  ulong   >0   %d,
>1120  ulong   >0   %d bits/pixel

# Type: GEDCOM genealogical (family history) data
# From: Giuseppe Bilotta
# Update:       Joerg Jenderek
# URL:          http://fileformats.archiveteam.org/wiki/GEDCOM
#               https://en.wikipedia.org/wiki/GEDCOM
# Reference:    http://mark0.net/download/triddefs_xml.7z/defs/g/
#               ged.trid.xml ged-utf8.trid.xml ged-utf16.trid.xml
# Note:         called "GEDCOM Family History" by TrID and "Genealogical Data Communication (GEDCOM) Format" by DROID via PUID fmt/851
# GEDCOM text: look for the "0 HEAD" record from offset 0, using a search
# range of 1 and the /c flag.
0       search/1/c      0\ HEAD         GEDCOM genealogy text
#!:mime text/plain
#!:mime application/x-gedcom
# https://www.iana.org/assignments/media-types/text/vnd.familysearch.gedcom
!:mime  text/vnd.familysearch.gedcom
!:ext   ged
# no gedcom sample found and ged suffix also used for other formats
#!:ext  ged/gedcom
# Continue from the end of the previous match: find "1 GEDC", then "2 VERS"
# after it, so that the version is only reported when both tags are present.
>&0     search          1\ GEDC
>>&0    search          2\ VERS         version
# 4 5.0 5.3 5.4 5.5 5.5.1 5.5.5 5.6 7.0 or no version
# &1 steps one byte past the end of the "2 VERS" match (presumably the
# separating space) before the non-empty version string is printed.
>>>&1   string          >\0             %s
# From: Phil Endecott <[email protected]>
# 0\040HEAD as UTF-16 big endian without BOM
# "0 HEAD" with every character stored as a 16-bit big-endian unit, no BOM.
0       string  \000\060\000\040\000\110\000\105\000\101\000\104                GEDCOM genealogy text
!:mime  text/vnd.familysearch.gedcom
!:ext   ged
# look for VERS tag encoded as UTF-16 big endian
>12             search/0x65     V\0E\0R\0S                                      version
# version like: 5.5.1
# &2 skips the two bytes that follow the matched "S" before reading the version
>>&2            bestring16      x                                               %s
# The level-0 signature alone establishes the encoding, so this suffix is a
# level-1 test: it is printed whether or not the VERS search above succeeds.
>0              string          x                                               \b, UTF-16 (without BOM) big-endian text
# 0\040HEAD as UTF-16 little endian without BOM
# "0 HEAD" with every character stored as a 16-bit little-endian unit, no BOM.
0       string  \060\000\040\000\110\000\105\000\101\000\104\000                GEDCOM genealogy text
!:mime  text/vnd.familysearch.gedcom
!:ext   ged
# look for VERS tag encoded as UTF-16 little endian
>12             search/0x65     V\0E\0R\0S                                      version
# version like: 5.5.1
# &3 skips the three bytes that follow the matched "S" before reading the version
>>&3            lestring16      x                                               %s
# The level-0 signature alone establishes the encoding, so this suffix is a
# level-1 test: it is printed whether or not the VERS search above succeeds.
>2              string          x                                               \b, UTF-16 (without BOM) little-endian text
# Note:         UTF-16 with BOM variants already described above by first test as "GEDCOM genealogy text"
# 0\040HEAD as UTF-16 big endian with BOM
#0      string  \376\377\000\060\000\040\000\110\000\105\000\101\000\104        GEDCOM data
# 0\040HEAD as UTF-16 little endian with BOM
#0      string  \377\376\060\000\040\000\110\000\105\000\101\000\104\000        GEDCOM data

# PDB: Protein Data Bank files
# Adam Buchbinder <[email protected]>
#
# https://www.wwpdb.org/documentation/format32/sect2.html
# https://www.ch.ic.ac.uk/chemime/
#
# The PDB file format is fixed-field, 80 columns. From the spec:
#
# COLS        DATA
#  1 -  6      "HEADER"
#  11 - 50     String(40)
#  51 - 59     Date
#  63 - 66     IDcode
#
# Thus, positions 7-10, 60-62 and 67-80 are spaces. The Date must be in the
# format DD-MMM-YY, e.g., 01-JAN-70, and the IDcode consists of numbers and
# uppercase letters. However, examples have been seen without the date string,
# e.g., the example on the chemime site.
# Anchor: "HEADER" followed by four spaces (columns 1-10 of the record).
0       string  HEADER\ \ \ \040
# Each following test starts where the previous match ended (&0).
# First consume 40 characters (the String(40) field, columns 11-50).
>&0     regex/1l        \^.{40}
# Date in DD-MMM-YY form followed by three spaces (columns 51-62).
>>&0    regex/1l        [0-9]{2}-[A-Z]{3}-[0-9]{2}\ {3}
# Four-character ID code plus 14 more characters up to the end of the line.
# NOTE(review): per magic(5) the /s flag leaves the relative offset at the
# start of the match, which lets the next test re-read the ID code - confirm.
>>>&0   regex/1ls       [A-Z0-9]{4}.{14}$
>>>>&0  regex/1l        [A-Z0-9]{4}     Protein Data Bank data, ID Code %s
!:mime  chemical/x-pdb
# Scan again from absolute offset 0 to append the date to the description.
>>>>0   regex/1l        [0-9]{2}-[A-Z]{3}-[0-9]{2}      \b, %s

# Type: GDSII Stream file
# GDSII stream: the first four bytes, read as a big-endian long, are 0x00060002.
0    belong  0x00060002   GDSII Stream file
# Byte 4 is zero: print byte 5 alone as "<n>.0".
>4   byte    0x00
>>5  byte    x            version %d.0
# Byte 4 is greater than zero: print byte 4, then byte 5 after a dot.
>4   byte    >0x00        version %d
>>5  byte    x            \b.%d

# Type: LXT (interLaced eXtensible Trace)
# chrysn <[email protected]>
# LXT trace: the first two bytes, read as a big-endian short, are 0x0138.
0   beshort  0x0138   interLaced eXtensible Trace (LXT) file
# The big-endian short at offset 2 is printed as the version when positive.
>2  beshort  >0       (Version %u)