; EXP2.ASM as of April 29, 1981
;
; This routine is extracted and CP/Mified from an article
; 'An Inroduction to Data Compression' by Harold Corbin,
; that appeared in the April 1981 issue of Byte magazine.
;
; CP/Mified by: Kelly Smith, CP/M-Net "SYSOP" April 29, 1981
;
; EXP2 (text expansion routine) accepts data prepared by
; the COMP2 (text compression routine) program. The program
; runs until all bits have been processed from the data
; buffer (dbuf), then returns control to CP/M. The decoding
; process consists of adding a data bit to the table entry
; point, getting an increment index that points to the next
; 0-1 pair, and continues until a "tag" (MSB = 1) in bit 7,
; indicates that the next table entry is the desired
; character. The data are processed 8 bits per byte, MSB
; first.
;
; The Huffman Code was derived for the following table, by
; the relative frequency of occurence in a random sampling
; of English text. Frequently used characters are assigned
; shorter bit 'code' patterns, and seldom used characters
; are assigned longer bit 'code' patterns. Note: with any
; set of codes generated, it is important that no code has a
; shorter code as part of its beginning. For example, if the
; letter 'E' is 100, then 10010 cannot be the code for
; another letter...because in scanning the bit stream from
; left to right (using EXP2, the text expansion routine),
; the decoding algorithm would think that 10010 is 'E' (100)
; followed by 10 and NOT the different letter that was
; intended.
;
; Letter Frequency (%) Huffman Code
;
; E 13.0 100
; T 10.5 001
; A 8.1 1111
; O 7.9 1110
; N 7.1 1100
; R 6.8 1011
; I 6.3 1010
; S 6.1 0110
; H 5.2 0101
; D 3.8 11011
; L 3.4 01111
; F 2.9 01001
; C 2.7 01000
; M 2.5 00011
; U 2.4 00010
; G 2.0 00001
; Y 1.9 00000
; P 1.9 110101
; W 1.5 011101
; B 1.4 011100
; V .9 1101001
; K .4 110100011
; X .15 110100001
; J .13 110100000
; Q .11 1101000101
; Z .07 1101000100
;
; The EXP2 program expects to find the data to be decoded,
; in the packed form of 8 bit bytes as encoded by COMP2, in
; the data buffer (dbuf). Basically, the program searches a
; binary tree to decode the bit stream generated by COMP2,
; selecting the appropriate branch in the tree structure
; depending upon the value of each bit in the data stream.
; Note: the data in the table is uniquely dependent upon the
; code generated by COMP2.
;
; There are three parts to the table structure: the index
; values that allow the program to step through the
; appropriate number of table entries (i.e., tree branches)
; as the data stream bit values are serially examined; the
; decoded character that results from the search; and the
; flag to indicate to the program that the next table entry
; found is a character and not an index value. The index
; values are always in pairs, with a separate index value
; for a 1 or a 0 bit stream value. Therefore, as the program
; scans through the table at each pair of index values, one
; or the other is selected, depending upon whether the bit
; in khe data stream was a 1 or a 0.
;
; The table scanning process consists of adding the data
; bit to the current table address. This gives a new address
; whose contents (an index value) is added to the address of
; the index value itself...this new table address is the
; address of the next node in the tree structure. This
; process continues until a flag is detected, indicating
; that the next entry is the desired character. If the flag
; is on (MSB = 1), the remaining 7 bits are interpreted as
; an ASCII character. If the flag is off (MSB = 0), the
; remaining bits are interpreted as an index value. This
; limits the index value to 127, the maximum value in the
; table that can be skipped when processing one data bit.
;
;
;
true equ -1 ; define true
false equ not true; define false
;
stdcpm equ true ; true if regular CP/M (base address 0000h)
altcpm equ false ; true if alternate CP/M (base address 4200h)
;
if stdcpm ; if standard CP/M...
base equ 0000h ; bsae for standard CP/M system
endif ; end if...
;
if altcpm ; if alternate CP/M...
base equ 4200h ; base for H8 or TRS-80 CP/M system
endif ; end if...
;
bdos equ base+5 ; CP/M BDOS entry address for function call
;
conout equ 2 ; write console character
;
dbuf equ 04000h ; data buffer for compressed data
;
org base+100h
start: lxi h,0 ; save "old" CP/M stack pointer
dad sp
shld oldstk
lxi sp,stack; make "new" stack pointer
lxi h,dbuf+2; store pointer to next bit location in data buffer
shld dadd
lxi h,dbuf ; point to data buffer bit count
mov c,m ; get it...
inx h
mov b,m
mvi a,1 ; set initial position
sta pos
expand: push b ; save bit count on the stack
lxi h,expansion$table ; point to expansion decode table
next: push h ; save pointer on the stack
lhld dadd ; get data address bit position
lda pos ; get bit position
mov b,a
mov a,m ; get the data
bit: ral ; get desired bit into carry
dcr b ; de-bump bit position count
jnz bit ; loop 'til done
mvi a,0 ; clear data, but preserve carry
ral ; restore single isolated bit
mov c,a ; ...the data value
mvi b,0
shld dadd ; save data address bit position
pop h ; recover pointer
dad b ; make bias to table with data bit position
mov a,m ; get new pointer
ral ; check if "tag" bit...
jc put$character
rar ; restore to original position
mov e,a ; make bias
mvi d,0
dad d ; calculate 'tabel'+'data bit'+'pointer'
pop b ; recover bit count
call decb ; reduce bit count
push b
jmp next ; process next character
;
put$character:
;
rar ; restore byte to original position
ani 07fh ; mask-off "tag" bit
mov e,a ; compute bias to expansion table
mvi d,0
dad d ; add bias
mov a,m ; get expanded character from table
push b ; save all registers
push d
push h
mov e,a ; CP/M needs character in E register
mvi c,conout; output character function
call bdos
pop h ; recover registers
pop d
pop b
pop b ; adust stack, get bit count
call decb ; reduce bit count
jmp expand ; expand next character
;
decb: dcx b ; reduce bit count
mov a,c ; check for all characters expanded
ora b
jz exit ; if all characters expanded, exit to CP/M
lda pos ; get bit position, and update
inr a
sta pos
cpi 9 ; all 8 bits processed?
rnz ; return, if not...
mvi a,1 ; all bits processed, reset bit position
sta pos
push h ; save table pointer on stack
lhld dadd ; get current data byte address, and update
inx h
shld dadd
pop h ; recover table pointer
ret ; process next bit...
;
exit: lhld oldstk ; get "old" CP/M stack pointer
sphl ; and restore...
ret ; return to CP/M
;
expansion$table:
;
db 02ah ; 0 0
db 001h ; 1 1
db 002h ; 2 0
db 008h ; 3 1
db 082h ; 4 0
db 002h ; 5 1
db 'E' ; 6 'E'
db 082h ; 7 0
db 082h ; 8 1
db 'I' ; 9 'I'
db 'R' ; 10 'R'
db 006h ; 11 0
db 001h ; 12 1
db 082h ; 13 0
db 082h ; 14 1
db 'O' ; 15 'O'
db 'A' ; 16 'A'
db 082h ; 17 0
db 002h ; 18 1
db 'N' ; 19 'N'
db 003h ; 20 0
db 081h ; 21 1
db 'D' ; 22 'D'
db 003h ; 23 0
db 081h ; 24 1
db 'P' ; 25 'P'
db 003h ; 26 0
db 081h ; 27 1
db 'V' ; 28 'V'
db 002h ; 29 0
db 005h ; 30 1
db 082h ; 31 0
db 082h ; 32 1
db 'J' ; 33 'J'
db 'X' ; 34 'X'
db 003h ; 35 0
db 081h ; 36 1
db 'K' ; 37 'K'
db 082h ; 38 0
db 082h ; 39 1
db 'Z' ; 40 'Z'
db 'Q' ; 41 'Q'
db 002h ; 42 0
db 00eh ; 43 1
db 003h ; 44 0
db 081h ; 45 1
db 'T' ; 46 'T'
db 002h ; 47 0
db 005h ; 48 1
db 082h ; 49 0
db 082h ; 50 1
db 'Y' ; 51 'Y'
db 'G' ; 52 'G'
db 082h ; 53 0
db 082h ; 54 1
db 'U' ; 55 'U'
db 'M' ; 56 'M'
db 002h ; 57 0
db 008h ; 58 1
db 003h ; 59 0
db 081h ; 60 1
db 'H' ; 61 'H'
db 082h ; 62 0
db 082h ; 63 1
db 'C' ; 64 'C'
db 'F' ; 65 'F'
db 082h ; 66 0
db 002h ; 67 1
db 'S' ; 68 'S'
db 003h ; 69 0
db 081h ; 70 1
db 'L' ; 71 'L'
db 082h ; 72 0
db 082h ; 73 1
db 'B' ; 74 'B'
db 'W' ; 75 'W'
;
dadd: dw dbuf+2 ; next data address
;
pos: ds 1 ; current bit location
;
oldstk: ds 2 ; storage for "old" CP/M stack pointer
ds 32 ; storage for 16 level stack
stack equ $ ; top of local stack
;
end start