-- This is not actually a full DVI reader. It just calculates a hash for each page,
-- so that it can be detected whether the page changed between compilations and
-- needs to be converted to an image again using Dvisvgm or Dvipng
--
-- information about DVI format is from here:
-- https://web.archive.org/web/20070403030353/http://www.math.umd.edu/~asnowden/comp-cont/dvi.html
--
-- NOTE(review): M is declared but never assigned or used in the visible code
local M
-- value of the padding bytes that fill the file after post_post
local endfill = 223
-- sizes (in bytes) of the argument types found in a DVI file; these values are
-- also used as keys when selecting a reader function
local byte, sixteen, int = 1, 2, 4
--- Return the single character of `str` found at position `pos`.
-- @param str string to read from
-- @param pos 1-based position of the character
-- @return one-character string, or nil when `pos` lies past the end of `str`
local function read_char(str, pos)
  if pos and pos > string.len(str) then return nil end
  -- start and end index are the same, so exactly one character is returned
  -- (using pos + 1 as the end index would return two characters)
  return string.sub(str, pos, pos)
end
--- Return the numeric value of the byte at position `pos` in `str`.
-- @param str string to read from
-- @param pos 1-based position of the byte
-- @return byte value (0-255), or nil when `pos` is outside of `str`
local function read_byte(str, pos)
  -- an out-of-range position yields nil instead of raising an error
  if pos < 1 or pos > #str then return nil end
  -- parentheses truncate the result to exactly one value
  return (string.byte(str, pos))
end
-- DVI file format uses signed big endian integers. This code doesn't take into account
-- the sign, so negative numbers are returned as their unsigned 32-bit value (-1 is
-- read as 2^32-1). Other code in this file compares against 2^32-1, so the sign
-- handling must not be changed here without updating it as well.
--- Read a four byte big endian unsigned integer.
-- @param str string to read from
-- @param pos 1-based position of the most significant byte
-- @return the integer value, or nil when fewer than four bytes are available at `pos`
local function read_integer(str, pos)
  if pos < 1 then return nil end
  local b1, b2, b3, b4 = string.byte(str, pos, pos + 3)
  -- string.byte returns fewer values when the string ends too early
  if not b4 then return nil end
  -- multiplication keeps the result an integer on Lua 5.3+; the ^ operator
  -- would always produce a float there
  return ((b1 * 256 + b2) * 256 + b3) * 256 + b4
end
--- Read a two byte big endian unsigned integer.
-- @param str string to read from
-- @param pos 1-based position of the most significant byte
-- @return the integer value
local function read_sixteen(str, pos)
  local high = read_byte(str, pos)
  local low = read_byte(str, pos + 1)
  return high * 256 + low
end
-- reader functions indexed by the size in bytes of the argument they read
local readers = {}
readers[byte] = read_byte
readers[sixteen] = read_sixteen
readers[int] = read_integer
-- Description of the DVI commands this module reads. Each entry holds the
-- opcode byte and the ordered list of arguments that follow it; the `type`
-- of an argument is its size in bytes.
local opcodes = {}
do
  -- build one argument description
  local function field(name, size)
    return { name = name, type = size }
  end

  opcodes.post_post = {
    opcode = 249,
    args = {
      field("q", int), -- postamble address
      field("i", byte),
    }
  }

  opcodes.post = {
    opcode = 248,
    args = {
      field("p", int), -- address of the last page
      field("num", int),
      field("den", int),
      field("mag", int),
      field("l", int),
      field("u", int),
      field("s", sixteen),
      field("t", sixteen),
    }
  }

  -- bop carries ten integers c0..c9 (c0 is used as the page number by this
  -- module) followed by the pointer to the previous page
  local bop_args = {}
  for i = 0, 9 do
    bop_args[#bop_args + 1] = field("c" .. i, int)
  end
  bop_args[#bop_args + 1] = field("p", int) -- previous page

  opcodes.bop = {
    opcode = 139,
    args = bop_args
  }
end
--- Read a sequence of arguments stored one after another in `str`.
-- @param str string to read from
-- @param pos 1-based position of the first argument
-- @param args list of argument descriptions with `name` and `type` fields,
--   where `type` is the size of the argument in bytes
-- @return table mapping argument names to the values that were read
local function read_arguments(str, pos, args)
  local values = {}
  local cursor = pos
  for i = 1, #args do
    local spec = args[i]
    local size = spec.type
    -- the size selects the reader and is also the distance to the next argument
    values[spec.name] = readers[size](str, cursor)
    cursor = cursor + size
  end
  return values
end
--- Read the arguments of the named DVI command located at `pos`.
-- @param opcode name of the command, a key of the `opcodes` table
-- @param str string with the DVI file contents
-- @param pos 1-based position where the opcode byte is expected
-- @return table with the command arguments, or nil and an error message when
--   the command name is unknown or a different byte is found at `pos`
local function read_opcode(opcode, str, pos)
  local format = opcodes[opcode]
  if not format then
    return nil, "Cannot find opcode format: " .. tostring(opcode)
  end
  -- check that opcode byte in the current position is the same as required opcode
  local op = read_byte(str, pos)
  if op ~= format.opcode then
    -- tostring() keeps the message construction from failing when no byte
    -- could be read at pos and op is nil
    return nil, "Wrong opcode " .. tostring(op) .. " at position " .. tostring(pos)
  end
  return read_arguments(str, pos + 1, format.args)
end
--- Find the postamble address.
-- @param dvicontent string with the DVI file contents
-- @return 1-based position of the postamble in `dvicontent`, or nil and an
--   error message when post_post cannot be read
local function get_postamble_addr(dvicontent)
  local pos = string.len(dvicontent)
  -- skip endfill bytes at the end of file; stop at the start of the string so
  -- that a file made only of fill bytes cannot run past the beginning
  while pos > 0 and string.byte(dvicontent, pos) == endfill do
    pos = pos - 1
  end
  -- post_post takes six bytes ending at pos: the opcode, the four byte
  -- postamble address and one more byte
  if pos < 6 then
    return nil, "Cannot find post_post: DVI file is too short"
  end
  -- first read post_post to get address of the postamble
  local post_postamble, msg = read_opcode("post_post", dvicontent, pos - 5)
  if not post_postamble then return nil, msg end
  -- the stored address counts from zero, Lua string positions count from one
  return post_postamble.q + 1
end
--- Read one page and calculate the hash of its contents.
-- @param str string with the DVI file contents
-- @param start address of the page's bop command; the opcode byte is read at
--   position start + 1
-- @param stop position from which the end of the page is searched backwards
-- @return table with `number` (page number) and `hash` fields, the address of
--   the previous page, and `start`; a single nil when there is no page to read
local function read_page(str, start, stop)
  local eop = 140 -- opcode that ends a page
  -- previous page pointer of the first page: -1 read as an unsigned integer
  local no_page = 2^32 - 1
  -- search backwards from pos for the end of page byte; the loop is bounded
  -- by the start of the string, so it finishes even when no such byte exists
  local function get_end_of_page(s, pos)
    while pos >= 1 do
      if read_byte(s, pos) == eop then -- end of page
        return pos
      end
      pos = pos - 1
    end
    return nil
  end
  -- we reached the end of file
  if start == no_page then return nil end
  local current_page = read_opcode("bop", str, start + 1)
  if not current_page then return nil end
  local endofpage = get_end_of_page(str, stop)
  if not endofpage then return nil end
  -- get the page contents, but skip all parameters, because they can change
  -- (especially pointer to the previous page): bop takes 45 bytes, the opcode
  -- and eleven four byte arguments
  local page = str:sub(start + 46, endofpage)
  local page_obj = {
    number = current_page.c0, -- the page number
    hash = md5.sumhexa(page) -- hash the page contents
  }
  return page_obj, current_page.p, start
end
--- Calculate a hash for every page of a DVI file.
-- @param dvicontent string with the DVI file contents
-- @return table mapping page numbers to hashes of the page contents, or nil
--   and an error message when the postamble cannot be read
local function get_pages(dvicontent)
  if type(dvicontent) ~= "string" then
    return nil, "DVI content must be a string"
  end
  local postamble_pos, msg = get_postamble_addr(dvicontent)
  if not postamble_pos then return nil, msg end
  local postamble
  postamble, msg = read_opcode("post", dvicontent, postamble_pos)
  if not postamble then return nil, msg end
  -- pages are linked from the last one to the first one; the end of each page
  -- is searched backwards from the position of the item that follows it
  local next_page_pos, previous_page = postamble.p, postamble_pos
  local page
  local page_sequence = {}
  while next_page_pos do
    page, next_page_pos, previous_page = read_page(dvicontent, next_page_pos, previous_page)
    page_sequence[#page_sequence+1] = page
  end
  -- reorder pages
  local pages = {}
  for _, v in ipairs(page_sequence) do
    pages[v.number] = v.hash
  end
  return pages
end
-- if arg[1] then
-- local f = io.open(arg[1], "r")
-- local dvicontent = f:read("*all")
-- f:close()
-- local pages = get_pages(dvicontent)
-- for k,v in pairs(pages) do
-- print(k,v)
-- end
-- end
-- public interface of the module
local exports = {}
exports.get_pages = get_pages
return exports