-- module table; the public functions are attached to it at the end of the file
local M = {}
-- logger registered under the "indexing" name
local log = logging.new "indexing"

-- Handle accented characters in files created with \usepackage[utf8]{inputenc}
-- this code was originally part of https://github.com/michal-h21/iec2utf/
-- NOTE(review): this table does not seem to be used in the visible code, and the
-- parameter of load_enc shadows it -- confirm before removing
local enc = {}

-- map from a LICR (LaTeX internal character representation) string to the
-- corresponding UTF-8 character; filled by load_encfiles, queried by get_utf8
local licrs = {}
-- presumably converts a Unicode codepoint number to an UTF-8 string (it is
-- called with the hexadecimal codepoint parsed from the encoding files) -- TODO confirm
local codepoint2utf = unicode.utf8.char
-- names of the encoding files that load_enc has already handled
local used_encodings = {}

--- Load an inputenc encoding definition file.
-- Every `DeclareUnicodeCharacter{HEX}{LICR}` declaration found in the file is
-- saved in the `licrs` table as a mapping from the LICR string (with the
-- `@tabacckludge` marker removed) to the UTF-8 character of the codepoint.
-- @param f path to the encoding file
-- @return true on success, or nil and an error message when the file cannot
-- be opened
local function load_encfiles(f)
  local file, msg = io.open(f, "r")
  if not file then
    -- the file may have been found by the caller, but still be unreadable
    return nil, "Cannot open encoding file: " .. tostring(msg)
  end
  local encodings = file:read("*all")
  file:close()
  for codepoint, licr in encodings:gmatch('DeclareUnicodeCharacter(%b{})(%b{})') do
    -- %b{} captures include the braces, sub(2,-2) removes them
    local number = tonumber(codepoint:sub(2,-2), 16)
    -- skip declarations whose first argument is not a hexadecimal number
    if number then
      -- the parentheses keep only the string returned by gsub
      local key = (licr:sub(2,-2):gsub('@tabacckludge',''))
      licrs[key] = codepoint2utf(number)
    end
  end
  return true
end

--- Normalize a braced LICR so it can be used as a key in the `licrs` table.
-- A space is kept only when it is followed by a letter, other spaces are
-- dropped. The first and the last character (the enclosing braces in the
-- case of a `%b{}` capture) are removed from the result.
-- @param l LICR string including the enclosing braces
-- @return the normalized string
local function sanitize_licr(l)
  local function keep_space_before_letter(char)
    if char:match("[%a]") then
      return " " .. char
    end
    return char
  end
  -- only the string returned by gsub is used, the match count is dropped
  local compact = l:gsub(" (.)", keep_space_before_letter)
  return compact:sub(2, -2)
end

--- Load the inputenc definition files for the given font encodings.
-- The file name is the lowercased encoding name followed by `enc.dfu`. It is
-- located using `kpse.find_file` and passed to `load_encfiles`. Each file
-- name is processed at most once, even when it cannot be found.
-- @param enc table with encoding names; a default list is used when it is nil
local load_enc = function(enc)
  -- use the default encodings if the user doesn't provide any
  local encodings = enc or  {"T1","T2A","T2B","T2C","T3","T5", "LGR"}
  for _, name in pairs(encodings) do
    local filename = name:lower() .. "enc.dfu"
    local already_handled = used_encodings[filename]
    -- look the file up only when it was not handled before
    local dfufile = (not already_handled) and kpse.find_file(filename)
    if dfufile then
      load_encfiles(dfufile)
    end
    -- remember the file name regardless of the lookup result
    used_encodings[filename] = true
  end
end



-- results of already converted \IeC arguments, the key is the braced
-- argument with \protect commands removed
local cache = {}

--- Replace `\IeC{...}` commands in a string with UTF-8 characters.
-- `\protect` commands are removed from the argument before the lookup in the
-- `licrs` table. When no character is known for the argument, the command is
-- written back as `\IeC ` followed by the cleaned argument.
-- @param input string that may contain \IeC commands
-- @return the converted string
local get_utf8 = function(input)
  local function convert(iec)
    -- remove \protect commands
    local argument = iec:gsub("\\protect%s*", "")
    local code = cache[argument]
    if not code then
      code = licrs[sanitize_licr(argument)] or '\\IeC '..argument
    end
    cache[argument] = code
    return code
  end
  -- the match count returned by gsub is dropped by the assignment
  local output = input:gsub('\\IeC[%s]*(%b{})', convert)
  return output
end


--- Parse the contents of the idx file produced by tex4ht.
-- The document page number in each `\indexentry` line is replaced by the
-- number of the index entry, so each index entry can later be linked to the
-- place in the HTML file where the \index command had been used.
-- `\beforeentry` lines are consumed and don't appear in the updated idx.
-- @param content contents of the idx file
-- @return table with two fields: `map`, which maps index entry numbers to
-- tables with the `file`, `dest` and `locator` fields, and `idx`, the updated
-- contents of the idx file
local parse_idx = function(content)
  -- number of the most recent \beforeentry line
  local entry_number = 0
  -- link data for each index entry number
  local map = {}
  local lines = {}

  for line in content:gmatch("([^\n]+)") do
    if line:match("^\\beforeentry") then
      entry_number = entry_number + 1
      local file, dest, locator = line:match("\\beforeentry%s*{(.-)}{(.-)}{(.-)}")
      -- a non-empty third argument is used as the index entry locator
      -- instead of the index counter
      if locator == "" then
        locator = nil
      end
      map[entry_number] = {file = file, dest = dest, locator = locator}
    elseif line:match("^\\indexentry") then
      -- the last braced group on the line holds the page number
      local renumbered = line:gsub("%b{}$", "{"..entry_number .."}")
      lines[#lines+1] = get_utf8(renumbered)
    else
      lines[#lines+1] = line
    end
  end

  local idx = table.concat(lines, "\n")
  return {map = map, idx = idx}
end


-- the most recently processed page locator; it is shared between calls of
-- replace_index_pages, and fix_idx_pages resets it when a new entry starts
local previous
--- Replace numbers in a part of an .ind file line with links back to the text.
-- Each number is looked up in `entries`. Known numbers are replaced with a
-- `\Link ... \EndLink` pair around the locator (or the number itself when the
-- entry has no locator). Unknown numbers and numbers in square brackets are
-- kept without a change.
-- @param rest the part of the line that contains the page numbers
-- @param entries map between index entry numbers and link data
-- @return the updated string and the number of matches (as returned by gsub)
local function replace_index_pages(rest, entries)
  -- count of the numbers processed in this call, bracketed ones excluded
  local processed = 0
  local drop_comma = false
  -- captures: dash, comma, left brace, left bracket, number, right bracket, right brace
  local number_pattern = "(%s*%-*%s*)(,?%s*)(%{?)(%[?)(%d+)(%]?)(%}?)"

  local function make_link(dash, coma, lbrace, lbracket, page, rbracket, rbrace)
    if lbracket == "[" and rbracket == "]" then
      -- numbers in brackets are not page numbers, keep the original text
      return nil
    end
    local entry = entries[tonumber(page)]
    processed = processed + 1
    if not entry then
      return dash .. coma .. lbrace .. lbracket .. page .. rbracket .. rbrace
    end
    local locator = entry.locator or page
    -- the same locator can repeat when section numbers are used as locators,
    -- for example 1.1 -- 1.1, 1.1; only the first occurrence is kept
    local repeated = locator == previous
    previous = locator
    if repeated then
      -- when the first number on a line is removed, the comma before the next
      -- number needs to be removed as well, otherwise it would stay in the output
      if processed == 1 then
        drop_comma = true
      end
      return ""
    end
    local separator = coma
    if drop_comma then
      -- this may happen after line breaks in the index
      separator = ""
    end
    -- the flag is valid only until the next emitted link
    drop_comma = false
    return dash .. separator .. lbrace ..  "\\Link[" .. entry.file .."]{".. entry.dest .."}{}" ..  locator .."\\EndLink{}" .. rbrace
  end

  return rest:gsub(number_pattern, make_link)
end

--- Move a numeric subentry keyword from the page list back to the entry text.
-- In xindex, subentries start with a comma, so if the subentry itself is
-- a number, it would be mistaken for a page number.
-- @param start beginning of the index line, up to the first comma
-- @param rest remainder of the line
-- @return start and rest; when start ends with `\subitem -\` and rest holds
-- a keyword followed by more text, the keyword is moved from rest to start
local function fix_subitems(start, rest)
  -- only lines where start ends with \subitem -\ are affected
  if not start:match("%s*\\subitem %-\\$") then
    return start, rest
  end
  -- the keyword is the first comma terminated item in the rest
  local keyword, pages = rest:match("(,?[^,]+,)(.+)")
  if not (keyword and pages) then
    return start, rest
  end
  -- pages should now contain only the actual page numbers
  return start .. keyword, pages
end

--- Replace page numbers in the contents of an ind file with hyperlinks.
-- Known limitation: when the index term itself contains numbers, like Bible
-- verses (1:2), they are detected as page numbers too. Numbers in square
-- brackets are ignored by `replace_index_pages`.
-- @param content contents of the ind file
-- @param idxobj table with the `map` field, as returned by `parse_idx`
-- @return the updated contents
local fix_idx_pages = function(content, idxobj)
  local entries = idxobj.map
  local output = {}

  -- handle a line that starts with a command (like \item)
  local function process_entry(start, rest)
    -- a new index entry starts here, forget the previous page number
    previous = nil
    local head, pages = fix_subitems(start, rest)
    return head .. replace_index_pages(pages, entries)
  end

  -- handle a continuation line, which contains only the page numbers
  local function process_continuation(rest)
    return replace_index_pages(rest, entries)
  end

  for line in content:gmatch("([^\n]+)") do
    local updated, count = line:gsub("(%s*\\%a+[^%[^,]+)(.+)$", process_entry)
    -- longer index entries may be broken over several lines; these lines
    -- don't match the pattern above, so only the numbers are processed
    if count == 0 then
      updated = updated:gsub("(%s*%d+.+)", process_continuation)
    end
    output[#output+1] = updated
  end
  return table.concat(output, "\n")
end

--- Prepare the .idx file produced by tex4ht for use with Xindy or Makeindex.
-- The file is parsed by `parse_idx` and the updated contents are saved to
-- a temporary file, whose name is obtained from `os.tmpname`.
-- @param filename name of the .idx file
-- @return the object with the mapping between dummy page numbers and link
-- destinations, and the name of the temporary .idx file; these can be used
-- for the processing with the index processor. nil and an error message are
-- returned when the input or the temporary file cannot be opened.
local prepare_idx = function(filename)
  local f = io.open(filename, "r")
  if not f then return nil, "Cannot open file :".. tostring(filename) end
  local content = f:read("*all")
  -- the input file is not needed anymore, don't leak the handle
  f:close()
  local idx = parse_idx(content)
  local idxname = os.tmpname()
  local tmpfile = io.open(idxname, "w")
  if not tmpfile then
    return nil, "Cannot open temporary idx file: " .. tostring(idxname)
  end
  tmpfile:write(idx.idx)
  tmpfile:close()
  return idx, idxname
end

--- Add links to an index (.ind) file.
-- The file is read, its page numbers are replaced using `fix_idx_pages`, and
-- the result is written back to the same file.
-- @param indname name of the .ind file
-- @param idx object with the `map` field, as returned by `parse_idx`
-- @return true on success, nil and an error message when the file cannot be
-- opened for reading or writing
local process_index = function(indname, idx)
  local input = io.open(indname,  "r")
  if not input then return  nil, "Cannot open .ind file: " .. tostring(indname) end
  local content = input:read("*all")
  input:close()

  local newcontent = fix_idx_pages(content, idx)
  local output = io.open(indname,"w")
  -- the file can be readable, but not writable
  if not output then return nil, "Cannot write .ind file: " .. tostring(indname) end
  output:write(newcontent)
  output:close()
  return true
end

--- Get the name of the .idx file.
-- @param par table with parameters
-- @return `par.idxfile` when it is set, `par.input` with the `.idx`
-- extension otherwise
local get_idxname = function(par)
  local idxfile = par.idxfile
  if idxfile then
    return idxfile
  end
  return par.input .. ".idx"
end

--- Locate the .idx file and convert it to a temporary file for the index processor.
-- The function updates `par.idxfile` with the value returned by
-- `mkutils.file_in_builddir`, and sets `par.indfile` when it is not set yet.
-- @param par table with parameters
-- @return name of the temporary .idx file and the index data from
-- `prepare_idx`; nil and an error message in the case of failure
local prepare_tmp_idx = function(par)
  local idxfile = mkutils.file_in_builddir(get_idxname(par), par)
  par.idxfile = idxfile
  if not (idxfile and mkutils.file_exists(idxfile)) then
    return nil, "Cannot load idx file " .. (idxfile or "''")
  end
  -- construct the .ind name, based on the .idx name
  if not par.indfile then
    -- the match count returned by gsub is dropped by the assignment
    par.indfile = idxfile:gsub("idx$", "ind")
  end
  load_enc()
  -- save hyperlinks and clean the .idx file
  local idxdata, newidxfile = prepare_idx(idxfile)
  if idxdata then
    return  newidxfile, idxdata
  end
  -- when prepare_idx returns nil, the second returned value contains the error message
  return nil, newidxfile
end


--- Split the .idx file into separate files, one for each named index.
-- Lines that contain `indexentry[name]` are saved to the file named
-- `par.input .. "-" .. name .. ".idx"`, together with the line that precedes
-- them in the original file. The `[name]` part is removed from the saved entry.
-- @param par table with parameters
-- @return table with names of the saved files (it is empty when no named
-- entries were found); nil and an error message when the .idx file doesn't exist
local splitindex = function(par)
  local files = {}
  local idxfiles = {}
  -- the line processed in the previous iteration
  local buffer
  local idxfile = get_idxname(par)
  if not idxfile or not mkutils.file_exists(idxfile) then return nil, "Cannot load idx file " .. (idxfile or "''") end
  for line in io.lines(idxfile) do
    local file = line:match("indexentry%[(.-)%]")
    if file then
      -- generate idx name for the current output file
      file =  par.input .. "-" ..file .. ".idx"
      local current = files[file] or {}
      -- remove file name from the index entry
      local indexentry = line:gsub("indexentry%[.-%]", "indexentry")
      -- save the index entry and the preceding line to the current buffer;
      -- there is no preceding line when the entry is on the first line
      if buffer then
        current[#current+1] = buffer
      end
      current[#current+1] = indexentry
      files[file] = current
    end
    buffer = line
  end
  -- save idx files
  for filename, contents in pairs(files) do
    log:info("Saving split index file: " .. filename)
    local f, msg = io.open(filename, "w")
    if f then
      idxfiles[#idxfiles+1] = filename
      f:write(table.concat(contents, "\n"))
      f:close()
    else
      -- report the file that cannot be written and continue with the others,
      -- instead of failing on the nil file handle
      log:warning("Cannot save split index file: " .. filename .. " (" .. tostring(msg) .. ")")
    end
  end
  return idxfiles
end

--- Run an index processor and insert links to the resulting index.
-- When `splitindex` produces separate index files, the function calls itself
-- for each of them, using a copy of the parameters with `idxfile` replaced.
-- Otherwise the temporary .idx file is prepared, the command is executed,
-- links are inserted to `par.indfile`, and the temporary file is removed.
-- NOTE(review): the value returned by mkutils.execute is not inspected.
-- @param command command template; it is expanded using `command % par`
-- @param par table with parameters; `newidxfile` is set and `indfile` is
-- cleared by this function
-- @return nil after processing of the split indices, false when the .idx file
-- cannot be prepared, nothing otherwise
local function run_indexing_command (command, par)
  -- the logger is named after the command, which is the first word
  local xindylog  = logging.new(command:match("^[%a]+") or "indexing")
  -- support split index
  local subindexes = splitindex(par) or {}
  if #subindexes > 0 then
    -- call the command again on all files produced by splitindex
    for i = 1, #subindexes do
      -- each call gets its own copy of the parameters
      local params = {}
      for key, value in pairs(par) do
        params[key] = value
      end
      params.idxfile = subindexes[i]
      run_indexing_command(command, params)
    end
    return nil
  end

  local newidxfile, idxdata = prepare_tmp_idx(par)
  if not newidxfile then
    -- idxdata contains the error message in the case of error
    xindylog:warning(idxdata)
    return false
  end
  par.newidxfile = newidxfile
  xindylog:debug("Prepared temporary idx file: ", newidxfile)

  -- fill the command template and run it
  local xindy_call = command % par
  xindylog:info(xindy_call)
  mkutils.execute(xindy_call)

  -- insert correct links to the index
  local ok, msg = process_index(par.indfile, idxdata)
  if not ok then
    xindylog:warning(msg)
  end
  -- remove the temporary idx file
  os.remove(newidxfile)
  -- null the indfile, it is necessary in order to support
  -- multiple indices
  par.indfile = nil
end


-- public interface of the module; the other functions defined in this file
-- (such as sanitize_licr, fix_subitems or splitindex) stay private
M.get_utf8 = get_utf8
M.load_enc = load_enc
M.parse_idx = parse_idx
M.fix_idx_pages = fix_idx_pages
M.prepare_idx = prepare_idx
M.process_index = process_index
M.prepare_tmp_idx = prepare_tmp_idx
M.run_indexing_command = run_indexing_command
return M