--
-- Copyright (c) 2021-2025 Zeping Lee
-- Released under the MIT license.
-- Repository: https://github.com/zepinglee/citeproc-lua
--

local util = {}

local uni_utf8
local using_luatex, kpse = pcall(require, "kpse")
if using_luatex then
 uni_utf8 = require("unicode").grapheme
else
 uni_utf8 = require("lua-utf8")
end


local inspect  -- only load it when debugging


---Recursively copy a value.
---Tables are duplicated level by level; any other value is returned as is.
---The copy shares the metatable of the original (the metatable itself is not
---copied) and table keys are reused rather than copied. Cyclic tables are not
---supported: the recursion would never terminate.
---@param obj any
---@return any
function util.deep_copy(obj)
 if type(obj) ~= "table" then
   return obj
 end
 local copy = {}
 -- The metatable is attached before the fields are filled in, so a
 -- `__newindex` handler (if any) sees the writes below.
 local mt = getmetatable(obj)
 if mt then
   setmetatable(copy, mt)
 end
 for k, v in pairs(obj) do
   copy[k] = util.deep_copy(v)
 end
 return copy
end

---Shallow copy.
---For a table, returns a new table with the same key/value pairs (values are
---not copied) and the same metatable. Any other value is returned unchanged.
---@param obj any
---@return any
function util.clone(obj)
 if type(obj) ~= "table" then
   return obj
 end
 local copy = {}
 for k, v in pairs(obj) do
   copy[k] = v
 end
 -- The metatable is attached only after the fields have been copied.
 setmetatable(copy, getmetatable(obj))
 return copy
end

---Check whether `obj` is a table whose `_type` field equals `class._type`.
---Limitation: Hierarchical or multiple inheritance is not supported.
---@param obj table
---@param class table
---@return boolean
function util.is_instance(obj, class)
 if type(obj) ~= "table" then
   -- `class` is deliberately not touched for non-table values.
   return false
 end
 return obj._type == class._type
end

---Interleave `delimiter` between the items of `list`.
---Unlike `table.concat`, the result is a new list (not a string), so the
---items and the delimiter may be values of any type.
---@param list any[]
---@param delimiter any
---@return any[]
function util.join(list, delimiter)
 local res = {}
 for index, item in ipairs(list) do
   if index ~= 1 then
     res[#res + 1] = delimiter
   end
   res[#res + 1] = item
 end
 return res
end

---Convert the strings "true" / "false" to a boolean.
---A missing value (nil or false) yields false. Any other value is reported
---through `util.warning` and also yields false.
---@param str string?
---@return boolean
function util.to_boolean(str)
 if str == "true" then
   return true
 end
 if not str or str == "false" then
   return false
 end
 util.warning(string.format("Invalid boolean string '%s'", str))
 return false
end

---Split a string into a list using the default separator of `util.split`.
---A missing value (nil or false) yields nil.
---@param str string?
---@return string[]?
function util.to_list(str)
 if str then
   return util.split(str)
 end
 return nil
end


--- Concat with CJK or western colon
---
--- The colon is followed by a space only when neither the last character of
--- `main_title` nor the first character of `substitle` is a CJK character
--- (as decided by `util.is_cjk_char`). An empty title or subtitle counts as
--- non-CJK.
---@param main_title string
---@param substitle string
---@return string
function util.join_title(main_title, substitle)
 -- `utf8.codepoint` expects a BYTE position, so the character index
 -- (-1 = last character, 1 = first character) is translated with
 -- `utf8.offset` first. Returns nil for an empty string.
 local function code_point_at(str, char_index)
   if str == "" then
     return nil
   end
   local byte_pos = utf8.offset(str, char_index)
   if not byte_pos then
     return nil
   end
   return utf8.codepoint(str, byte_pos)
 end

 if util.is_cjk_char(code_point_at(main_title, -1))
     or util.is_cjk_char(code_point_at(substitle, 1)) then
   return main_title .. ":" .. substitle
 end
 return main_title .. ": " .. substitle
end


---Append the English ordinal suffix to a number: 1 -> "1st", 22 -> "22nd".
---Numbers ending in 11, 12 or 13 (11, 112, 1013, ...) take "th".
---@param n integer
---@return string
function util.to_ordinal(n)
 -- assert(type(n) == "number")
 local last_digit = n % 10
 local last_two_digits = n % 100
 local suffix = "th"
 -- The "teens" are irregular for every hundred, not only for 11-13 itself.
 if last_two_digits < 11 or last_two_digits > 13 then
   if last_digit == 1 then
     suffix = "st"
   elseif last_digit == 2 then
     suffix = "nd"
   elseif last_digit == 3 then
     suffix = "rd"
   end
 end
 return tostring(n) .. suffix
end


-- When true, `util.info` does not print its message to stdout.
util.quiet_mode = false

-- A file handle to usually a `.blg` file.
-- `util.error`, `util.warning` and `util.info` write to it when it is set.
util.logging_file = nil


---Start logging to `path`.
---The file is truncated first and then reopened in append mode; the resulting
---handle is stored in `util.logging_file`. If the file cannot be opened for
---writing, the problem is reported through `util.error` and
---`util.logging_file` is left as nil.
---@param path string
function util.set_logging_file(path)
 -- Opening with "w" empties any previous log.
 local file = io.open(path, "w")
 if not file then
   util.logging_file = nil
   util.error(string.format("Cannot write to '%s'.", path))
   return
 end
 -- Release the truncating handle before reopening, so that only one handle
 -- to the log file stays open.
 file:close()
 util.logging_file = io.open(path, "a")
end


---Close the logging file, if any, and forget the handle.
---Calling it again, or when no logging file is set, does nothing.
function util.close_logging_file()
 local file = util.logging_file
 if file then
   -- Clear the field so that later log calls skip the file instead of
   -- writing to a closed handle (which raises an error).
   util.logging_file = nil
   file:close()
 end
end


-- Counters incremented by every call of `util.error` and `util.warning`.
util.num_errors = 0
util.num_warnings = 0


---Report an error and increment `util.num_errors`.
---The error is only reported; execution continues after the call.
---@param message string
function util.error(message)
 util.num_errors = util.num_errors + 1
 if not luatexbase then
   -- Run in citeproc-lua script
   -- This format is used by latexmk. DO NOT change.
   local line = "Error: " .. message
   -- Following bibtex and biber, the error message is printed to stdout rather than stderr.
   -- And the error doesn't break the execution of the script.
   print(line)
   local log = util.logging_file
   if log then
     log:write(line .. "\n")
   end
   return
 end
 -- Run in LuaLaTeX
 -- Don't use the following methods.
 -- `error()` prints long traceback when run in LuaLaTeX.
 -- texio.write_nl("term", "\n")
 -- `tex.error()` prints annoying `\lua_now:e #1->` when called from LaTeX3 interface.
 -- `luatexbase.module_error()` prints traceback.
 -- tex.print(string.format("\\PackageError{CSL}{%s}{}", message))
 tex.print(string.format("\\csname msg_error:nnn\\endcsname{citeproc}{citeproc-error}{%s}", message))
end

-- When false, warnings are not echoed to stderr by the script branch of
-- `util.warning` (they are still counted and written to the logging file).
util.warning_enabled = true

---Report a warning and increment `util.num_warnings`.
---@param message string
function util.warning(message)
 util.num_warnings = util.num_warnings + 1
 if luatexbase then
   -- Run in LuaLaTeX
   tex.print(string.format("\\csname msg_warning:nnn\\endcsname{citeproc}{citeproc-warning}{%s}", message))
   return
 end
 -- This format is used by latexmk. DO NOT change.
 local line = "Warning: " .. message
 local log = util.logging_file
 if log then
   log:write(line .. "\n")
 end
 if util.warning_enabled then
   io.stderr:write(line, "\n")
 end
end

---Print an informational message.
---The message goes to stdout unless running under LuaTeX (`luatexbase` is
---set) or `util.quiet_mode` is on; it is written to the logging file, when
---there is one, in every case.
---@param message string
function util.info(message)
 local show_on_terminal = not (luatexbase or util.quiet_mode)
 if show_on_terminal then
   print(message)
 end
 local log = util.logging_file
 if log then
   log:write(message .. "\n")
 end
end


-- Filter passed to `inspect` as its `process` option; created together with
-- the lazily loaded `inspect` module inside `util.debug`.
local remove_all_metatables = nil

---Write a debugging representation of `obj` to stderr, prefixed with the
---source file and line of the caller: `[file:line] text`.
---Objects providing a `_debug` method (or lists whose first element provides
---one) are rendered with that method; everything else is rendered with the
---`inspect` module, which is only loaded on first use.
---@param obj any
function util.debug(obj)
 local text
 if type(obj) == "table" and (obj._debug or (obj[1] and obj[1]._debug)) then
   if obj._debug then
     text = obj:_debug()
   else
     -- A list of debuggable objects: concatenate their representations.
     text = ""
     for _, child in ipairs(obj) do
       text = text .. child:_debug()
     end
   end
 else
   if not inspect then
     inspect = require("inspect")
     -- Returns nothing (nil) for items whose path ends in `inspect.METATABLE`
     -- and the item itself otherwise; presumably this makes inspect leave
     -- metatables out of the output — confirm against the inspect docs.
     remove_all_metatables = function (item, path)
       if path[#path] ~= inspect.METATABLE then
         return item
       end
     end
   end
   text = inspect(obj, {process = remove_all_metatables})
 end
 io.stderr:write("[")
 -- Level 2 is the function that called util.debug. `sub(2)` drops the first
 -- character of the chunk name ("@" for chunks loaded from a file).
 io.stderr:write(debug.getinfo(2, "S").source:sub(2))
 io.stderr:write(":")
 io.stderr:write(debug.getinfo(2, "l").currentline)
 io.stderr:write("] ")
 io.stderr:write(text)
 io.stderr:write("\n")
end

---Split a string on a Lua pattern, similar to re.split() in Python.
---An empty string yields an empty list; a string without any separator
---yields a one-element list. With a non-negative `maxsplit`, at most that
---many splits are made and the remainder is returned as the last field.
---@param str string
---@param sep string? Lua pattern of the separator (default: "%s+")
---@param maxsplit integer? maximum number of splits; nil or negative means no limit
---@return string[]
function util.split(str, sep, maxsplit)
 if type(str) ~= "string" then
   util.error("Invalid string")
 end
 if not sep then
   sep = "%s+"
 elseif sep == "" then
   util.error("Empty separator")
 end
 if str == "" then
   return {}
 elseif not string.find(str, sep) then
   return {str}
 end

 local unlimited = (maxsplit == nil or maxsplit < 0)
 local fields = {}
 local count = 0
 -- Byte position right after the last separator that was consumed.
 local tail_start = 1
 for field, next_start in string.gmatch(str, "(.-)" .. sep .. "()") do
   if not unlimited and count == maxsplit then
     break
   end
   count = count + 1
   fields[count] = field
   tail_start = next_start
 end
 -- Whatever follows the last consumed separator is the final field.
 fields[count + 1] = string.sub(str, tail_start)
 return fields
end

---Split a string on one or several Lua patterns.
---@param str string
---@param seps (string|string[])? one pattern or a list of patterns (default: "%s+")
---@param include_sep boolean? when truthy every result entry is a pair
---  `{item, separator}`; the last entry has "" as its separator
---@return table list of strings, or list of pairs when `include_sep` is set
function util.split_multiple(str, seps, include_sep)
 seps = seps or "%s+"
 if seps == "" then
   error("Empty separator")
 end
 if type(seps) == "string" then
   seps = {seps}
 end

 -- Every separator occurrence as {start, matched text, position after it}.
 local hits = {}
 for _, pattern in ipairs(seps) do
   for first, text, after in string.gmatch(str, "()(" .. pattern .. ")()") do
     hits[#hits + 1] = {first, text, after}
   end
 end

 -- Occurrences of different patterns must be merged into string order.
 if #seps > 1 then
   table.sort(hits, function (a, b) return a[1] < b[1] end)
 end

 local res = {}
 local function emit(piece, sep_text)
   if include_sep then
     res[#res + 1] = {piece, sep_text}
   else
     res[#res + 1] = piece
   end
 end

 local cursor = 1
 for _, hit in ipairs(hits) do
   emit(string.sub(str, cursor, hit[1] - 1), hit[2])
   cursor = hit[3]
 end
 emit(string.sub(str, cursor, #str), "")
 return res
end

---Return a new list with the items of `t` at positions `start` to `stop`
---(both inclusive). Negative positions count from the end of the list, so
---`-1` refers to the last item.
---@param t any[]
---@param start integer? default: 1
---@param stop integer? default: #t
---@return any[]
function util.slice(t, start, stop)
 local first = start or 1
 local last = stop or #t
 if first < 0 then
   first = first + #t + 1
 end
 if last < 0 then
   last = last + #t + 1
 end
 local new = {}
 for i, item in ipairs(t) do
   if i > last then
     -- Positions only grow, nothing further can qualify.
     break
   end
   if i >= first then
     new[#new + 1] = item
   end
 end
 return new
end

---Join the items of `list` with `sep`, skipping nil, false and "" items.
---This differs from table.concat, which keeps empty strings. When nothing is
---left to join, the empty string is returned.
---@param list (string|nil)[]
---@param sep string
---@return string
function util.concat(list, sep)
 local res = ""
 local is_first = true
 for index = 1, #list do
   local piece = list[index]
   if piece and piece ~= "" then
     if is_first then
       res = piece
       is_first = false
     else
       res = res .. sep .. piece
     end
   end
 end
 return res
end

---Append the items of `second` to `first` in place, like Python's
---list.extend(), and return `first`.
---@param first any[]
---@param second any[]
---@return any[]
function util.extend(first, second)
 -- if not second then
 --   print(debug.traceback())
 -- end
 local offset = #first
 for index, value in ipairs(second) do
   first[offset + index] = value
 end
 return first
end

---Concatenate two lists into a newly created list.
---Neither `first` nor `second` is modified.
---@param first any[]
---@param second any[]
---@return any[]
function util.concat_list(first, second)
 -- The result table has to exist before it can be filled.
 local res = {}
 for i, element in ipairs(first) do
   res[i] = element
 end
 local offset = #res
 for j, element in ipairs(second) do
   res[offset + j] = element
 end
 return res
end

---Remove leading whitespace.
---Raises an error when `str` is nil or false.
---@param str string
---@return string
function util.lstrip(str)
 if str then
   -- The parentheses keep only the string result of gsub.
   return (string.gsub(str, "^%s*", ""))
 end
 error("Invalid input")
end

---Remove `prefix` from the start of `str` when it is present; otherwise
---return `str` unchanged. Raises an error unless both arguments are strings.
---@param str string
---@param prefix string
---@return string
function util.remove_prefix(str, prefix)
 if type(str) ~= "string" or type(prefix) ~= "string" then
   error("Invalid input")
 end
 local prefix_len = #prefix
 if string.sub(str, 1, prefix_len) ~= prefix then
   return str
 end
 return string.sub(str, prefix_len + 1)
end

---Remove trailing whitespace.
---Raises an error when `str` is nil or false.
---@param str string
---@return string
function util.rstrip(str)
 if str then
   -- The parentheses keep only the string result of gsub.
   return (string.gsub(str, "%s*$", ""))
 end
 error("Invalid input")
end

---Remove whitespace from both ends of a string
---(trailing whitespace first, then leading whitespace).
---@param str string
---@return string
function util.strip(str)
 local without_trailing = util.rstrip(str)
 return util.lstrip(without_trailing)
end

---Check whether `str` begins with `prefix` (plain comparison, no patterns).
---A non-string argument is reported through `util.error` together with a
---traceback; the comparison is still attempted afterwards.
---@param str string
---@param prefix string
---@return boolean
function util.startswith(str, prefix)
 if type(str) ~= "string" then
   util.error(string.format("\n%s\n'%s' is not a string.", debug.traceback(), str))
 end
 if type(prefix) ~= "string" then
   util.error(string.format("\n%s\n'%s' is not a string.", debug.traceback(), prefix))
 end
 local head = string.sub(str, 1, #prefix)
 return head == prefix
end

---Check whether `str` ends with `suffix` (plain comparison, no patterns).
---Every string ends with the empty suffix.
---@param str string
---@param suffix string
---@return boolean
function util.endswith(str, suffix)
 -- if not str or type(str) ~= "string" then
 --   print(debug.traceback())
 -- end
 -- `string.sub(str, -0)` is `string.sub(str, 0)`, i.e. the whole string, so
 -- the empty suffix has to be handled separately.
 if suffix == "" then
   return true
 end
 return string.sub(str, - #suffix) == suffix
end

---Append a space to `str` when its last character is a romanesque character
---(see `util.is_romanesque`), one of `: . ; ! ?`, or one of `) ] ,` or a
---digit. Otherwise, and for the empty string, `str` is returned unchanged.
---@param str string
---@return string
function util.check_prefix_space_append(str)
 if str == "" then
   return str
 end
 local tail = uni_utf8.match(str, ".$")
 if not tail then
   return str
 end
 local needs_space = util.is_romanesque(utf8.codepoint(tail, 1, #tail))
   or string.match(tail, "[:.;!?]")
   or string.match(tail, "[)%],0-9]")
 if needs_space then
   return str .. " "
 end
 return str
end

---Prepend a space to `str` when its first character is a romanesque
---character (see `util.is_romanesque`) or one of `)` and `[`. Otherwise, and
---for the empty string, `str` is returned unchanged.
---@param str string
---@return string
function util.check_suffix_prepend(str)
 if str == "" then
   return str
 end
 local head = uni_utf8.match(str, "^.")
 if not head then
   return str
 end
 local needs_space = util.is_romanesque(utf8.codepoint(head, 1, #head))
   or string.match(head, "[)%[]")
 if needs_space then
   return " " .. str
 end
 return str
end

---Check whether a value looks like numeric content: numbers (optionally with
---letter affixes) or Roman numeral letters, possibly combined with "and",
---"et", ",", "&" or "-". An en dash is treated like a hyphen.
---nil and the empty string are not numeric.
---@param str string | number
---@return boolean
function util.is_numeric(str)
 if str == nil or str == "" then
   return false
 end
 -- Only the first result of gsub (the new string) is kept.
 str = string.gsub(str, util.unicode["en dash"], "-")
 local res = true
 -- Every alphanumeric run must be one of: digits with an optional
 -- alphanumeric prefix and optional trailing letters, the word "and" or
 -- "et", or a run made only of Roman numeral letters (one case throughout).
 for w in string.gmatch(str, "%w+") do
   if not string.match(w, "^%w*%d+%a*$") and
       not string.match(w, "^and$") and
       not string.match(w, "^et$") and
       not string.match(w, "^[MDCLXVI]+$") and
       not string.match(w, "^[mdclxvi]+$") then
     -- Roman number without validation
     return false
   end
 end
 -- Every run of non-alphanumeric characters must consist of ",", "&" or "-"
 -- with optional surrounding whitespace.
 -- NOTE(review): the second condition tests `str`, not `w`, so any separator
 -- is accepted as soon as the whole string contains whitespace — confirm
 -- that this is intended.
 for w in string.gmatch(str, "%W+") do
   if not string.match(w, "^%s*[,&-]+%s*$")
       and not string.match(str, "%s+") then
     res = false
     break
   end
 end
 return res
end

-- Maps a variable name to the key of the `util.variables` list containing it.
-- NOTE: this table is assigned again further down, right before it is
-- populated from `util.variables`.
util.variable_types = {}

-- Lists of variable names grouped by kind.
-- schema/schemas/styles/csl-variables.rnc
util.variables = {}

-- -- Standard variables
-- util.variables.standard = {
--   "abstract",
--   "annote",
--   "archive",
--   "archive_collection",
--   "archive_location",
--   "archive-place",
--   "authority",
--   "call-number",
--   "citation-key",
--   "citation-label",
--   "collection-title",
--   "container-title",
--   "container-title-short",
--   "dimensions",
--   "division",
--   "DOI",
--   "event",
--   "event-title",
--   "event-place",
--   "genre",
--   "ISBN",
--   "ISSN",
--   "jurisdiction",
--   "keyword",
--   "language",
--   "license",
--   "medium",
--   "note",
--   "original-publisher",
--   "original-publisher-place",
--   "original-title",
--   "part-title",
--   "PMCID",
--   "PMID",
--   "publisher",
--   "publisher-place",
--   "references",
--   "reviewed-genre",
--   "reviewed-title",
--   "scale",
--   "source",
--   "status",
--   "title",
--   "title-short",
--   "URL",
--   "volume-title",
--   "year-suffix",
-- }

-- Number variables
-- (names of the variables registered under the "number" kind)
util.variables.number = {
 "chapter-number",
 "citation-number",
 "collection-number",
 "edition",
 "first-reference-note-number",
 "issue",
 "locator",
 "number",
 "number-of-pages",
 "number-of-volumes",
 "page",
 "page-first",
 "part-number",
 "printing-number",
 "section",
 "supplement-number",
 "version",
 "volume",
}

-- Date variables
-- (names of the variables registered under the "date" kind)
util.variables.date = {
 "accessed",
 "available-date",
 "event-date",
 "issued",
 "original-date",
 "submitted",
}

-- Name variables
-- (names of the variables registered under the "name" kind; the order
-- matters to `util.get_citation_label`, which uses the first of these
-- variables that is present on an item)
util.variables.name = {
 "author",
 "chair",
 "collection-editor",
 "compiler",
 "composer",
 "container-author",
 "contributor",
 "curator",
 "director",
 "editor",
 "editor-translator",
 "editorial-director",
 "executive-producer",
 "guest",
 "host",
 "illustrator",
 "interviewer",
 "narrator",
 "organizer",
 "original-author",
 "performer",
 "producer",
 "recipient",
 "reviewed-author",
 "script-writer",
 "series-creator",
 "translator",
}

-- Reverse lookup built from `util.variables`: variable name -> kind, where the
-- kind is the key of the list that contains the name ("number", "date" or
-- "name"). Variables that appear in none of the lists have no entry.
util.variable_types = {}

-- The loop variable is not called `type`, which would shadow the built-in
-- function of that name.
for variable_type, variables in pairs(util.variables) do
 for _, variable in ipairs(variables) do
   util.variable_types[variable] = variable_type
 end
end

-- Maps a two-letter language code to a full locale code (the "primary
-- dialect" of that language). `util.get_layout_by_language` uses it as the
-- last fallback when looking up a language-specific layout.
util.primary_dialects = {
 af = "af-ZA",
 ar = "ar",
 bg = "bg-BG",
 ca = "ca-AD",
 cs = "cs-CZ",
 cy = "cy-GB",
 da = "da-DK",
 de = "de-DE",
 el = "el-GR",
 en = "en-US",
 es = "es-ES",
 et = "et-EE",
 eu = "eu",
 fa = "fa-IR",
 fi = "fi-FI",
 fr = "fr-FR",
 he = "he-IL",
 hi = "hi-IN",
 hr = "hr-HR",
 hu = "hu-HU",
 id = "id-ID",
 is = "is-IS",
 it = "it-IT",
 ja = "ja-JP",
 km = "km-KH",
 ko = "ko-KR",
 la = "la",
 lt = "lt-LT",
 lv = "lv-LV",
 mn = "mn-MN",
 nb = "nb-NO",
 nl = "nl-NL",
 nn = "nn-NO",
 pl = "pl-PL",
 pt = "pt-PT",
 ro = "ro-RO",
 ru = "ru-RU",
 sk = "sk-SK",
 sl = "sl-SI",
 sr = "sr-RS",
 sv = "sv-SE",
 th = "th-TH",
 tr = "tr-TR",
 uk = "uk-UA",
 vi = "vi-VN",
 zh = "zh-CN",
}


-- Unicode characters keyed by their lower-cased Unicode names.
-- "apostrophe" is an alias: it maps to the same character (U+2019) as
-- "right single quotation mark".

util.unicode = {
 ["no-break space"] = "\u{00A0}",
 ["em space"] = "\u{2003}",
 ["en dash"] = "\u{2013}",
 ["em dash"] = "\u{2014}",
 ["left single quotation mark"] = "\u{2018}",
 ["right single quotation mark"] = "\u{2019}",
 ["apostrophe"] = "\u{2019}",
 ["left double quotation mark"] = "\u{201C}",
 ["right double quotation mark"] = "\u{201D}",
 ["left-pointing double angle quotation mark"] = "\u{00AB}",
 ["right-pointing double angle quotation mark"] = "\u{00BB}",
 ["horizontal ellipsis"] = "\u{2026}",
 ["narrow no-break space"] = "\u{202F}",
}

-- Characters that separate words.
-- The hyphen is written as `%-`, so the entries are presumably meant to be
-- used as Lua patterns (e.g. with `util.split_multiple`) — confirm against
-- the callers.
util.word_boundaries = {
 ":",
 " ",
 "%-",
 "/",
 util.unicode["no-break space"],
 util.unicode["en dash"],
 util.unicode["em dash"],
}


-- Set of lower-case stop words (articles, conjunctions, prepositions and
-- similar), stored as `word -> true` for constant-time lookup. Some entries
-- consist of several words.
-- TODO: process multiple words
util.stop_words = {
 ["a"] = true,
 ["according to"] = true,
 ["across"] = true,
 ["afore"] = true,
 ["after"] = true,
 ["against"] = true,
 ["ahead of"] = true,
 ["along"] = true,
 ["alongside"] = true,
 ["amid"] = true,
 ["amidst"] = true,
 ["among"] = true,
 ["amongst"] = true,
 ["an"] = true,
 ["and"] = true,
 ["anenst"] = true,
 ["apart from"] = true,
 ["apropos"] = true,
 ["apud"] = true,
 ["around"] = true,
 ["as"] = true,
 ["as regards"] = true,
 ["aside"] = true,
 ["astride"] = true,
 ["at"] = true,
 ["athwart"] = true,
 ["atop"] = true,
 ["back to"] = true,
 ["barring"] = true,
 ["because of"] = true,
 ["before"] = true,
 ["behind"] = true,
 ["below"] = true,
 ["beneath"] = true,
 ["beside"] = true,
 ["besides"] = true,
 ["between"] = true,
 ["beyond"] = true,
 ["but"] = true,
 ["by"] = true,
 ["c"] = true,
 ["ca"] = true,
 ["circa"] = true,
 ["close to"] = true,
 ["d'"] = true,
 ["de"] = true,
 ["despite"] = true,
 ["down"] = true,
 ["due to"] = true,
 ["during"] = true,
 ["et"] = true,
 ["except"] = true,
 ["far from"] = true,
 ["for"] = true,
 ["forenenst"] = true,
 ["from"] = true,
 ["given"] = true,
 ["in"] = true,
 ["inside"] = true,
 ["instead of"] = true,
 ["into"] = true,
 ["lest"] = true,
 ["like"] = true,
 ["modulo"] = true,
 ["near"] = true,
 ["next"] = true,
 ["nor"] = true,
 ["notwithstanding"] = true,
 ["of"] = true,
 ["off"] = true,
 ["on"] = true,
 ["onto"] = true,
 ["or"] = true,
 ["out"] = true,
 ["outside of"] = true,
 ["over"] = true,
 ["per"] = true,
 ["plus"] = true,
 ["prior to"] = true,
 ["pro"] = true,
 ["pursuant to"] = true,
 ["qua"] = true,
 ["rather than"] = true,
 ["regardless of"] = true,
 ["sans"] = true,
 ["since"] = true,
 ["so"] = true,
 ["such as"] = true,
 ["than"] = true,
 ["that of"] = true,
 ["the"] = true,
 ["through"] = true,
 ["throughout"] = true,
 ["thru"] = true,
 ["thruout"] = true,
 ["till"] = true,
 ["to"] = true,
 ["toward"] = true,
 ["towards"] = true,
 ["under"] = true,
 ["underneath"] = true,
 ["until"] = true,
 ["unto"] = true,
 ["up"] = true,
 ["upon"] = true,
 ["v."] = true,
 ["van"] = true,
 ["versus"] = true,
 ["via"] = true,
 ["vis-à-vis"] = true,
 ["von"] = true,
 ["vs."] = true,
 ["where as"] = true,
 ["with"] = true,
 ["within"] = true,
 ["without"] = true,
 ["yet"] = true,
}

-- Additional stop words taken from citeproc-js:
-- <https://github.com/Juris-M/citeproc-js/blob/73bc1b44bc7d54d0bfec4e070fd27f5efe024ff9/src/load.js#L1052C2-L1052C2>
local citeproc_js_addition_stop_words = {
 ["about"] = true,
 ["above"] = true,
 ["al"] = true,
 ["as for"] = true,
 ["as of"] = true,
 ["as per"] = true,
 ["aside from"] = true,
 ["except for"] = true,
 ["inside of"] = true,
 ["near to"] = true,
 ["next to"] = true,
 ["on to"] = true,
 ["out from"] = true,
 ["out of"] = true,
 ["up to"] = true,
 ["v"] = true,
 ["vs"] = true,
}

-- Merge the additional words into the main stop word set.
for word, _ in pairs(citeproc_js_addition_stop_words) do
 util.stop_words[word] = true
end

---Return true when no item of the list is false, like Python's all().
---The list ends at the first nil item; an empty list yields true.
---@param t any[]
---@return boolean
function util.all(t)
 local index = 1
 local item = t[index]
 while item ~= nil do
   if not item then
     return false
   end
   index = index + 1
   item = t[index]
 end
 return true
end

---Return true when at least one item of the list is truthy, like Python's
---any(). The list ends at the first nil item; an empty list yields false.
---@param t any[]
---@return boolean
function util.any(t)
 local index = 1
 local item = t[index]
 while item ~= nil do
   if item then
     return true
   end
   index = index + 1
   item = t[index]
 end
 return false
end

-- ROMANESQUE_REGEXP = "-0-9a-zA-Z\u0e01-\u0e5b\u00c0-\u017f\u0370-\u03ff\u0400-\u052f\u0590-\u05d4\u05d6-\u05ff\u1f00-\u1fff\u0600-\u06ff\u200c\u200d\u200e\u0218\u0219\u021a\u021b\u202a-\u202e"

-- Inclusive code point ranges of the "romanesque" characters, transcribed
-- from the ranges of the regular expression above. Each range is listed once.
util.romanesque_ranges = {
 {0x0030, 0x0039},  -- 0-9
 {0x0041, 0x005A},  -- A-Z
 {0x0061, 0x007A},  -- a-z
 {0x0E01, 0x0E5B},  -- Thai
 {0x00C0, 0x017F},  -- Latin-1 Supplement letters and Latin Extended-A
 {0x0370, 0x03FF},  -- Greek and Coptic
 {0x0400, 0x052F},  -- Cyrillic
 {0x0590, 0x05D4},  -- Hebrew
 {0x05D6, 0x05FF},  -- Hebrew
 {0x1F00, 0x1FFF},  -- Greek Extended
 {0x0600, 0x06FF},  -- Arabic
 {0x202A, 0x202E},  -- Writing directions in General Punctuation
}

-- Individual code points that `util.is_romanesque` accepts in addition to
-- the ranges in `util.romanesque_ranges`.
util.romanesque_chars = {
 0x200c,
 0x200d,
 0x200e,
 0x0218,
 0x0219,
 0x021a,
 0x021b,
}

-- Inclusive code point ranges that `util.is_cjk_char` treats as CJK.
util.CJK_ranges = {
 {0x4E00, 0x9FFF},  -- CJK Unified Ideographs
 {0x3400, 0x4DBF},  -- CJK Unified Ideographs Extension A
 {0x3040, 0x309F},  -- Hiragana
 {0x30A0, 0x30FF},  -- Katakana
 {0xF900, 0xFAFF},  -- CJK Compatibility Ideographs
 {0x20000, 0x2A6DF},  -- CJK Unified Ideographs Extension B
 {0x2A700, 0x2B73F},  -- CJK Unified Ideographs Extension C
 {0x2B740, 0x2B81F},  -- CJK Unified Ideographs Extension D
 {0x2B820, 0x2CEAF},  -- CJK Unified Ideographs Extension E
 {0x2CEB0, 0x2EBEF},  -- CJK Unified Ideographs Extension F
 {0x30000, 0x3134F},  -- CJK Unified Ideographs Extension G
 {0x2F800, 0x2FA1F},  -- CJK Compatibility Ideographs Supplement
}

---Check whether `value` equals (`==`) one of the items of `list`.
---The list ends at the first nil item.
---@param value any
---@param list any[]
---@return boolean
function util.in_list(value, list)
 local index = 1
 local candidate = list[index]
 while candidate ~= nil do
   if candidate == value then
     return true
   end
   index = index + 1
   candidate = list[index]
 end
 return false
end

---Check whether `value` lies inside one of the given ranges.
---Each range is a pair `{low, high}`; both bounds are inclusive.
---@param value number
---@param ranges number[][]
---@return boolean
function util.in_ranges(value, ranges)
 local index = 1
 local range = ranges[index]
 while range ~= nil do
   if value >= range[1] and value <= range[2] then
     return true
   end
   index = index + 1
   range = ranges[index]
 end
 return false
end

---Check whether a code point is "romanesque", i.e. lies in one of
---`util.romanesque_ranges` or is listed in `util.romanesque_chars`.
---A missing code point (nil or false) yields false.
---@param code_point integer?
---@return boolean
function util.is_romanesque(code_point)
 if not code_point then
   return false
 end
 local found = util.in_ranges(code_point, util.romanesque_ranges)
   or util.in_list(code_point, util.romanesque_chars)
 return found and true or false
end

---Check whether a string contains at least one romanesque character
---(see `util.is_romanesque`); the string does not have to be purely
---romanesque. A missing string (nil or false) yields false.
---@param s string?
---@return boolean
function util.has_romanesque_char(s)
 if s then
   for _, cp in utf8.codes(s) do
     if util.is_romanesque(cp) then
       return true
     end
   end
 end
 return false
end

---Check whether a code point lies in one of `util.CJK_ranges`.
---A missing code point (nil or false) yields false.
---@param code_point integer?
---@return boolean
function util.is_cjk_char(code_point)
 if not code_point then
   return false
 end
 return util.in_ranges(code_point, util.CJK_ranges) and true or false
end

---Check whether a string contains at least one CJK character
---(see `util.is_cjk_char`); the string does not have to be purely CJK.
---A missing string (nil or false) yields false.
---@param s string?
---@return boolean
function util.has_cjk_char(s)
 if s then
   for _, cp in utf8.codes(s) do
     if util.is_cjk_char(cp) then
       return true
     end
   end
 end
 return false
end


---Convert an ordinal to its arabic numeral, returned as a string.
---Handles digit ordinals such as "1st" or "22nd" and, case-insensitively,
---the spelled-out ordinals listed in `util.ordinal_to_arabic_map`.
---Anything else is returned unchanged.
---@param ordinal string
---@return string
function util.convert_ordinal_to_arabic(ordinal)
 -- "1st", "2nd": only the digits (the first capture) are of interest.
 local digits = string.match(ordinal, "^(%d+)(%a+)$")
 if digits then
   return digits
 end
 local spelled_out = util.ordinal_to_arabic_map[string.lower(ordinal)]
 if not spelled_out then
   return ordinal
 end
 return spelled_out
end

-- Lower-case English ordinal words ("first" to "thirtieth") mapped to their
-- arabic numerals, stored as strings. Looked up by
-- `util.convert_ordinal_to_arabic`.
util.ordinal_to_arabic_map = {
 first = "1",
 second = "2",
 third = "3",
 fourth = "4",
 fifth = "5",
 sixth = "6",
 seventh = "7",
 eighth = "8",
 ninth = "9",
 tenth = "10",
 eleventh = "11",
 twelfth = "12",
 thirteenth = "13",
 fourteenth = "14",
 fifteenth = "15",
 sixteenth = "16",
 seventeenth = "17",
 eighteenth = "18",
 nineteenth = "19",
 twentieth = "20",
 ["twenty-first"] = "21",
 ["twenty-second"] = "22",
 ["twenty-third"] = "23",
 ["twenty-fourth"] = "24",
 ["twenty-fifth"] = "25",
 ["twenty-sixth"] = "26",
 ["twenty-seventh"] = "27",
 ["twenty-eighth"] = "28",
 ["twenty-ninth"] = "29",
 thirtieth = "30",
}


---Convert a number to a lower-case Roman numeral, e.g. 1994 -> "mcmxciv".
---Walks `util.roman_numerals` from the largest value to the smallest and
---emits each letter group as often as its value fits into the remainder.
---@param number integer
---@return string
function util.convert_roman(number)
 -- assert(type(number) == "number")
 local pieces = {}
 local remainder = number
 for _, pair in ipairs(util.roman_numerals) do
   local letter, value = pair[1], pair[2]
   pieces[#pieces + 1] = string.rep(letter, math.floor(remainder / value))
   remainder = remainder % value
 end
 return table.concat(pieces)
end

-- Roman numeral letter groups with their values, in descending order of
-- value. `util.convert_roman` relies on this order.
util.roman_numerals = {
 {"m", 1000},
 {"cm", 900},
 {"d", 500},
 {"cd", 400},
 {"c", 100},
 {"xc", 90},
 {"l", 50},
 {"xl", 40},
 {"x", 10},
 {"ix", 9},
 {"v", 5},
 {"iv", 4},
 {"i", 1},
};


-- Choose

-- Numeric codes for the position of a cite.
---@enum Position
local Position = {
 First = 0,
 Subsequent = 1,
 Ibid = 2,
 IbidWithLocator = 3,
}
util.Position = Position

-- Maps position names (strings) to their numeric codes.
-- Note that "container-subsequent" (4) has no member in the `Position` enum.
util.position_map = {
 ["first"] = Position.First,
 ["subsequent"] = Position.Subsequent,
 ["ibid"] = Position.Ibid,
 ["ibid-with-locator"] = Position.IbidWithLocator,
 ["container-subsequent"] = 4,
}


-- Output

-- Maps superscript / modifier characters to their plain counterparts, e.g.
-- U+00B2 (superscript two) -> "2" and U+2122 (trade mark sign) -> "TM".
-- Presumably used to replace such characters with explicitly superscripted
-- plain text in the output — confirm against the callers.
util.superscripts = {
 ["\u{00AA}"] = "\u{0061}",
 ["\u{00B2}"] = "\u{0032}",
 ["\u{00B3}"] = "\u{0033}",
 ["\u{00B9}"] = "\u{0031}",
 ["\u{00BA}"] = "\u{006F}",
 ["\u{02B0}"] = "\u{0068}",
 ["\u{02B1}"] = "\u{0266}",
 ["\u{02B2}"] = "\u{006A}",
 ["\u{02B3}"] = "\u{0072}",
 ["\u{02B4}"] = "\u{0279}",
 ["\u{02B5}"] = "\u{027B}",
 ["\u{02B6}"] = "\u{0281}",
 ["\u{02B7}"] = "\u{0077}",
 ["\u{02B8}"] = "\u{0079}",
 ["\u{02E0}"] = "\u{0263}",
 ["\u{02E1}"] = "\u{006C}",
 ["\u{02E2}"] = "\u{0073}",
 ["\u{02E3}"] = "\u{0078}",
 ["\u{02E4}"] = "\u{0295}",
 ["\u{1D2C}"] = "\u{0041}",
 ["\u{1D2D}"] = "\u{00C6}",
 ["\u{1D2E}"] = "\u{0042}",
 ["\u{1D30}"] = "\u{0044}",
 ["\u{1D31}"] = "\u{0045}",
 ["\u{1D32}"] = "\u{018E}",
 ["\u{1D33}"] = "\u{0047}",
 ["\u{1D34}"] = "\u{0048}",
 ["\u{1D35}"] = "\u{0049}",
 ["\u{1D36}"] = "\u{004A}",
 ["\u{1D37}"] = "\u{004B}",
 ["\u{1D38}"] = "\u{004C}",
 ["\u{1D39}"] = "\u{004D}",
 ["\u{1D3A}"] = "\u{004E}",
 ["\u{1D3C}"] = "\u{004F}",
 ["\u{1D3D}"] = "\u{0222}",
 ["\u{1D3E}"] = "\u{0050}",
 ["\u{1D3F}"] = "\u{0052}",
 ["\u{1D40}"] = "\u{0054}",
 ["\u{1D41}"] = "\u{0055}",
 ["\u{1D42}"] = "\u{0057}",
 ["\u{1D43}"] = "\u{0061}",
 ["\u{1D44}"] = "\u{0250}",
 ["\u{1D45}"] = "\u{0251}",
 ["\u{1D46}"] = "\u{1D02}",
 ["\u{1D47}"] = "\u{0062}",
 ["\u{1D48}"] = "\u{0064}",
 ["\u{1D49}"] = "\u{0065}",
 ["\u{1D4A}"] = "\u{0259}",
 ["\u{1D4B}"] = "\u{025B}",
 ["\u{1D4C}"] = "\u{025C}",
 ["\u{1D4D}"] = "\u{0067}",
 ["\u{1D4F}"] = "\u{006B}",
 ["\u{1D50}"] = "\u{006D}",
 ["\u{1D51}"] = "\u{014B}",
 ["\u{1D52}"] = "\u{006F}",
 ["\u{1D53}"] = "\u{0254}",
 ["\u{1D54}"] = "\u{1D16}",
 ["\u{1D55}"] = "\u{1D17}",
 ["\u{1D56}"] = "\u{0070}",
 ["\u{1D57}"] = "\u{0074}",
 ["\u{1D58}"] = "\u{0075}",
 ["\u{1D59}"] = "\u{1D1D}",
 ["\u{1D5A}"] = "\u{026F}",
 ["\u{1D5B}"] = "\u{0076}",
 ["\u{1D5C}"] = "\u{1D25}",
 ["\u{1D5D}"] = "\u{03B2}",
 ["\u{1D5E}"] = "\u{03B3}",
 ["\u{1D5F}"] = "\u{03B4}",
 ["\u{1D60}"] = "\u{03C6}",
 ["\u{1D61}"] = "\u{03C7}",
 ["\u{2070}"] = "\u{0030}",
 ["\u{2071}"] = "\u{0069}",
 ["\u{2074}"] = "\u{0034}",
 ["\u{2075}"] = "\u{0035}",
 ["\u{2076}"] = "\u{0036}",
 ["\u{2077}"] = "\u{0037}",
 ["\u{2078}"] = "\u{0038}",
 ["\u{2079}"] = "\u{0039}",
 ["\u{207A}"] = "\u{002B}",
 ["\u{207B}"] = "\u{2212}",
 ["\u{207C}"] = "\u{003D}",
 ["\u{207D}"] = "\u{0028}",
 ["\u{207E}"] = "\u{0029}",
 ["\u{207F}"] = "\u{006E}",
 ["\u{2120}"] = "\u{0053}\u{004D}",
 ["\u{2122}"] = "\u{0054}\u{004D}",
 ["\u{3192}"] = "\u{4E00}",
 ["\u{3193}"] = "\u{4E8C}",
 ["\u{3194}"] = "\u{4E09}",
 ["\u{3195}"] = "\u{56DB}",
 ["\u{3196}"] = "\u{4E0A}",
 ["\u{3197}"] = "\u{4E2D}",
 ["\u{3198}"] = "\u{4E0B}",
 ["\u{3199}"] = "\u{7532}",
 ["\u{319A}"] = "\u{4E59}",
 ["\u{319B}"] = "\u{4E19}",
 ["\u{319C}"] = "\u{4E01}",
 ["\u{319D}"] = "\u{5929}",
 ["\u{319E}"] = "\u{5730}",
 ["\u{319F}"] = "\u{4EBA}",
 ["\u{02C0}"] = "\u{0294}",
 ["\u{02C1}"] = "\u{0295}",
 ["\u{06E5}"] = "\u{0648}",
 ["\u{06E6}"] = "\u{064A}",
}


-- File IO

---Remove a leading UTF-8 byte order mark (EF BB BF) from a string.
---Values that are not strings are returned unchanged.
---Exactly one value is returned in every case.
---@param text any
---@return any
function util.remove_bom(text)
 if type(text) ~= "string" then
   return text
 end
 -- The parentheses drop gsub's second result (the substitution count), so
 -- that it does not leak into the caller's argument or value lists.
 return (string.gsub(text, "^\xEF\xBB\xBF", ""))
end


---Read a whole file and return its content without a leading UTF-8 BOM.
---When the file cannot be opened, nil is returned; the failure is reported
---through `util.error` unless `allowe_missing` is truthy.
---Raises an error when `path` is not a string.
---@param path string
---@param allowe_missing boolean? do not report a file that cannot be opened
---@return string?
function util.read_file(path, allowe_missing)
 if type(path) ~= "string" then
   error("Invalid path.")
 end
 local handle = io.open(path, "r")
 if handle then
   local content = handle:read("*a")
   content = util.remove_bom(content)
   handle:close()
   return content
 end
 if not allowe_missing then
   util.error(string.format("Cannot open file '%s'.", path))
 end
 return nil
end


---Write `text` to the file at `path`, replacing any previous content.
---A file that cannot be opened for writing is reported through `util.error`.
---@param text string
---@param path string
function util.write_file(text, path)
 local handle = io.open(path, "w")
 if handle then
   handle:write(text)
   handle:close()
 else
   util.error(string.format("Cannot write to file '%s'.", path))
 end
end

---@alias CslDateVariable { date-parts: (string|number)[][]?, season: string|number, circa: boolean, literal: string|number?, raw: string?}


---Parse an EDTF-like date string ("2020-05-17", "2004?", "2004/2005", ...)
---into a date variable.
---
---Up to two "/"-separated range parts are read, each with up to three
---"-"-separated numeric parts, which are stored in `date-parts`. A trailing
---"?", "~" or "%" and any "X" digit set `circa`; a time after "T" is ignored.
---A part that is not numeric makes the whole input a `literal`. Blank input,
---or input without any usable number, yields nil.
---@param str string
---@return CslDateVariable?
function util.parse_edtf(str)
 if string.match(str, "^%s*$") then
   return nil
 end
 local date = {["date-parts"] = {}}
 local range_parts = util.split(str, "/")
 for i, range_part in ipairs(range_parts) do
   if i > 2 then
     break
   end
   date["date-parts"][i] = {}
   local negative_year = false
   if string.match(range_part, "^%-") or string.match(range_part, "^Y%-") then
     negative_year = true
   end
   -- Drop the optional "Y" prefix and the sign of the year.
   range_part = string.gsub(range_part, "^Y?[+-]?", "")
   if string.match(range_part, "[?~%%]$") then
     date.circa = true
     range_part = string.gsub(range_part, "[?~%%]$", "")
   end
   -- Drop the time of day.
   range_part = string.gsub(range_part, "T.*$", "")
   for j, date_part in ipairs(util.split(range_part, "%-")) do
     if j > 3 then
       break
     end
     if string.match(date_part, "X") then
       -- Unspecified digits are read as 0 and mark the date as approximate.
       date.circa = true
       date_part = string.gsub(date_part, "X", "0")
     end
     if string.match(date_part, "^%d+$") then
       local date_part_number = tonumber(date_part)
       if date_part_number > 0 then
         date["date-parts"][i][j] = date_part_number
       else
         -- A zero part ends this range part.
         break
       end
     elseif date_part ~= "" then
       -- util.error(string.format('Invalid EDTF date "%s".', str))
       return {literal = str}
     end
   end
   -- A negative year Y is stored as -1 - Y (presumably to convert from a
   -- numbering that has a year 0 — TODO confirm). The year may be missing,
   -- e.g. for the input "-", in which case there is nothing to negate.
   local year = date["date-parts"][i][1]
   if negative_year and year then
     date["date-parts"][i][1] = -1 - year
   end
 end

 local all_empty = true
 for _, parts in ipairs(date["date-parts"]) do
   if #parts > 0 then
     all_empty = false
     break
   end
 end

 if all_empty then
   return nil
 end

 return date
end


---Parse a name written as "Family || Given".
---When the string does not consist of exactly two "||"-separated parts, it
---is returned as a literal name.
---@param str string
---@return table name `{family = ..., given = ...}` or `{literal = str}`
function util.parse_extra_name(str)
 local parts = util.split(str, "%s*||%s*")
 if #parts ~= 2 then
   return {literal = str}
 end
 return {
   family = parts[1],
   given = parts[2],
 }
end


---CSL-M: `layout` extension
---Select the layout by the language of the item.
---The language-specific layouts of `element` are searched for the item's
---full language tag, then for its first two characters, then for the primary
---dialect of that two-letter code. On a match, that layout is returned
---together with the item's language. Without an item or without a match, the
---default layout of `element` and the language of the engine are returned.
---@param element Citation | Bibliography
---@param engine CiteProc
---@param item any
---@return Layout
---@return string
function util.get_layout_by_language(element, engine, item)
 if item then
   local entry_lang = item.language or ""
   local short_code = string.sub(entry_lang, 1, 2)
   local layouts = element.layouts_by_language
   local matched = layouts[entry_lang]
     or layouts[short_code]
     or layouts[util.primary_dialects[short_code] or ""]
   if matched then
     return matched, entry_lang
   end
 end
 return element.layout, engine.lang
end


-- Definition of the citation label, as ":"-separated entries. Each entry is
-- read by `util.get_trigraph_param`: "A" starts a new author, every following
-- "a" adds one character to that author and every "0" adds one year digit.
-- `util.get_citation_label` picks the entry by the number of names.
util.trigraph = "Aaaa00:AaAa00:AaAA00:AAAA00"


---Parse one trigraph definition such as "AaAa00".
---"A" starts a new author with one character, each "a" adds one character to
---the current author and each "0" adds one digit to the year. Any other
---character, and a definition that does not start with "A", is reported
---through `util.error`.
---@param trigraph string
---@return { authors: integer[], year: integer }
function util.get_trigraph_param(trigraph)
 if not trigraph or string.sub(trigraph, 1, 1) ~= "A" then
   util.error(string.format("Bad trigraph definition: '%s'", trigraph))
 end
 -- Number of characters to take from each author, in order.
 local authors = {}
 -- Number of year digits.
 local year_digits = 0

 for pos = 1, #trigraph do
   local char = string.sub(trigraph, pos, pos)
   if char == "0" then
     year_digits = year_digits + 1
   elseif char == "A" then
     authors[#authors + 1] = 1
   elseif char == "a" then
     local current = #authors
     authors[current] = authors[current] + 1
   else
     util.error(string.format("Invalid character '%s' in trigraph definition '%s'", char, trigraph))
   end
 end
 return {
   authors = authors,
   year = year_digits,
 }
end


---Build the citation label of an item, e.g. "Doe20", from its names and its
---issue year according to `util.trigraph`.
---
---The first variable of `util.variables.name` that holds at least one name
---is used. The trigraph entry is chosen by the number of names (the last
---entry when there are more names than entries). The year defaults to "0000"
---when the item has no issue year.
---@param item ItemData
---@return string
function util.get_citation_label(item)
 local label = ""
 local trigraph = util.split(util.trigraph, ":")
 local config = util.get_trigraph_param(trigraph[1])

 for _, name_variable in ipairs(util.variables.name) do
   local names = item[name_variable]
   -- An empty name list cannot select a trigraph entry (it would index
   -- `trigraph[0]`), so it is skipped like a missing variable.
   if names and #names > 0 then
     local param
     if #names > #trigraph then
       param = trigraph[#trigraph]
     else
       param = trigraph[#names]
     end
     config = util.get_trigraph_param(param)
     for i, name in ipairs(names) do
       if i > #config.authors then
         break
       end
       local name_label = ""
       if name and name.family then
         name_label = name.family
         -- Drop leading lower-case words (such as "van " or "de ") and a
         -- leading right single quotation mark.
         name_label = string.gsub(name_label, "^[ 'a-z]+%s*", "")
         name_label = string.gsub(name_label, "^\u{2019}+%s*", "")
       elseif name and name.literal then
         name_label = name.literal
       end
       name_label = uni_utf8.lower(name_label)
       -- Drop a leading English article.
       name_label = string.gsub(name_label, "^a%s+", "")
       name_label = string.gsub(name_label, "^an%s+", "")
       name_label = string.gsub(name_label, "^the%s+", "")
       -- TODO: Remove non-romanesque characters
       name_label = uni_utf8.sub(name_label, 1, config.authors[i])
       -- Capitalize the first character only.
       if #name_label > 1 then
         name_label = uni_utf8.upper(uni_utf8.sub(name_label, 1, 1)) .. uni_utf8.lower(uni_utf8.sub(name_label, 2))
       elseif #name_label == 1 then
         name_label = uni_utf8.upper(name_label)
       end
       label = label .. name_label
     end

     -- Only the first name variable that is present contributes.
     break
   end
 end

 if label == "" then
   -- TODO: try the title
 end

 local year = "0000"
 -- Guard every level: `date-parts` may be missing or empty.
 local date_parts = item.issued and item.issued["date-parts"]
 local first_year = date_parts and date_parts[1] and date_parts[1][1]
 if first_year then
   year = tostring(first_year)
 end
 -- Keep the last `config.year` digits.
 year = string.sub(year, -config.year)
 label = label .. year

 return label
end


return util