--
-- Copyright (c) 2021-2025 Zeping Lee
-- Released under the MIT license.
-- Repository: https://github.com/zepinglee/citeproc-lua
--

local engine = {}

local dom
local context
local element
local nodes
local node_locale
local node_style
local output
local util

-- Pick the module naming scheme for the current runtime.
-- `using_luatex` is true when `require("kpse")` succeeds (presumably only
-- under LuaTeX -- confirm). In that case the flat "citeproc-*" module names
-- are required; otherwise the dotted "citeproc.*" package paths are used.
local using_luatex, _ = pcall(require, "kpse")
if using_luatex then
 dom = require("luaxml-domobject")
 context = require("citeproc-context")
 element = require("citeproc-element")
 nodes = require("citeproc-nodes")
 node_locale = require("citeproc-node-locale")
 node_style = require("citeproc-node-style")
 output = require("citeproc-output")
 util = require("citeproc-util")
else
 dom = require("citeproc.luaxml.domobject")
 context = require("citeproc.context")
 element = require("citeproc.element")
 nodes = require("citeproc.nodes")
 node_locale = require("citeproc.node-locale")
 node_style = require("citeproc.node-style")
 output = require("citeproc.output")
 util = require("citeproc.util")
end

local Element = element.Element
local Style = node_style.Style
local Locale = node_locale.Locale
local Context = context.Context
local IrState = context.IrState
local LatexWriter = output.LatexWriter
local HtmlWriter = output.HtmlWriter
local SortStringFormat = output.SortStringFormat

local Position = util.Position


---@alias CitationId string
---@alias ItemId string | number
---@alias NoteIndex integer
---@alias ChapterIndex number

---@class CitationData
---@field citationID CitationId
---@field citationItems CitationItem[]
---@field properties CitationProperties
---@field citation_index integer
---@field sorted_items CitationItem[]

---@class CitationItem
---@field id CiteId
---@field prefix string?
---@field suffix string?
---@field locator string?
---@field label string?
---@field position_level Position?
---@field near_note boolean?
---@field num_citations integer?

---@class CitationProperties
---@field noteIndex NoteIndex
---@field chapterIndex ChapterIndex
---@field mode string?
---@field prefix string?
---@field suffix string?
---@field unsorted boolean?


---@class NameVariable
---@field family string?
---@field given string?
---@field dropping-particle string?
---@field non-dropping-particle string?
---@field suffix string?
---@field comma-suffix string | number | boolean?
---@field static-ordering string | number | boolean?
---@field literal string | number | boolean?
---@field parse-names string | number | boolean?

---@class DateVariable
---@field date-parts (string | number)[][]
---@field season (string | number)
---@field circa (string | number | boolean)
---@field literal string
---@field raw string

---@alias ItemData { id: ItemId, type: string, language: string?, [string]: string | number | NameVariable[] | DateVariable }


---@class Registry
---@field citations_by_id table<CitationId, CitationData>
---@field citation_list CitationData[]
---@field citations_by_item_id table<ItemId, CitationData[]>
---@field registry table<ItemId, ItemData>
---@field reflist ItemId[]
---@field uncited_list ItemId[]
---@field previous_citation CitationData?
---@field requires_sorting boolean
---@field widest_label string
---@field maxoffset integer
---@field second_field_align string | boolean
local Registry = {}


---@class CiteProc
---@field style Style
---@field sys CiteProcSys
---@field locales table<LanguageCode, Locale>
---@field system_locales table<LanguageCode, Locale>
---@field lang LanguageCode
---@field output_format OutputFormat
---@field opt table
---@field registry Registry
---@field cite_first_note_numbers table<ItemId, NoteIndex>
---@field cite_last_note_numbers table<ItemId, NoteIndex>
---@field tainted_item_ids table<ItemId, boolean>
---@field disam_irs IrNode[]
---@field cite_irs_by_output table<string, IrNode[]>
---@field person_names PersonNameIr[]
---@field person_names_by_output table<string, PersonNameIr[]>
---@field locale_tags_info_dict table<LanguageCode, table>
local CiteProc = {}

---@class CiteProcSys
---@field retrieveLocale fun(LanguageCode): string?
---@field retrieveItem fun(ItemId): ItemData?

--- Create a processor instance bound to a parsed style.
---@param sys table Host callbacks; `retrieveLocale` and `retrieveItem` are mandatory.
---@param style string Style source handed to `Style:parse`.
---@param lang string? Language used when the style declares no default locale or when `force_lang` is set ("en-US" if omitted).
---@param force_lang boolean? When truthy, `lang` takes precedence over the style's default locale.
---@return CiteProc
function CiteProc.new(sys, style, lang, force_lang)
 -- Validate the host interface before doing any work.
 if not sys then
   error("\"citeprocSys\" required")
 elseif sys.retrieveLocale == nil then
   error("\"citeprocSys.retrieveLocale\" required")
 elseif sys.retrieveItem == nil then
   error("\"citeprocSys.retrieveItem\" required")
 end

 local parsed_style = Style:parse(style)

 -- The style's default locale wins unless it is absent or overridden.
 local engine_lang
 if force_lang or not parsed_style.default_locale then
   engine_lang = lang or "en-US"
 else
   engine_lang = parsed_style.default_locale
 end

 local options = {
   -- Similar to citeproc-js's development_extensions.wrap_url_and_doi
   wrap_url_and_doi = false,
   citation_link = false,
   title_link = false,
 }

 -- `previous_citation` starts out unset (nil).
 local registry = {
   citations_by_id = {},       -- map: citation id -> citation
   citation_list = {},         -- list of citations in document order
   citations_by_item_id = {},  -- map: item id -> list of citations
   registry = {},              -- map of bibliographic meta data
   reflist = {},               -- list of cited ids
   uncited_list = {},
   requires_sorting = false,
   widest_label = "",
   maxoffset = 0,
   second_field_align = false,
 }

 ---@type CiteProc
 local instance = {
   style = parsed_style,
   sys = sys,
   locales = {},
   system_locales = {},
   lang = engine_lang,
   output_format = LatexWriter:new(),
   opt = options,
   registry = registry,

   cite_first_note_numbers = {},
   cite_last_note_numbers = {},

   tainted_item_ids = {},

   -- { <ir1>, <ir2>, ...  }
   disam_irs = {},

   -- {
   --   ["Roe, J"] = {<ir1>},
   --   ["Doe, J"] = {<ir2>, <ir3>},
   --   ["Doe, John"] = {<ir2>},
   --   ["Doe, Jack"] = {<ir2>},
   -- }
   cite_irs_by_output = {},

   person_names = {},
   person_names_by_output = {},

   locale_tags_info_dict = {},
 }

 return setmetatable(instance, {__index = CiteProc})
end

--- Tell whether the loaded style declares an independent parent.
---@return boolean
function CiteProc:is_dependent_style()
 local parent = self.style.info.independent_parent
 return parent ~= nil
end

--- Return the `independent_parent` entry of the style info, if any.
---@return string?
function CiteProc:get_independent_parent()
 local info = self.style.info
 return info.independent_parent
end

--- Reset the per-run caches and re-register the given ids followed by the
--- ids of `registry.uncited_list` (duplicates are skipped) by rendering them
--- as one pseudo citation cluster.
---@param ids CiteId[]
function CiteProc:updateItems(ids)
 self.registry.reflist = {}
 self.registry.registry = {}
 self.person_names = {}
 self.person_names_by_output = {}
 self.disam_irs = {}
 self.cite_irs_by_output = {}

 local cite_items = {}
 local seen = {}

 -- Queue every id of `id_list` that has not been queued yet.
 local function enqueue(id_list)
   for _, id in ipairs(id_list) do
     if not seen[id] then
       seen[id] = true
       cite_items[#cite_items + 1] = {id = id}
     end
   end
 end

 enqueue(ids)
 enqueue(self.registry.uncited_list)

 -- Clean the first note number to reset all the positions
 self.cite_first_note_numbers = {}

 -- TODO: optimize this
 self:makeCitationCluster(cite_items)

 -- Discard the position state left behind by the pseudo cluster.
 self.registry.previous_citation = nil
 self.cite_first_note_numbers = {}
 self.cite_last_note_numbers = {}

 -- NOTE(review): `ipairs` only visits integer keys 1..n. If
 -- `registry.registry` is keyed by item id (as the `registry.registry[id]`
 -- lookups elsewhere in this file suggest), this loop visits nothing.
 -- Confirm whether `pairs` was intended before changing it.
 for _, item in ipairs(self.registry.registry) do
   item.year_suffix_number = nil
   item["year-suffix"] = nil
 end
end

--- Replace the uncited list with `uncited_ids` (deduplicated) and re-register
--- the current reference list followed by the uncited ids by rendering them
--- as one pseudo citation cluster.
---@param uncited_ids CiteId[]
function CiteProc:updateUncitedItems(uncited_ids)
 self.registry.registry = {}
 self.registry.uncited_list = {}
 self.person_names = {}
 self.person_names_by_output = {}
 self.disam_irs = {}
 self.cite_irs_by_output = {}

 local cite_items = {}
 local queued = {}

 -- Already cited items come first; the list is only cleared after it has
 -- been copied into the queue.
 for _, id in ipairs(self.registry.reflist) do
   if not queued[id] then
     queued[id] = true
     cite_items[#cite_items + 1] = {id = id}
   end
 end
 self.registry.reflist = {}

 -- An uncited id is queued only if it is not cited already, but it is
 -- always recorded (once) in the uncited list.
 local listed = {}
 local uncited_list = self.registry.uncited_list
 for _, id in ipairs(uncited_ids) do
   if not queued[id] then
     queued[id] = true
     cite_items[#cite_items + 1] = {id = id}
   end
   if not listed[id] then
     listed[id] = true
     uncited_list[#uncited_list + 1] = id
   end
 end

 -- TODO: optimize this
 self:makeCitationCluster(cite_items)

 -- Discard the position state left behind by the pseudo cluster.
 self.registry.previous_citation = nil
 self.cite_first_note_numbers = {}
 self.cite_last_note_numbers = {}
end


---@alias PreCitation [CitationId, NoteIndex, ChapterIndex?]
---@alias PostCitation [CitationId, NoteIndex, ChapterIndex?]

--- Insert or update one citation and re-render every citation affected by it.
--- Returns a pair `{params, result}` where `result` is the list produced by
--- `_rerun_changed_cites`.
---@param citation CitationData
---@param citations_pre PreCitation[]
---@param citations_post PostCitation[]
---@return [table, [integer, string, CitationId][]]
function CiteProc:processCitationCluster(citation, citations_pre, citations_post)
 self:_check_valid_citation_element()
 citation = self:_normalize_citation_input(citation)
 self:_check_input(citation, citations_pre, citations_post)

 local citation_list, item_ids = self:_build_reconstituted_citation_list(citation, citations_pre, citations_post)
 self:updateItems(item_ids)

 -- Sort the cites only when there is something to sort, the style asks
 -- for it and the caller did not opt out.
 local wants_sorting = #citation.sorted_items > 1
     and self.style.citation.sort
     and not citation.properties.unsorted
 if wants_sorting then
   citation.sorted_items = self.style.citation:sorted_citation_items(citation.citationItems, self)
 end

 -- The citation being processed is always re-rendered.
 local tainted_citation_ids = self:_set_positions(citation_list)
 tainted_citation_ids[citation.citationID] = true

 -- TODO: evaluate params.bibchange
 local params = {
   bibchange = false,
   citation_errors = {},
 }

 local result = self:_rerun_changed_cites(tainted_citation_ids)
 return {params, result}
end

-- A variant of processCitationCluster() for easy use with LaTeX.
-- It should be run after refreshing the registry (updateItems()) with all items.
-- Every citation already in the registry is treated as preceding the new one,
-- and only the string rendered for the new citation is returned.
---@param citation CitationData
---@return string
function CiteProc:process_citation(citation)
 self:_check_valid_citation_element()
 citation = self:_normalize_citation_input(citation)

 local citations_pre = {}
 for index, known in ipairs(self.registry.citation_list) do
   citations_pre[index] = {known.citationID, known.properties.noteIndex}
 end
 self:_check_input(citation, citations_pre, {})

 local citation_list = self:_build_reconstituted_citation_list(citation, citations_pre, {})

 -- Instead of a full updateItems(), only fetch the items cited here.
 for _, cite_item in ipairs(citation.citationItems) do
   self:get_item(cite_item.id)
 end

 local wants_sorting = #citation.sorted_items > 1
     and self.style.citation.sort
     and not citation.properties.unsorted
 if wants_sorting then
   citation.sorted_items = self.style.citation:sorted_citation_items(citation.citationItems, self)
 end

 self:_set_positions(citation_list)

 -- Only the new citation is rendered, so the result holds a single entry.
 local rendered = self:_rerun_changed_cites({[citation.citationID] = true})
 return rendered[1][2]
end

--- Render an ad-hoc citation cluster from a list of cite items.
--- Each cite is normalized, wrapped around its registry item, given a
--- position, and the cluster is then built with the style's citation element
--- (or the intext element for an "author-only" cite when the style has one).
--- The rendered items are remembered as `registry.previous_citation`.
---@param citation_items CitationItem[]
---@return any res Whatever `build_cluster` of the chosen element returns.
function CiteProc:makeCitationCluster(citation_items)
 local special_form = nil
 local items = {}

 for i, raw_item in ipairs(citation_items) do
   local normalized = self:_normalize_cite_item(raw_item)
   local item_data = self:get_item(normalized.id)

   -- Create a wrapper of the original item from registry so that
   -- it may hold different `locator` or `position` values for cites.
   local cite_item = setmetatable(normalized, {__index = item_data})

   if not special_form then
     -- Fixed: the last form was misspelled "coposite".
     for _, form in ipairs({"author-only", "suppress-author", "composite"}) do
       if cite_item[form] then
         special_form = form
       end
     end
   end

   -- Set "first-reference-note-number" variable when called from
   -- processCitationCluster() > updateItems()
   local citations = self.registry.citations_by_item_id[cite_item.id]
   if citations and #citations > 0 then
     cite_item["first-reference-note-number"] = citations[1].properties.noteIndex
   end

   -- The first occurrence of an item is "first", later ones "subsequent".
   cite_item.position_level = Position.First
   if self.cite_first_note_numbers[cite_item.id] then
     cite_item.position_level = Position.Subsequent
   else
     self.cite_first_note_numbers[cite_item.id] = 0
   end

   local preceding_cite
   if i == 1 then
     -- Ibid across clusters: the previous cluster must consist of a
     -- single cite referencing the same item.
     local previous_citation = self.registry.previous_citation
     if previous_citation then
       if #previous_citation.citationItems == 1 and previous_citation.citationItems[1].id == cite_item.id then
         preceding_cite = previous_citation.citationItems[1]
       end
     end
   elseif items[i - 1].id == cite_item.id then
     -- Fixed: compare against the normalized predecessor. The raw input
     -- item may carry a non-string id and lacks the default "page" label,
     -- which made the ibid detection miss or misclassify the cite.
     preceding_cite = items[i - 1]
   end

   if preceding_cite then
     cite_item.position_level = self:_get_ibid_position(cite_item, preceding_cite)
   end

   table.insert(items, cite_item)
 end

 if self.registry.requires_sorting then
   self:sort_bibliography()
 end

 self:_check_valid_citation_element()
 local citation_element = self.style.citation
 if special_form == "author-only" and self.style.intext then
   citation_element = self.style.intext
 end

 if #items > 1 and self.style.citation.sort then
   items = self.style.citation:sorted_citation_items(items, self)
 end
 local res = citation_element:build_cluster(items, self)

 -- Remember this cluster so that the next call can detect ibid positions.
 self.registry.previous_citation = {
   citationID = "pseudo-citation",
   citationItems = items,
   properties = {
     noteIndex = 0,
   },
 }
 return res
end

--- Build the bibliography entries and the accompanying parameters.
--- Returns `{params, entries}`; both are empty tables when the style has no
--- bibliography element.
---@param bibsection any
---@return [{[string]: string | number | boolean}, string[]]
function CiteProc:makeBibliography(bibsection)
 -- The bibsection works as a filter described in
 -- <https://citeproc-js.readthedocs.io/en/latest/running.html#selective-output-with-makebibliography>.
 local bibliography = self.style.bibliography
 if not bibliography then
   return {{}, {}}
 end

 -- Reset the label metrics before the entries are built.
 self.registry.widest_label = ""
 self.registry.maxoffset = 0

 local ids = self:_get_sorted_refs()
 local excluded_ids
 if bibsection then
   ids, excluded_ids = self:_filter_with_bibsection(ids, bibsection)
 end

 local entries = {}
 for _, id in ipairs(ids) do
   local entry = bibliography:build_bibliography_str(id, self)
   table.insert(entries, entry)
 end

 -- A markup may be a plain value or a function of the engine.
 local function resolve(markup)
   if type(markup) == "function" then
     return markup(self)
   end
   return markup
 end

 local markups = self.output_format.markups
 local bib_start, bib_end = markups["bibstart"], markups["bibend"]
 bib_start = resolve(bib_start)
 bib_end = resolve(bib_end)

 local params = {
   hangingindent = self.style.bibliography.hanging_indent,
   ["second-field-align"] = self.style.bibliography.second_field_align or self.registry.second_field_align or false,
   linespacing = self.style.bibliography.line_spacing,
   entryspacing = self.style.bibliography.entry_spacing,
   maxoffset = self.registry.maxoffset,
   widest_label = self.registry.widest_label,
   bibstart = bib_start,
   bibend = bib_end,
   entry_ids = ids,
   excluded_ids = excluded_ids,
 }

 return {params, entries}
end

--- Report an error through `util.error` when the style has no citation
--- element; the message names the parent for dependent styles.
function CiteProc:_check_valid_citation_element()
 if self.style.citation then
   return
 end

 local info = self.style.info
 if info and info.independent_parent then
   util.error(string.format("This is a dependent style linked to '%s'.", info.independent_parent))
 else
   util.error("No <citation> in style.")
 end
end

--- Return a deep copy of `citation` with the optional parts filled in:
--- a generated citationID, an item list with normalized cites, default
--- `noteIndex` / `chapterIndex` of 0 and a `sorted_items` clone of the items.
---@param citation CitationData
---@return CitationData
function CiteProc:_normalize_citation_input(citation)
 local copy = util.deep_copy(citation)

 -- Derive an id from the number of citations registered so far.
 copy.citationID = copy.citationID or ("CITATION-" .. tostring(#self.registry.citation_list))

 copy.citationItems = copy.citationItems or {}
 local cites = copy.citationItems
 for index, cite_item in ipairs(cites) do
   cites[index] = self:_normalize_cite_item(cite_item)
 end

 -- Fix missing noteIndex: sort_CitationNumberPrimaryAscendingViaMacroCitation.txt
 copy.properties = copy.properties or {}
 local properties = copy.properties
 properties.noteIndex = properties.noteIndex or 0
 properties.chapterIndex = properties.chapterIndex or 0

 copy.sorted_items = util.clone(cites)

 return copy
end

--- Return a shallow copy of the cite with a string id, a default "page"
--- label when only a locator is given, and empty prefix/suffix removed.
---@param cite_item CitationItem
---@return CitationItem
function CiteProc:_normalize_cite_item(cite_item)
 local cite = util.clone(cite_item)  -- shallow copy
 cite.id = tostring(cite.id)

 -- Use "page" as locator label if missing
 -- label_PluralWithAmpersand.txt
 if cite.locator and not cite.label then
   cite.label = "page"
 end

 -- NOTE(review): this context is not used afterwards in this function.
 -- It is still built because `Context:new` / `get_locale` may have side
 -- effects (e.g. loading the locale) -- confirm before removing.
 local the_context = Context:new()
 the_context.engine = self
 the_context.style = self.style
 the_context.area = self
 the_context.in_bibliography = false
 the_context.lang = self.lang
 the_context.locale = self:get_locale(self.lang)
 the_context.format = self.output_format

 -- Assert CSL rich-text or HTML-like tagged string.
 -- An empty affix is the same as no affix.
 if cite.prefix == "" then
   cite.prefix = nil
 end
 if cite.suffix == "" then
   cite.suffix = nil
 end

 return cite
end

--- Sanity-check the citation sequence (pre-citations, the citation itself,
--- post-citations). Raises an error when a citationID occurs twice and emits
--- warnings through `util.warning` when chapter or note indices decrease.
---@param citation CitationData
---@param citations_pre PreCitation[]
---@param citations_post PostCitation[]
function CiteProc:_check_input(citation, citations_pre, citations_post)
 -- Entries are {citation_id, note_index, chapter_number, name}.
 local citation_info_list = {}

 -- Append one entry per {id, note_index, chapter_index?} tuple.
 local function collect(entries, name_format)
   for i, entry in ipairs(entries) do
     local citation_id = entry[1]
     local chapter_number = entry[3]
     if not chapter_number then
       -- Fall back to the registered citation. An unknown id no longer
       -- crashes here; it is reported later when the list is rebuilt.
       local registered = self.registry.citations_by_id[citation_id]
       chapter_number = registered and registered.properties.chapterIndex or 0
     end
     table.insert(citation_info_list, {citation_id, entry[2], chapter_number, string.format(name_format, i)})
   end
 end

 collect(citations_pre, "citationsPre[%d]")
 table.insert(citation_info_list,
   {citation.citationID, citation.properties.noteIndex, citation.properties.chapterIndex or 0, "citation"})
 collect(citations_post, "citationsPost[%d]")

 ---@type table<CitationId, boolean>
 local citation_dict = {}
 local last_note_number = 0
 local last_chapter_number = 0

 for _, citation_info in ipairs(citation_info_list) do
   -- Index directly instead of table.unpack: the tuple may contain a nil.
   local citation_id = citation_info[1]
   local note_index = citation_info[2]
   local chapter_number = citation_info[3]
   local name = citation_info[4]

   if citation_dict[citation_id] then
     -- Fixed: the format string has two placeholders but only `name` was
     -- passed, so string.format itself raised "bad argument".
     error(string.format("Previously referenced citationID '%s' encountered at %s", citation_id, name))
   end
   citation_dict[citation_id] = true

   if chapter_number and chapter_number > 0 then
     if chapter_number < last_chapter_number then
       util.warning(string.format("Chapter index sequence is not sane at %s", name))
     end
     -- Note numbering restarts in every chapter.
     if chapter_number ~= last_chapter_number then
       last_note_number = 0
     end
     last_chapter_number = chapter_number
   end

   if note_index > 0 then
     if note_index < last_note_number then
       util.warning(string.format("Note index sequence is not sane at %s", name))
     end
     last_note_number = note_index
   end
 end
end

--- Register `citation` and rebuild the registry's citation list and lookup
--- maps from the pre-citations, the citation and the post-citations.
---@param citation CitationData
---@param citations_pre PreCitation[]
---@param citations_post PostCitation[]
---@return CitationData[] citation_list Citations in document order.
---@return CiteId[] item_ids Distinct item ids in order of first appearance.
function CiteProc:_build_reconstituted_citation_list(citation, citations_pre, citations_post)
 local registry = self.registry
 registry.citations_by_id[citation.citationID] = citation

 ---@type [CitationId, NoteIndex][]
 local citation_note_pairs = {}
 util.extend(citation_note_pairs, citations_pre)
 table.insert(citation_note_pairs, {citation.citationID, citation.properties.noteIndex})
 util.extend(citation_note_pairs, citations_post)

 ---@type CiteId[]
 local item_ids = {}
 ---@type table<ItemId, boolean>
 local item_id_dict = {}
 ---@type CitationData[]
 local citation_list = {}
 ---@type table<CitationId, CitationData>
 local citations_by_id = {}
 -- TODO: Remove citations_by_item_id
 ---@type table<ItemId, CitationData[]>
 local citations_by_item_id = {}

 for citation_index, pair in ipairs(citation_note_pairs) do
   local citation_id, note_index = pair[1], pair[2]
   local entry = registry.citations_by_id[citation_id]
   if not entry then
     util.error("Citation not in registry.")
   end

   -- The position in the list and the note index come from the caller.
   entry.citation_index = citation_index
   entry.properties.noteIndex = note_index

   citation_list[#citation_list + 1] = entry
   citations_by_id[entry.citationID] = entry

   for _, cite_item in ipairs(entry.citationItems) do
     local item_id = cite_item.id
     local citing = citations_by_item_id[item_id]
     if not item_id_dict[item_id] then
       item_id_dict[item_id] = true
       item_ids[#item_ids + 1] = item_id
       citing = {}
       citations_by_item_id[item_id] = citing
     end
     citing[#citing + 1] = entry
   end
 end

 -- Citations that are not part of the new sequence drop out of the registry.
 registry.citation_list = citation_list
 registry.citations_by_id = citations_by_id
 registry.citations_by_item_id = citations_by_item_id
 return citation_list, item_ids
end

--- Update the cite positions chapter by chapter and collect the ids of the
--- citations that have to be rendered again.
---@param citation_list CitationData[]
---@return table<CitationId, boolean>
function CiteProc:_set_positions(citation_list)
 ---@type table<CitationId, boolean>
 local tainted_citation_ids = {}

 -- Group the citations by chapter, keeping their order within a chapter.
 ---@type {[integer]: CitationData[]}
 local chapter_citations = {}
 for _, citation in ipairs(citation_list) do
   local chapter_number = citation.properties.chapterIndex
   local bucket = chapter_citations[chapter_number]
   if not bucket then
     bucket = {}
     chapter_citations[chapter_number] = bucket
   end
   bucket[#bucket + 1] = citation
 end

 for _, citations in pairs(chapter_citations) do
   self:_update_chapter_positions(citations, tainted_citation_ids)
 end

 -- Update tainted citation ids because of citation-number's change
 -- The self.tainted_item_ids were added in the sort_bibliography() procedure.
 local citations_by_item_id = self.registry.citations_by_item_id
 for item_id in pairs(self.tainted_item_ids) do
   local citing = citations_by_item_id[item_id]
   if citing then
     for _, citation in ipairs(citing) do
       tainted_citation_ids[citation.citationID] = true
     end
   end
 end

 return tainted_citation_ids
end

--- Recompute the position properties of every cite in the given citations
--- (called once per chapter by `_set_positions`) and mark the citations whose
--- cites changed in `tainted_citation_ids`.
--- In-text citations (noteIndex == 0) and note citations are processed as two
--- independent sequences.
---@param citation_list CitationData[]
---@param tainted_citation_ids table<CitationId, boolean> Updated in place and also returned.
---@return table<string, boolean>
function CiteProc:_update_chapter_positions(citation_list, tainted_citation_ids)
 ---@type CitationData[]
 local in_text_citations = {}
 ---@type CitationData[]
 local note_citations = {}
 -- Split the citations by kind, keeping their relative order.
 for _, citation in ipairs(citation_list) do
   if citation.properties.noteIndex == 0 then
     table.insert(in_text_citations, citation)
   else
     table.insert(note_citations, citation)
   end
 end

 for _, citations in ipairs({in_text_citations, note_citations}) do
   -- Note index of the first / most recent counted reference to each item.
   ---@type table<CiteId, NoteIndex>
   local first_ref = {}
   ---@type table<CiteId, NoteIndex>
   local last_ref = {}
   -- Citation ids grouped by note; the group size is used by the ibid rule.
   ---@type table<NoteIndex, CitationId[]>
   local num_citations_in_note = {}

   for _, citation in ipairs(citations) do
     local note_index = citation.properties.noteIndex
     if not num_citations_in_note[note_index] then
       num_citations_in_note[note_index] = {}
     end
     table.insert(num_citations_in_note[note_index], citation.citationID)
   end

   -- Last citation that was neither "author-only" nor "full-cite".
   local previous_citation
   for _, citation in ipairs(citations) do
     local mode = citation.properties.mode
     local note_index = citation.properties.noteIndex
     -- Last counted cite within the current citation.
     local previous_cite
     for _, cite_item in ipairs(citation.sorted_items) do
       -- Snapshot taken BEFORE the update so that changes can be detected.
       -- NOTE(review): a property that is nil here produces no key in this
       -- table, so `_check_tainted_position_change` does not compare it.
       local position_properties = {
         position_level = cite_item.position_level,
         ["first-reference-note-number"] = cite_item["first-reference-note-number"],
         near_note = cite_item.near_note,
       }

       self:_set_cite_item_position(cite_item, note_index, previous_cite, previous_citation, citation, first_ref,
         last_ref, num_citations_in_note)

       if self:_check_tainted_position_change(cite_item, position_properties) then
         tainted_citation_ids[citation.citationID] = true
       end

       -- https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html#citations
       -- Citations within the main text of the document have a noteIndex of zero.
       -- "author-only" and "full-cite" cites do not count as references.
       if mode ~= "author-only" and mode ~= "full-cite" then
         if not first_ref[cite_item.id] and note_index > 0 then
           -- note_index == 0 implied an in-text citation
           first_ref[cite_item.id] = note_index
         end
         last_ref[cite_item.id] = note_index
         previous_cite = cite_item
       end
     end

     if mode ~= "author-only" and mode ~= "full-cite" then
       previous_citation = citation
     end
   end

   -- Only for note styles with `has_disambiguate` set: track how often each
   -- item is cited within this sequence.
   if self.style.class == "note" and self.style.has_disambiguate then
     ---@type table<CiteId, CitationData[]>
     local citations_by_item_id = {}
     for _, citation in ipairs(citations) do
       if citation.properties.mode ~= "author-only" and citation.properties.mode ~= "full-cite" then
         for _, cite_item in ipairs(citation.sorted_items) do
           if not citations_by_item_id[cite_item.id] then
             citations_by_item_id[cite_item.id] = {}
           end
           table.insert(citations_by_item_id[cite_item.id], citation)
         end
       end
     end
     for _, citation in ipairs(citations) do
       if citation.properties.mode ~= "author-only" and citation.properties.mode ~= "full-cite" then
         for _, cite_item in ipairs(citation.sorted_items) do
           assert(citations_by_item_id[cite_item.id])
           local num_citations = #citations_by_item_id[cite_item.id]
           -- Taint every citation of the item when the count is new or
           -- crosses the "cited once" / "cited several times" boundary.
           if not cite_item.num_citations or (num_citations < 2) ~= (cite_item.num_citations < 2) then
             -- self.tainted_item_ids[cite_item.id] = true
             for _, citation_ in ipairs(citations_by_item_id[cite_item.id]) do
               tainted_citation_ids[citation_.citationID] = true
             end
           end
           cite_item.num_citations = num_citations
         end
       end
     end
   end
 end

 return tainted_citation_ids
end

--- Set `position_level`, `near_note` and, for repeated items,
--- "first-reference-note-number" on one cite.
--- `first_ref` / `last_ref` map item ids to the note index of the first / last
--- reference seen so far.
function CiteProc:_set_cite_item_position(cite_item, note_index, previous_cite, previous_citation, citation, first_ref,
   last_ref, num_citations_in_note)
 -- https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html#citations
 -- Citations within the main text of the document have a noteIndex of zero.
 local mode = citation.properties.mode
 if mode == "author-only" or mode == "full-cite" then
   -- discretionary_IbidInAuthorDateStyleWithoutIntext.txt
   cite_item.position_level = Position.First
   cite_item.near_note = false
   return
 end

 -- An item that has been referenced before is at least "subsequent".
 local first_note = first_ref[cite_item.id]
 if first_note then
   cite_item.position_level = Position.Subsequent
   cite_item["first-reference-note-number"] = first_note
 else
   cite_item.position_level = Position.First
 end

 -- An immediately preceding cite of the same item refines it to an ibid level.
 local ibid_source = self:_find_preceding_ibid_item(cite_item, previous_cite, previous_citation, note_index,
   num_citations_in_note)
 if ibid_source then
   cite_item.position_level = self:_get_ibid_position(cite_item, ibid_source)
 end

 -- near_note: the last reference lies within the style's near-note-distance.
 cite_item.near_note = false
 local last_note = last_ref[cite_item.id]
 if last_note then
   cite_item.near_note = ((note_index - last_note) <= self.style.citation.near_note_distance)
 end
end

-- Find the preceding cite referencing the same item.
-- Returns that cite, or nil when the ibid conditions are not met.
function CiteProc:_find_preceding_ibid_item(cite_item, previous_cite, previous_citation, note_index,
   num_citations_in_note)
 if previous_cite then
   -- a. the current cite immediately follows on another cite, within the same
   --    citation, that references the same item
   if cite_item.id == previous_cite.id then
     return previous_cite
   end
   -- A different cite precedes this one: the previous citation is not consulted.
   return nil
 end

 if not previous_citation then
   return nil
 end

 -- (hidden) The previous citation is the only one in the previous note.
 --    See also
 --    https://github.com/citation-style-language/documentation/issues/121
 --    position_IbidWithMultipleSoloCitesInBackref.txt
 -- b. the current cite is the first cite in the citation, and the previous
 --    citation consists of a single cite referencing the same item
 local previous_note_number = previous_citation.properties.noteIndex
 local num_previous_note_citations = #num_citations_in_note[previous_note_number]
 local in_adjacent_solo_note = previous_note_number == note_index - 1 and num_previous_note_citations == 1
 local in_same_note = previous_note_number == note_index
 if not (in_adjacent_solo_note or in_same_note) then
   return nil
 end

 if #previous_citation.sorted_items ~= 1 then
   return nil
 end

 local candidate = previous_citation.sorted_items[1]
 if candidate.id == cite_item.id then
   return candidate
 end
 return nil
end

--- Classify a cite that directly follows `preceding_cite` (same item) by
--- comparing their locators and labels.
---@return Position
function CiteProc:_get_ibid_position(item, preceding_cite)
 if not item.locator then
   -- Dropping a locator that the preceding cite had is only "subsequent".
   if preceding_cite.locator then
     return Position.Subsequent
   end
   return Position.Ibid
 end

 -- The current cite has a locator: plain ibid requires the same locator
 -- and the same label as the preceding cite.
 if preceding_cite.locator
     and item.locator == preceding_cite.locator
     and item.label == preceding_cite.label then
   return Position.Ibid
 end
 return Position.IbidWithLocator
end

--- Tell whether any property recorded in `position_properties` differs from
--- the current value on `cite_item`.
--- Only keys present in `position_properties` are compared, so a property
--- whose recorded value was nil is not checked.
---@return boolean
function CiteProc:_check_tainted_position_change(cite_item, position_properties)
 local changed = false
 for key, recorded in pairs(position_properties) do
   if cite_item[key] ~= recorded then
     changed = true
     break
   end
 end
 return changed
end

--- Render every tainted citation again.
--- Returns a list of `{citation_index, citation_str, citation_id}` triples,
--- sorted by citation index so that the output order is deterministic
--- (the iteration order of `pairs` is unspecified).
---@param tainted_citation_ids table<CitationId, boolean>
---@return [integer, string, CitationId][]
function CiteProc:_rerun_changed_cites(tainted_citation_ids)
 local result = {}
 for citation_id, _ in pairs(tainted_citation_ids) do
   local citation = self.registry.citations_by_id[citation_id]
   local citation_index = citation.citation_index
   local mode = citation.properties.mode
   -- Note styles ignore "suppress-author" when choosing the element.
   if mode == "suppress-author" and self.style.class == "note" then
     mode = nil
   end

   -- Default to <citation>; "author-only" uses <intext> when the style has
   -- one, and "full-cite" uses the full-citation element except in note
   -- styles, which keep <citation>.
   local citation_element = self.style.citation
   if mode == "author-only" and self.style.intext then
     citation_element = self.style.intext
   elseif mode == "full-cite" and self.style.class ~= "note" then
     citation_element = self.style.full_citation
   end

   local citation_str = citation_element:build_citation_str(citation, self)
   table.insert(result, {citation_index, citation_str, citation_id})
 end

 table.sort(result, function(a, b)
   return (a[1] or 0) < (b[1] or 0)
 end)
 return result
end

--- Return the reference list, sorting the bibliography first when the
--- registry is flagged as requiring it.
---@return CiteId[]
function CiteProc:_get_sorted_refs()
 local registry = self.registry
 if registry.requires_sorting then
   self:sort_bibliography()
 end
 return self.registry.reflist
end

--- Apply the first filter kind present in `bibsection`, checked in the order
--- quash, select, include, exclude. Without any of them all ids are kept.
---@param ids CiteId[]
---@param bibsection any
---@return CiteId[] Ids that pass the filter.
---@return CiteId[] Ids that were filtered out.
function CiteProc:_filter_with_bibsection(ids, bibsection)
 local filter
 if bibsection.quash then
   filter = self.filter_quash
 elseif bibsection.select then
   filter = self.filter_select
 elseif bibsection.include then
   filter = self.filter_include
 elseif bibsection.exclude then
   filter = self.filter_exclude
 end

 if not filter then
   return ids, {}
 end
 return filter(self, ids, bibsection)
end

--- Test one `{field, value, negative?}` bibsection object against an item.
--- An empty `value` matches a missing or empty field; a table field matches
--- when it contains `value`; "keyword" is split on ";" / "," first; any other
--- field must equal `value`. `negative` inverts the result.
---@return boolean
function CiteProc:match_bibsection_object(item, bibsection_object)
 local field = bibsection_object.field
 local value = bibsection_object.value
 local field_value = item[field]

 local match = false
 if value == "" then
   match = (not field_value) or field_value == ""
 elseif type(field_value) == "table" then
   if util.in_list(value, field_value) then
     match = true
   end
 elseif field == "keyword" then
   if item.keyword and util.in_list(value, util.split(item.keyword, "%s*[;,]%s*")) then
     match = true
   end
 else
   match = (field_value == value)
 end

 if bibsection_object.negative then
   return not match
 end
 return match
end

--- Keep an item if, and only if, all of the `bibsection.select` objects match.
---@return CiteId[] Kept ids.
---@return CiteId[] Excluded ids.
function CiteProc:filter_select(ids, bibsection)
 local function matches_all(item)
   for _, bibsection_object in ipairs(bibsection.select) do
     if not self:match_bibsection_object(item, bibsection_object) then
       return false
     end
   end
   return true
 end

 local kept, excluded_ids = {}, {}
 for _, id in ipairs(ids) do
   local target = matches_all(self.registry.registry[id]) and kept or excluded_ids
   target[#target + 1] = id
 end
 return kept, excluded_ids
end

--- Keep an item when at least one condition in `bibsection.include`
--- matches it.
---@param ids CiteId[]
---@param bibsection any
---@return CiteId[] # ids matched by some condition
---@return CiteId[] # ids matched by no condition
function CiteProc:filter_include(ids, bibsection)
  local kept, dropped = {}, {}
  local conditions = bibsection.include
  for _, id in ipairs(ids) do
    local item = self.registry.registry[id]
    local any_match = false
    for _, condition in ipairs(conditions) do
      if self:match_bibsection_object(item, condition) then
        -- The first matching condition settles it.
        any_match = true
        break
      end
    end
    local bucket = any_match and kept or dropped
    bucket[#bucket + 1] = id
  end
  return kept, dropped
end

--- Keep an item only when none of the conditions in `bibsection.exclude`
--- matches it.
---@param ids CiteId[]
---@param bibsection any
---@return CiteId[] # ids matched by no condition
---@return CiteId[] # ids matched by at least one condition
function CiteProc:filter_exclude(ids, bibsection)
  local kept, dropped = {}, {}
  local conditions = bibsection.exclude
  for _, id in ipairs(ids) do
    local item = self.registry.registry[id]
    local any_match = false
    for _, condition in ipairs(conditions) do
      if self:match_bibsection_object(item, condition) then
        -- A single matching condition excludes the item.
        any_match = true
        break
      end
    end
    local bucket = any_match and dropped or kept
    bucket[#bucket + 1] = id
  end
  return kept, dropped
end

--- Drop an item when every condition in `bibsection.quash` matches it;
--- keep it otherwise.
---@param ids CiteId[]
---@param bibsection any
---@return CiteId[] # ids that fail at least one condition
---@return CiteId[] # ids that satisfy all conditions
function CiteProc:filter_quash(ids, bibsection)
  local kept, dropped = {}, {}
  local conditions = bibsection.quash
  for _, id in ipairs(ids) do
    local item = self.registry.registry[id]
    local all_match = true
    for _, condition in ipairs(conditions) do
      if not self:match_bibsection_object(item, condition) then
        -- One non-matching condition saves the item from being quashed.
        all_match = false
        break
      end
    end
    local bucket = all_match and dropped or kept
    bucket[#bucket + 1] = id
  end
  return kept, dropped
end

--- Select the writer used for output.
--- `"latex"` installs a `LatexWriter` and `"html"` an `HtmlWriter`; any other
--- value leaves the current `output_format` untouched.
---@param format string
function CiteProc:set_output_format(format)
  if format == "html" then
    self.output_format = HtmlWriter:new()
    return
  end
  if format == "latex" then
    self.output_format = LatexWriter:new()
  end
end

--- Turn on both linking options: `citation_link` and `wrap_url_and_doi`.
function CiteProc:enable_linking()
  local options = self.opt
  options.citation_link = true
  options.wrap_url_and_doi = true
end

--- Turn off both linking options: `citation_link` and `wrap_url_and_doi`.
function CiteProc:disable_linking()
  local options = self.opt
  options.citation_link = false
  options.wrap_url_and_doi = false
end

--- Recursively build an element tree from an XML DOM node.
---
--- The element class is looked up in `nodes` by the node's element name and
--- instantiated through its `from_node` constructor. Child element nodes are
--- converted the same way and appended to `children`; children that produce
--- no element are skipped.
---@param node any XML DOM node
---@return Element? # nil when no class is registered for the node's name
function CiteProc.create_element_tree(node)
  local element_class = nodes[node:get_element_name()]
  if not element_class then
    return nil
  end

  local el = element_class:from_node(node)
  if not el then
    return el
  end

  for _, child_node in ipairs(node:get_children()) do
    if child_node:is_element() then
      local child_el = CiteProc.create_element_tree(child_node)
      if child_el then
        -- Create the children list lazily, only once there is a child to hold.
        if not el.children then
          el.children = {}
        end
        table.insert(el.children, child_el)
      end
    end
  end
  return el
end

--- Look up an item by id, loading and registering it on first access.
---
--- A newly loaded item is passed through `process_extra_note`, appended to the
--- reference list, given the next `citation-number`, cached in the registry,
--- and the registry is flagged as needing a sort.
---@param id ItemId
---@return ItemData? # nil when the item cannot be retrieved
function CiteProc:get_item(id)
  ---@type ItemData?
  local cached = self.registry.registry[id]
  if cached then
    return cached
  end

  local item = self:_retrieve_item(id)
  if not item then
    return nil
  end
  item = self:process_extra_note(item)

  local registry = self.registry
  local reflist = registry.reflist
  table.insert(reflist, id)
  -- The item was just appended, so its number is the new list length.
  item["citation-number"] = #reflist
  registry.registry[id] = item
  registry.requires_sorting = true
  return item
end

--- Retrieve an item through `sys.retrieveItem` and return a normalized
--- shallow copy of it.
---
--- Normalization is applied to the copy only, so the table handed back by
--- `sys.retrieveItem` is left unmodified. The copy is shallow: nested tables
--- are shared with the source item.
---@param id ItemId
---@return ItemData? # nil when `sys.retrieveItem` yields nothing
function CiteProc:_retrieve_item(id)
  -- Retrieve, copy, and normalize
  local item = self.sys.retrieveItem(id)
  if not item then
    return nil
  end

  local res = {}
  for key, value in pairs(item) do
    res[key] = value
  end

  -- TODO: normalize data input
  -- Write the stringified id to the copy rather than to the caller-owned item.
  res.id = tostring(item.id)

  -- if res["page"] and not res["page-first"] then
  --   local page_first = util.split(res["page"], "%s*[&,-]%s*")[1]
  --   page_first = util.split(page_first, util.unicode["en dash"])[1]
  --   res["page-first"] = page_first
  -- end

  return res
end

-- TODO: Normalize all inputs
--- Promote `field: value` lines found in an item's `note` into item fields.
---
--- Each line of `item.note` that looks like `field: value` is removed from the
--- note and handled according to `util.variable_types[field]`:
---   * date variables are parsed with `util.parse_edtf`;
---   * name variables are parsed with `util.parse_extra_name` and collected
---     into a list (several lines for one field accumulate);
---   * everything else is stored as the raw string value.
--- An existing item field is not overwritten, except for `type` and date
--- variables. Lines that are not `field: value` pairs are kept and rejoined
--- with "\n" as the new `note`. The item is modified in place.
---@param item table
---@return table # the same item
function CiteProc:process_extra_note(item)
  if item.note then
    local note_fields = {}
    local note_lines = {}
    for _, line in ipairs(util.split(item.note, "%s*\r?\n%s*")) do
      -- The hyphen is escaped (`%-`): an unescaped `-` right after a class
      -- such as `%w` inside a set has no defined meaning in Lua patterns.
      local field, value = string.match(line, "^([%w%-_ ]+):%s*(.*)$")
      if field then
        local variable_type = util.variable_types[field]
        if not item[field] or field == "type" or variable_type == "date" then
          if variable_type == "date" then
            item[field] = util.parse_edtf(value)
          elseif variable_type == "name" then
            -- Names are gathered first and assigned after the loop, so that
            -- several lines for the same field all end up in one list.
            if not note_fields[field] then
              note_fields[field] = {}
            end
            table.insert(note_fields[field], util.parse_extra_name(value))
          else
            -- Numbers and all remaining variable types keep the raw string.
            item[field] = value
          end
        end
      else
        table.insert(note_lines, line)
      end
    end
    for field, value in pairs(note_fields) do
      item[field] = value
    end
    item.note = table.concat(note_lines, "\n")
  end
  return item
end

--- Sort the registered items according to the `sort` of the style's
--- `bibliography` and renumber them.
---
--- Does nothing when the style has no bibliography or no bibliography sort.
--- Otherwise the reference list is rebuilt in sorted order, every item gets
--- its new `citation-number`, and the ids of items whose number changed are
--- recorded in `tainted_item_ids`.
function CiteProc:sort_bibliography()
  local bibliography = self.style.bibliography
  local sort_spec = bibliography and bibliography.sort
  if not sort_spec then
    return
  end

  local entries = {}
  for _, id in ipairs(self.registry.reflist) do
    entries[#entries + 1] = self.registry.registry[id]
  end

  local sort_state = IrState:new()
  local sort_context = Context:new()
  sort_context.engine = self
  sort_context.style = self.style
  sort_context.area = bibliography
  sort_context.in_bibliography = true
  sort_context.lang = self.lang
  sort_context.locale = self:get_locale(self.lang)
  sort_context.name_inheritance = bibliography.name_inheritance
  sort_context.format = SortStringFormat:new()
  -- sort_context.id = id
  sort_context.cite = nil
  -- sort_context.reference = self:get_item(id)

  sort_spec:sort(entries, sort_state, sort_context)

  local reflist = {}
  local tainted = {}
  self.registry.reflist = reflist
  self.tainted_item_ids = tainted
  for index, entry in ipairs(entries) do
    -- An item whose number moved must be flagged as tainted.
    if entry["citation-number"] ~= index then
      tainted[entry.id] = true
    end
    entry["citation-number"] = index
    reflist[index] = entry.id
  end
  self.registry.requires_sorting = false
end

--- Return the locale for `lang`, building and caching the merged locale on
--- first use. `lang` is first mapped through `util.primary_dialects` when an
--- entry exists for it.
---@param lang string
---@return Locale
function CiteProc:get_locale(lang)
  local resolved = util.primary_dialects[lang] or lang
  return self.locales[resolved] or self:get_merged_locales(resolved)
end

--- Build the effective locale for `lang` by merging all fallback locales,
--- cache it in `self.locales[lang]`, and return it.
---
--- Candidates are collected from highest to lowest priority and merged in
--- reverse, so the lowest-priority locale is merged first.
---@param lang string
---@return Locale
function CiteProc:get_merged_locales(lang)
  local language = string.sub(lang, 1, 2)
  local primary_dialect = util.primary_dialects[language]

  local candidates = {}
  -- Append a candidate, skipping sources that yielded no locale.
  local function push(candidate)
    if candidate ~= nil then
      candidates[#candidates + 1] = candidate
    end
  end

  local style_locales = self.style.locales

  -- 1. In-style cs:locale elements
  --    i. `xml:lang` set to chosen dialect, “de-AT”
  push(style_locales[lang])
  --    ii. `xml:lang` set to matching language, “de” (German)
  if language ~= lang then
    push(style_locales[language])
  end
  --    iii. `xml:lang` not set
  push(style_locales["@generic"])

  -- 2. Locale files
  --    iv. `xml:lang` set to chosen dialect, “de-AT”
  push(self:get_system_locale(lang))
  --    v. `xml:lang` set to matching primary dialect, “de-DE” (Standard German)
  --       (only applicable when the chosen locale is a secondary dialect)
  if primary_dialect and primary_dialect ~= lang then
    push(self:get_system_locale(primary_dialect))
  end
  --    vi. `xml:lang` set to “en-US” (American English)
  if not (lang == "en-US" or primary_dialect == "en-US") then
    push(self:get_system_locale("en-US"))
  end

  -- Merge from the end of the list back to the front.
  local merged = Locale:new()
  for index = #candidates, 1, -1 do
    merged:merge(candidates[index])
  end

  self.locales[lang] = merged
  return merged
end

--- Return the locale loaded from the locale file for `lang`.
---
--- The result is cached in `self.system_locales`. On a cache miss the locale
--- text is fetched with `sys.retrieveLocale`, parsed, and converted from its
--- `locale` root element. A warning is emitted and nil returned when the
--- locale text cannot be retrieved.
---@param lang string
---@return Locale?
function CiteProc:get_system_locale(lang)
  local cached = self.system_locales[lang]
  if cached then
    return cached
  end

  local locale_str = self.sys.retrieveLocale(lang)
  if not locale_str then
    util.warning(string.format("Failed to retrieve locale '%s'", lang))
    return nil
  end

  local root_element = dom.parse(locale_str):get_path("locale")[1]
  local loaded = Locale:from_node(root_element)
  self.system_locales[lang] = loaded
  return loaded
end


--- Return the `class` of the loaded style, or nil when there is no style or
--- the style has no class set.
function CiteProc:get_style_class()
  local style = self.style
  return style and style.class or nil
end


---@class Macro: Element
local Macro = Element:derive("macro")

--- Build a `Macro` from its XML node: read the `name` attribute and process
--- the node's children.
function Macro:from_node(node)
  local macro = Macro:new()
  macro.children = {}
  macro:set_attribute(node, "name")
  macro:process_children_nodes(node)
  return macro
end

--- Build the macro's IR by delegating to `build_group_ir`.
---@param engine CiteProc
---@param state IrState
---@param context Context
---@return IrNode?
function Macro:build_ir(engine, state, context)
  -- The parentheses truncate the call to a single return value.
  return (self:build_group_ir(engine, state, context))
end


-- Expose the CiteProc class on the module table and return the module.
engine.CiteProc = CiteProc

return engine