-- The semantic analysis step of static analysis determines the meaning of the different function calls.

local lexical_analysis = require("explcheck-lexical-analysis")
local syntactic_analysis = require("explcheck-syntactic-analysis")
local get_option = require("explcheck-config").get_option
local ranges = require("explcheck-ranges")
local parsers = require("explcheck-parsers")
local identity = require("explcheck-utils").identity

local get_token_byte_range = lexical_analysis.get_token_byte_range
local is_token_simple = lexical_analysis.is_token_simple
local token_types = lexical_analysis.token_types

local extract_text_from_tokens = syntactic_analysis.extract_text_from_tokens

local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE

local new_range = ranges.new_range
local range_flags = ranges.range_flags

local INCLUSIVE = range_flags.INCLUSIVE
local MAYBE_EMPTY = range_flags.MAYBE_EMPTY

local call_types = syntactic_analysis.call_types
local get_calls = syntactic_analysis.get_calls
local get_call_token_range = syntactic_analysis.get_call_token_range
local transform_replacement_text_tokens = syntactic_analysis.transform_replacement_text_tokens

local CALL = call_types.CALL
local OTHER_TOKENS = call_types.OTHER_TOKENS

local lpeg = require("lpeg")

-- The kinds of statements recorded by the semantic analysis. The values are human-readable labels that are stored
-- in the `type` field of the statement tables built below.
local statement_types = {
 FUNCTION_DEFINITION = "function definition",
 FUNCTION_VARIANT_DEFINITION = "function variant definition",
 OTHER_STATEMENT = "other statement",  -- a call that was not understood as any of the above
 OTHER_TOKENS_SIMPLE = "block of other simple tokens",  -- see classify_tokens()
 OTHER_TOKENS_COMPLEX = "block of other complex tokens",  -- see classify_tokens()
}

-- Short local aliases for the individual statement types.
local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
local FUNCTION_VARIANT_DEFINITION = statement_types.FUNCTION_VARIANT_DEFINITION
local OTHER_STATEMENT = statement_types.OTHER_STATEMENT
local OTHER_TOKENS_SIMPLE = statement_types.OTHER_TOKENS_SIMPLE
local OTHER_TOKENS_COMPLEX = statement_types.OTHER_TOKENS_COMPLEX

-- Subtypes that refine a statement type, keyed by the name of the statement type they belong to. The values are
-- stored in the `subtype` field of function definition statements.
local statement_subtypes = {
 FUNCTION_DEFINITION = {
   DIRECT = "direct function definition",  -- recorded together with a replacement text argument
   INDIRECT = "indirect function definition",  -- recorded together with the csname of a base function
 }
}

-- Short local aliases for the function definition subtypes.
local FUNCTION_DEFINITION_DIRECT = statement_subtypes.FUNCTION_DEFINITION.DIRECT
local FUNCTION_DEFINITION_INDIRECT = statement_subtypes.FUNCTION_DEFINITION.INDIRECT

-- Numeric levels of confidence about the correctness of the information recorded in a statement. The values are
-- ordered, so that several confidences can be combined by taking their minimum.
local statement_confidences = {
 DEFINITELY = 1,
 MAYBE = 0.5,
 NONE = 0,
}

-- Short local aliases for the individual confidence levels.
local DEFINITELY = statement_confidences.DEFINITELY
local MAYBE = statement_confidences.MAYBE
local NONE = statement_confidences.NONE

-- The representations that a control sequence name (or a part of it, such as its argument specifiers) can take
-- when it is stored as a table with the fields `payload`, `transcript`, and `type`. For TEXT, the payload is
-- a string; for PATTERN, the payload is an LPeg pattern and the transcript is its human-readable rendition.
local csname_types = {
 TEXT = "direct text representation of a control sequence name or its part, usually paired with confidence DEFINITELY",
 PATTERN = "a PEG pattern that recognizes different control sequences or their parts, usually paired with confidence MAYBE"
}

-- Short local aliases for the individual csname types.
local TEXT = csname_types.TEXT
local PATTERN = csname_types.PATTERN

-- Determine the meaning of function calls and register any issues.
local function semantic_analysis(pathname, content, issues, results, options)

 -- Determine the type of a span of tokens as either "simple text" [1, p. 383] with no expected side effects or
 -- a more complex material that may have side effects and presents a boundary between chunks of well-understood
 -- expl3 statements.
 --
 --  [1]: Donald Ervin Knuth. 1986. TeX: The Program. Addison-Wesley, USA.
 --
 local function classify_tokens(tokens, token_range)
   for _, token in token_range:enumerate(tokens) do
     if not is_token_simple(token) then  -- complex material
       return OTHER_TOKENS_COMPLEX
     end
   end
   return OTHER_TOKENS_SIMPLE  -- simple material
 end

 -- Extract statements from function calls and record them. For all identified function definitions, also record replacement texts.
 local function record_statements_and_replacement_texts(tokens, transformed_tokens, calls, first_map_back, first_map_forward)
   local statements = {}
   local replacement_text_tokens = {}
   for call_number, call in ipairs(calls) do

     local call_range = new_range(call_number, call_number, INCLUSIVE, #calls)
     local byte_range = call.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)

     -- Try and convert tokens from an argument into a text.
     local function extract_text_from_argument(argument)
       assert(lpeg.match(parsers.n_type_argument_specifier, argument.specifier) ~= nil)
       return extract_text_from_tokens(argument.token_range, transformed_tokens, first_map_forward)
     end

     -- Extract the name of a control sequence from a call argument.
     local function extract_csname_from_argument(argument)
       local csname
       if argument.specifier == "N" then
         local csname_token = transformed_tokens[first_map_forward(argument.token_range:start())]
         if csname_token.type ~= CONTROL_SEQUENCE then  -- the N-type argument is not a control sequence, give up
           return nil
         end
         csname = csname_token.payload
       elseif argument.specifier == "c" then
         csname = extract_text_from_argument(argument)
         if csname == nil then  -- the c-type argument contains complex material, give up
           return nil
         end
       else
         return nil
       end
       assert(csname ~= nil)
       return csname
     end

     -- Split an expl3 control sequence name to a stem and the argument specifiers.
     local function parse_expl3_csname(csname)
       local _, _, csname_stem, argument_specifiers = csname:find("([^:]*):([^:]*)")
       return csname_stem, argument_specifiers
     end

     -- Determine whether a function is private or public based on its name.
     local function is_function_private(csname)
       return csname:sub(1, 2) == "__"
     end

     -- Replace the argument specifiers in an expl3 control sequence name.
     local function replace_argument_specifiers(csname_stem, argument_specifiers)
       local csname
       if type(argument_specifiers) == 'string' then
         csname = string.format("%s:%s", csname_stem, argument_specifiers)
       else
         local transcript = string.format("%s:%s", csname_stem, argument_specifiers.transcript)
         if argument_specifiers.type == TEXT then
           csname = {
             payload = string.format("%s:%s", csname_stem, argument_specifiers.payload),
             transcript = transcript,
             type = TEXT
           }
         elseif argument_specifiers.type == PATTERN then
           csname = {
             payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload,
             transcript = transcript,
             type = PATTERN
           }
         else
           error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
         end
       end
       return csname
     end

     -- Determine the control sequence name of a conditional function given a base control sequence name and a condition.
     local function get_conditional_function_csname(csname_stem, argument_specifiers, condition)
       local csname
       if condition == "p" then  -- predicate function
         local format = "%s_p:%s"
         if type(argument_specifiers) == 'string' then
           csname = string.format(format, csname_stem, argument_specifiers)
         else
           local transcript = string.format(format, csname_stem, argument_specifiers.transcript)
           if argument_specifiers.type == TEXT then
             csname = {
               payload = string.format(format, csname_stem, argument_specifiers.payload),
               transcript = transcript,
               type = TEXT
             }
           elseif argument_specifiers.type == PATTERN then
             csname = {
               payload = lpeg.P(csname_stem) * lpeg.P("_p:") * argument_specifiers.payload,
               transcript = transcript,
               type = PATTERN
             }
           else
             error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
           end
         end
       elseif condition == "T" then  -- true-branch conditional function
         local format = "%s:%sT"
         if type(argument_specifiers) == 'string' then
           csname = string.format(format, csname_stem, argument_specifiers)
         else
           local transcript = string.format(format, csname_stem, argument_specifiers.transcript)
           if argument_specifiers.type == TEXT then
             csname = {
               payload = string.format(format, csname_stem, argument_specifiers.payload),
               transcript = transcript,
               type = TEXT
             }
           elseif argument_specifiers.type == PATTERN then
             csname = {
               payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload * lpeg.P("T"),
               transcript = transcript,
               type = PATTERN
             }
           else
             error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
           end
         end
       elseif condition == "F" then  -- false-branch conditional function
         local format = "%s:%sF"
         if type(argument_specifiers) == 'string' then
           csname = string.format(format, csname_stem, argument_specifiers)
         else
           local transcript = string.format(format, csname_stem, argument_specifiers.transcript)
           if argument_specifiers.type == TEXT then
             csname = {
               payload = string.format(format, csname_stem, argument_specifiers.payload),
               transcript = transcript,
               type = TEXT
             }
           elseif argument_specifiers.type == PATTERN then
             csname = {
               payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload * lpeg.P("F"),
               transcript = transcript,
               type = PATTERN
             }
           else
             error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
           end
         end
       elseif condition == "TF" then  -- true-and-false-branch conditional function
         local format = "%s:%sTF"
         if type(argument_specifiers) == 'string' then
           csname = string.format(format, csname_stem, argument_specifiers)
         else
           local transcript = string.format(format, csname_stem, argument_specifiers.transcript)
           if argument_specifiers.type == TEXT then
             csname = {
               payload = string.format(format, csname_stem, argument_specifiers.payload),
               transcript = transcript,
               type = TEXT
             }
           elseif argument_specifiers.type == PATTERN then
             csname = {
               payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload * lpeg.P("TF"),
               transcript = transcript,
               type = PATTERN,
             }
           else
             error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
           end
         end
       else
         error('Unexpected condition "' .. condition .. '"')
       end
       return csname
     end

     -- Try and extract a list of conditions in a conditional function (variant) definition.
     -- Together with the conditions, include a measurement of confidence about the correctness of the extracted information.
     local function parse_conditions(argument)
       local conditions

       -- try to determine the list of conditions
       local conditions_text, condition_list
       if argument.specifier ~= "n" then  -- conditions are hidden behind expansion, assume all conditions with lower confidence
         goto unknown_conditions
       end
       conditions_text = extract_text_from_argument(argument)
       if conditions_text == nil then  -- failed to read conditions
         goto unknown_conditions  -- assume all conditions with lower confidence
       end
       condition_list = lpeg.match(parsers.conditions, conditions_text)
       if condition_list == nil then  -- cound not parse conditions, give up
         return nil
       end
       conditions = {}
       for _, condition in ipairs(condition_list) do
         table.insert(conditions, {condition, DEFINITELY})
       end
       goto done_parsing

       ::unknown_conditions::
       -- assume all possible conditions with lower confidence
       conditions = {{"p", MAYBE}, {"T", MAYBE}, {"F", MAYBE}, {"TF", MAYBE}}

       ::done_parsing::
       return conditions
     end

     -- Try and extract a list of variant argument specifiers in a (conditional) function variant definition.
     -- Together with the argument specifiers, include a measurement of confidence about the correctness of the extracted information.
     --
     -- The parameter `csname` is the name of the base function and `argument` is the call argument with the variant
     -- argument specifiers. The result is a list of tables with the fields `payload`, `transcript`, `type`, and
     -- `confidence`:
     --
     -- * If the variant argument specifiers can be read and parsed, there is one TEXT entry with confidence DEFINITELY
     --   for each set of variant argument specifiers.
     -- * Otherwise, there is a single PATTERN entry with confidence MAYBE that covers all specifiers compatible with
     --   the base argument specifiers.
     --
     -- The result is nil if the base csname can't be parsed or if some variant argument specifiers are incompatible
     -- with the base argument specifiers. In the latter case, the issue t403 is registered for the current call.
     -- Deprecated variant argument specifiers register the warning w410 but don't prevent a result.
     local function parse_variant_argument_specifiers(csname, argument)
       -- extract the argument specifiers from the csname
       local _, base_argument_specifiers = parse_expl3_csname(csname)
       if base_argument_specifiers == nil then
         return nil  -- we couldn't parse the csname, give up
       end

       local variant_argument_specifiers

       -- try to determine all sets of variant argument specifiers
       local variant_argument_specifiers_text, variant_argument_specifiers_list
       if argument.specifier ~= "n" then  -- specifiers are hidden behind expansion, assume all possibilities with lower confidence
         goto unknown_argument_specifiers
       end
       variant_argument_specifiers_text = extract_text_from_argument(argument)
       if variant_argument_specifiers_text == nil then  -- failed to read specifiers
         goto unknown_argument_specifiers  -- assume all specifiers with lower confidence
       end
       variant_argument_specifiers_list = lpeg.match(parsers.variant_argument_specifiers, variant_argument_specifiers_text)
       if variant_argument_specifiers_list == nil then  -- could not parse specifiers, assume all possibilities with lower confidence
         goto unknown_argument_specifiers
       end
       variant_argument_specifiers = {}
       for _, argument_specifiers in ipairs(variant_argument_specifiers_list) do
         -- first, make the variant argument specifiers as long as the base argument specifiers
         if #argument_specifiers ~= #base_argument_specifiers then
           if #argument_specifiers < #base_argument_specifiers then  -- variant argument specifiers are shorter than base specifiers
             argument_specifiers = string.format(
               "%s%s",  -- treat the variant specifiers as a prefix with the rest filled in with the base specifiers
               argument_specifiers, base_argument_specifiers:sub(#argument_specifiers + 1)
             )
           else  -- variant argument specifiers are longer than base specifiers
             issues:add("t403", "function variant of incompatible type", byte_range)
             return nil  -- give up
           end
         end
         assert(#argument_specifiers == #base_argument_specifiers)
         -- next, check the variant argument specifiers against the base argument specifiers position by position
         for i = 1, #argument_specifiers do
           local base_argument_specifier = base_argument_specifiers:sub(i, i)
           local argument_specifier = argument_specifiers:sub(i, i)
           if base_argument_specifier == argument_specifier then  -- variant argument specifier is same as base argument specifier
             goto continue  -- skip further checks
           end
           local any_compatible_specifier = false
           -- NOTE(review): presumably, the parser always produces a list of specifiers for a base specifier -- confirm
           for _, compatible_specifier in ipairs(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier)) do
             if argument_specifier == compatible_specifier then  -- variant argument specifier is compatible with base argument specifier
               any_compatible_specifier = true
               break  -- skip further checks
             end
           end
           if not any_compatible_specifier then
             local any_deprecated_specifier = false
             for _, deprecated_specifier in ipairs(lpeg.match(parsers.deprecated_argument_specifiers, base_argument_specifier)) do
               if argument_specifier == deprecated_specifier then  -- variant argument specifier is deprecated regarding the base specifier
                 any_deprecated_specifier = true
                 break  -- skip further checks
               end
             end
             if any_deprecated_specifier then
               -- a deprecated specifier only produces a warning, the variant is still recorded
               issues:add("w410", "function variant of deprecated type", byte_range)
             else
               issues:add("t403", "function variant of incompatible type", byte_range)
               return nil  -- variant argument specifier is incompatible with base argument specifier, give up
             end
           end
           ::continue::
         end
         -- the variant argument specifiers were listed explicitly, record them as a text with full confidence
         table.insert(variant_argument_specifiers, {
           payload = argument_specifiers,
           transcript = argument_specifiers,
           type = TEXT,
           confidence = DEFINITELY
         })
       end
       goto done_parsing

       ::unknown_argument_specifiers::
       -- assume all possible sets of variant argument specifiers with lower confidence
       do
         variant_argument_specifiers = {}
         -- build a PEG pattern that accepts any compatible specifier at every position of the base argument specifiers
         -- and a human-readable transcript of the pattern, which lists the accepted specifiers in brackets
         local compatible_specifier_pattern, compatible_specifier_transcripts = parsers.success, {}
         for i = 1, #base_argument_specifiers do
           local base_argument_specifier = base_argument_specifiers:sub(i, i)
           local compatible_specifiers = table.concat(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier))
           compatible_specifier_pattern = compatible_specifier_pattern * lpeg.S(compatible_specifiers)
           local compatible_specifier_transcript = string.format('[%s]', compatible_specifiers)
           table.insert(compatible_specifier_transcripts, compatible_specifier_transcript)
         end
         local compatible_specifiers_transcript = table.concat(compatible_specifier_transcripts)
         table.insert(variant_argument_specifiers, {
           payload = compatible_specifier_pattern,
           transcript = compatible_specifiers_transcript,
           type = PATTERN,
           confidence = MAYBE
         })
       end

       ::done_parsing::
       return variant_argument_specifiers
     end

     if call.type == CALL then  -- a function call
       -- Ignore error S204 (Missing stylistic whitespaces) in Lua code.
       for _, arguments_number in ipairs(lpeg.match(parsers.expl3_function_call_with_lua_code_argument_csname, call.csname)) do
         local lua_code_argument = call.arguments[arguments_number]
         if #lua_code_argument.token_range > 0 then
           local lua_code_byte_range = lua_code_argument.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)
           issues:ignore('s204', lua_code_byte_range)
         end
       end

       local function_variant_definition = lpeg.match(parsers.expl3_function_variant_definition_csname, call.csname)
       local function_definition = lpeg.match(parsers.expl3_function_definition_csname, call.csname)

       -- Process a function variant definition.
       if function_variant_definition ~= nil then
         local is_conditional = table.unpack(function_variant_definition)
         -- determine the name of the defined function
         local base_csname_argument = call.arguments[1]
         local base_csname = extract_csname_from_argument(base_csname_argument)
         if base_csname == nil then  -- we couldn't extract the csname, give up
           goto other_statement
         end
         local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
         if base_csname_stem == nil then  -- we couldn't parse the csname, give up
           goto other_statement
         end
         -- determine the variant argument specifiers
         local variant_argument_specifiers = parse_variant_argument_specifiers(base_csname, call.arguments[2])
         if variant_argument_specifiers == nil then  -- we couldn't parse the variant argument specifiers, give up
           goto other_statement
         end
         -- determine all defined csnames
         local defined_csnames = {}
         for _, argument_specifiers in ipairs(variant_argument_specifiers) do
           if is_conditional then  -- conditional function
             -- determine the conditions
             local conditions = parse_conditions(call.arguments[#call.arguments])
             if conditions == nil then  -- we couldn't determine the conditions, give up
               goto other_statement
             end
             -- determine the defined csnames
             for _, condition_table in ipairs(conditions) do
               local condition, condition_confidence = table.unpack(condition_table)
               local base_conditional_csname = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
               local defined_conditional_csname = get_conditional_function_csname(base_csname_stem, argument_specifiers, condition)
               local confidence = math.min(argument_specifiers.confidence, condition_confidence)
               if base_conditional_csname ~= defined_conditional_csname then
                 table.insert(defined_csnames, {base_conditional_csname, defined_conditional_csname, confidence})
               end
             end
           else  -- non-conditional function
             local defined_csname = replace_argument_specifiers(base_csname_stem, argument_specifiers)
             if base_csname ~= defined_csname then
               table.insert(defined_csnames, {base_csname, defined_csname, argument_specifiers.confidence})
             end
           end
         end
         -- record function variant definition statements for all effectively defined csnames
         for _, defined_csname_table in ipairs(defined_csnames) do  -- lua
           local effective_base_csname, defined_csname, confidence = table.unpack(defined_csname_table)
           local statement = {
             type = FUNCTION_VARIANT_DEFINITION,
             call_range = call_range,
             confidence = confidence,
             -- The following attributes are specific to the type.
             base_csname = effective_base_csname,
             defined_csname = defined_csname,
             is_private = is_function_private(base_csname),
             is_conditional = is_conditional,
           }
           table.insert(statements, statement)
         end
         goto continue
       end

       -- Process a function definition.
       if function_definition ~= nil then
         local is_direct = table.unpack(function_definition)
         -- Process a direct function definition.
         if is_direct then
           -- determine the properties of the defined function
           local _, _, is_creator_function = table.unpack(function_definition)
           local is_conditional, maybe_redefinition, is_global, is_protected, is_nopar
           local defined_csname_argument, num_parameters
           if is_creator_function == true then  -- direct application of a creator function
             defined_csname_argument = call.arguments[1]
             _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(function_definition)
           else  -- indirect application of a creator function
             defined_csname_argument = call.arguments[2]
             local num_parameter_argument = call.arguments[3]
             if num_parameter_argument ~= nil and num_parameter_argument.specifier == "n" then
               local num_parameters_text = extract_text_from_argument(num_parameter_argument)
               if num_parameters_text ~= nil then
                 num_parameters = tonumber(num_parameters_text)
               end
             end
             local creator_function_csname = extract_csname_from_argument(call.arguments[1])
             if creator_function_csname == nil then  -- couldn't determine the name of the creator function, give up
               goto other_statement
             end
             local actual_function_definition = lpeg.match(parsers.expl3_function_definition_csname, creator_function_csname)
             if actual_function_definition == nil then  -- couldn't understand the creator function, give up
               goto other_statement
             end
             _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(actual_function_definition)
           end
           -- determine the name of the defined function
           local defined_csname = extract_csname_from_argument(defined_csname_argument)
           if defined_csname == nil then  -- we couldn't extract the csname, give up
             goto other_statement
           end
           local defined_csname_stem, argument_specifiers = parse_expl3_csname(defined_csname)
           -- determine the replacement text
           local replacement_text_number
           local replacement_text_argument = call.arguments[#call.arguments]
           do
             if replacement_text_argument.specifier ~= "n" then  -- replacement text is hidden behind expansion
               goto skip_replacement_text  -- record partial information
             end
             -- determine the number of parameters of the defined function
             local function update_num_parameters(updated_num_parameters)
               assert(updated_num_parameters ~= nil)
               if num_parameters == nil or updated_num_parameters > num_parameters then  -- trust the highest guess
                 num_parameters = updated_num_parameters
               end
             end
             if argument_specifiers ~= nil and lpeg.match(parsers.N_or_n_type_argument_specifiers, argument_specifiers) ~= nil then
               update_num_parameters(#argument_specifiers)
             end
             for _, argument in ipairs(call.arguments) do  -- next, try to look for p-type "TeX parameter" argument specifiers
               if argument.specifier == "p" and argument.num_parameters ~= nil then
                 update_num_parameters(argument.num_parameters)
                 break
               end
             end
             if num_parameters == nil then  -- we couldn't determine the number of parameters
               goto skip_replacement_text  -- record partial information
             end
             -- parse the replacement text and record the function definition
             local mapped_replacement_text_token_range = new_range(
               first_map_forward(replacement_text_argument.token_range:start()),
               first_map_forward(replacement_text_argument.token_range:stop()),
               INCLUSIVE + MAYBE_EMPTY,
               #transformed_tokens
             )
             local doubly_transformed_tokens, second_map_back, second_map_forward = transform_replacement_text_tokens(
               content,
               transformed_tokens,
               issues,
               num_parameters,
               mapped_replacement_text_token_range
             )
             if doubly_transformed_tokens == nil then  -- we couldn't parse the replacement text
               goto skip_replacement_text  -- record partial information
             end
             local function map_back(...) return first_map_back(second_map_back(...)) end
             local function map_forward(...) return second_map_forward(first_map_forward(...)) end
             table.insert(replacement_text_tokens, {
               token_range = replacement_text_argument.token_range,
               transformed_tokens = doubly_transformed_tokens,
               map_back = map_back,
               map_forward = map_forward,
             })
             replacement_text_number = #replacement_text_tokens
           end
           ::skip_replacement_text::
           -- determine all effectively defined csnames
           local effectively_defined_csnames = {}
           if is_conditional then  -- conditional function
             -- determine the conditions
             local conditions = parse_conditions(call.arguments[#call.arguments - 1])
             if conditions == nil then  -- we couldn't determine the conditions, give up
               goto other_statement
             end
             -- determine the defined csnames
             for _, condition_table in ipairs(conditions) do
               local condition, confidence = table.unpack(condition_table)
               if condition == "p" and is_protected then
                 issues:add("e404", "protected predicate function", byte_range)
               end
               local effectively_defined_csname = get_conditional_function_csname(defined_csname_stem, argument_specifiers, condition)
               table.insert(effectively_defined_csnames, {effectively_defined_csname, confidence})
             end
           else  -- non-conditional function
             effectively_defined_csnames = {{defined_csname, DEFINITELY}}
           end
           -- record function definition statements for all effectively defined csnames
           for _, effectively_defined_csname_table in ipairs(effectively_defined_csnames) do  -- lua
             local effectively_defined_csname, confidence = table.unpack(effectively_defined_csname_table)
             local statement = {
               type = FUNCTION_DEFINITION,
               call_range = call_range,
               confidence = confidence,
               -- The following attributes are specific to the type.
               subtype = FUNCTION_DEFINITION_DIRECT,
               maybe_redefinition = maybe_redefinition,
               is_private = is_function_private(defined_csname),
               is_global = is_global,
               defined_csname = effectively_defined_csname,
               -- The following attributes are specific to the subtype.
               is_conditional = is_conditional,
               is_protected = is_protected,
               is_nopar = is_nopar,
               replacement_text_number = replacement_text_number,
               replacement_text_argument = replacement_text_argument,
             }
             table.insert(statements, statement)
           end
         else
           -- Process an indirect function definition.
           local _, is_conditional, maybe_redefinition, is_global = table.unpack(function_definition)
           -- determine the name of the defined function
           local defined_csname_argument = call.arguments[1]
           local defined_csname = extract_csname_from_argument(defined_csname_argument)
           if defined_csname == nil then  -- we couldn't extract the csname, give up
             goto other_statement
           end
           -- determine the name of the base function
           local base_csname_argument = call.arguments[2]
           local base_csname = extract_csname_from_argument(base_csname_argument)
           if base_csname == nil then  -- we couldn't extract the csname, give up
             goto other_statement
           end
           -- determine all effectively defined csnames and effective base csnames
           local effective_defined_and_base_csnames = {}
           if is_conditional then  -- conditional function
             -- parse the base and defined csnames
             local defined_csname_stem, defined_argument_specifiers = parse_expl3_csname(defined_csname)
             if defined_csname_stem == nil then  -- we couldn't parse the defined csname, give up
               goto other_statement
             end
             local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
             if base_csname_stem == nil then  -- we couldn't parse the base csname, give up
               goto other_statement
             end
             -- determine the conditions
             local conditions = parse_conditions(call.arguments[#call.arguments - 1])
             if conditions == nil then  -- we couldn't determine the conditions, give up
               goto other_statement
             end
             -- determine the defined and base csnames
             for _, condition_table in ipairs(conditions) do
               local condition, confidence = table.unpack(condition_table)
               local effectively_defined_csname
                 = get_conditional_function_csname(defined_csname_stem, defined_argument_specifiers, condition)
               local effective_base_csname
                 = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
               table.insert(effective_defined_and_base_csnames, {effectively_defined_csname, effective_base_csname, confidence})
             end
           else  -- non-conditional function
             effective_defined_and_base_csnames = {{defined_csname, base_csname, DEFINITELY}}
           end
           -- record function definition statements for all effectively defined csnames
           for _, effective_defined_and_base_csname_table in ipairs(effective_defined_and_base_csnames) do  -- lua
             local effectively_defined_csname, effective_base_csname, confidence
               = table.unpack(effective_defined_and_base_csname_table)
             local statement = {
               type = FUNCTION_DEFINITION,
               call_range = call_range,
               confidence = confidence,
               -- The following attributes are specific to the type.
               subtype = FUNCTION_DEFINITION_INDIRECT,
               maybe_redefinition = maybe_redefinition,
               is_private = is_function_private(defined_csname),
               is_global = is_global,
               defined_csname = effectively_defined_csname,
               -- The following attributes are specific to the subtype.
               base_csname = effective_base_csname,
               is_conditional = is_conditional,
             }
             table.insert(statements, statement)
           end
         end
         goto continue
       end

       ::other_statement::
       local statement = {
         type = OTHER_STATEMENT,
         call_range = call_range,
         confidence = NONE,
       }
       table.insert(statements, statement)
     elseif call.type == OTHER_TOKENS then  -- other tokens
       local statement_type = classify_tokens(tokens, call.token_range)
       local statement = {
         type = statement_type,
         call_range = call_range,
         confidence = NONE,
       }
       table.insert(statements, statement)
     else
       error('Unexpected call type "' .. call.type .. '"')
     end
     ::continue::
   end
   return statements, replacement_text_tokens
 end

 -- Extract statements from function calls. For all identified function definitions, record replacement texts and recursively
 -- apply syntactic and semantic analysis on them.
 local function get_statements(tokens, groupings, calls)

   -- First, record top-level statements.
   local replacement_texts = {tokens = nil, calls = {}, statements = {}, nesting_depth = {}}
   local statements
   statements, replacement_texts.tokens = record_statements_and_replacement_texts(tokens, tokens, calls, identity, identity)

   -- Then, process any new replacement texts until convergence.
   local previous_num_replacement_texts = 0
   local current_num_replacement_texts = #replacement_texts.tokens
   local current_nesting_depth = 1
   while previous_num_replacement_texts < current_num_replacement_texts do
     for replacement_text_number = previous_num_replacement_texts + 1, current_num_replacement_texts do
       local replacement_text_tokens = replacement_texts.tokens[replacement_text_number]
       -- record the current nesting depth with the replacement text
       table.insert(replacement_texts.nesting_depth, current_nesting_depth)
       -- extract nested calls from the replacement text using syntactic analysis
       local nested_calls = get_calls(
         tokens,
         replacement_text_tokens.transformed_tokens,
         replacement_text_tokens.token_range,
         replacement_text_tokens.map_back,
         replacement_text_tokens.map_forward,
         issues,
         groupings,
         content
       )
       table.insert(replacement_texts.calls, nested_calls)
       -- extract nested statements and replacement texts from the nested calls using semactic analysis
       local nested_statements, nested_replacement_text_tokens = record_statements_and_replacement_texts(
         tokens,
         replacement_text_tokens.transformed_tokens,
         nested_calls,
         replacement_text_tokens.map_back,
         replacement_text_tokens.map_forward
       )
       for _, nested_statement in ipairs(nested_statements) do
         if nested_statement.type == FUNCTION_DEFINITION
             and nested_statement.subtype == FUNCTION_DEFINITION_DIRECT
             and nested_statement.replacement_text_number ~= nil then
           -- make the reference to the replacement text absolute instead of relative
           nested_statement.replacement_text_number = nested_statement.replacement_text_number + current_num_replacement_texts
         end
       end
       table.insert(replacement_texts.statements, nested_statements)
       for _, nested_tokens in ipairs(nested_replacement_text_tokens) do
         table.insert(replacement_texts.tokens, nested_tokens)
       end
     end
     previous_num_replacement_texts = current_num_replacement_texts
     current_num_replacement_texts = #replacement_texts.tokens
     current_nesting_depth = current_nesting_depth + 1
   end

   assert(#replacement_texts.tokens == current_num_replacement_texts)
   assert(#replacement_texts.calls == current_num_replacement_texts)
   assert(#replacement_texts.statements == current_num_replacement_texts)
   assert(#replacement_texts.nesting_depth == current_num_replacement_texts)

   return statements, replacement_texts
 end

 -- Extract statements from function calls.
 local statements = {}
 local replacement_texts = {}
 for part_number, part_calls in ipairs(results.calls) do
   local part_tokens = results.tokens[part_number]
   local part_groupings = results.groupings[part_number]
   local part_statements, part_replacement_texts = get_statements(part_tokens, part_groupings, part_calls)
   table.insert(statements, part_statements)
   table.insert(replacement_texts, part_replacement_texts)
 end

 assert(#statements == #results.calls)
 assert(#statements == #replacement_texts)

 -- Report issues that are apparent after the semantic analysis.
 --- Collect all segments of top-level and nested tokens, calls, and statements.
 local token_segments, call_segments, statement_segments = {}, {}, {}
 for part_number, part_calls in ipairs(results.calls) do
   local part_statements = statements[part_number]
   table.insert(call_segments, part_calls)
   table.insert(statement_segments, part_statements)
   local part_tokens = results.tokens[part_number]
   table.insert(token_segments, {part_tokens, part_tokens, identity})
   local part_replacement_texts = replacement_texts[part_number]
   for replacement_text_number, nested_calls in ipairs(part_replacement_texts.calls) do
     local nested_statements = part_replacement_texts.statements[replacement_text_number]
     table.insert(call_segments, nested_calls)
     table.insert(statement_segments, nested_statements)
     local replacement_text_tokens = part_replacement_texts.tokens[replacement_text_number]
     table.insert(token_segments, {part_tokens, replacement_text_tokens.transformed_tokens, replacement_text_tokens.map_forward})
   end
 end

 --- Make a pass over the segments, building up information.
 local defined_private_functions = {}

 ---- Collect information about symbols that were definitely defined.
 local called_functions_and_variants = {}
 local defined_private_function_variant_texts, defined_private_function_variant_pattern = {}, parsers.fail
 local defined_private_function_variant_byte_ranges = {}
 local variant_base_csnames, indirect_definition_base_csnames = {}, {}

 ---- Collect information about symbols that may have been defined.
 local maybe_defined_csname_texts, maybe_defined_csname_pattern = {}, parsers.fail
 local maybe_used_csname_texts, maybe_used_csname_pattern = {}, parsers.fail

 for segment_number, segment_statements in ipairs(statement_segments) do
   local segment_calls = call_segments[segment_number]
   local segment_tokens, segment_transformed_tokens, map_forward = table.unpack(token_segments[segment_number])

   -- Convert tokens from a range into a PEG pattern.
   local function extract_pattern_from_tokens(token_range)
     local pattern, transcripts, num_simple_tokens = parsers.success, {}, 0
     local previous_token_was_simple = true
     for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
       if is_token_simple(token) then  -- simple material
         pattern = pattern * lpeg.P(token.payload)
         table.insert(transcripts, token.payload)
         num_simple_tokens = num_simple_tokens + 1
         previous_token_was_simple = true
       else  -- complex material
         if previous_token_was_simple then
           pattern = pattern * parsers.any^0
           table.insert(transcripts, "*")
         end
         previous_token_was_simple = false
       end
     end
     local transcript = table.concat(transcripts)
     return pattern, transcript, num_simple_tokens
   end

   -- Try and convert tokens from a range into a csname.
   local function extract_csname_from_tokens(token_range)
     local text = extract_text_from_tokens(token_range, segment_transformed_tokens, map_forward)
     local csname
     if text ~= nil then  -- simple material
       csname = {
         payload = text,
         transcript = text,
         type = TEXT
       }
     else  -- complex material
       local pattern, transcript, num_simple_tokens = extract_pattern_from_tokens(token_range)
       if num_simple_tokens < get_option("min_simple_tokens_in_csname_pattern", options, pathname) then  -- too few simple tokens, give up
         return nil
       end
       csname = {
         payload = pattern,
         transcript = transcript,
         type = PATTERN
       }
     end
     return csname
   end

   -- Process an argument and record control sequence name usage and definitions.
   local function process_argument_tokens(argument)
     -- Record control sequence name usage.
     --- Extract text from tokens within c- and v-type arguments.
     if argument.specifier == "c" or argument.specifier == "v" then
       local csname = extract_csname_from_tokens(argument.token_range)
       if csname ~= nil then
         if csname.type == TEXT then
           maybe_used_csname_texts[csname.payload] = true
         elseif csname.type == PATTERN then
           maybe_used_csname_pattern = maybe_used_csname_pattern + csname.payload
         end
       end
     end
     --- Scan control sequence tokens within N- and n-type arguments.
     if lpeg.match(parsers.N_or_n_type_argument_specifier, argument.specifier) ~= nil then
       for _, token in argument.token_range:enumerate(segment_transformed_tokens, map_forward) do
         if token.type == CONTROL_SEQUENCE then
           maybe_used_csname_texts[token.payload] = true
         end
       end
     end
     -- Record control sequence name definitions.
     --- Scan control sequence tokens within N- and n-type arguments.
     if lpeg.match(parsers.N_or_n_type_argument_specifier, argument.specifier) ~= nil then
       for token_number, token in argument.token_range:enumerate(segment_transformed_tokens, map_forward) do
         if token.type == CONTROL_SEQUENCE then
           if token_number + 1 <= #segment_transformed_tokens then
             local next_token = segment_transformed_tokens[token_number + 1]
             if (
                   next_token.type == CONTROL_SEQUENCE
                   and lpeg.match(parsers.expl3_function_definition_csname, token.payload) ~= nil
                 ) then
               maybe_defined_csname_texts[next_token.payload] = true
             end
           end
         end
       end
     end
   end

   for _, statement in ipairs(segment_statements) do
     local token_range = statement.call_range:new_range_from_subranges(get_call_token_range(segment_calls), #segment_tokens)
     local byte_range = token_range:new_range_from_subranges(get_token_byte_range(segment_tokens), #content)
     -- Process a function variant definition.
     if statement.type == FUNCTION_VARIANT_DEFINITION then
       -- Record base control sequence names of variants, both as control sequence name usage and separately.
       table.insert(variant_base_csnames, {statement.base_csname, byte_range})
       maybe_used_csname_texts[statement.base_csname] = true
       -- Record control sequence name definitions.
       if statement.defined_csname.type == TEXT then
         maybe_defined_csname_texts[statement.defined_csname.payload] = true
       elseif statement.defined_csname.type == PATTERN then
         maybe_defined_csname_pattern = maybe_defined_csname_pattern + statement.defined_csname.payload
       else
         error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
       end
       -- Record private function variant definitions.
       if statement.confidence == DEFINITELY and statement.is_private then
         table.insert(defined_private_function_variant_byte_ranges, byte_range)
         local defined_private_function_variant = {
           number = #defined_private_function_variant_byte_ranges,
           csname = statement.defined_csname
         }
         if statement.defined_csname.type == TEXT then
           table.insert(defined_private_function_variant_texts, defined_private_function_variant)
         elseif statement.defined_csname.type == PATTERN then
           defined_private_function_variant_pattern = (
             defined_private_function_variant_pattern
             + statement.defined_csname.payload
             / defined_private_function_variant
           )
         else
           error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
         end
       end
     -- Process a function definition.
     elseif statement.type == FUNCTION_DEFINITION then
       -- Record the base control sequences used in indirect function definitions.
       if statement.subtype == FUNCTION_DEFINITION_INDIRECT then
         maybe_used_csname_texts[statement.base_csname] = true
         table.insert(indirect_definition_base_csnames, {statement.base_csname, byte_range})
       end
       -- Record control sequence name usage and definitions.
       maybe_defined_csname_texts[statement.defined_csname] = true
       if statement.subtype == FUNCTION_DEFINITION_DIRECT and statement.replacement_text_number == nil then
         process_argument_tokens(statement.replacement_text_argument)
       end
       -- Record private function defition.
       if statement.confidence == DEFINITELY and statement.is_private then
         table.insert(defined_private_functions, {statement.defined_csname, byte_range})
       end
     -- Process an unrecognized statement.
     elseif statement.type == OTHER_STATEMENT then
       -- Record control sequence name usage and definitions.
       for _, call in statement.call_range:enumerate(segment_calls) do
         maybe_used_csname_texts[call.csname] = true
         table.insert(called_functions_and_variants, {call.csname, byte_range})
         for _, argument in ipairs(call.arguments) do
           process_argument_tokens(argument)
         end
       end
     -- Process a block of unrecognized tokens.
     elseif statement.type == OTHER_TOKENS_SIMPLE or statement.type == OTHER_TOKENS_COMPLEX then
       -- Record control sequence name usage by scanning all control sequence tokens.
       for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
         if token.type == CONTROL_SEQUENCE then
           maybe_used_csname_texts[token.payload] = true
         end
       end
     else
       error('Unexpected statement type "' .. statement.type .. '"')
     end
   end
 end

 -- Finalize PEG patterns.
 maybe_defined_csname_pattern = maybe_defined_csname_pattern * parsers.eof
 maybe_used_csname_pattern = maybe_used_csname_pattern * parsers.eof
 defined_private_function_variant_pattern = defined_private_function_variant_pattern * parsers.eof

 --- Report issues apparent from the collected information.
 ---- Report unused private functions.
 for _, defined_private_function in ipairs(defined_private_functions) do
   local defined_csname, byte_range = table.unpack(defined_private_function)
   if not maybe_used_csname_texts[defined_csname] and lpeg.match(maybe_used_csname_pattern, defined_csname) == nil then
     issues:add('w401', 'unused private function', byte_range)
   end
 end

 ---- Report unused private function variants.
 local used_private_function_variants = {}
 for private_function_variant_number, _ in ipairs(defined_private_function_variant_byte_ranges) do
   used_private_function_variants[private_function_variant_number] = false
 end
 for _, defined_private_function_variant in ipairs(defined_private_function_variant_texts) do
   assert(defined_private_function_variant.csname.type == TEXT)
   if maybe_used_csname_texts[defined_private_function_variant.csname.payload]
       or lpeg.match(maybe_used_csname_pattern, defined_private_function_variant.csname.payload) ~= nil then
     used_private_function_variants[defined_private_function_variant.number] = true
   end
 end
 for maybe_used_csname, _ in pairs(maybe_used_csname_texts) do
   -- NOTE: Although we might want to also test whether "defined_private_function_variant_pattern" and
   -- "maybe_used_csname_pattern" overlap, intersection is undecideable for parsing expression languages (PELs). In
   -- theory, we could use regular expressions instead of PEG patterns, since intersection is decideable for regular
   -- languages. In practice, there are no Lua libraries that would implement the required algorithms. Therefore, it
   -- seems more practical to just accept that low-confidence function variant definitions and function uses don't
   -- interact, not just because the technical difficulty but also because the combined confidence is just too low.
   local defined_private_function_variant = lpeg.match(defined_private_function_variant_pattern, maybe_used_csname)
   if defined_private_function_variant ~= nil then
     assert(defined_private_function_variant.csname.type == PATTERN)
     used_private_function_variants[defined_private_function_variant.number] = true
   end
 end
 for private_function_variant_number, byte_range in ipairs(defined_private_function_variant_byte_ranges) do
   if not used_private_function_variants[private_function_variant_number] then
     issues:add('w402', 'unused private function variant', byte_range)
   end
 end

 local imported_prefixes = get_option('imported_prefixes', options, pathname)
 local expl3_well_known_function_csname = parsers.expl3_well_known_function_csname(imported_prefixes)

 ---- Report function variants for undefined functions.
 for _, variant_base_csname in ipairs(variant_base_csnames) do
   local base_csname, byte_range = table.unpack(variant_base_csname)
   if lpeg.match(expl3_well_known_function_csname, base_csname) == nil
       and not maybe_defined_csname_texts[base_csname]
       and not lpeg.match(maybe_defined_csname_pattern, base_csname) then
     issues:add('e405', 'function variant for an undefined function', byte_range)
   end
 end

 ---- Report calls to undefined functions and function variants.
 for _, called_function_or_variant in ipairs(called_functions_and_variants) do
   local csname, byte_range = table.unpack(called_function_or_variant)
   if lpeg.match(parsers.expl3like_function_csname, csname) ~= nil
       and lpeg.match(expl3_well_known_function_csname, csname) == nil
       and not maybe_defined_csname_texts[csname]
       and not lpeg.match(maybe_defined_csname_pattern, csname) then
     issues:add('e408', 'calling an undefined function', byte_range)
   end
 end

 ---- Report indirect function definitions from undefined base functions.
 for _, indirect_definition_base_csname in ipairs(indirect_definition_base_csnames) do
   local csname, byte_range = table.unpack(indirect_definition_base_csname)
   if lpeg.match(parsers.expl3like_function_csname, csname) ~= nil
       and lpeg.match(expl3_well_known_function_csname, csname) == nil
       and not maybe_defined_csname_texts[csname]
       and not lpeg.match(maybe_defined_csname_pattern, csname) then
     issues:add('e411', 'indirect function definition from an undefined function', byte_range)
   end
 end

 -- Store the intermediate results of the analysis.
 results.statements = statements
 results.replacement_texts = replacement_texts
end

-- The public interface of the module.
return {
 -- The function that runs the semantic analysis.
 process = semantic_analysis,
 -- The enumerations used in the results of the semantic analysis.
 csname_types = csname_types,
 statement_confidences = statement_confidences,
 statement_subtypes = statement_subtypes,
 statement_types = statement_types,
}