-- The semantic analysis step of static analysis determines the meaning of the different function calls.
local lexical_analysis = require("explcheck-lexical-analysis")
local syntactic_analysis = require("explcheck-syntactic-analysis")
local get_option = require("explcheck-config").get_option
local ranges = require("explcheck-ranges")
local parsers = require("explcheck-parsers")
local identity = require("explcheck-utils").identity
local get_token_byte_range = lexical_analysis.get_token_byte_range
local is_token_simple = lexical_analysis.is_token_simple
local token_types = lexical_analysis.token_types
local extract_text_from_tokens = syntactic_analysis.extract_text_from_tokens
local CONTROL_SEQUENCE = token_types.CONTROL_SEQUENCE
local new_range = ranges.new_range
local range_flags = ranges.range_flags
local INCLUSIVE = range_flags.INCLUSIVE
local MAYBE_EMPTY = range_flags.MAYBE_EMPTY
local call_types = syntactic_analysis.call_types
local get_calls = syntactic_analysis.get_calls
local get_call_token_range = syntactic_analysis.get_call_token_range
local transform_replacement_text_tokens = syntactic_analysis.transform_replacement_text_tokens
local CALL = call_types.CALL
local OTHER_TOKENS = call_types.OTHER_TOKENS
local lpeg = require("lpeg")
-- The types of statements that the semantic analysis can record.
local statement_types = {
  FUNCTION_DEFINITION = "function definition",
  FUNCTION_VARIANT_DEFINITION = "function variant definition",
  OTHER_STATEMENT = "other statement",
  OTHER_TOKENS_SIMPLE = "block of other simple tokens",
  OTHER_TOKENS_COMPLEX = "block of other complex tokens",
}
local FUNCTION_DEFINITION = statement_types.FUNCTION_DEFINITION
local FUNCTION_VARIANT_DEFINITION = statement_types.FUNCTION_VARIANT_DEFINITION
local OTHER_STATEMENT = statement_types.OTHER_STATEMENT
local OTHER_TOKENS_SIMPLE = statement_types.OTHER_TOKENS_SIMPLE
local OTHER_TOKENS_COMPLEX = statement_types.OTHER_TOKENS_COMPLEX
-- The subtypes of statements, keyed by the statement type that they refine.
local statement_subtypes = {
  FUNCTION_DEFINITION = {
    DIRECT = "direct function definition",
    INDIRECT = "indirect function definition",
  }
}
local FUNCTION_DEFINITION_DIRECT = statement_subtypes.FUNCTION_DEFINITION.DIRECT
local FUNCTION_DEFINITION_INDIRECT = statement_subtypes.FUNCTION_DEFINITION.INDIRECT
-- How sure the analysis is about a recorded statement.
-- The levels are numbers so that they can be combined with math.min(); a higher number means a higher confidence.
-- NOTE(review): only the ordering DEFINITELY > MAYBE > NONE is relied upon by the code in this file;
-- the exact values were reconstructed and should be confirmed against other users of these constants.
local statement_confidences = {
  DEFINITELY = 1,
  MAYBE = 0.5,
  NONE = 0,
}
local DEFINITELY = statement_confidences.DEFINITELY
local MAYBE = statement_confidences.MAYBE
local NONE = statement_confidences.NONE
-- The representations of a control sequence name (or of its part) used in statements.
local csname_types = {
  TEXT = "direct text representation of a control sequence name or its part, usually paired with confidence DEFINITELY",
  PATTERN = "a PEG pattern that recognizes different control sequences or their parts, usually paired with confidence MAYBE"
}
local TEXT = csname_types.TEXT
local PATTERN = csname_types.PATTERN
-- Determine the meaning of function calls and register any issues.
local function semantic_analysis(pathname, content, issues, results, options)
-- Classify a span of tokens: a span made only of simple tokens is "simple text" [1, p. 383] with no expected
-- side effects, whereas a span with at least one non-simple token is complex material that may have side effects
-- and that separates chunks of well-understood expl3 statements.
--
-- Returns OTHER_TOKENS_SIMPLE or OTHER_TOKENS_COMPLEX.
--
-- [1]: Donald Ervin Knuth. 1986. TeX: The Program. Addison-Wesley, USA.
--
local function classify_tokens(tokens, token_range)
  local statement_type = OTHER_TOKENS_SIMPLE -- simple material, unless proven otherwise
  for _, token in token_range:enumerate(tokens) do
    if not is_token_simple(token) then
      statement_type = OTHER_TOKENS_COMPLEX -- a single complex token makes the whole span complex
      break
    end
  end
  return statement_type
end
-- Extract statements from function calls and record them. For all identified function definitions, also record replacement texts.
local function record_statements_and_replacement_texts(tokens, transformed_tokens, calls, first_map_back, first_map_forward)
local statements = {}
local replacement_text_tokens = {}
for call_number, call in ipairs(calls) do
local call_range = new_range(call_number, call_number, INCLUSIVE, #calls)
local byte_range = call.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)
-- Try to turn the tokens of a call argument into text.
-- The argument specifier must be accepted by `parsers.n_type_argument_specifier`, otherwise an assertion fails.
-- Returns whatever `extract_text_from_tokens()` produces for the argument; callers treat nil as a failure.
local function extract_text_from_argument(argument)
  local specifier_match = lpeg.match(parsers.n_type_argument_specifier, argument.specifier)
  assert(specifier_match ~= nil)
  return extract_text_from_tokens(argument.token_range, transformed_tokens, first_map_forward)
end
-- Determine the control sequence name that a call argument stands for.
-- Only N-type arguments (a single control sequence token) and c-type arguments (text of the name) are understood.
-- Returns the name, or nil when it cannot be determined.
local function extract_csname_from_argument(argument)
  local specifier = argument.specifier
  if specifier == "N" then
    local first_token = transformed_tokens[first_map_forward(argument.token_range:start())]
    if first_token.type ~= CONTROL_SEQUENCE then
      return nil -- the N-type argument is something else than a control sequence
    end
    local name = first_token.payload
    assert(name ~= nil)
    return name
  end
  if specifier == "c" then
    -- a nil result means that the c-type argument contains complex material
    local name = extract_text_from_argument(argument)
    return name
  end
  return nil -- any other argument type
end
-- Split an expl3 control sequence name at a colon into the stem and the argument specifiers.
-- Returns nil for both parts when the name contains no colon.
local function parse_expl3_csname(csname)
  local stem, specifiers = string.match(csname, "([^:]*):([^:]*)")
  return stem, specifiers
end
-- Tell private functions from public ones: a name that starts with two underscores is private.
local function is_function_private(csname)
  local prefix = string.sub(csname, 1, 2)
  if prefix == "__" then
    return true
  end
  return false
end
-- Build a control sequence name from a stem and (new) argument specifiers.
-- The specifiers are either a plain string, in which case a string is returned, or a table with the fields
-- `payload`, `transcript`, and `type` (TEXT or PATTERN), in which case a table of the same shape is returned.
-- Raises an error for a table with any other type.
local function replace_argument_specifiers(csname_stem, argument_specifiers)
  if type(argument_specifiers) == 'string' then
    return string.format("%s:%s", csname_stem, argument_specifiers)
  end
  local transcript = string.format("%s:%s", csname_stem, argument_specifiers.transcript)
  local specifiers_type = argument_specifiers.type
  if specifiers_type == TEXT then
    return {
      payload = string.format("%s:%s", csname_stem, argument_specifiers.payload),
      transcript = transcript,
      type = TEXT
    }
  end
  if specifiers_type == PATTERN then
    return {
      payload = lpeg.P(csname_stem) * lpeg.P(":") * argument_specifiers.payload,
      transcript = transcript,
      type = PATTERN
    }
  end
  error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
end
-- Determine the control sequence name of a conditional function given a stem, argument specifiers, and a condition.
-- The condition is one of "p" (predicate, `<stem>_p:<specifiers>`), "T", "F", or "TF" (`<stem>:<specifiers><condition>`);
-- any other condition raises an error.
-- The specifiers are either a plain string, in which case a string is returned, or a table with the fields
-- `payload`, `transcript`, and `type` (TEXT or PATTERN), in which case a table of the same shape is returned.
local function get_conditional_function_csname(csname_stem, argument_specifiers, condition)
  -- For every condition: the format of the textual name, the text between the stem and the specifiers,
  -- and the text that follows the specifiers (if any).
  local shapes = {
    p = {format = "%s_p:%s", separator = "_p:", suffix = nil},
    T = {format = "%s:%sT", separator = ":", suffix = "T"},
    F = {format = "%s:%sF", separator = ":", suffix = "F"},
    TF = {format = "%s:%sTF", separator = ":", suffix = "TF"},
  }
  local shape = shapes[condition]
  if shape == nil then
    error('Unexpected condition "' .. condition .. '"')
  end
  if type(argument_specifiers) == 'string' then
    return string.format(shape.format, csname_stem, argument_specifiers)
  end
  local transcript = string.format(shape.format, csname_stem, argument_specifiers.transcript)
  if argument_specifiers.type == TEXT then
    return {
      payload = string.format(shape.format, csname_stem, argument_specifiers.payload),
      transcript = transcript,
      type = TEXT
    }
  end
  if argument_specifiers.type == PATTERN then
    local pattern = lpeg.P(csname_stem) * lpeg.P(shape.separator) * argument_specifiers.payload
    if shape.suffix ~= nil then
      pattern = pattern * lpeg.P(shape.suffix)
    end
    return {
      payload = pattern,
      transcript = transcript,
      type = PATTERN
    }
  end
  error('Unexpected argument specifiers type "' .. argument_specifiers.type .. '"')
end
-- Try to read the list of conditions from an argument of a conditional function (variant) definition.
-- Returns a list of `{condition, confidence}` pairs:
-- - the parsed conditions with confidence DEFINITELY when the argument is a readable n-type argument,
-- - all four conditions ("p", "T", "F", "TF") with confidence MAYBE when the argument cannot be read, and
-- - nil when the argument can be read but its text is not a valid list of conditions.
local function parse_conditions(argument)
  if argument.specifier ~= "n" then
    -- the conditions are hidden behind expansion, assume all of them with a lower confidence
    return {{"p", MAYBE}, {"T", MAYBE}, {"F", MAYBE}, {"TF", MAYBE}}
  end
  local conditions_text = extract_text_from_argument(argument)
  if conditions_text == nil then
    -- the conditions could not be read, assume all of them with a lower confidence
    return {{"p", MAYBE}, {"T", MAYBE}, {"F", MAYBE}, {"TF", MAYBE}}
  end
  local condition_list = lpeg.match(parsers.conditions, conditions_text)
  if condition_list == nil then
    return nil -- the conditions could not be parsed, give up
  end
  local conditions = {}
  for _, condition in ipairs(condition_list) do
    conditions[#conditions + 1] = {condition, DEFINITELY}
  end
  return conditions
end
-- Try and extract a list of variant argument specifiers in a (conditional) function variant definition.
-- Together with the argument specifiers, include a measurement of confidence about the correctness of the extracted information.
--
-- Parameters:
-- - csname: the name of the base function, which supplies the base argument specifiers.
-- - argument: the call argument that lists the variant argument specifiers.
--
-- Returns a list of tables with the fields `payload`, `transcript`, `type`, and `confidence`:
-- - one TEXT entry with confidence DEFINITELY per listed set of variant specifiers when the argument can be read, or
-- - a single PATTERN entry with confidence MAYBE that accepts any compatible specifiers when it cannot.
-- Returns nil when the base csname cannot be parsed or when some variant is incompatible with the base function;
-- the latter is also reported as issue t403. Deprecated variants are reported as issue w410 but still returned.
local function parse_variant_argument_specifiers(csname, argument)
-- extract the argument specifiers from the csname
local _, base_argument_specifiers = parse_expl3_csname(csname)
if base_argument_specifiers == nil then
return nil -- we couldn't parse the csname, give up
end
local variant_argument_specifiers
-- try to determine all sets of variant argument specifiers
local variant_argument_specifiers_text, variant_argument_specifiers_list
if argument.specifier ~= "n" then -- specifiers are hidden behind expansion, assume all possibilities with lower confidence
goto unknown_argument_specifiers
end
variant_argument_specifiers_text = extract_text_from_argument(argument)
if variant_argument_specifiers_text == nil then -- failed to read specifiers
goto unknown_argument_specifiers -- assume all specifiers with lower confidence
end
variant_argument_specifiers_list = lpeg.match(parsers.variant_argument_specifiers, variant_argument_specifiers_text)
if variant_argument_specifiers_list == nil then -- could not parse specifiers, assume all possibilities with lower confidence
goto unknown_argument_specifiers
end
variant_argument_specifiers = {}
-- validate every listed set of variant specifiers against the base specifiers, position by position
for _, argument_specifiers in ipairs(variant_argument_specifiers_list) do
if #argument_specifiers ~= #base_argument_specifiers then
if #argument_specifiers < #base_argument_specifiers then -- variant argument specifiers are shorter than base specifiers
argument_specifiers = string.format(
"%s%s", -- treat the variant specifiers as a prefix with the rest filled in with the base specifiers
argument_specifiers, base_argument_specifiers:sub(#argument_specifiers + 1)
)
else -- variant argument specifiers are longer than base specifiers
issues:add("t403", "function variant of incompatible type", byte_range)
return nil -- give up
end
end
assert(#argument_specifiers == #base_argument_specifiers)
for i = 1, #argument_specifiers do
local base_argument_specifier = base_argument_specifiers:sub(i, i)
local argument_specifier = argument_specifiers:sub(i, i)
if base_argument_specifier == argument_specifier then -- variant argument specifier is same as base argument specifier
goto continue -- skip further checks
end
-- first, look for the variant specifier among the specifiers compatible with the base specifier
local any_compatible_specifier = false
for _, compatible_specifier in ipairs(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier)) do
if argument_specifier == compatible_specifier then -- variant argument specifier is compatible with base argument specifier
any_compatible_specifier = true
break -- skip further checks
end
end
if not any_compatible_specifier then
-- next, look for the variant specifier among the deprecated specifiers, which only produce a warning
local any_deprecated_specifier = false
for _, deprecated_specifier in ipairs(lpeg.match(parsers.deprecated_argument_specifiers, base_argument_specifier)) do
if argument_specifier == deprecated_specifier then -- variant argument specifier is deprecated regarding the base specifier
any_deprecated_specifier = true
break -- skip further checks
end
end
if any_deprecated_specifier then
issues:add("w410", "function variant of deprecated type", byte_range)
else
issues:add("t403", "function variant of incompatible type", byte_range)
return nil -- variant argument specifier is incompatible with base argument specifier, give up
end
end
::continue::
end
-- the set of variant specifiers passed all checks, record it as a text known for certain
table.insert(variant_argument_specifiers, {
payload = argument_specifiers,
transcript = argument_specifiers,
type = TEXT,
confidence = DEFINITELY
})
end
goto done_parsing
::unknown_argument_specifiers::
-- assume all possible sets of variant argument specifiers with lower confidence
do
variant_argument_specifiers = {}
-- build a pattern that accepts, at every position, any specifier compatible with the base specifier,
-- together with a human-readable transcript in the form of a sequence of character classes such as [nN]
local compatible_specifier_pattern, compatible_specifier_transcripts = parsers.success, {}
for i = 1, #base_argument_specifiers do
local base_argument_specifier = base_argument_specifiers:sub(i, i)
local compatible_specifiers = table.concat(lpeg.match(parsers.compatible_argument_specifiers, base_argument_specifier))
compatible_specifier_pattern = compatible_specifier_pattern * lpeg.S(compatible_specifiers)
local compatible_specifier_transcript = string.format('[%s]', compatible_specifiers)
table.insert(compatible_specifier_transcripts, compatible_specifier_transcript)
end
local compatible_specifiers_transcript = table.concat(compatible_specifier_transcripts)
table.insert(variant_argument_specifiers, {
payload = compatible_specifier_pattern,
transcript = compatible_specifiers_transcript,
type = PATTERN,
confidence = MAYBE
})
end
::done_parsing::
return variant_argument_specifiers
end
-- Record statements for the current call:
-- - A function call that is recognized as a function variant definition or as a function definition produces one
--   statement per (effectively) defined control sequence name. Direct function definitions additionally record
--   their replacement text, when it can be parsed.
-- - Any other function call, or a definition that could not be understood, produces a single OTHER_STATEMENT.
-- - A block of other tokens produces a single statement classified as either simple or complex.
-- Any other call type raises an error.
if call.type == CALL then -- a function call
  -- Ignore error S204 (Missing stylistic whitespaces) in Lua code.
  for _, arguments_number in ipairs(lpeg.match(parsers.expl3_function_call_with_lua_code_argument_csname, call.csname)) do
    local lua_code_argument = call.arguments[arguments_number]
    if #lua_code_argument.token_range > 0 then
      local lua_code_byte_range = lua_code_argument.token_range:new_range_from_subranges(get_token_byte_range(tokens), #content)
      issues:ignore('s204', lua_code_byte_range)
    end
  end
  local function_variant_definition = lpeg.match(parsers.expl3_function_variant_definition_csname, call.csname)
  local function_definition = lpeg.match(parsers.expl3_function_definition_csname, call.csname)
  -- Process a function variant definition.
  if function_variant_definition ~= nil then
    local is_conditional = table.unpack(function_variant_definition)
    -- determine the name of the defined function
    local base_csname_argument = call.arguments[1]
    local base_csname = extract_csname_from_argument(base_csname_argument)
    if base_csname == nil then -- we couldn't extract the csname, give up
      goto other_statement
    end
    local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
    if base_csname_stem == nil then -- we couldn't parse the csname, give up
      goto other_statement
    end
    -- determine the variant argument specifiers
    local variant_argument_specifiers = parse_variant_argument_specifiers(base_csname, call.arguments[2])
    if variant_argument_specifiers == nil then -- we couldn't parse the variant argument specifiers, give up
      goto other_statement
    end
    -- determine all defined csnames as triples of a base csname, a defined csname, and a confidence
    local defined_csnames = {}
    for _, argument_specifiers in ipairs(variant_argument_specifiers) do
      if is_conditional then -- conditional function
        -- determine the conditions
        local conditions = parse_conditions(call.arguments[#call.arguments])
        if conditions == nil then -- we couldn't determine the conditions, give up
          goto other_statement
        end
        -- determine the defined csnames
        for _, condition_table in ipairs(conditions) do
          local condition, condition_confidence = table.unpack(condition_table)
          local base_conditional_csname = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
          local defined_conditional_csname = get_conditional_function_csname(base_csname_stem, argument_specifiers, condition)
          -- the statement is only as certain as the least certain piece of information that it combines
          local confidence = math.min(argument_specifiers.confidence, condition_confidence)
          if base_conditional_csname ~= defined_conditional_csname then
            table.insert(defined_csnames, {base_conditional_csname, defined_conditional_csname, confidence})
          end
        end
      else -- non-conditional function
        local defined_csname = replace_argument_specifiers(base_csname_stem, argument_specifiers)
        if base_csname ~= defined_csname then
          table.insert(defined_csnames, {base_csname, defined_csname, argument_specifiers.confidence})
        end
      end
    end
    -- record function variant definition statements for all effectively defined csnames
    for _, defined_csname_table in ipairs(defined_csnames) do
      local effective_base_csname, defined_csname, confidence = table.unpack(defined_csname_table)
      local statement = {
        type = FUNCTION_VARIANT_DEFINITION,
        call_range = call_range,
        confidence = confidence,
        -- The following attributes are specific to the type.
        base_csname = effective_base_csname,
        defined_csname = defined_csname,
        is_private = is_function_private(base_csname),
        is_conditional = is_conditional,
      }
      table.insert(statements, statement)
    end
    goto continue
  end
  -- Process a function definition.
  if function_definition ~= nil then
    local is_direct = table.unpack(function_definition)
    -- Process a direct function definition.
    if is_direct then
      -- determine the properties of the defined function
      local _, _, is_creator_function = table.unpack(function_definition)
      local is_conditional, maybe_redefinition, is_global, is_protected, is_nopar
      local defined_csname_argument, num_parameters
      if is_creator_function == true then -- direct application of a creator function
        defined_csname_argument = call.arguments[1]
        _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(function_definition)
      else -- indirect application of a creator function
        defined_csname_argument = call.arguments[2]
        -- the third argument, if readable, gives an initial guess of the number of parameters
        local num_parameter_argument = call.arguments[3]
        if num_parameter_argument ~= nil and num_parameter_argument.specifier == "n" then
          local num_parameters_text = extract_text_from_argument(num_parameter_argument)
          if num_parameters_text ~= nil then
            num_parameters = tonumber(num_parameters_text)
          end
        end
        -- the properties of the defined function are those of the creator function from the first argument
        local creator_function_csname = extract_csname_from_argument(call.arguments[1])
        if creator_function_csname == nil then -- couldn't determine the name of the creator function, give up
          goto other_statement
        end
        local actual_function_definition = lpeg.match(parsers.expl3_function_definition_csname, creator_function_csname)
        if actual_function_definition == nil then -- couldn't understand the creator function, give up
          goto other_statement
        end
        _, is_conditional, _, maybe_redefinition, is_global, is_protected, is_nopar = table.unpack(actual_function_definition)
      end
      -- determine the name of the defined function
      local defined_csname = extract_csname_from_argument(defined_csname_argument)
      if defined_csname == nil then -- we couldn't extract the csname, give up
        goto other_statement
      end
      local defined_csname_stem, argument_specifiers = parse_expl3_csname(defined_csname)
      -- determine the replacement text
      local replacement_text_number
      local replacement_text_argument = call.arguments[#call.arguments]
      do
        if replacement_text_argument.specifier ~= "n" then -- replacement text is hidden behind expansion
          goto skip_replacement_text -- record partial information
        end
        -- determine the number of parameters of the defined function
        local function update_num_parameters(updated_num_parameters)
          assert(updated_num_parameters ~= nil)
          if num_parameters == nil or updated_num_parameters > num_parameters then -- trust the highest guess
            num_parameters = updated_num_parameters
          end
        end
        -- first, try to count the argument specifiers in the name of the defined function
        if argument_specifiers ~= nil and lpeg.match(parsers.N_or_n_type_argument_specifiers, argument_specifiers) ~= nil then
          update_num_parameters(#argument_specifiers)
        end
        for _, argument in ipairs(call.arguments) do -- next, try to look for p-type "TeX parameter" argument specifiers
          if argument.specifier == "p" and argument.num_parameters ~= nil then
            update_num_parameters(argument.num_parameters)
            break
          end
        end
        if num_parameters == nil then -- we couldn't determine the number of parameters
          goto skip_replacement_text -- record partial information
        end
        -- parse the replacement text and record the function definition
        local mapped_replacement_text_token_range = new_range(
          first_map_forward(replacement_text_argument.token_range:start()),
          first_map_forward(replacement_text_argument.token_range:stop()),
          INCLUSIVE + MAYBE_EMPTY,
          #transformed_tokens
        )
        local doubly_transformed_tokens, second_map_back, second_map_forward = transform_replacement_text_tokens(
          content,
          transformed_tokens,
          issues,
          num_parameters,
          mapped_replacement_text_token_range
        )
        if doubly_transformed_tokens == nil then -- we couldn't parse the replacement text
          goto skip_replacement_text -- record partial information
        end
        -- compose the maps, so that they translate between the original tokens and the doubly transformed tokens
        local function map_back(...) return first_map_back(second_map_back(...)) end
        local function map_forward(...) return second_map_forward(first_map_forward(...)) end
        table.insert(replacement_text_tokens, {
          token_range = replacement_text_argument.token_range,
          transformed_tokens = doubly_transformed_tokens,
          map_back = map_back,
          map_forward = map_forward,
        })
        replacement_text_number = #replacement_text_tokens
      end
      ::skip_replacement_text::
      -- determine all effectively defined csnames as pairs of a csname and a confidence
      local effectively_defined_csnames = {}
      if is_conditional then -- conditional function
        -- The names of conditional functions are built from the stem and the argument specifiers.
        -- Without them, we would record names built from missing parts, so we give up instead,
        -- same as indirect function definitions do.
        if defined_csname_stem == nil then -- we couldn't parse the defined csname, give up
          goto other_statement
        end
        -- determine the conditions
        local conditions = parse_conditions(call.arguments[#call.arguments - 1])
        if conditions == nil then -- we couldn't determine the conditions, give up
          goto other_statement
        end
        -- determine the defined csnames
        for _, condition_table in ipairs(conditions) do
          local condition, confidence = table.unpack(condition_table)
          if condition == "p" and is_protected then
            issues:add("e404", "protected predicate function", byte_range)
          end
          local effectively_defined_csname = get_conditional_function_csname(defined_csname_stem, argument_specifiers, condition)
          table.insert(effectively_defined_csnames, {effectively_defined_csname, confidence})
        end
      else -- non-conditional function
        effectively_defined_csnames = {{defined_csname, DEFINITELY}}
      end
      -- record function definition statements for all effectively defined csnames
      for _, effectively_defined_csname_table in ipairs(effectively_defined_csnames) do
        local effectively_defined_csname, confidence = table.unpack(effectively_defined_csname_table)
        local statement = {
          type = FUNCTION_DEFINITION,
          call_range = call_range,
          confidence = confidence,
          -- The following attributes are specific to the type.
          subtype = FUNCTION_DEFINITION_DIRECT,
          maybe_redefinition = maybe_redefinition,
          is_private = is_function_private(defined_csname),
          is_global = is_global,
          defined_csname = effectively_defined_csname,
          -- The following attributes are specific to the subtype.
          is_conditional = is_conditional,
          is_protected = is_protected,
          is_nopar = is_nopar,
          replacement_text_number = replacement_text_number,
          replacement_text_argument = replacement_text_argument,
        }
        table.insert(statements, statement)
      end
    else
      -- Process an indirect function definition.
      local _, is_conditional, maybe_redefinition, is_global = table.unpack(function_definition)
      -- determine the name of the defined function
      local defined_csname_argument = call.arguments[1]
      local defined_csname = extract_csname_from_argument(defined_csname_argument)
      if defined_csname == nil then -- we couldn't extract the csname, give up
        goto other_statement
      end
      -- determine the name of the base function
      local base_csname_argument = call.arguments[2]
      local base_csname = extract_csname_from_argument(base_csname_argument)
      if base_csname == nil then -- we couldn't extract the csname, give up
        goto other_statement
      end
      -- determine all effectively defined csnames and effective base csnames
      local effective_defined_and_base_csnames = {}
      if is_conditional then -- conditional function
        -- parse the base and defined csnames
        local defined_csname_stem, defined_argument_specifiers = parse_expl3_csname(defined_csname)
        if defined_csname_stem == nil then -- we couldn't parse the defined csname, give up
          goto other_statement
        end
        local base_csname_stem, base_argument_specifiers = parse_expl3_csname(base_csname)
        if base_csname_stem == nil then -- we couldn't parse the base csname, give up
          goto other_statement
        end
        -- determine the conditions
        local conditions = parse_conditions(call.arguments[#call.arguments - 1])
        if conditions == nil then -- we couldn't determine the conditions, give up
          goto other_statement
        end
        -- determine the defined and base csnames
        for _, condition_table in ipairs(conditions) do
          local condition, confidence = table.unpack(condition_table)
          local effectively_defined_csname
            = get_conditional_function_csname(defined_csname_stem, defined_argument_specifiers, condition)
          local effective_base_csname
            = get_conditional_function_csname(base_csname_stem, base_argument_specifiers, condition)
          table.insert(effective_defined_and_base_csnames, {effectively_defined_csname, effective_base_csname, confidence})
        end
      else -- non-conditional function
        effective_defined_and_base_csnames = {{defined_csname, base_csname, DEFINITELY}}
      end
      -- record function definition statements for all effectively defined csnames
      for _, effective_defined_and_base_csname_table in ipairs(effective_defined_and_base_csnames) do
        local effectively_defined_csname, effective_base_csname, confidence
          = table.unpack(effective_defined_and_base_csname_table)
        local statement = {
          type = FUNCTION_DEFINITION,
          call_range = call_range,
          confidence = confidence,
          -- The following attributes are specific to the type.
          subtype = FUNCTION_DEFINITION_INDIRECT,
          maybe_redefinition = maybe_redefinition,
          is_private = is_function_private(defined_csname),
          is_global = is_global,
          defined_csname = effectively_defined_csname,
          -- The following attributes are specific to the subtype.
          base_csname = effective_base_csname,
          is_conditional = is_conditional,
        }
        table.insert(statements, statement)
      end
    end
    goto continue
  end
  -- Record any function call that we did not recognize or could not understand as an other statement.
  ::other_statement::
  local statement = {
    type = OTHER_STATEMENT,
    call_range = call_range,
    confidence = NONE,
  }
  table.insert(statements, statement)
elseif call.type == OTHER_TOKENS then -- other tokens
  local statement_type = classify_tokens(tokens, call.token_range)
  local statement = {
    type = statement_type,
    call_range = call_range,
    confidence = NONE,
  }
  table.insert(statements, statement)
else
  error('Unexpected call type "' .. call.type .. '"')
end
::continue::
end
return statements, replacement_text_tokens
end
-- Extract statements from function calls. For all identified function definitions, record replacement texts and recursively
-- apply syntactic and semantic analysis on them.
--
-- Returns the top-level statements and a table `replacement_texts` with four parallel lists indexed by
-- the number of a replacement text: `tokens`, `calls`, `statements`, and `nesting_depth`.
local function get_statements(tokens, groupings, calls)
  -- First, record top-level statements.
  local replacement_texts = {tokens = nil, calls = {}, statements = {}, nesting_depth = {}}
  local statements
  statements, replacement_texts.tokens = record_statements_and_replacement_texts(tokens, tokens, calls, identity, identity)
  -- Then, process any new replacement texts until convergence.
  -- Every iteration of the outer loop processes the replacement texts discovered by the previous one,
  -- i.e. the replacement texts at one nesting depth.
  local previous_num_replacement_texts = 0
  local current_num_replacement_texts = #replacement_texts.tokens
  local current_nesting_depth = 1
  while previous_num_replacement_texts < current_num_replacement_texts do
    for replacement_text_number = previous_num_replacement_texts + 1, current_num_replacement_texts do
      local replacement_text_tokens = replacement_texts.tokens[replacement_text_number]
      -- record the current nesting depth with the replacement text
      table.insert(replacement_texts.nesting_depth, current_nesting_depth)
      -- extract nested calls from the replacement text using syntactic analysis
      local nested_calls = get_calls(
        tokens,
        replacement_text_tokens.transformed_tokens,
        replacement_text_tokens.token_range,
        replacement_text_tokens.map_back,
        replacement_text_tokens.map_forward,
        issues,
        groupings,
        content
      )
      table.insert(replacement_texts.calls, nested_calls)
      -- extract nested statements and replacement texts from the nested calls using semantic analysis
      local nested_statements, nested_replacement_text_tokens = record_statements_and_replacement_texts(
        tokens,
        replacement_text_tokens.transformed_tokens,
        nested_calls,
        replacement_text_tokens.map_back,
        replacement_text_tokens.map_forward
      )
      -- The nested replacement texts are appended right below, so the n-th of them ends up at the position
      -- `#replacement_texts.tokens + n`. The offset must be taken here, not at the beginning of the outer loop:
      -- earlier replacement texts from the same iteration may have already appended their own nested texts.
      local replacement_text_number_offset = #replacement_texts.tokens
      for _, nested_statement in ipairs(nested_statements) do
        if nested_statement.type == FUNCTION_DEFINITION
            and nested_statement.subtype == FUNCTION_DEFINITION_DIRECT
            and nested_statement.replacement_text_number ~= nil then
          -- make the reference to the replacement text absolute instead of relative
          nested_statement.replacement_text_number = nested_statement.replacement_text_number + replacement_text_number_offset
        end
      end
      table.insert(replacement_texts.statements, nested_statements)
      for _, nested_tokens in ipairs(nested_replacement_text_tokens) do
        table.insert(replacement_texts.tokens, nested_tokens)
      end
    end
    previous_num_replacement_texts = current_num_replacement_texts
    current_num_replacement_texts = #replacement_texts.tokens
    current_nesting_depth = current_nesting_depth + 1
  end
  return statements, replacement_texts
end
-- Extract statements and replacement texts from the function calls of every part of the analyzed content.
-- Both lists are indexed by the number of the part.
local statements = {}
local replacement_texts = {}
for part_number, part_calls in ipairs(results.calls) do
  local part_statements, part_replacement_texts = get_statements(
    results.tokens[part_number],
    results.groupings[part_number],
    part_calls
  )
  table.insert(statements, part_statements)
  table.insert(replacement_texts, part_replacement_texts)
end
-- Report issues that are apparent after the semantic analysis.
--- Flatten the top-level and nested tokens, calls, and statements of all parts into parallel lists of segments.
--- A segment of tokens is a triple of the tokens of the part, the transformed tokens of the segment,
--- and a function that maps the former to the latter.
local token_segments, call_segments, statement_segments = {}, {}, {}
for part_number, part_calls in ipairs(results.calls) do
  -- first, the top-level segment of the part, where the tokens are not transformed
  local part_tokens = results.tokens[part_number]
  table.insert(call_segments, part_calls)
  table.insert(statement_segments, statements[part_number])
  table.insert(token_segments, {part_tokens, part_tokens, identity})
  -- then, one segment for every replacement text from the part
  local part_replacement_texts = replacement_texts[part_number]
  for replacement_text_number, nested_calls in ipairs(part_replacement_texts.calls) do
    local nested_tokens = part_replacement_texts.tokens[replacement_text_number]
    table.insert(call_segments, nested_calls)
    table.insert(statement_segments, part_replacement_texts.statements[replacement_text_number])
    table.insert(token_segments, {part_tokens, nested_tokens.transformed_tokens, nested_tokens.map_forward})
  end
end
--- Make a pass over the segments, building up information.
--- The accumulators below are filled in by the pass over the segments that follows.
--- NOTE(review): most of them are written outside of the code visible here; their descriptions come from their names.
local defined_private_functions = {}
---- Collect information about symbols that were definitely defined.
local called_functions_and_variants = {}
local defined_private_function_variant_texts, defined_private_function_variant_pattern = {}, parsers.fail
local defined_private_function_variant_byte_ranges = {}
local variant_base_csnames, indirect_definition_base_csnames = {}, {}
---- Collect information about symbols that may have been defined.
---- The `*_texts` tables are used as sets: a control sequence name is a key and the value is `true`.
---- `maybe_used_csname_pattern` starts as `parsers.fail` and is extended with further alternatives using `+`;
---- presumably the other `*_pattern` accumulators follow the same scheme.
local maybe_defined_csname_texts, maybe_defined_csname_pattern = {}, parsers.fail
local maybe_used_csname_texts, maybe_used_csname_pattern = {}, parsers.fail
for segment_number, segment_statements in ipairs(statement_segments) do
local segment_calls = call_segments[segment_number]
local segment_tokens, segment_transformed_tokens, map_forward = table.unpack(token_segments[segment_number])
-- Convert the tokens from a range into a PEG pattern: simple tokens must match literally,
-- whereas every run of complex tokens may match any text.
-- Returns the pattern, its human-readable transcript with "*" in place of complex material,
-- and the number of simple tokens in the range.
local function extract_pattern_from_tokens(token_range)
  local pattern = parsers.success
  local transcript_parts = {}
  local simple_token_count = 0
  local in_complex_run = false -- whether the previous token was complex material
  for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
    if not is_token_simple(token) then -- complex material
      if not in_complex_run then -- a single wildcard stands for a whole run of complex tokens
        pattern = pattern * parsers.any^0
        table.insert(transcript_parts, "*")
        in_complex_run = true
      end
    else -- simple material
      pattern = pattern * lpeg.P(token.payload)
      table.insert(transcript_parts, token.payload)
      simple_token_count = simple_token_count + 1
      in_complex_run = false
    end
  end
  return pattern, table.concat(transcript_parts), simple_token_count
end
-- Try to convert the tokens from a range into a control sequence name.
-- Returns a table with the fields `payload`, `transcript`, and `type`: a TEXT when all tokens are simple and
-- a PATTERN otherwise. Returns nil when the pattern would contain fewer simple tokens than
-- the option "min_simple_tokens_in_csname_pattern" requires.
local function extract_csname_from_tokens(token_range)
  local text = extract_text_from_tokens(token_range, segment_transformed_tokens, map_forward)
  if text ~= nil then -- simple material
    return {
      payload = text,
      transcript = text,
      type = TEXT
    }
  end
  -- complex material
  local pattern, transcript, num_simple_tokens = extract_pattern_from_tokens(token_range)
  local min_simple_tokens = get_option("min_simple_tokens_in_csname_pattern", options, pathname)
  if num_simple_tokens < min_simple_tokens then
    return nil -- too few simple tokens, give up
  end
  return {
    payload = pattern,
    transcript = transcript,
    type = PATTERN
  }
end
-- Process an argument and record control sequence name usage and definitions.
--
-- For c- and v-type arguments, the csname extracted from the argument tokens is recorded as maybe used. For N- and
-- n-type arguments, every control sequence token is recorded as maybe used and, when such a token looks like an
-- expl3 function-defining csname and is directly followed by another control sequence token, the following token
-- is recorded as maybe defined.
local function process_argument_tokens(argument)
  local specifier = argument.specifier
  -- Extract text from tokens within c- and v-type arguments and record it as control sequence name usage.
  if specifier == "c" or specifier == "v" then
    local csname = extract_csname_from_tokens(argument.token_range)
    if csname ~= nil then
      if csname.type == TEXT then
        maybe_used_csname_texts[csname.payload] = true
      elseif csname.type == PATTERN then
        maybe_used_csname_pattern = maybe_used_csname_pattern + csname.payload
      end
    end
  end
  -- Only N- and n-type arguments are scanned for control sequence tokens.
  if lpeg.match(parsers.N_or_n_type_argument_specifier, specifier) == nil then
    return
  end
  local num_transformed_tokens = #segment_transformed_tokens
  for token_number, token in argument.token_range:enumerate(segment_transformed_tokens, map_forward) do
    if token.type == CONTROL_SEQUENCE then
      -- Record control sequence name usage.
      maybe_used_csname_texts[token.payload] = true
      -- Record control sequence name definitions.
      local next_token_number = token_number + 1
      if next_token_number <= num_transformed_tokens then
        local next_token = segment_transformed_tokens[next_token_number]
        local next_token_is_csname = next_token.type == CONTROL_SEQUENCE
        if next_token_is_csname and lpeg.match(parsers.expl3_function_definition_csname, token.payload) ~= nil then
          maybe_defined_csname_texts[next_token.payload] = true
        end
      end
    end
  end
end
-- Walk the statements of the segment and record which control sequence names each of them defines, uses, or calls.
for _, statement in ipairs(segment_statements) do
  -- Convert the call range of the statement to a token range and then to a byte range used when reporting issues.
  local token_range = statement.call_range:new_range_from_subranges(get_call_token_range(segment_calls), #segment_tokens)
  local byte_range = token_range:new_range_from_subranges(get_token_byte_range(segment_tokens), #content)
  -- Process a function variant definition.
  if statement.type == FUNCTION_VARIANT_DEFINITION then
    -- Record base control sequence names of variants, both as control sequence name usage and separately.
    table.insert(variant_base_csnames, {statement.base_csname, byte_range})
    maybe_used_csname_texts[statement.base_csname] = true
    -- Record control sequence name definitions.
    if statement.defined_csname.type == TEXT then
      maybe_defined_csname_texts[statement.defined_csname.payload] = true
    elseif statement.defined_csname.type == PATTERN then
      maybe_defined_csname_pattern = maybe_defined_csname_pattern + statement.defined_csname.payload
    else
      error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
    end
    -- Record private function variant definitions.
    if statement.confidence == DEFINITELY and statement.is_private then
      table.insert(defined_private_function_variant_byte_ranges, byte_range)
      local defined_private_function_variant = {
        number = #defined_private_function_variant_byte_ranges,
        csname = statement.defined_csname
      }
      if statement.defined_csname.type == TEXT then
        table.insert(defined_private_function_variant_texts, defined_private_function_variant)
      elseif statement.defined_csname.type == PATTERN then
        -- NOTE: In LPeg, `pattern / table` is a query capture that looks the matched text up in the table, which
        -- would produce no value here and make `lpeg.match()` return a position instead of the variant record.
        -- Therefore, we use a function capture that always produces the variant record.
        defined_private_function_variant_pattern = (
          defined_private_function_variant_pattern
          + statement.defined_csname.payload
          / function() return defined_private_function_variant end
        )
      else
        error('Unexpected csname type "' .. statement.defined_csname.type .. '"')
      end
    end
  -- Process a function definition.
  elseif statement.type == FUNCTION_DEFINITION then
    -- Record the base control sequences used in indirect function definitions.
    if statement.subtype == FUNCTION_DEFINITION_INDIRECT then
      maybe_used_csname_texts[statement.base_csname] = true
      table.insert(indirect_definition_base_csnames, {statement.base_csname, byte_range})
    end
    -- Record control sequence name usage and definitions.
    maybe_defined_csname_texts[statement.defined_csname] = true
    if statement.subtype == FUNCTION_DEFINITION_DIRECT and statement.replacement_text_number == nil then
      process_argument_tokens(statement.replacement_text_argument)
    end
    -- Record private function definition.
    if statement.confidence == DEFINITELY and statement.is_private then
      table.insert(defined_private_functions, {statement.defined_csname, byte_range})
    end
  -- Process an unrecognized statement.
  elseif statement.type == OTHER_STATEMENT then
    -- Record control sequence name usage and definitions.
    for _, call in statement.call_range:enumerate(segment_calls) do
      maybe_used_csname_texts[call.csname] = true
      table.insert(called_functions_and_variants, {call.csname, byte_range})
      for _, argument in ipairs(call.arguments) do
        process_argument_tokens(argument)
      end
    end
  -- Process a block of unrecognized tokens.
  elseif statement.type == OTHER_TOKENS_SIMPLE or statement.type == OTHER_TOKENS_COMPLEX then
    -- Record control sequence name usage by scanning all control sequence tokens.
    for _, token in token_range:enumerate(segment_transformed_tokens, map_forward) do
      if token.type == CONTROL_SEQUENCE then
        maybe_used_csname_texts[token.payload] = true
      end
    end
  else
    error('Unexpected statement type "' .. statement.type .. '"')
  end
end
end
--- Report issues apparent from the collected information.
---- Report unused private functions, i.e. those matched by neither the used csname texts nor the used csname pattern.
for _, private_function in ipairs(defined_private_functions) do
  local private_csname, private_byte_range = private_function[1], private_function[2]
  local is_maybe_used = (
    maybe_used_csname_texts[private_csname]
    or lpeg.match(maybe_used_csname_pattern, private_csname) ~= nil
  )
  if not is_maybe_used then
    issues:add('w401', 'unused private function', private_byte_range)
  end
end
---- Report unused private function variants.
local used_private_function_variants = {}
----- Initially, consider all private function variants unused.
for variant_number, _ in ipairs(defined_private_function_variant_byte_ranges) do
  used_private_function_variants[variant_number] = false
end
----- Mark variants with exact names that match either the used csname texts or the used csname pattern.
for _, variant in ipairs(defined_private_function_variant_texts) do
  local variant_csname = variant.csname
  assert(variant_csname.type == TEXT)
  local is_maybe_used = (
    maybe_used_csname_texts[variant_csname.payload]
    or lpeg.match(maybe_used_csname_pattern, variant_csname.payload) ~= nil
  )
  if is_maybe_used then
    used_private_function_variants[variant.number] = true
  end
end
----- Mark variants with name patterns that match any of the used csname texts.
for maybe_used_csname, _ in pairs(maybe_used_csname_texts) do
  -- NOTE: Although we might want to also test whether "defined_private_function_variant_pattern" and
  -- "maybe_used_csname_pattern" overlap, intersection is undecidable for parsing expression languages (PELs). In
  -- theory, we could use regular expressions instead of PEG patterns, since intersection is decidable for regular
  -- languages. In practice, there are no Lua libraries that would implement the required algorithms. Therefore, it
  -- seems more practical to just accept that low-confidence function variant definitions and function uses don't
  -- interact, not just because of the technical difficulty but also because the combined confidence is just too low.
  local matched_variant = lpeg.match(defined_private_function_variant_pattern, maybe_used_csname)
  if matched_variant ~= nil then
    assert(matched_variant.csname.type == PATTERN)
    used_private_function_variants[matched_variant.number] = true
  end
end
----- Report the variants that remained unused.
for variant_number, variant_byte_range in ipairs(defined_private_function_variant_byte_ranges) do
  local is_used = used_private_function_variants[variant_number]
  if not is_used then
    issues:add('w402', 'unused private function variant', variant_byte_range)
  end
end
local imported_prefixes = get_option('imported_prefixes', options, pathname)
local expl3_well_known_function_csname = parsers.expl3_well_known_function_csname(imported_prefixes)
-- Determine whether a csname is neither a well-known function nor matched by the maybe-defined texts and pattern.
local function is_csname_undefined(csname)
  if lpeg.match(expl3_well_known_function_csname, csname) ~= nil then
    return false
  end
  if maybe_defined_csname_texts[csname] then
    return false
  end
  return not lpeg.match(maybe_defined_csname_pattern, csname)
end
---- Report function variants for undefined functions.
for _, variant_base in ipairs(variant_base_csnames) do
  local base_csname, base_byte_range = variant_base[1], variant_base[2]
  if is_csname_undefined(base_csname) then
    issues:add('e405', 'function variant for an undefined function', base_byte_range)
  end
end
---- Report calls to undefined functions and function variants.
for _, called in ipairs(called_functions_and_variants) do
  local called_csname, called_byte_range = called[1], called[2]
  -- Only csnames that look like expl3 functions are checked.
  local looks_like_function = lpeg.match(parsers.expl3like_function_csname, called_csname) ~= nil
  if looks_like_function and is_csname_undefined(called_csname) then
    issues:add('e408', 'calling an undefined function', called_byte_range)
  end
end
---- Report indirect function definitions from undefined base functions.
for _, indirect_base in ipairs(indirect_definition_base_csnames) do
  local indirect_csname, indirect_byte_range = indirect_base[1], indirect_base[2]
  -- Only csnames that look like expl3 functions are checked.
  local looks_like_function = lpeg.match(parsers.expl3like_function_csname, indirect_csname) ~= nil
  if looks_like_function and is_csname_undefined(indirect_csname) then
    issues:add('e411', 'indirect function definition from an undefined function', indirect_byte_range)
  end
end
-- Store the intermediate results of the analysis.
-- The statements and the replacement texts are attached to `results` under the fields of the same names.
results.statements = statements
results.replacement_texts = replacement_texts
end