#!/usr/bin/env python3
#####
# findsym2.py
# Translated from findsym.pl
#
# This script scans through multiple C++ source files to find all occurrences
# of the SYM(symbol_name) macro. It collects these unique symbol names and
# generates a header file containing declarations for these symbols. The purpose
# is to ensure that each symbol is translated only once when creating the
# symbol table.
#####

import re
import sys
import textwrap

# Matches one SYM(name) use. The single capture group is the symbol name, which
# must look like a C identifier (letter or underscore, then letters, digits or
# underscores). Compiled once so the per-line scan does not repeat the lookup.
_SYM_PATTERN = re.compile(r"SYM\(([_A-Za-z][_A-Za-z0-9]*)\)")

# Fixed preamble of the generated header; the ADDSYMBOL lines follow it.
_HEADER_PREAMBLE = textwrap.dedent(
   """\
   /*****
    * This file is automatically generated by findsym.py
    * Changes will be overwritten.
    *****/

   // If the ADDSYMBOL macro is not already defined, define it with the default
   // purpose of referring to an external pre-translated symbol, such that
   // SYM(name) also refers to that symbol.
   #ifndef ADDSYMBOL
       #define ADDSYMBOL(name) extern sym::symbol PRETRANSLATED_SYMBOL_##name
       #define SYM(name) PRETRANSLATED_SYMBOL_##name
   #endif

   """
)


def _collect_symbols(innames):
   """Return the set of distinct names used as SYM(name) in the given files.

   innames is an iterable of source file paths. Each file is scanned line by
   line, so a SYM(...) use that is split across lines is not detected.

   Raises OSError if one of the files cannot be opened or read.
   """
   symbols = set()
   for inname in innames:
      # errors="replace": only ASCII identifiers are matched, so a stray
      # non-UTF-8 byte elsewhere in a source file (e.g. in a comment) must not
      # abort the run, and replacing it cannot change what is matched.
      with open(inname, "r", encoding="utf-8", errors="replace") as infile:
         for line in infile:
            symbols.update(_SYM_PATTERN.findall(line))
   return symbols


def _write_header(outname, symbols):
   """Write the preamble and one ADDSYMBOL(name); line per symbol to outname.

   The symbols are written in sorted order so the output is deterministic.

   Raises OSError if the output file cannot be opened or written.
   """
   with open(outname, "w", encoding="utf-8") as header:
      header.write(_HEADER_PREAMBLE)
      header.writelines(f"ADDSYMBOL({symbol});\n" for symbol in sorted(symbols))


def main(argv):
   """Run the tool on an argument vector shaped like sys.argv.

   argv[1] is the name of the header file to generate and argv[2:] are the C++
   source files to scan. Returns the process exit status: 0 on success, 1 when
   the output file name is missing (a usage message is printed to stderr).
   """
   if len(argv) < 2:
      print(
         "usage: ./findsym2.py out_symbols.h file1.cc file2.cc ...",
         file=sys.stderr,
      )
      return 1

   _, outname, *innames = argv

   # Scan every input before touching the output file: if an input cannot be
   # read, no truncated header is left behind for the build to pick up.
   symbols = _collect_symbols(innames)
   _write_header(outname, symbols)
   return 0


if __name__ == "__main__":
   sys.exit(main(sys.argv))