#!/usr/bin/python
#
# Copyright (C) 2014-2024 Free Software Foundation, Inc.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

import sys
import getopt
import re
import io
from datetime import datetime
from operator import attrgetter

# True if unrecognised lines should cause a fatal error.  Might want to turn
# this on by default later.
strict = False

# True if the order of .log segments should match the .sum file, false if
# they should keep the original order.
sort_logs = True

# A version of open() that is safe against whatever binary output
# might be added to the log.
def safe_open (filename):
   if sys.version_info >= (3, 0):
       return open (filename, 'r', errors = 'surrogateescape')
   return open (filename, 'r')

# Force stdout to handle escape sequences from a safe_open file.
if sys.version_info >= (3, 0):
   sys.stdout = io.TextIOWrapper (sys.stdout.buffer,
                                  errors = 'surrogateescape')

class Named:
   def __init__ (self, name):
       self.name = name

class ToolRun (Named):
   def __init__ (self, name):
       Named.__init__ (self, name)
       # The variations run for this tool, mapped by --target_board name.
       self.variations = dict()

   # Return the VariationRun for variation NAME.
   def get_variation (self, name):
       if name not in self.variations:
           self.variations[name] = VariationRun (name)
       return self.variations[name]

class VariationRun (Named):
   def __init__ (self, name):
       Named.__init__ (self, name)
       # A segment of text before the harness runs start, describing which
       # baseboard files were loaded for the target.
       self.header = None
       # The harnesses run for this variation, mapped by filename.
       self.harnesses = dict()
       # A list giving the number of times each type of result has
       # been seen.
       self.counts = []

   # Return the HarnessRun for harness NAME.
   def get_harness (self, name):
       if name not in self.harnesses:
           self.harnesses[name] = HarnessRun (name)
       return self.harnesses[name]

class HarnessRun (Named):
   def __init__ (self, name):
       Named.__init__ (self, name)
       # Segments of text that make up the harness run, mapped by a test-based
       # key that can be used to order them.
       self.segments = dict()
       # Segments of text that make up the harness run but which have
       # no recognized test results.  These are typically harnesses that
       # are completely skipped for the target.
       self.empty = []
       # A list of results.  Each entry is a pair in which the first element
       # is a unique sorting key and in which the second is the full
       # PASS/FAIL line.
       self.results = []

   # Add a segment of text to the harness run.  If the segment includes
   # test results, KEY is an example of one of them, and can be used to
   # combine the individual segments in order.  If the segment has no
   # test results (e.g. because the harness doesn't do anything for the
   # current configuration) then KEY is None instead.  In that case
   # just collect the segments in the order that we see them.
   def add_segment (self, key, segment):
       if key:
           assert key not in self.segments
           self.segments[key] = segment
       else:
           self.empty.append (segment)

class Segment:
   def __init__ (self, filename, start):
       self.filename = filename
       self.start = start
       self.lines = 0

class Prog:
   def __init__ (self):
       # The variations specified on the command line.
       self.variations = []
       # The variations seen in the input files.
       self.known_variations = set()
       # The tools specified on the command line.
       self.tools = []
       # Whether to create .sum rather than .log output.
       self.do_sum = True
       # Regexps used while parsing.
       self.test_run_re = re.compile (r'^Test run by (\S+) on (.*)$',
                                      re.IGNORECASE)
       self.tool_re = re.compile (r'^\t\t=== (.*) tests ===$')
       self.result_re = re.compile (r'^(PASS|XPASS|FAIL|XFAIL|UNRESOLVED'
                                    r'|WARNING|ERROR|UNSUPPORTED|UNTESTED'
                                    r'|KFAIL|KPASS|PATH|DUPLICATE):\s*(.+)')
       self.completed_re = re.compile (r'.* completed at (.*)')
       # Pieces of text to write at the head of the output.
       # start_line is a pair in which the first element is a datetime
       # and in which the second is the associated 'Test Run By' line.
       self.start_line = None
       self.native_line = ''
       self.target_line = ''
       self.host_line = ''
       self.acats_premable = ''
       # Pieces of text to write at the end of the output.
       # end_line is like start_line but for the 'runtest completed' line.
       self.acats_failures = []
       self.version_output = ''
       self.end_line = None
       # Known summary types.
       self.count_names = [
           '# of DejaGnu errors\t\t',
           '# of expected passes\t\t',
           '# of unexpected failures\t',
           '# of unexpected successes\t',
           '# of expected failures\t\t',
           '# of unknown successes\t\t',
           '# of known failures\t\t',
           '# of untested testcases\t\t',
           '# of unresolved testcases\t',
           '# of unsupported tests\t\t',
           '# of paths in test names\t',
           '# of duplicate test names\t'
       ]
       self.runs = dict()

   def usage (self):
       name = sys.argv[0]
       sys.stderr.write ('Usage: ' + name
                         + ''' [-t tool] [-l variant-list] [-L] log-or-sum-file ...

   tool           The tool (e.g. g++, libffi) for which to create a
                  new test summary file.  If not specified then output
                  is created for all tools.
   variant-list   One or more test variant names.  If the list is
                  not specified then one is constructed from all
                  variants in the files for <tool>.
   sum-file       A test summary file with the format of those
                  created by runtest from DejaGnu.
   If -L is used, merge *.log files instead of *.sum.  In this
   mode the exact order of lines may not be preserved, just different
   Running *.exp chunks should be in correct order.
''')
       sys.exit (1)

   def fatal (self, what, string):
       if not what:
           what = sys.argv[0]
       sys.stderr.write (what + ': ' + string + '\n')
       sys.exit (1)

   # Parse the command-line arguments.
   def parse_cmdline (self):
       try:
           (options, self.files) = getopt.getopt (sys.argv[1:], 'l:t:L')
           if len (self.files) == 0:
               self.usage()
           for (option, value) in options:
               if option == '-l':
                   self.variations.append (value)
               elif option == '-t':
                   self.tools.append (value)
               else:
                   self.do_sum = False
       except getopt.GetoptError as e:
           self.fatal (None, e.msg)

   # Try to parse time string TIME, returning an arbitrary time on failure.
   # Getting this right is just a nice-to-have so failures should be silent.
   def parse_time (self, time):
       try:
           return datetime.strptime (time, '%c')
       except ValueError:
           return datetime.now()

   # Parse an integer and abort on failure.
   def parse_int (self, filename, value):
       try:
           return int (value)
       except ValueError:
           self.fatal (filename, 'expected an integer, got: ' + value)

   # Return a list that represents no test results.
   def zero_counts (self):
       return [0 for x in self.count_names]

   # Return the ToolRun for tool NAME.
   def get_tool (self, name):
       if name not in self.runs:
           self.runs[name] = ToolRun (name)
       return self.runs[name]

   # Add the result counts in list FROMC to TOC.
   def accumulate_counts (self, toc, fromc):
       for i in range (len (self.count_names)):
           toc[i] += fromc[i]

   # Parse the list of variations after 'Schedule of variations:'.
   # Return the number seen.
   def parse_variations (self, filename, file):
       num_variations = 0
       while True:
           line = file.readline()
           if line == '':
               self.fatal (filename, 'could not parse variation list')
           if line == '\n':
               break
           self.known_variations.add (line.strip())
           num_variations += 1
       return num_variations

   # Parse from the first line after 'Running target ...' to the end
   # of the run's summary.
   def parse_run (self, filename, file, tool, variation, num_variations):
       header = None
       harness = None
       segment = None
       final_using = 0
       has_warning = 0

       # If this is the first run for this variation, add any text before
       # the first harness to the header.
       if not variation.header:
           segment = Segment (filename, file.tell())
           variation.header = segment

       # Parse the rest of the summary (the '# of ' lines).
       if len (variation.counts) == 0:
           variation.counts = self.zero_counts()

       # Parse up until the first line of the summary.
       if num_variations == 1:
           end = '\t\t=== ' + tool.name + ' Summary ===\n'
       else:
           end = ('\t\t=== ' + tool.name + ' Summary for '
                  + variation.name + ' ===\n')
       while True:
           line = file.readline()
           if line == '':
               self.fatal (filename, 'no recognised summary line')
           if line == end:
               break

           # Look for the start of a new harness.
           if line.startswith ('Running ') and line.endswith (' ...\n'):
               # Close off the current harness segment, if any.
               if harness:
                   segment.lines -= final_using
                   harness.add_segment (first_key, segment)
               name = line[len ('Running '):-len(' ...\n')]
               harness = variation.get_harness (name)
               segment = Segment (filename, file.tell())
               first_key = None
               final_using = 0
               continue

           # Record test results.  Associate the first test result with
           # the harness segment, so that if a run for a particular harness
           # has been split up, we can reassemble the individual segments
           # in a sensible order.
           #
           # dejagnu sometimes issues warnings about the testing environment
           # before running any tests.  Treat them as part of the header
           # rather than as a test result.
           match = self.result_re.match (line)
           if match and (harness or not line.startswith ('WARNING:')):
               if not harness:
                   self.fatal (filename, 'saw test result before harness name')
               name = match.group (2)
               # Ugly hack to get the right order for gfortran.
               if name.startswith ('gfortran.dg/g77/'):
                   name = 'h' + name
               # If we have a time out warning, make sure it appears
               # before the following testcase diagnostic: we insert
               # the testname before 'program' so that sort faces a
               # list of testnames.
               if line.startswith ('WARNING: program timed out'):
                 has_warning = 1
               else:
                 if has_warning == 1:
                     key = (name, len (harness.results))
                     myline = 'WARNING: %s program timed out.\n' % name
                     harness.results.append ((key, myline))
                     has_warning = 0
                 key = (name, len (harness.results))
                 harness.results.append ((key, line))
                 if not first_key and sort_logs:
                     first_key = key
               if line.startswith ('ERROR: (DejaGnu)'):
                   for i in range (len (self.count_names)):
                       if 'DejaGnu errors' in self.count_names[i]:
                           variation.counts[i] += 1
                           break

           # 'Using ...' lines are only interesting in a header.  Splitting
           # the test up into parallel runs leads to more 'Using ...' lines
           # than there would be in a single log.
           if line.startswith ('Using '):
               final_using += 1
           else:
               final_using = 0

           # Add other text to the current segment, if any.
           if segment:
               segment.lines += 1

       # Close off the final harness segment, if any.
       if harness:
           segment.lines -= final_using
           harness.add_segment (first_key, segment)

       while True:
           before = file.tell()
           line = file.readline()
           if line == '':
               break
           if line == '\n':
               continue
           if not line.startswith ('# '):
               file.seek (before)
               break
           found = False
           for i in range (len (self.count_names)):
               if line.startswith (self.count_names[i]):
                   count = line[len (self.count_names[i]):-1].strip()
                   variation.counts[i] += self.parse_int (filename, count)
                   found = True
                   break
           if not found:
               self.fatal (filename, 'unknown test result: ' + line[:-1])

   # Parse an acats run, which uses a different format from dejagnu.
   # We have just skipped over '=== acats configuration ==='.
   def parse_acats_run (self, filename, file):
       # Parse the preamble, which describes the configuration and logs
       # the creation of support files.
       record = (self.acats_premable == '')
       if record:
           self.acats_premable = '\t\t=== acats configuration ===\n'
       while True:
           line = file.readline()
           if line == '':
               self.fatal (filename, 'could not parse acats preamble')
           if line == '\t\t=== acats tests ===\n':
               break
           if record:
               self.acats_premable += line

       # Parse the test results themselves, using a dummy variation name.
       tool = self.get_tool ('acats')
       variation = tool.get_variation ('none')
       self.parse_run (filename, file, tool, variation, 1)

       # Parse the failure list.
       while True:
           before = file.tell()
           line = file.readline()
           if line.startswith ('*** FAILURES: '):
               self.acats_failures.append (line[len ('*** FAILURES: '):-1])
               continue
           file.seek (before)
           break

   # Parse the final summary at the end of a log in order to capture
   # the version output that follows it.
   def parse_final_summary (self, filename, file):
       record = (self.version_output == '')
       while True:
           line = file.readline()
           if line == '':
               break
           if line.startswith ('# of '):
               continue
           if record:
               self.version_output += line
           if line == '\n':
               break

   # Parse a .log or .sum file.
   def parse_file (self, filename, file):
       tool = None
       target = None
       num_variations = 1
       while True:
           line = file.readline()
           if line == '':
               return

           # Parse the list of variations, which comes before the test
           # runs themselves.
           if line.startswith ('Schedule of variations:'):
               num_variations = self.parse_variations (filename, file)
               continue

           # Parse a testsuite run for one tool/variation combination.
           if line.startswith ('Running target '):
               name = line[len ('Running target '):-1]
               if not tool:
                   self.fatal (filename, 'could not parse tool name')
               if name not in self.known_variations:
                   self.fatal (filename, 'unknown target: ' + name)
               self.parse_run (filename, file, tool,
                               tool.get_variation (name),
                               num_variations)
               # If there is only one variation then there is no separate
               # summary for it.  Record any following version output.
               if num_variations == 1:
                   self.parse_final_summary (filename, file)
               continue

           # Parse the start line.  In the case where several files are being
           # parsed, pick the one with the earliest time.
           match = self.test_run_re.match (line)
           if match:
               time = self.parse_time (match.group (2))
               if not self.start_line or self.start_line[0] > time:
                   self.start_line = (time, line)
               continue

           # Parse the form used for native testing.
           if line.startswith ('Native configuration is '):
               self.native_line = line
               continue

           # Parse the target triplet.
           if line.startswith ('Target is '):
               self.target_line = line
               continue

           # Parse the host triplet.
           if line.startswith ('Host   is '):
               self.host_line = line
               continue

           # Parse the acats premable.
           if line == '\t\t=== acats configuration ===\n':
               self.parse_acats_run (filename, file)
               continue

           # Parse the tool name.
           match = self.tool_re.match (line)
           if match:
               tool = self.get_tool (match.group (1))
               continue

           # Skip over the final summary (which we instead create from
           # individual runs) and parse the version output.
           if tool and line == '\t\t=== ' + tool.name + ' Summary ===\n':
               if file.readline() != '\n':
                   self.fatal (filename, 'expected blank line after summary')
               self.parse_final_summary (filename, file)
               continue

           # Parse the completion line.  In the case where several files
           # are being parsed, pick the one with the latest time.
           match = self.completed_re.match (line)
           if match:
               time = self.parse_time (match.group (1))
               if not self.end_line or self.end_line[0] < time:
                   self.end_line = (time, line)
               continue

           # Sanity check to make sure that important text doesn't get
           # dropped accidentally.
           if strict and line.strip() != '':
               self.fatal (filename, 'unrecognised line: ' + line[:-1])

   # Output a segment of text.
   def output_segment (self, segment):
       with safe_open (segment.filename) as file:
           file.seek (segment.start)
           for i in range (segment.lines):
               sys.stdout.write (file.readline())

   # Output a summary giving the number of times each type of result has
   # been seen.
   def output_summary (self, tool, counts):
       for i in range (len (self.count_names)):
           name = self.count_names[i]
           # dejagnu only prints result types that were seen at least once,
           # but acats always prints a number of unexpected failures.
           if (counts[i] > 0
               or (tool.name == 'acats'
                   and name.startswith ('# of unexpected failures'))):
               sys.stdout.write ('%s%d\n' % (name, counts[i]))

   # Output unified .log or .sum information for a particular variation,
   # with a summary at the end.
   def output_variation (self, tool, variation):
       self.output_segment (variation.header)
       for harness in sorted (variation.harnesses.values(),
                              key = attrgetter ('name')):
           sys.stdout.write ('Running ' + harness.name + ' ...\n')
           if self.do_sum:
               harness.results.sort()
               for (key, line) in harness.results:
                   sys.stdout.write (line)
           else:
               # Rearrange the log segments into test order (but without
               # rearranging text within those segments).
               for key in sorted (harness.segments.keys()):
                   self.output_segment (harness.segments[key])
               for segment in harness.empty:
                   self.output_segment (segment)
       if len (self.variations) > 1:
           sys.stdout.write ('\t\t=== ' + tool.name + ' Summary for '
                             + variation.name + ' ===\n\n')
           self.output_summary (tool, variation.counts)

   # Output unified .log or .sum information for a particular tool,
   # with a summary at the end.
   def output_tool (self, tool):
       counts = self.zero_counts()
       if tool.name == 'acats':
           # acats doesn't use variations, so just output everything.
           # It also has a different approach to whitespace.
           sys.stdout.write ('\t\t=== ' + tool.name + ' tests ===\n')
           for variation in tool.variations.values():
               self.output_variation (tool, variation)
               self.accumulate_counts (counts, variation.counts)
           sys.stdout.write ('\t\t=== ' + tool.name + ' Summary ===\n')
       else:
           # Output the results in the usual dejagnu runtest format.
           sys.stdout.write ('\n\t\t=== ' + tool.name + ' tests ===\n\n'
                             'Schedule of variations:\n')
           for name in self.variations:
               if name in tool.variations:
                   sys.stdout.write ('    ' + name + '\n')
           sys.stdout.write ('\n')
           for name in self.variations:
               if name in tool.variations:
                   variation = tool.variations[name]
                   sys.stdout.write ('Running target '
                                     + variation.name + '\n')
                   self.output_variation (tool, variation)
                   self.accumulate_counts (counts, variation.counts)
           sys.stdout.write ('\n\t\t=== ' + tool.name + ' Summary ===\n\n')
       self.output_summary (tool, counts)

   def main (self):
       self.parse_cmdline()
       try:
           # Parse the input files.
           for filename in self.files:
               with safe_open (filename) as file:
                   self.parse_file (filename, file)

           # Decide what to output.
           if len (self.variations) == 0:
               self.variations = sorted (self.known_variations)
           else:
               for name in self.variations:
                   if name not in self.known_variations:
                       self.fatal (None, 'no results for ' + name)
           if len (self.tools) == 0:
               self.tools = sorted (self.runs.keys())

           # Output the header.
           if self.start_line:
               sys.stdout.write (self.start_line[1])
           sys.stdout.write (self.native_line)
           sys.stdout.write (self.target_line)
           sys.stdout.write (self.host_line)
           sys.stdout.write (self.acats_premable)

           # Output the main body.
           for name in self.tools:
               if name not in self.runs:
                   self.fatal (None, 'no results for ' + name)
               self.output_tool (self.runs[name])

           # Output the footer.
           if len (self.acats_failures) > 0:
               sys.stdout.write ('*** FAILURES: '
                                 + ' '.join (self.acats_failures) + '\n')
           sys.stdout.write (self.version_output)
           if self.end_line:
               sys.stdout.write (self.end_line[1])
       except IOError as e:
           self.fatal (e.filename, e.strerror)

Prog().main()