#!/bin/sh
#
# This script parses the output of a gcc bootstrap when using warning
# flags and determines various statistics.
# 
# usage: warn_summary [-llf] [-s stage] [-nosub|-ch|-cp|-f|-java|-ada|-intl|-fixinc]
# 	[-pass|-wpass] [file(s)]
#
# -llf
# Filter out long lines from the bootstrap output before any other
# action.  This is useful for systems with broken awks/greps which choke
# on long lines.  It is not done by default as it sometimes slows things
# down.
#
# -s number
# Take warnings from stage "Number".  Stage 0 means show warnings from
# before and after the gcc bootstrap directory.  E.g. libraries, etc.
# This presupposes using "gcc -W*" for the stage1 compiler.
#
# -nosub
# Only show warnings from the gcc top level directory.
# -ch|-cp|-f|-java|-ada|-intl|-fixinc
# Only show warnings from the specified language subdirectory.
# These override each other so only the last one passed takes effect.
#
# -pass
# Pass through the bootstrap output after filtering stage and subdir
# (useful for manual inspection.)  This is all lines, not just warnings.
# -wpass
# Pass through only warnings from the bootstrap output after filtering
# stage and subdir.
#
# By Kaveh Ghazi  (ghazi@caip.rutgers.edu)  12/13/97.


# Some awks choke on long lines, sed seems to do a better job.
# Copy stdin to stdout, truncating every line longer than 255
# characters to its first 255 characters.  This is only done if -llf
# was specified (i.e. $llf is non-empty), because it can really slow
# things down; otherwise the input is passed through untouched.
#
# The limit is spelled as the interval expression '\{255\}' rather than
# as a run of 255 literal dots, so that it can be checked at a glance.
# POSIX guarantees that repetition counts up to 255 are accepted.
longLineFilter()
{
  if test -z "$llf" ; then
    cat
  else
    sed 's/^\(.\{255\}\).*/\1/'
  fi
}

# This function does one of three things, depending on $filter.  It
# either passes through all warning data ($filter empty), or passes
# through gcc toplevel warnings only ($filter = nosub), or passes
# through the warnings of one particular subdirectory (any other
# value, taken as the directory name below gcc/).
# The input is always run through longLineFilter first.
subdirectoryFilter()
{
  longLineFilter | (
  if test -z "$filter" ; then
    # Pass through all lines.
    cat
  elif test "$filter" = nosub ; then
    # Omit all subdirectories.  The list has to name every directory
    # that can be selected with a subdirectory option, ada included,
    # otherwise its warnings would be counted as toplevel ones.
    grep -E -v '/gcc/(ch|cp|f|java|ada|intl|fixinc)/'
  else
    # Pass through only subdir $filter.
    grep "/gcc/$filter/"
  fi )
}

# Copy to stdout the part of the bootstrap log (read from stdin) that
# belongs to stage $stageN, using $AWK to do the selection.
#
#   stage 0   everything *but* gcc: lines before "Bootstrapping the
#             compiler" and lines from "Building runtime libraries"
#             onwards, i.e. utilities, libraries, etc.
#   stage 1   lines from the start of the compiler build up to the
#             first line that mentions stage1.
#   stage N   lines from the first one mentioning stage(N-1)/ up to
#             the first one that mentions stageN.
stageNfilter()
{
  if test "$stageN" -lt 1 ; then
    $AWK "BEGIN{t=1} ; /^Bootstrapping the compiler/{t=0} ; /^Building runtime libraries/{t=1} ; {if(t==1)print}"
  elif test "$stageN" -eq 1 ; then
    $AWK "/^Bootstrapping the compiler|^Building the C and C\+\+ compiler/{t=1} ; /stage$stageN/{t=0} ; {if(t==1)print}"
  else
    # Later stages are compiled by the previous stage's compiler, so
    # the previous stage's directory name marks where they begin.
    stageNminus1=$(expr $stageN - 1)
    $AWK "/stage${stageNminus1}\//{t=1} ; /stage$stageN/{t=0} ; {if(t==1)print}"
  fi
}

# Keep only the lines that report a warning, i.e. the lines of stdin
# which contain the text " warning: ".
warningFilter()
{
  grep -e ' warning: '
}

# Normalize warning messages so that similar warnings can be counted
# together when summarizing.  Everything up to and including
# "warning: " is dropped, and `xxx' (usually some variable or function
# name) becomes `???'.  Certain C keywords which are known to appear in
# various messages are shielded from that replacement: they are first
# rewritten as "xxx" and the final command turns "xxx" back into `xxx'.
# Argument numbers ("arg 3") are replaced by ??? as well.
#
# Each apostrophe inside the sed commands is written as '\'' because
# the commands themselves are single-quoted for the shell.

keywordFilter()
{
  sed \
    -e 's/.*warning: //' \
    -e 's/`\(int\)'\''/"\1"/g' \
    -e 's/`\(long\)'\''/"\1"/g' \
    -e 's/`\(char\)'\''/"\1"/g' \
    -e 's/`\(inline\)'\''/"\1"/g' \
    -e 's/`\(else\)'\''/"\1"/g' \
    -e 's/`\(return\)'\''/"\1"/g' \
    -e 's/`\(static\)'\''/"\1"/g' \
    -e 's/`\(extern\)'\''/"\1"/g' \
    -e 's/`\(const\)'\''/"\1"/g' \
    -e 's/`\(noreturn\)'\''/"\1"/g' \
    -e 's/`\(longjmp\)'\'' or `\(vfork\)'\''/"\1" or "\2"/g' \
    -e 's/`[^'\'']*'\''/`???'\''/g' \
    -e 's/.*format, .* arg (arg [0-9][0-9]*)/??? format, ??? arg (arg ???)/' \
    -e 's/\([( ]\)arg [0-9][0-9]*\([) ]\)/\1arg ???\2/' \
    -e 's/"\([^"]*\)"/`\1'\''/g'
}

# Shorten the pathname that starts each line printed by the
# warningFilter function: whatever precedes a well-known directory
# (gcc/, include/, texinfo/, fastjar/, zlib/ or a lib*/ directory) is
# removed.  The snapshot directory name changes every week, so without
# this the output of this program could not be compared to previous
# runs without spurious diffs caused by source directory name changes.
# The commands are applied in order, each to the result of the
# previous one.

srcdirFilter()
{
  sed \
    -e 's%^[^ ]*/\(gcc/\)%\1%' \
    -e 's%^[^ ]*/\(include/\)%\1%' \
    -e 's%^[^ ]*/\(texinfo/\)%\1%' \
    -e 's%^[^ ]*/\(fastjar/\)%\1%' \
    -e 's%^[^ ]*/\(zlib/\)%\1%' \
    -e 's%^[^ ]*/\(lib[a-z23+-]*/\)%\1%'
}

# Start the main section.

usage="usage: $(basename "$0") [-llf] [-s stage] [-nosub|-ch|-cp|-f|-java|-ada|-intl|-fixinc] [-pass|-wpass] [file(s)]"

# Stage examined when no -s option is given.
stageN=3

# Scratch file holding the filtered log of the file being processed.
# Let mktemp pick an unpredictable name and create the file, so that
# nobody can plant a file or symlink at a guessable /tmp path first.
tmpfile=$(mktemp /tmp/tmp-warn.XXXXXX 2>/dev/null) || tmpfile=
if test -z "$tmpfile" ; then
  # No usable mktemp: fall back to the traditional name, but create it
  # with noclobber set so that an already existing file is refused.
  tmpfile=/tmp/tmp-warn.$$
  if (set -C ; : > "$tmpfile") 2>/dev/null ; then
    :
  else
    echo "$(basename "$0"): cannot create temporary file $tmpfile" 1>&2
    exit 1
  fi
fi

# Remove $tmpfile on exit and various signals.  Signal 9 (KILL) is not
# listed because it cannot be caught.
trap 'rm -f "$tmpfile"' 0
trap 'rm -f "$tmpfile" ; exit 1' 1 2 3 5 13 15

# Find a good awk, unless the caller already chose one through $AWK.
# The candidates are tried in order of preference and the first one the
# shell can locate wins.  "command -v" reports this through its exit
# status, so the result does not depend on the wording or the language
# of a "not found" message.  If no candidate is found, AWK is left set
# to the last name tried (awk).
if test -z "$AWK" ; then
  for AWK in gawk nawk awk ; do
    if command -v "$AWK" > /dev/null 2>&1 ; then
      break
    fi
  done
fi

# Parse command line arguments.  Options are consumed from the front of
# the argument list; the first argument that is not an option ends the
# loop and, together with everything after it, is left in "$@".
#
#   llf     set to 1 by -llf
#   stageN  set by "-s N" or "-sN"
#   filter  set to the option name without its dash by -nosub, -ch, ...
#   pass    set to 1 by -pass and to w by -wpass
#
# $usage is always expanded inside double quotes: it contains bracket
# expressions such as [-llf] which an unquoted expansion would try to
# match against file names in the current directory.
while test $# -gt 0 ; do
  case "$1" in
    -llf)
      llf=1
      shift
      ;;
    -s)
      if test -z "$2" ; then
        echo "$usage" 1>&2
        exit 1
      fi
      stageN="$2"
      shift 2
      ;;
    -s*)
      # Stage number glued to the option, e.g. -s2.
      stageN=${1#-s}
      shift
      ;;
    -nosub|-ch|-cp|-f|-java|-ada|-intl|-fixinc)
      filter=${1#-}
      shift
      ;;
    -pass)
      pass=1
      shift
      ;;
    -wpass)
      pass=w
      shift
      ;;
    -*)
      echo "$usage" 1>&2
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

# Check for a valid value of $stageN.  Only a single decimal digit is
# accepted, so an empty value, a single character that is not a digit
# and anything longer than one character are all rejected.
case "$stageN" in
  '' | [!0-9] | ??*)
    echo "Stage <$stageN> must be in the range [0..9]." 1>&2
    exit 1
    ;;
esac

# Process each log file named on the command line in turn.  The lines
# selected by stageNfilter and subdirectoryFilter are saved in $tmpfile
# and then either passed through (-pass, -wpass) or summarized.
for file in "$@" ; do

  # Skip files that cannot be read.  Otherwise the failed redirection
  # would leave $tmpfile empty and a misleading "0 warnings" summary
  # would be printed for the file.
  if test ! -r "$file" ; then
    printf '%s: cannot read %s\n' "$(basename "$0")" "$file" 1>&2
    continue
  fi

  stageNfilter < "$file" | subdirectoryFilter > "$tmpfile"

  # (Just) show me the warnings.
  if test "$pass" != '' ; then
    if test "$pass" = w ; then
      warningFilter < "$tmpfile"
    else
      cat "$tmpfile"
    fi
    continue
  fi

  if test -z "$filter" ; then
    echo "Counting all warnings,"
  elif test "$filter" = nosub ; then
    echo "Counting non-subdirectory warnings,"
  else
    echo "Counting warnings in the gcc/$filter subdirectory,"
  fi

  # Some versions of wc pad the number with blanks; strip them so the
  # sentence below reads the same everywhere.
  count=$(warningFilter < "$tmpfile" | wc -l | tr -d ' ')
  echo "there are $count warnings in stage$stageN of this bootstrap."

  echo
  echo Number of warnings per file:
  # The file name is the text before the first colon of a warning line.
  warningFilter < "$tmpfile" | srcdirFilter | $AWK -F: '{print$1}' | sort | \
    uniq -c | sort -nr

  echo
  echo Number of warning types:
  warningFilter < "$tmpfile" | keywordFilter | sort | uniq -c | sort -nr

done