#!/bin/ksh
# $OpenBSD: check_sym,v 1.8 2019/08/09 05:02:19 guenther Exp $
#
# Copyright (c) 2016,2019 Philip Guenther
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
#
# check_sym -- compare the symbols and external function references in two
#     versions of a shared library
#
# SYNOPSIS
#     check_sym [-chv] [old [new]]
#
# DESCRIPTION
#     Library developers need to be aware when they have changed the
#     ABI of a library.  To assist them, check_sym examines two versions
#     of a shared library and reports changes to the following:
#      * the set of exported symbols and their strengths
#      * the set of undefined symbols referenced
#      * the set of lazily-resolved functions (PLT)
#
#     In each case, additions and removals are reported; for exported
#     symbols it also reports when a symbol is weakened or strengthened.
#
#     The shared libraries to compare can be specified on the
#     command-line.  Otherwise, check_sym expects to be run from the
#     source directory of a library with a shlib_version file specifying
#     the version being built and the new library in the obj subdirectory.
#     If the old library to compare against wasn't specified either then
#     check_sym will take the highest version of that library in the
#     *current* directory, or the highest version of that library in
#     /usr/lib if it wasn't present in the current directory.
#
#     check_sym uses fixed names in /tmp for its intermediate files,
#     as they contain useful details for those trying to understand
#     what changed.  If any of them cannot be created by the user,
#     the command will fail.  The files can be cleaned up using
#     the -c option.
#
#
#     The *basic* rules of thumb for library versions are: if you
#      * stop exporting a symbol, or
#      * change the size of a data symbol
#      * start exporting a symbol that an inter-dependent library needs
#     then you need to bump the MAJOR version of the library.
#
#     Otherwise, if you:
#      * start exporting a symbol
#     then you need to bump the MINOR version of the library.
#
# SEE ALSO
#     readelf(1), elf(5)
#
# AUTHORS
#     Philip Guenther
#
# CAVEATS
#     The elf format is infinitely extendable, but check_sym only
#     handles a few weirdnesses.  Running it on or against new archs
#     may result in meaningless results.
#
# BUGS
#     While the author still finds the intermediate files useful,
#     most people won't.  By default they should be placed in a
#     temp directory and removed.
#

# Print the value assigned to LIB in the makefile(s) given as arguments.
# Whitespace around the '=' may be any mix of spaces and tabs; the value
# is the first whitespace-delimited word after the '='.
get_lib_name()
{
	sed -n 's/^[[:space:]]*LIB[[:space:]]*=[[:space:]]*\([^[:space:]]*\).*/\1/p' "$@"
}

# Choose the highest-versioned regular file among the arguments.
# The last two dot-separated components of each name are compared as
# major and minor numbers.  The winner is stored in the global "old"
# (left empty when no argument is an existing file).
# Returns 0 when a file was chosen, non-zero otherwise.
pick_highest()
{
	typeset i maj min omaj omin

	old=
	omaj=-1
	omin=0
	for i
	do
		# an unmatched glob is passed through literally; skip it
		[[ -f $i ]] || continue
		maj=${i%.*}; maj=${maj##*.}
		min=${i##*.}
		if [[ $maj -gt $omaj || ( $maj -eq $omaj && $min -gt $omin ) ]]
		then
			old=$i
			omaj=$maj
			omin=$min
		fi
	done
	[[ $old != "" ]]
}

# With no arguments: print the usage line to stdout and exit 0.
# With arguments: print them as an error followed by the usage line
# to stderr and exit 1.
usage()
{
	usage="usage: check_sym [-chv] [old [new]]"
	if [[ $# -gt 0 ]]
	then
		echo "check_sym: $* $usage" >&2
		exit 1
	fi
	echo "$usage"
	exit 0
}

# Names of all intermediate files, stored as a brace pattern.  Every use
# below leaves $file_list unquoted so that the shell can expand it.
# NOTE(review): this relies on ksh applying brace expansion to the result
# of a parameter expansion; bash does not do that -- confirm before
# changing the interpreter.
file_list=/tmp/{D{,S,W,O},J,S,U,d,j,r,s}{1,2}

verbose=false
while getopts :chv opt "$@"
do
	case $opt in
	h)	usage;;
	c)	rm -f $file_list
		exit 0;;
	v)	verbose=true;;
	\?)	usage "unknown option -- $OPTARG";;
	esac
done
shift $((OPTIND - 1))
[[ $# -gt 2 ]] && usage "too many arguments"

# Old library?
if [[ $1 = ?(*/)lib*.so* ]]
then
	if [[ ! -f $1 ]]
	then
		echo "$1 doesn't exist" >&2
		exit 1
	fi
	old=$1
	lib=${old##*/}
	lib=${lib%%.so.*}
	# the pattern above also accepts an unversioned "libfoo.so"
	lib=${lib%.so}
	shift
else
	# try determining it from the current directory
	if [[ -f Makefile ]] && lib=$(get_lib_name Makefile) &&
	    [[ $lib != "" ]]
	then
		lib=lib$lib
	else
		lib=libc
	fi

	# Is there a copy of that lib in the current directory?
	# If so, use the highest numbered one
	if ! pick_highest "$lib".so.* && ! pick_highest /usr/lib/"$lib".so.*
	then
		echo "unable to find $lib.so.*" >&2
		exit 1
	fi
fi

# New library?
if [[ $1 = ?(*/)lib*.so* ]]
then
	new=$1
	shift
else
	# Dig info out of the just built library
	if [[ ! -r shlib_version ]]
	then
		echo "shlib_version doesn't exist" >&2
		exit 1
	fi
	# shlib_version is expected to set "major" and "minor"
	. ./shlib_version
	new=obj/${lib}.so.${major}.${minor}
fi
if [[ ! -f $new ]]
then
	echo "$new doesn't exist" >&2
	exit 1
fi

# Filter the output of readelf -s to be easier to parse by removing a
# bracketed "label: hex" field that only appears on some symbols, e.g.
# "[: 88]" or "[<other>: 88]" (the label text depends on the readelf
# version, so any label is accepted).
# Not really arch-specific, but I've only seen it on alpha
filt_symtab() {
	sed 's/\[[^]:]*: [0-9a-f]*\]//'
}

# precreate all the files we'll use, but with noclobber set to avoid
# symlink attacks
set -C
files=
trap 'rm -f $files' 1 2 15 ERR
for i in $file_list
do
	# Remove any leftover first: with noclobber set the creation below
	# only succeeds when nothing exists at that name.
	rm -f "$i"
	if ! true 3>"$i"
	then
		# the shell has already reported why; undo and give up
		rm -f $files
		exit 1
	fi
	files="$files $i"
done
set +C

readelf -rW "$old" > /tmp/r1
readelf -rW "$new" > /tmp/r2

readelf -sW "$old" | filt_symtab > /tmp/s1
readelf -sW "$new" | filt_symtab > /tmp/s2

cpu=$(uname -p)
if [[ $cpu = mips64* ]]
then
	gotsym1=$(readelf -d "$old" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
	gotsym2=$(readelf -d "$new" | awk '$2 ~ /MIPS_GOTSYM/{print $3}')
fi

# jump_slots N: write the sorted names of the lazily-bound functions of
# library N (1 = old, 2 = new) to /tmp/jN.
#  - hppa:  field 5 of the IPLT lines in the relocation dump
#  - mips:  FUNC symbols listed before the .symtab table whose index is
#           at least the MIPS_GOTSYM value
#  - other: field 5 of the JUMP_SLOT/JMP_SLOT lines in the relocation dump
jump_slots() {
	case $cpu in
	hppa*)	awk '/IPLT/ && $5 != ""{print $5}' /tmp/r$1
		;;
	mips*)	# the $((gotsym$1)) converts hex to decimal
		awk -v g=$((gotsym$1)) \
			'/^Symbol table ..symtab/{exit}
			$1+0 >= g && $4 == "FUNC" {print $8}' /tmp/s$1
		;;
	*)	awk '/JU*MP_SL/ && $5 != ""{print $5}' /tmp/r$1
		;;
	esac | sort -o /tmp/j$1
}

# dynamic_sym N: split the symbols listed before the .symtab table of
# library N into sorted lists:
#   /tmp/UN   undefined symbols
#   /tmp/DSN  defined GLOBAL symbols
#   /tmp/DWN  defined WEAK symbols
#   /tmp/DN   all defined non-LOCAL symbols
#   /tmp/DON  "name size" for defined non-LOCAL OBJECT symbols
#   /tmp/dN   "type bind visibility name" for every defined symbol
# Lists with no entries are not rewritten, which is why the files are
# created empty beforehand.
dynamic_sym() {
	awk -v s=$1 '/^Symbol table ..symtab/{exit}
		! /^ *[1-9]/   {next}
		$7 == "UND"    {print $8 | ("sort -o /tmp/U" s); next }
		$5 == "GLOBAL" {print $8 | ("sort -o /tmp/DS" s) }
		$5 == "WEAK"   {print $8 | ("sort -o /tmp/DW" s) }
		$5 != "LOCAL"  {print $8 | ("sort -o /tmp/D" s) }
		$5 != "LOCAL" && $4 == "OBJECT" {
				print $8, $3 | ("sort -o /tmp/DO" s) }
		{print $4, $5, $6, $8}' /tmp/s$1 | sort -o /tmp/d$1
}

# static_sym N: write the sorted "type bind visibility name" of every
# non-LOCAL entry from the .symtab table onwards of library N to /tmp/SN.
static_sym() {
	awk '/^Symbol table ..symtab/{s=1}
	     /LOCAL/{next}
	     s&&/^ *[1-9]/{print $4, $5, $6, $8}' /tmp/s$1 | sort -o /tmp/S$1
}

# Join two "name size" lists on the name and print the symbols whose
# sizes differ as "name old --> new".
data_sym_changes() {
	join "$@" | awk '$2 != $3 { print $1 " " $2 " --> " $3 }'
}

# output_if_not_empty leader command [args...]
# Run the command; if it produces any output, print the leader, then the
# command's output indented, then an empty line.  The command is run
# twice, so it must be repeatable.
output_if_not_empty() {
	typeset leader

	leader=$1
	shift
	if "$@" | grep -q .
	then
		echo "$leader"
		"$@" | sed 's:^: :'
		echo
	fi
}


for i in 1 2
do
	jump_slots $i
	dynamic_sym $i
	static_sym $i
	# drop the jump slots that are also undefined references
	comm -23 /tmp/j$i /tmp/U$i >/tmp/J$i
done

echo "$old --> $new"
if cmp -s /tmp/d[12] && cmp -s /tmp/DO[12]
then
	printf "No dynamic export changes\n"
else
	printf "Dynamic export changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/D[12]
	output_if_not_empty "removed:" comm -23 /tmp/D[12]
	output_if_not_empty "weakened:" comm -12 /tmp/DS1 /tmp/DW2
	output_if_not_empty "strengthened:" comm -12 /tmp/DW1 /tmp/DS2
	output_if_not_empty "data object sizes changes:" \
					data_sym_changes /tmp/DO[12]
fi
if ! cmp -s /tmp/U[12]
then
	printf "External reference changes:\n"
	output_if_not_empty "added:" comm -13 /tmp/U[12]
	output_if_not_empty "removed:" comm -23 /tmp/U[12]
fi

if $verbose; then
	printf "\nReloc counts:\nbefore:\n"
	grep ^R /tmp/r1
	printf "\nafter:\n"
	grep ^R /tmp/r2
fi

output_if_not_empty "PLT added:" comm -13 /tmp/J1 /tmp/J2
output_if_not_empty "PLT removed:" comm -23 /tmp/J1 /tmp/J2