#!/bin/sh
#
# Dredge github for most recent QA failure corpses
#
# Default is to clear everything out and download all .full
# and .out.bad files from the most recent nightly (full) QA
# run.
#
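# Usage: update [-do?] [-c runner] [run_id]
#
# With -c, overlay downloads with the .full and .out.bad files
# from the most recent CI QA run for just one runner (host).
#
# With -d, debug output is enabled and the temporary files are
# kept (as tmp.* in the current directory) when the script exits.
#
# With -o, use the old download algorithm (gh run download) instead
# of fetching each artifact with the github access token.
#
# If run_id is given, that run is processed instead of the most
# recent one found on the Actions page.
#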

# Defaults and command line options.
#
#   -d		debug; scratch file prefix becomes `pwd`/tmp
#   -c runner	CI QA mode for the one named runner
#   -o		old download algorithm
#   -?, --help	print usage and exit 0
#
# Any other option prints usage and exits 1.  Parsing stops at the
# first non-option argument, which is left in $1 (the optional run_id).
#
usage="Usage: update [-do?] [-c runner] [run_id]"
runner=''
tmp=${TMPDIR:-/tmp}/ci-qa-update-$$
debug=false
old=false
while [ $# -gt 0 ]
do
    case "$1"
    in
	-d)
	    shift
	    tmp=`pwd`/tmp
	    debug=true
	    ;;
	-c)
	    if [ $# -gt 1 ]
	    then
		shift
		runner="$1"
		shift
	    else
		echo "Error: -c needs <runner> argument"
		exit 1
	    fi
	    ;;
	-o)	# old download algorithm
	    # the option has to be consumed here, otherwise $1 never
	    # changes and the loop spins forever
	    shift
	    old=true
	    ;;
	-\?|--help)
	    # ? is escaped so that only a literal "-?" asks for help;
	    # unescaped it is a glob that swallows every unknown
	    # single-letter option
	    echo "$usage"
	    exit 0
	    ;;
	-*)
	    echo "$usage"
	    exit 1
	    ;;
	*)
	    break
	    ;;
    esac
done

# Exit handling: whatever way the script ends (normal exit or signal
# 1, 2, 3 or 15) the exit status is the value of $sts at that moment,
# so every path is a failure until sts is set to 0.  Outside debug
# mode the scratch files $tmp.* are removed first.
#
sts=1
on_exit="exit \$sts"
$debug || on_exit="rm -f $tmp.*; $on_exit"
trap "$on_exit" 0 1 2 3 15

# save.html is only written by the error paths further on (when a
# fetched page cannot be parsed), so remove any stale copy first
#
rm -f save.html

if ! $old
then
    # need cleartext (BIG SECURITY ALERT) github access token in
    # $HOME/.github/token
    #
    # $HOME is quoted throughout so a home directory with white space
    # in its name still works
    #
    if [ ! -f "$HOME/.github/token" ]
    then
	echo "Error: new download algorithm needs github access token in \$HOME/.github/token"
	exit 1
    fi
    GH_TOKEN=`cat "$HOME/.github/token"`
    if [ -z "$GH_TOKEN" ]
    then
	# empty or unreadable file: fail here rather than with an
	# authorization error on every download
	echo "Error: no github access token found in \$HOME/.github/token"
	exit 1
    fi
fi

# Decide which run to process.
#
# A run_id given on the command line is used as is.  Otherwise the
# Actions page is fetched and the first run listed there for the
# wanted workflow is taken: QA for the nightly case, CI when a runner
# was named with -c.  In that case $tmp.runs is left holding every
# matching RunId from the page, one per line.
#
if [ $# -gt 0 ]
then
    run_id="$1"
else
    # get Actions page
    #
    # earlier URL, kept for reference ...
    # 'https://github.com/performancecopilot/pcp/actions?query=workflow%3AQA+event%3Aschedule'
    #
    if ! wget -O $tmp.actions -q 'https://github.com/performancecopilot/pcp/actions'
    then
	echo "Error: wget failed for Actions page"
	exit
    fi

    # we want these sort of lines from the Actions page ...
    #
    # <a href="/performancecopilot/pcp/actions/runs/19393745030" ... Run 2950 of QA. ...
    # <a href="/performancecopilot/pcp/actions/runs/19385924511" ... Run 3425 of CI. ...
    #
    if [ -n "$runner" ]
    then
	workflow=CI
    else
	workflow=QA
    fi
    grep '/performancecopilot/pcp/actions/runs/[0-9][0-9]*"' $tmp.actions >$tmp.tmp
    grep  "Run [0-9][0-9]* of $workflow\\." <$tmp.tmp \
    | sed -e 's@.*/actions/runs/@@' -e 's/".*//' >$tmp.runs

    # first one listed is the one we want
    #
    run_id=`sed -e 1q <$tmp.runs`
    if [ -z "$run_id" ]
    then
	echo "Error: failed to get RunId from Actions page"
	echo "HTML is in save.html"
	cp $tmp.actions save.html
	exit
    fi
fi

# Say which run is being processed (CI when a runner was named with
# -c, nightly otherwise), then fetch that run's page into $tmp.run
#
if [ -n "$runner" ]
then
    echo "CI QA RunId: $run_id"
else
    echo "Nightly QA RunId: $run_id"
fi

if ! wget -O $tmp.run -q "https://github.com/performancecopilot/pcp/actions/runs/$run_id"
then
    echo "Error: wget failed for Artifacts page"
    exit
fi

# Check that every job of the run has completed; if not, report
# progress and stop (successfully).
#
# for jobs still running, there is a N/M line and for completed jobs
# there is just M on the line, in either case this line comes BEFORE
# the "job completed" or "jobs completed" line in the HTML
#
# The counts are scraped from a web page, so they are picked apart as
# plain text and accepted only if they are digits; nothing from the
# page is passed to eval.  $done and $jobs are always assigned here so
# values inherited from the environment cannot leak in.
#

# only the first matching line is used
# NOTE(review): assumes the first "jobs completed" on the page is the
# one with the counts in front of it -- confirm against a saved page
lineno=`sed -n -e '/jobs* completed/{
=
q
}' $tmp.run`
if [ -z "$lineno" ]
then
    echo "Error: No jobs* completed text in Artifacts page"
    echo "HTML is in save.html"
    cp $tmp.run save.html
    exit
fi
lineno=`expr $lineno - 1`

# the line with the counts, leading and trailing white space removed
counts=`sed -n -e "$lineno"'{
s/^[ 	]*//
s/[ 	]*$//
p
}' <$tmp.run`
case "$counts"
in
    */*)
	# N/M
	done=${counts%%/*}
	jobs=${counts#*/}
	;;
    *)
	# just M
	done=$counts
	jobs=''
	;;
esac

$debug && echo "done=\"$done\" jobs=\"$jobs\""

if printf '%s\n' "$done" | grep -q '^[0-9][0-9]*$' \
   && printf '%s\n' "$jobs" | grep -q '^[0-9]*$'
then
    if [ -n "$jobs" ]
    then
	echo "Still running, $done of $jobs jobs completed"
	# $tmp.runs only exists when the RunId came from the Actions page
	if [ -s $tmp.runs ]
	then
	    echo "Recent completed runs ..."
	    sed <$tmp.runs -e 1d -e 's/^/    /' -e 4q
	fi
	sts=0
	exit
    fi
else
    echo "Error: Failed to extract done ($done) and/or jobs ($jobs) from Artifacts page"
    echo "HTML is in save.html"
    cp $tmp.run save.html
    exit
fi

# Collect the runner names for this run in $tmp.runners.
#
# each "runner" name is on a line by itself with leading whitespace
# then test-... so strip the leading whitespace and keep the lines
# that then start with test-
#
sed -n -e 's/^[ 	]*//' -e '/^test-/p' <$tmp.run >$tmp.runners

# nothing found is not an error, the artifacts are just not there yet
#
[ -s $tmp.runners ] || {
    echo "No result artifacts yet"
    sts=0
    exit
}

# Mode-specific preparation.
#
# -c runner: the runner must be one of this run's runners, and the
# list in $tmp.runners is cut down to just that one.
#
# nightly: stop (successfully) if last_run_id holds this RunId,
# otherwise remove all the test-* results from the previous run.
#
if [ -n "$runner" ]
then
    if grep -q "^$runner\$" $tmp.runners
    then
	echo "$runner" >$tmp.runners
    else
	echo "Error: -c $runner, not in list of runners ..."
	sed -e 's/^/    /' $tmp.runners
	exit
    fi
else
    if [ -f last_run_id ]
    then
	prev_run_id=`cat last_run_id`
	$debug && echo "last_run_id=$prev_run_id"
	if [ "$prev_run_id" = "$run_id" ]
	then
	    echo "Already processed, nothing to do"
	    sts=0
	    exit
	fi
    fi

    rm -rf test-*
fi

# New download algorithm only: write $tmp.map with one line per
# artifact, the runner name followed by the ArtifactId.
#
if ! $old
then
    # build runner -> artifact map
    # <code id="artifact-5561229137-hash" style="white-space: ...
    # <code id="artifact-5563176475-hash" ...
    #                    ^^^^^^^^^^ here
    #
    # awk drops the leading whitespace, remembers the most recent
    # test-... name and tags each following <code id= line with it;
    # sed then reduces the tagged line to name and number
    #
    awk '
		{ sub(/^[ \t]*/, "") }
/^test-/	{ host = $1 }
/<code id=/	{ print host " " $0 }' <$tmp.run \
    | sed -E -e 's/(.*)<code id="artifact-([0-9][0-9]*)-.*/\1 \2/' >$tmp.map
fi

# Fetch and unpack the artifact for every runner listed in $tmp.runners.
#
# Each artifact is unpacked in <runner>/tmp and its files are then
# merged into <runner>: a file identical to the one already there is
# dropped, a file that differs replaces it and the previous version
# is kept as <file>.old.
#
# The loop has to stay on the end of a pipe: it uses $runner as its
# loop variable, while the code after the loop still tests the value
# of $runner that came from -c.  For the same reason a failure inside
# the loop cannot end the script directly; it creates $tmp.abort,
# which is checked once the loop is done.
# NOTE(review): this relies on /bin/sh running the piped loop in a
# subshell (bash and dash do) -- confirm before changing the shell.
#
rm -f $tmp.abort
# unzip(1) takes default options from $UNZIP, -o is overwrite
# without prompting
UNZIP=-o; export UNZIP
cat $tmp.runners \
| while read runner
do
    # printf, not echo -n, which some /bin/sh print literally
    printf '%s:' "$runner"
    if [ ! -d "$runner" ]
    then
	if ! mkdir "$runner"
	then
	    echo
	    echo "Error: mkdir $runner failed"
	    touch $tmp.abort
	    exit
	fi
    fi
    # everything below writes, moves and removes files relative to
    # the current directory, so do not carry on if cd fails
    if ! cd "$runner"
    then
	echo
	echo "Error: cd $runner failed"
	touch $tmp.abort
	exit
    fi
    # NOTE(review): $runner is the loop variable here (not the -c
    # argument), so this test, and the matching one further down, is
    # true for every line read and the tmp overlay is always used
    if [ -n "$runner" ]
    then
	if [ ! -d tmp ]
	then
	    if ! mkdir tmp
	    then
		echo
		echo "Error: mkdir $runner/tmp failed"
		touch $tmp.abort
		exit
	    fi
	fi
	if ! cd tmp
	then
	    echo
	    echo "Error: cd $runner/tmp failed"
	    touch $tmp.abort
	    exit
	fi
    fi
    if $old
    then
	if ! gh run download $run_id --name "$runner" >$tmp.err 2>&1
	then
	    cat $tmp.err
	    echo "+ gh run download $run_id --name $runner"
	    echo "Error: gh run download errors"
	    touch $tmp.abort
	    exit
	fi
    else
	# gh run download ... --name ... is busted as of 19 Feb 2026
	#
	# this is truly grotesque!  this week, URLs look like ...
	# https://api.github.com/repos/performancecopilot/pcp/actions/artifacts/5567370890/zip
	#
	art_id=`grep "^$runner " <$tmp.map | sed -e 's/.* //'`
	if [ -z "$art_id" ]
	then
	    cat $tmp.map
	    echo "Error: no ArtifactId for RunId $runner in this list ..."
	    touch $tmp.abort
	    exit
	fi
	url="https://api.github.com/repos/performancecopilot/pcp/actions/artifacts/$art_id/zip"
	# -f so an HTTP error (bad token, artifact gone) is reported
	# here and not later as an unzip failure
	# NOTE(review): the token is visible in the process list while
	# curl runs
	if ! curl -s -f -H "Authorization: Bearer $GH_TOKEN" -L --output artifact.zip "$url"
	then
	    echo "Error: curl failed for ArtifactId $art_id"
	    touch $tmp.abort
	    exit
	fi
	if ! unzip -q -x artifact.zip
	then
	    echo "Error: unzip failed for ArtifactId $art_id"
	    touch $tmp.abort
	    exit
	fi
	$debug || rm artifact.zip
    fi

    rm -f $tmp.err
    # one .out.bad file per failed test
    nfail=`ls *.out.bad 2>/dev/null | wc -l | sed -e 's/ //g'`
    if [ "$nfail" = 0 ]
    then
	echo " all tests passed"
    else
	echo " $nfail tests failed"
    fi
    if [ -n "$runner" ]
    then
	# merge the unpacked files into the runner's directory
	for file in *
	do
	    if [ -f ../"$file" ]
	    then
		if diff -q "$file" ../"$file"
		then
		    # same as what is already there
		    rm -f "$file"
		else
		    echo "$file: duplicate, saving previous as $file.old"
		    rm -f ../"$file".old
		    mv ../"$file" ../"$file".old
		    mv "$file" ../"$file"
		fi
	    else
		mv "$file" ../"$file"
	    fi
	done
	cd ..
	if ! rmdir tmp
	then
	    echo "Oops! tmp not empty?"
	    ls -la tmp
	fi
    fi

    cd ..
done

[ -f $tmp.abort ] && exit

# Wrap up.
#
# nightly case only: fix symlinks and short names for qa-summary
#
[ -n "$runner" ] || ./cleanup

# remember this RunId, don't need to do this one again
#
echo "$run_id" >last_run_id

# success, this is the status the script exits with
#
sts=0
