#!/bin/sh
# Copyright 1999-2002 Carnegie Mellon University.  
# Portions Copyright 2002 Sun Microsystems, Inc.  
# Portions Copyright 2002 Mitsubishi ElectricResearch Laboratories.
# All Rights Reserved.  Use is subject to license terms.
# 
# See the file "license.terms" for information on usage and
# redistribution of this file, and for a DISCLAIMER OF ALL 
# WARRANTIES.
#
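# This script parses the regression log and outputs a detailed
# regression HTML report on standard output.
#
# usage: ./makeTestDetailReport regression.log listoftests.list
#     machine typeOfResults title recognizer
#
#   listoftests.list  file with one test name per line (lines starting
#                     with "#" are comments), or "any" to report every
#                     test found in the log
#   machine           machine name, "this" for the local host, or "any"
#   typeOfResults     "latest", "fastest" or "best"
#   title             text shown in the table heading
#   recognizer        "s3", "s3.3" or "s4"
#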
# Field definitions: 
#   1) "test"
#   2) date
#   3) time
#   4) machine
#   5) System
#   6) testName 
#   7) who 
#   8) status
#   9) audioTime 
#  10) procTime 
#  11) words 
#  12) insertions 
#  13) deletions 
#  14) substitutions 
#  15) sentences 
#  16) correctSentences 
#  17) heapSize 
#  18) loadAverage 

gawk '

BEGIN {
    # Log records are "|"-separated; the F_* constants give the 1-based
    # position of each field within a record.
    FS = "|"
    F_TEST = 1;
    F_DATE = 2;
    F_TIME = 3;
    F_MACHINE = 4;
    F_SYSTEM = 5;
    F_TEST_NAME = 6;
    F_WHO = 7;
    F_STATUS = 8;
    F_AUDIO_TIME = 9;
    F_PROC_TIME = 10;
    F_WORDS = 11;
    F_INSERTIONS = 12;
    F_DELETIONS = 13;
    F_SUBSTITUTIONS = 14;
    F_SENTENCES = 15;
    F_CORRECT_SENTENCES = 16;
    F_HEAP_SIZE = 17;
    F_LOAD_AVERAGE = 18;

    # Placeholder used wherever a value is missing.
    NOT_AVAILABLE = "n/a";

    # usage makeTestDetailReport log test.list machine mode title system
    # ARGV[1] (the log) is left untouched so that gawk reads it as input.
    testList = ARGV[2];
    machine = ARGV[3];
    mode = ARGV[4];
    title = ARGV[5];
    whichSystem = ARGV[6];

    # "this" selects the local host as reported by the ./getHostname
    # helper.  The name is only replaced when the helper produced a line,
    # and the pipe is closed once it has been read.
    if (machine == "this") {
        if (("./getHostname" | getline hostname) > 0) {
            machine = hostname;
        }
        close("./getHostname");
    }

    # Blank out the option arguments so that gawk does not try to open
    # them as input files.
    for (argIndex = 2; argIndex <= 6; argIndex++) {
        ARGV[argIndex] = "";
    }

    # NOTE(review): systemName and systemCount are not referenced anywhere
    # else in this script; only systemTitle is used (by dumpTitle).
    systemName[0] = "s3";
    systemName[1] = "s3.3";
    systemName[2] = "s4";

    systemTitle["s3"] = "Sphinx 3";
    systemTitle["s3.3"] = "Sphinx 3.3";
    systemTitle["s4"] = "Sphinx 4";
    systemCount = 3;

    # Heading prefix for each supported mode.
    modeTitle["latest"] =  "Latest detailed ";
    modeTitle["fastest"] = "Fastest detailed ";
    modeTitle["best"] =    "Most accurate detailed ";

    # read in the date and time when the test was started
    getline date < ".startDate";
    close(".startDate");
    getline time < ".startTimeOfDay";
    close(".startTimeOfDay");

    # Read in the test list.  Lines starting with "#" are comments; every
    # other line (blank ones included, which dumpTest renders as an empty
    # row) becomes an entry, and testOrder remembers the file order.
    # Nothing is read when the list is "any" (the END rule builds the
    # order from the log instead) or when no list was given.
    totalTestCount = 0;
    if (testList != "" && testList != "any") {
        # The getline is parenthesized so the comparison with 0 cannot be
        # taken as part of the file name expression.
        while ((getline listLine < testList) > 0) {
            if (listLine !~ /^#/) {
                tests[listLine] = 1;
                testOrder[totalTestCount++] = listLine;
            }
        }
        close(testList);
    }
}

#####
# Record selection.  A "test" record is considered when it comes from the
# requested machine (or machine is "any"), names a test from the list (or
# the list is "any") and belongs to the requested system.  At most one
# record per test name is kept in testData, chosen according to mode.
#
$F_TEST == "test" \
        && (machine == "any" || $F_MACHINE == machine) \
        && (testList == "any" || ($F_TEST_NAME in tests)) \
        && $F_SYSTEM == whichSystem {
    tag = $F_TEST_NAME;

    if (mode == "latest") {
        # later records overwrite earlier ones, but only successful runs
        if ($F_STATUS == "OK") {
            testData[tag] = $0;
        }
    } else if (mode == "fastest") {
        # smaller processing/audio ratio wins
        candidateSpeed = getSpeedFromLine($0);
        if (candidateSpeed != NOT_AVAILABLE \
                && (!(tag in bestSpeed) || candidateSpeed < bestSpeed[tag])) {
            bestSpeed[tag] = candidateSpeed;
            testData[tag] = $0;
        }
    } else if (mode == "best") {
        # the value is an error percentage, so smaller wins
        candidateError = getAccuracyFromLine($0);
        if (candidateError != NOT_AVAILABLE \
                && (!(tag in bestAccuracy) || candidateError < bestAccuracy[tag])) {
            bestAccuracy[tag] = candidateError;
            testData[tag] = $0;
        }
    }

    # with no explicit list, remember every test name seen in the log
    if (testList == "any") {
        tests[$F_TEST_NAME] = 1;
    }
}


#####
# updates the data array with info for the given test
#
# Splits the testData entry whose key is the concatenation of testName
# and testCount into the global array data, using the current FS.
#
# NOTE(review): nothing in the visible script calls this function, and
# the main rule stores testData under the test name alone, so the
# concatenated key only matches when testCount is empty - confirm
# whether this helper is still needed.
#
function updateData(testName, testCount) {
    split(testData[testName testCount], data);
}


####
# Returns the speed for one raw log record: processing time divided by
# audio time.  Returns NOT_AVAILABLE when the processing time is missing
# or when the audio time is missing, zero or not a number.
#
#   line - a log record with fields separated by FS (may be empty)
#
# The names after the gap in the parameter list are locals, so the call
# does not disturb any global variable.
#
function getSpeedFromLine(line,    fields, audioTime, procTime) {
    split(line, fields);
    audioTime = fields[F_AUDIO_TIME];
    procTime = fields[F_PROC_TIME];

    # Adding 0 forces a numeric test, so an empty or non-numeric audio
    # time is rejected here instead of aborting gawk with a division by
    # zero below.
    if (procTime == "" || audioTime + 0 == 0) {
        return NOT_AVAILABLE;
    }
    return procTime / audioTime;
}

####
# Returns the speed (processing time divided by audio time) computed by
# getSpeedFromLine for the record stored in testData under testName, or
# NOT_AVAILABLE when that function cannot compute it.
#
function getSpeed(testName) {
     return getSpeedFromLine(testData[testName]);
}

####
# Returns the accuracy figure for one raw log record.  Despite the name
# this is an error rate: (insertions + deletions + substitutions)
# divided by the number of words, as a percentage, so smaller is better.
# Returns NOT_AVAILABLE when the word count is missing, zero or not a
# number.
#
#   line - a log record with fields separated by FS (may be empty)
#
# The names after the gap in the parameter list are locals, so the call
# does not disturb any global variable.
#
function getAccuracyFromLine(line,    fields, errors, words) {
    split(line, fields);
    errors = fields[F_INSERTIONS] + fields[F_DELETIONS] \
        + fields[F_SUBSTITUTIONS];
    words = fields[F_WORDS];

    # Adding 0 forces a numeric test, so an empty or non-numeric word
    # count is rejected here instead of aborting gawk with a division by
    # zero below.
    if (words + 0 == 0) {
        return NOT_AVAILABLE;
    }
    return errors / words * 100;
}

####
# Returns the value computed by getAccuracyFromLine for the record
# stored in testData under testName.  That value is the error count
# divided by the word count, times 100, or NOT_AVAILABLE.
#
function getAccuracy(testName) {
     return getAccuracyFromLine(testData[testName]);
}

####
# Extracts the date field from one raw log record.  The date is only
# reported for records whose status is "OK"; any other status, or an
# empty date, yields NOT_AVAILABLE.
#
function getDateFromLine(line) {
    split(line, data);
    _date = data[F_DATE];
    _status = data[F_STATUS];

    if (_status == "OK" && _date != "") {
        return _date;
    }
    return NOT_AVAILABLE;
}

####
# Returns the date of the record stored in testData under testName, as
# computed by getDateFromLine (NOT_AVAILABLE unless the record has a
# date and its status is "OK").
#
function getDate(testName) {
     return getDateFromLine(testData[testName]);
}


####
# Looks up one field of the record kept for testName.
#   testName - key into testData
#   field    - 1-based field position (one of the F_* constants)
# Returns the field text, or NOT_AVAILABLE when it is empty or absent.
# The record is split again on every call, which is slow but adequate
# for the size of report produced here.
#
function get(testName, field) {
    split(testData[testName], _data);
    _val = _data[field];
    if (_val != "") {
        return _val;
    }
    return NOT_AVAILABLE;
}

####
# Formats num as a percentage of denom followed by the raw count, for
# example "12.5 (3)".
#   num   - the count, or NOT_AVAILABLE
#   denom - the total, or NOT_AVAILABLE
# Returns NOT_AVAILABLE when the count is missing or when the total is
# missing, zero or not a number.
#
function getPercent(num, denom) {
    # Adding 0 forces a numeric test, so any non-numeric total (not only
    # NOT_AVAILABLE) is rejected instead of aborting gawk with a division
    # by zero.  A missing count is reported as missing rather than being
    # coerced to 0.
    if (num == NOT_AVAILABLE || denom + 0 == 0) {
        return NOT_AVAILABLE;
    }
    return ((num / denom) * 100.0)  " ("  num ")";
}

####
# Writes a single table cell holding the given value
#
function dumpCell(value) {
    printf("<td>%s</td>", value);
}

####
# Writes the table row for one test: name, speed, word count, insertion,
# deletion and substitution percentages, error rate, sentence count,
# sentence error percentage and date.  An empty test name (a blank line
# in the test list) produces an empty row instead.
#
function dumpTest(testName) {
   if (testName != "") {
       printf("<tr> <td> <font color=#018888> %s </font></td>", testName);
       dumpCell(getSpeed(testName));

       # word level figures, each error kind relative to the word count
       words = get(testName, F_WORDS);
       dumpCell(words);
       dumpCell(getPercent(get(testName, F_INSERTIONS), words));
       dumpCell(getPercent(get(testName, F_DELETIONS), words));
       dumpCell(getPercent(get(testName, F_SUBSTITUTIONS), words));
       dumpCell(getAccuracy(testName));

       # sentence level figures
       sentences = get(testName, F_SENTENCES);
       dumpCell(sentences);
       correctSentences = get(testName, F_CORRECT_SENTENCES);
       sentencesWithErrors = sentences - correctSentences;
       dumpCell(getPercent(sentencesWithErrors, sentences));

       testDate = getDate(testName);
       printf("<td> %s </td>", testDate);
       printf("</tr>\n");
   } else {
       printf("<tr/>\n");
   }
}


#######
# Writes the two heading rows of the report table: a banner built from
# the mode, the title and the system name, then the column captions.
# 
function dumpTitle(  i) {

    # "any" is shown to the reader as "all systems"
    machineName = (machine == "any") ? "all systems" : machine;

    printf("<tr bgcolor=lightblue> <th colspan=10>%s%s for %s</tr>\n", \
        modeTitle[mode], title, systemTitle[whichSystem]);

    printf("<tr bgcolor=lightblue> <th> %s %s ", "Test run on", machineName);

    # one caption per data column, in the order dumpTest writes them
    printf("<th> %s <th> %s <th> %s " \
           "<th> %s <th> %s <th> %s " \
           "<th> %s <th> %s <th> %s ", \
        "Speed (RT)", \
        "Num Words", \
        "Insertions (%) (#)", \
        "Deletions (%) (#)", \
        "Substitutions (%) (#)", \
        "Word Error Rate (%)", \
        "Num Sentences", \
        "Sentence Error Rate", \
        "Date");
    printf("</tr>\n");
}

####
# Writes the report once the whole log has been read: the table with one
# row per entry of testOrder, then a generated-file footer carrying the
# start date and time read in the BEGIN rule.
#
END {
    # If test list is any then we synthesize the test order from the test
    # names collected while reading the log.  The for-in traversal order
    # is whatever gawk chooses.
    if (testList == "any") {
        totalTestCount = 0;
        for (i in tests) {
            testOrder[totalTestCount++] = i;
        }
    }

    # The percent sign is doubled because this string is a printf format.
    # A single one followed by " a" is read as a conversion specification,
    # which stops gawk versions that implement %a with a fatal
    # not-enough-arguments error.
    printf("<table cellpadding=1 cellspacing=0 border=1 width=90%% align=center>\n");

    printf("<tbody>\n");

    dumpTitle();
    for (i = 0; i < totalTestCount; i++) {
        dumpTest(testOrder[i]);
    }
    printf("</tbody>\n");
    printf("</table>\n");

    print "<small>"
    print "<center>"
    printf("<i>This table is automatically generated. ");
    printf("Do not edit this by hand. ");
    printf("Last updated on %s at %s.</i>\n", date, time);
    print "</center>"
    print "</small>"
}

'  "$@"
# "$@" (rather than an unquoted $*) hands every argument to gawk exactly
# as it was given, so a title containing spaces stays a single ARGV
# entry and the recognizer argument keeps its position.

