decode lattice

Andreas Stolcke stolcke at speech.sri.com
Thu Feb 26 11:05:43 PST 2004


In message <GLEPKGIJOMFGABPKCAMJEEMLCEAA.john at newington.f9.co.uk>you wrote:
> Dear Andreas,
> 
> I am replying on behalf of my colleague who emailed you earlier regarding
> correct use of the SRILM lattice-tool.
> 
> Based on your previous advice I have tried to decode our lattice using our
> bigram model. All files seem to be in the correct format, so far as I can
> tell. However, when lattice-tool rescores the lattice, all the newly added
> LM probabilities "l=..." come out as "-inf". I tried 1-best decoding using
> viterbi on the rescored lattice and the output is simply:
> 
> lattice.out </s>
> 
> where lattice.out is the utterance name inserted by lattice-tool.
> 
> Do you have any idea why we're experiencing behaviour like this? Can you
> suggest any alterations?

John,

the problem is that your lattices use double-quotes around the word strings,
but the released version of SRILM doesn't yet implement the HTK quoting
mechanism (an oversight on my part).

You can replace the file lattice/src/HTKLattice.cc with the attached version
and rebuild lattice-tool to make it work.  Or, you can just strip the 
double quotes in your lattice files and keep using the old software.

--Andreas 

-------------- next part --------------
/*
 * HTKLattice.cc --
 *	HTK Standard Lattice Format support for SRILM lattices
 *
 *	Note: there is no separate HTKLattice class, only I/O methods!
 *
 */

#ifndef lint
static char Copyright[] = "Copyright (c) 2004 SRI International.  All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lattice/src/RCS/HTKLattice.cc,v 1.17 2004/02/26 18:48:22 stolcke Exp $";
#endif

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <assert.h>

#include "Array.cc"
#include "LHash.cc"
#include "Lattice.h"
#include "MultiwordVocab.h"
#include "NBest.h"		// for phoneSeparator defn

/*
 * Template instantiation for the Array<HTKLink> used in readHTK().
 * NOTE(review): INSTANTIATE_ARRAY is defined elsewhere (presumably Array.cc);
 * confirm its exact effect there.
 */
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(HTKLink);
#endif

/* from Lattice.cc */
/* debugging levels tested with debug() in this file */
#define DebugPrintFatalMessages         1 
#define DebugPrintFunctionality         1 

/* value written to the VERSION= header field by writeHTK() */
const char *HTKLattice_Version = "1.1";

/* sentinel values marking HTKLink/HTKHeader fields that have not been set */
const float HTK_undef_float = HUGE_VAL;
const unsigned HTK_undef_uint = (unsigned)-1;

/*
 * word label denoting "no word": mapped to Vocab_None by readHTK(), and
 * written by writeHTK() for null and sentence-boundary nodes
 */
const char *HTK_null_word = "!NULL";

/*
 * default header values: used by the HTKHeader default constructor;
 * writeHTK() only outputs scale factors that differ from these
 */
const float HTK_def_tscale = 1.0;
const float HTK_def_acscale = 1.0;
const float HTK_def_lmscale = 1.0;
const float HTK_def_ngscale = 1.0;
const float HTK_def_wdpenalty = 0.0;
const float HTK_def_prscale = 1.0;
const float HTK_def_duscale = 0.0;

/*
 * Default header: log base 10, default HTK scale factors and word penalty,
 * undefined amscale, no ancillary strings, and neither words nor scores
 * attached to nodes.
 */
HTKHeader::HTKHeader()
    : logbase(10),
      tscale(HTK_def_tscale),
      acscale(HTK_def_acscale),
      ngscale(HTK_def_ngscale),
      lmscale(HTK_def_lmscale),
      wdpenalty(HTK_def_wdpenalty),
      prscale(HTK_def_prscale),
      duscale(HTK_def_duscale),
      amscale(HTK_undef_float),
      vocab(0),
      lmname(0),
      ngname(0),
      hmms(0),
      wordsOnNodes(false),
      scoresOnNodes(false)
{
}

/*
 * Header with caller-supplied score scale factors and word penalty.
 * Everything else is initialized as in the default constructor
 * (log base 10, default tscale, undefined amscale, no ancillary strings).
 */
HTKHeader::HTKHeader(double acscale, double lmscale, double ngscale,
			double prscale, double duscale, double wdpenalty)
    : logbase(10),
      tscale(HTK_def_tscale),
      acscale(acscale),
      ngscale(ngscale),
      lmscale(lmscale),
      wdpenalty(wdpenalty),
      prscale(prscale),
      duscale(duscale),
      amscale(HTK_undef_float),
      vocab(0),
      lmname(0),
      ngname(0),
      hmms(0),
      wordsOnNodes(false),
      scoresOnNodes(false)
{
}

/*
 * Release the ancillary header strings owned by this header.
 */
HTKHeader::~HTKHeader()
{
    // free() is a no-op on null pointers, so no guards are required
    free(vocab);
    free(lmname);
    free(ngname);
    free(hmms);
}

/*
 * Assignment: copies all header parameters and makes private copies of the
 * ancillary strings (vocab, lmname, ngname, hmms), releasing the ones
 * previously held.  Self-assignment is a no-op.
 */
HTKHeader &
HTKHeader::operator= (const HTKHeader &other)
{
    if (&other == this) {
	return *this;
    }

    /*
     * Scalar parameters.  logbase and the two *OnNodes flags used to be
     * left out, so an assigned header kept stale values for them.
     */
    logbase = other.logbase;
    tscale = other.tscale;
    acscale = other.acscale;
    ngscale = other.ngscale;
    lmscale = other.lmscale;
    wdpenalty = other.wdpenalty;
    prscale = other.prscale;
    duscale = other.duscale;
    amscale = other.amscale;
    wordsOnNodes = other.wordsOnNodes;
    scoresOnNodes = other.scoresOnNodes;

    /*
     * Owned strings: drop the old copy, then duplicate the new value
     */
    if (vocab) free(vocab);
    vocab = 0;
    if (other.vocab != 0) {
	vocab = strdup(other.vocab);
	assert(vocab != 0);
    }

    if (lmname) free(lmname);
    lmname = 0;
    if (other.lmname != 0) {
	lmname = strdup(other.lmname);
	assert(lmname != 0);
    }

    if (ngname) free(ngname);
    ngname = 0;
    if (other.ngname != 0) {
	ngname = strdup(other.ngname);
	assert(ngname != 0);
    }

    if (hmms) free(hmms);
    hmms = 0;
    if (other.hmms != 0) {
	hmms = strdup(other.hmms);
	assert(hmms != 0);
    }

    return *this;
}


/*
 * A new link carries no information: no word, no diversion string, and
 * all numeric fields set to their "undefined" sentinels.
 */
HTKLink::HTKLink()
    : time(HTK_undef_float),
      word(Vocab_None),
      var(HTK_undef_uint),
      div(0),
      acoustic(HTK_undef_float),
      ngram(HTK_undef_float),
      language(HTK_undef_float),
      pron(HTK_undef_float),
      duration(HTK_undef_float),
      posterior(HTK_undef_float)
{
}

/*
 * Release the div string owned by this link.
 */
HTKLink::~HTKLink()
{
    // free() is a no-op on a null pointer
    free(div);
}

/*
 * Assignment: copies every field and takes a private copy of the div
 * string, releasing the one previously held.  Self-assignment is a no-op.
 */
HTKLink &
HTKLink::operator= (const HTKLink &other)
{
    if (this != &other) {
	// replace the owned string first
	free(div);
	div = (other.div != 0) ? strdup(other.div) : 0;
	assert(other.div == 0 || div != 0);

	// plain value fields
	time = other.time;
	word = other.word;
	var = other.var;
	acoustic = other.acoustic;
	ngram = other.ngram;
	language = other.language;
	pron = other.pron;
	duration = other.duration;
	posterior = other.posterior;
    }

    return *this;
}

/*
 * Debugging output for an HTKLink: prints "[HTKLink ...]" listing only
 * those fields that are set, each as " name=value".
 */
ostream &
operator<< (ostream &stream, HTKLink &link)
{
    stream << "[HTKLink";

    if (link.word != Vocab_None) stream << " WORD=" << link.word;
    if (link.time != HTK_undef_float) stream << " time=" << link.time;
    if (link.var != HTK_undef_uint) stream << " var=" << link.var;
    if (link.div != 0) stream << " div=" << link.div;

    /*
     * The score fields all follow the same pattern, so they are driven
     * from a table listing them in output order.
     */
    struct ScoreField {
	const char *label;
	double value;
    };
    const ScoreField scores[] = {
	{ " a=", link.acoustic },
	{ " n=", link.ngram },
	{ " l=", link.language },
	{ " r=", link.pron },
	{ " ds=", link.duration },
	{ " p=", link.posterior }
    };
    const unsigned numScores = sizeof(scores) / sizeof(scores[0]);

    for (unsigned i = 0; i < numScores; i ++) {
	if (scores[i].value != HTK_undef_float) {
	    stream << scores[i].label << scores[i].value;
	}
    }

    return stream << "]";
}


/*
 * Find the next key=value pair in line, and advance the line pointer past it.
 * The string pointed to by line is modified in the process (key and value
 * are terminated in place, and quoting is removed from the value).
 *
 * Arguments:
 *	- line: current parsing position; on success it is moved past the
 *	  field that was returned
 *	- value: set to the value string of the field; for a key that is not
 *	  followed by '=' it is set to an empty string
 * Returns:
 *	the key string, or 0 if only whitespace or a '#' comment remains
 *	(line is left unchanged in that case).
 *
 * Value syntax:
 *	- a value starting with " or ' extends to the matching end quote,
 *	  otherwise it extends to the next whitespace character
 *	- \0nnn is replaced by the character with octal code nnn
 *	- any other \c is replaced by the character c
 */
static char *
getHTKField(char *&line, char *&value)
{
    char *cp = line;

    /*
     * Skip whitespace preceding the key.  All whitespace characters are
     * skipped (not just blank, tab and newline), so that lines with CR-LF
     * terminators don't produce spurious empty keys.
     * Characters are cast to unsigned char since the <ctype.h> functions
     * are undefined for negative arguments.
     */
    while (isspace((unsigned char)*cp)) cp ++;

    if (*cp == '\0' || *cp == '#') {
	/*
	 * End of line or start of comment
	 */
	return 0;
    }

    char *key = cp;

    while (*cp != '\0' && !isspace((unsigned char)*cp) && *cp != '=') cp ++;

    if (*cp == '=') {
	*(cp++) = '\0';		// terminate key string
	value = cp;		// beginning of value string
	char *cpv = cp;		// target location for copying value

	char inquote = '\0';

	/*
	 * Quotes are only treated specially if they 
	 * occur in first position
	 */
	if (*cp == '\"' || *cp == '\'') {
	    inquote = *(cp++);
	}

	while (*cp != '\0') {
	    if (*cp == '\\') {
		/*
		 * Backslash quote processing
		 */
		cp ++;
		if (*cp == '\0') {
		    /*
		     * Shouldn't happen, we just ignore it
		     */
		    break;
		} else if (*cp == '0') {
		    /*
		     * Octal char code.
		     * %n requires an int; the defaults make us consume
		     * just the '0' should the conversion ever fail.
		     */
		    unsigned charcode = 0;
		    int charlen = 1;
		    sscanf(cp, "%o%n", &charcode, &charlen);
		    *(cpv++) = (char)charcode;
		    cp += charlen;
		} else {
		    /*
		     * Other quoted character
		     */
		    *(cpv++) = *(cp++);
		}
	    } else if (!inquote && isspace((unsigned char)*cp)) {
		/*
		 * String delimited by whitespace
		 */
		cp ++;
		break;
	    } else if (inquote && *cp == inquote) {
		/*
		 * String delimited by end quote
		 */
		cp ++;
		break;
	    } else {
		/* 
		 * Character in string
		 */
		*(cpv++) = *(cp++);
	    }
	}
	*cpv = '\0';		// terminate value string
    } else {
	/*
	 * Key without value: terminate the key; value is the empty string
	 */
	value = cp;
	if (*cp != '\0') {
	    *(cp++) = '\0';
	}
    }

    line = cp;
    return key;
}

/*
 * Output quoted version of string, such that getHTKField() recovers the
 * original string:
 *	- blanks, backslashes and quote characters are preceded by a backslash
 *	- non-printable and whitespace characters are written as \0nnn
 *	  (octal character code)
 *	- a digit directly following an octal code must not be mistaken for
 *	  part of that code: digits 1-9 are preceded by a backslash, and the
 *	  digit 0 is itself written as an octal code (a backslash-quoted 0
 *	  would be read back as the start of an octal code)
 */
static void
printQuoted(FILE *f, const char *name)
{
    bool octalPrinted = false;	// previous character was output in octal

    for (const char *cp = name; *cp != '\0'; cp ++) {
	/*
	 * Work on the unsigned character value: the <ctype.h> functions are
	 * undefined for negative arguments, and a negative value would be
	 * sign-extended when printed with %o.
	 */
	const unsigned char c = (unsigned char)*cp;

	if (octalPrinted && c == '0') {
	    /*
	     * Zero digit following an octal code: print as octal char code
	     */
	    fprintf(f, "\\0%o", (unsigned)c);
	    octalPrinted = true;
	} else if (c == ' ' || c == '\\' || c == '\'' || c == '\"' ||
		   (octalPrinted && isdigit(c)))
	{
	    /*
	     * This character needs to be quoted
	     */
	    putc('\\', f);
	    putc(c, f);
	    octalPrinted = false;
	} else if (!isprint(c) || isspace(c)) {
	    /*
	     * Print as octal char code
	     */
	    fprintf(f, "\\0%o", (unsigned)c);
	    octalPrinted = true;
	} else {
	    /*
	     * Print as plain character
	     */
	    putc(c, f);
	    octalPrinted = false;
	}
    }
}

/*
 * Input lattice in HTK format
 *	Algorithm:
 *	- each HTK node becomes a null node.
 *	- each HTK link becomes a non-null node.
 *	- word and other link information is added to the non-null nodes.
 *	- link information attached to HTK nodes is added to non-null nodes.
 *	- lattice transition weights are computed as a log-linear combination
 *	  of HTK scores.
 * Arguments:
 *	- if header != 0, supplied scaling parameters override information
 *	  from lattice header
 *	- if useNullNodes == false null nodes corresponding to original
 *	  HTK nodes are eliminated
 */
Boolean
Lattice::readHTK(File &file, HTKHeader *header, Boolean useNullNodes)
{
    removeAll();

    unsigned HTKnumlinks = 0;
    unsigned HTKnumnodes = 0;
    float HTKlogbase = M_E;
    unsigned HTKfinal = HTK_undef_uint;
    unsigned HTKinitial = HTK_undef_uint;
    char HTKdirection = 'f';

    unsigned HTKfirstnode = HTK_undef_uint;
    unsigned HTKlastnode = HTK_undef_uint;
    float HTKinitialtime, HTKfinaltime;

    LHash<unsigned, NodeIndex> nodeMap;		// maps HTK nodes->lattice nodes
    Array<HTKLink> nodeInfoMap;			// node-based link information

    // dummy word used temporarily to represent HTK nodes
    // (could have used null nodes, but this way we preserve null nodes in
    // the input lattice)
    const char *HTKNodeWord = "***HTK_Node***";
    VocabIndex HTKNodeDummy = useNullNodes ? Vocab_None :
					     vocab.addWord(HTKNodeWord);

    /*
     * Override supplied header parameters
     */
    if (header != 0) {
	if (header->logbase != HTK_undef_float) {
	    htkheader.logbase = header->logbase;
	}
	if (header->acscale != HTK_undef_float) {
	    htkheader.acscale = header->acscale;
	}
	if (header->lmscale != HTK_undef_float) {
	    htkheader.lmscale = header->lmscale;
	}
	if (header->ngscale != HTK_undef_float) {
	    htkheader.ngscale = header->ngscale;
	}
	if (header->prscale != HTK_undef_float) {
	    htkheader.prscale = header->prscale;
	}
	if (header->duscale != HTK_undef_float) {
	    htkheader.duscale = header->duscale;
	}
	if (header->wdpenalty != HTK_undef_float) {
	    htkheader.wdpenalty = header->wdpenalty;
	}
	if (header->amscale != HTK_undef_float) {
	    htkheader.amscale = header->amscale;
	}
	htkheader.wordsOnNodes = header->wordsOnNodes;
	htkheader.scoresOnNodes = header->scoresOnNodes;
    }


    /*
     * Parse HTK lattice file
     */
    while (char *line = file.getline()) {
	char *key;
	char *value;

	/*
	 * Parse key=value pairs
	 * (we test for frequent fields first to save time)
	 * We assume that header information comes before node information,
	 * which comes before link information.  However, this is is not
	 * enforced, and incomplete lattices may result if the input file
	 * contains things out of order.
	 */
	while (key = getHTKField(line, value)) {
#define keyis(x)	(strcmp(key, (x)) == 0)
	    /*
	     * Link fields
	     */
	    if (keyis("J")) {
		unsigned HTKlinkno = atoi(value);

		/*
		 * parse link fields
		 */
		HTKLink *linkinfo = new HTKLink;
		assert(linkinfo != 0);
				// allocates new HTKLink pointer in lattice
		htkinfos[htkinfos.size()] = linkinfo;

		unsigned HTKstartnode, HTKendnode;
		NodeIndex startIndex = NoNode, endIndex = NoNode;

		while (key = getHTKField(line, value)) {
		    if (keyis("S") || keyis("START")) {
			HTKstartnode = atoi(value);
			Boolean found;
			NodeIndex *startIndexPtr =
				nodeMap.insert(HTKstartnode, found);
			if (!found) {
			    // node index not seen before; create it
			    *startIndexPtr = dupNode(Vocab_None);
			}
			startIndex = *startIndexPtr;

		    } else if (keyis("E") || keyis("END")) {
			HTKendnode = atoi(value);
			Boolean found;
			NodeIndex *endIndexPtr =
				nodeMap.insert(HTKendnode, found);
			if (!found) {
			    // node index not seen before; create it
			    *endIndexPtr = dupNode(Vocab_None);
			}
			endIndex = *endIndexPtr;

		    } else if (keyis("W") || keyis("WORD")) {
			if (strcmp(value, HTK_null_word) == 0) {
			    linkinfo->word = Vocab_None;
			} else {
			    linkinfo->word = vocab.addWord(value);
			}
		    } else if (keyis("v") || keyis("var")) {
			linkinfo->var = atoi(value);
		    } else if (keyis("d") || keyis("div")) {
			linkinfo->div = strdup(value);
			assert(linkinfo->div != 0);
		    } else if (keyis("a") || keyis("acoustic")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    linkinfo->acoustic = score * ProbToLogP(HTKlogbase);
			} else {
			    linkinfo->acoustic = ProbToLogP(score);
			}
		    } else if (keyis("n") || keyis("ngram")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    linkinfo->ngram = score * ProbToLogP(HTKlogbase);
			} else {
			    linkinfo->ngram = ProbToLogP(score);
			}
		    } else if (keyis("l") || keyis("language")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    linkinfo->language = score * ProbToLogP(HTKlogbase);
			} else {
			    linkinfo->language = ProbToLogP(score);
			}
		    } else if (keyis("r")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    linkinfo->pron = score * ProbToLogP(HTKlogbase);
			} else {
			    linkinfo->pron = ProbToLogP(score);
			}
		    } else if (keyis("ds")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    linkinfo->duration = score * ProbToLogP(HTKlogbase);
			} else {
			    linkinfo->duration = ProbToLogP(score);
			}
		    } else if (keyis("p")) {
			linkinfo->posterior = atof(value);
		    } else {
			file.position() << "unexpected link field name "
					<< key << endl;
			if (!useNullNodes) vocab.remove(HTKNodeDummy);
			return false;
		    }
		}

		if (startIndex == NoNode) {
		    file.position() << "missing start node spec\n";
		    if (!useNullNodes) vocab.remove(HTKNodeDummy);
		    return false;
		}

		if (endIndex == NoNode) {
		    file.position() << "missing end node spec\n";
		    if (!useNullNodes) vocab.remove(HTKNodeDummy);
		    return false;
		}

		/*
		 * fill in unspecified link info from associated node info
		 * 'forward' lattices use end-node information.
		 * 'backward' lattices use start-node information.
		 */
		HTKLink *nodeinfo = 0;
		if (HTKdirection == 'f') {
		    nodeinfo = &nodeInfoMap[HTKendnode];
		} else if (HTKdirection == 'b') {
		    nodeinfo = &nodeInfoMap[HTKstartnode];
		}

		if (nodeinfo != 0) {
		    linkinfo->time = nodeinfo->time;

		    if (linkinfo->word == Vocab_None) {
			linkinfo->word = nodeinfo->word;
		    }
		    if (linkinfo->var == HTK_undef_uint) {
			linkinfo->var = nodeinfo->var;
		    }
		    if (linkinfo->div == 0 && nodeinfo->div != 0) {
			linkinfo->div = strdup(nodeinfo->div);
			assert(linkinfo->div != 0);
		    }
		    if (linkinfo->acoustic == HTK_undef_float) {
			linkinfo->acoustic = nodeinfo->acoustic;
		    }
		    if (linkinfo->pron == HTK_undef_float) {
			linkinfo->pron = nodeinfo->pron;
		    }
		    if (linkinfo->duration == HTK_undef_float) {
			linkinfo->duration = nodeinfo->duration;
		    }
		}

		/*
		 * Create lattice node
		 */
		NodeIndex newNode = dupNode(linkinfo->word, 0, linkinfo);

		/*
		 * Compute lattice transition weight as a weighted combination
		 * of HTK lattice scores
		 */
		LogP weight = LogP_One;

		if (linkinfo->acoustic != HTK_undef_float) {
		    weight += htkheader.acscale * linkinfo->acoustic;
		}
		if (linkinfo->ngram != HTK_undef_float) {
		    weight += htkheader.ngscale * linkinfo->ngram;
		}
		if (linkinfo->language != HTK_undef_float) {
		    weight += htkheader.lmscale * linkinfo->language;
		}
		if (linkinfo->pron != HTK_undef_float) {
		    weight += htkheader.prscale * linkinfo->pron;
		}
		if (linkinfo->duration != HTK_undef_float) {
		    weight += htkheader.duscale * linkinfo->duration;
		}
		if (!ignoreWord(linkinfo->word)) {
		    weight += htkheader.wdpenalty; 	// do we need to scale ?
		}

		/*
		 * Add transitions from start node, and to end node
		 */
		LatticeTransition trans1(weight, 0);
		insertTrans(startIndex, newNode, trans1);

		LatticeTransition trans2(LogP_One, 0);
		insertTrans(newNode, endIndex, trans2);

		continue;

	    /*
	     * Node fields
	     */
	    } else if (keyis("I")) {
		unsigned HTKnodeno = atoi(value);

		/*
		 * create a null node for this HTK node,
		 * and record node-related info.
		 */
		NodeIndex nullNodeIndex = dupNode(HTKNodeDummy);

		*nodeMap.insert(HTKnodeno) = nullNodeIndex;
		HTKLink &nodeinfo = nodeInfoMap[HTKnodeno];

		/*
		 * parse node fields
		 */
		while (key = getHTKField(line, value)) {
		    if (keyis("t") || keyis("time")) {
			nodeinfo.time = atof(value);

			// remember temporally first node and timestamp
			// in case input doesn't specify initial node
			if (HTKfirstnode == HTK_undef_uint ||
			    nodeinfo.time < HTKinitialtime)
			{
			    HTKfirstnode = HTKnodeno;
			    HTKinitialtime = nodeinfo.time;
			}
			// same for last timestamp
			if (HTKlastnode == HTK_undef_uint ||
			    nodeinfo.time > HTKfinaltime)
			{
			    HTKlastnode = HTKnodeno;
			    HTKfinaltime = nodeinfo.time;
			}
		    } else if (keyis("W") || keyis("WORD")) {
			if (strcmp(value, HTK_null_word) == 0) {
			    nodeinfo.word = Vocab_None;
			} else {
			    nodeinfo.word = vocab.addWord(value);
			}
		    } else if (keyis("v") || keyis("var")) {
			nodeinfo.var = atoi(value);
		    } else if (keyis("d") || keyis("div")) {
			nodeinfo.div = strdup(value);
			assert(nodeinfo.div != 0);
		    } else if (keyis("a") || keyis("acoustic")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    nodeinfo.acoustic = score * ProbToLogP(HTKlogbase);
			} else {
			    nodeinfo.acoustic = ProbToLogP(score);
			}
		    } else if (keyis("r")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    nodeinfo.pron = score * ProbToLogP(HTKlogbase);
			} else {
			    nodeinfo.pron = ProbToLogP(score);
			}
		    } else if (keyis("ds")) {
			double score = atof(value);
			if (HTKlogbase > 0.0) {
			    nodeinfo.duration = score * ProbToLogP(HTKlogbase);
			} else {
			    nodeinfo.duration = ProbToLogP(score);
			}
		    } else {
			file.position() << "unexpected node field name "
					<< key << endl;
			if (!useNullNodes) vocab.remove(HTKNodeDummy);
			return false;
		    }
		}

		if (nodeinfo.time != HTK_undef_float) {
		    // record node time, but no word-related info
		    LatticeNode *nullNode = findNode(nullNodeIndex);
		    assert(nullNode != 0);

		    HTKLink *nullInfo = new HTKLink;
		    assert(nullInfo != 0);
		    htkinfos[htkinfos.size()] = nullInfo;

		    nullNode->htkinfo = nullInfo;
		    nullInfo->time = nodeinfo.time;
		}

		continue;

	    /*
	     * Header fields
	     */
	    } else if (keyis("V") || keyis("VERSION")) {
		; 		// ignore
	    } else if ( keyis("U") || keyis("UTTERANCE")) {
		if (name) free((void *)name);

		// HACK: strip duration spec (which shouldn't be there)
		char *p = strstr(value, "(duration=");
		if (p != 0) *p = '\0';
		    
		name = strdup(value);
		assert(name != 0);
	    } else if (keyis("base")) {
		HTKlogbase = atof(value);
	    } else if (keyis("start")) {
		HTKinitial = atoi(value);
	    } else if (keyis("end")) {
		HTKfinal = atoi(value);
	    } else if (keyis("dir")) {
		HTKdirection = value[0];
	    } else if (keyis("tscale")) {
		htkheader.tscale = atof(value);
	    } else if (keyis("hmms")) {
		htkheader.hmms = strdup(value);
		assert(htkheader.hmms != 0);
	    } else if (keyis("ngname")) {
		htkheader.ngname = strdup(value);
		assert(htkheader.ngname != 0);
	    } else if (keyis("lmname")) {
		htkheader.lmname = strdup(value);
		assert(htkheader.lmname != 0);
	    } else if (keyis("vocab")) {
		htkheader.vocab = strdup(value);
		assert(htkheader.vocab != 0);
	    } else if (keyis("acscale")) {
		if (header == 0 || header->acscale == HTK_undef_float) {
		    htkheader.acscale = atof(value);
		}
	    } else if (keyis("ngscale")) {
		if (header == 0 || header->ngscale == HTK_undef_float) {
		    htkheader.ngscale = atof(value);
		}
	    } else if (keyis("lmscale")) {
		if (header == 0 || header->lmscale == HTK_undef_float) {
		    htkheader.lmscale = atof(value);
		}
	    } else if (keyis("prscale")) {
		if (header == 0 || header->prscale == HTK_undef_float) {
		    htkheader.prscale = atof(value);
		}
	    } else if (keyis("duscale")) {
		if (header == 0 || header->duscale == HTK_undef_float) {
		    htkheader.duscale = atof(value);
		}
	    } else if (keyis("wdpenalty")) {
		if (header == 0 || header->wdpenalty == HTK_undef_float) {
		    htkheader.wdpenalty = atof(value);
		}
	    } else if (keyis("amscale")) {
		if (header == 0 || header->amscale == HTK_undef_float) {
		    htkheader.amscale = atof(value);
		}
	    } else if (keyis("NODES") || keyis("N")) {
		HTKnumnodes = atoi(value);
	    } else if (keyis("LINKS") || keyis("L")) {
		HTKnumlinks = atoi(value);
	    } else {
		file.position() << "unknown field name " << key << endl;
		if (!useNullNodes) vocab.remove(HTKNodeDummy);
		return false;
	    }
#undef keyis
	}
    }

    if (HTKnumnodes == 0) {
	file.position() << "lattice has no nodes\n";
	if (!useNullNodes) vocab.remove(HTKNodeDummy);
	return false;
    }

    /*
     * Set up initial node
     */
    HTKLink *initialinfo;
    LatticeNode *initialNode;

    if (HTKinitial != HTK_undef_uint) {
	initialinfo = &nodeInfoMap[HTKinitial];
	NodeIndex *initialPtr = nodeMap.find(HTKinitial);
	if (initialPtr) {
	    initial = *initialPtr;
	    initialNode = findNode(initial);
	} else {
	    file.position() << "undefined start node " << HTKinitial << endl;
	    if (!useNullNodes) vocab.remove(HTKNodeDummy);
	    return false;
	}
    } else {
	initialinfo = &nodeInfoMap[HTKfirstnode];

	// search for start node: the one without incoming transitions
	LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
	NodeIndex nodeIndex;
	while (LatticeNode *node = nodeIter.next(nodeIndex)) {
	    if (node->inTransitions.numEntries() == 0) {
		initial = nodeIndex;
		initialNode = node;
		break;
	    }
	}
    }
    initialNode->word = vocab.ssIndex();

    // attach HTK initial node info to lattice initial node
    initialNode->htkinfo = new HTKLink;
    *initialNode->htkinfo = *initialinfo;
    htkinfos[htkinfos.size()] = initialNode->htkinfo;

    /*
     * Set up final node
     */
    HTKLink *finalinfo;
    LatticeNode *finalNode;

    if (HTKfinal != HTK_undef_uint) {
	finalinfo = &nodeInfoMap[HTKfinal];
	NodeIndex *finalPtr = nodeMap.find(HTKfinal);
	if (finalPtr) {
	    final = *finalPtr;
	    finalNode = findNode(final);
	} else {
	    file.position() << "undefined end node " << HTKfinal << endl;
	    if (!useNullNodes) vocab.remove(HTKNodeDummy);
	    return false;
	}
    } else {
	finalinfo = &nodeInfoMap[HTKlastnode];
	// search for end node: the one without outgoing transitions
	LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
	NodeIndex nodeIndex;
	while (LatticeNode *node = nodeIter.next(nodeIndex)) {
	    if (node->outTransitions.numEntries() == 0) {
		final = nodeIndex;
		finalNode = node;
		break;
	    }
	}
    }
    finalNode->word = vocab.seIndex();

    // attach HTK final node info to lattice final node
    finalNode->htkinfo = new HTKLink;
    *finalNode->htkinfo = *finalinfo;
    htkinfos[htkinfos.size()] = finalNode->htkinfo;

    // eliminate dummy nodes 
    if (!useNullNodes) {
	removeAllXNodes(HTKNodeDummy);
	vocab.remove(HTKNodeDummy);
    }

    return true;
}

/*
 * Output lattice in HTK format
 *	Algorithm:
 *	- each lattice node becomes an HTK node.
 *	- each lattice transitions becomes an HTK link.
 *	- word information is added to the HTK nodes.
 *	- link information attached to each node is added to the HTK link
 *	  leading into the node.
 *	- lattice transition weights are mapped to one of the
 *	  HTK score fields as indicated by the second argument.
 * Arguments:
 *	- scoreMapping selects the HTK score field (acoustic, ngram or
 *	  language) that receives the transition weights; the corresponding
 *	  score stored with the nodes is then not output.  With mapHTKnone
 *	  the transition weights are not output at all.
 *	- if printPosteriors is true, node posteriors (p=) and the amscale
 *	  header field are output as well
 * All string values (utterance name, header strings, words, div fields)
 * are written in quoted form, so that readHTK() can recover them.
 * Returns:
 *	always true
 */
Boolean
Lattice::writeHTK(File &file, HTKScoreMapping scoreMapping,
						    Boolean printPosteriors)
{
    if (debug(DebugPrintFunctionality)) {
      dout()  << "Lattice::writeHTK: writing ";
    }

    fprintf(file, "# Header (generated by SRILM)\n");
    fprintf(file, "VERSION=%s\n", HTKLattice_Version);
    fprintf(file, "UTTERANCE="); printQuoted(file, name); fputc('\n', file);
    fprintf(file, "base=%g\n", htkheader.logbase);
    fprintf(file, "dir=%s\n", "f");		// forward lattice

    /* 
     * Ancillary header information preserved from readHTK()
     */
    if (htkheader.tscale != HTK_def_tscale) {
	fprintf(file, "tscale=%g\n", htkheader.tscale);
    }
    if (htkheader.acscale != HTK_def_acscale) {
	fprintf(file, "acscale=%g\n", htkheader.acscale);
    }
    if (htkheader.lmscale != HTK_def_lmscale) {
	fprintf(file, "lmscale=%g\n", htkheader.lmscale);
    }
    if (htkheader.ngscale != HTK_def_ngscale) {
	fprintf(file, "ngscale=%g\n", htkheader.ngscale);
    }
    if (htkheader.prscale != HTK_def_prscale) {
	fprintf(file, "prscale=%g\n", htkheader.prscale);
    }
    if (htkheader.duscale != HTK_def_duscale) {
	fprintf(file, "duscale=%g\n", htkheader.duscale);
    }
    if (htkheader.amscale != HTK_undef_float && printPosteriors) {
	fprintf(file, "amscale=%g\n", htkheader.amscale);
    }
    if (htkheader.hmms != 0) {
	fprintf(file, "hmms=");
	printQuoted(file, htkheader.hmms); fputc('\n', file);
    }
    if (htkheader.lmname != 0) {
	fprintf(file, "lmname=");
	printQuoted(file, htkheader.lmname); fputc('\n', file);
    }
    if (htkheader.ngname != 0) {
	fprintf(file, "ngname=");
	printQuoted(file, htkheader.ngname); fputc('\n', file);
    }
    if (htkheader.vocab != 0) {
	fprintf(file, "vocab=");
	printQuoted(file, htkheader.vocab); fputc('\n', file);
    }
	
    /*
     * We remap the internal node indices to consecutive unsigned integers
     * to allow a compact output representation.
     * We iterate over all nodes, renumbering them, and also counting the
     * number of transitions overall.
     */
    LHash<NodeIndex,unsigned> nodeMap;		// map nodeIndex to unsigned
    unsigned numNodes = 0;
    unsigned numTransitions = 0;

    LHashIter<NodeIndex, LatticeNode> nodeIter(nodes, nodeSort);
    NodeIndex nodeIndex;

    while (LatticeNode *node = nodeIter.next(nodeIndex)) {
	*nodeMap.insert(nodeIndex) = numNodes ++;
	numTransitions += node->outTransitions.numEntries();
    }

    // the initial and final nodes must be among the nodes just numbered
    unsigned *initialId = nodeMap.find(initial);
    unsigned *finalId = nodeMap.find(final);
    assert(initialId != 0);
    assert(finalId != 0);

    fprintf(file, "start=%u end=%u\n", *initialId, *finalId);
    fprintf(file, "NODES=%u LINKS=%u\n", numNodes, numTransitions);

    if (debug(DebugPrintFunctionality)) {
      dout()  << numNodes << " nodes, "
	      << numTransitions << " transitions\n";
    }

    fprintf(file, "# Nodes\n");

    // factor converting internal log probabilities to the output log base
    double logscale = 1.0 / ProbToLogP(htkheader.logbase);

    nodeIter.init(); 
    while (LatticeNode *node = nodeIter.next(nodeIndex)) {

	fprintf(file, "I=%u", *nodeMap.find(nodeIndex));

 	if (htkheader.wordsOnNodes) {
	    fprintf(file, "\tW=");
	    printQuoted(file, (node->word == vocab.ssIndex() ||
			       node->word == vocab.seIndex() ||
			       node->word == Vocab_None) ?
				    HTK_null_word : vocab.getWord(node->word));
	}

	if (node->htkinfo != 0) {
	    HTKLink &htkinfo = *node->htkinfo;

	    if (htkinfo.time != HTK_undef_float) {
		fprintf(file, "\tt=%g", htkinfo.time);
	    }
	    if (htkheader.scoresOnNodes &&
		scoreMapping != mapHTKacoustic &&
		htkinfo.acoustic != HTK_undef_float)
	    {
		fprintf(file, "\ta=%g", htkinfo.acoustic * logscale);
	    }
	    if (htkheader.scoresOnNodes &&
		htkinfo.pron != HTK_undef_float)
	    {
		fprintf(file, "\tr=%g", htkinfo.pron * logscale);
	    }
	    if (htkheader.scoresOnNodes &&
		htkinfo.duration != HTK_undef_float)
	    {
		fprintf(file, "\tds=%g", htkinfo.duration * logscale);
	    }
	    if (htkheader.wordsOnNodes &&
		htkinfo.var != HTK_undef_uint)
	    {
		fprintf(file, "\tv=%u", htkinfo.var);
	    }
	    if (htkheader.wordsOnNodes &&
		htkinfo.div != 0)
	    {
		// quoted like all other strings, since readHTK() unquotes it
		fprintf(file, "\td=");
		printQuoted(file, htkinfo.div);
	    }
	}
	if (printPosteriors) {
	    fprintf(file, "\tp=%lg", (double)LogPtoProb(node->posterior));
	}
	fprintf(file, "\n");
    }

    fprintf(file, "# Links\n");

    unsigned linkNumber = 0;
    nodeIter.init(); 
    while (LatticeNode *node = nodeIter.next(nodeIndex)) {
	unsigned *fromNodeId = nodeMap.find(nodeIndex);
	assert(fromNodeId != 0);

 	NodeIndex toNodeIndex;

	TRANSITER_T<NodeIndex,LatticeTransition>
	  transIter(node->outTransitions);
	while (LatticeTransition *trans = transIter.next(toNodeIndex)) {
	    LatticeNode *toNode = findNode(toNodeIndex);
	    assert(toNode != 0);

	    unsigned *toNodeId = nodeMap.find(toNodeIndex); 
	    assert(toNodeId != 0);

	    fprintf(file, "J=%u\tS=%u\tE=%u",
				linkNumber++, *fromNodeId, *toNodeId);

	    if (!htkheader.wordsOnNodes) {
		fprintf(file, "\tW=");
		printQuoted(file, (toNode->word == vocab.ssIndex() ||
				   toNode->word == vocab.seIndex() ||
				   toNode->word == Vocab_None) ?
				   HTK_null_word : vocab.getWord(toNode->word));
	    }

	    /*
	     * The link carries the information of the node it leads into
	     */
	    if (toNode->htkinfo != 0) {
		HTKLink &htkinfo = *toNode->htkinfo;

		if (!htkheader.scoresOnNodes &&
		    scoreMapping != mapHTKacoustic &&
		    htkinfo.acoustic != HTK_undef_float)
		{
		    fprintf(file, "\ta=%g", htkinfo.acoustic * logscale);
		}
		if (!htkheader.scoresOnNodes &&
		    htkinfo.pron != HTK_undef_float)
		{
		    fprintf(file, "\tr=%g", htkinfo.pron * logscale);
		}
		if (!htkheader.scoresOnNodes &&
		    htkinfo.duration != HTK_undef_float)
		{
		    fprintf(file, "\tds=%g", htkinfo.duration * logscale);
		}
		if (!htkheader.wordsOnNodes &&
		    htkinfo.var != HTK_undef_uint) {
		    fprintf(file, "\tv=%u", htkinfo.var);
		}
		if (!htkheader.wordsOnNodes &&
		    htkinfo.div != 0)
		{
		    // quoted like all other strings, since readHTK()
		    // unquotes it
		    fprintf(file, "\td=");
		    printQuoted(file, htkinfo.div);
		}
		if (scoreMapping != mapHTKngram &&
		    htkinfo.ngram != HTK_undef_float)
		{
		    fprintf(file, "\tn=%g", htkinfo.ngram * logscale);
		}
		if (scoreMapping != mapHTKlanguage &&
		    htkinfo.language != HTK_undef_float)
		{
		    fprintf(file, "\tl=%g", htkinfo.language * logscale);
		}
	    }

	    /*
	     * map transition weight to one of the standard HTK scores
	     */
	    if (scoreMapping != mapHTKnone) {
		fprintf(file, "\t%c=%g",
			    (scoreMapping == mapHTKacoustic ? 'a' :
			     (scoreMapping == mapHTKngram ? 'n' :
			      (scoreMapping == mapHTKlanguage ? 'l' : '?'))),
			    trans->weight * logscale);
	    }

	    fprintf(file, "\n");
	}
    }

    return true;
}


/* 
 * Compute pronunciation scores
 * 	(for nodes with HTKLink information that have phone backtraces)
 *
 * For each HTKLink with a div string, the phone sequence is extracted
 * from that string and looked up (together with the word) in the
 * dictionary; the result replaces the link's pronunciation score.
 * Phone labels not yet in the dictionary's phone vocabulary are added to it.
 * Arguments:
 *	- dictionary maps words and phone sequences to pronunciation scores
 *	- if intlogs is true the dictionary values are converted with
 *	  IntlogToLogP(), otherwise they are probabilities converted with
 *	  ProbToLogP()
 * Returns:
 *	always true
 */
Boolean
Lattice::scorePronunciations(VocabMultiMap &dictionary, Boolean intlogs)
{
    if (debug(DebugPrintFunctionality)) {
      dout() << "Lattice::scorePronunciations: starting\n";
    }

    Vocab &phoneVocab = dictionary.vocab2;

    /*
     * Go through all HTKLink structures, extract the phone sequences,
     * and look up their probabilities in the dictionary
     */
    for (unsigned i = 0; i < htkinfos.size(); i ++) {
	HTKLink *info = htkinfos[i];

	/*
	 * only rescore words that have pronunciations
	 * (e.g., don't include NULL nodes)
	 */
	if (info->div != 0) {
	    assert(info->word != Vocab_None);

	    /*
	     * parse the phone sequence from the string
	     * example:
	     *	d=:#[s]t,0.12:s[t]r,0.03:t[r]ay,0.05:r[ay]k,0.09:ay[k]#,0.09:
	     * and convert into an index string
	     *
	     * strtok() modifies its argument, so we work on a copy
	     * (heap-allocated, as variable-length arrays are not
	     * standard C++)
	     */
	    char *phoneString = strdup(info->div);
	    assert(phoneString != 0);

	    Array<VocabIndex> phones;
	    unsigned numPhones = 0;

	    for (char *s = strtok(phoneString, phoneSeparator);
		 s != 0;
		 s = strtok(NULL, phoneSeparator))
	    {
		// skip empty components (at beginning and end)
		if (s[0] == '\0') continue;

		// strip duration part
		char *e = strchr(s, ',');
		if (e != 0) *e = '\0';

		// strip context from triphone labels
		e = strchr(s, '[');
		if (e != 0) s = e + 1;

		e = strrchr(s, ']');
		if (e != 0) *e = '\0';

		phones[numPhones ++] = phoneVocab.addWord(s);
	    }
	    phones[numPhones] = Vocab_None;

	    // all phone labels have been converted to indices
	    free(phoneString);

	    // find pronunciation prob
	    Prob p = dictionary.get(info->word, phones.data());

	    if (p == 0.0) {
		// missing pronunciation get score 0
		info->pron = LogP_One;
	    } else {
		if (intlogs) {
		    info->pron = IntlogToLogP(p);
		} else {
		    // assign rather than add: the old score may be
		    // undefined (infinite), and must be replaced in any case
		    info->pron = ProbToLogP(p);
		}
	    }
	}
    }

    return true;
}



More information about the SRILM-User mailing list