decode lattice
Andreas Stolcke
stolcke at speech.sri.com
Thu Feb 26 11:05:43 PST 2004
In message <GLEPKGIJOMFGABPKCAMJEEMLCEAA.john at newington.f9.co.uk>you wrote:
> Dear Andreas,
>
> I am replying on behalf of my colleague who emailed you earlier regarding
> correct use of the SRILM lattice-tool.
>
> Based on your previous advice I have tried to decode our lattice using our
> bigram model. All files seem to be in the correct format, so far as I can
> tell. However, when lattice-tool rescores the lattice, all the newly added
> LM probabilities "l=..." come out as "-inf". I tried 1-best decoding using
> viterbi on the rescored lattice and the output is simply:
>
> lattice.out </s>
>
> where lattice.out is the utterance name inserted by lattice-tool.
>
> Do you have any idea why we're experiencing behaviour like this? Can you
> suggest any alterations?
John,
the problem is that your lattices use double-quotes around the word strings,
but the released version of SRILM doesn't yet implement the HTK quoting
mechanism (an oversight on my part).
You can replace the file lattice/src/HTKLattice.cc with the attached version
and rebuild lattice-tool to make it work. Or, you can just strip the
double quotes in your lattice files and keep using the old software.
--Andreas
-------------- next part --------------
/*
* HTKLattice.cc --
* HTK Standard Lattice Format support for SRILM lattices
*
* Note: there is no separate HTKLattice class, only I/O methods!
*
*/
#ifndef lint
static char Copyright[] = "Copyright (c) 2004 SRI International. All Rights Reserved.";
static char RcsId[] = "@(#)$Header: /home/srilm/devel/lattice/src/RCS/HTKLattice.cc,v 1.17 2004/02/26 18:48:22 stolcke Exp $";
#endif
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <math.h>
#include <assert.h>
#include "Array.cc"
#include "LHash.cc"
#include "Lattice.h"
#include "MultiwordVocab.h"
#include "NBest.h" // for phoneSeparator defn
#ifdef INSTANTIATE_TEMPLATES
INSTANTIATE_ARRAY(HTKLink);
#endif
/* from Lattice.cc */
// Debug levels tested with debug() before writing to dout()
#define DebugPrintFatalMessages 1
#define DebugPrintFunctionality 1
// Version string written to the VERSION= header field by writeHTK()
const char *HTKLattice_Version = "1.1";
// Sentinel values marking HTK fields that have not been set
const float HTK_undef_float = HUGE_VAL;
const unsigned HTK_undef_uint = (unsigned)-1;
// Word label that readHTK() maps to Vocab_None and that writeHTK()
// emits for null nodes and sentence start/end nodes
const char *HTK_null_word = "!NULL";
// Default header parameters used by the HTKHeader default constructor;
// writeHTK() only outputs the scale fields that differ from these defaults
const float HTK_def_tscale = 1.0;
const float HTK_def_acscale = 1.0;
const float HTK_def_lmscale = 1.0;
const float HTK_def_ngscale = 1.0;
const float HTK_def_wdpenalty = 0.0;
const float HTK_def_prscale = 1.0;
const float HTK_def_duscale = 0.0;
/*
 * Default header: base-10 logs, default scaling factors and word penalty,
 * undefined amscale, no header strings, and both words and scores
 * flagged as not being attached to nodes.
 */
HTKHeader::HTKHeader()
    : logbase(10),
      tscale(HTK_def_tscale),
      acscale(HTK_def_acscale),
      ngscale(HTK_def_ngscale),
      lmscale(HTK_def_lmscale),
      wdpenalty(HTK_def_wdpenalty),
      prscale(HTK_def_prscale),
      duscale(HTK_def_duscale),
      amscale(HTK_undef_float),
      vocab(0),
      lmname(0),
      ngname(0),
      hmms(0),
      wordsOnNodes(false),
      scoresOnNodes(false)
{
}
/*
 * Header with caller-supplied score scaling factors and word penalty.
 * Everything else is initialized as in the default constructor
 * (base-10 logs, default tscale, undefined amscale, no header strings,
 * words and scores not on nodes).
 */
HTKHeader::HTKHeader(double acscale, double lmscale, double ngscale,
                     double prscale, double duscale, double wdpenalty)
    : logbase(10),
      tscale(HTK_def_tscale),
      acscale(acscale),
      ngscale(ngscale),
      lmscale(lmscale),
      wdpenalty(wdpenalty),
      prscale(prscale),
      duscale(duscale),
      amscale(HTK_undef_float),
      vocab(0),
      lmname(0),
      ngname(0),
      hmms(0),
      wordsOnNodes(false),
      scoresOnNodes(false)
{
}
/*
 * Release the malloc'ed header strings.
 */
HTKHeader::~HTKHeader()
{
    // free() is a no-op on null pointers, so unset strings need no test
    free(vocab);
    free(lmname);
    free(ngname);
    free(hmms);
}
/*
 * Return a malloc'ed copy of a header string; a null string stays null.
 */
static char *
copyHTKHeaderString(const char *src)
{
    if (src == 0) {
        return 0;
    }

    char *copy = strdup(src);
    assert(copy != 0);
    return copy;
}

/*
 * Assignment: copies all header parameters and makes private copies of
 * the header strings (vocab, lmname, ngname, hmms), releasing the strings
 * previously held by the target.
 */
HTKHeader &
HTKHeader::operator= (const HTKHeader &other)
{
    if (&other == this) {
        return *this;
    }

    // free() is a no-op on null pointers
    free(vocab);
    free(lmname);
    free(ngname);
    free(hmms);

    // logbase and the two *OnNodes flags must be copied as well, or the
    // target would silently keep its old values for them
    logbase = other.logbase;
    tscale = other.tscale;
    acscale = other.acscale;
    ngscale = other.ngscale;
    lmscale = other.lmscale;
    wdpenalty = other.wdpenalty;
    prscale = other.prscale;
    duscale = other.duscale;
    amscale = other.amscale;
    wordsOnNodes = other.wordsOnNodes;
    scoresOnNodes = other.scoresOnNodes;

    vocab = copyHTKHeaderString(other.vocab);
    lmname = copyHTKHeaderString(other.lmname);
    ngname = copyHTKHeaderString(other.ngname);
    hmms = copyHTKHeaderString(other.hmms);

    return *this;
}
/*
 * A new link has no word, no div string, and all other fields undefined.
 */
HTKLink::HTKLink()
    : time(HTK_undef_float),
      word(Vocab_None),
      var(HTK_undef_uint),
      div(0),
      acoustic(HTK_undef_float),
      ngram(HTK_undef_float),
      language(HTK_undef_float),
      pron(HTK_undef_float),
      duration(HTK_undef_float),
      posterior(HTK_undef_float)
{
}
/*
 * Release the malloc'ed div string, if any.
 */
HTKLink::~HTKLink()
{
    // free() is a no-op on a null pointer
    free(div);
}
/*
 * Assignment: copies all fields and makes a private copy of the
 * div string, releasing the one previously held by the target.
 */
HTKLink &
HTKLink::operator= (const HTKLink &other)
{
    if (this != &other) {
        // duplicate the source string before dropping our own
        char *divCopy = 0;
        if (other.div != 0) {
            divCopy = strdup(other.div);
            assert(divCopy != 0);
        }
        free(div);
        div = divCopy;

        time = other.time;
        word = other.word;
        var = other.var;

        acoustic = other.acoustic;
        ngram = other.ngram;
        language = other.language;
        pron = other.pron;
        duration = other.duration;
        posterior = other.posterior;
    }

    return *this;
}
/*
 * Debugging output for HTKLink:
 * prints "[HTKLink ...]" listing only those fields that are defined.
 */
ostream &
operator<< (ostream &stream, HTKLink &link)
{
    stream << "[HTKLink";

    if (link.word != Vocab_None)          stream << " WORD=" << link.word;
    if (link.time != HTK_undef_float)     stream << " time=" << link.time;
    if (link.var != HTK_undef_uint)       stream << " var=" << link.var;
    if (link.div != 0)                    stream << " div=" << link.div;

    // scores
    if (link.acoustic != HTK_undef_float) stream << " a=" << link.acoustic;
    if (link.ngram != HTK_undef_float)    stream << " n=" << link.ngram;
    if (link.language != HTK_undef_float) stream << " l=" << link.language;
    if (link.pron != HTK_undef_float)     stream << " r=" << link.pron;
    if (link.duration != HTK_undef_float) stream << " ds=" << link.duration;
    if (link.posterior != HTK_undef_float) stream << " p=" << link.posterior;

    return stream << "]";
}
/*
 * Find the next key=value pair in line, and advance the line pointer
 * past it.
 *
 * Arguments:
 *	- line: in/out pointer to the unparsed rest of the input line.
 *	  The string pointed to by line is modified in the process
 *	  (key and value are terminated and unquoted in place).
 *	- value: set to the (unquoted) value string; set to an empty
 *	  string if the key is not followed by '='.
 * Returns:
 *	the key string, or 0 if the end of the line or a '#' comment
 *	is reached (line is left unchanged in that case).
 *
 * Value syntax: a value is delimited by whitespace, or by a matching
 * quote if it starts with ' or ".  Backslash escapes the following
 * character; backslash followed by '0' introduces an octal character
 * code.
 */
static char *
getHTKField(char *&line, char *&value)
{
    char *cp = line;

    /*
     * Skip whitespace preceding the field.
     * All isspace() characters are skipped, so that e.g. the carriage
     * return of DOS-style line ends is not mistaken for an (empty) key.
     * The unsigned char casts keep the <ctype.h> calls well-defined
     * for characters with the high bit set.
     */
    while (*cp != '\0' && isspace((unsigned char)*cp)) {
        cp ++;
    }

    if (*cp == '\0' || *cp == '#') {
        // end of line or start of comment: no more fields
        return 0;
    }

    char *key = cp;
    while (*cp != '\0' && !isspace((unsigned char)*cp) && *cp != '=') {
        cp ++;
    }

    if (*cp != '=') {
        /*
         * Key without value: value is the empty string
         */
        value = cp;
        if (*cp != '\0') {
            *(cp++) = '\0';         // terminate key (and value) string
        }
        line = cp;
        return key;
    }

    *(cp++) = '\0';                 // terminate key string
    value = cp;                     // beginning of value string

    char *cpv = cp;                 // target location for copying value
    char inquote = '\0';

    /*
     * Quotes are only treated specially if they
     * occur in first position
     */
    if (*cp == '\"' || *cp == '\'') {
        inquote = *(cp++);
    }

    while (*cp != '\0') {
        if (*cp == '\\') {
            /*
             * Backslash quote processing
             */
            cp ++;

            if (*cp == '\0') {
                /*
                 * Shouldn't happen, we just ignore it
                 */
                break;
            } else if (*cp == '0') {
                /*
                 * Octal char code
                 */
                unsigned charcode = 0;
                int charlen = 0;    // %n requires an int

                if (sscanf(cp, "%o%n", &charcode, &charlen) < 1 ||
                    charlen <= 0)
                {
                    // cannot happen since *cp is an octal digit,
                    // but make sure we always advance
                    charcode = 0;
                    charlen = 1;
                }
                *(cpv++) = (char)charcode;
                cp += charlen;
            } else {
                /*
                 * Other quoted character
                 */
                *(cpv++) = *(cp++);
            }
        } else if (!inquote && isspace((unsigned char)*cp)) {
            /*
             * String delimited by white-space
             */
            cp ++;
            break;
        } else if (inquote && *cp == inquote) {
            /*
             * String delimited by end quote
             */
            cp ++;
            break;
        } else {
            /*
             * Character in string
             */
            *(cpv++) = *(cp++);
        }
    }

    // cpv never runs ahead of cp, so this stays inside the value
    *cpv = '\0';                    // terminate value string

    line = cp;
    return key;
}
/*
 * Output quoted version of string, such that getHTKField() reads back
 * the original string:
 *	- space, backslash and quote characters are preceded by a backslash
 *	- non-printable and whitespace characters are written as octal
 *	  character codes (backslash, '0', octal digits)
 *	- a digit directly following an octal code is escaped so that it
 *	  is not read as part of the code
 * A null string produces no output.
 */
static void
printQuoted(FILE *f, const char *name)
{
    if (name == 0) {
        return;
    }

    bool octalPrinted = false;

    for (const char *cp = name; *cp != '\0'; cp ++) {
        /*
         * Work on the unsigned character value: the <ctype.h> functions
         * are undefined for negative arguments, and a sign-extended
         * char would print as a huge octal number.
         */
        const unsigned char c = (unsigned char)*cp;

        if (octalPrinted && c == '0') {
            /*
             * A backslash-quoted '0' would be read back as the start of
             * an octal char code, so the digit zero following an octal
             * code has to be written as an octal code itself.
             */
            fprintf(f, "\\0%o", (unsigned)c);
            octalPrinted = true;
        } else if (c == ' ' || c == '\\' || c == '\'' || c == '\"' ||
                   (octalPrinted && isdigit(c)))
        {
            /*
             * This character needs to be quoted
             */
            putc('\\', f);
            putc(c, f);
            octalPrinted = false;
        } else if (!isprint(c) || isspace(c)) {
            /*
             * Print as octal char code
             */
            fprintf(f, "\\0%o", (unsigned)c);
            octalPrinted = true;
        } else {
            /*
             * Print as plain character
             */
            putc(c, f);
            octalPrinted = false;
        }
    }
}
/*
* Input lattice in HTK format
* Algorithm:
* - each HTK node becomes a null node.
* - each HTK link becomes a non-null node.
* - word and other link information is added to the non-null nodes.
* - link information attached to HTK nodes is added to non-null nodes.
* - lattice transition weights are computed as a log-linear combination
* of HTK scores.
* Arguments:
* - if header != 0, supplied scaling parameters override information
* from lattice header
* - if useNullNodes == false null nodes corresponding to original
* HTK nodes are eliminated
*/
Boolean
Lattice::readHTK(File &file, HTKHeader *header, Boolean useNullNodes)
{
removeAll();
unsigned HTKnumlinks = 0;
unsigned HTKnumnodes = 0;
float HTKlogbase = M_E;
unsigned HTKfinal = HTK_undef_uint;
unsigned HTKinitial = HTK_undef_uint;
char HTKdirection = 'f';
unsigned HTKfirstnode = HTK_undef_uint;
unsigned HTKlastnode = HTK_undef_uint;
float HTKinitialtime, HTKfinaltime;
LHash<unsigned, NodeIndex> nodeMap; // maps HTK nodes->lattice nodes
Array<HTKLink> nodeInfoMap; // node-based link information
// dummy word used temporarily to represent HTK nodes
// (could have used null nodes, but this way we preserve null nodes in
// the input lattice)
const char *HTKNodeWord = "***HTK_Node***";
VocabIndex HTKNodeDummy = useNullNodes ? Vocab_None :
vocab.addWord(HTKNodeWord);
/*
* Override supplied header parameters
*/
if (header != 0) {
if (header->logbase != HTK_undef_float) {
htkheader.logbase = header->logbase;
}
if (header->acscale != HTK_undef_float) {
htkheader.acscale = header->acscale;
}
if (header->lmscale != HTK_undef_float) {
htkheader.lmscale = header->lmscale;
}
if (header->ngscale != HTK_undef_float) {
htkheader.ngscale = header->ngscale;
}
if (header->prscale != HTK_undef_float) {
htkheader.prscale = header->prscale;
}
if (header->duscale != HTK_undef_float) {
htkheader.duscale = header->duscale;
}
if (header->wdpenalty != HTK_undef_float) {
htkheader.wdpenalty = header->wdpenalty;
}
if (header->amscale != HTK_undef_float) {
htkheader.amscale = header->amscale;
}
htkheader.wordsOnNodes = header->wordsOnNodes;
htkheader.scoresOnNodes = header->scoresOnNodes;
}
/*
* Parse HTK lattice file
*/
while (char *line = file.getline()) {
char *key;
char *value;
/*
* Parse key=value pairs
* (we test for frequent fields first to save time)
* We assume that header information comes before node information,
* which comes before link information. However, this is is not
* enforced, and incomplete lattices may result if the input file
* contains things out of order.
*/
while (key = getHTKField(line, value)) {
#define keyis(x) (strcmp(key, (x)) == 0)
/*
* Link fields
*/
if (keyis("J")) {
unsigned HTKlinkno = atoi(value);
/*
* parse link fields
*/
HTKLink *linkinfo = new HTKLink;
assert(linkinfo != 0);
// allocates new HTKLink pointer in lattice
htkinfos[htkinfos.size()] = linkinfo;
unsigned HTKstartnode, HTKendnode;
NodeIndex startIndex = NoNode, endIndex = NoNode;
while (key = getHTKField(line, value)) {
if (keyis("S") || keyis("START")) {
HTKstartnode = atoi(value);
Boolean found;
NodeIndex *startIndexPtr =
nodeMap.insert(HTKstartnode, found);
if (!found) {
// node index not seen before; create it
*startIndexPtr = dupNode(Vocab_None);
}
startIndex = *startIndexPtr;
} else if (keyis("E") || keyis("END")) {
HTKendnode = atoi(value);
Boolean found;
NodeIndex *endIndexPtr =
nodeMap.insert(HTKendnode, found);
if (!found) {
// node index not seen before; create it
*endIndexPtr = dupNode(Vocab_None);
}
endIndex = *endIndexPtr;
} else if (keyis("W") || keyis("WORD")) {
if (strcmp(value, HTK_null_word) == 0) {
linkinfo->word = Vocab_None;
} else {
linkinfo->word = vocab.addWord(value);
}
} else if (keyis("v") || keyis("var")) {
linkinfo->var = atoi(value);
} else if (keyis("d") || keyis("div")) {
linkinfo->div = strdup(value);
assert(linkinfo->div != 0);
} else if (keyis("a") || keyis("acoustic")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
linkinfo->acoustic = score * ProbToLogP(HTKlogbase);
} else {
linkinfo->acoustic = ProbToLogP(score);
}
} else if (keyis("n") || keyis("ngram")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
linkinfo->ngram = score * ProbToLogP(HTKlogbase);
} else {
linkinfo->ngram = ProbToLogP(score);
}
} else if (keyis("l") || keyis("language")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
linkinfo->language = score * ProbToLogP(HTKlogbase);
} else {
linkinfo->language = ProbToLogP(score);
}
} else if (keyis("r")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
linkinfo->pron = score * ProbToLogP(HTKlogbase);
} else {
linkinfo->pron = ProbToLogP(score);
}
} else if (keyis("ds")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
linkinfo->duration = score * ProbToLogP(HTKlogbase);
} else {
linkinfo->duration = ProbToLogP(score);
}
} else if (keyis("p")) {
linkinfo->posterior = atof(value);
} else {
file.position() << "unexpected link field name "
<< key << endl;
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
}
if (startIndex == NoNode) {
file.position() << "missing start node spec\n";
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
if (endIndex == NoNode) {
file.position() << "missing end node spec\n";
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
/*
* fill in unspecified link info from associated node info
* 'forward' lattices use end-node information.
* 'backward' lattices use start-node information.
*/
HTKLink *nodeinfo = 0;
if (HTKdirection == 'f') {
nodeinfo = &nodeInfoMap[HTKendnode];
} else if (HTKdirection == 'b') {
nodeinfo = &nodeInfoMap[HTKstartnode];
}
if (nodeinfo != 0) {
linkinfo->time = nodeinfo->time;
if (linkinfo->word == Vocab_None) {
linkinfo->word = nodeinfo->word;
}
if (linkinfo->var == HTK_undef_uint) {
linkinfo->var = nodeinfo->var;
}
if (linkinfo->div == 0 && nodeinfo->div != 0) {
linkinfo->div = strdup(nodeinfo->div);
assert(linkinfo->div != 0);
}
if (linkinfo->acoustic == HTK_undef_float) {
linkinfo->acoustic = nodeinfo->acoustic;
}
if (linkinfo->pron == HTK_undef_float) {
linkinfo->pron = nodeinfo->pron;
}
if (linkinfo->duration == HTK_undef_float) {
linkinfo->duration = nodeinfo->duration;
}
}
/*
* Create lattice node
*/
NodeIndex newNode = dupNode(linkinfo->word, 0, linkinfo);
/*
* Compute lattice transition weight as a weighted combination
* of HTK lattice scores
*/
LogP weight = LogP_One;
if (linkinfo->acoustic != HTK_undef_float) {
weight += htkheader.acscale * linkinfo->acoustic;
}
if (linkinfo->ngram != HTK_undef_float) {
weight += htkheader.ngscale * linkinfo->ngram;
}
if (linkinfo->language != HTK_undef_float) {
weight += htkheader.lmscale * linkinfo->language;
}
if (linkinfo->pron != HTK_undef_float) {
weight += htkheader.prscale * linkinfo->pron;
}
if (linkinfo->duration != HTK_undef_float) {
weight += htkheader.duscale * linkinfo->duration;
}
if (!ignoreWord(linkinfo->word)) {
weight += htkheader.wdpenalty; // do we need to scale ?
}
/*
* Add transitions from start node, and to end node
*/
LatticeTransition trans1(weight, 0);
insertTrans(startIndex, newNode, trans1);
LatticeTransition trans2(LogP_One, 0);
insertTrans(newNode, endIndex, trans2);
continue;
/*
* Node fields
*/
} else if (keyis("I")) {
unsigned HTKnodeno = atoi(value);
/*
* create a null node for this HTK node,
* and record node-related info.
*/
NodeIndex nullNodeIndex = dupNode(HTKNodeDummy);
*nodeMap.insert(HTKnodeno) = nullNodeIndex;
HTKLink &nodeinfo = nodeInfoMap[HTKnodeno];
/*
* parse node fields
*/
while (key = getHTKField(line, value)) {
if (keyis("t") || keyis("time")) {
nodeinfo.time = atof(value);
// remember temporally first node and timestamp
// in case input doesn't specify initial node
if (HTKfirstnode == HTK_undef_uint ||
nodeinfo.time < HTKinitialtime)
{
HTKfirstnode = HTKnodeno;
HTKinitialtime = nodeinfo.time;
}
// same for last timestamp
if (HTKlastnode == HTK_undef_uint ||
nodeinfo.time > HTKfinaltime)
{
HTKlastnode = HTKnodeno;
HTKfinaltime = nodeinfo.time;
}
} else if (keyis("W") || keyis("WORD")) {
if (strcmp(value, HTK_null_word) == 0) {
nodeinfo.word = Vocab_None;
} else {
nodeinfo.word = vocab.addWord(value);
}
} else if (keyis("v") || keyis("var")) {
nodeinfo.var = atoi(value);
} else if (keyis("d") || keyis("div")) {
nodeinfo.div = strdup(value);
assert(nodeinfo.div != 0);
} else if (keyis("a") || keyis("acoustic")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
nodeinfo.acoustic = score * ProbToLogP(HTKlogbase);
} else {
nodeinfo.acoustic = ProbToLogP(score);
}
} else if (keyis("r")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
nodeinfo.pron = score * ProbToLogP(HTKlogbase);
} else {
nodeinfo.pron = ProbToLogP(score);
}
} else if (keyis("ds")) {
double score = atof(value);
if (HTKlogbase > 0.0) {
nodeinfo.duration = score * ProbToLogP(HTKlogbase);
} else {
nodeinfo.duration = ProbToLogP(score);
}
} else {
file.position() << "unexpected node field name "
<< key << endl;
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
}
if (nodeinfo.time != HTK_undef_float) {
// record node time, but no word-related info
LatticeNode *nullNode = findNode(nullNodeIndex);
assert(nullNode != 0);
HTKLink *nullInfo = new HTKLink;
assert(nullInfo != 0);
htkinfos[htkinfos.size()] = nullInfo;
nullNode->htkinfo = nullInfo;
nullInfo->time = nodeinfo.time;
}
continue;
/*
* Header fields
*/
} else if (keyis("V") || keyis("VERSION")) {
; // ignore
} else if ( keyis("U") || keyis("UTTERANCE")) {
if (name) free((void *)name);
// HACK: strip duration spec (which shouldn't be there)
char *p = strstr(value, "(duration=");
if (p != 0) *p = '\0';
name = strdup(value);
assert(name != 0);
} else if (keyis("base")) {
HTKlogbase = atof(value);
} else if (keyis("start")) {
HTKinitial = atoi(value);
} else if (keyis("end")) {
HTKfinal = atoi(value);
} else if (keyis("dir")) {
HTKdirection = value[0];
} else if (keyis("tscale")) {
htkheader.tscale = atof(value);
} else if (keyis("hmms")) {
htkheader.hmms = strdup(value);
assert(htkheader.hmms != 0);
} else if (keyis("ngname")) {
htkheader.ngname = strdup(value);
assert(htkheader.ngname != 0);
} else if (keyis("lmname")) {
htkheader.lmname = strdup(value);
assert(htkheader.lmname != 0);
} else if (keyis("vocab")) {
htkheader.vocab = strdup(value);
assert(htkheader.vocab != 0);
} else if (keyis("acscale")) {
if (header == 0 || header->acscale == HTK_undef_float) {
htkheader.acscale = atof(value);
}
} else if (keyis("ngscale")) {
if (header == 0 || header->ngscale == HTK_undef_float) {
htkheader.ngscale = atof(value);
}
} else if (keyis("lmscale")) {
if (header == 0 || header->lmscale == HTK_undef_float) {
htkheader.lmscale = atof(value);
}
} else if (keyis("prscale")) {
if (header == 0 || header->prscale == HTK_undef_float) {
htkheader.prscale = atof(value);
}
} else if (keyis("duscale")) {
if (header == 0 || header->duscale == HTK_undef_float) {
htkheader.duscale = atof(value);
}
} else if (keyis("wdpenalty")) {
if (header == 0 || header->wdpenalty == HTK_undef_float) {
htkheader.wdpenalty = atof(value);
}
} else if (keyis("amscale")) {
if (header == 0 || header->amscale == HTK_undef_float) {
htkheader.amscale = atof(value);
}
} else if (keyis("NODES") || keyis("N")) {
HTKnumnodes = atoi(value);
} else if (keyis("LINKS") || keyis("L")) {
HTKnumlinks = atoi(value);
} else {
file.position() << "unknown field name " << key << endl;
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
#undef keyis
}
}
if (HTKnumnodes == 0) {
file.position() << "lattice has no nodes\n";
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
/*
* Set up initial node
*/
HTKLink *initialinfo;
LatticeNode *initialNode;
if (HTKinitial != HTK_undef_uint) {
initialinfo = &nodeInfoMap[HTKinitial];
NodeIndex *initialPtr = nodeMap.find(HTKinitial);
if (initialPtr) {
initial = *initialPtr;
initialNode = findNode(initial);
} else {
file.position() << "undefined start node " << HTKinitial << endl;
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
} else {
initialinfo = &nodeInfoMap[HTKfirstnode];
// search for start node: the one without incoming transitions
LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
NodeIndex nodeIndex;
while (LatticeNode *node = nodeIter.next(nodeIndex)) {
if (node->inTransitions.numEntries() == 0) {
initial = nodeIndex;
initialNode = node;
break;
}
}
}
initialNode->word = vocab.ssIndex();
// attach HTK initial node info to lattice initial node
initialNode->htkinfo = new HTKLink;
*initialNode->htkinfo = *initialinfo;
htkinfos[htkinfos.size()] = initialNode->htkinfo;
/*
* Set up final node
*/
HTKLink *finalinfo;
LatticeNode *finalNode;
if (HTKfinal != HTK_undef_uint) {
finalinfo = &nodeInfoMap[HTKfinal];
NodeIndex *finalPtr = nodeMap.find(HTKfinal);
if (finalPtr) {
final = *finalPtr;
finalNode = findNode(final);
} else {
file.position() << "undefined end node " << HTKfinal << endl;
if (!useNullNodes) vocab.remove(HTKNodeDummy);
return false;
}
} else {
finalinfo = &nodeInfoMap[HTKlastnode];
// search for end node: the one without outgoing transitions
LHashIter<NodeIndex, LatticeNode> nodeIter(nodes);
NodeIndex nodeIndex;
while (LatticeNode *node = nodeIter.next(nodeIndex)) {
if (node->outTransitions.numEntries() == 0) {
final = nodeIndex;
finalNode = node;
break;
}
}
}
finalNode->word = vocab.seIndex();
// attach HTK final node info to lattice final node
finalNode->htkinfo = new HTKLink;
*finalNode->htkinfo = *finalinfo;
htkinfos[htkinfos.size()] = finalNode->htkinfo;
// eliminate dummy nodes
if (!useNullNodes) {
removeAllXNodes(HTKNodeDummy);
vocab.remove(HTKNodeDummy);
}
return true;
}
/*
 * Output lattice in HTK format
 * Algorithm:
 *	- each lattice node becomes an HTK node.
 *	- each lattice transitions becomes an HTK link.
 *	- word information is added to the HTK nodes.
 *	- link information attached to each node is added to the HTK link
 *	  leading into the node.
 *	- lattice transition weights are mapped to one of the
 *	  HTK score fields as indicated by the second argument.
 * Arguments:
 *	- scoreMapping selects the HTK score field (a, n, l) that receives
 *	  the transition weights; mapHTKnone suppresses them.
 *	- if printPosteriors is true node posteriors (and amscale) are output.
 * Returns:
 *	true on success; false (before anything is written) if the
 *	initial or final node is not among the lattice nodes.
 */
Boolean
Lattice::writeHTK(File &file, HTKScoreMapping scoreMapping,
                                                Boolean printPosteriors)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::writeHTK: writing ";
    }

    /*
     * We remap the internal node indices to consecutive unsigned integers
     * to allow a compact output representation.
     * We iterate over all nodes, renumbering them, and also counting the
     * number of transitions overall.
     * This is done before any output so that an inconsistent lattice
     * does not leave a partially written file behind.
     */
    LHash<NodeIndex,unsigned> nodeMap;      // map nodeIndex to unsigned
    unsigned numNodes = 0;
    unsigned numTransitions = 0;

    LHashIter<NodeIndex, LatticeNode> nodeIter(nodes, nodeSort);
    NodeIndex nodeIndex;

    while (LatticeNode *node = nodeIter.next(nodeIndex)) {
        *nodeMap.insert(nodeIndex) = numNodes ++;
        numTransitions += node->outTransitions.numEntries();
    }

    unsigned *initialId = nodeMap.find(initial);
    unsigned *finalId = nodeMap.find(final);

    if (initialId == 0 || finalId == 0) {
        if (debug(DebugPrintFatalMessages)) {
            dout() << "Lattice::writeHTK: "
                   << "initial or final node missing from lattice\n";
        }
        return false;
    }

    fprintf(file, "# Header (generated by SRILM)\n");
    fprintf(file, "VERSION=%s\n", HTKLattice_Version);
    fprintf(file, "UTTERANCE=");
    if (name != 0) printQuoted(file, name);
    fputc('\n', file);
    fprintf(file, "base=%g\n", htkheader.logbase);
    fprintf(file, "dir=%s\n", "f");         // forward lattice

    /*
     * Ancillary header information preserved from readHTK()
     */
    if (htkheader.tscale != HTK_def_tscale) {
        fprintf(file, "tscale=%g\n", htkheader.tscale);
    }
    if (htkheader.acscale != HTK_def_acscale) {
        fprintf(file, "acscale=%g\n", htkheader.acscale);
    }
    if (htkheader.lmscale != HTK_def_lmscale) {
        fprintf(file, "lmscale=%g\n", htkheader.lmscale);
    }
    if (htkheader.ngscale != HTK_def_ngscale) {
        fprintf(file, "ngscale=%g\n", htkheader.ngscale);
    }
    if (htkheader.prscale != HTK_def_prscale) {
        fprintf(file, "prscale=%g\n", htkheader.prscale);
    }
    if (htkheader.duscale != HTK_def_duscale) {
        fprintf(file, "duscale=%g\n", htkheader.duscale);
    }
    if (htkheader.amscale != HTK_undef_float && printPosteriors) {
        fprintf(file, "amscale=%g\n", htkheader.amscale);
    }
    if (htkheader.hmms != 0) {
        fprintf(file, "hmms=");
        printQuoted(file, htkheader.hmms); fputc('\n', file);
    }
    if (htkheader.lmname != 0) {
        fprintf(file, "lmname=");
        printQuoted(file, htkheader.lmname); fputc('\n', file);
    }
    if (htkheader.ngname != 0) {
        fprintf(file, "ngname=");
        printQuoted(file, htkheader.ngname); fputc('\n', file);
    }
    if (htkheader.vocab != 0) {
        // the value itself is output by printQuoted()
        fprintf(file, "vocab=");
        printQuoted(file, htkheader.vocab); fputc('\n', file);
    }

    fprintf(file, "start=%u end=%u\n", *initialId, *finalId);
    fprintf(file, "NODES=%u LINKS=%u\n", numNodes, numTransitions);

    if (debug(DebugPrintFunctionality)) {
        dout() << numNodes << " nodes, "
               << numTransitions << " transitions\n";
    }

    fprintf(file, "# Nodes\n");

    // factor converting internal log probabilities to the output log base
    double logscale = 1.0 / ProbToLogP(htkheader.logbase);

    nodeIter.init();
    while (LatticeNode *node = nodeIter.next(nodeIndex)) {
        fprintf(file, "I=%u", *nodeMap.find(nodeIndex));

        if (htkheader.wordsOnNodes) {
            fprintf(file, "\tW=");
            printQuoted(file, (node->word == vocab.ssIndex() ||
                               node->word == vocab.seIndex() ||
                               node->word == Vocab_None) ?
                                    HTK_null_word : vocab.getWord(node->word));
        }

        if (node->htkinfo != 0) {
            HTKLink &htkinfo = *node->htkinfo;

            if (htkinfo.time != HTK_undef_float) {
                fprintf(file, "\tt=%g", htkinfo.time);
            }
            if (htkheader.scoresOnNodes &&
                scoreMapping != mapHTKacoustic &&
                htkinfo.acoustic != HTK_undef_float)
            {
                fprintf(file, "\ta=%g", htkinfo.acoustic * logscale);
            }
            if (htkheader.scoresOnNodes &&
                htkinfo.pron != HTK_undef_float)
            {
                fprintf(file, "\tr=%g", htkinfo.pron * logscale);
            }
            if (htkheader.scoresOnNodes &&
                htkinfo.duration != HTK_undef_float)
            {
                fprintf(file, "\tds=%g", htkinfo.duration * logscale);
            }
            if (htkheader.wordsOnNodes &&
                htkinfo.var != HTK_undef_uint)
            {
                fprintf(file, "\tv=%u", htkinfo.var);
            }
            if (htkheader.wordsOnNodes &&
                htkinfo.div != 0)
            {
                fprintf(file, "\td=%s", htkinfo.div);
            }
        }

        if (printPosteriors) {
            fprintf(file, "\tp=%lg", (double)LogPtoProb(node->posterior));
        }
        fprintf(file, "\n");
    }

    fprintf(file, "# Links\n");

    unsigned linkNumber = 0;

    nodeIter.init();
    while (LatticeNode *node = nodeIter.next(nodeIndex)) {
        unsigned *fromNodeId = nodeMap.find(nodeIndex);

        NodeIndex toNodeIndex;

        TRANSITER_T<NodeIndex,LatticeTransition>
                                        transIter(node->outTransitions);
        while (LatticeTransition *trans = transIter.next(toNodeIndex)) {
            LatticeNode *toNode = findNode(toNodeIndex);
            assert(toNode != 0);

            unsigned *toNodeId = nodeMap.find(toNodeIndex);
            assert(toNodeId != 0);

            fprintf(file, "J=%u\tS=%u\tE=%u",
                                linkNumber++, *fromNodeId, *toNodeId);

            if (!htkheader.wordsOnNodes) {
                fprintf(file, "\tW=");
                printQuoted(file, (toNode->word == vocab.ssIndex() ||
                                   toNode->word == vocab.seIndex() ||
                                   toNode->word == Vocab_None) ?
                                    HTK_null_word : vocab.getWord(toNode->word));
            }

            if (toNode->htkinfo != 0) {
                HTKLink &htkinfo = *toNode->htkinfo;

                if (!htkheader.scoresOnNodes &&
                    scoreMapping != mapHTKacoustic &&
                    htkinfo.acoustic != HTK_undef_float)
                {
                    fprintf(file, "\ta=%g", htkinfo.acoustic * logscale);
                }
                if (!htkheader.scoresOnNodes &&
                    htkinfo.pron != HTK_undef_float)
                {
                    fprintf(file, "\tr=%g", htkinfo.pron * logscale);
                }
                if (!htkheader.scoresOnNodes &&
                    htkinfo.duration != HTK_undef_float)
                {
                    fprintf(file, "\tds=%g", htkinfo.duration * logscale);
                }
                if (!htkheader.wordsOnNodes &&
                    htkinfo.var != HTK_undef_uint)
                {
                    fprintf(file, "\tv=%u", htkinfo.var);
                }
                if (!htkheader.wordsOnNodes &&
                    htkinfo.div != 0)
                {
                    fprintf(file, "\td=%s", htkinfo.div);
                }
                if (scoreMapping != mapHTKngram &&
                    htkinfo.ngram != HTK_undef_float)
                {
                    fprintf(file, "\tn=%g", htkinfo.ngram * logscale);
                }
                if (scoreMapping != mapHTKlanguage &&
                    htkinfo.language != HTK_undef_float)
                {
                    fprintf(file, "\tl=%g", htkinfo.language * logscale);
                }
            }

            /*
             * map transition weight to one of the standard HTK scores
             */
            if (scoreMapping != mapHTKnone) {
                fprintf(file, "\t%c=%g",
                        (scoreMapping == mapHTKacoustic ? 'a' :
                         (scoreMapping == mapHTKngram ? 'n' :
                          (scoreMapping == mapHTKlanguage ? 'l' : '?'))),
                        trans->weight * logscale);
            }
            fprintf(file, "\n");
        }
    }

    return true;
}
/*
 * Compute pronunciation scores
 * (for nodes with HTKLink information that have phone backtraces)
 *
 * Arguments:
 *	- dictionary maps words to phone sequences; phone labels found in
 *	  the lattice are added to its phone vocabulary (vocab2).
 *	- if intlogs is true the dictionary values are converted with
 *	  IntlogToLogP(), otherwise they are treated as probabilities.
 * Always returns true.
 */
Boolean
Lattice::scorePronunciations(VocabMultiMap &dictionary, Boolean intlogs)
{
    if (debug(DebugPrintFunctionality)) {
        dout() << "Lattice::scorePronunciations: starting\n";
    }

    Vocab &phoneVocab = dictionary.vocab2;

    /*
     * Go through all HTKLink structures, extract the phone sequences,
     * and look up their probabilities in the dictionary
     */
    for (unsigned i = 0; i < htkinfos.size(); i ++) {
        HTKLink *info = htkinfos[i];

        /*
         * only rescore words that have pronunciations
         * (e.g., don't include NULL nodes)
         */
        if (info->div == 0) {
            continue;
        }

        assert(info->word != Vocab_None);

        /*
         * parse the phone sequence from the string
         * example:
         * d=:#[s]t,0.12:s[t]r,0.03:t[r]ay,0.05:r[ay]k,0.09:ay[k]#,0.09:
         * and convert into an index string
         *
         * strtok() modifies its argument, so we work on a heap copy
         * (a variable-length stack array is not standard C++ and is
         * unsafe for long strings)
         */
        char *phoneString = strdup(info->div);
        assert(phoneString != 0);

        Array<VocabIndex> phones;
        unsigned numPhones = 0;

        for (char *s = strtok(phoneString, phoneSeparator);
             s != 0;
             s = strtok(NULL, phoneSeparator))
        {
            // skip empty components (at beginning and end)
            if (s[0] == '\0') continue;

            // strip duration part
            char *e = strchr(s, ',');
            if (e != 0) *e = '\0';

            // strip context from triphone labels
            e = strchr(s, '[');
            if (e != 0) s = e + 1;
            e = strrchr(s, ']');
            if (e != 0) *e = '\0';

            phones[numPhones ++] = phoneVocab.addWord(s);
        }
        phones[numPhones] = Vocab_None;

        // the phone labels are no longer referenced past this point
        free(phoneString);

        // find pronunciation prob
        Prob p = dictionary.get(info->word, phones.data());

        if (p == 0.0) {
            // missing pronunciation get score 0
            info->pron = LogP_One;
        } else if (intlogs) {
            info->pron = IntlogToLogP(p);
        } else {
            // assign, as in the other cases: adding to the previous
            // value would add to the "undefined" marker (HUGE_VAL)
            // when the lattice carried no pronunciation score
            info->pron = ProbToLogP(p);
        }
    }

    return true;
}
More information about the SRILM-User
mailing list