language model adaptation

Andreas Stolcke stolcke at speech.sri.com
Wed Aug 31 13:19:44 PDT 2005


There is a better method (no coding required).

Use the ngram -rescore-ngram option.  You feed it the unadapted LM, and
its N-gram probabilities will be recomputed by the adapted model (and then
renormalized).  The resulting model can then be saved with -write-lm.

I haven't tried this specifically with -adapt-marginals, but it should 
work for any LM supported by ngram.

--Andreas

In message <a3290a1a05083107513c029f83 at mail.gmail.com> you wrote:
> ------=_Part_4844_29044224.1125499913119
> Content-Type: text/plain; charset=ISO-8859-1
> Content-Transfer-Encoding: quoted-printable
> Content-Disposition: inline
> 
> Hello SRILM users,
> 
> I am looking for a way to save language models adapted with
> 
> ngram -adapt-marginals.
> 
> Is it possible ?
> 
> As the write method for AdaptiveMarginals is not implemented, I tried to
> write one (copied from writeWithOrder in NgramLM.cc), by calling
> wordProb(pword,context+1) instead of iterating on the tree, but it doesn't
> seem to work. The probabilities I get in the file are not the same as the
> ones I get if I parse a text file with the -ppl option (precisely, alphas
> are the same, but denominators are different).
> 
> Any clue or solution to save an adapted model is welcome!
> 
> Thank you,
> 
> --
> Christopher Kermorvant=20
> 
> 
> void
> AdaptiveMarginals::write(File &file)
> {
> this->running(true) ;
> Ngram * lm =3D (Ngram*)&baseLM;
> unsigned i;
> unsigned howmanyNgrams[maxNgramOrder + 1];
> VocabIndex context[maxNgramOrder + 2];
> VocabString scontext[maxNgramOrder + 1];
> unsigned order=3Dlm->setorder();=20
> if (order > maxNgramOrder) {
> order =3D maxNgramOrder;
> }
> 
> fprintf(file, "\n\\data\\\n");
> 
> for (i =3D 1; i <=3D order; i++ ) {
> howmanyNgrams[i] =3D lm->numNgrams(i);
> fprintf(file, "ngram %d=3D%d\n", i, howmanyNgrams[i]);
> }
> 
> for (i =3D 1; i <=3D order; i++ ) {
> fprintf(file, "\n\\%d-grams:\n", i);
> 
> if (debug(DEBUG_WRITE_STATS)) {
> dout() << "writing " << howmanyNgrams[i] << " "
> << i << "-grams\n";
> }
> 
> NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());
> BOnode *node;
> 
> while (node =3D iter.next()) {
> 
> vocab.getWords(context + 1, scontext, maxNgramOrder + 1);
> Vocab::reverse(scontext);
> 
> NgramProbsIter piter(*node, vocab.compareIndex());
> VocabIndex pword;
> LogP *prob;
> LogP probAd ;
> while (prob =3D piter.next(pword)) {
> if (file.error()) {
> return;
> }
> context[0] =3D pword;
> probAd =3D wordProb(pword,context+1);
> *prob =3D probAd ;
> fprintf(file, "%.*lg\t", LogP_Precision,
> (double)(*prob =3D=3D LogP_Zero ?
> LogP_PseudoZero : *prob));
> Vocab::write(file, scontext);
> fprintf(file, "%s%s", (i > 1 ? " " : ""), vocab.getWord(pword));
> 
> if (i < order) {
> context[0] =3D pword;
> 
> LogP *bow =3D lm->findBOW(context);
> if (bow) {
> fprintf(file, "\t%.*lg", LogP_Precision,
> (double)(*bow =3D=3D LogP_Zero ?
> LogP_PseudoZero : *bow));
> }
> }
> 
> fprintf(file, "\n");
> }
> }
> }
> 
> fprintf(file, "\n\\end\\\n");
> }
> 
> ------=_Part_4844_29044224.1125499913119
> Content-Type: text/html; charset=ISO-8859-1
> Content-Transfer-Encoding: quoted-printable
> Content-Disposition: inline
> 
> Hello SRILM users,<br>
> <br>
> I am looking for a way to save language models adapted with <br>
> <br>
> ngram -adapt-marginals.<br>
> <br>
> Is it possible&nbsp; ?<br>
> <br>
> As the write method for AdaptiveMarginals is not implemented, I tried
> to write one (copied from writeWithOrder in NgramLM.cc) , by calling
> wordProb(pword,context+1) instead of iterating on the tree, but it
> doesn't seem to work. The probabilities I get in the file, are not the
> same as the one I get if I parse&nbsp; a text file with ppl option
> (precisely, alphas are the same, but denominators are different). <br>
> <br>
> Any clue or solution to save&nbsp; an adapted model is welcome !<br>
> <br>
> Thank you,<br>
> <br>
> --<br>
> Christopher Kermorvant <br>
> <br>
> <br>
> void<br>
> AdaptiveMarginals::write(File &amp;file)<br>
> {<br>
> &nbsp;&nbsp;&nbsp; this-&gt;running(true) ;<br>
> &nbsp;&nbsp;&nbsp; Ngram * lm =3D (Ngram*)&amp;baseLM;<br>
> &nbsp;&nbsp;&nbsp; unsigned i;<br>
> &nbsp;&nbsp;&nbsp; unsigned howmanyNgrams[maxNgramOrder + 1];<br>
> &nbsp;&nbsp;&nbsp; VocabIndex context[maxNgramOrder + 2];<br>
> &nbsp;&nbsp;&nbsp; VocabString scontext[maxNgramOrder + 1];<br>
> &nbsp;&nbsp;&nbsp; unsigned order=3Dlm-&gt;setorder(); <br>
> &nbsp;&nbsp;&nbsp; if (order &gt; maxNgramOrder) {<br>
> &nbsp;&nbsp;&nbsp; order =3D maxNgramOrder;<br>
> &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp; <br>
> &nbsp;&nbsp;&nbsp; fprintf(file, &quot;\n\\data\\\n&quot;);<br>
> <br>
> &nbsp;&nbsp;&nbsp; for (i =3D 1; i &lt;=3D order; i++ ) {<br>
> &nbsp;&nbsp;&nbsp; howmanyNgrams[i] =3D lm-&gt;numNgrams(i);<br>
> &nbsp;&nbsp;&nbsp; fprintf(file, &quot;ngram %d=3D%d\n&quot;, i, howmanyNgr=
> ams[i]);<br>
> &nbsp;&nbsp;&nbsp; }<br>
> <br>
> &nbsp;&nbsp;&nbsp; for (i =3D 1; i &lt;=3D order; i++ ) {<br>
> &nbsp;&nbsp;&nbsp; fprintf(file, &quot;\n\\%d-grams:\n&quot;, i);<br>
> <br>
> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if (debug(DEBUG_WRITE_STATS)) {<=
> br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; dout() &lt;&lt; &quot;writing &quot; =
> &lt;&lt; howmanyNgrams[i] &lt;&lt; &quot; &quot;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp; &lt;&lt; i &lt;&lt; &quo=
> t;-grams\n&quot;;<br>
> &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <br>
> &nbsp;&nbsp;&nbsp; NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compare=
> Index());<br>
> &nbsp;&nbsp;&nbsp; BOnode *node;<br>
> <br>
> &nbsp;&nbsp;&nbsp; while (node =3D iter.next()) {<br>
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; vocab.getWords(context + 1, scontext,=
>  maxNgramOrder + 1);<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; Vocab::reverse(scontext);<br>
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; NgramProbsIter piter(*node, vocab.com=
> pareIndex());<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; VocabIndex pword;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; LogP *prob;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; LogP probAd ;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; while (prob =3D piter.next(pword)) {<=
> br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (file.error()) {<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; return;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; context[0] =3D pword;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; probAd =3D wordProb(pword,context+1);=
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; *prob =3D probAd ;<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; fprintf(file, &quot;%.*lg\t&quot;, Lo=
> gP_Precision,<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;=
>  (double)(*prob =3D=3D LogP_Zero ?<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
> LogP_PseudoZero : *prob));<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; Vocab::write(file, scontext);<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; fprintf(file, &quot;%s%s&quot;, (i &g=
> t; 1 ? &quot; &quot; : &quot;&quot;), vocab.getWord(pword));<br>
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (i &lt; order) {<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; context[0] =3D pwo=
> rd;<br>
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; LogP *bow =3D lm-&=
> gt;findBOW(context);<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; if (bow) {<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; fprintf(file, &quo=
> t;\t%.*lg&quot;, LogP_Precision,<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;=
>  &nbsp;&nbsp;&nbsp; (double)(*bow =3D=3D LogP_Zero ?<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp;
> &nbsp;&nbsp;&nbsp; LogP_PseudoZero : *bow));<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
> <br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; fprintf(file, &quot;\n&quot;);<br>
> &nbsp;&nbsp;&nbsp; &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp; }<br>
> &nbsp;&nbsp;&nbsp; }<br>
> <br>
> &nbsp;&nbsp;&nbsp; fprintf(file, &quot;\n\\end\\\n&quot;);<br>
> }<br>
> <br>
> <br>
> 
> ------=_Part_4844_29044224.1125499913119--




More information about the SRILM-User mailing list