language model adaptation
Andreas Stolcke
stolcke at speech.sri.com
Wed Aug 31 13:19:44 PDT 2005
There is a better method (no coding required).
Use the ngram -rescore-ngram option. You feed it the unadapted LM,
and the probs will be recomputed by the adapted model (and renormalized).
I haven't tried this specifically with -adapt-marginals, but it should
work for any LM supported by ngram.
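
Untested sketch (the file names are placeholders and I'm quoting the marginals
options from memory, so check the ngram(1) man page for the exact arguments):

    ngram -order 3 -lm base.lm \
        -adapt-marginals adapted.unigrams -base-marginals base.unigrams \
        -rescore-ngram base.lm \
        -write-lm adapted.lm

The LM given to -rescore-ngram supplies the N-gram skeleton; its probabilities
are replaced by those of the adapted model and renormalized, and -write-lm
should then dump the result in standard ARPA format. Since the dumped model
stores explicit probabilities only for the N-grams in that skeleton, words that
back off may score slightly differently than under the on-the-fly adapted model.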
--Andreas
In message <a3290a1a05083107513c029f83 at mail.gmail.com> you wrote:
>
> Hello SRILM users,
>
> I am looking for a way to save language models adapted with
>
> ngram -adapt-marginals.
>
> Is it possible?
>
> As the write method for AdaptiveMarginals is not implemented, I tried to
> write one (copied from writeWithOrder in NgramLM.cc) by calling
> wordProb(pword, context+1) instead of iterating over the tree, but it
> doesn't seem to work. The probabilities I get in the file are not the
> same as the ones I get when I run the -ppl option on a text file
> (specifically, the alphas are the same, but the denominators differ).
>
> Any clue or solution for saving an adapted model is welcome!
>
> Thank you,
>
> --
> Christopher Kermorvant
>
>
> void
> AdaptiveMarginals::write(File &file)
> {
>     this->running(true);
>     Ngram *lm = (Ngram *)&baseLM;
>     unsigned i;
>     unsigned howmanyNgrams[maxNgramOrder + 1];
>     VocabIndex context[maxNgramOrder + 2];
>     VocabString scontext[maxNgramOrder + 1];
>     unsigned order = lm->setorder();
>     if (order > maxNgramOrder) {
>         order = maxNgramOrder;
>     }
>
>     fprintf(file, "\n\\data\\\n");
>
>     /* \data\ section: number of N-grams per order */
>     for (i = 1; i <= order; i++) {
>         howmanyNgrams[i] = lm->numNgrams(i);
>         fprintf(file, "ngram %d=%d\n", i, howmanyNgrams[i]);
>     }
>
>     for (i = 1; i <= order; i++) {
>         fprintf(file, "\n\\%d-grams:\n", i);
>
>         if (debug(DEBUG_WRITE_STATS)) {
>             dout() << "writing " << howmanyNgrams[i] << " "
>                    << i << "-grams\n";
>         }
>
>         /* iterate over all contexts of length i-1 in the base LM */
>         NgramBOsIter iter(*lm, context + 1, i - 1, vocab.compareIndex());
>         BOnode *node;
>
>         while ((node = iter.next())) {
>
>             vocab.getWords(context + 1, scontext, maxNgramOrder + 1);
>             Vocab::reverse(scontext);
>
>             NgramProbsIter piter(*node, vocab.compareIndex());
>             VocabIndex pword;
>             LogP *prob;
>             LogP probAd;
>
>             while ((prob = piter.next(pword))) {
>                 if (file.error()) {
>                     return;
>                 }
>                 /* recompute this N-gram's probability with the adapted model */
>                 context[0] = pword;
>                 probAd = wordProb(pword, context + 1);
>                 *prob = probAd;
>                 fprintf(file, "%.*lg\t", LogP_Precision,
>                         (double)(*prob == LogP_Zero ?
>                                         LogP_PseudoZero : *prob));
>                 Vocab::write(file, scontext);
>                 fprintf(file, "%s%s", (i > 1 ? " " : ""),
>                         vocab.getWord(pword));
>
>                 if (i < order) {
>                     context[0] = pword;
>
>                     /* back-off weight is taken from the unadapted base LM */
>                     LogP *bow = lm->findBOW(context);
>                     if (bow) {
>                         fprintf(file, "\t%.*lg", LogP_Precision,
>                                 (double)(*bow == LogP_Zero ?
>                                                 LogP_PseudoZero : *bow));
>                     }
>                 }
>
>                 fprintf(file, "\n");
>             }
>         }
>     }
>
>     fprintf(file, "\n\\end\\\n");
> }
>
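
P.S. To compare the dumped LM with the on-the-fly adapted model word by word
(the -ppl check you describe), you can run both at debug level 2, e.g. (same
caveat as above: file names are placeholders):

    ngram -lm dumped.lm -debug 2 -ppl test.txt
    ngram -lm base.lm -adapt-marginals adapted.unigrams \
        -base-marginals base.unigrams -debug 2 -ppl test.txt

-debug 2 makes -ppl print the probability of every word, so the two runs can
be lined up N-gram by N-gram.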