Modifying ngram.cc
lambert mathias
lambert at jhu.edu
Tue Jul 19 14:04:19 PDT 2005
Hi,
I tried using the copy constructors in srilm-1.4.5.
I initialized my LM as
Ngram useLM(ngramLM);
However, when I do a useLM->write(), the output shows some of the entried as
(null).
\data\
ngram 1=6
ngram 2=24
ngram 3=96
\1-grams:
0 (null) -99
0 (null) -99
-0.6083246 3 0
-0.6093718 4 0
-1.78533 </s>
-99 <s> -99
\2-grams:
0 3 (null) -99
0 3 (null) -99
-0.6108742 3 3 0
-0.6078538 3 4 3.818703
-1.778024 3 </s>
............
I am trying to figure out if there is a bug in the copy constructor
initialization.
Lambert
On 7/17/05 8:47 PM, "Andreas Stolcke" <stolcke at speech.sri.com> wrote:
> The problem was that copy constructors (and assignment operators)
> for the container data structures (Array, SArray, LHash) weren't implemented.
> I fixed that in the current development version.
> No changes to any of the higher-level classes (like Ngram) is needed,
> as C++ automatically (and recursively) defines copy constructors based on the
> member copy constructors.
>
> You can either replace the attached source files (in dstruct/src), or
> download the "1.4.5 (Beta)" version from the web page.
>
> --Andreas
>
> In message <BEF415D2.332F%lambert at jhu.edu>you wrote:
>> Hi,
>>
>> This is kind of a C++ question
>> I wrote the following copy constructor in Ngram.h
>> Ngram(const Ngram
>> &ng):LM(ng.vocab),contexts(ng.contexts),order(ng.order),_skipOOVs(ng._skipOO
>> Vs),_trustTotals(ng._trustTotals){};
>>
>> I have the following declarations
>>
>> Ngram ngramLM(vocab*,order);
>>
>> While(//<some condition>)
>> Ngram useLM(ngramLM);
>>
>> // Do some stuff with useLM
>> .........
>> ........
>> }
>>
>> The problem is that the assignment useLM=ngramLM doesn't assign the original
>> ngramLM. Any changes I make to useLM shows up in ngramLM too.
>> I just want to make a copy (useLM) of the original ngramLM, work on that
>> copy and then reinitialize another useLM with the original ngramLM.
>>
>> Any thoughts?
>>
>>
>> Lambert
>>
>
> /*
> * Array.h --
> * Extensible array class
> *
> * Copyright (c) 1995-2005 SRI International. All Rights Reserved.
> *
> * @(#)$Header: /home/srilm/devel/dstruct/src/RCS/Array.h,v 1.11 2005/07/17
> 22:12:21 stolcke Exp $
> *
> */
>
> #ifndef _Array_h_
> #define _Array_h_
>
> #include <assert.h>
>
> #include "MemStats.h"
>
> template <class DataT>
> class Array
> {
> public:
> Array(int base = 0, unsigned int size = 0)
> : _base(base), _size(size), _data(0), alloc_size(0)
> { if (size > 0) { alloc(size-1); } };
> Array(Array<DataT> &source)
> : _base(source._base), _size(0), _data(0), alloc_size(0)
> { *this = source; }
>
> ~Array() { delete [] _data; } ;
>
> DataT &operator[](int index)
> { int offset = index - _base; assert(offset >= 0);
> if (offset >= _size) {
> _size = offset + 1;
> if (offset >= alloc_size) { alloc(offset); }
> }
> return _data[offset];
> };
>
> Array<DataT> & operator= (const Array<DataT> &other);
>
> DataT *data() const { return _data - _base; };
> int base() const { return _base; }
> unsigned int size() const { return _size; }
>
> void memStats(MemStats &stats) const;
>
> private:
> unsigned int _size; /* used size */
> int _base;
> DataT *_data;
> unsigned int alloc_size; /* allocated size */
> void alloc(unsigned int size);
> };
>
> #endif /* _Array_h_ */
> /*
> * LHash.cc --
> * Linear search hash table implementation
> *
> */
>
> #ifndef _LHash_cc_
> #define _LHash_cc_
>
> #ifndef lint
> static char LHash_Copyright[] = "Copyright (c) 1995-2005 SRI International.
> All Rights Reserved.";
> static char LHash_RcsId[] = "@(#)$Header:
> /home/srilm/devel/dstruct/src/RCS/LHash.cc,v 1.46 2005/07/17 22:01:31 stolcke
> Exp $";
> #endif
>
> #include <iostream.h>
> #include <stdlib.h>
> #include <string.h>
> #include <assert.h>
> #include <new.h>
>
> #include "LHash.h"
>
> #undef INSTANTIATE_LHASH
> #define INSTANTIATE_LHASH(KeyT, DataT) \
> template <> DataT *LHash< KeyT, DataT >::removedData = 0; \
> template class LHash< KeyT, DataT >; \
> template class LHashIter< KeyT, DataT >
>
> #ifndef __GNUG__
> template <class KeyT, class DataT>
> DataT *LHash<KeyT,DataT>::removedData = 0;
> #endif /* __GNUG__ */
>
> const unsigned minHashBits = 3; /* minimum no. bits for hashing
> * tables smaller than this use linear
> * search to save space */
> const float fillRatio = 0.8; /* fill ration at which the table is
> * expanded and rehashed */
>
> #define BODY(b) ((LHashBody<KeyT,DataT> *)b)
>
> /*
> * Dump the entire hash array to cerr. Unused slots are printed as "FREE".
> */
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::dump() const
> {
> if (body) {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned i;
>
> for (i = 0; i < maxEntries; i++) {
> cerr << " " << i << ": ";
> if (Map_noKeyP(BODY(body)->data[i].key)) {
> cerr << "FREE";
> } else {
> cerr << BODY(body)->data[i].key
> #ifdef DUMP_VALUES
> /*
> * Only certain types can be printed.
> */
> << "->" << BODY(body)->data[i].value
> #endif /* DUMP_VALUES */
> ;
> }
> }
> } else {
> cerr << "EMPTY";
> }
> cerr << endl;
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::memStats(MemStats &stats) const
> {
> stats.total += sizeof(*this);
> if (body) {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
>
> stats.total += sizeof(*BODY(body)) +
> sizeof(BODY(body)->data[0]) *
> (maxEntries - 1);
> stats.wasted += sizeof(BODY(body)->data[0]) *
> (maxEntries - BODY(body)->nEntries);
> }
> }
>
> /*
> * Compute the actual minimum size required for a given number of entries
> */
> static inline unsigned
> roundSize(unsigned size)
> {
> if (size < hashSize(minHashBits)) {
> return size;
> } else {
> return (unsigned)((size + 1)/ fillRatio);
> }
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::alloc(unsigned size)
> {
> unsigned maxBits, maxEntries;
> unsigned i;
>
> /*
> * round up to power of two
> */
> maxBits = 0;
> while (hashSize(maxBits) < size) {
> assert(maxBits < LHash_maxBitLimit);
> maxBits++;
> }
>
> maxEntries = hashSize(maxBits);
>
> //cerr << "LHash::alloc: current " << (body ? BODY(body)->nEntries : 0)
> // << ", requested " << size
> // << ", allocating " << maxEntries << " (" << maxBits << ")\n";
>
> body = (LHashBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
> (maxEntries - 1) * sizeof(BODY(body)->data[0]));
> assert(body != 0);
>
> BODY(body)->maxBits = maxBits;
> BODY(body)->nEntries = 0;
>
> for (i = 0; i < maxEntries; i++) {
> Map_noKey(BODY(body)->data[i].key);
> }
> //cerr << "memory for header = " <<
> // ((void *)&(BODY(body)->data[0]) - (void *)BODY(body)) << endl;
> //cerr << "memory per entry = " <<
> // ((void *)&(BODY(body)->data[3]) - (void *)&(BODY(body)->data[2])) <<
> endl;
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::LHash(unsigned size)
> : body(0)
> {
> if (size != 0) {
> /*
> * determine actual initial size
> */
> alloc(roundSize(size));
> }
> }
>
> template <class KeyT, class DataT>
> void
> LHash<KeyT,DataT>::clear(unsigned size)
> {
> if (body) {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned i;
>
> for (i = 0; i < maxEntries; i++) {
> if (! Map_noKeyP(BODY(body)->data[i].key)) {
> Map_freeKey(BODY(body)->data[i].key);
> }
> }
> free(body);
> body = 0;
> }
> if (size != 0) {
> alloc(roundSize(size));
> }
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::~LHash()
> {
> clear(0);
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT>::LHash(const LHash<KeyT,DataT> &source)
> : body(0)
> {
> #ifdef DEBUG
> cerr << "warning: LHash copy constructor called\n";
> #endif
> *this = source;
> }
>
> template <class KeyT, class DataT>
> LHash<KeyT,DataT> &
> LHash<KeyT,DataT>::operator= (const LHash<KeyT,DataT> &other)
> {
> #ifdef DEBUG
> cerr << "warning: LHash::operator= called\n";
> #endif
>
> if (&other == this) {
> return *this;
> }
>
> /*
> * copy hash entries from old to new
> */
> if (other.body) {
> unsigned maxEntries = hashSize(BODY(other.body)->maxBits);
> clear(maxEntries);
>
> for (unsigned i = 0; i < maxEntries; i++) {
> KeyT thisKey = BODY(other.body)->data[i].key;
>
> if (!Map_noKeyP(thisKey)) {
> /*
> * Copy key
> */
> BODY(body)->data[i].key = Map_copyKey(thisKey);
>
> /*
> * Initialize data, required for = operator
> */
> new (&(BODY(body)->data[i].value)) DataT;
>
> /*
> * Copy data
> */
> BODY(body)->data[i].value = BODY(other.body)->data[i].value;
> }
> }
> BODY(body)->nEntries = BODY(other.body)->nEntries;
> } else {
> clear(0);
> }
>
> return *this;
> }
>
> /*
> * Returns index into data array that has the key which is either
> * equal to key, or indicates the place where key would be placed if it
> * occurred in the array.
> */
> template <class KeyT, class DataT>
> Boolean
> LHash<KeyT,DataT>::locate(KeyT key, unsigned &index) const
> {
> assert(!Map_noKeyP(key));
>
> if (body) {
> unsigned maxBits = BODY(body)->maxBits;
> register MapEntry<KeyT,DataT> *data = BODY(body)->data;
>
> if (maxBits < minHashBits) {
> /*
> * Do a linear search
> */
> unsigned nEntries = BODY(body)->nEntries;
> register unsigned i;
>
> for (i = 0; i < nEntries; i++) {
> if (LHash_equalKey(data[i].key, key)) {
> index = i;
> return true;
> }
> }
> index = i;
> return false;
> } else {
> /*
> * Do a hashed search
> */
> unsigned hash = LHash_hashKey(key, maxBits);
> unsigned i;
>
> for (i = hash; ; i = (i + 1) & hashMask(maxBits))
> {
> if (Map_noKeyP(data[i].key)) {
> index = i;
> return false;
> } else if (LHash_equalKey(data[i].key, key)) {
> index = i;
> return true;
> }
> }
> }
> } else {
> return false;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
> {
> unsigned index;
>
> if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
> } else {
> return 0;
> }
> }
>
> template <class KeyT, class DataT>
> KeyT
> LHash<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
> {
> unsigned index;
> static KeyT zeroKey;
>
> if (foundP = locate(key, index)) {
> return BODY(body)->data[index].key;
> } else {
> return zeroKey;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
> {
> unsigned index;
>
> assert(!(Map_noKeyP(key)));
>
> /*
> * Make sure there is room for at least one entry
> */
> if (body == 0) {
> alloc(1);
> }
>
> if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
> } else {
> unsigned maxEntries = hashSize(BODY(body)->maxBits);
> unsigned nEntries = BODY(body)->nEntries;
>
> /*
> * Rehash table if necessary
> */
> unsigned minSize = roundSize(nEntries + 1);
>
> if (minSize > maxEntries) {
> LHashBody<KeyT,DataT> *oldBody = BODY(body);
> unsigned i;
>
> /*
> * Since LHash_maxEntriesLimit is a power of two minus 1
> * we need to check this only when the array is enlarged
> */
> assert(nEntries < LHash_maxEntriesLimit);
>
> alloc(minSize);
> BODY(body)->nEntries = nEntries;
>
> if (BODY(body)->maxBits < minHashBits) {
> /*
> * Just copy old entries to new storage, no reindexing
> * required.
> */
> memcpy(BODY(body)->data, oldBody->data,
> sizeof(oldBody->data[0]) * nEntries);
> } else {
> /*
> * Rehash
> */
> for (i = 0; i < maxEntries; i++) {
> KeyT key = oldBody->data[i].key;
>
> if (! Map_noKeyP(key)) {
> (void)locate(key, index);
> memcpy(&(BODY(body)->data[index]), &(oldBody->data[i]),
> sizeof(oldBody->data[0]));
> }
> }
> }
> free(oldBody);
>
> /*
> * Entries have been moved, so have to re-locate key
> */
> (void)locate(key, index);
> }
>
> BODY(body)->data[index].key = Map_copyKey(key);
>
> /*
> * Initialize data to zero, but also call constructors, if any
> */
> memset(&(BODY(body)->data[index].value), 0,
> sizeof(BODY(body)->data[index].value));
> new (&(BODY(body)->data[index].value)) DataT;
>
> BODY(body)->nEntries++;
>
> return &(BODY(body)->data[index].value);
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHash<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
> {
> unsigned index;
>
> /*
> * Allocate pseudo-static memory for removed objects (returned by
> * remove()). We cannot make this static because otherwise
> * the destructor for DataT would be called on it at program exit.
> */
> if (removedData == 0) {
> removedData = (DataT *)malloc(sizeof(DataT));
> assert(removedData != 0);
> }
>
> if (foundP = locate(key, index)) {
> Map_freeKey(BODY(body)->data[index].key);
> Map_noKey(BODY(body)->data[index].key);
> memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
>
> if (BODY(body)->maxBits < minHashBits) {
> /*
> * Linear search mode -- Just move all entries above the
> * the one removed to fill the gap.
> */
> unsigned nEntries = BODY(body)->nEntries;
>
> memmove(&BODY(body)->data[index],
> &BODY(body)->data[index+1],
> (nEntries - index - 1) * sizeof(BODY(body)->data[0]));
> Map_noKey(BODY(body)->data[nEntries - 1].key);
> } else {
> /*
> * The entry after the one being deleted could actually
> * belong to a prior spot in the table, but was bounced forward due
> * to a collision. The invariant used in lookup is that
> * all locations between the hash index and the actual index are
> * filled. Hence we examine all entries between the deleted
> * position and the next free position as whether they need to
> * be moved backward.
> */
> while (1) {
> unsigned newIndex;
>
> index = (index + 1) & hashMask(BODY(body)->maxBits);
>
> if (Map_noKeyP(BODY(body)->data[index].key)) {
> break;
> }
>
> /*
> * If find returns false that means the deletion has
> * introduced a hole in the table that would prevent
> * us from finding the next entry. Luckily, find
> * also tells us where the hole is. We move the
> * entry to its rightful place and continue filling
> * the hole created by this move.
> */
> if (!locate(BODY(body)->data[index].key, newIndex)) {
> memcpy(&(BODY(body)->data[newIndex]),
> &(BODY(body)->data[index]),
> sizeof(BODY(body)->data[0]));
> Map_noKey(BODY(body)->data[index].key);
> }
> }
> }
> BODY(body)->nEntries--;
> return removedData;
> } else {
> return 0;
> }
> }
>
> #ifdef __GNUG__
> static
> #endif
> int (*LHash_thisKeyCompare)(); /* used by LHashIter() to communicate
> * with compareIndex() */
> #ifdef __GNUG__
> static
> #endif
> void *LHash_thisBody; /* ditto */
>
> template <class KeyT, class DataT>
> void
> LHashIter<KeyT,DataT>::sortKeys()
> {
> /*
> * Store keys away and sort them to the user's orders.
> */
> unsigned maxEntries = hashSize(myLHashBody->maxBits);
>
> unsigned *sortedIndex = new unsigned[numEntries];
> assert(sortedIndex != 0);
>
> unsigned i;
>
> unsigned j = 0;
> for (i = 0; i < maxEntries; i++) {
> if (!Map_noKeyP(myLHashBody->data[i].key)) {
> sortedIndex[j++] = i;
> }
> }
> assert(j == numEntries);
>
> /*
> * Due to the limitations of the qsort interface we have to
> * pass extra information to compareIndex in these global
> * variables - yuck.
> */
> LHash_thisKeyCompare = (int(*)())sortFunction;
> LHash_thisBody = myLHashBody;
> qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
>
> /*
> * Save the keys for enumeration. The reason we save the keys,
> * not the indices, is that indices may change during enumeration
> * due to deletions.
> */
> sortedKeys = new KeyT[numEntries];
> assert(sortedKeys != 0);
>
> for (i = 0; i < numEntries; i++) {
> sortedKeys[i] = myLHashBody->data[sortedIndex[i]].key;
> }
>
> delete [] sortedIndex;
> }
>
> template <class KeyT, class DataT>
> LHashIter<KeyT,DataT>::LHashIter(const LHash<KeyT,DataT> &lhash,
> int (*keyCompare)(KeyT, KeyT))
> : myLHashBody(BODY(lhash.body)), current(0),
> numEntries(lhash.numEntries()), sortFunction(keyCompare)
> {
> /*
> * Note: we access the underlying LHash through the body pointer,
> * not the top-level LHash itself. This allows the top-level object
> * to be moved without affecting an ongoing iteration.
> * XXX: This only works because
> * - it is illegal to insert while iterating
> * - deletions don't cause reallocation of the data
> */
> if (sortFunction && myLHashBody) {
> sortKeys();
> } else {
> sortedKeys = 0;
> }
> }
>
> /*
> * This is the callback function passed to qsort for comparing array
> * entries. It is passed the indices into the data array, which are
> * compared according to the user-supplied function applied to the
> * keys found at those locations.
> */
> template <class KeyT, class DataT>
> int
> LHashIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
> {
> return (*(compFnType)LHash_thisKeyCompare)
> (BODY(LHash_thisBody)->data[*(unsigned *)idx1].key,
> BODY(LHash_thisBody)->data[*(unsigned *)idx2].key);
> }
>
> template <class KeyT, class DataT>
> LHashIter<KeyT,DataT>::~LHashIter()
> {
> delete [] sortedKeys;
> }
>
>
> template <class KeyT, class DataT>
> void
> LHashIter<KeyT,DataT>::init()
> {
> delete [] sortedKeys;
>
> current = 0;
>
> {
> /*
> * XXX: fake LHash object to access numEntries()
> */
> LHash<KeyT,DataT> myLHash(0);
>
> myLHash.body = myLHashBody;
> numEntries = myLHash.numEntries();
> myLHash.body = 0;
> }
>
> if (sortFunction && myLHashBody) {
> sortKeys();
> } else {
> sortedKeys = 0;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> LHashIter<KeyT,DataT>::next(KeyT &key)
> {
>
> if (myLHashBody == 0) {
> return 0;
> } else {
> unsigned index;
>
> if (sortedKeys) {
> /*
> * Sorted enumeration -- advance to next key in sorted list
> */
> if (current == numEntries) {
> return 0;
> }
>
> /*
> * XXX: fake LHash object to access locate()
> */
> LHash<KeyT,DataT> myLHash(0);
>
> myLHash.body = myLHashBody;
> myLHash.locate(sortedKeys[current++], index);
> myLHash.body = 0;;
> } else {
> /*
> * Detect deletions by comparing old and current number of
> * entries.
> * A legal deletion can only affect the current entry, so
> * adjust the current position to reflect the next entry was
> * moved.
> */
> if (myLHashBody->nEntries != numEntries) {
> assert(myLHashBody->nEntries == numEntries - 1);
>
> numEntries --;
> current --;
> }
>
> /*
> * Random enumeration mode
> */
> unsigned maxBits = myLHashBody->maxBits;
>
> if (maxBits < minHashBits) {
> /*
> * Linear search mode - advance one to the next entry
> */
> if (current == numEntries) {
> return 0;
> }
> } else {
> unsigned maxEntries = hashSize(maxBits);
>
> while (current < maxEntries &&
> Map_noKeyP(myLHashBody->data[current].key))
> {
> current++;
> }
>
> if (current == maxEntries) {
> return 0;
> }
> }
>
> index = current ++;
> }
>
> key = myLHashBody->data[index].key;
> return &(myLHashBody->data[index].value);
> }
> }
>
> #undef BODY
>
> #endif /* _LHash_cc_ */
> /*
> * SArray.cc --
> * Sorted array implementation
> *
> */
>
> #ifndef _SArray_cc_
> #define _SArray_cc_
>
> #ifndef lint
> static char SArray_Copyright[] = "Copyright (c) 1995-2005 SRI International.
> All Rights Reserved.";
> static char SArray_RcsId[] = "@(#)$Header:
> /home/srilm/devel/dstruct/src/RCS/SArray.cc,v 1.37 2005/07/17 22:01:31 stolcke
> Exp $";
> #endif
>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <assert.h>
> #include <new.h>
>
> #include "SArray.h"
>
> #undef INSTANTIATE_SARRAY
> #define INSTANTIATE_SARRAY(KeyT, DataT) \
> template <> DataT *SArray< KeyT, DataT >::removedData = 0; \
> template class SArray< KeyT, DataT >; \
> template class SArrayIter< KeyT, DataT >
>
> #ifndef __GNUG__
> template <class KeyT, class DataT>
> DataT *SArray<KeyT,DataT>::removedData = 0;
> #endif /* __GNUG__ */
>
> #define BODY(b) ((SArrayBody<KeyT,DataT> *)b)
>
> const double growSize = 1.1;
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::dump() const
> {
> if (body == 0) {
> cerr << "EMPTY" << endl;
> } else {
> unsigned nEntries = numEntries();
>
> cerr << "maxEntries = " << BODY(body)->maxEntries;
> cerr << " nEntries = " << nEntries;
>
> for (unsigned i = 0; i < nEntries; i++) {
> cerr << " " << i << ": ";
> cerr << BODY(body)->data[i].key
> #ifdef DUMP_VALUES
> /*
> * Only certain types can be printed.
> */
> << "->" << BODY(body)->data[i].value
> #endif /* DUMP_VALUES */
> ;
> }
> }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::memStats(MemStats &stats) const
> {
> stats.total += sizeof(*this);
>
> if (body) {
> unsigned maxEntries = BODY(body)->maxEntries;
>
> stats.total += sizeof(*BODY(body)) +
> sizeof(BODY(body)->data[0]) *
> (maxEntries - 1);
> stats.wasted += sizeof(BODY(body)->data[0]) *
> (maxEntries - numEntries());
> }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::alloc(unsigned size)
> {
> body = (SArrayBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
> (size - 1) * sizeof(BODY(body)->data[0]));
> assert(body != 0);
>
> BODY(body)->deleted = 0;
> BODY(body)->maxEntries = size;
>
> /*
> * Fill the array with dummy keys, marking the unused space
> */
> for (unsigned i = 0; i < size; i ++) {
> Map_noKey(BODY(body)->data[i].key);
> }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::SArray(unsigned size)
> : body(0)
> {
> if (size != 0) {
> alloc(size);
> }
> }
>
> template <class KeyT, class DataT>
> void
> SArray<KeyT,DataT>::clear(unsigned size)
> {
> if (body) {
> unsigned maxEntries = BODY(body)->maxEntries;
>
> for (unsigned i = 0; i < maxEntries; i++) {
> KeyT thisKey = BODY(body)->data[i].key;
>
> if (Map_noKeyP(thisKey)) {
> break;
> }
> Map_freeKey(thisKey);
> }
> free(body);
> body = 0;
> }
> if (size != 0) {
> alloc(size);
> }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::~SArray()
> {
> clear(0);
> }
>
> template <class KeyT, class DataT>
> unsigned
> SArray<KeyT,DataT>::numEntries() const
> {
> if (body == 0) {
> return 0;
> } else if (Map_noKeyP(BODY(body)->data[0].key)) {
> return 0;
> } else {
> /*
> * Do a binary search for the end of the used memory
> * lower always points to a filled entry
> * upper always points to a free entry beyond the end of used entries
> */
> unsigned lower, upper;
>
> lower = 0; /* minimum index (inclusive) */
> upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
>
> while (lower + 1 < upper) {
> unsigned middle = (lower + upper)/2;
>
> if (Map_noKeyP(BODY(body)->data[middle].key)) {
> upper = middle;
> } else {
> lower = middle;
> }
> }
>
> /* lower + 1 == upper */
> return upper;
> }
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT>::SArray(const SArray<KeyT,DataT> &source)
> : body(0)
> {
> #ifdef DEBUG
> cerr << "warning: SArray copy constructor called\n";
> #endif
> *this = source;
> }
>
> template <class KeyT, class DataT>
> SArray<KeyT,DataT> &
> SArray<KeyT,DataT>::operator= (const SArray<KeyT,DataT> &other)
> {
> #ifdef DEBUG
> cerr << "warning: SArray::operator= called\n";
> #endif
>
> if (&other == this) {
> return *this;
> }
>
> /*
> * copy array entries from old to new
> */
> if (other.body) {
> unsigned maxEntries = BODY(other.body)->maxEntries;
> clear(maxEntries);
>
> for (unsigned i = 0; i < maxEntries; i++) {
> KeyT thisKey = BODY(other.body)->data[i].key;
>
> if (Map_noKeyP(thisKey)) {
> break;
> }
>
> /*
> * Copy key
> */
> BODY(body)->data[i].key = Map_copyKey(thisKey);
>
> /*
> * Initialize data, required for = operator
> */
> new (&(BODY(body)->data[i].value)) DataT;
>
> /*
> * Copy data
> */
> BODY(body)->data[i].value = BODY(other.body)->data[i].value;
>
> }
> } else {
> clear(0);
> }
>
> return *this;
> }
>
> /*
> * Returns index into data array that has the key which is either
> * equal to key, or indicates the place where key would be placed if it
> * occurred in the array.
> */
> template <class KeyT, class DataT>
> Boolean
> SArray<KeyT,DataT>::locate(KeyT key, unsigned &index) const
> {
> assert(!Map_noKeyP(key));
>
> if (body) {
> unsigned lower, upper;
>
> lower = 0; /* minimum index (inclusive) */
> upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
>
> while (lower < upper) {
> unsigned middle = (lower + upper)/2;
>
> KeyT thisKey = BODY(body)->data[middle].key;
>
> if (Map_noKeyP(thisKey)) {
> upper = middle;
> continue;
> }
>
> int compare = SArray_compareKey(key, thisKey);
>
> if (compare < 0) {
> upper = middle;
> } else if (compare > 0) {
> lower = middle + 1;
> } else {
> index = middle;
> return true;
> }
> }
>
> /* we have lower == upper */
> if (lower == BODY(body)->maxEntries ||
> Map_noKeyP(BODY(body)->data[lower].key) ||
> SArray_compareKey(key, BODY(body)->data[lower].key) < 0)
> {
> index = lower;
> } else {
> index = lower + 1;
> }
> return false;
> } else {
> return false;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
> {
> unsigned index;
>
> if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
> } else {
> return 0;
> }
> }
>
> template <class KeyT, class DataT>
> KeyT
> SArray<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
> {
> unsigned index;
> static KeyT zeroKey;
>
> if (foundP = locate(key, index)) {
> return BODY(body)->data[index].key;
> } else {
> return zeroKey;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
> {
> unsigned index;
>
> /*
> * Make sure there is room for at least one entry
> */
> if (body == 0) {
> alloc(1);
> }
>
> if (foundP = locate(key, index)) {
> return &(BODY(body)->data[index].value);
> } else {
> unsigned nEntries = numEntries();
> unsigned maxEntries = BODY(body)->maxEntries;
>
> /*
> * Need to add an element.
> * First, enlarge array if necessary
> */
> if (nEntries == maxEntries) {
> maxEntries = (int)(maxEntries * growSize);
> if (maxEntries == nEntries) {
> maxEntries ++;
> }
> body = (SArrayBody<KeyT,DataT> *)realloc(body,
> sizeof(*BODY(body)) +
> (maxEntries - 1) * sizeof(BODY(body)->data[0]));
> assert(body != 0);
>
> /*
> * Fill new space with dummy keys
> */
> for (unsigned i = BODY(body)->maxEntries; i < maxEntries; i ++) {
> Map_noKey(BODY(body)->data[i].key);
> }
>
> BODY(body)->maxEntries = maxEntries;
> }
>
> memmove(&(BODY(body)->data[index + 1]),
> &(BODY(body)->data[index]),
> (nEntries - index) * sizeof(BODY(body)->data[0]));
>
> BODY(body)->data[index].key = Map_copyKey(key);
>
> /*
> * Initialize data to zero, but also call constructors, if any
> */
> memset(&(BODY(body)->data[index].value), 0,
> sizeof(BODY(body)->data[index].value));
> new (&(BODY(body)->data[index].value)) DataT;
>
> return &(BODY(body)->data[index].value);
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArray<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
> {
> unsigned index;
>
> /*
> * Allocate pseudo-static memory for removed objects (returned by
> * remove()). We cannot make this static because otherwise
> * the destructor for DataT would be called on it at program exit.
> */
> if (removedData == 0) {
> removedData = (DataT *)malloc(sizeof(DataT));
> assert(removedData != 0);
> }
>
> if (foundP = locate(key, index)) {
> unsigned nEntries = numEntries();
>
> Map_freeKey(BODY(body)->data[index].key);
> memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
>
> memmove(&(BODY(body)->data[index]),
> &(BODY(body)->data[index + 1]),
> (nEntries - index - 1) * sizeof(BODY(body)->data[0]));
>
> /*
> * mark previous last slot as free
> */
> Map_noKey(BODY(body)->data[nEntries-1].key);
>
> /*
> * Indicate that deletion occurred
> */
> BODY(body)->deleted = 1;
>
> return removedData;
> } else {
> return 0;
> }
> }
>
> #ifdef __GNUG__
> static
> #endif
> int (*SArray_thisKeyCompare)(); /* used by SArrayIter() to communicate
> * with compareIndex() */
> #ifdef __GNUG__
> static
> #endif
> void *SArray_thisBody; /* ditto */
>
>
> template <class KeyT, class DataT>
> void
> SArrayIter<KeyT,DataT>::sortKeys()
> {
> /*
> * Store keys away and sort them to the user's orders.
> */
> unsigned *sortedIndex = new unsigned[numEntries];
> assert(sortedIndex != 0);
>
> unsigned i;
> for (i = 0; i < numEntries; i++) {
> sortedIndex[i] = i;
> }
>
> /*
> * Due to the limitations of the qsort interface we have to
> * pass extra information to compareIndex in these global
> * variables - yuck.
> */
> SArray_thisKeyCompare = (int (*)())sortFunction;
> SArray_thisBody = mySArrayBody;
> qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
>
> /*
> * Save the keys for enumeration. The reason we save the keys,
> * not the indices, is that indices may change during enumeration
> * due to deletions.
> */
> sortedKeys = new KeyT[numEntries];
> assert(sortedKeys != 0);
>
> for (i = 0; i < numEntries; i++) {
> sortedKeys[i] = mySArrayBody->data[sortedIndex[i]].key;
> }
>
> delete [] sortedIndex;
> }
>
> template <class KeyT, class DataT>
> SArrayIter<KeyT,DataT>::SArrayIter(const SArray<KeyT,DataT> &sarray,
> int (*keyCompare)(KeyT, KeyT))
> : mySArrayBody(BODY(sarray.body)), current(0),
> numEntries(sarray.numEntries()), sortFunction(keyCompare)
> {
> /*
> * Note: we access the underlying SArray through the body pointer,
> * not the top-level SArray itself. This allows the top-level object
> * to be moved without affecting an ongoing iteration.
> * XXX: This only works because
> * - it is illegal to insert while iterating
> * - deletions don't cause reallocation of the data
> */
> if (sortFunction && mySArrayBody) {
> sortKeys();
> } else {
> sortedKeys = 0;
> }
>
> if (mySArrayBody) {
> mySArrayBody->deleted = 0;
> }
> }
>
> /*
> * This is the callback function passed to qsort for comparing array
> * entries. It is passed the indices into the data array, which are
> * compared according to the user-supplied function applied to the
> * keys found at those locations.
> */
> template <class KeyT, class DataT>
> int
> SArrayIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
> {
> return (*(compFnType)SArray_thisKeyCompare)
> (BODY(SArray_thisBody)->data[*(unsigned *)idx1].key,
> BODY(SArray_thisBody)->data[*(unsigned *)idx2].key);
> }
>
> template <class KeyT, class DataT>
> SArrayIter<KeyT,DataT>::~SArrayIter()
> {
> delete [] sortedKeys;
> }
>
> template <class KeyT, class DataT>
> void
> SArrayIter<KeyT,DataT>::init()
> {
> delete [] sortedKeys;
> sortedKeys = 0;
>
> current = 0;
>
> {
> /*
> * XXX: fake SArray object to access numEntries()
> */
> SArray<KeyT,DataT> mySArray(0);
>
> mySArray.body = mySArrayBody;
> numEntries = mySArray.numEntries();
> mySArray.body = 0;
> }
>
> if (mySArrayBody) {
> if (sortFunction) {
> sortKeys();
> }
> mySArrayBody->deleted = 0;
> }
> }
>
> template <class KeyT, class DataT>
> DataT *
> SArrayIter<KeyT,DataT>::next(KeyT &key)
> {
> if (mySArrayBody == 0) {
> return 0;
> } else {
> unsigned index;
>
> if (sortedKeys == 0) {
> /*
> * Detect deletion while iterating.
> * A legal deletion can only affect the current entry, so
> * adjust the current position to reflect the next entry was
> * moved.
> */
> if (mySArrayBody->deleted) {
> numEntries --;
> current --;
> }
>
> if (current == numEntries) {
> return 0;
> }
>
> index = current++;
> } else {
> if (current == numEntries) {
> return 0;
> }
>
> /*
> * XXX: fake an SArray object to access locate()
> */
> SArray<KeyT,DataT> mySArray(0);
>
> mySArray.body = mySArrayBody;
> (void)mySArray.locate(sortedKeys[current++], index);
> mySArray.body = 0;
> }
> mySArrayBody->deleted = 0;
>
> key = mySArrayBody->data[index].key;
> return &(mySArrayBody->data[index].value);
> }
> }
>
> #undef BODY
>
> #endif /* _SArray_cc_ */
More information about the SRILM-User
mailing list