Modifying ngram.cc
Andreas Stolcke
stolcke at speech.sri.com
Sun Jul 17 17:47:57 PDT 2005
The problem was that copy constructors (and assignment operators)
for the container data structures (Array, SArray, LHash) weren't implemented.
I fixed that in the current development version.
No changes to any of the higher-level classes (like Ngram) is needed,
as C++ automatically (and recursively) defines copy constructors based on the
member copy constructors.
You can either replace the attached source files (in dstruct/src), or
download the "1.4.5 (Beta)" version from the web page.
--Andreas
In message <BEF415D2.332F%lambert at jhu.edu>you wrote:
> Hi,
>
> This is kind of a C++ question
> I wrote the following copy constructor in Ngram.h
> Ngram(const Ngram
> &ng):LM(ng.vocab),contexts(ng.contexts),order(ng.order),_skipOOVs(ng._skipOO
> Vs),_trustTotals(ng._trustTotals){};
>
> I have the following declarations
>
> Ngram ngramLM(vocab*,order);
>
> While(//<some condition>)
> Ngram useLM(ngramLM);
>
> // Do some stuff with useLM
> .........
> ........
> }
>
> The problem is that the assignment useLM=ngramLM doesn't assign the original
> ngramLM. Any changes I make to useLM shows up in ngramLM too.
> I just want to make a copy (useLM) of the original ngramLM, work on that
> copy and then reinitialize another useLM with the original ngramLM.
>
> Any thoughts?
>
>
> Lambert
>
-------------- next part --------------
/*
* Array.h --
* Extensible array class
*
* Copyright (c) 1995-2005 SRI International. All Rights Reserved.
*
* @(#)$Header: /home/srilm/devel/dstruct/src/RCS/Array.h,v 1.11 2005/07/17 22:12:21 stolcke Exp $
*
*/
#ifndef _Array_h_
#define _Array_h_
#include <assert.h>
#include "MemStats.h"
template <class DataT>
class Array
{
public:
Array(int base = 0, unsigned int size = 0)
: _base(base), _size(size), _data(0), alloc_size(0)
{ if (size > 0) { alloc(size-1); } };
Array(Array<DataT> &source)
: _base(source._base), _size(0), _data(0), alloc_size(0)
{ *this = source; }
~Array() { delete [] _data; } ;
DataT &operator[](int index)
{ int offset = index - _base; assert(offset >= 0);
if (offset >= _size) {
_size = offset + 1;
if (offset >= alloc_size) { alloc(offset); }
}
return _data[offset];
};
Array<DataT> & operator= (const Array<DataT> &other);
DataT *data() const { return _data - _base; };
int base() const { return _base; }
unsigned int size() const { return _size; }
void memStats(MemStats &stats) const;
private:
unsigned int _size; /* used size */
int _base;
DataT *_data;
unsigned int alloc_size; /* allocated size */
void alloc(unsigned int size);
};
#endif /* _Array_h_ */
-------------- next part --------------
/*
* LHash.cc --
* Linear search hash table implementation
*
*/
#ifndef _LHash_cc_
#define _LHash_cc_
#ifndef lint
static char LHash_Copyright[] = "Copyright (c) 1995-2005 SRI International. All Rights Reserved.";
static char LHash_RcsId[] = "@(#)$Header: /home/srilm/devel/dstruct/src/RCS/LHash.cc,v 1.46 2005/07/17 22:01:31 stolcke Exp $";
#endif
#include <iostream.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <new.h>
#include "LHash.h"
#undef INSTANTIATE_LHASH
#define INSTANTIATE_LHASH(KeyT, DataT) \
template <> DataT *LHash< KeyT, DataT >::removedData = 0; \
template class LHash< KeyT, DataT >; \
template class LHashIter< KeyT, DataT >
#ifndef __GNUG__
template <class KeyT, class DataT>
DataT *LHash<KeyT,DataT>::removedData = 0;
#endif /* __GNUG__ */
const unsigned minHashBits = 3; /* minimum no. bits for hashing
* tables smaller than this use linear
* search to save space */
const float fillRatio = 0.8; /* fill ration at which the table is
* expanded and rehashed */
#define BODY(b) ((LHashBody<KeyT,DataT> *)b)
/*
* Dump the entire hash array to cerr. Unused slots are printed as "FREE".
*/
template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::dump() const
{
if (body) {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned i;
for (i = 0; i < maxEntries; i++) {
cerr << " " << i << ": ";
if (Map_noKeyP(BODY(body)->data[i].key)) {
cerr << "FREE";
} else {
cerr << BODY(body)->data[i].key
#ifdef DUMP_VALUES
/*
* Only certain types can be printed.
*/
<< "->" << BODY(body)->data[i].value
#endif /* DUMP_VALUES */
;
}
}
} else {
cerr << "EMPTY";
}
cerr << endl;
}
template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::memStats(MemStats &stats) const
{
stats.total += sizeof(*this);
if (body) {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
stats.total += sizeof(*BODY(body)) +
sizeof(BODY(body)->data[0]) *
(maxEntries - 1);
stats.wasted += sizeof(BODY(body)->data[0]) *
(maxEntries - BODY(body)->nEntries);
}
}
/*
* Compute the actual minimum size required for a given number of entries
*/
static inline unsigned
roundSize(unsigned size)
{
if (size < hashSize(minHashBits)) {
return size;
} else {
return (unsigned)((size + 1)/ fillRatio);
}
}
template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::alloc(unsigned size)
{
unsigned maxBits, maxEntries;
unsigned i;
/*
* round up to power of two
*/
maxBits = 0;
while (hashSize(maxBits) < size) {
assert(maxBits < LHash_maxBitLimit);
maxBits++;
}
maxEntries = hashSize(maxBits);
//cerr << "LHash::alloc: current " << (body ? BODY(body)->nEntries : 0)
// << ", requested " << size
// << ", allocating " << maxEntries << " (" << maxBits << ")\n";
body = (LHashBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
(maxEntries - 1) * sizeof(BODY(body)->data[0]));
assert(body != 0);
BODY(body)->maxBits = maxBits;
BODY(body)->nEntries = 0;
for (i = 0; i < maxEntries; i++) {
Map_noKey(BODY(body)->data[i].key);
}
//cerr << "memory for header = " <<
// ((void *)&(BODY(body)->data[0]) - (void *)BODY(body)) << endl;
//cerr << "memory per entry = " <<
// ((void *)&(BODY(body)->data[3]) - (void *)&(BODY(body)->data[2])) << endl;
}
template <class KeyT, class DataT>
LHash<KeyT,DataT>::LHash(unsigned size)
: body(0)
{
if (size != 0) {
/*
* determine actual initial size
*/
alloc(roundSize(size));
}
}
template <class KeyT, class DataT>
void
LHash<KeyT,DataT>::clear(unsigned size)
{
if (body) {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned i;
for (i = 0; i < maxEntries; i++) {
if (! Map_noKeyP(BODY(body)->data[i].key)) {
Map_freeKey(BODY(body)->data[i].key);
}
}
free(body);
body = 0;
}
if (size != 0) {
alloc(roundSize(size));
}
}
template <class KeyT, class DataT>
LHash<KeyT,DataT>::~LHash()
{
clear(0);
}
template <class KeyT, class DataT>
LHash<KeyT,DataT>::LHash(const LHash<KeyT,DataT> &source)
: body(0)
{
#ifdef DEBUG
cerr << "warning: LHash copy constructor called\n";
#endif
*this = source;
}
template <class KeyT, class DataT>
LHash<KeyT,DataT> &
LHash<KeyT,DataT>::operator= (const LHash<KeyT,DataT> &other)
{
#ifdef DEBUG
cerr << "warning: LHash::operator= called\n";
#endif
if (&other == this) {
return *this;
}
/*
* copy hash entries from old to new
*/
if (other.body) {
unsigned maxEntries = hashSize(BODY(other.body)->maxBits);
clear(maxEntries);
for (unsigned i = 0; i < maxEntries; i++) {
KeyT thisKey = BODY(other.body)->data[i].key;
if (!Map_noKeyP(thisKey)) {
/*
* Copy key
*/
BODY(body)->data[i].key = Map_copyKey(thisKey);
/*
* Initialize data, required for = operator
*/
new (&(BODY(body)->data[i].value)) DataT;
/*
* Copy data
*/
BODY(body)->data[i].value = BODY(other.body)->data[i].value;
}
}
BODY(body)->nEntries = BODY(other.body)->nEntries;
} else {
clear(0);
}
return *this;
}
/*
* Returns index into data array that has the key which is either
* equal to key, or indicates the place where key would be placed if it
* occurred in the array.
*/
template <class KeyT, class DataT>
Boolean
LHash<KeyT,DataT>::locate(KeyT key, unsigned &index) const
{
assert(!Map_noKeyP(key));
if (body) {
unsigned maxBits = BODY(body)->maxBits;
register MapEntry<KeyT,DataT> *data = BODY(body)->data;
if (maxBits < minHashBits) {
/*
* Do a linear search
*/
unsigned nEntries = BODY(body)->nEntries;
register unsigned i;
for (i = 0; i < nEntries; i++) {
if (LHash_equalKey(data[i].key, key)) {
index = i;
return true;
}
}
index = i;
return false;
} else {
/*
* Do a hashed search
*/
unsigned hash = LHash_hashKey(key, maxBits);
unsigned i;
for (i = hash; ; i = (i + 1) & hashMask(maxBits))
{
if (Map_noKeyP(data[i].key)) {
index = i;
return false;
} else if (LHash_equalKey(data[i].key, key)) {
index = i;
return true;
}
}
}
} else {
return false;
}
}
template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
{
unsigned index;
if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
} else {
return 0;
}
}
template <class KeyT, class DataT>
KeyT
LHash<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
{
unsigned index;
static KeyT zeroKey;
if (foundP = locate(key, index)) {
return BODY(body)->data[index].key;
} else {
return zeroKey;
}
}
template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
{
unsigned index;
assert(!(Map_noKeyP(key)));
/*
* Make sure there is room for at least one entry
*/
if (body == 0) {
alloc(1);
}
if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
} else {
unsigned maxEntries = hashSize(BODY(body)->maxBits);
unsigned nEntries = BODY(body)->nEntries;
/*
* Rehash table if necessary
*/
unsigned minSize = roundSize(nEntries + 1);
if (minSize > maxEntries) {
LHashBody<KeyT,DataT> *oldBody = BODY(body);
unsigned i;
/*
* Since LHash_maxEntriesLimit is a power of two minus 1
* we need to check this only when the array is enlarged
*/
assert(nEntries < LHash_maxEntriesLimit);
alloc(minSize);
BODY(body)->nEntries = nEntries;
if (BODY(body)->maxBits < minHashBits) {
/*
* Just copy old entries to new storage, no reindexing
* required.
*/
memcpy(BODY(body)->data, oldBody->data,
sizeof(oldBody->data[0]) * nEntries);
} else {
/*
* Rehash
*/
for (i = 0; i < maxEntries; i++) {
KeyT key = oldBody->data[i].key;
if (! Map_noKeyP(key)) {
(void)locate(key, index);
memcpy(&(BODY(body)->data[index]), &(oldBody->data[i]),
sizeof(oldBody->data[0]));
}
}
}
free(oldBody);
/*
* Entries have been moved, so have to re-locate key
*/
(void)locate(key, index);
}
BODY(body)->data[index].key = Map_copyKey(key);
/*
* Initialize data to zero, but also call constructors, if any
*/
memset(&(BODY(body)->data[index].value), 0,
sizeof(BODY(body)->data[index].value));
new (&(BODY(body)->data[index].value)) DataT;
BODY(body)->nEntries++;
return &(BODY(body)->data[index].value);
}
}
template <class KeyT, class DataT>
DataT *
LHash<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
{
unsigned index;
/*
* Allocate pseudo-static memory for removed objects (returned by
* remove()). We cannot make this static because otherwise
* the destructor for DataT would be called on it at program exit.
*/
if (removedData == 0) {
removedData = (DataT *)malloc(sizeof(DataT));
assert(removedData != 0);
}
if (foundP = locate(key, index)) {
Map_freeKey(BODY(body)->data[index].key);
Map_noKey(BODY(body)->data[index].key);
memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
if (BODY(body)->maxBits < minHashBits) {
/*
* Linear search mode -- Just move all entries above the
* the one removed to fill the gap.
*/
unsigned nEntries = BODY(body)->nEntries;
memmove(&BODY(body)->data[index],
&BODY(body)->data[index+1],
(nEntries - index - 1) * sizeof(BODY(body)->data[0]));
Map_noKey(BODY(body)->data[nEntries - 1].key);
} else {
/*
* The entry after the one being deleted could actually
* belong to a prior spot in the table, but was bounced forward due
* to a collision. The invariant used in lookup is that
* all locations between the hash index and the actual index are
* filled. Hence we examine all entries between the deleted
* position and the next free position as whether they need to
* be moved backward.
*/
while (1) {
unsigned newIndex;
index = (index + 1) & hashMask(BODY(body)->maxBits);
if (Map_noKeyP(BODY(body)->data[index].key)) {
break;
}
/*
* If find returns false that means the deletion has
* introduced a hole in the table that would prevent
* us from finding the next entry. Luckily, find
* also tells us where the hole is. We move the
* entry to its rightful place and continue filling
* the hole created by this move.
*/
if (!locate(BODY(body)->data[index].key, newIndex)) {
memcpy(&(BODY(body)->data[newIndex]),
&(BODY(body)->data[index]),
sizeof(BODY(body)->data[0]));
Map_noKey(BODY(body)->data[index].key);
}
}
}
BODY(body)->nEntries--;
return removedData;
} else {
return 0;
}
}
#ifdef __GNUG__
static
#endif
int (*LHash_thisKeyCompare)(); /* used by LHashIter() to communicate
* with compareIndex() */
#ifdef __GNUG__
static
#endif
void *LHash_thisBody; /* ditto */
template <class KeyT, class DataT>
void
LHashIter<KeyT,DataT>::sortKeys()
{
/*
* Store keys away and sort them to the user's orders.
*/
unsigned maxEntries = hashSize(myLHashBody->maxBits);
unsigned *sortedIndex = new unsigned[numEntries];
assert(sortedIndex != 0);
unsigned i;
unsigned j = 0;
for (i = 0; i < maxEntries; i++) {
if (!Map_noKeyP(myLHashBody->data[i].key)) {
sortedIndex[j++] = i;
}
}
assert(j == numEntries);
/*
* Due to the limitations of the qsort interface we have to
* pass extra information to compareIndex in these global
* variables - yuck.
*/
LHash_thisKeyCompare = (int(*)())sortFunction;
LHash_thisBody = myLHashBody;
qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
/*
* Save the keys for enumeration. The reason we save the keys,
* not the indices, is that indices may change during enumeration
* due to deletions.
*/
sortedKeys = new KeyT[numEntries];
assert(sortedKeys != 0);
for (i = 0; i < numEntries; i++) {
sortedKeys[i] = myLHashBody->data[sortedIndex[i]].key;
}
delete [] sortedIndex;
}
template <class KeyT, class DataT>
LHashIter<KeyT,DataT>::LHashIter(const LHash<KeyT,DataT> &lhash,
int (*keyCompare)(KeyT, KeyT))
: myLHashBody(BODY(lhash.body)), current(0),
numEntries(lhash.numEntries()), sortFunction(keyCompare)
{
/*
* Note: we access the underlying LHash through the body pointer,
* not the top-level LHash itself. This allows the top-level object
* to be moved without affecting an ongoing iteration.
* XXX: This only works because
* - it is illegal to insert while iterating
* - deletions don't cause reallocation of the data
*/
if (sortFunction && myLHashBody) {
sortKeys();
} else {
sortedKeys = 0;
}
}
/*
* This is the callback function passed to qsort for comparing array
* entries. It is passed the indices into the data array, which are
* compared according to the user-supplied function applied to the
* keys found at those locations.
*/
template <class KeyT, class DataT>
int
LHashIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
{
return (*(compFnType)LHash_thisKeyCompare)
(BODY(LHash_thisBody)->data[*(unsigned *)idx1].key,
BODY(LHash_thisBody)->data[*(unsigned *)idx2].key);
}
template <class KeyT, class DataT>
LHashIter<KeyT,DataT>::~LHashIter()
{
delete [] sortedKeys;
}
template <class KeyT, class DataT>
void
LHashIter<KeyT,DataT>::init()
{
delete [] sortedKeys;
current = 0;
{
/*
* XXX: fake LHash object to access numEntries()
*/
LHash<KeyT,DataT> myLHash(0);
myLHash.body = myLHashBody;
numEntries = myLHash.numEntries();
myLHash.body = 0;
}
if (sortFunction && myLHashBody) {
sortKeys();
} else {
sortedKeys = 0;
}
}
template <class KeyT, class DataT>
DataT *
LHashIter<KeyT,DataT>::next(KeyT &key)
{
if (myLHashBody == 0) {
return 0;
} else {
unsigned index;
if (sortedKeys) {
/*
* Sorted enumeration -- advance to next key in sorted list
*/
if (current == numEntries) {
return 0;
}
/*
* XXX: fake LHash object to access locate()
*/
LHash<KeyT,DataT> myLHash(0);
myLHash.body = myLHashBody;
myLHash.locate(sortedKeys[current++], index);
myLHash.body = 0;;
} else {
/*
* Detect deletions by comparing old and current number of
* entries.
* A legal deletion can only affect the current entry, so
* adjust the current position to reflect the next entry was
* moved.
*/
if (myLHashBody->nEntries != numEntries) {
assert(myLHashBody->nEntries == numEntries - 1);
numEntries --;
current --;
}
/*
* Random enumeration mode
*/
unsigned maxBits = myLHashBody->maxBits;
if (maxBits < minHashBits) {
/*
* Linear search mode - advance one to the next entry
*/
if (current == numEntries) {
return 0;
}
} else {
unsigned maxEntries = hashSize(maxBits);
while (current < maxEntries &&
Map_noKeyP(myLHashBody->data[current].key))
{
current++;
}
if (current == maxEntries) {
return 0;
}
}
index = current ++;
}
key = myLHashBody->data[index].key;
return &(myLHashBody->data[index].value);
}
}
#undef BODY
#endif /* _LHash_cc_ */
-------------- next part --------------
/*
* SArray.cc --
* Sorted array implementation
*
*/
#ifndef _SArray_cc_
#define _SArray_cc_
#ifndef lint
static char SArray_Copyright[] = "Copyright (c) 1995-2005 SRI International. All Rights Reserved.";
static char SArray_RcsId[] = "@(#)$Header: /home/srilm/devel/dstruct/src/RCS/SArray.cc,v 1.37 2005/07/17 22:01:31 stolcke Exp $";
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <new.h>
#include "SArray.h"
#undef INSTANTIATE_SARRAY
#define INSTANTIATE_SARRAY(KeyT, DataT) \
template <> DataT *SArray< KeyT, DataT >::removedData = 0; \
template class SArray< KeyT, DataT >; \
template class SArrayIter< KeyT, DataT >
#ifndef __GNUG__
template <class KeyT, class DataT>
DataT *SArray<KeyT,DataT>::removedData = 0;
#endif /* __GNUG__ */
#define BODY(b) ((SArrayBody<KeyT,DataT> *)b)
const double growSize = 1.1;
template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::dump() const
{
if (body == 0) {
cerr << "EMPTY" << endl;
} else {
unsigned nEntries = numEntries();
cerr << "maxEntries = " << BODY(body)->maxEntries;
cerr << " nEntries = " << nEntries;
for (unsigned i = 0; i < nEntries; i++) {
cerr << " " << i << ": ";
cerr << BODY(body)->data[i].key
#ifdef DUMP_VALUES
/*
* Only certain types can be printed.
*/
<< "->" << BODY(body)->data[i].value
#endif /* DUMP_VALUES */
;
}
}
}
template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::memStats(MemStats &stats) const
{
stats.total += sizeof(*this);
if (body) {
unsigned maxEntries = BODY(body)->maxEntries;
stats.total += sizeof(*BODY(body)) +
sizeof(BODY(body)->data[0]) *
(maxEntries - 1);
stats.wasted += sizeof(BODY(body)->data[0]) *
(maxEntries - numEntries());
}
}
template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::alloc(unsigned size)
{
body = (SArrayBody<KeyT,DataT> *)malloc(sizeof(*BODY(body)) +
(size - 1) * sizeof(BODY(body)->data[0]));
assert(body != 0);
BODY(body)->deleted = 0;
BODY(body)->maxEntries = size;
/*
* Fill the array with dummy keys, marking the unused space
*/
for (unsigned i = 0; i < size; i ++) {
Map_noKey(BODY(body)->data[i].key);
}
}
template <class KeyT, class DataT>
SArray<KeyT,DataT>::SArray(unsigned size)
: body(0)
{
if (size != 0) {
alloc(size);
}
}
template <class KeyT, class DataT>
void
SArray<KeyT,DataT>::clear(unsigned size)
{
if (body) {
unsigned maxEntries = BODY(body)->maxEntries;
for (unsigned i = 0; i < maxEntries; i++) {
KeyT thisKey = BODY(body)->data[i].key;
if (Map_noKeyP(thisKey)) {
break;
}
Map_freeKey(thisKey);
}
free(body);
body = 0;
}
if (size != 0) {
alloc(size);
}
}
template <class KeyT, class DataT>
SArray<KeyT,DataT>::~SArray()
{
clear(0);
}
template <class KeyT, class DataT>
unsigned
SArray<KeyT,DataT>::numEntries() const
{
if (body == 0) {
return 0;
} else if (Map_noKeyP(BODY(body)->data[0].key)) {
return 0;
} else {
/*
* Do a binary search for the end of the used memory
* lower always points to a filled entry
* upper always points to a free entry beyond the end of used entries
*/
unsigned lower, upper;
lower = 0; /* minimum index (inclusive) */
upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
while (lower + 1 < upper) {
unsigned middle = (lower + upper)/2;
if (Map_noKeyP(BODY(body)->data[middle].key)) {
upper = middle;
} else {
lower = middle;
}
}
/* lower + 1 == upper */
return upper;
}
}
template <class KeyT, class DataT>
SArray<KeyT,DataT>::SArray(const SArray<KeyT,DataT> &source)
: body(0)
{
#ifdef DEBUG
cerr << "warning: SArray copy constructor called\n";
#endif
*this = source;
}
template <class KeyT, class DataT>
SArray<KeyT,DataT> &
SArray<KeyT,DataT>::operator= (const SArray<KeyT,DataT> &other)
{
#ifdef DEBUG
cerr << "warning: SArray::operator= called\n";
#endif
if (&other == this) {
return *this;
}
/*
* copy array entries from old to new
*/
if (other.body) {
unsigned maxEntries = BODY(other.body)->maxEntries;
clear(maxEntries);
for (unsigned i = 0; i < maxEntries; i++) {
KeyT thisKey = BODY(other.body)->data[i].key;
if (Map_noKeyP(thisKey)) {
break;
}
/*
* Copy key
*/
BODY(body)->data[i].key = Map_copyKey(thisKey);
/*
* Initialize data, required for = operator
*/
new (&(BODY(body)->data[i].value)) DataT;
/*
* Copy data
*/
BODY(body)->data[i].value = BODY(other.body)->data[i].value;
}
} else {
clear(0);
}
return *this;
}
/*
* Returns index into data array that has the key which is either
* equal to key, or indicates the place where key would be placed if it
* occurred in the array.
*/
template <class KeyT, class DataT>
Boolean
SArray<KeyT,DataT>::locate(KeyT key, unsigned &index) const
{
assert(!Map_noKeyP(key));
if (body) {
unsigned lower, upper;
lower = 0; /* minimum index (inclusive) */
upper = BODY(body)->maxEntries; /* maximum index (exclusive) */
while (lower < upper) {
unsigned middle = (lower + upper)/2;
KeyT thisKey = BODY(body)->data[middle].key;
if (Map_noKeyP(thisKey)) {
upper = middle;
continue;
}
int compare = SArray_compareKey(key, thisKey);
if (compare < 0) {
upper = middle;
} else if (compare > 0) {
lower = middle + 1;
} else {
index = middle;
return true;
}
}
/* we have lower == upper */
if (lower == BODY(body)->maxEntries ||
Map_noKeyP(BODY(body)->data[lower].key) ||
SArray_compareKey(key, BODY(body)->data[lower].key) < 0)
{
index = lower;
} else {
index = lower + 1;
}
return false;
} else {
return false;
}
}
template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::find(KeyT key, Boolean &foundP) const
{
unsigned index;
if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
} else {
return 0;
}
}
template <class KeyT, class DataT>
KeyT
SArray<KeyT,DataT>::getInternalKey(KeyT key, Boolean &foundP) const
{
unsigned index;
static KeyT zeroKey;
if (foundP = locate(key, index)) {
return BODY(body)->data[index].key;
} else {
return zeroKey;
}
}
template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::insert(KeyT key, Boolean &foundP)
{
unsigned index;
/*
* Make sure there is room for at least one entry
*/
if (body == 0) {
alloc(1);
}
if (foundP = locate(key, index)) {
return &(BODY(body)->data[index].value);
} else {
unsigned nEntries = numEntries();
unsigned maxEntries = BODY(body)->maxEntries;
/*
* Need to add an element.
* First, enlarge array if necessary
*/
if (nEntries == maxEntries) {
maxEntries = (int)(maxEntries * growSize);
if (maxEntries == nEntries) {
maxEntries ++;
}
body = (SArrayBody<KeyT,DataT> *)realloc(body,
sizeof(*BODY(body)) +
(maxEntries - 1) * sizeof(BODY(body)->data[0]));
assert(body != 0);
/*
* Fill new space with dummy keys
*/
for (unsigned i = BODY(body)->maxEntries; i < maxEntries; i ++) {
Map_noKey(BODY(body)->data[i].key);
}
BODY(body)->maxEntries = maxEntries;
}
memmove(&(BODY(body)->data[index + 1]),
&(BODY(body)->data[index]),
(nEntries - index) * sizeof(BODY(body)->data[0]));
BODY(body)->data[index].key = Map_copyKey(key);
/*
* Initialize data to zero, but also call constructors, if any
*/
memset(&(BODY(body)->data[index].value), 0,
sizeof(BODY(body)->data[index].value));
new (&(BODY(body)->data[index].value)) DataT;
return &(BODY(body)->data[index].value);
}
}
template <class KeyT, class DataT>
DataT *
SArray<KeyT,DataT>::remove(KeyT key, Boolean &foundP)
{
unsigned index;
/*
* Allocate pseudo-static memory for removed objects (returned by
* remove()). We cannot make this static because otherwise
* the destructor for DataT would be called on it at program exit.
*/
if (removedData == 0) {
removedData = (DataT *)malloc(sizeof(DataT));
assert(removedData != 0);
}
if (foundP = locate(key, index)) {
unsigned nEntries = numEntries();
Map_freeKey(BODY(body)->data[index].key);
memcpy(removedData, &BODY(body)->data[index].value, sizeof(DataT));
memmove(&(BODY(body)->data[index]),
&(BODY(body)->data[index + 1]),
(nEntries - index - 1) * sizeof(BODY(body)->data[0]));
/*
* mark previous last slot as free
*/
Map_noKey(BODY(body)->data[nEntries-1].key);
/*
* Indicate that deletion occurred
*/
BODY(body)->deleted = 1;
return removedData;
} else {
return 0;
}
}
#ifdef __GNUG__
static
#endif
int (*SArray_thisKeyCompare)(); /* used by SArrayIter() to communicate
* with compareIndex() */
#ifdef __GNUG__
static
#endif
void *SArray_thisBody; /* ditto */
template <class KeyT, class DataT>
void
SArrayIter<KeyT,DataT>::sortKeys()
{
/*
* Store keys away and sort them to the user's orders.
*/
unsigned *sortedIndex = new unsigned[numEntries];
assert(sortedIndex != 0);
unsigned i;
for (i = 0; i < numEntries; i++) {
sortedIndex[i] = i;
}
/*
* Due to the limitations of the qsort interface we have to
* pass extra information to compareIndex in these global
* variables - yuck.
*/
SArray_thisKeyCompare = (int (*)())sortFunction;
SArray_thisBody = mySArrayBody;
qsort(sortedIndex, numEntries, sizeof(*sortedIndex), compareIndex);
/*
* Save the keys for enumeration. The reason we save the keys,
* not the indices, is that indices may change during enumeration
* due to deletions.
*/
sortedKeys = new KeyT[numEntries];
assert(sortedKeys != 0);
for (i = 0; i < numEntries; i++) {
sortedKeys[i] = mySArrayBody->data[sortedIndex[i]].key;
}
delete [] sortedIndex;
}
template <class KeyT, class DataT>
SArrayIter<KeyT,DataT>::SArrayIter(const SArray<KeyT,DataT> &sarray,
int (*keyCompare)(KeyT, KeyT))
: mySArrayBody(BODY(sarray.body)), current(0),
numEntries(sarray.numEntries()), sortFunction(keyCompare)
{
/*
* Note: we access the underlying SArray through the body pointer,
* not the top-level SArray itself. This allows the top-level object
* to be moved without affecting an ongoing iteration.
* XXX: This only works because
* - it is illegal to insert while iterating
* - deletions don't cause reallocation of the data
*/
if (sortFunction && mySArrayBody) {
sortKeys();
} else {
sortedKeys = 0;
}
if (mySArrayBody) {
mySArrayBody->deleted = 0;
}
}
/*
* This is the callback function passed to qsort for comparing array
* entries. It is passed the indices into the data array, which are
* compared according to the user-supplied function applied to the
* keys found at those locations.
*/
template <class KeyT, class DataT>
int
SArrayIter<KeyT,DataT>::compareIndex(const void *idx1, const void *idx2)
{
return (*(compFnType)SArray_thisKeyCompare)
(BODY(SArray_thisBody)->data[*(unsigned *)idx1].key,
BODY(SArray_thisBody)->data[*(unsigned *)idx2].key);
}
template <class KeyT, class DataT>
SArrayIter<KeyT,DataT>::~SArrayIter()
{
delete [] sortedKeys;
}
template <class KeyT, class DataT>
void
SArrayIter<KeyT,DataT>::init()
{
delete [] sortedKeys;
sortedKeys = 0;
current = 0;
{
/*
* XXX: fake SArray object to access numEntries()
*/
SArray<KeyT,DataT> mySArray(0);
mySArray.body = mySArrayBody;
numEntries = mySArray.numEntries();
mySArray.body = 0;
}
if (mySArrayBody) {
if (sortFunction) {
sortKeys();
}
mySArrayBody->deleted = 0;
}
}
template <class KeyT, class DataT>
DataT *
SArrayIter<KeyT,DataT>::next(KeyT &key)
{
if (mySArrayBody == 0) {
return 0;
} else {
unsigned index;
if (sortedKeys == 0) {
/*
* Detect deletion while iterating.
* A legal deletion can only affect the current entry, so
* adjust the current position to reflect the next entry was
* moved.
*/
if (mySArrayBody->deleted) {
numEntries --;
current --;
}
if (current == numEntries) {
return 0;
}
index = current++;
} else {
if (current == numEntries) {
return 0;
}
/*
* XXX: fake an SArray object to access locate()
*/
SArray<KeyT,DataT> mySArray(0);
mySArray.body = mySArrayBody;
(void)mySArray.locate(sortedKeys[current++], index);
mySArray.body = 0;
}
mySArrayBody->deleted = 0;
key = mySArrayBody->data[index].key;
return &(mySArrayBody->data[index].value);
}
}
#undef BODY
#endif /* _SArray_cc_ */
More information about the SRILM-User
mailing list