/*****
* symbol.cc
* Andy Hammerlindl 2002/06/18
*
* Creates symbols from strings so that multiple calls for a symbol of
* the same string will return an identical object.
*****/
#include <cstring>
#include <cstdlib>
using std::strlen;
#include "settings.h"
#include "symbol.h"
namespace sym {
const char USED = 1;
const char SKIP = 2;
struct symbolRecord {
// When a symbol is entered into the table, its hash is computed. If the
// corresponding entry in the table is full, this value is incremented until
// an empty slot is found. hashplus stores the end value.
// Each symbol has a unique hashplus value, even if there is a collision in
// the original hashing function.
uint hashplus;
// Whether the cell of the table is empty, in use, or a "skip" entry due to
// a resizing of the table.
unsigned char flag;
// Pointer to a copy of the string (allocated on the heap). This string
// will never be deallocated. Symbols, in essence, last forever.
char *s;
};
// The table size must be a power of two so that (h % tableSize) can be
// replaced by (h & tableMask). 1 << 15 was chosen based on the number of
// unique symbols (roughly 4000) which occured in all of the base modules.
const size_t SYMBOL_TABLE_BASE_CAPACITY = 1 << 15;
symbolRecord baseSymbolTable[SYMBOL_TABLE_BASE_CAPACITY];
// Set every entry to empty. (Is this faster than memsetting the whole
// thing?)
for (size_t i = 0; i < tableCapacity; ++i)
table[i].flag = 0;
// The zeroth entry is reserved for the "null" symbol.
if (table == baseSymbolTable) {
table[0].flag = USED;
table[0].s = new char[strlen(nullsymstr) + 1];
strcpy(table[0].s, nullsymstr);
++tableSize;
// Hashing constants found experimentally to reduce collision (a little).
const uint A = 25191, B = 16342, C = 1746, D = 18326;
// Hash the string into an integer. Experimental testing has shown that
// hashing only the first few letters seems to be faster than hashing deeper
// into the string, even though this approach causes more hash collisions.
uint hash(const char *s, size_t len)
{
uint h = s[0];
if (len == 2)
return h;
h += A*s[1];
if (len == 3)
return h;
h += B*s[2];
if (len == 4)
return h;
h += C*s[3];
if (len == 5)
return h;
h += D*s[4];
return h+len;
}
/* Under normal circumstances, the initial table should be large enough for
* all of the symbols used and will never be resized. Just in case the
* program encounters a large number of distinct symbols, we implement
* resizing of the table.
*/
void resizeTable() {
symbolRecord *oldTable = table;
size_t oldSize = tableSize;
size_t oldCapacity = tableCapacity;
tableCapacity *= 4;
table = new symbolRecord[tableCapacity];
initTable();
// The null symbol is a special case.
table[0] = oldTable[0];
++tableSize;
#if 0
printf("old:\n");
for (size_t i = 0; i < oldCapacity; ++i) {
symbolRecord &r = oldTable[i];
if (r.flag != USED)
continue;
printf(" %u -> %s\n", r.hashplus, r.s);
}
#endif
for (size_t i = 1; i < oldCapacity; ++i) {
symbolRecord &r = oldTable[i];
if (r.flag != USED)
continue;
// Entries that were skipped over when this symbol was entered into the
// old hash table may not appear in the same spot in the new hash table.
// Put "SKIP" entries in their place, so that the symbol will still be
// found.
for (uint h = hash(r.s, strlen(r.s)+1); h < r.hashplus; ++h) {
symbolRecord &skipr = recordByHashplus(h);
if (skipr.flag == 0)
skipr.flag = SKIP;
}
// Enter the symbol in its spot.
symbolRecord &newr = recordByHashplus(r.hashplus);
assert(newr.flag != USED);
newr.flag = USED;
newr.hashplus = r.hashplus;
newr.s = r.s;
++tableSize;
}
#if 0
printf("new:\n");
for (size_t i = 0; i < tableCapacity; ++i) {
symbolRecord &r = table[i];
if (r.flag != USED)
continue;
printf(" %u -> %s\n", r.hashplus, r.s);
}
#endif
assert(tableSize == oldSize);
// Debugging resize.
for (size_t i = 1; i < oldCapacity; ++i) {
symbolRecord &r = oldTable[i];
symbol symbolize(uint h) {
symbol s;
s.hashplus = h;
return s;
}
// Handles the insertion of a new symbol into a table the has been resized (or
// needs resizing).
symbol advancedInsert(const char *s, size_t len)
{
if (2*tableSize >= tableCapacity)
resizeTable();
// We know the symbol is not in the table. Just search for the first unused
// entry (either empty or a skip entry) and insert there.
for (;;) {
symbolRecord &r = recordByHashplus(hashplus);
symbol symbol::gensym(string s) {
// Gensym can be inserted as if it were a normal string not already in the
// table. advancedInsert handles this.
s = "gensym " + s;
return advancedInsert(s.c_str(), s.size() + 1);
}
// Search through the table till we find the symbol already translated or
// an empty field.
for (;;) {
symbolRecord &r = recordByHashplus(hashplus);
// Translating pre-existing symbols is more common, so check for it first.
if (r.hashplus == hashplus &&
r.flag == USED &&
strncmp(r.s, s, len) == 0) {
return symbolize(hashplus);
}
// Then check for an empty entry, in which case the entry is added.
if (r.flag == 0) {
// Test if the table needs resizing before entering a new symbol, or if
// the table has already been resized. In either case, the symbol will
// be added to a resized table which may contain skip entries, and a
// more involved insertion routine is needed.
if (2*tableSize >= SYMBOL_TABLE_BASE_CAPACITY)
return advancedInsert(s, len);
ostream& operator<< (ostream& out, const symbol sym)
{
symbolRecord &r = recordByHashplus(sym.hashplus);
return out << r.s;
}
} // end namespace sym
/* Define all of operator symbols SYM_PLUS, etc. */
#define OPSYMBOL(str, name) \
sym::symbol name = sym::symbol::opTrans(str)
#include "opsymbols.h"
#undef OPSYMBOL
/* Define all of the symbols of the type SYM(name) in selected files. */
#define ADDSYMBOL(name) \
sym::symbol PRETRANSLATED_SYMBOL_##name = sym::symbol::literalTrans(#name)
#include "allsymbols.h"
#undef ADDSYMBOL