/*****
* symbol.cc
* Andy Hammerlindl 2002/06/18
*
* Creates symbols from strings so that multiple calls for a symbol of
* the same string will return an identical object.
*****/

#include <cstring>
#include <cstdlib>

using std::strlen;


#include "settings.h"
#include "symbol.h"

namespace sym {

const char USED = 1;
const char SKIP = 2;

struct symbolRecord {
 // When a symbol is entered into the table, its hash is computed.  If the
 // corresponding entry in the table is full, this value is incremented until
 // an empty slot is found.  hashplus stores the end value.
 // Each symbol has a unique hashplus value, even if there is a collision in
 // the original hashing function.
 uint hashplus;

 // Whether the cell of the table is empty, in use, or a "skip" entry due to
 // a resizing of the table.
 unsigned char flag;

 // Pointer to a copy of the string (allocated on the heap).  This string
 // will never be deallocated.  Symbols, in essence, last forever.
 char *s;
};

// The table size must be a power of two so that (h % tableSize) can be
// replaced by (h & tableMask).  1 << 15 was chosen based on the number of
// unique symbols (roughly 4000) which occured in all of the base modules.
const size_t SYMBOL_TABLE_BASE_CAPACITY = 1 << 15;
symbolRecord baseSymbolTable[SYMBOL_TABLE_BASE_CAPACITY];

symbolRecord *table = baseSymbolTable;
size_t tableCapacity = SYMBOL_TABLE_BASE_CAPACITY;
uint tableMask = 0;
size_t tableSize = 0;

symbolRecord &recordByHashplus(uint h)
{
 return table[h & tableMask];
}

GCInit symbol::initialize;
symbol symbol::nullsym;
symbol symbol::initsym;
symbol symbol::castsym;
symbol symbol::ecastsym;

const char *nullsymstr = "<nullsym>";

void initTable() {
 tableMask = (uint)(tableCapacity - 1);
 tableSize = 0;

 // Set every entry to empty.  (Is this faster than memsetting the whole
 // thing?)
 for (size_t i = 0; i < tableCapacity; ++i)
   table[i].flag = 0;

 // The zeroth entry is reserved for the "null" symbol.
 if (table == baseSymbolTable) {
   table[0].flag = USED;
   table[0].s = new char[strlen(nullsymstr) + 1];
   strcpy(table[0].s, nullsymstr);
   ++tableSize;

   symbol::nullsym.hashplus = 0;

   symbol::initsym = symbol::opTrans("init");
   symbol::castsym = symbol::opTrans("cast");
   symbol::ecastsym = symbol::opTrans("ecast");
 }
}

// Hashing constants found experimentally to reduce collision (a little).
const uint A = 25191, B = 16342, C = 1746, D = 18326;

// Hash the string into an integer.  Experimental testing has shown that
// hashing only the first few letters seems to be faster than hashing deeper
// into the string, even though this approach causes more hash collisions.
uint hash(const char *s, size_t len)
{
 uint h = s[0];
 if (len == 2)
   return h;
 h += A*s[1];
 if (len == 3)
   return h;
 h += B*s[2];
 if (len == 4)
   return h;
 h += C*s[3];
 if (len == 5)
   return h;
 h += D*s[4];
 return h+len;
}

/* Under normal circumstances, the initial table should be large enough for
* all of the symbols used and will never be resized.  Just in case the
* program encounters a large number of distinct symbols, we implement
* resizing of the table.
*/
void resizeTable() {
 symbolRecord *oldTable = table;
 size_t oldSize = tableSize;
 size_t oldCapacity = tableCapacity;

 tableCapacity *= 4;
 table = new symbolRecord[tableCapacity];
 initTable();

 // The null symbol is a special case.
 table[0] = oldTable[0];
 ++tableSize;

#if 0
 printf("old:\n");
 for (size_t i = 0; i < oldCapacity; ++i) {
   symbolRecord &r = oldTable[i];

   if (r.flag != USED)
     continue;

   printf("  %u -> %s\n", r.hashplus, r.s);
 }
#endif

 for (size_t i = 1; i < oldCapacity; ++i) {
   symbolRecord &r = oldTable[i];

   if (r.flag != USED)
     continue;

   // Entries that were skipped over when this symbol was entered into the
   // old hash table may not appear in the same spot in the new hash table.
   // Put "SKIP" entries in their place, so that the symbol will still be
   // found.
   for (uint h = hash(r.s, strlen(r.s)+1); h < r.hashplus; ++h) {
     symbolRecord &skipr = recordByHashplus(h);
     if (skipr.flag == 0)
       skipr.flag = SKIP;
   }

   // Enter the symbol in its spot.
   symbolRecord &newr = recordByHashplus(r.hashplus);
   assert(newr.flag != USED);
   newr.flag = USED;
   newr.hashplus = r.hashplus;
   newr.s = r.s;
   ++tableSize;
 }

#if 0
 printf("new:\n");
 for (size_t i = 0; i < tableCapacity; ++i) {
   symbolRecord &r = table[i];

   if (r.flag != USED)
     continue;

   printf("  %u -> %s\n", r.hashplus, r.s);
 }
#endif

 assert(tableSize == oldSize);

 // Debugging resize.
 for (size_t i = 1; i < oldCapacity; ++i) {
   symbolRecord &r = oldTable[i];

   if (r.flag != USED)
     continue;

   symbolRecord &newr = recordByHashplus(r.hashplus);
   assert(newr.hashplus == r.hashplus);
   assert(newr.flag != 0);
   assert(newr.flag != SKIP);
   assert(newr.flag == USED);
   assert(newr.s = r.s);
   if (strncmp(r.s, "gensym", 6) != 0)
     assert(symbol::rawTrans(r.s, strlen(r.s)+1).hashplus == r.hashplus);
 }

#if 0
 // Diagnostics.
 uint empty=0, used=0, skip=0;
 for (size_t i = 0; i < tableCapacity; ++i) {
   symbolRecord &r = table[i];
   if (r.flag == 0) ++empty;
   else if (r.flag == USED) ++used;
   else if (r.flag == SKIP) ++skip;
   else assert("Unknown flag" == 0);
 }

 cout << "Resized symbol table.  "
      << "empty: " << empty
      << "used: " << used
      << "skip: " << skip
      << endl;
#endif
}

symbol symbolize(uint h) {
 symbol s;
 s.hashplus = h;
 return s;
}

// Handles the insertion of a new symbol into a table the has been resized (or
// needs resizing).
symbol advancedInsert(const char *s, size_t len)
{
 if (2*tableSize >= tableCapacity)
   resizeTable();

 uint hashplus = hash(s, len);

#if 1
 assert(s != 0);
 assert(len > 0);
 assert(2*tableSize <= tableCapacity);
#endif

 // We know the symbol is not in the table.  Just search for the first unused
 // entry (either empty or a skip entry) and insert there.
 for (;;) {
   symbolRecord &r = recordByHashplus(hashplus);

   if (r.flag != USED) {
     r.flag = USED;
     r.s = new char[len];
     memcpy(r.s, s, len);
     assert(r.s[len-1] == '\0');
     r.hashplus = hashplus;

     ++tableSize;
     assert(2*tableSize <= tableCapacity);

     return symbolize(hashplus);
   }

   ++hashplus;
 }

 assert("Unreachable code" == 0);
 return symbol::nullsym;
}

symbol symbol::gensym(string s) {
 // Gensym can be inserted as if it were a normal string not already in the
 // table.  advancedInsert handles this.
 s = "gensym " + s;
 return advancedInsert(s.c_str(), s.size() + 1);
}

symbol symbol::rawTrans(const char *s, size_t len)
{
 uint hashplus = sym::hash(s, len);

#if 1
 assert(s != 0);
 assert(len > 0);
 assert(2*tableSize <= tableCapacity);
#endif

 // Search through the table till we find the symbol already translated or
 // an empty field.
 for (;;) {
   symbolRecord &r = recordByHashplus(hashplus);

   // Translating pre-existing symbols is more common, so check for it first.
   if (r.hashplus == hashplus &&
       r.flag == USED &&
       strncmp(r.s, s, len) == 0) {
     return symbolize(hashplus);
   }

   // Then check for an empty entry, in which case the entry is added.
   if (r.flag == 0) {
     // Test if the table needs resizing before entering a new symbol, or if
     // the table has already been resized.  In either case, the symbol will
     // be added to a resized table which may contain skip entries, and a
     // more involved insertion routine is needed.
     if (2*tableSize >= SYMBOL_TABLE_BASE_CAPACITY)
       return advancedInsert(s, len);

     r.flag = USED;
     r.s = new char[len];
     memcpy(r.s, s, len);
     assert(r.s[len-1] == '\0');
     r.hashplus = hashplus;

     ++tableSize;
     assert(2*tableSize <= tableCapacity);

     return symbolize(hashplus);
   }

   // A case where a different symbol is in the spot, continue along the
   // table.
   ++hashplus;
 }

 assert("Unreachable code" == 0);
 return symbol::nullsym;
}

symbol::operator string () const {
 symbolRecord &r = recordByHashplus(this->hashplus);
 return (string)r.s;
}

#ifdef USEGC
symbol::operator std::string () const {
 symbolRecord &r = recordByHashplus(this->hashplus);
 return (std::string)r.s;
}
#endif

ostream& operator<< (ostream& out, const symbol sym)
{
 symbolRecord &r = recordByHashplus(sym.hashplus);
 return out << r.s;
}

} // end namespace sym

/* Define all of operator symbols SYM_PLUS, etc. */
#define OPSYMBOL(str, name)                     \
 sym::symbol name = sym::symbol::opTrans(str)
#include "opsymbols.h"
#undef OPSYMBOL

/* Define all of the symbols of the type SYM(name) in selected files. */
#define ADDSYMBOL(name)                                                 \
 sym::symbol PRETRANSLATED_SYMBOL_##name = sym::symbol::literalTrans(#name)
#include "allsymbols.h"
#undef ADDSYMBOL