// sym.h -- A symbol table package
//
// (c) Mark Johnson, 10th March 2001
// (c) Mark Johnson, 12th December 2001 (fix static initialization order bug)
// (c) Mark Johnson, 1st January 2002 (enlarge initial table size)
// (c) Mark Johnson, 4th May 2002 (write/read invariance, i.e., << and >> are inverses)
// (c) Mark Johnson, 16th July 2002 (g++ 3.1 namespace compatibility)
// (c) Mark Johnson, 24th February 2006 (put code into a single header file)
//
// A symbol is just a pointer to a string.  These strings are guaranteed to be
// unique, i.e., if symbols s1 and s2 contain different string pointers then
// the strings they point to are different.  This means that symbol copying, 
// equality, ordering and hashing are very cheap (they involve only the
// pointer and not the string's contents).
//
// A symbol whose string pointer is NULL is an undefined string.  Undefined
// symbols can be compared and hashed just like other symbols, but it is an
// error to obtain the string pointed to by an undefined symbol.
//
// Symbols possess write/read invariance, i.e., you can write a symbol
// to a stream and read the same symbol back from that stream.  (Note that
// the relative ordering of symbols is not preserved, since the underlying
// string objects may be allocated in different locations.)  The symbol
// reader accepts a symbol written in any one of the following three forms:
//
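//   A sequence of symbol characters or escaped characters, where '\'
//   is the escape character.  A symbol character is any printing,
//   non-whitespace character other than '\', ' (quote), '%', '(' and ')'.
//   In particular alphanumeric characters and '_', '.', '-' and '+' are
//   symbol characters (thus ints and floats can be symbols).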
//
//   A ' (quote) character, followed by a sequence of non-quote characters
//   or an escaped \' character, and terminated by an unescaped ' (quote)
//   character.  Thus the empty symbol can be written ''.
//
//   %UNDEFINED% denotes the undefined symbol.
//
// NOTE: You _must_ write a whitespace character, or one of the characters
// ' (quote), '%', '(' or ')', immediately after each unquoted symbol.  Any
// other printing character (e.g., ',' or ':') is read as part of the symbol.
// The symbol reader will stop reading at this character and then put it back on 
// the input stream, so your code is responsible for consuming this character.
//
//
// Constructors:
//
//  symbol()             This returns an undefined symbol
//  symbol(const string&)
//  symbol(const char*)  A NULL pointer yields an undefined symbol
//
// Conversions and member functions:
//
//  string(symbol)
//  symbol.string_reference()
//  symbol.string_pointer()
//  symbol.c_str()
//  symbol.is_defined()
//
//  ext::hash<symbol>()(symbol)   (ext is __gnu_cxx, or std before g++ 3.1)
//
//  The comparison operators ==, !=, <, <=, >, >=.
//    Note: the comparison ordering is NOT alphabetic, but is based on 
//    the location in memory of each symbol.  Thus the relative ordering
//    of the same symbol strings may differ on different runs.
//
// Static (global) functions:
//
//  symbol::size()       The number of symbols defined
//  symbol::already_defined(const string&)
//  
//  The input and output operators >> and <<

#ifndef SYM_H
#define SYM_H

#include <cassert>
#include <cctype>
#include <ext/hash_set>
#include <functional>
#include <iostream>
#include <string>
#include <utility>

// The namespace for the SGI extensions (e.g., hash_maps
// and hash_sets) changed in g++ 3.1.  Sigh.
//
#if (__GNUC__ > 3) || (__GNUC__ >= 3 && __GNUC_MINOR__ >= 1)
#define EXT_NAMESPACE __gnu_cxx
#else
#define EXT_NAMESPACE std
#endif

namespace ext = EXT_NAMESPACE;

//! symbol_type<string_type> is a handle onto a uniquely stored ("interned")
//! string.  Every distinct string is stored once in a table shared by all
//! symbols of the same string_type, and a symbol holds only a pointer to
//! that stored copy, so copying, comparing and hashing a symbol never looks
//! at the characters.  A symbol holding the NULL pointer is "undefined".
//!
//! string_type must be constructible from a const char* and must provide
//! begin(), end(), empty(), push_back(char), c_str() and operator==.
//!
//! NOTE: symbols point at elements of an ext::hash_set, so this class
//! relies on that container never relocating an element once it has been
//! inserted.  Nothing in this class synchronizes access to the shared table.
//
template <typename string_type=std::string>
class symbol_type {

  //! hashstr hashes the contents of a string.
  //! This is the fn hashpjw of Aho, Sethi and Ullman, p 436.
  //
  struct hashstr
  {
    size_t operator()(const string_type& s) const
    {
      typedef typename string_type::const_iterator CI;

      unsigned long h = 0;
      for (CI p = s.begin(), end = s.end(); p != end; ++p) {
        h = (h << 4) + (*p);
        const unsigned long g = h & 0xf0000000;
        if (g) {                  // fold the top nibble back into the low bits
          h = h ^ (g >> 24);
          h = h ^ g;
        }
      }
      return size_t(h);
    }
  };

  typedef string_type* stringptr;

  //! A symbol is just a pointer to a string
  //
  const string_type* sp;

  //! construct a symbol from a string pointer
  //
  symbol_type(const string_type* sp_) : sp(sp_) { }

  typedef ext::hash_set<string_type, hashstr> Table;

  //! table() returns the shared hash table that maps strings to
  //! symbols.  table_ is accessed via table() to avoid the static
  //! initialization order bug.
  //
  inline static Table& table()
  {
    static Table table_(65536);   // default table size
    return table_;
  }

  //! intern() returns the address of the table's copy of s, inserting
  //! s into the table first if it is not already there.
  //
  inline static const string_type* intern(const string_type& s)
  {
    return &*(table().insert(s).first);
  }

  //! These define how the escape characters, the quote symbols and
  //! the undefined symbol (i.e., the NULL pointer) are printed and
  //! read in
  //
  static const char ESCAPE = '\\';
  static const char OPENQUOTE = '\'';
  static const char CLOSEQUOTE = '\'';

  //! UNDEFINED() returns the character string we use to represent
  //! an undefined symbol (i.e., the NULL pointer).  It is wrapped
  //! inside a function in order to avoid the "static order initialization
  //! bug" that plagues C++!
  //
  inline static const char* UNDEFINED() {
    static const char UNDEFINED_[] = "%UNDEFINED%";
    return UNDEFINED_;
  }

  //! dont_escape() is true when c doesn't need to be escaped, i.e., when
  //! c is a printing non-space character other than the escape character,
  //! the quote characters, '%', '(' and ')'.
  //
  inline static bool dont_escape(char c) {
    // isgraph() is only defined for values representable as unsigned char
    // (and EOF), so a plain char must be converted before the call.
    return std::isgraph(static_cast<unsigned char>(c))
      && c != ESCAPE && c != OPENQUOTE && c != CLOSEQUOTE
      && c != '%' && c != '(' && c != ')';
  }

  //! escaped_char() maps characters seen following an ESCAPE to
  //! whatever their internal representation should be.  The letters
  //! a, b, f, n, r, t and v map to the corresponding C control
  //! characters; every other character maps to itself.
  //
  inline static char escaped_char(char c) {
    switch (c) {
    case 'a': return('\a');
    case 'b': return('\b');
    case 'f': return('\f');
    case 'n': return('\n');
    case 'r': return('\r');
    case 't': return('\t');
    case 'v': return('\v');
    default: return c;
    }
  }  //! symbol_type::escaped_char()

public:

  //! returns a NULL (undefined) symbol
  //
  symbol_type() : sp(NULL) { }

  //! converts a string s into a symbol
  //
  symbol_type(const string_type& s) : sp(intern(s)) { }

  //! converts a character array into a symbol; a NULL pointer
  //! yields the undefined symbol
  //
  symbol_type(const char* cp) : sp(NULL) {
    if (cp)
      sp = intern(string_type(cp));
  }

  bool is_defined() const { return sp != NULL; }
  bool is_undefined() const { return sp == NULL; }

  //! Accessors for the underlying string.  All of them except
  //! string_pointer() assert that the symbol is defined.
  //
  operator string_type() const { assert(is_defined()); return *sp; }
  const string_type& string_reference() const { assert(is_defined()); return *sp; }
  const string_type* string_pointer() const { return sp; }
  const char* c_str() const { assert(is_defined()); return sp->c_str(); }

  //! undefined() returns the undefined symbol
  //
  static symbol_type undefined() { return symbol_type(stringptr(NULL)); }

  //! size() returns the number of distinct strings in the symbol table
  //
  static size_t size() { return table().size(); }

  //! already_defined() is true iff s is already in the symbol table.
  //! Unlike constructing a symbol from s, this never adds s to the table.
  //
  static bool already_defined(const string_type& s) { return table().count(s) != 0; }

  //! Comparisons look only at the string pointers, so the ordering is by
  //! storage address, not alphabetic.  std::less is used rather than the
  //! built-in < because only std::less guarantees a total order over
  //! pointers to unrelated objects.
  //
  bool operator== (const symbol_type s) const { return sp == s.sp; }
  bool operator!= (const symbol_type s) const { return sp != s.sp; }
  bool operator< (const symbol_type s) const {
    return std::less<const string_type*>()(sp, s.sp);
  }
  bool operator<= (const symbol_type s) const {
    return !std::less<const string_type*>()(s.sp, sp);
  }
  bool operator> (const symbol_type s) const {
    return std::less<const string_type*>()(s.sp, sp);
  }
  bool operator>= (const symbol_type s) const {
    return !std::less<const string_type*>()(sp, s.sp);
  }

  //! read() reads a symbol from input stream is, skipping leading
  //! whitespace.  Three forms are recognized: an unquoted run of
  //! ordinary and escaped characters, a quoted string, and the
  //! UNDEFINED() marker.  If no symbol can be read the stream is left
  //! in a failed state and this symbol is unchanged.
  //
  std::istream& read(std::istream& is)
  {
    string_type str;
    char c;
    if (!(is >> c)) return is;           // If read fails, return error
    if (dont_escape(c) || c == ESCAPE) { // Recognize a normal symbol
      do {
        if (c == ESCAPE) {
          if (!is.get(c)) return is;     //  Read next character; return if read fails.
          str.push_back(escaped_char(c));//  Push escaped char onto string.
        }
        else
          str.push_back(c);
      }
      while (is.get(c) && (dont_escape(c) || c == ESCAPE));
      if (!is.fail())                    //  Did we read one too many chars?
        is.putback(c);                   //   Yes.  Put it back.
      else if (is.eof())                 //  Are we at eof?
        // The symbol itself was read successfully; hitting end of input
        // while looking for its terminator must not count as a failure.
        is.clear(is.rdstate() & ~std::ios::failbit & ~std::ios::eofbit);
      sp = intern(str);                  //  Load string into symbol
    }
    else if (c == OPENQUOTE) {           // Recognize a quoted string
      if (!is.get(c)) return is;         //  Read next character; return if read fails
      while (c != CLOSEQUOTE) {
        if (c == ESCAPE) {               //  Is this character the escape character?
          if (!is.get(c)) return is;     //   Yes.  Get quoted character.
          str.push_back(escaped_char(c));//   Push character onto string.
        }
        else
          str.push_back(c);              //   Push back ordinary character.
        if (!is.get(c)) return is;       //  Read next character.
      }
      sp = intern(str);                  //  Load string into symbol
    }
    else if (c == UNDEFINED()[0]) {
      for (const char* cp = &UNDEFINED()[1]; *cp; ++cp)
        if (!is.get(c) || c != *cp) {
          // We didn't get the whole UNDEFINED symbol.  setstate() adds
          // failbit without discarding eofbit/badbit that are already set.
          is.setstate(std::ios::failbit);
          return is;
        }
      sp = NULL;                         //  Set s to undefined
    }
    else {                               // c doesn't begin a symbol
      is.putback(c);                     // put it back onto the stream
      is.setstate(std::ios::failbit);    // set the fail bit
    }
    return is;
  }  // symbol_type::read()


  //! write() writes this symbol to os in a form that read() maps back
  //! to the same symbol: UNDEFINED() for the undefined symbol, an empty
  //! pair of quotes for the empty string, and otherwise the characters
  //! of the string with ESCAPE in front of each one that needs it.
  //
  std::ostream& write(std::ostream& os) const
  {
    if (is_undefined())
      os << UNDEFINED();
    else {
      const string_type& str = string_reference();
      if (str.empty())
        os << OPENQUOTE << CLOSEQUOTE;
      else
        for (typename string_type::const_iterator si = str.begin(); si != str.end(); ++si) {
          if (!dont_escape(*si))
            os.put(ESCAPE);
          os.put(*si);
        }
    }
    return os;
  }  // symbol_type::write()
};

namespace EXT_NAMESPACE {
  //! Hash function object for symbols, so that symbols can be used as
  //! keys in the hash containers of this namespace.  The hash value is
  //! the symbol's string pointer reinterpreted as an integer; the
  //! characters of the string are never examined.
  //
  template <typename string_type> struct hash<symbol_type<string_type> > {
    size_t operator()(symbol_type<string_type> s) const
    {
      const string_type* const address = s.string_pointer();
      return reinterpret_cast<size_t>(address);
    }
  };
}

//! Stream extraction for symbols: delegates to s.read(), which skips
//! leading whitespace and leaves is in a failed state if no symbol
//! could be read.
//
template <typename string_type>
std::istream& operator>> (std::istream& is, symbol_type<string_type>& s) {
  s.read(is);   // read() hands back this same stream
  return is;
}

//! Stream insertion for symbols: delegates to s.write(), which emits
//! the symbol in a form that operator>> can read back.
//
template <typename string_type>
std::ostream& operator<< (std::ostream& os, symbol_type<string_type> s) {
  s.write(os);  // write() hands back this same stream
  return os;
}

//! symbol is the usual name for a symbol over std::string, which is
//! symbol_type's default string type.
//
typedef symbol_type<> symbol;

#endif  // sym.h
