// gibbs-pcfg.cc
//
// gibbs-pcfg uses gibbs sampling to estimate a PCFG
//
// Mark Johnson, 17th April 2006

// usage is the help text printed by main() when the command line cannot
// be interpreted.  The option letters described here must be kept in sync
// with the getopt() option string in main().
//
// Note: when -F is not given, no trace stream is opened and no trace
// output is written (debug output controlled by -d goes to stderr).
//
const char usage[] =
"gibbs-pcfg [-d debug] [-F trace-file] [-S sample-rate]\n"
"       [-A parsefile] [-G grammarfile] [-I]  [-r rand-init]\n"
"       [-n niterations] [-w weight]\n"
"       [-T anneal-temp-start] [-t anneal-temp-stop] [-m anneal-its]\n"
"       [-a alpha] [-X] [-Z z-temp] [-z z-its]\n"
"       grammar.lt < train.yld\n"
"\n"
" -d debug       -- debug level\n"
" -F trace-file  -- file to write trace output to (default is no trace output)\n"
" -r rand-init   -- initializer for random number generator (integer)\n"
" -n niterations -- number of iterations\n"
" -w weight      -- default rule weight\n"
" -a alpha       -- default pseudo-count alpha\n"
" -S sample_rate -- resample theta every sample_rate parses\n"
" -A parsefile   -- print analyses of training data to parsefile at termination\n"
" -G grammarfile -- print out grammar to grammarfile at termination\n"
" -I             -- parse sentences in order (default is random order)\n"
" -T             -- start at this annealing temperature\n"
" -t             -- stop with this annealing temperature\n"
" -m             -- anneal for this many iterations\n"
" -X             -- train.yld contains initial trees rather than strings\n"
" -Z z-temp      -- set Z-temp\n"
" -z z-its       -- perform z-its at temperature Z-temp at end\n";
    

// unaryclosetolerance is the convergence threshold used by
// cky_type::inside_unaryclose(): the unary closure loop keeps iterating
// while the largest relative increase in any category's probability
// exceeds this value.
static const float unaryclosetolerance = 1e-7;
         
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <ext/hash_map>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <unistd.h>
#include <utility>
#include <vector>

#include "gammavariate.h"
#include "mt19937ar.h"
#include "sym.h"
#include "tree.h"
#include "trie.h"
#include "utility.h"


// Short type aliases used throughout this file.
typedef unsigned U;           // unsigned integer: string positions, counts of iterations, indices
typedef long double F;        // floating-point type for probabilities, counts and priors
typedef symbol S;             // a grammar symbol (category or terminal); see sym.h
typedef std::vector<S> Ss;    // a sequence of symbols (a sentence, or a rule's cats)

typedef std::map<S,F> S_F;    // map from symbol to a floating-point value

// debug is the global debug level (set with -d in main()); larger values
// enable more diagnostic output on std::cerr.
int debug = 0;

//! power() returns x raised to the power y, computed at the precision
//! of its arguments (float, double or long double).
//
inline float power(float x, float y) {
  return std::pow(x, y);
}

inline double power(double x, double y) {
  return std::pow(x, y);
}

inline long double power(long double x, long double y) {
  return std::pow(x, y);
}

//! readline_symbols() reads all of the symbols on the current
//! line into syms
//
std::istream& readline_symbols(std::istream& is, Ss& syms) {
  // syms is always emptied, even when no line can be read
  syms.clear();

  std::string text;
  if (!std::getline(is, text))
    return is;                     // stream now tests false; syms stays empty

  // split the line on whitespace; each token becomes one symbol
  std::istringstream tokens(text);
  for (std::string word; tokens >> word; )
    syms.push_back(word);

  return is;
}  // readline_symbols()


//! A default_value_type{} object is used to read an object from a stream,
//! assigning a default value if the read fails.  Users should not need to
//! construct such objects, but should use default_value() instead.
//
template <typename object_type, typename default_type>
struct default_value_type {
  object_type& object;              //!< where the value read from the stream is stored
  const default_type defaultvalue;  //!< value assigned to object if the read fails

  //! Binds the reader to target and remembers fallback as the default.
  default_value_type(object_type& target, const default_type fallback)
    : object(target), defaultvalue(fallback)
  { }
};

//! default_value() is used to read an object from a stream, assigning a
//! default value if the read fails.  It returns a default_value_type{}
//! object, which does the actual reading.
//
template <typename object_type, typename default_type>
default_value_type<object_type,default_type>
default_value(object_type& object, const default_type defaultvalue=default_type()) {
  typedef default_value_type<object_type,default_type> reader_type;
  // the reader only holds a reference to object plus a copy of the default
  reader_type reader(object, defaultvalue);
  return reader;
}

//! This version of operator>>() reads default_value_type{} from an input stream.
//
template <typename object_type, typename default_type>
std::istream& operator>> (std::istream& is,
                          default_value_type<object_type, default_type> dv) {
  // a stream that has already failed is left untouched
  if (!is)
    return is;

  if (!(is >> dv.object)) {
    // the read failed: clear only failbit (eofbit and badbit are kept)
    // and fall back to the default value
    is.clear(is.rdstate() & ~std::ios::failbit);
    dv.object = dv.defaultvalue;
  }
  return is;
}

// inline F random1() { return rand()/(RAND_MAX+1.0); }
//! random1() returns the next value from mt_genrand_res53() (declared in
//! mt19937ar.h), converted to F.  Callers in this file scale it by a
//! probability mass to obtain a sampling threshold.
//! NOTE(review): presumably uniform on [0,1) -- confirm against mt19937ar.h.
inline F random1() {
  const F draw = mt_genrand_res53();
  return draw;
}

//! R{} holds a single PCFG rule, together with associated
//! information, i.e., its prior and its count
//
struct R {
  Ss cats;   // cats[0] is the parent, cats[1..] are the children in order
  F  theta;  // rule probability
  F  alpha;  // bayesian prior (pseudo-count)
  F  count;  // number of times the rule is used in the current parses

  //! Constructs an empty rule with zero probability, prior and count.
  R()
    : theta(0), alpha(0), count(0)
  { }

  //! Constructs the rule parent --> rhs with the given probability and
  //! prior; its count starts at zero.
  R(S parent, const Ss& rhs, F theta, F alpha)
    : cats(1, parent), theta(theta), alpha(alpha), count(0)
  {
    cats.insert(cats.end(), rhs.begin(), rhs.end());
  }  // R::R()

  //! parent() returns the left-hand side category of the rule.
  S parent() const {
    assert(!cats.empty());
    return cats.front();
  }

  //! prob() returns the rule's current probability theta.
  F prob() const { return theta; }

};  // R{}

std::ostream& operator<< (std::ostream& os, const R& r) {
  assert(!r.cats.empty());
  os << r.theta << '\t';
  if (debug >= 1000)
    os << r.alpha << '\t' << r.count;
  os << '\t' << r.cats[0].c_str() << " -->";
  for (Ss::const_iterator it = r.cats.begin()+1; it != r.cats.end(); ++it)
    os << ' ' << it->c_str();
  return os;
}

typedef std::vector<R> Rs;                   // the grammar's rules

typedef R* Rp;                               // pointer to a rule (stored in the index structures)

typedef ext::hash_map<S, Rp> S_Rp;           // symbol -> rule pointer
typedef ext::hash_map<S, S_Rp> S_S_Rp;       // symbol -> symbol -> rule pointer

typedef ext::hash_map<Ss,Rp> Ss_Rp;          // symbol sequence (a rule's cats) -> rule pointer

typedef trie<S, S_Rp> St_S_Rp;               // trie keyed by symbol sequences, each node holding an S_Rp
typedef St_S_Rp::const_iterator Stit;        // position in such a trie


//! pcfg_type{} holds a PCFG (a vector of rules with probabilities, priors
//! and counts) together with the indices into those rules that the CKY
//! sampler needs.  The indices store pointers into rules, so index() must
//! be called again whenever rules is modified.
//
struct pcfg_type {
  Rs      rules;                       //!< grammar rules
  F       default_theta;               //!< used when reading a grammar
  F       default_alpha;               //!< prior used for rules read without an explicit alpha

  // indices

  Ss_Rp   cats_rulep;                  //!< rule pointer for this rule
  S_S_Rp  child_parent_urulep;         //!< unary rules, indexed by child
  St_S_Rp rhs_parent_rulep;            //!< rhs -> rules with this rhs

  pcfg_type(F t=1, F a=1) : default_theta(t), default_alpha(a) { }

  //! start() returns the start category of the grammar, i.e., the parent
  //! of the first rule.  The grammar must contain at least one rule.
  //
  S start() const { assert(!rules.empty()); assert(!rules[0].cats.empty()); return rules[0].cats[0]; }


  //! sample_theta() sets theta to a sample from the Dirichlet distribution
  //! defined by the rule counts and alpha.  Each rule's Dirichlet parameter
  //! is count+alpha; when beta != 1 it is replaced by beta*(count+alpha) - (beta-1).
  //! The gamma variates are then normalized over rules sharing a parent.
  //
  void sample_theta (F beta=1) {
    S_F parent_sum;
    foreach (Rs, it, rules) {
      F alpha = it->count + it->alpha;
      if (beta != 1)
        alpha = beta*alpha - (beta-1);
      assert(alpha > 0);
      if (alpha > 0)
        parent_sum[it->parent()] += (it->theta = gammavariate(alpha));
      else
        it->theta = 0;     // only reachable when assertions are disabled
    }
    foreach (Rs, it, rules)
      if (it->theta > 0)
        it->theta /= afind(parent_sum, it->parent());
  }  // pcfg_type::sample_theta()

  //! increment() increments the count of the all of the local trees
  //! in tp, and returns the conditional probability of tp.
  //! A negative count removes the tree's rule uses again; the count of a
  //! rule must never become negative.
  //
  F increment(const tree* tp, F count = 1) {
    F prob = 1;
    if (tp->child) {
      {
        // look up the rule that expands this node
        Ss cats;
        cats.push_back(tp->label.cat);
        for (const tree* child = tp->child; child; child = child->next)
          cats.push_back(child->label.cat);
        Rp rp = afind(cats_rulep, cats);
        prob *= rp->prob();
        rp->count += count;
        if (rp->count < 0)
          std::cerr << "## rp->count = " << rp->count << ", cats = " << cats << ", tp = " << tp << std::endl;
        assert(rp->count >= 0);
      }
      for (const tree* child = tp->child; child; child = child->next)
        prob *= increment(child, count);
    }
    return prob;
  }  // pcfg_type::increment()

  //! index() constructs the indices into rules needed by the sampler.
  //! Unary rules go into child_parent_urulep; rules with two or more
  //! children go into the trie rhs_parent_rulep.  A rule without any
  //! children is reported and left out of both of these indices.
  //
  void index() {
    cats_rulep.clear();
    child_parent_urulep.clear();
    rhs_parent_rulep.clear();
    S_Rp nullsrp;

    foreach (Rs, rit, rules) {

      bool inserted = cats_rulep.insert(Ss_Rp::value_type(rit->cats, &*rit)).second;
      if (!inserted)
        std::cerr << "## Error in gibbs-pcfg::index() Duplicate rule " << *rit << std::endl;

      if (rit->cats.size() < 2) {
        // A rule with an empty right-hand side has no cats[1]; indexing it
        // as a unary rule would read past the end of cats.
        std::cerr << "## Error in gibbs-pcfg::index() Rule with empty right-hand side";
        if (!rit->cats.empty())
          std::cerr << ", parent = " << rit->cats[0].c_str();
        std::cerr << std::endl;
      }
      else if (rit->cats.size() == 2)
        child_parent_urulep[rit->cats[1]][rit->cats[0]] = &*rit;
      else {
        S_Rp& parent_rulep = rhs_parent_rulep.insert(rit->cats.begin()+1, rit->cats.end(), nullsrp).first->data;
        parent_rulep[rit->cats[0]] = &*rit;
      }
    }
  }  // pcfg_type::index()

  //! log2prob_corpus() returns the marginal probability of
  //! the corpus counts given the prior, as a base-2 logarithm.
  //
  F log2prob_corpus() const {
    typedef std::pair<F,F> FF;
    typedef ext::hash_map<S,FF> S_FF;
    S_FF parent_alphacount;      // parent -> (sum of alpha, sum of count) over its rules
    F logprob = 0;
    cforeach (Rs, it, rules) {
      logprob += lgamma(it->count+it->alpha);
      logprob -= lgamma(it->alpha);
      FF& alphacount = parent_alphacount[it->parent()];
      alphacount.first += it->alpha;
      alphacount.second += it->count;
    }
    cforeach (S_FF, it, parent_alphacount) {
      logprob -= lgamma(it->second.first+it->second.second);
      logprob += lgamma(it->second.first);
    }
    return logprob / log(2.0);   // convert natural log to log base 2
  }  // pcfg_type::log2prob_corpus()

  //! loglikelihood() returns the probability of the parse trees
  //! (or equivalently, their counts) given theta estimated from
  //! them.  This is what EM optimizes.  The value is a natural logarithm;
  //! rules with zero count contribute nothing.
  //
  F loglikelihood() const {
    S_F parent_count;
    F logprob = 0;
    cforeach (Rs, it, rules)
      if (it->count > 0) {
        logprob += it->count*log(it->count);
        parent_count[it->parent()] += it->count;
      }

    cforeach (S_F, it, parent_count)
      logprob -= it->second*log(it->second);
    return logprob;
  } // pcfg_type::loglikelihood()

};  // pcfg_type{}
  

std::istream& operator>> (std::istream& is, pcfg_type& g) {
  g.rules.clear();
  S_F parent_sum;   // used to normalize theta
  std::string parent;
  F theta, alpha;
  while (is >> default_value(theta, g.default_theta) 
	 >> default_value(alpha, g.default_alpha)
	 >> parent >> " -->") {
    Ss rhs;
    readline_symbols(is, rhs);
    g.rules.push_back(R(parent, rhs, theta, alpha));
    parent_sum[parent] += theta;
  }

  foreach (Rs, it, g.rules)
    it->theta /= afind(parent_sum, it->parent());  // normalize theta;

  g.index();  // construct indices

  return is;
} 

std::ostream& operator<< (std::ostream& os, const pcfg_type& g) {
  cforeach (Rs, it, g.rules)
    os << *it << std::endl;
  return os;
}

namespace EXT_NAMESPACE {
  //! Hash function for trie iterators, so that they can be used as
  //! hash_map keys: the hash value is the address of the trie node
  //! the iterator refers to.
  template <> struct hash<Stit> {
    size_t operator()(const Stit t) const
    {
      return reinterpret_cast<size_t>(&*t);
    }  // ext::hash<Stit>::operator()
  };  // ext::hash<Stit>{}
}  // namespace EXT_NAMESPACE


//! cky_type{} is a chart parser for the grammar g.  inside() fills the
//! chart with inside probabilities for a string, and random_tree() then
//! samples a parse tree of that string from the chart.  Rule probabilities
//! are raised to the power anneal wherever they are used.
//
struct cky_type {

  pcfg_type& g;
  F anneal;         // annealing factor (1 = no annealing)

  cky_type(pcfg_type& g, F anneal=1) : g(g), anneal(anneal) { }

  //! index() returns the location of cell in cells[], for the span from
  //! string position i to string position j, where i < j
  //
  static U index(U i, U j) { return j*(j-1)/2+i; }

  //! ncells() returns the number of cells required for sentence of length n
  //
  static U ncells(U n) { return n*(n+1)/2; }

  typedef std::vector<S_F> S_Fs;
  typedef ext::hash_map<Stit,F> Stit_F;
  typedef std::vector<Stit_F> Stit_Fs;

  Ss terminals;       //!< the string most recently passed to inside()
  S_Fs inactives;     //!< per cell: category -> inside probability
  Stit_Fs actives;    //!< per cell: partial right-hand side (trie position) -> probability


  //! inside() constructs the inside table, and returns the probability
  //! of the start symbol rewriting to the terminals.  An empty string
  //! has no chart cells and yields probability 0.
  //
  template <typename terminals_type>
  F inside(const terminals_type& terminals0) {

    terminals = terminals0;

    if (debug >= 10000)
      std::cerr << "# cky::inside() terminals = " << terminals << std::endl;

    U n = terminals.size();

    inactives.clear();
    inactives.resize(ncells(n));
    actives.clear();
    actives.resize(ncells(n));

    // With n == 0 the chart is empty, so looking up the cell for the
    // whole string below would index past the end of inactives.
    if (n == 0)
      return 0;

    // cells of width 1: the terminal itself plus everything reachable
    // from it by unary rules
    for (U i = 0; i < n; ++i) {
      inactives[index(i,i+1)][terminals[i]] = 1;
      inside_unaryclose(inactives[index(i,i+1)], actives[index(i,i+1)]);

      if (debug >= 20000)
        std::cerr << "# cky::inside() inactives[" << i << "," << i+1 << "] = "
                  << inactives[index(i,i+1)] << std::endl;
      if (debug >= 20100)
        std::cerr << "# cky::inside() actives[" << i << "," << i+1 << "] = "
                  << actives[index(i,i+1)] << std::endl;
    }

    // wider cells, narrowest first: extend a partial right-hand side ending
    // at mid with a complete category spanning mid..right
    for (U gap = 2; gap <= n; ++gap)
      for (U left = 0; left + gap <= n; ++left) {
        U right = left + gap;
        S_F& parentinactives = inactives[index(left,right)];
        Stit_F& parentactives = actives[index(left,right)];
        for (U mid = left+1; mid < right; ++mid) {
          Stit_F& leftactives = actives[index(left,mid)];
          const S_F& rightinactives = inactives[index(mid,right)];
          cforeach (Stit_F, itleft, leftactives) {
            const Stit leftactive = itleft->first;
            const F leftprob = itleft->second;
            cforeach (S_F, itright, rightinactives) {
              S rightinactive = itright->first;
              const F rightprob = itright->second;
              const Stit parentactive = leftactive->find1(rightinactive);
              if (parentactive != leftactive->end()) {
                F leftrightprob = leftprob * rightprob;
                // rules whose right-hand side is complete at this trie node
                cforeach (S_Rp, itparent, parentactive->data) {
                  S parent = itparent->first;
                  parentinactives[parent] += leftrightprob
                    * power(itparent->second->prob(), anneal);
                }
                // longer right-hand sides continue from this trie node
                if (!parentactive->key_trie.empty())
                  parentactives[parentactive] += leftrightprob;
              }
            }
          }
        }
        inside_unaryclose(parentinactives, parentactives);
        if (debug >= 20000)
          std::cerr << "# cky::inside() inactives[" << left << "," << right
                    << "] = " << parentinactives << std::endl;
        if (debug >= 20100)
          std::cerr << "# cky::inside() actives[" << left << "," << right << "] = "
                    << parentactives << std::endl;
      }

    return dfind(inactives[index(0,n)], g.start());
  }  // cky_type::inside()

  //! inside_unaryclose() adds to inactives the probability contributed by
  //! unary rules, repeating until the largest relative increase of any
  //! category is no greater than unaryclosetolerance.  It then adds to
  //! actives an entry for every category in inactives that is the first
  //! child of some right-hand side in g.rhs_parent_rulep.
  //
  void inside_unaryclose(S_F& inactives, Stit_F& actives) {
    F delta = 1;
    S_F delta_prob1 = inactives;   // probability added in the most recent round
    S_F delta_prob0;
    while (delta > unaryclosetolerance) {
      delta = 0;
      delta_prob0.swap(delta_prob1);
      delta_prob1.clear();
      cforeach (S_F, it0, delta_prob0) {
        S child = it0->first;
        S_S_Rp::const_iterator it = g.child_parent_urulep.find(child);
        if (it != g.child_parent_urulep.end()) {
          const S_Rp& parent_urulep = it->second;
          cforeach (S_Rp, it1, parent_urulep) {
            S parent = it1->first;
            F prob = it0->second * power(it1->second->prob(), anneal);
            delta_prob1[parent] += prob;
            delta = std::max(delta, prob/(inactives[parent] += prob));
          }
        }
      }
    }
    cforeach (S_F, it0, inactives) {
      Stit it1 = g.rhs_parent_rulep.find1(it0->first);
      if (it1 != g.rhs_parent_rulep.end())
        actives[it1] += it0->second;
    }
  } // cky_type::inside_unaryclose()


  //! random_tree() returns a random parse tree for terminals.  inside()
  //! must have been called first, on a non-empty string.  The caller
  //! owns the returned tree.
  //
  tree* random_tree() {
    U n = terminals.size();
    assert(n > 0);     // an empty string has no chart cell to sample from
    return random_inactive(g.start(), afind(inactives[index(0, n)], g.start()), 0, n);
  }  // cky_type::random_tree

  //! random_inactive() returns a random expansion for an inactive edge,
  //! i.e., a tree with root parent spanning left..right.  parentprob is
  //! the probability mass to sample from, and next becomes the new
  //! node's right sibling.
  //
  tree* random_inactive(const S parent, const F parentprob, const U left, const U right,
                        tree* next = NULL) const {

    tree* tp = new tree(parent, NULL, next);

    // a terminal spanning its own position is a leaf
    if (left+1 == right && parent == terminals[left])
      return tp;

    const S_F& parentinactives = inactives[index(left, right)];
    F probthreshold = random1() * parentprob;
    F probsofar = 0;

    // try unary rules

    cforeach (S_F, it0, parentinactives) {
      S child = it0->first;
      F childprob = it0->second;
      S_S_Rp::const_iterator it1 = g.child_parent_urulep.find(child);
      if (it1 != g.child_parent_urulep.end()) {
        const S_Rp& parent1_urulep = it1->second;
        S_Rp::const_iterator it2 = parent1_urulep.find(parent);
        if (it2 != parent1_urulep.end()) {
          probsofar += childprob * power(it2->second->prob(), anneal);
          if (probsofar >= probthreshold) {
            tp->child = random_inactive(child, childprob, left, right);
            return tp;
          }
        }
      }
    }

    // try binary rules

    for (U mid = left+1; mid < right; ++mid) {
      const Stit_F& leftactives = actives[index(left,mid)];
      const S_F& rightinactives = inactives[index(mid,right)];
      cforeach (Stit_F, itleft, leftactives) {
        const Stit leftactive = itleft->first;
        const F leftprob = itleft->second;
        cforeach (S_F, itright, rightinactives) {
          S rightinactive = itright->first;
          const F rightprob = itright->second;
          const Stit parentactive = leftactive->find1(rightinactive);
          if (parentactive != leftactive->end()) {
            S_Rp::const_iterator it = parentactive->data.find(parent);
            if (it != parentactive->data.end()) {
              probsofar += leftprob * rightprob
                * power(it->second->prob(), anneal);
              if (probsofar >= probthreshold) {
                // the rightmost child is built first and passed as the
                // sibling that follows the children to its left
                tp->child = random_active(leftactive, leftprob, left, mid,
                                          random_inactive(rightinactive, rightprob, mid, right));
                return tp;
              }
            }
          }
        }
      }
    }

    // no expansion reached the threshold; report it and return the bare node
    std::cerr << "## Error in cky_type::random_inactive(), parent = " << parent
              << ", left = " << left << ", right = " << right
              << ", probsofar = " << probsofar << ", probthreshold = " << probthreshold
              << std::endl;
    return tp;
  }  // cky_type::random_inactive()

  //! random_active() returns a random sequence of sibling trees spanning
  //! left..right whose categories spell out the partial right-hand side
  //! identified by the trie position parent; next is linked after the
  //! last of them.  It returns NULL (after reporting an error) if no
  //! expansion reaches the sampling threshold.
  //
  tree* random_active(const Stit parent, F parentprob, const U left, const U right,
                      tree* next = NULL) const {
    F probthreshold = random1() * parentprob;
    F probsofar = 0;

    // unary rule

    const S_F& parentinactives = inactives[index(left, right)];
    cforeach (S_F, it, parentinactives)
      if (g.rhs_parent_rulep.find1(it->first) == parent) {
        probsofar += it->second;
        if (probsofar >= probthreshold)
          return random_inactive(it->first, it->second, left, right, next);
        break;  // only one unary child can possibly generate this parent
      }

    // binary rules

    for (U mid = left + 1; mid < right; ++mid) {
      const Stit_F& leftactives = actives[index(left,mid)];
      const S_F& rightinactives = inactives[index(mid,right)];
      cforeach (Stit_F, itleft, leftactives) {
        const Stit leftactive = itleft->first;
        const F leftprob = itleft->second;
        cforeach (S_F, itright, rightinactives) {
          S rightinactive = itright->first;
          const F rightprob = itright->second;
          if (parent == leftactive->find1(rightinactive)) {
            probsofar += leftprob * rightprob;
            if (probsofar >= probthreshold) {
              return random_active(leftactive, leftprob, left, mid,
                                   random_inactive(rightinactive, rightprob, mid, right, next));
            }
          }
        }
      }
    }

    std::cerr << "## Error in cky_type::random_active(), parent = " << parent
              << ", left = " << left << ", right = " << right
              << ", probsofar = " << probsofar << ", probthreshold = " << probthreshold
              << std::endl;
    return NULL;
  }  // cky_type::random_active()

}; // cky_type{}


typedef std::vector<Ss> Sss;           // the training strings, one symbol sequence per sentence
typedef std::vector<tree*> tps_type;   // one parse tree pointer per training sentence (may be NULL)

//! gibbs_estimate() runs the Gibbs sampler: it alternates between
//! resampling the rule probabilities of g from the current rule counts
//! and resampling a parse tree for each training string.
//!
//!  g                   -- grammar; its counts and theta are updated in place
//!  trains              -- training strings
//!  tps                 -- one tree pointer per string; a NULL entry is replaced
//!                         by a tree sampled from the initial grammar.  All trees
//!                         are deleted before returning and the entries set to NULL.
//!  niterations         -- number of passes through the training data
//!  anneal_start, anneal_stop -- annealing factors (inverse temperatures) at
//!                         the start and end of the annealing phase
//!  anneal_its          -- number of initial iterations over which to anneal
//!  z_temp, z_its       -- the final z_its iterations are run at temperature z_temp
//!  theta_sample_rate   -- resample theta every this many parses
//!                         (0 = at the start of each iteration)
//!  random_order        -- visit the strings in a shuffled order each iteration
//!  analyses_stream_ptr -- if non-NULL, the final trees are written here
//!  trace_stream_ptr    -- if non-NULL, one trace line is written per iteration
//!
//! Returns the log2 marginal probability of the final parses.
//
F gibbs_estimate(pcfg_type& g, const Sss& trains, tps_type& tps,
                 U niterations = 100,
                 F anneal_start = 1, F anneal_stop = 1, U anneal_its = 0,
                 F z_temp = 1.0, U z_its = 0,
                 U theta_sample_rate = 0,
                 bool random_order = true,
                 std::ostream* analyses_stream_ptr = NULL,
                 std::ostream* trace_stream_ptr = NULL) {

  U n = trains.size();
  assert(tps.size() == n);
  if (theta_sample_rate > n)
    theta_sample_rate = n;      // no point in sampling less than once per iteration

  U nwords = 0;
  cky_type p(g, anneal_start);
  F sum_log2prob = 0;

  // initialize tps with trees; don't learn

  for (unsigned i = 0; i < n; ++i) {
    if (debug >= 1000)
      std::cerr << "# trains[" << i << "] = " << trains[i];

    nwords += trains[i].size();

    if (!tps[i]) {
      F tprob = p.inside(trains[i]);

      if (debug >= 1000)
        std::cerr << ", tprob = " << tprob;
      if (tprob <= 0)
        std::cerr << "## Error in gibbs_estimate(): tprob = " << tprob
                  << ", trains[" << i << "] = " << trains[i] << std::endl;

      assert(tprob > 0);
      sum_log2prob += log2(tprob);
      tps[i] = p.random_tree();
    }

    g.increment(tps[i], 1);

    if (debug >= 1000)
      std::cerr << ", tps[" << i << "] = " << tps[i] << std::endl;
  }

  // collect statistics from the random trees
  typedef std::vector<U> Us;
  Us index(n);         // order in which the strings are visited
  U unchanged = 0;     // number of parses left unchanged by the last iteration

  for (unsigned i = 0; i < n; ++i)
    index[i] = i;

  for (U iteration = 0; iteration < niterations; ++iteration) {

    if (random_order)
      std::random_shuffle(index.begin(), index.end());

    if (iteration < anneal_its) {
      // geometric interpolation from anneal_start to anneal_stop; with a
      // single annealing iteration the exponent would be 0/0, so that
      // iteration simply runs at anneal_start
      if (anneal_its > 1)
        p.anneal = anneal_start*power(anneal_stop/anneal_start,F(iteration)/F(anneal_its-1));
      else
        p.anneal = anneal_start;
    }
    else if (iteration + z_its >= niterations)
      p.anneal = 1.0/z_temp;      // the last z_its iterations
    else
      p.anneal = anneal_stop;

    assert(finite(p.anneal));

    F log2prob_corpus = g.log2prob_corpus();
    F loglikelihood = g.loglikelihood();

    if (debug >= 100) {
      std::cerr << "# Iteration " << iteration << ", "
                << -log2prob_corpus/nwords << " bits per word, "
                << "log likelihood = " << -loglikelihood << ", "
                << unchanged << '/' << n << " parses did not change";
      if (p.anneal != 1)
        std::cerr << ", temperature = " << 1/p.anneal;
      std::cerr << '.' << std::endl;
    }

    if (trace_stream_ptr)
      *trace_stream_ptr << iteration << '\t'            // iteration
                        << 1.0/p.anneal << '\t'         // temperature
                        << -log2prob_corpus << '\t'     // - log2 P(corpus)
                        << -loglikelihood << '\t'       // - log likelihood
                        << unchanged << '\t'            // # unchanged parses
                        << n-unchanged                  // # changed
                        << std::endl;

    sum_log2prob = 0;
    unchanged = 0;

    // NOTE(review): with theta_sample_rate == 0 theta is sampled here and
    // again for the first string below; the second sample replaces the
    // first.  Kept as is so that the random number sequence is unchanged.
    if (theta_sample_rate == 0)
      g.sample_theta(p.anneal);

    for (U i0 = 0; i0 < n; ++i0) {

      if (theta_sample_rate == 1
          || ( theta_sample_rate > 0 && ((iteration*n+i0) % theta_sample_rate) == 0)
          || ( theta_sample_rate == 0 && i0 == 0)) {
          g.sample_theta(p.anneal);
          if (debug >= 10000)
            std::cerr << "\n# resampled grammar\n" << g << std::endl;
      }

      U i = index[i0];
      if (debug >= 1000)
        std::cerr << "\n# trains[" << i << "] = " << trains[i];

      tree* tp0 = tps[i];

      F tprob = p.inside(trains[i]);       // parse string
      if (tprob <= 0)
        std::cerr << "## Error in gibbs_estimate(): tprob = " << tprob
                  << ", iteration = " << iteration
                  << ", trains[" << i << "] = " << trains[i] << std::endl
                  << "## g = " << g << std::endl;
      assert(tprob > 0);
      if (debug >= 1000)
        std::cerr << ", tprob = " << tprob;
      sum_log2prob += log2(tprob);

      tree* tp1 = p.random_tree();

      if (*tp0 == *tp1) {
        // same parse as before: the counts are already correct
        ++unchanged;
        delete tp1;
        if (debug >= 1000)
          std::cerr << ", tp0 == tp1" << std::flush;
      }
      else {
        // replace the old parse by the new one and move its counts
        tps[i] = tp1;
        g.increment(tp0, -1);
        g.increment(tp1, 1);
        delete tp0;
      }

      if (debug >= 1000)
        std::cerr << ", tps[" << i << "] = " << tps[i] << std::endl;
    }
  }

  F log2prob_corpus = g.log2prob_corpus();
  F loglikelihood = g.loglikelihood();

  if (debug >= 10) {
    std::cerr << "# After " << niterations << " iterations, "
              << -log2prob_corpus/nwords << " bits per word, "
              << "log likelihood = " << -loglikelihood << ", "
              << unchanged << '/' << n << " parses did not change";
    std::cerr << '.' << std::endl;
  }

  if (analyses_stream_ptr)
    for (U i = 0; i < n; ++i)
      *analyses_stream_ptr << tps[i] << std::endl;

  for (U i = 0; i < n; ++i) {
    delete tps[i];
    tps[i] = NULL;     // don't leave dangling pointers in the caller's vector
  }

  return log2prob_corpus;
}  // gibbs_estimate()



//! main() parses the command line, reads the grammar from the file named
//! by the single non-option argument and the training data from standard
//! input, runs gibbs_estimate(), and finally writes the grammar to the
//! -G file if one was given.
//
int main(int argc, char** argv) {

  pcfg_type g;
  bool random_order = true;
  U niterations = 100;
  U theta_sample_rate = 0;
  F anneal_start = 1;        // inverse of the -T temperature
  F anneal_stop = 1;         // inverse of the -t temperature
  U anneal_its = 100;
  bool tree_initialize = false;
  F z_temp = 1;
  U z_its = 0;
  unsigned long rand_init = 0;
  std::ostream* grammar_stream_ptr = NULL;
  std::ostream* analyses_stream_ptr = NULL;
  std::ostream* trace_stream_ptr = NULL;

  int chr;
  while ((chr = getopt(argc, argv, "A:F:G:IS:T:XZ:a:d:m:n:r:t:w:z:")) != -1)
    switch (chr) {
    case 'A':
      analyses_stream_ptr = new std::ofstream(optarg);
      break;
    case 'F':
      trace_stream_ptr = new std::ofstream(optarg);
      break;
    case 'G':
      grammar_stream_ptr = new std::ofstream(optarg);
      break;
    case 'I':
      random_order = false;
      break;
    case 'S':
      theta_sample_rate = strtoul(optarg, NULL, 10);
      break;
    case 'T':
      anneal_start = 1/atof(optarg);
      break;
    case 'X':
      tree_initialize = true;
      break;
    case 'Z':
      z_temp = atof(optarg);
      break;
    case 'a':
      g.default_alpha = atof(optarg);
      break;
    case 'd':
      debug = atoi(optarg);
      break;
    case 'm':
      anneal_its = atoi(optarg);
      break;
    case 'n':
      niterations = atoi(optarg);
      break;
    case 'r':
      rand_init = strtoul(optarg, NULL, 10);
      break;
    case 't':
      anneal_stop = 1/atof(optarg);
      break;
    case 'w':
      g.default_theta = atof(optarg);
      break;
    case 'z':
      z_its = atoi(optarg);
      break;
    default:
      std::cerr << "# Error in " << argv[0]
                << ": can't interpret argument -" << char(chr) << std::endl;
      std::cerr << usage << std::endl;
      exit(EXIT_FAILURE);
    }

  // exactly one non-option argument (the grammar file) is required;
  // without it argv[optind] must not be used
  if (argc - optind != 1) {
    std::cerr << "# Error in " << argv[0] << '\n' << usage << std::endl;
    exit(EXIT_FAILURE);
  }

  {
    std::ifstream is(argv[optind]);
    if (!is) {
      std::cerr << "# Error in " << argv[0] << ": can't open grammar file "
                << argv[optind] << std::endl;
      exit(EXIT_FAILURE);
    }
    is >> g;
  }

  // the sampler needs a start category, which is taken from the first rule
  if (g.rules.empty()) {
    std::cerr << "# Error in " << argv[0] << ": no rules read from grammar file "
              << argv[optind] << std::endl;
    exit(EXIT_FAILURE);
  }

  // a seed of 0 (the default) means: seed from the clock
  if (rand_init == 0)
    rand_init = time(NULL);

  mt_init_genrand(rand_init);

  if (trace_stream_ptr)
    *trace_stream_ptr << "# I = " << random_order
                      << ", n = " << niterations
                      << ", S = " << theta_sample_rate
                      << ", a = " << g.default_alpha
                      << ", w = " << g.default_theta
                      << ", m = " << anneal_its
                      << ", Z = " << z_temp
                      << ", z = " << z_its
                      << ", T = " << 1.0/anneal_start
                      << ", t = " << 1.0/anneal_stop     // report a temperature, like T
                      << ", r = " << rand_init
                      << std::endl
                      << "# iteration temperature -logP -logL unchanged changed"
                      << std::endl;

  if (debug >= 1000)
    std::cerr << "# gibbs-pcfg Initial grammar = " << g << std::endl;

  Sss trains;
  tps_type trees;

  if (tree_initialize) {
    // standard input holds trees: use each as the initial parse of its own yield
    Ss terminals;
    tree* tp;
    while (std::cin >> tp) {
      trees.push_back(tp);
      terminals.clear();
      tp->terminals(terminals, true);
      trains.push_back(terminals);
    }
  }
  else {
    // standard input holds one string per line; the initial parses are sampled later
    Ss terminals;
    while (readline_symbols(std::cin, terminals)) {
      trains.push_back(terminals);
      trees.push_back(NULL);
    }
  }

  if (debug >= 1000)
    std::cerr << "# trains.size() = " << trains.size() << std::endl;

  gibbs_estimate(g, trains, trees, niterations,
                 anneal_start, anneal_stop, anneal_its, z_temp, z_its,
                 theta_sample_rate, random_order, analyses_stream_ptr,
                 trace_stream_ptr);

  if (grammar_stream_ptr)
    *grammar_stream_ptr << g << std::flush;

  delete trace_stream_ptr;
  delete analyses_stream_ptr;
  delete grammar_stream_ptr;
}

