/* interaction1.c
 *
 * (c) Mark Johnson, 30th March 2001
 *
 * float interaction(int n, float s[], float *ase)
 *
 * This is a version of interaction.c, hacked up so that the model
 * consists only of a single n-way interaction term and unigram
 * statistics -- all intermediate-level interactions are ignored.
 *
 * This routine returns information to determine if an n-way combination
 * of variables X_0=x_0, ..., X_n-1=x_n-1, occurs more frequently than the 
 * unigram statistics would lead us to expect.
 *
 * interaction() returns the MLE of the n-way interaction term in a
 * log-linear model of the data in s[], and ase is the asymptotic
 * standard error of estimate of this term.  This routine destructively
 * changes s[].
 *
 * s[] should be a vector of size 2^n.  If 0 <= m < n and the bits that
 * are turned on in m are i_1, ..., i_k, then s[m] contains the number
 * of times the combination of variables X_i_1=x_i_1, ..., X_i_k=x_i_k
 * was observed.  Thus s[0] is the total number of counts in the data,
 * s[1] is the number of times X_0=x_0 was observed in the data and
 * s[2^n-1] is the number of times X_0=x_0, ..., X_n-1=x_n-1 was observed
 * in the data.  Note that most of these counts are ignored; they are
 * included here so that the interface is the same as for interaction.c
 * (i.e., this file should be a drop-in replacement)
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

inline static int nbits(int n)
{
  int nb = 0;
  while (n != 0) {
    if (n & 1)
      nb++;
    n >>= 1;
  }
  return nb;
}

#define SUBSET(subset, superset) ((superset & subset) == subset)

float interaction(int n, float s[], float *ase)
{
  float lambda = 0, variance = 0;
  int i, j, nn;

  assert(n > 1);
  nn = 1 << n;         /* size of s[] = 2^n */

  for (i = nn-2; i >= 0; i--) 
    for (j = i+1; j < nn; j++) 
      if (SUBSET(i, j))
	s[i] -= s[j];  /* make s into f by subtracting all more specialized scores */

  for (i = 0; i < n; i++) {
    int ii = 1 << i;
    if (s[ii] < 0) {
      fprintf(stderr, "Error in interaction.c: cell count f[%d]"
	      " = %g is negative\n", ii, s[ii]);
      abort();
    }
    s[ii] += 0.5;           /* continuity correction, see Goodman (1970) */
    variance += 1.0/s[ii];
  }
  variance += 1/s[nn-1];
  variance += (n-1)*(n-1)/s[0];

  assert(variance >= 0);
  *ase = sqrt(variance);

  lambda = log(s[nn-1]);

  for (i = 0; i < n; i++) 
    lambda -= log(s[1 << i]);

  lambda += (n-1)*log(s[0]);

  return lambda;
}


float lower_lambda(int n, float s[], float alpha)
{
  float ase, lambda;
  lambda = interaction(n, s, &ase);
  return lambda - alpha*ase;
}


/* routines for finding inverse of error function
 */

static float sig_;

#define Z_TOL 1.0e-7    /* accuracy of solution */
#define ITMAX 100       /* max no of iterations */
#define EPS   3.0e-8    /* floating point accuracy */

static float erfc_sig(float x) {
  return erfc(x) - sig_;
}

static float
zbrent(float (*func)(float), float x1, float x2, float tol)
{
  int iter;
  float a=x1,b=x2,c=0,d=0,e=0,min1,min2;
  float fa=(*func)(a),fb=(*func)(b),fc,p,q,r,s,tol1,xm;
  
  if (fb*fa > 0.0) {
    fprintf(stderr, "Root must be bracketed in ZBRENT");
    abort();
  }

  fc=fb;
  for (iter=1;iter<=ITMAX;iter++) {
    if (fb*fc > 0.0) {
      c=a;
      fc=fa;
      e=d=b-a;
    }
    if (fabs(fc) < fabs(fb)) {
      a=b;
      b=c;
      c=a;
      fa=fb;
      fb=fc;
      fc=fa;
    }
    tol1=2.0*EPS*fabs(b)+0.5*tol;
    xm=0.5*(c-b);
    if (fabs(xm) <= tol1 || fb == 0.0) return b;
    if (fabs(e) >= tol1 && fabs(fa) > fabs(fb)) {
      s=fb/fa;
      if (a == c) {
	p=2.0*xm*s;
	q=1.0-s;
      } else {
	q=fa/fc;
	r=fb/fc;
	p=s*(2.0*xm*q*(q-r)-(b-a)*(r-1.0));
	q=(q-1.0)*(r-1.0)*(s-1.0);
      }
      if (p > 0.0)  q = -q;
      p=fabs(p);
      min1=3.0*xm*q-fabs(tol1*q);
      min2=fabs(e*q);
      if (2.0*p < (min1 < min2 ? min1 : min2)) {
	e=d;
	d=p/q;
      } else {
	d=xm;
	e=d;
      }
    } else {
      d=xm;
      e=d;
    }
    a=b;
    fa=fb;
    if (fabs(d) > tol1)
      b += d;
    else
      b += (xm > 0.0 ? fabs(tol1) : -fabs(tol1));
    fb=(*func)(b);
  }
  fprintf(stderr, "Maximum number of iterations exceeded in ZBRENT");
  abort();
}

float standard_errors(float sig) {
  sig_ = sig;
  return sqrt(2.0)*zbrent(erfc_sig, 0, 10, Z_TOL);
}
