/* interaction.c
 *
 * (c) Mark Johnson, 27th March 2001
 *
 * float interaction(int n, float s[], float *ase)
 *
 * This routine returns information to determine if an n-way combination
 * of variables X_0=x_0, ..., X_n-1=x_n-1, occurs more frequently than the 
 * 0 ... n-1 way combinations of variables would lead us to expect.
 *
 * interaction() returns the MLE of the n-way interaction term in a
 * log-linear model of the data in s[], and ase is the asymptotic
 * standard error of estimate of this term.  This routine destructively
 * changes s[].
 *
 * s[] should be a vector of size 2^n.  If 0 <= m < 2^n and the bits that
 * are turned on in m are i_1, ..., i_k, then s[m] contains the number
 * of times the combination of variables X_i_1=x_i_1, ..., X_i_k=x_i_k
 * was observed.  Thus s[0] is the total number of counts in the data,
 * s[1] is the number of times X_0=x_0 was observed in the data and
 * s[2^n-1] is the number of times X_0=x_0, ..., X_n-1=x_n-1 was observed
 * in the data.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#define BIT2(b1,b0)  ((b1<<1)|b0)

float interaction2(float s[4], float *ase)
{
  float variance = 0;
  int i;
  assert(s[BIT2(1,1)] <= s[BIT2(1,0)]);
  assert(s[BIT2(1,1)] <= s[BIT2(0,1)]);
  assert(s[BIT2(1,0)] <= s[BIT2(0,0)]);
  assert(s[BIT2(0,1)] <= s[BIT2(0,0)]);
  s[BIT2(1,0)] -= s[BIT2(1,1)];
  s[BIT2(0,1)] -= s[BIT2(1,1)];
  s[BIT2(0,0)] -= s[BIT2(1,1)] + s[BIT2(1,0)] + s[BIT2(0,1)];
  
  for (i = 0; i < 4; ++i) {
    s[i] += 0.5;     /* continuity correction, see Goodman (1970) */
    variance += 1.0/s[i];
    s[i] = log(s[i]);
  }
  assert(variance >= 0);
  *ase = sqrt(variance);
  return s[BIT2(1,1)] - s[BIT2(0,1)] - s[BIT2(1,0)] + s[BIT2(0,0)];
}

  
/* nbits() returns the number of 1 bits in n.
 *
 * The count is taken on the unsigned representation of n.  Shifting a
 * negative int right is implementation-defined and, with an arithmetic
 * shift, never reaches zero; working on an unsigned copy guarantees
 * the loop terminates for every argument.  Results for n >= 0 are
 * unaffected.
 */
static inline int nbits(int n)
{
  unsigned int bits = (unsigned int) n;
  int nb = 0;

  while (bits != 0) {
    nb += (int) (bits & 1u);
    bits >>= 1;
  }
  return nb;
}

/* SUBSET(subset, superset) is true iff every bit set in subset is also
 * set in superset.  Both arguments are evaluated more than once.
 */
#define SUBSET(subset, superset) (((superset) & (subset)) == (subset))

/* interaction() returns the n-way interaction term of a log-linear
 * model of the counts in s[], and stores its asymptotic standard error
 * in *ase.
 *
 * n    number of variables; must be > 1 and small enough that 2^n fits
 *      in an int
 * s    2^n marginal counts, indexed by the bit set of variables whose
 *      combination was counted (s[0] is the total)
 * ase  receives sqrt(sum over cells of 1/(f + 0.5))
 *
 * s[] is modified destructively: on return it holds the disjoint cell
 * counts plus the 0.5 continuity correction.
 *
 * If any cell count comes out negative, a message is written to stderr
 * and the program is aborted.
 */
float interaction(int n, float s[], float *ase)
{
  float lambda = 0, variance = 0;
  int i, bit, nn, parity;

  assert(n > 1);
  /* 1 << n is undefined once it no longer fits in an int */
  assert(n < (int)(sizeof(int) * CHAR_BIT) - 1);
  nn = 1 << n;         /* size of s[] = 2^n */

  /* Make s into f by removing, one variable at a time, the part of
   * each count that belongs to the more specialized cell with that
   * variable's bit set.  This is the Moebius inversion of the superset
   * sums; it takes n * 2^n steps rather than scanning every pair of
   * cells.  Within a pass, only entries without the bit are written
   * and only entries with the bit are read, so the order of i does not
   * matter.
   */
  for (bit = 1; bit < nn; bit <<= 1)
    for (i = 0; i < nn; i++)
      if (!SUBSET(bit, i))
        s[i] -= s[i | bit];

  for (i = 0; i < nn; i++) {
    if (s[i] < 0) {
      fprintf(stderr, "Error in interaction.c: cell count f[%d]"
              " = %g is negative\n", i, s[i]);
      abort();
    }
    s[i] += 0.5;           /* continuity correction, see Goodman (1970) */
    variance += 1.0/s[i];
  }
  assert(variance >= 0);
  *ase = sqrt(variance);

  /* A cell enters with sign +1 when the number of bits set in its
   * index has the same parity as n, and with sign -1 otherwise; the
   * all-ones cell therefore always counts positively.
   */
  parity = n % 2;
  for (i = nn-1; i >= 0; i--)
    lambda += (parity == (nbits(i) % 2) ? 1.0 : -1.0) * log(s[i]);

  return lambda;
}


/* lower_lambda() returns the interaction term for s[] minus alpha
 * times its asymptotic standard error.
 *
 * n and s are passed straight to interaction(), so s[] is modified
 * destructively.  alpha is the multiplier applied to the standard
 * error (presumably a number of standard errors such as the value
 * returned by standard_errors() -- confirm against callers).
 */
float lower_lambda(int n, float s[], float alpha)
{
  float spread;
  const float estimate = interaction(n, s, &spread);

  return estimate - alpha*spread;
}


/* routine for finding inverse of error function
 * This uses Newton's method
 */

#define EPSILON 1e-7  /* halt when fabs(x1-x0) <= EPSILON */
#define MAXITS  100

inline static float ierfc(float alpha)
{
  float x0, x1;
  int   it = 0;

  assert(alpha > 1e-10);
  assert(alpha <= 1);

  if (alpha == 1.0)
    return 0.0;

  x0 = 0.8862269255 * (1-alpha);       /* lower bound on root */
  x1 = 5;                              /* upper bound on root */

  while (fabs(x1-x0) > EPSILON) {
    float x, f;
    if(++it > MAXITS)
      break;
    x = (x0+x1)/2;
    f = erfc(x) - alpha;
    if (f == 0)
      return x;
    else if (f < 0)
      x1 = x;
    else 
      x0 = x;
  }
  
  return (x0+x1)/2;
}
  
/* standard_errors() returns sqrt(2) * ierfc(sig).
 *
 * sig is handed unchanged to ierfc(), so it must lie in the range
 * that ierfc() accepts.
 */
float standard_errors(float sig)
{
  const float inverse = ierfc(sig);

  return sqrt(2.0)*inverse;
}
