/* oi.c
 */

/* Help text for the program: licence terms, build notes, usage and worked
 * examples.  quit_with_error() prints it to stderr after every command-line
 * error message.  The input constraints stated here must stay in step with
 * the checks performed in main().
 */
const char* instructions =
 "oi  -- odds ratio lower bound estimator\n"
 "\n"
 "(c) Mark Johnson, 5th September 2000\n"
 "    Brown University\n"
 "    Email: Mark_Johnson@Brown.edu\n"
 "    Web: http://www.cog.brown.edu/~mj\n"
 "\n"
 "This program is made available freely\n"
 "for research purposes only.  Please contact me if you are interested\n"
 "in commercial application.  I request acknowledgement if results from\n"
 "this program or a program derived from this code appears in a\n"
 "publication.\n"
 "\n"
 "                                NO WARRANTY\n"
 "\n"
 "BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n"
 "FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT\n"
 "WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER\n"
 "PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND,\n"
 "EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\n"
 "IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n"
 "PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\n"
 "PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME\n"
 "THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n"
 "\n"
 "IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n"
 "WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n"
 "REDISTRIBUTE THE PROGRAM, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n"
 "GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF\n"
 "THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO\n"
 "LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n"
 "YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY\n"
 "OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED\n"
 "OF THE POSSIBILITY OF SUCH DAMAGES.\n"
 "\n"
 "\n"
 "Compilation:\n"
 "===========\n"
 "\n"
 "The programs here are written in ANSI C, and should compile with any\n"
 "standard C compiler.  It should only be necessary to run \"make\".  The\n"
 "code is written in such a way that it should benefit from optimization,\n"
 "especially in-lining.  Since compilers differ in the flags they require\n"
 "for such optimization, it is up to you to set the environment variable\n"
 "CFLAGS appropriately.\n"
 "\n"
 "\n"
 "Usage:\n"
 "=====\n"
 "\n"
 "\toi alpha\n"
 "\n"
 "Alpha, the significance level, should be a real number greater than\n"
 "zero and less than one.  A typical value is 0.05.\n"
 "\n"
 "The program reads lines from standard input and writes an equal number\n"
 "of lines to standard output.  Each input line should begin with two\n"
 "ratios of positive integers r1/n1 and r2/n2.  These are interpreted as\n"
 "samples of two different distributions.  It must be the case that\n"
 "n1>=r1>=0, n1>=1, n2>=r2>=0 and n2>=1.  The program copies each input\n"
 "line to the output, prepending a real number p to the line, which is\n"
 "the lower limit of an alpha-level confidence interval estimate of the\n"
 "conditional odds ratio for the 2x2 table\n"
 "\n"
 "\t( r1    ,    n1-r1  )\n"
 "\t( r2    ,    n2-r2  )\n"
 "\n"
 "A conditional estimator for the odds ratio is one that estimates the\n"
 "odds ratio using the distribution of r1 conditioned on the row and\n"
 "column totals.  Conditional estimators may not be appropriate in some\n"
 "situations.  The issue is complicated: please see textbooks on\n"
 "Categorical Data Analysis (under Exact Statistics) for information.\n"
 "\n"
 "This procedure is computationally intensive; the effort grows linearly\n"
 "with n1+n2.\n"
 "\n"
 "\n"
 "Example:\n"
 "=======\n"
 "\n"
 "Suppose the input file sample.dat contains:\n"
 "\n"
 "\t5/10 16/100 sample1 2\n"
 "\t25/100 160/1000 sample3 4\n"
 "\n"
 "Then running\n"
 "\n"
 "\toi 0.1 < sample.dat\n"
 "\n"
 "produces as output\n"
 "\n"
 "\t1.323577         5/10 16/100      sample1 2\n"
 "\t1.121612         25/100 160/1000  sample3 4\n"
 "\n"
 "The prefixed number is the 0.1 confidence level lower bound on the\n"
 "odds ratios of the two distributions whose samples are represented on\n"
 "each line.  If you interpret the odds ratio as a measure of how\n"
 "different the two distributions are, this number is a lower bound on\n"
 "the measure of how different these distributions are.\n"
 "\n"
 "As alpha becomes smaller, the confidence bounds become wider and the\n"
 "measure becomes more conservative.  Thus\n"
 "\n"
 "\toi 0.01 < sample.dat\n"
 "\n"
 "produces as output\n"
 "\n"
 "\t0.662043         5/10 16/100 sample1 2\n"
 "\t0.874781         25/100 160/1000 sample3 4\n"
 "\n"
 "The Unix sort command can be used to sort the output of the oi program\n"
 "so that the highest scoring samples come first.\n"
 "\n"
 "\toi 0.01 < sample.dat | sort -k1,1 -nr\n"
 "\n"
 "produces\n"
 "\n"
 "\t0.874781         25/100 160/1000  sample3 4\n"
 "\t0.662043         5/10 16/100      sample1 2\n";

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Tuning constants for the root finder solve(), and the floating-point type
 * used for all arithmetic in this file.
 */
#define MAXITS 1000       /* iteration cap applied to each of the two loops in solve() */
#define TOL    1.0e-7     /* estimate accuracy: solve() adds 0.5*TOL to its convergence tolerance */

typedef double Float;     /* type of every probability, term and odds-ratio value below */

/* Return the smaller of two ints (x when they compare equal). */
int imin(int x, int y)
{
  if (x <= y)
    return x;
  return y;
}
/* Return the larger of two ints (x when they compare equal). */
int imax(int x, int y)
{
  if (x >= y)
    return x;
  return y;
}

/* incf -- factor by which the running term is multiplied when the cell count
 * steps from n11 to n11+1 (used as "t *= incf(...)" by the summation loops).
 *
 * Every integer factor is converted to Float before multiplying: the plain
 * int products overflow (undefined behaviour) once the counts exceed roughly
 * 46000.  Each argument is parenthesised so expressions may be passed safely;
 * note that n11, n1_ and n_1 are still evaluated more than once.
 */
#define incf(n11, n, n1_, n_1, theta) \
  ((Float)((n1_)-(n11))*(Float)((n_1)-(n11))*(theta) \
   /((Float)(1+(n11))*(Float)(((n)-(n1_)-(n_1)+1)+(n11))))
/* decf -- factor by which the running term is multiplied when the cell count
 * steps from n11 down to n11-1 (used as "t *= decf(...)" by P_lower()).
 *
 * Every integer factor is converted to Float before multiplying: the plain
 * int products overflow (undefined behaviour) once the counts exceed roughly
 * 46000.  Each argument is parenthesised so expressions may be passed safely;
 * note that n11, n1_ and n_1 are still evaluated more than once.
 */
#define decf(n11, n, n1_, n_1, theta) \
  ((Float)(n11)*(Float)((n)-(n1_)-(n_1)+(n11)) \
   /((Float)((n1_)-(n11)+1)*(Float)((n_1)-(n11)+1)*(theta)))

/* sums_ratio -- share of the total term weight that falls in a sub-range.
 *
 * Walks the cell count k from max(0, n1_+n_1-n) up to min(n1_, n_1).  The
 * term starts at 1.0 and is advanced with incf() at each step.  Every term
 * is added to the total; terms with n11_lower <= k <= n11_upper are also
 * added to the selected sum.  Returns selected/total.
 */
static Float sums_ratio(int n11_lower, int n11_upper, int n, int n1_, int n_1, Float theta)
{
  const int first = imax(0, n1_+n_1-n);
  const int last = imin(n1_, n_1);
  Float selected = 0.0;
  Float total = 0.0;
  Float term = 1.0;
  int k;

  for (k = first; ; k++) {
    total += term;
    if (n11_lower <= k && k <= n11_upper)
      selected += term;
    if (k >= last)
      break;                              /* the last value has been accumulated */
    term *= incf(k, n, n1_, n_1, theta);  /* incf is a macro: advance term to k+1 */
  }
  return selected/total;
}

/* P_lower -- share of the term weight lying at or above n11.
 *
 * Terms are scaled so that the one at n11 equals 1.0.  The "above" sum walks
 * upward from n11 with incf(); the "below" sum walks downward from n11-1
 * with decf().  Returns above/(below+above).
 *
 * Shortcuts: returns 1.0 when n11 is already the smallest possible count,
 * and 0.0 when theta is exactly zero.
 */
static Float P_lower(int n11, int n, int n1_, int n_1, Float theta)
{
  const int lo = imax(0, n1_+n_1-n);
  const int hi = imin(n1_, n_1);
  Float above = 0.0;
  Float below = 0.0;
  Float term;
  int   k;

  if (n11 == lo)
    return 1.0;

  if (theta == 0.0)
    return 0.0;

  /* terms for n11, n11+1, ..., hi */
  term = 1.0;
  for (k = n11; ; k++) {
    above += term;
    if (k >= hi)
      break;
    term *= incf(k, n, n1_, n_1, theta);   /* incf is a macro */
  }

  /* terms for n11-1, n11-2, ..., lo */
  term = decf(n11, n, n1_, n_1, theta);
  for (k = n11 - 1; ; k--) {
    below += term;
    if (k <= lo)
      break;
    term *= decf(k, n, n1_, n_1, theta);   /* decf is a macro */
  }

  return above/(below+above);
}

/* P_upper -- share of the term weight lying at or below n11, obtained from
 * sums_ratio() over the range [max(0, n1_+n_1-n), n11].
 */
static Float P_upper(int n11, int n, int n1_, int n_1, Float theta)
{
  const int support_min = imax(0,n1_+n_1-n);

  return sums_ratio(support_min, n11, n, n1_, n_1, theta);
}


/* Parameters shared between theta_min()/theta_max() and the one-argument
 * functions F_lower()/F_upper().  solve() only accepts a function of theta,
 * so the remaining arguments are passed through this file-scope object.
 * theta_min()/theta_max() fill it in immediately before calling solve().
 */
static struct {
  int   n11;     /* passed as the n11 argument of P_lower()/P_upper() */
  int   n;       /* passed as the n argument */
  int   n1_;     /* passed as the n1_ argument */
  int   n_1;     /* passed as the n_1 argument */
  Float value;   /* target probability (alpha or alpha/2) subtracted by F_lower()/F_upper() */
} data;


/* F_lower -- P_lower() evaluated on the table stored in `data`, minus the
 * target probability data.value; zero where P_lower equals the target.
 */
Float F_lower(Float theta)
{
  const Float p = P_lower(data.n11, data.n, data.n1_, data.n_1, theta);

  return p - data.value;
}

/* F_upper -- P_upper() evaluated on the table stored in `data`, minus the
 * target probability data.value; zero where P_upper equals the target.
 */
Float F_upper(Float theta)
{
  const Float p = P_upper(data.n11, data.n, data.n1_, data.n_1, theta);

  return p - data.value;
}
  



/* solve -- find an argument at which f is zero, starting from the guess a.
 *
 * Phase 1 (bracketing): starting from [a, 2a], the interval is moved or
 * widened geometrically until f(a) and f(b) have opposite signs.  The
 * direction of each step is chosen from which endpoint has the smaller |f|,
 * which relies on f being monotonic.  Because the interval is grown by
 * multiplying and dividing, a is presumably expected to be positive --
 * NOTE(review): confirm; the callers in this file pass 1.0.
 *
 * Phase 2 (refinement): the bracket is narrowed by mixing interpolation
 * steps with bisection.  The structure appears to follow Brent's method as
 * presented in Numerical Recipes (zbrent) -- NOTE(review): confirm.
 *
 * Returns an argument where f evaluates to exactly 0.0, or the current best
 * estimate b once half the bracket width is no larger than tol1.
 *
 * If either phase exhausts MAXITS iterations an assert fires.  With NDEBUG
 * defined the asserts vanish: phase 2 is then entered without a valid
 * bracket, and 0.0 is returned if phase 2 runs out of iterations.
 */
static Float solve(Float (*f)(Float), Float a)    /* guess bounds for f, given that f is monotonic */
{
  Float fa = (*f)(a);
  const Float f1 = 2;       /* growth factor applied to the upper endpoint b */
  const Float f2 = 3;       /* shrink divisor applied to the lower endpoint a */
  Float b = f1*a;
  Float fb = (*f)(b);
  Float c;
  Float fc;
  Float d = 0;              /* most recent step size */
  Float e = 0;              /* step size before that */
  int   its;

  if (fa == 0.0) return a;
  if (fb == 0.0) return b;

  /* Phase 1: loop while f(a) and f(b) share a sign (their product is positive). */
  for (its = 0; its < MAXITS && fa*fb > 0.0; its++) {
    Float delta = fabs(fa)-fabs(fb);
    if (delta < 0) {      /* fabs(fa) < fabs(fb); decrease a */
      b = a;
      fb = fa;
      a /= f2;
      fa = (*f)(a);
      if (fa == 0.0) return a;
    }
    else if (delta > 0) { /* fabs(fb) < fabs(fa); increase b */
      a = b;
      fa = fb;
      b *= f1;
      fb = (*f)(b);
      if (fb == 0.0) return b;
    }
    else {                /* fabs(fa) == fabs(fb); decrease a, increase b */
      a /= f2;
      fa = (*f)(a);
      if (fa == 0.0) return a;
      b *= f1;
      fb = (*f)(b);
      if (fb == 0.0) return b;
    }
  }

  /* The bracketing loop must have stopped because of a sign change,
     not because the iteration cap was reached. */
  assert(its < MAXITS);
  assert((fa > 0.0 && fb < 0.0) || (fa < 0.0 && fb > 0.0));

  /* Phase 2: b is the current best estimate, a the previous one, and c the
     point kept on the opposite side of the root from b. */
  fc = fb;
  c = b;
  for (its = 0; its < MAXITS; its++) { 
    Float p, q, r, s, xm; 
    Float tol1;
    /* printf("f(%g) = %g, f(%g) = %g, f(%g) = %g\n", a, fa, b, fb, c, fc); */
    /* If c and b are on the same side of the root, reset c to a. */
    if ((fc < 0.0 && fb < 0.0) || (fc > 0.0 && fb > 0.0)) { 
      fc = fa; 
      c = a; e = d = b-a; 
    } 
    /* Keep b as the endpoint with the smaller |f|. */
    if (fabs(fc) < fabs(fb)) { 
      fa = fb; fb = fc; fc = fa; 
      a = b; b = c; c = a; 
    } 
    tol1 = 1.0e-8*fabs(b)+0.5*TOL;   /* relative plus absolute tolerance */
    xm = 0.5*(c-b);                  /* signed half-width of the bracket */
    if (fabs(xm) <=  tol1 || fb == 0.0) 
      return b; 
    /* Try interpolation only if the previous step was large enough and
       the last iteration actually reduced |f|. */
    if ((fabs(e) >= tol1) && (fabs(fb) < fabs(fa))) { 
      Float min1;
      Float min2; 
      s = fb/fa; 
      if (a == c) {                  /* only two distinct points are available */
	p = 2.0*xm*s; 
	q = 1.0-s; 
      } 
      else {                         /* three distinct points: a, b and c */
	r = fb/fc; 
	q = fa/fc; 
	q = (q-1.0)*(r-1.0)*(s-1.0); 
	p = s*(2.0*xm*q*(q-r) - (b-a)*(r-1.0)); 
      } 
      if (p > 0.0) 
	q = -q;
      p = fabs(p); 
      min1 = 3.0*xm*q-fabs(tol1*q); 
      min2 = fabs(e*q); 
      /* Accept the interpolated step p/q only if 2p is below the smaller of
	 min1 and min2; otherwise bisect. */
      if (2.0*p < (min1 > min2 ? min2 : min1)) { 
	e = d; 
	d = p/q; 
      } 
      else 
	e = d = xm; 
    } 
    else 
      e = d = xm;                    /* fall back to bisection */
    fa = fb; 
    a = b; 
    /* Move b by d, but never by less than tol1 (in the direction of xm). */
    if (fabs(d) > tol1) 
      b +=  d; 
    else 
      b += (xm < 0.0) ? -fabs(tol1) : fabs(tol1);
    fb = (*f)(b); 
  } 
  /* Only reached with its == MAXITS, so this assert always fails here:
     it reports that the refinement did not converge. */
  assert(its < MAXITS);
  return 0.0;    /* never get here */
}


Float theta_unconditional(int n11, int n, int n1_, int n_1)
{
  int n12 = n1_-n11;
  int n21 = n_1-n11;
  int n22 = n-n1_-n_1+n11;
  return ((Float) n11*n22)/((Float) n12*n21);
}

/* theta_min -- lower confidence limit: the theta at which P_lower() equals
 * the target probability, found with solve() starting from 1.0.
 *
 * Returns 0 without solving when n11 is the smallest count the totals allow.
 * The target is alpha when n11 is the largest possible count, and alpha/2
 * otherwise.  Overwrites the file-scope `data` object.
 */
Float theta_min(int n11, int n, int n1_, int n_1, Float alpha)
{
  const int support_min = imax(0,n1_+n_1-n);
  const int support_max = imin(n1_,n_1);

  if (n11 == support_min)
    return 0;

  data.n11 = n11;
  data.n = n;
  data.n1_ = n1_;
  data.n_1 = n_1;
  if (n11 == support_max)
    data.value = alpha;
  else
    data.value = alpha/2.0;

  return solve(F_lower, 1.0);
}


/* theta_max -- upper confidence limit: the theta at which P_upper() equals
 * the target probability, found with solve() starting from 1.0.
 *
 * Returns 0 without solving when n11 is the largest count the totals allow.
 * NOTE(review): an unbounded upper limit might be expected in that case
 * rather than 0 -- confirm before relying on this value.
 * The target is alpha when n11 is the smallest possible count, and alpha/2
 * otherwise.  Overwrites the file-scope `data` object.
 */
Float theta_max(int n11, int n, int n1_, int n_1, Float alpha)
{
  const int support_max = imin(n1_,n_1);
  const int support_min = imax(0,n1_+n_1-n);

  if (n11 == support_max)
    return 0;

  data.n11 = n11;
  data.n = n;
  data.n1_ = n1_;
  data.n_1 = n_1;
  if (n11 == support_min)
    data.value = alpha;
  else
    data.value = alpha/2.0;

  return solve(F_upper, 1.0);
}


/* quit_with_error -- report a fatal usage error and terminate.
 *
 * Writes "Error: ", then the message built from the printf-style `format`
 * with the single string argument `s`, then a newline and the full
 * instructions text, all to stderr.  Exits with EXIT_FAILURE; never returns.
 */
static void quit_with_error(char *format, char *s)
{
  fputs("Error: ", stderr);
  fprintf(stderr, format, s);
  fputc('\n', stderr);
  fputs(instructions, stderr);
  exit(EXIT_FAILURE);
}
     
/* main -- command-line driver.
 *
 * Usage: oi alpha
 *
 * Parses the significance level from argv[1], then for each input line on
 * stdin reads the leading "r1/n1 r2/n2" counts and writes to stdout the
 * lower confidence limit from theta_min(), the four counts, and the rest of
 * the input line unchanged.
 *
 * Returns EXIT_SUCCESS at end of input.  Exits with EXIT_FAILURE on a bad
 * command line, unparsable input, counts that violate
 * n1>=r1>=0, n1>=1, n2>=r2>=0, n2>=1, or an output write error.
 *
 * The count checks are ordinary run-time tests rather than asserts so that
 * they still run when the program is built with NDEBUG.
 */
int main(int argc, char **argv)
{
  FILE *inputstream = stdin;
  FILE *outputstream = stdout;
  Float significance_level;
  char  *cp;

  if (argc != 2)
    quit_with_error(" Was not called with exactly one argument%s\n", "");

  significance_level = strtod(argv[1], &cp);
  if (cp == argv[1] || *cp != '\0')
    quit_with_error(" Expected a float argument, got %s\n", argv[1]);

  while (1) {
    int r1, n1, r2, n2;
    int nread = fscanf(inputstream, " %d / %d %d / %d", &r1, &n1, &r2, &n2);

    if (nread == EOF)
      return(EXIT_SUCCESS);

    if (nread != 4) {
      if (feof(inputstream))
        fprintf(stderr, "%s: input ended with %d numbers unread\n",
                argv[0], nread);
      else
        fprintf(stderr, "%s: can't read input as a number: '%c'\n",
                argv[0], getc(inputstream));
      exit(EXIT_FAILURE);
    }

    /* r2 only has to be non-negative; it is not required to reach r1. */
    if (r1 < 0 || n1 < r1 || n1 < 1 || r2 < 0 || n2 < r2 || n2 < 1) {
      fprintf(stderr,
              "%s: invalid counts %d/%d %d/%d: need n1>=r1>=0, n1>=1, n2>=r2>=0 and n2>=1\n",
              argv[0], r1, n1, r2, n2);
      exit(EXIT_FAILURE);
    }

    {
      /* Translate the two samples into the 2x2 table parameters:
         first cell, grand total, first row total, first column total. */
      int n11 = r1;
      int n1_ = n1;
      int n = n1+n2;
      int n_1 = r1+r2;
      /* fprintf(outputstream, "%f\t", theta_unconditional(n11, n, n1_, n_1)); */
      fprintf(outputstream, "%f\t ", theta_min(n11, n, n1_, n_1, significance_level));
      /* fprintf(outputstream, "%f\t ", theta_max(n11, n, n1_, n_1, significance_level)); */
    }

    /* write out original scores */

    fprintf(outputstream, "%d/%d %d/%d\t", r1, n1, r2, n2);

    /* copy rest of line from input to output */

    {
      int c;
      while ((c = getc(inputstream)) != EOF && c != '\n') {
        if (putc(c, outputstream) == EOF) {
          fprintf(stderr, "%s: error writing output\n", argv[0]);
          exit(EXIT_FAILURE);
        }
      }
      if (putc('\n', outputstream) == EOF) {
        fprintf(stderr, "%s: error writing output\n", argv[0]);
        exit(EXIT_FAILURE);
      }
    }
  }
}
