/* icb.c
 *
 * (c) Mark Johnson, Brown University; http://www.cog.brown.edu/~mj/
 * 5th September, 2000
 */

/* instructions -- the full help text for the icb program.
 *
 * main() prints this to stderr when the command line is malformed.
 * It describes the licence, the CEPHES routines the program depends on,
 * the expected input format (lines beginning with a ratio r/n) and
 * worked examples.  The string is never modified, so it is declared
 * as a pointer to const.
 */
static const char *instructions =
 "icb  -- inverse cumulative binomial distribution\n"
 "\n"
 "(c) Mark Johnson, 5th September 2000\n"
 "    Brown University\n"
 "    Email: Mark_Johnson@Brown.edu\n"
 "    Web: http://www.cog.brown.edu/~mj\n"
 "\n"
 "This program is made available freely for research purposes only.\n"
 "Please contact me if you are interested in commercial application.\n"
 "I request acknowledgement if results from\n"
 "this program or a program derived from this code appears in a\n"
 "publication.  \n"
 "\n"
 "                                NO WARRANTY\n"
 "\n"
 "BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY\n"
 "FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT\n"
 "WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER\n"
 "PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY OF ANY KIND,\n"
 "EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE\n"
 "IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n"
 "PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE\n"
 "PROGRAM IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME\n"
 "THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n"
 "\n"
 "IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n"
 "WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR\n"
 "REDISTRIBUTE THE PROGRAM, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n"
 "GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF\n"
 "THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO\n"
 "LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY\n"
 "YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY\n"
 "OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED\n"
 "OF THE POSSIBILITY OF SUCH DAMAGES.\n"
 "\n"
 "\n"
 "Routines from the CEPHES library:\n"
 "================================\n"
 "\n"
 "The files incbi.c incbet.c gamma.c polevl.c const.c mtherr.c ndtri.c\n"
 "are part of the freely distributed CEPHES library.\n"
 "\n"
 "\n"
 "Compilation:\n"
 "===========\n"
 "\n"
 "The programs here are written in ANSI C, and should compile with any\n"
 "standard C compiler.  It should only be necessary to run \"make\".\n"
 "\n"
 "\n"
 "Usage:\n"
 "=====\n"
 "\n"
 "	icb alpha\n"
 "\n"
 "Alpha, the significance level, should be a real number greater than\n"
 "zero and less than one.  A typical value is 0.025.\n"
 "\n"
 "The program reads lines from standard input and writes an equal number\n"
 "of lines to standard output.  Each input line should begin with a\n"
 "ratio of non-negative integers r/n.  It must be the case that n>=r>=0 and\n"
 "n>=1.  The program copies each input line to the output, prepending a\n"
 "real number p to the line, which satisfies:\n"
 "\n"
 "	alpha = sum_{i=r}^n binomial(n,i) p^i (1-p)^(n-i) \n"
 "\n"
 "It does this by computing the inverse incomplete Beta function I_p,\n"
 "which is related to the cumulative binomial via the equation\n"
 "\n"
 "	I_p(r,n-r+1) = sum_{i=r}^n binomial(n,i) p^i (1-p)^(n-i) \n"
 "\n"
 "\n"
 "Example:\n"
 "=======\n"
 "\n"
 "Suppose the input file sample.dat contains:\n"
 "\n"
 "	5/10 sample1\n"
 "	40/100 sample2\n"
 "	300/1000 sample3\n"
 "\n"
 "Then running\n"
 "\n"
 "	icb 0.1 < sample.dat\n"
 "\n"
 "produces as output\n"
 "	\n"
 "	0.267318        5/10     sample1\n"
 "	0.334237        40/100   sample2\n"
 "	0.281234        300/1000 sample3\n"
 "\n"
 "The number on the left is a ``discounted probability'' of success.\n"
 "This score is higher for sample2, even though the maximum likelihood\n"
 "probability for sample1 is higher.\n"
 "\n"
 "As alpha becomes smaller, the score becomes more conservative.  Thus\n"
 "\n"
 "	icb 0.001 < sample.dat\n"
 "\n"
 "produces\n"
 "\n"
 "	0.089813        5/10     sample1\n"
 "	0.254536        40/100   sample2\n"
 "	0.256185        300/1000 sample3\n"
 "\n"
 "The Unix sort command can be used to sort the output of the icb program\n"
 "so that the highest scoring samples come first.\n"
 "\n"
 "	icb 0.001 < sample.dat | sort -k1,1 -nr\n"
 "\n"
 "produces\n"
 "\n"
 "	0.256185        300/1000 sample3\n"
 "	0.254536        40/100   sample2\n"
 "	0.089813        5/10     sample1\n";

#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>

#include "mconf.h"  /* the CEPHES configuration file */


/* Prototype for the CEPHES inverse incomplete Beta routine used below.
 * Declaring it here guarantees the call never relies on an implicit
 * "int incbi()" declaration, which would misinterpret the double result
 * under C89 and is an error in C99 and later.
 * NOTE(review): if mconf.h already declares incbi, this is a compatible
 * redeclaration -- confirm against the local mconf.h.
 */
extern double incbi(double aa, double bb, double yy0);

/* binomial_lower_bound -- lower confidence bound on a binomial probability.
 *
 *   n      number of trials
 *   k      number of successes
 *   alpha  significance level
 *
 * Returns the value of p for which
 *
 *    alpha = sum_{i=k}^n binomial(n,i) p^i (1-p)^(n-i)
 *
 * It does this by computing the inverse incomplete Beta function I,
 * which is related to the cumulative binomial via the equation
 *
 *    I_p(k,n-k+1) = sum_{i=k}^n binomial(n,i) p^i (1-p)^(n-i)
 *
 * No argument checking is done here; the caller is expected to have
 * validated n, k and alpha.
 */
static double binomial_lower_bound(double n, double k, double alpha)
{
  /* With no successes the bound is taken to be 0.0; incbi is not called,
   * since its first parameter would be zero.
   */
  if (k == 0.0)
    return 0.0;

  return incbi(k, n - k + 1.0, alpha);
}

/*
static double binomial_upper_bound(double n, double k, double alpha)
{
  return k == n ? 1.0 : 1.0-incbi(n, k-1.0, alpha);
}
*/


/* copy_rest_of_line -- copies characters from in to out up to, but not
 * including, the next newline or the end of input, then writes a single
 * newline to out.  Returns 0 on success, or EOF if a write to out fails.
 */
static int copy_rest_of_line(FILE *in, FILE *out)
{
  int c;

  while ((c = getc(in)) != EOF && c != '\n') {
    if (putc(c, out) == EOF)
      return EOF;
  }
  return putc('\n', out) == EOF ? EOF : 0;
}

/* main -- entry point of the icb program.
 *
 * Expects exactly one command-line argument, the significance level
 * alpha, which must parse completely as a number strictly between 0 and 1.
 *
 * It then reads standard input.  Each line must start with a ratio r/n
 * satisfying n >= r >= 0 and n >= 1.  For each line it writes to standard
 * output: the value of binomial_lower_bound(n, r, alpha), a tab, the
 * ratio r/n, a tab, and the remainder of the input line.
 *
 * Returns EXIT_SUCCESS when the input is exhausted cleanly.  On a usage
 * error, malformed input, an out-of-range ratio or an I/O failure it
 * prints a message to stderr and exits with EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
  double sig;
  double n, r;
  FILE *inputstream = stdin;
  FILE *outputstream = stdout;
  char *remainder;
  int  nread;

  if (argc != 2) {
    fprintf(stderr, "Expected usage: %s alpha\n\n%s",
            argv[0], instructions);
    exit(EXIT_FAILURE);
  }

  /* Reject an argument with no digits at all as well as trailing junk. */
  sig = strtod(argv[1], &remainder);
  if (remainder == argv[1] || *remainder != '\0') {
    fprintf(stderr, "%s: Couldn't parse significance level alpha: %s\n\n%s",
            argv[0], argv[1], instructions);
    exit(EXIT_FAILURE);
  }

  /* Written as a negated "in range" test so that NaN is rejected too. */
  if (!(sig > 0.0 && sig < 1.0)) {
    fprintf(stderr, "%s: significance level alpha not between 0.0 and 1.0: %s\n",
            argv[0], argv[1]);
    exit(EXIT_FAILURE);
  }

  while (1) {
    nread = fscanf(inputstream, " %lg / %lg", &r, &n);

    if (nread == EOF)
      break;

    if (nread != 2) {
      if (feof(inputstream)) {
        fprintf(stderr, "%s: input ended with %d numbers not processed\n",
                argv[0], nread);
      } else {
        /* Show the offending character, unless none can be read. */
        int bad = getc(inputstream);
        if (bad == EOF)
          fprintf(stderr, "%s: error reading input\n", argv[0]);
        else
          fprintf(stderr, "%s: can't read input as a number: '%c'\n",
                  argv[0], bad);
      }
      exit(EXIT_FAILURE);
    }

    /* Validate explicitly rather than with assert(), so the checks stay
     * active under NDEBUG and the user gets a meaningful message.  The
     * negated form also rejects NaN values.
     */
    if (!(r >= 0.0 && n >= r && n >= 1.0)) {
      fprintf(stderr, "%s: invalid ratio %g/%g: need n >= r >= 0 and n >= 1\n",
              argv[0], r, n);
      exit(EXIT_FAILURE);
    }

    /* write out the score followed by the original ratio */
    if (fprintf(outputstream, "%f\t", binomial_lower_bound(n, r, sig)) < 0
        || fprintf(outputstream, "%g/%g\t", r, n) < 0
        /* copy rest of line from input to output */
        || copy_rest_of_line(inputstream, outputstream) == EOF) {
      fprintf(stderr, "%s: error writing output\n", argv[0]);
      exit(EXIT_FAILURE);
    }
  }

  /* fscanf returns EOF for read errors as well as for end of input. */
  if (ferror(inputstream)) {
    fprintf(stderr, "%s: error reading input\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Flush now so that a deferred write failure is reported. */
  if (fflush(outputstream) == EOF) {
    fprintf(stderr, "%s: error writing output\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  return EXIT_SUCCESS;
}
