#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <math.h>

#define MAXHISTORY 128

void usage(char *appname);
int myfgetc(FILE *file);
void buildprobabilitytable(FILE *file, int *table, char *prefix, int *total);

/*
 * Score a seed string against a text corpus with an order-N character model.
 *
 * Command line: <filename> <seed string> <order>
 *
 * For every character of the seed that has order-1 characters before it, the
 * corpus is scanned for that (order-1)-character context and the relative
 * frequency of the character following it is taken as its probability.  When
 * the context never occurs, or never precedes the character, the context is
 * shortened from the left (a "!" is printed for each shortening) and the
 * lookup is retried.  A character that has zero probability even with an
 * empty context is counted as an escape and left out of the products.
 *
 * One line is printed per character: the character, its probability, the
 * running product of probabilities, the running mean probability, the running
 * geometric mean, and the number of escapes so far.
 *
 * NOTE(review): the seed is compared as typed, while myfgetc upper-cases the
 * corpus, so lowercase seed letters can never match -- confirm whether the
 * seed should be normalised the same way.
 *
 * Returns 0 on success, EXIT_FAILURE on bad arguments or an unreadable file.
 */
int main(int argc, char *argv[])
{
  FILE *thefile;
  int order;
  long orderarg;
  char *orderend;
  char *find;
  int table[256];
  int total;
  char findbuffer[100];
  int pos;
  double prob,totalprob;
  int end;
  int escapes;
  double probaverage;
  double arithmean,geomean;
  int numletters;
  char *findbufferptr;
  unsigned char target;

  if (argc<4)
    {
      usage(argv[0]);
      return EXIT_FAILURE;
    }

  find=argv[2];

  /* The context (order-1 characters plus terminator) must fit in findbuffer,
     and an order below 1 would give a negative copy length. */
  orderarg=strtol(argv[3],&orderend,10);
  if (orderend==argv[3] || *orderend!='\0' ||
      orderarg<1 || orderarg>(long)sizeof findbuffer)
    {
      fprintf(stderr,"%s: order must be an integer between 1 and %lu\n",
	      argv[0],(unsigned long)sizeof findbuffer);
      return EXIT_FAILURE;
    }
  order=(int)orderarg;

  if ((thefile=fopen(argv[1],"r"))==NULL)
    {
      perror(argv[1]);
      return EXIT_FAILURE;
    }

  pos=0;
  totalprob=1;
  end=(int)strlen(find);
  escapes=0;
  probaverage=0;
  numletters=0;

  while ((pos+order-1)<end)
    {
      /* Index the table through unsigned char so bytes >= 0x80 cannot
	 produce a negative subscript. */
      target=(unsigned char)find[pos+order-1];

      /* Copy the context and terminate it explicitly. */
      memcpy(findbuffer,&find[pos],(size_t)(order-1));
      findbuffer[order-1]='\0';
      findbufferptr=findbuffer;

      for (;;)
	{
	  buildprobabilitytable(thefile,table,findbufferptr,&total);

	  prob=(total>0) ? ((double)table[target])/total : 0.0;

	  /* Stop once the character was seen, or when the context is already
	     empty and cannot be shortened any further. */
	  if (prob!=0 || *findbufferptr=='\0')
	    break;

	  printf("!");
	  findbufferptr++;
	}

      probaverage+=prob;

      if (prob==0)
	escapes++;
      else
	{
	  totalprob*=prob;
	  numletters++;
	}

      /* Until at least one character has been predicted there is nothing to
	 average; report 0 rather than dividing by zero. */
      arithmean=(numletters>0) ? probaverage/numletters : 0.0;
      geomean=(numletters>0) ? pow(totalprob,1.0/((double)numletters)) : 0.0;

      printf("%c: %1.4lf\t\t%1.9le\t%.4f\t%.4f\t%i\n",find[pos+order-1],prob,totalprob,arithmean,geomean,escapes);

      pos++;
    }

  fclose(thefile);

  return 0;
}

/*
 * Count which characters follow `prefix` in `file`.
 *
 * The file is rewound and read through myfgetc.  Each time the most recent
 * strlen(prefix) characters equal `prefix`, the count for the character just
 * read is incremented.  An empty prefix matches everywhere, so every
 * character is counted.
 *
 * file:   open stream to scan; it is rewound first.
 * table:  array of 256 counters, cleared here and indexed by character value.
 * prefix: context string; for an order-N model it is N-1 letters long.  It
 *         must be shorter than MAXHISTORY; a longer prefix is treated as
 *         never occurring (empty table, *total == 0).
 * total:  receives the number of matches, i.e. the sum of all table entries.
 */
void buildprobabilitytable(FILE *file, int *table, char *prefix, int *total)
{
  char filehistory[MAXHISTORY];
  size_t prefixlen;
  int thischar;

  memset(table,0,256*sizeof(int));
  *total=0;

  /* The history holds prefixlen characters plus a terminator; refuse any
     prefix that would not fit instead of writing past the buffer. */
  prefixlen=strlen(prefix);
  if (prefixlen>=MAXHISTORY)
    return;

  rewind(file);

  /* Zero-filled history: filehistory[prefixlen] is never written below, so
     it stays the string terminator throughout. */
  memset(filehistory,0,sizeof filehistory);

  while ((thischar=myfgetc(file))!=EOF)
    {
      /* The history holds the characters read before thischar. */
      if (strcmp(filehistory,prefix)==0)
	{
	  table[thischar]++;
	  (*total)++;
	}

      /* Slide the window left by one and append the new character. */
      if (prefixlen>0)
	{
	  memmove(filehistory,filehistory+1,prefixlen-1);
	  filehistory[prefixlen-1]=(char)thischar;
	}
    }
}

/*
 * Print the command-line synopsis to standard output.
 *
 * appname: program name shown in the synopsis (main passes argv[0]).
 */
void usage(char *appname)
{
  /* main insists on three arguments and reads the model order from argv[3],
     so the order is listed alongside the file name and the seed string. */
  printf("Usage: %s <filename> <seed string> <order>\n\n",appname);
}

/*
 * Read the next normalised character from `file`.
 *
 * Anything that is neither a letter nor a blank (' ') is discarded, which
 * includes newlines and tabs that do not directly follow a blank.  A blank,
 * together with any whitespace immediately after it, is returned as a single
 * ' '.  Letters are returned in upper case.  Returns EOF at end of input.
 */
int myfgetc(FILE *file)
{
  int c;
  int next;

  /* Skip forward to the first letter, blank or end-of-file. */
  do
    {
      c=fgetc(file);
    }
  while (c!=EOF && c!=' ' && !isalpha(c));

  if (isspace(c))
    {
      /* Swallow the rest of the whitespace run, then push back the first
	 character after it so the next call sees it. */
      do
	{
	  next=fgetc(file);
	}
      while (isspace(next));

      ungetc(next,file);
      c=' ';
    }

  return toupper(c);
}

