import java.io.*;

public class Biased
 {
   public static class indexPair
    { public final int  fst, snd;

      public indexPair(int fst, int snd)
       { this.fst = fst;  this.snd = snd; }//constructor

      public String toString()
       { return( "(" + fst + "," + snd + ")" ); }
    }//indexPair class

// ----------------------------------------------------------------------------
// Find the longest bi@sed.interval in `seq[]',
// i.e. that is at least `fraction' made up of chars in `select',
// and that is as long as possible.

   public static indexPair                                // longestBiased(...)
      longestBiased( String select, double fraction, char[] seq )
    {
      double[] fstMinVal = new double[seq.length+1];  // new minimum values and
      int[]    fstMinPos = new int   [seq.length+1];  // their positions.
      double cummSum = 0, lastCummSum = 0;  // cummulative sum & previous value
      int begins  = 0, ends = -1,           // best interval so far
          tracker = 0,                      // tracks fstMin...[]
          numMin  = 1;                      // effective size of fstMin...[]

      fstMinVal[0] = 0; fstMinPos[0] = -1;  // seq[0..-1] is the empty interval

      fraction = fraction / ( 1 + 0.5/seq.length );       // + a little "slack"
      if(      fraction > 1 ) fraction = 1; // sanity!
      else if( fraction < 0 ) fraction = 0;

      double good = 1-fraction;
      double bad  =  -fraction;

      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------
      // CONDITIONS OF USE: Provided that you                                 |
      // (a) cite:                                                            |
      //     L.Allison,                                                       |
      //     Longest Biased Interval and Longest Non-Negative Sum Interval,   |
      //     Bioinformatics, V19, #10, pp1294-1295, July 2003,                |
      //     http://dx.doi.org/10.1093/bioinformatics/btg135                  |
      //     if this code is used in, or contributes                          |
      //     towards, published work, and                                     |
      // (b) maintain this notice, then                                       |
      // (c) the code is available to you under the                           |
      //     GNU General Public Licence (GPL)                                 |
      //     ( see http://www.gnu.org/copyleft/gpl.html ),                    |
      // otherwise the code is not available for use by you.                  |
      // -- L.A. 30/1/2003.                                                   |
      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------

      int maxSelected = 0;    // make boolean array for O(1)-time test of chars
      for( int i = 0; i < select.length(); i++ )
        if( (int) select.charAt(i) > maxSelected )
           maxSelected = (int) select.charAt(i);
      boolean[] selected = new boolean[maxSelected+1];
      for( int i = 0; i <= maxSelected; i++ )
         selected[i] = select.indexOf((char) i) >= 0;


      for( int i = 0; i < seq.length; i++ )
       { // INV: seq[begins..ends] is longest bi@sed.interval of seq[0..i-1],
         //      fstMinVal[] is strictly decreasing and with
         //      fstMinPos[] shows the 1st occurences of minima.

         int seqI = (int) seq[i];
         cummSum += (seqI <= maxSelected && selected[seqI]) ? good : bad;

         if( cummSum < fstMinVal[numMin-1] ) // a new, lower minimum
          { fstMinVal[numMin] = cummSum;
            fstMinPos[numMin] = i;
            numMin++;
          }
         // NB. fstMinVal[] is in *strictly* decreasing order.

         if( cummSum >= lastCummSum )        // it's increasing, getting better
            while( tracker > 0 && fstMinVal[tracker-1] <= cummSum )
               tracker-- ;
            // tracker == 0 or fstMinVal[tracker-1] > cummSum

         else     //  cummSum < lastCummSum      it's decreasing, getting worse
            while( tracker < numMin-1 && fstMinVal[tracker] > cummSum )
               tracker++ ;
            // tracker == numMin-1 or fstMinVal[tracker] <= cummSum

         // (tracker == 0        or fstMinVal[tracker-1] > cummSum) and
         // (tracker == numMin-1 or fstMinVal[tracker]  <= cummSum)

         // In fact, tracker is least index s.t. fstMinVal[tracker] <= cummSum,
         // so seq[fstMinPos[tracker]+1 .. i] is a candidate interval.

         if( i-fstMinPos[tracker] > ends-begins+1 )          // ? new longest ?
            { begins = fstMinPos[tracker]+1; ends = i; }

         lastCummSum = cummSum;
       }//for i
      // seq[begins..ends] is longest bi@sed.interval of seq[0..seq.length-1]

      return( new indexPair(begins, ends) );
    }//longestBiased

// ----------------------------------------------------------------------------

   public static char[] readSeq(String fileName, String dontIgnore)
    { StringBuffer sb = new StringBuffer();
      try{ File inputFile = new File( fileName );
           FileReader in  = new FileReader(inputFile);
           int c;
           while((c = in.read()) != -1)
            { if( dontIgnore.indexOf(c) < 0 ) continue;
              char ch = (char) c;
              sb.append( ch );
         }  }
      catch(Exception e)
         { System.out.println( "failed: " + e.getMessage() );
           System.exit(1);
         }
      String s = sb.toString();
      char[] arr = new char[s.length()];
      for( int i = 0; i < s.length(); i++ )
         arr[i] = s.charAt(i);
      return arr;
    }//readSeq



   public static indexPair  run( String select, double fraction, char[] seq )
   // Do a scan of the String at the given fraction and print the findings.
    { int cutoff = 20;
      indexPair interval = longestBiased( select, fraction, seq );
      int len = interval.snd - interval.fst + 1;
      System.out.print( " >=" + fraction + ": ["
                      + interval.fst + ".." + interval.snd
                      + "] (" + len + ") " );
      if( len <= cutoff )                        // short, print all characters
       { for(int i = interval.fst; i <= interval.snd; i++ )
            System.out.print( seq[i] );
       }
      else                     // long, print first few and last few characters
       { for(int i = interval.fst; i < interval.fst + cutoff/2; i++)
            System.out.print( seq[i] );
         System.out.print( "..." );
         for(int i = interval.snd - cutoff/2 + 1; i <= interval.snd; i++)
            System.out.print( seq[i] );
       }
      System.out.println();
      return interval;
    }//run



   public static void  main(String[] argv)
   { System.out.println("#--- Biased.java, L.Allison, .au ---");

     for(int i=0; i < argv.length; i++)           // command line params if any
        System.out.println("# argv[" + i + "] = " + argv[i]);

     String select   = "";
     double fraction = 0.5;
     String fileName = "";
     try
      { select   = argv[0];
        fraction = new Double(argv[1]).doubleValue();     // read the min' bi@s
        fileName = argv[2];
      }
     catch(Exception e)
      { System.out.println("usage: java prog_name \"chars\" fraction filename");
        System.exit(1);
      }


     char[] arr = readSeq( fileName, "acgtuACGTUrnyRNY" );  // INPUT THE STRING
     // NB. you may want to change the set of characters that are not ignored.


     System.out.print("# " + arr.length + "bp");
     System.out.print(" ");
     for(int i=0; i < (arr.length < 65 ? arr.length : 65); i++)
        System.out.print( arr[i] );
     System.out.println( arr.length > 65 ? "..." : "" );

     if( fraction >= 0 && fraction <= 1 )                // a "proper" fraction
        run( select, fraction, arr );

     else // specified fraction > 1 or < 0; take this as a flag to try several
          // decreasing fractions until we get the whole string, and stop.
        for( int ifrac = 100; ifrac > 0; ifrac -= 5 )
         { indexPair interval = run( select, ifrac/100.0, arr );
           if( interval.snd - interval.fst + 1 >= arr.length ) break;
         }

     System.out.println("#--- end ---");
   }//main

 }//Biased class


// ---------------------------------------------------------------------
// October 2002, January 2003, L.Allison,                              |
// Note the ``conditions of use'' in the code above.                   |
// ---------------------------------------------------------------------



