import java.io.*;

public class Biased
 {
   public static class indexPair
    { public final int  fst, snd;

      public indexPair(int fst, int snd)
       { this.fst = fst;  this.snd = snd; }//constructor

      public String toString()
       { return( "(" + fst + "," + snd + ")" ); }
    }//indexPair class

      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------
      // CONDITIONS OF USE: Provided that you                                 |
      // (a) cite:                                                            |
      //     L.Allison,                                                       |
      //     Longest Biased Interval and Longest Non-Negative Sum Interval,   |
      //     Bioinformatics, V19, #10, pp1294-1295, July 2003,                |
      //     http://dx.doi.org/10.1093/bioinformatics/btg135                  |
      //     if this code is used in, or contributes                          |
      //     towards, published work, and                                     |
      // (b) maintain this notice, then                                       |
      // (c) the code is available to you under the                           |
      //     GNU General Public Licence (GPL)                                 |
      //     ( see http://www.gnu.org/copyleft/gpl.html ),                    |
      // otherwise the code is not available for use by you.                  |
      // -- L.A. 30/1/2003, 10/2003.                                          |
      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------

// ----------------------------------------------------------------------------

      //-----------------------------------------------------------------------
      //-----------------------------------------------------------------------
      // First algorithm as per                                               |
      //   J. Bioinformatics, 19(10), pp1294-1295, 1 July 2003                |
      // It is almost always linear time.                                     |
      //-----------------------------------------------------------------------
      //-----------------------------------------------------------------------

// Find the longest bi@sed.interval in `seq[]',
// i.e. that is at least `fraction' made up of chars in `select',
// and that is as long as possible.

   public static indexPair                               // longestBiased1(...)
      longestBiased1( String select, double fraction, char[] seq )
    {
      double[] fstMinVal = new double[seq.length+1];  // new minimum values and
      int[]    fstMinPos = new int   [seq.length+1];  // their positions.
      double cumSum = 0, lastCumSum = 0;    // cumulative sum & previous value
      int begins  = 0, ends = -1,           // best interval so far
          tracker = 0,                      // tracks fstMin...[]
          numMin  = 1;                      // effective size of fstMin...[]

      fstMinVal[0] = 0; fstMinPos[0] = -1;  // seq[0..-1] is the empty interval

      fraction = fraction / ( 1 + 0.5/seq.length );       // + a little "slack"
      if(      fraction > 1 ) fraction = 1; // sanity!
      else if( fraction < 0 ) fraction = 0;

      double good = 1-fraction;
      double bad  =  -fraction;

      int maxSelected = 0;    // make boolean array for O(1)-time test of chars
      for( int i = 0; i < select.length(); i++ )
        if( (int) select.charAt(i) > maxSelected )
           maxSelected = (int) select.charAt(i);
      boolean[] selected = new boolean[maxSelected+1];
      for( int i = 0; i <= maxSelected; i++ )
         selected[i] = select.indexOf((char) i) >= 0;


      for( int i = 0; i < seq.length; i++ )
       { // INV: seq[begins..ends] is longest bi@sed.interval of seq[0..i-1],
         //      fstMinVal[] is strictly decreasing and with
         //      fstMinPos[] shows the 1st occurences of minima.

         int seqI = (int) seq[i];
         cumSum += (seqI <= maxSelected && selected[seqI]) ? good : bad;

         if( cumSum < fstMinVal[numMin-1] ) // a new, lower minimum
          { fstMinVal[numMin] = cumSum;
            fstMinPos[numMin] = i;
            numMin++;
          }
         // NB. fstMinVal[] is in *strictly* decreasing order.

         if( cumSum >= lastCumSum )        // it's increasing, getting better
            while( tracker > 0 && fstMinVal[tracker-1] <= cumSum )
               tracker-- ;
            // tracker == 0 or fstMinVal[tracker-1] > cumSum

         else     //  cumSum < lastCumSum      it's decreasing, getting worse
            while( tracker < numMin-1 && fstMinVal[tracker] > cumSum )
               tracker++ ;
            // tracker == numMin-1 or fstMinVal[tracker] <= cumSum

         // (tracker == 0        or fstMinVal[tracker-1] > cumSum) and
         // (tracker == numMin-1 or fstMinVal[tracker]  <= cumSum)

         // In fact, tracker is least index s.t. fstMinVal[tracker] <= cumSum,
         // so seq[fstMinPos[tracker]+1 .. i] is a candidate interval.

         if( i-fstMinPos[tracker] > ends-begins+1 )          // ? new longest ?
            { begins = fstMinPos[tracker]+1; ends = i; }

         lastCumSum = cumSum;
       }//for i
      // seq[begins..ends] is longest bi@sed.interval of seq[0..seq.length-1]

      return( new indexPair(begins, ends) );
    }//longestBiased1

// ----------------------------------------------------------------------------

      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------
      // ... and here is code for a 3-pass *always* linear-time algorithm     |
      // that was suggested by Sung Kwon Kim [skkim at cau.ac.kr] in          |
      // personnal communication, 28 July, 12 Aug 2003 ...                    |
      // ----------------------------------------------------------------------
      // ----------------------------------------------------------------------

// Find the longest bi@sed.interval in `seq[]',
// i.e. that is at least `fraction' made up of chars in `select',
// and that is as long as possible.

   public static indexPair                               // longestBiased2(...)
      longestBiased2( String select, double fraction, char[] seq )
    {
      fraction = fraction / ( 1 + 0.5/seq.length );       // + a little "slack"
      if(      fraction > 1 ) fraction = 1; // sanity!
      else if( fraction < 0 ) fraction = 0;

      double good = 1-fraction;
      double bad  =  -fraction;

      int maxSelected = 0;    // make boolean array for O(1)-time test of chars
      for( int i = 0; i < select.length(); i++ )
        if( (int) select.charAt(i) > maxSelected )
           maxSelected = (int) select.charAt(i);
      boolean[] selected = new boolean[maxSelected+1];
      for( int i = 0; i <= maxSelected; i++ )
         selected[i] = select.indexOf((char) i) >= 0;

// 1st pass
      // btw. double is better than float for veryy longgg sequences
      double[] cumSums = new double[seq.length];  // cumulative sums
      double cumSum = 0;
      for( int i = 0; i < seq.length; i++ )
       { int seqI = (int) seq[i];
         cumSum += (seqI <= maxSelected && selected[seqI]) ? good : bad;
         cumSums[i] = cumSum;
       }//for i

// 2nd pass (reverse)
      int[] lastAbove = new int[seq.length];  // last maxima of cumSums[]
      int finalDescent = seq.length-1;
      for( int i = seq.length-1; i >= 0; i-- )
       { if( cumSums[i] > cumSums[finalDescent] )
            finalDescent = i;
       // finalDescent is the right-most index to
       // maximize cumSums[i..seq.length-1]
         lastAbove[i] = finalDescent;
       }//for i
      // NB. cumSums[lastAbove[i]] is montonic, flat or decreasing,
      //     as i increases.

//3rd pass
      int j = -1; double cumSumI1 = 0;
      int begins = 0, ends = -1;       // so far, best interval is empty
      for( int i = 0; i < seq.length; i++ )
       { // cumSumI1 = SUM scores of seq[0..i-1]
         // It is sufficient to consider intervals [i..j] where i is a
         // new "first below" minimum and j is a "last above" maximum.
         while( j < seq.length-1 && cumSums[lastAbove[j+1]] >= cumSumI1 )
            j = lastAbove[j+1];
         if( j - i > ends - begins )
          { begins = i; ends = j; }
         cumSumI1 = cumSums[i];
       }//for i

      return( new indexPair(begins, ends) );
    }//longestBiased2

// ----------------------------------------------------------------------------


   public static char[] readSeq(String fileName, String dontIgnore)
    { StringBuffer sb = new StringBuffer();
      try{ File inputFile = new File( fileName );
           FileReader in  = new FileReader(inputFile);
           int c;
           while((c = in.read()) != -1)
            { if( dontIgnore.indexOf(c) < 0 ) continue;
              char ch = (char) c;
              sb.append( ch );
         }  }
      catch(Exception e)
         { System.out.println( "failed: " + e.getMessage() );
           System.exit(1);
         }
      String s = sb.toString();
      char[] arr = new char[s.length()];
      for( int i = 0; i < s.length(); i++ )
         arr[i] = s.charAt(i);
      return arr;
    }//readSeq



   public static indexPair  run( String select, double fraction, char[] seq )
   // Do a scan of the String at the given fraction and print the findings.
    { int cutoff = 20;

//---------------------------------------------------------------------
// Comment one of these out, and also make consequent changes, if you |
// only want to run one of the algorithms ...                         |
//---------------------------------------------------------------------

      indexPair interval1 = longestBiased1( select, fraction, seq );
      indexPair interval2 = longestBiased2( select, fraction, seq );
      if( interval1.snd - interval1.fst != interval2.snd - interval2.fst )
         System.out.print( "DISAGREEMENT ");

      int len = interval1.snd - interval1.fst + 1;
      System.out.print( " >=" + fraction + ": ["
                      + interval1.fst + ".." + interval1.snd
                      + "] (" + len + ") " );

      if( len <= cutoff )                        // short, print all characters
       { for(int i = interval1.fst; i <= interval1.snd; i++ )
            System.out.print( seq[i] );
       }
      else                     // long, print first few and last few characters
       { for(int i = interval1.fst; i < interval1.fst + cutoff/2; i++)
            System.out.print( seq[i] );
         System.out.print( "..." );
         for(int i = interval1.snd - cutoff/2 + 1; i <= interval1.snd; i++)
            System.out.print( seq[i] );
       }

      System.out.print( " [" + + interval2.fst + ".." + interval2.snd + "]" );

      System.out.println();
      return interval1;
    }//run



   public static void  main(String[] argv)
   { System.out.println("#--- Biased.java, Lloyd Allison ---");

     for(int i=0; i < argv.length; i++)           // command line params if any
        System.out.println("# argv[" + i + "] = " + argv[i]);

     String select   = "";
     double fraction = 0.5;
     String fileName = "";
     try
      { select   = argv[0];
        fraction = new Double(argv[1]).doubleValue();     // read the min' bi@s
        fileName = argv[2];
      }
     catch(Exception e)
      { System.out.println("usage: java prog_name \"chars\" fraction filename");
        System.exit(1);
      }


     char[] arr = readSeq( fileName, "acgtuACGTUrnyRNY" );  // INPUT THE STRING
     // NB. you may want to change the set of characters that are not ignored.


     System.out.print("# " + arr.length + "bp");
     System.out.print(" ");
     for(int i=0; i < (arr.length < 65 ? arr.length : 65); i++)
        System.out.print( arr[i] );
     System.out.println( arr.length > 65 ? "..." : "" );

     if( fraction >= 0 && fraction <= 1 )                // a "proper" fraction
        run( select, fraction, arr );

     else // specified fraction > 1 or < 0; take this as a flag to try several
          // decreasing fractions until we get the whole string, and stop.
        for( int ifrac = 100; ifrac > 0; ifrac -= 5 )
         { indexPair interval = run( select, ifrac/100.0, arr );
           if( interval.snd - interval.fst + 1 >= arr.length ) break;
         }

     System.out.println("#--- end ---");
   }//main

 }//Biased class


// ---------------------------------------------------------------------
// 10/2002, 1/2002, 7/2003, 10/2003 L.Allison                          |
// Note the ``conditions of use'' in the code above.                   |
// ---------------------------------------------------------------------


