
/******************************************************************************
 *
 *  This file is part of canu, a software program that assembles whole-genome
 *  sequencing reads into contigs.
 *
 *  This software is based on:
 *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
 *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
 *
 *  Except as indicated otherwise, this is a 'United States Government Work',
 *  and is released in the public domain.
 *
 *  File 'README.licenses' in the root directory of this distribution
 *  contains full conditions and disclaimers.
 */

#include "prefixEditDistance.H"




//  Trace the alignment recorded in Edit_Array_Lazy backwards, starting
//  from the cell at row e (number of errors) and column d (diagonal),
//  down to row 0, and store its delta encoding in Right_Delta.
//
//  Each step from row k to row k-1 picks the predecessor with the largest
//  value among
//      1 + Edit_Array_Lazy[k-1][d]       (same diagonal)
//          Edit_Array_Lazy[k-1][d-1]     (diagonal below)
//      1 + Edit_Array_Lazy[k-1][d+1]     (diagonal above)
//  with ties resolved in that order.  Only steps that change diagonal
//  push an entry onto Delta_Stack; a same-diagonal step pushes nothing.
//
//  On return Right_Delta_Len is the number of entries written to
//  Right_Delta.
//
//  Used in Prefix_Edit_Distance
//
void
prefixEditDistance::Set_Right_Delta (int e, int d) {
  assert(Edit_Array_Lazy[e] != NULL);

  //  Row value at the most recent diagonal change (initially the end cell).
  int  rowReached = Edit_Array_Lazy[e][d];

  Right_Delta_Len = 0;

  for (int errs = e;  errs > 0;  errs--) {
    assert(Edit_Array_Lazy[errs] != NULL);

    int  viaSame  = 1 + Edit_Array_Lazy[errs - 1][d];
    int  viaBelow =     Edit_Array_Lazy[errs - 1][d - 1];
    int  viaAbove = 1 + Edit_Array_Lazy[errs - 1][d + 1];

    //  step is the change in diagonal for the chosen predecessor.
    int  step = 0;
    int  best = viaSame;

    if (viaBelow > best) {
      step = -1;
      best = viaBelow;
    }
    if (viaAbove > best) {
      step = +1;
      best = viaAbove;
    }

    //  Staying on the same diagonal emits no entry and leaves rowReached
    //  alone, so the next entry spans this step too.
    if (step == 0)
      continue;

    //  Both cases store the magnitude (rowReached - best + 1); the sign
    //  records which direction the diagonal moved.
    if (step < 0)
      Delta_Stack[Right_Delta_Len++] = best - rowReached - 1;
    else
      Delta_Stack[Right_Delta_Len++] = rowReached - (best - 1);

    d          += step;
    rowReached  = Edit_Array_Lazy[errs - 1][d];
  }

  //  Final entry covers the stretch from the start of the alignment.
  Delta_Stack[Right_Delta_Len++] = rowReached + 1;

  //  Delta_Stack was filled from the end of the alignment backwards.  Pop it
  //  in reverse, pairing the magnitude of each entry with the sign of the
  //  entry pushed just before it.
  int  out = 0;
  int  top = Right_Delta_Len - 1;

  while (top > 0) {
    Right_Delta[out++] = abs (Delta_Stack[top]) * Sign (Delta_Stack[top - 1]);
    top--;
  }

  //  One fewer entry was written than was pushed.
  Right_Delta_Len--;
}







//  Return the minimum number of changes (inserts, deletes, replacements)
//  needed to match string  A[0 .. (m-1)]  with a prefix of string
//   T[0 .. (n-1)]  if it's not more than  Error_Limit .
//  If no match, return the number of errors for the best match
//  up to a branch point.
//
//  The caller must supply  m <= n  (asserted).  The character 'n' in
//  either string matches any character.
//
//  Edit_Array_Lazy[e][d]  holds the furthest position reached in  A  on
//  diagonal  d  (position in T minus position in A) using  e  errors.
//  Rows are allocated on demand with  Allocate_More_Edit_Space() .
//
//  Return value:
//    0                 - A matches T[0 .. (m-1)] with no errors.
//    e                 - some diagonal reached the end of A or T with e
//                        errors and no branch point was declared.
//    Max_Score_Best_e  - the error count of the best scoring prefix seen
//                        so far; returned when a branch point is declared,
//                        when pruning leaves no diagonals, or when
//                        Error_Limit is exhausted.
//
//  Put delta description of alignment in  Right_Delta  and set
//  Right_Delta_Len  to the number of entries there.  This is done via
//  Set_Right_Delta()  on every return except the exact match, which
//  leaves  Right_Delta_Len  at zero.
//  Set  A_End  and  T_End  to the rightmost positions where the
//  alignment ended in  A  and  T , respectively (one past the last
//  aligned position).
//  Set  Match_To_End  true if the match extended to the end
//  of at least one string; otherwise, set it false to indicate
//  a branch point.

int32
prefixEditDistance::forward(char    *A,   int32 m,
                            char    *T,   int32 n,
                            int32    Error_Limit,
                            int32   &A_End,
                            int32   &T_End,
                            bool    &Match_To_End) {
  double  Score;
  int  Max_Score_Len = 0, Max_Score_Best_d = 0, Max_Score_Best_e = 0;
  int  Best_d, Best_e, Longest, Row;
  int  d, e, j;

#ifdef SHOW_BRI
  fprintf(stderr, "FORWARD errorlimit %d m %d n %d\n", Error_Limit, m, n);
#endif

  assert (m <= n);
  Best_d = Best_e = Longest = 0;
  Right_Delta_Len = 0;

  //  Zero-error row: extend along diagonal 0 for as long as characters match.
  for (Row = 0;  Row < m
          && (A[Row] == T[Row]
              || A[Row] == 'n'
              || T[Row] == 'n');  Row++)
    ;

  if (Edit_Array_Lazy[0] == NULL)
    Allocate_More_Edit_Space(0);

  Edit_Array_Lazy[0][0] = Row;

  if (Row == m) {
    // Exact match
    A_End = T_End = m;
    Match_To_End = true;
#ifdef SHOW_BRI
    fprintf(stderr, "WorkArea %2d FWD exact match\n", omp_get_thread_num());
#endif
    return  0;
  }

  //  [Left, Right] is the band of diagonals still being extended.
  int32 Left  = 0;
  int32 Right = 0;

  double Max_Score = 0.0;

  for (e = 1;  e <= Error_Limit;  e++) {
#ifdef SHOW_BRI
    fprintf(stderr, "FORWARD errors %d\n", e);
#endif

    if (Edit_Array_Lazy[e] == NULL)
      Allocate_More_Edit_Space(e);

    //  Widen the band by one diagonal on each side, never beyond +-e.
    Left  = std::max(Left  - 1, -e);
    Right = std::min(Right + 1,  e);

    //  Stamp the previous row's cells at and just outside the new band
    //  edges with -2, so that they lose the three-way maximum below.
    Edit_Array_Lazy[e - 1][Left     ] = -2;
    Edit_Array_Lazy[e - 1][Left  - 1] = -2;
    Edit_Array_Lazy[e - 1][Right    ] = -2;
    Edit_Array_Lazy[e - 1][Right + 1] = -2;

    for (d = Left;  d <= Right;  d++) {
      //  Best starting row for (e, d) from the three predecessors in row
      //  e-1: same diagonal (advance in both A and T), diagonal d-1
      //  (advance in T only), diagonal d+1 (advance in A only).
      Row = 1 + Edit_Array_Lazy[e - 1][d];

      if ((j = Edit_Array_Lazy[e - 1][d - 1]) > Row)
        Row = j;

      if ((j = 1 + Edit_Array_Lazy[e - 1][d + 1]) > Row)
        Row = j;

      //  Slide along the diagonal while the characters match.
      while  (Row < m && Row + d < n && (A[Row] == T[Row + d] || A[Row] == 'n' || T[Row + d] == 'n'))
        Row++;

      Edit_Array_Lazy[e][d] = Row;
#ifdef SHOW_BRI
      fprintf(stderr, "Edit_Array_Lazy[%d][%d] = %d\n", e, d, Row);
#endif

      //  Reached the end of A or the end of T: the function returns from
      //  inside this block on every path.
      if (Row == m || Row + d == n) {
        //  Check for branch point here caused by uneven distribution of errors
        Score = Row * Branch_Match_Value - e;  //  Assumes Branch_Match_Value - Branch_Error_Value == 1.0

        //  Tail_Len is the distance from the best-scoring prefix to here.
        int32  Tail_Len = Row - Max_Score_Len;
        bool   abort    = false;

        //  NOTE(review): Tail_Len is not checked for zero before this
        //  division.  slope is only acted on below when
        //  Tail_Len >= MIN_BRANCH_END_DIST.
        double slope    = (double)(Max_Score - Score) / Tail_Len;

        if ((doingPartialOverlaps == true) && (Score < Max_Score))
          abort = true;

#ifdef SHOW_EXTEND_ALIGN
        fprintf(stderr, "WorkArea %2d FWD e=%d MIN=%d Tail_Len=%d Max_Score=%f Score=%f slope=%f SLOPE=%f\n",
                omp_get_thread_num(), e, MIN_BRANCH_END_DIST, Tail_Len, Max_Score, Score, slope, MIN_BRANCH_TAIL_SLOPE);
#endif

        if ((e > MIN_BRANCH_END_DIST / 2) &&
            (Tail_Len >= MIN_BRANCH_END_DIST) &&
            (slope >= MIN_BRANCH_TAIL_SLOPE)) {
#ifdef SHOW_BRI
          fprintf(stderr, "ABORT!  e %d > %d MIN_BRANCH_END_DIST/2  AND  TailLen %d >= %d MIN_BRANCH_END_DIST  AND  slope %f >= %f MIN_BRANCH_TAIL_SLOPE\n",
                  e, MIN_BRANCH_END_DIST / 2,
                  Tail_Len, MIN_BRANCH_END_DIST,
                  slope, MIN_BRANCH_TAIL_SLOPE);
#endif
          abort = true;
        }

        //  Branch point: report the best-scoring prefix instead of this cell.
        if (abort) {
          A_End = Max_Score_Len;
          T_End = Max_Score_Len + Max_Score_Best_d;

          Set_Right_Delta (Max_Score_Best_e, Max_Score_Best_d);

          Match_To_End = false;

#ifdef SHOW_BRI
          fprintf(stderr, "RETURN e=%d Max_Score_Best_e %d MAtch_To_End=false\n", e, Max_Score_Best_e);
#endif
          return(Max_Score_Best_e);
        }

        // Force last error to be mismatch rather than insertion
        //  If the end of A was reached with the value that came from
        //  diagonal d+1, move to diagonal d+1 and copy the value there.
        if ((Row == m) &&
            (1 + Edit_Array_Lazy[e - 1][d + 1] == Edit_Array_Lazy[e][d]) &&
            (d < Right)) {
          d++;
          Edit_Array_Lazy[e][d] = Edit_Array_Lazy[e][d - 1];
        }

        A_End = Row;           // One past last align position
        T_End = Row + d;

        Set_Right_Delta (e, d);

        Match_To_End = true;

#ifdef SHOW_BRI
        fprintf(stderr, "RETURN e=%d Match_To_End=true\n", e);
#endif
        return(e);
      }
    }

#ifdef SHOW_BRI
    fprintf(stderr, "Row %4d - left %4d len %5d maxErr %6.2f minLen %7d prev %7d - right %4d len %5d maxErr %6.2f minLen %7d prev %7d\n",
            Row,
            Left,  Edit_Array_Lazy[e][Left],  pruneAlign_pe(e, Left, 0, m),  pruneAlign_ml(e, Left, 0, m),  pruneAlign_ML(e, Left, 0, m),
            Right, Edit_Array_Lazy[e][Right], pruneAlign_pe(e, Right, 0, m), pruneAlign_ml(e, Right, 0, m), pruneAlign_ML(e, Right, 0, m));
#endif

    //  Close the bounds if the alignment isn't long enough for the number of errors we have
    //  pruneAlign() is defined elsewhere.  Negative diagonals are tested
    //  with a third argument of 0, non-negative ones with the diagonal itself.
    while  ((Left <= Right) && (Left < 0) && (pruneAlign(e, Left, 0, m))) {
#ifdef SHOW_BRI
      fprintf(stderr, "EAL[e][left=%d] %d  DELETE f1\n", Left, Edit_Array_Lazy[e][Left]);
#endif
      Left++;
    }

    if (Left >= 0)
      while  ((Left <= Right) && (pruneAlign(e, Left, Left, m))) {
#ifdef SHOW_BRI
        fprintf(stderr, "EAL[e][left=%d] %d  DELETE f2\n", Left, Edit_Array_Lazy[e][Left]);
#endif
        Left++;
      }

    //  Every diagonal was pruned: give up and report the best prefix.
    if (Left > Right) {
#ifdef SHOW_EXTEND_ALIGN
      fprintf(stderr, "WorkArea %2d FWD BREAK at Left=%d Right=%d\n", omp_get_thread_num(), Left, Right);
#endif
      break;
    }

    while  ((Right > 0) && (pruneAlign(e, Right, Right, m))) {
#ifdef SHOW_BRI
      fprintf(stderr, "EAL[e][right=%d] %d  DELETE f3\n", Right, Edit_Array_Lazy[e][Right]);
#endif
      Right--;
    }

    //  NOTE(review): this loop has no Left <= Right guard.  It appears to
    //  rely on pruneAlign() returning false at diagonal Left, where the
    //  left-side scans above stopped; the assert below checks the result.
    if (Right <= 0)
      while  (pruneAlign(e, Right, 0, m)) {
#ifdef SHOW_BRI
        fprintf(stderr, "EAL[e][right=%d] %d  DELETE f4\n", Right, Edit_Array_Lazy[e][Right]);
#endif
        Right--;
      }

    assert (Left <= Right);

    //  Track the longest extension seen over all rows so far.
    for (d = Left;  d <= Right;  d++)
      if (Edit_Array_Lazy[e][d] > Longest) {
        Best_d = d;
        Best_e = e;
        Longest = Edit_Array_Lazy[e][d];
      }

    Score = Longest * Branch_Match_Value - e;

    // Assumes  Branch_Match_Value - Branch_Error_Value == 1.0
    if (Score > Max_Score) {
      Max_Score = Score;
      Max_Score_Len = Longest;
      Max_Score_Best_d = Best_d;
      Max_Score_Best_e = Best_e;
    }
  }

#ifdef SHOW_EXTEND_ALIGN
  fprintf(stderr, "WorkArea %2d FWD ERROR_LIMIT at e=%d Error_Limit=%d best_e=%d\n", omp_get_thread_num(), e, Error_Limit, Max_Score_Best_e);
#endif

  //  Error_Limit exhausted, or the band became empty: report the
  //  best-scoring prefix alignment as a branch point.
  A_End = Max_Score_Len;
  T_End = Max_Score_Len + Max_Score_Best_d;
  Set_Right_Delta (Max_Score_Best_e, Max_Score_Best_d);
  Match_To_End = false;
#ifdef SHOW_BRI
  fprintf(stderr, "RETURN e=%d Exhausted errors Max_Score_Best_e %d\n", e, Max_Score_Best_e);
#endif
  return  Max_Score_Best_e;
}
