///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///
/// =========================================================================
// IC0 preconditioner, compiled when neither TRILINOS nor PASTIX is available:
// used as a replacement for the MPI-based trilinos/ifpack one.
// note: instantiated for both the sequential & the MPI (distributed) cases
//
#include "rheolef/config.h"
#if !defined(_RHEOLEF_HAVE_TRILINOS) && !defined(_RHEOLEF_HAVE_PASTIX)
#include "solver_no_trilinos_ifpack.h"

namespace rheolef {
using namespace std;

// =========================================================================
// part 1. IC0 algorithms
// =========================================================================
// count non-zero entries of the strict upper part of A
template<class T, class M>
static
typename csr<T,M>::size_type
nnz_upper_strict (const csr<T,M>& a)
{
  typedef typename csr<T,M>::size_type size_type;
  size_type nnz = 0;
  typename csr<T,M>::const_iterator ia = a.begin();
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (typename csr<T,M>::const_data_iterator p = ia[i]; p < ia[i+1]; p++) {
      size_type j = (*p).first;
      if (j > i) nnz++;
    }
  }
  return nnz;
}
// extract non-zero entries of the diag+upper part of A
template<class T, class M>
static
void
extract_upper (const csr<T,M>& a, csr<T,M>& u)
{
  typedef typename csr<T,M>::size_type size_type;
  typename csr<T,M>::const_iterator ia = a.begin();
  typename csr<T,M>::iterator       iu = u.begin_nonconst();
  typename csr<T,M>::data_iterator  pu = iu[0];
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    for (typename csr<T,M>::const_data_iterator pa = ia[i]; pa < ia[i+1]; pa++) {
      size_type j = (*pa).first;
      if (j >= i) {
        *pu++ = *pa;
      }
    }
    iu [i+1] = pu;
  }
}
// inplace IC0(A)
template<class T, class M>
static
void
inplace_ic0 (csr<T,M>& u)
{
  typedef typename csr<T,M>::size_type size_type;
  typedef typename csr<T,M>::data_iterator data_iterator;
  typename csr<T,M>::iterator iu = u.begin_nonconst();
  size_type n = u.nrow();
  for (size_type k = 0; k+1 < n; k++) {
    data_iterator pkk = iu[k];
    T ukk = (*pkk).second = sqrt((*pkk).second);
    for (data_iterator pik = pkk+1; pik < iu[k+1]; pik++) {
      (*pik).second /= ukk;
    }
    for (data_iterator pik = pkk+1; pik < iu[k+1]; pik++) {
      size_type i   = (*pik).first;
      T         aik = (*pik).second;
      data_iterator pij = pik;
      for (data_iterator pji = iu[i]; pji < iu[i+1]; pji++) {
        for ( ; pij < iu[k+1] && (*(pij+1)).first <= (*pji).first; pij++) {
          if ((*pij).first == (*pji).first) {
            (*pji).second -= aik*(*pij).second;
          }
        }
      }
    }
  }
  if (n >= 1) {
    typename csr<T,M>::data_iterator pnn = iu[n-1];
    (*pnn).second = sqrt((*pnn).second);
  }
}
template<class T, class M>
static
void
inplace_csc_lower_diag_solve (const csr<T,M>& a, vec<T,M>& x_vec)
{
  typedef typename csr<T,M>::size_type size_type;
  typename csr<T,M>::const_iterator ia = a.begin();
  typename vec<T,M>::iterator x  = x_vec.begin();
  for (size_type i = 0, n = a.nrow(); i < n; i++) {
    T aii = (*(ia[i])).second;
    T xi = x[i] / aii;
    x[i] = xi;
    for (typename csr<T,M>::const_data_iterator pij = ia[i]; pij < ia[i+1]; pij++) {
      size_type j   = (*pij).first;
      T         aij = (*pij).second;
      x[j] -= aij * xi;
    }
  }
}
template<class T, class M>
static
void
inplace_csr_upper_diag_solve (const csr<T,M>& a, vec<T,M>& x_vec, const vec<T,M>& b_vec)
{
  typedef typename csr<T,M>::size_type size_type;
  typename csr<T,M>::const_iterator ia = a.begin();
  typename vec<T,M>::iterator x  = x_vec.begin();
  typename vec<T,M>::const_iterator b  = b_vec.begin();
  size_type n = a.nrow();
  for (long int i = n - 1; i >= 0; i--) {
    T sum = 0;
    for (typename csr<T,M>::const_data_iterator pij = ia[i]; pij < ia[i+1]; pij++) {
      size_type j   = (*pij).first;
      T         aij = (*pij).second;
      
    }
    typename csr<T,M>::const_data_iterator pii = ia[i];
    T aii = (*pii).second;
    x[i] = (b[i] - sum) / aii;
  }
}
// =========================================================================
// part 2. the class interface
// =========================================================================
template<class T, class M>
void
solver_no_trilinos_ifpack_rep<T,M>::update_values (const csr<T,M>& a)
{
  check_macro (a.is_symmetric(), "ic0: unsymmetric matrix not supported");
  extract_upper (a, _ic0);
  inplace_ic0   (_ic0);
}
template<class T, class M>
solver_no_trilinos_ifpack_rep<T,M>::solver_no_trilinos_ifpack_rep (const csr<T,M>& a, const solver_option_type& opt)
 : solver_abstract_rep<T,M>(opt),
   _ic0()
{
  size_t nnz_u = nnz_upper_strict (a);
  _ic0.resize (a.row_ownership(), a.col_ownership(), nnz_u + a.nrow());
  update_values (a);
}
template<class T, class M>
solver_no_trilinos_ifpack_rep<T,M>::~solver_no_trilinos_ifpack_rep ()
{
}
template<class T, class M>
vec<T,M>
solver_no_trilinos_ifpack_rep<T,M>::solve (const vec<T,M>& b) const
{
  vec<T,M> x = b;
  inplace_csc_lower_diag_solve (_ic0, x);
  inplace_csr_upper_diag_solve (_ic0, x, b);
  return x;
}
template<class T, class M>
vec<T,M>
solver_no_trilinos_ifpack_rep<T,M>::trans_solve (const vec<T,M>& b) const
{
  return solve (b);
}
// ----------------------------------------------------------------------------
// instantiation in library
// ----------------------------------------------------------------------------

// explicit instantiation for the sequential case
template class solver_no_trilinos_ifpack_rep<Float,sequential>;
#ifdef _RHEOLEF_HAVE_MPI
// explicit instantiation for the distributed case: only when MPI is configured
template class solver_no_trilinos_ifpack_rep<Float,distributed>;
#endif // _RHEOLEF_HAVE_MPI

} // namespace rheolef
#endif // !defined(_RHEOLEF_HAVE_TRILINOS) && !defined(_RHEOLEF_HAVE_PASTIX)
