module basicmod
  use config, only: nhymax, nhydis, timemax, dtout & 
       & , ngridtotal1, ngridtotal2, ngridtotal3, ntiles & 
       & , x1min, x1max, x2min, x2max, x3min, x3max &
       & , ncomp
  implicit none
      integer::nhy
      real(8)::time,dt
      data time / 0.0d0 /

      integer,parameter:: ngrid1 = ngridtotal1/ntiles(1)
      integer,parameter:: ngrid2 = ngridtotal2/ntiles(2)
      integer,parameter:: ngrid3 = ngridtotal3/ntiles(3)
      integer,parameter:: mgn=2
      integer,parameter:: in=ngrid1+2*mgn+1 &
     &                  , jn=ngrid2+2*mgn+1 &
     &                  , kn=ngrid3+2*mgn+1
      integer,parameter:: is=mgn+1 &
     &                  , js=mgn+1 &
     &                  , ks=mgn+1
      integer,parameter:: ie=ngrid1+mgn &
     &                  , je=ngrid2+mgn &
     &                  , ke=ngrid3+mgn

      real(8),dimension(in)::x1a,x1b
      real(8),dimension(jn)::x2a,x2b
      real(8),dimension(kn)::x3a,x3b

      real(8),dimension(in,jn,kn)::d,et,mv1,mv2,mv3
      real(8),dimension(in,jn,kn)::p,ei,v1,v2,v3,cs
      real(8),dimension(in,jn,kn)::b1,b2,b3,bp

      real(8),dimension(in,jn,kn)::gp,gp1a,gp2a,gp3a
      
      real(8),dimension(ncomp,in,jn,kn):: DXcomp
      real(8),dimension(ncomp,in,jn,kn)::  Xcomp
            
!$acc declare create(ngrid1,ngrid2,ngrid3)
!$acc declare create(mgn)
!$acc declare create(in,jn,kn)
!$acc declare create(is,js,ks)
!$acc declare create(ie,je,ke)
      
!$acc declare create(dt)
!$acc declare create(x1a,x1b)
!$acc declare create(x2a,x2b)
!$acc declare create(x3a,x3b)
      
!$acc declare create(d,et,mv1,mv2,mv3)
!$acc declare create(p,ei,v1,v2,v3,cs)
!$acc declare create(b1,b2,b3,bp)
!$acc declare create(gp,gp1a,gp2a,gp3a)

!$acc declare create(ncomp)
!$acc declare create(Dxcomp,Xcomp)
      
      end module basicmod
      
      module eosmod
      implicit none
! adiabatic
      real(8),parameter::gam=1.4d0!! adiabatic index
!$acc declare create(gam)
! isothermal
!      real(8)::csiso  !! isothemal sound speed
!!$acc declare create(csiso)
      end module eosmod
    
      module fluxmod
      use basicmod, only : in,jn,kn,ncomp
      implicit none
      real(8):: chg
      integer,parameter::nhyd=11+ncomp
      integer,parameter::nden=1,nve1=2,nve2=3,nve3=4,nene=5,npre=6,ncsp=7 &
           &                   ,nbm1=8,nbm2=9,nbm3=10,nbps=11&
           &                   ,nst=nhyd-ncomp+1,ned=nhyd ! composition
      real(8),dimension(nhyd,in,jn,kn):: svc
      
      integer,parameter:: mflx=9+ncomp,madd=3
      integer,parameter::mudn=     1,muvu=     2,muvv=     3,muvw=     4,muet=     5 &
     &                              ,mubu=     6,mubv=     7,mubw=     8,mubp=     9 &
     &                  ,mfdn=mflx+1,mfvu=mflx+2,mfvv=mflx+3,mfvw=mflx+4,mfet=mflx+5 &
     &                              ,mfbu=mflx+6,mfbv=mflx+7,mfbw=mflx+8,mfbp=mflx+9 &
     &                          ,must=  mflx-ncomp+1,mued=  mflx &! composition
     &                          ,mfst=2*mflx-ncomp+1,mfed=2*mflx &! composition
     &                          ,mcsp=2*mflx+1,mvel=2*mflx+2,mpre=2*mflx+3 

      integer,parameter:: mden=1,mrv1=2,mrv2=3,mrv3=4,meto=5   &
     &                          ,mrvu=muvu,mrvv=muvv,mrvw=muvw &
     &                          ,mbm1=6,mbm2=7,mbm3=8,mbps=9   &
     &                          ,mbmu=mubu,mbmv=mubv,mbmw=mubw &
     &                          ,mst=mflx-ncomp+1,med=mflx
      real(8),dimension(mflx,in,jn,kn):: nflux1,nflux2,nflux3
      real(8),dimension(in,jn,kn):: grvsrc1,grvsrc2,grvsrc3

!$acc declare create(chg)
!$acc declare create(svc,nflux1,nflux2,nflux3)
!$acc declare create(grvsrc1,grvsrc2,grvsrc3)
      end module fluxmod
      
      subroutine ConsvVariable
      use basicmod
      implicit none
      integer::i,j,k
      
!$acc kernels      
!$acc loop collapse(3) independent 
      do k=ks,ke
      do j=js,je
      do i=is,ie
          et(i,j,k) = 0.5d0*d(i,j,k)*(   &
     &                    +v1(i,j,k)**2  &
     &                    +v2(i,j,k)**2  &
     &                    +v3(i,j,k)**2) &
     &               +0.5d0*(            &
     &                    +b1(i,j,k)**2  &
     &                    +b2(i,j,k)**2  &
     &                    +b3(i,j,k)**2) &
     &                    +ei(i,j,k)
          mv1(i,j,k) =d(i,j,k)*v1(i,j,k)
          mv2(i,j,k) =d(i,j,k)*v2(i,j,k)
          mv3(i,j,k) =d(i,j,k)*v3(i,j,k)
          DXcomp(1:ncomp,i,j,k) = d(i,j,k)*Xcomp(1:ncomp,i,j,k)
      enddo
      enddo
      enddo
!$acc end kernels
      
      return
      end subroutine Consvvariable

      subroutine PrimVariable
      use basicmod
      use eosmod  
      implicit none
      integer::i,j,k,n
!$acc kernels      
!$acc loop collapse(3) independent private(n)
      do k=ks,ke
      do j=js,je
      do i=is,ie
          v1(i,j,k) = mv1(i,j,k)/d(i,j,k)
          v2(i,j,k) = mv2(i,j,k)/d(i,j,k)
          v3(i,j,k) = mv3(i,j,k)/d(i,j,k)

          ei(i,j,k) =  et(i,j,k)          &
     &          -0.5d0*d(i,j,k)*(         &
     &                    +v1(i,j,k)**2   &
     &                    +v2(i,j,k)**2   &
     &                    +v3(i,j,k)**2)  &
     &          -0.5d0*(                  &
     &                    +b1(i,j,k)**2   &
     &                    +b2(i,j,k)**2   &
     &                    +b3(i,j,k)**2)
! adiabatic
           p(i,j,k) =  ei(i,j,k)*(gam-1.0d0)
           cs(i,j,k) =  sqrt(gam*p(i,j,k)/d(i,j,k))
! isotermal
!           p(i,j,k) =  d(i,j,k)*csiso**2
!          cs(i,j,k) =  csiso
           
          do n=1,ncomp
             DXcomp(n,i,j,k) = max(0.0d0,DXcomp(n,i,j,k))
          enddo
          Xcomp(1:ncomp,i,j,k) = DXcomp(1:ncomp,i,j,k)/d(i,j,k)
      enddo
      enddo
      enddo
!$acc end kernels

      return
      end subroutine PrimVariable

subroutine TimestepControl
  use basicmod
  use mpimod      
  implicit none
  real(8)::dtl1
  real(8)::dtl2
  real(8)::dtl3
  real(8)::dtlocal
  real(8)::dtmin
  integer::theid
  real(8)::ctot
  integer::i,j,k

!$acc kernels
  dtmin=1.0d90
!$acc loop collapse(3) reduction(min:dtmin)  
  do k=ks,ke
  do j=js,je
  do i=is,ie
         ctot = sqrt(cs(i,j,k)**2 &
     &            +( b1(i,j,k)**2 &
     &              +b2(i,j,k)**2 &
     &              +b3(i,j,k)**2 &
     &                           )/d(i,j,k))
         dtl1 =(x1a(i+1)-x1a(i))/(abs(v1(i,j,k)) + ctot)
         dtl2 =(x2a(j+1)-x2a(j))/(abs(v2(i,j,k)) + ctot)
         dtl3 =(x3a(k+1)-x3a(k))/(abs(v3(i,j,k)) + ctot)
         dtlocal = min (dtl1,dtl2,dtl3)
         if(dtlocal .lt. dtmin) dtmin = dtlocal
  enddo
  enddo
  enddo
  bufinpmin(1) = dtmin
  bufinpmin(2) = dble(myid_w)
!$acc end kernels
  call MPIminfind
!$acc kernels
  dtmin =     bufoutmin(1)
  theid = int(bufoutmin(2))
  dt = 0.05d0 * dtmin
!$acc end kernels
!$acc update host (dt)
  
  return
end subroutine TimestepControl

      subroutine StateVevtor
      use basicmod
      use fluxmod
      use eosmod
      implicit none
      integer::i,j,k

!$acc kernels
!$acc loop collapse(3) independent
      do k=1,kn-1
      do j=1,jn-1
      do i=1,in-1
         svc(nden,i,j,k) =  d(i,j,k)
         svc(nve1,i,j,k) = v1(i,j,k)
         svc(nve2,i,j,k) = v2(i,j,k)
         svc(nve3,i,j,k) = v3(i,j,k)
         
         svc(nene,i,j,k) = ei(i,j,k)/d(i,j,k)
         svc(npre,i,j,k) = ei(i,j,k)*(gam-1.0d0)
         svc(ncsp,i,j,k) = sqrt(gam*(gam-1.0d0)*ei(i,j,k)/d(i,j,k))
         p(i,j,k) = svc(npre,i,j,k)  ! for output boundary
         
         svc(nbm1,i,j,k) = b1(i,j,k)
         svc(nbm2,i,j,k) = b2(i,j,k)
         svc(nbm3,i,j,k) = b3(i,j,k)
         svc(nbps,i,j,k) = bp(i,j,k)
         svc(nst:ned,i,j,k) = Xcomp(1:ncomp,i,j,k)
            
      enddo
      enddo
      enddo
!$acc end kernels

      return
      end subroutine StateVevtor

      subroutine minmod(a,b,d)
!$acc routine seq
      use fluxmod, only : nhyd
      implicit none
      real(8),dimension(nhyd),intent(in)::a,b
      real(8),dimension(nhyd),intent(out)::d
      integer:: n

      do n=1,nhyd
         d(n) = sign(1.0d0,a(n))*max(0.0d0,min(abs(a(n)) &
     &                                        ,sign(1.0d0,a(n))*b(n)))
      enddo

      return
      end subroutine minmod


      subroutine vanLeer(dvp,dvm,dv)
!$acc routine seq
      use fluxmod, only : nhyd
      implicit none
      real(8),dimension(nhyd),intent(in)::dvp,dvm
      real(8),dimension(nhyd),intent(out)::dv
      integer:: n

      do n=1,nhyd
         if(dvp(n)*dvm(n) .gt. 0.0d0)then
            dv(n) =2.0d0*dvp(n)*dvm(n)/(dvp(n)+dvm(n))
         else
            dv(n) = 0.0d0
         endif

      enddo

      return
      end subroutine vanLeer



      subroutine MClimiter(a,b,c,d)
!$acc routine seq
      use fluxmod, only : nhyd
      implicit none
      real(8),dimension(nhyd),intent(in)::a,b,c
      real(8),dimension(nhyd),intent(out)::d
      integer:: n

      do n=1,nhyd
         d(n) = sign(1.0d0,a(n))*max(0.0d0,min(abs(a(n))          &
     &                                  ,sign(1.0d0,a(n))*b(n)    &
     &                                  ,sign(1.0d0,a(n))*c(n)))
      enddo

      return
      end subroutine MClimiter



      subroutine NumericalFlux1
      use basicmod, only: is,ie,in,js,je,jn,ks,ke,kn
      use fluxmod
      implicit none
      integer::i,j,k
      real(8),dimension(nhyd):: dsvp,dsvm,dsvc,dsv
      real(8),dimension(nhyd):: Pleftc1, Pleftc2, Plefte
      real(8),dimension(nhyd):: Prigtc1, Prigtc2, Prigte
      real(8),dimension(2*mflx+madd):: leftco,rigtco
      real(8),dimension(mflx):: nflux
      real(8):: ptl,css,cts,etot
      
!$acc kernels      
!$acc loop collapse(2) independent private(i, Pleftc1, Pleftc2, Plefte,Prigtc1, Prigtc2, Prigte,dsv,dsvp,dsvm,etot,ptl,css,cts,leftco,rigtco,nflux)
      do k=ks,ke
      do j=js,je
      do i=is,ie+1
         Pleftc1(:) = svc(:,i-2,j,k)
         Pleftc2(:) = svc(:,i-1,j,k)
         Prigtc1(:) = svc(:,i  ,j,k)
         Prigtc2(:) = svc(:,i+1,j,k)
         
! | Pleftc1   | Pleftc2 =>| Prigtc1   | Prigtc2   |        
!                     You are here               
!====================
! Left
!====================
         dsvp(:) = Prigtc1(:) - Pleftc2(:) 
         dsvm(:) =              Pleftc2(:) - Pleftc1(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Plefte(:) = Pleftc2(:) + 0.5d0*dsv(:)

! Consvative variables
         leftco(mudn)=Plefte(nden) ! rho
         leftco(muvu)=Plefte(nve1)*Plefte(nden)   ! rho v_x
         leftco(muvv)=Plefte(nve2)*Plefte(nden)   ! rho v_y
         leftco(muvw)=Plefte(nve3)*Plefte(nden)   ! rho v_z
         etot =Plefte(nene)*Plefte(nden) & ! e_i
     &               +0.5d0*Plefte(nden)*(                  &
     &                     +Plefte(nve1)**2                 &
     &                     +Plefte(nve2)**2                 &
     &                     +Plefte(nve3)**2)                & ! + rho v^2/2
     &               +0.5d0*                    (                 &
     &                     +Plefte(nbm1)**2                 &
     &                     +Plefte(nbm2)**2                 &
     &                     +Plefte(nbm3)**2)                  ! + B^2/2

         leftco(muet) = etot
         leftco(mubu)=Plefte(nbm1)  ! b_x
         leftco(mubv)=Plefte(nbm2)  ! b_y
         leftco(mubw)=Plefte(nbm3)  ! b_z
         leftco(mubp)=Plefte(nbps)  ! psi
         leftco(must:mued)=Plefte(nst:ned)*Plefte(nden) ! rho X
         
! Flux
         ptl = Plefte(npre) + ( Plefte(nbm1)**2        &
     &                         +Plefte(nbm2)**2        &
     &                         +Plefte(nbm3)**2)/2.0d0 

         leftco(mfdn)=Plefte(nden)             *Plefte(nve1)
         leftco(mfvu)=Plefte(nden)*Plefte(nve1)*Plefte(nve1) &
     &                        +ptl-Plefte(nbm1)*Plefte(nbm1)
         leftco(mfvv)=Plefte(nden)*Plefte(nve2)*Plefte(nve1) &
     &                            -Plefte(nbm2)*Plefte(nbm1)
         leftco(mfvw)=Plefte(nden)*Plefte(nve3)*Plefte(nve1) &
     &                            -Plefte(nbm3)*Plefte(nbm1)
         leftco(mfet)= (etot +ptl   )*Plefte(nve1) &
     &                -( Plefte(nbm1)*Plefte(nve1) &
     &                  +Plefte(nbm2)*Plefte(nve2) &
     &                  +Plefte(nbm3)*Plefte(nve3))*Plefte(nbm1)

         leftco(mfbu) =  0.0d0
         leftco(mfbv) =  Plefte(nbm2)*Plefte(nve1) &
     &                  -Plefte(nve2)*Plefte(nbm1)
         leftco(mfbw) =  Plefte(nbm3)*Plefte(nve1) &
     &                  -Plefte(nve3)*Plefte(nbm1)
         leftco(mfbp) = 0.0d0  ! psi
         leftco(mfst:mfed)=Plefte(nden)*Plefte(nst:ned)*Plefte(nve1) ! rho X v
         
         css =Plefte(ncsp)**2
         cts =  css  & !c_s^2*c_a^2
     &                       +( Plefte(nbm1)**2  &
     &                         +Plefte(nbm2)**2  &
     &                         +Plefte(nbm3)**2)/Plefte(nden) 

         leftco(mcsp)= sqrt((cts +sqrt(cts**2                 &
     &                             -4.0d0*css*Plefte(nbm1)**2 &
     &                                       /Plefte(nden))   &
     &                            )/2.0d0)
         leftco(mvel)= Plefte(nve1)
         leftco(mpre)= ptl

! | Pleftc1   | Pleftc2 |<= Prigtc1   | Prigtc2   |        
!                     You are here               
!====================
! Right
!====================
         dsvp = Prigtc2(:) - Prigtc1(:) 
         dsvm =              Prigtc1(:) - Pleftc2(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Prigte(:) = Prigtc1(:) - 0.5d0*dsv(:)
         
! Consvative variables
         rigtco(mudn)=Prigte(nden) ! rho
         rigtco(muvu)=Prigte(nve1)*Prigte(nden)   ! rho v_x
         rigtco(muvv)=Prigte(nve2)*Prigte(nden)   ! rho v_y
         rigtco(muvw)=Prigte(nve3)*Prigte(nden)   ! rho v_z
         etot =Prigte(nene)*Prigte(nden) & ! e_i
     &               +0.5d0*Prigte(nden)*(                  &
     &                     +Prigte(nve1)**2                 &
     &                     +Prigte(nve2)**2                 &
     &                     +Prigte(nve3)**2)                & ! + rho v^2/2
     &               +0.5d0*                    (                 &
     &                     +Prigte(nbm1)**2                 &
     &                     +Prigte(nbm2)**2                 &
     &                     +Prigte(nbm3)**2)                 ! + B^2/2
         rigtco(muet) = etot
         rigtco(mubu)=Prigte(nbm1)  ! b_x
         rigtco(mubv)=Prigte(nbm2)  ! b_y
         rigtco(mubw)=Prigte(nbm3)  ! b_z
         rigtco(mubp)=Prigte(nbps)  ! psi
         rigtco(must:mued)=Prigte(nden)*Prigte(nst:ned) ! rho X
 
! Flux
         ptl = Prigte(npre) + ( Prigte(nbm1)**2        &
     &                         +Prigte(nbm2)**2        &
     &                         +Prigte(nbm3)**2)/2.0d0

         rigtco(mfdn)=Prigte(nden)             *Prigte(nve1)
         rigtco(mfvu)=Prigte(nden)*Prigte(nve1)*Prigte(nve1) &
     &                        +ptl-Prigte(nbm1)*Prigte(nbm1)
         rigtco(mfvv)=Prigte(nden)*Prigte(nve2)*Prigte(nve1) &
     &                            -Prigte(nbm2)*Prigte(nbm1)
         rigtco(mfvw)=Prigte(nden)*Prigte(nve3)*Prigte(nve1) &
     &                            -Prigte(nbm3)*Prigte(nbm1)
         rigtco(mfet)=(etot+ptl)*Prigte(nve1) &
     &                     -( Prigte(nbm1)*Prigte(nve1)     &
     &                       +Prigte(nbm2)*Prigte(nve2)     &
     &                       +Prigte(nbm3)*Prigte(nve3))*Prigte(nbm1)
     
         rigtco(mfbu) =  0.0d0
         rigtco(mfbv) =  Prigte(nbm2)*Prigte(nve1) &
     &                  -Prigte(nve2)*Prigte(nbm1)
         rigtco(mfbw) =  Prigte(nbm3)*Prigte(nve1) &
     &                  -Prigte(nve3)*Prigte(nbm1)
         rigtco(mfbp) = 0.0d0  ! b_z
         rigtco(mfst:mfed)=Prigte(nden)*Prigte(nst:ned)*Prigte(nve1) ! rho X v
         
         css = Prigte(ncsp)**2
         cts =  css   &!c_s^2*c_a^2
     &                       +( Prigte(nbm1)**2 &
     &                         +Prigte(nbm2)**2 &
     &                         +Prigte(nbm3)**2)/Prigte(nden) 


         rigtco(mcsp)= sqrt((cts +sqrt(cts**2                 &
     &                             -4.0d0*css*Prigte(nbm1)**2 &
     &                                       /Prigte(nden))   &
     &                            )/2.0d0)
         rigtco(mvel)= Prigte(nve1)
         rigtco(mpre)= ptl

!         call HLLE(leftco,rigtco,nflux)
!         call HLLC(leftco,rigtco,nflux)
         call HLLD(leftco,rigtco,nflux)
         nflux1(mden,i,j,k)=nflux(mden)
         nflux1(mrv1,i,j,k)=nflux(mrvu)
         nflux1(mrv2,i,j,k)=nflux(mrvv)
         nflux1(mrv3,i,j,k)=nflux(mrvw)
         nflux1(meto,i,j,k)=nflux(meto)
         nflux1(mbm1,i,j,k)=nflux(mbmu)
         nflux1(mbm2,i,j,k)=nflux(mbmv)
         nflux1(mbm3,i,j,k)=nflux(mbmw)
         nflux1(mst:med,i,j,k)=nflux(mst:med)

         nflux1(mbm1,i,j,k) =  0.5d0*(leftco(mubp)+rigtco(mubp)) &
     &                    -0.5d0*chg*(rigtco(mubu)-leftco(mubu))        ! finite volume
         nflux1(mbps,i,j,k) = (0.5d0*(leftco(mubu)+rigtco(mubu)) &
     &                    -0.5d0/chg*(rigtco(mubp)-leftco(mubp)))*chg**2 ! finite volume

!         print *, "bpf1",nflux1(mbps,i,j,k)

      enddo
      enddo
      enddo
!$acc end kernels

      return
      end subroutine Numericalflux1

      subroutine NumericalFlux2
      use basicmod, only: is,ie,in,js,je,jn,ks,ke,kn
      use fluxmod
      implicit none
! | Pleftc1   | Pleftc2 | Prigtc1   | Prigtc2   |        
!                     You are here               
      integer::i,j,k
      real(8),dimension(nhyd):: dsvp,dsvm,dsvc,dsv
      real(8),dimension(nhyd):: Pleftc1, Pleftc2, Plefte
      real(8),dimension(nhyd):: Prigtc1, Prigtc2, Prigte
      real(8),dimension(2*mflx+madd):: leftco,rigtco
      real(8),dimension(mflx):: nflux
      real(8):: ptl,css,cts,etot

!$acc kernels
!$acc loop collapse(2) independent private(j, Pleftc1, Pleftc2, Plefte,Prigtc1, Prigtc2, Prigte,dsv,dsvp,dsvm,etot,ptl,css,cts,leftco,rigtco,nflux)
      do k=ks,ke
      do i=is,ie
      do j=js,je+1
         Pleftc1(:) = svc(:,i,j-2,k)
         Pleftc2(:) = svc(:,i,j-1,k)
         Prigtc1(:) = svc(:,i,j  ,k)
         Prigtc2(:) = svc(:,i,j+1,k)     
! | Pleftc1   | Pleftc2 |<= Prigtc1   | Prigtc2   |        
!                     You are here   
!====================
! Left
!====================
         dsvp(:) = Prigtc1(:) - Pleftc2(:) 
         dsvm(:) = Pleftc2(:) - Pleftc1(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Plefte(:) = Pleftc2(:) + 0.5d0*dsv(:)
         
! Consvative variables
         leftco(mudn)=Plefte(nden)
         leftco(muvw)=Plefte(nve1)*Plefte(nden)
         leftco(muvu)=Plefte(nve2)*Plefte(nden)   ! rho v
         leftco(muvv)=Plefte(nve3)*Plefte(nden)
         etot        =Plefte(nene)*Plefte(nden) & ! internal
     &               +0.5d0*Plefte(nden)*(                  &
     &                     +Plefte(nve1)**2                 &
     &                     +Plefte(nve2)**2                 &
     &                     +Plefte(nve3)**2)                & ! kinetic
     &               +0.5d0*                    (                 &
     &                     +Plefte(nbm1)**2                 &
     &                     +Plefte(nbm2)**2                 &
     &                     +Plefte(nbm3)**2) ! magnetic
         leftco(muet)=etot
         leftco(mubw)=Plefte(nbm1)  ! b_x
         leftco(mubu)=Plefte(nbm2)  ! b_y
         leftco(mubv)=Plefte(nbm3)  ! b_z
         leftco(mubp)=Plefte(nbps)  ! psi
         leftco(must:mued)=Plefte(nst:ned)*Plefte(nden) ! rho X

         ptl = Plefte(npre) + ( Plefte(nbm1)**2        &
     &                         +Plefte(nbm2)**2        &
     &                         +Plefte(nbm3)**2)/2.0d0 

         leftco(mfdn)=Plefte(nden)             *Plefte(nve2)   ! rho v
         leftco(mfvw)=Plefte(nden)*Plefte(nve1)*Plefte(nve2) &
     &                            -Plefte(nbm1)*Plefte(nbm2)
         leftco(mfvu)=Plefte(nden)*Plefte(nve2)*Plefte(nve2) &
     &                        +ptl-Plefte(nbm2)*Plefte(nbm2)
         leftco(mfvv)=Plefte(nden)*Plefte(nve3)*Plefte(nve2) &
     &                            -Plefte(nbm3)*Plefte(nbm2)
         leftco(mfet)=(etot+ptl)*Plefte(nve2) &
     &                     -( Plefte(nbm1)*Plefte(nve1)     &
     &                       +Plefte(nbm2)*Plefte(nve2)     &
     &                       +Plefte(nbm3)*Plefte(nve3))*Plefte(nbm2)

         leftco(mfbw) =  Plefte(nbm1)*Plefte(nve2) &
     &                  -Plefte(nve1)*Plefte(nbm2)
         leftco(mfbu) =  0.0d0
         leftco(mfbv) =  Plefte(nbm3)*Plefte(nve2) &
     &                  -Plefte(nve3)*Plefte(nbm2)
         leftco(mfbp) = 0.0d0  ! psi
         leftco(mfst:mfed)=Plefte(nst:ned)*Plefte(nden)*Plefte(nve2) ! rho X v
     
         css = Plefte(ncsp)**2
         cts =  css  & !c_s^2*c_a^2
     &                       +( Plefte(nbm1)**2  &
     &                         +Plefte(nbm2)**2  &
     &                         +Plefte(nbm3)**2)/Plefte(nden) 

         leftco(mcsp)= sqrt((cts +sqrt(cts**2                  &
     &                             -4.0d0*css*Plefte(nbm2)**2  &
     &                                          /Plefte(nden)) &
     &                            )/2.0d0)
         leftco(mvel)= Plefte(nve2)
         leftco(mpre)= ptl
     
! | Pleftc1   | Pleftc2 =>| Prigtc1   | Prigtc2   |        
!                     You are here  
!====================
! Right
!====================
         dsvp(:) = Prigtc2(:) - Prigtc1(:) 
         dsvm(:) = Prigtc1(:) - Pleftc2(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Prigte(:) = Prigtc1(:) - 0.5d0*dsv(:)

         rigtco(mudn)=Prigte(nden)
         rigtco(muvw)=Prigte(nve1)*Prigte(nden)
         rigtco(muvu)=Prigte(nve2)*Prigte(nden)   ! rho v
         rigtco(muvv)=Prigte(nve3)*Prigte(nden)
         etot =Prigte(nene)*Prigte(nden) & ! internal
     &               +0.5d0*Prigte(nden)*(                  &
     &                     +Prigte(nve1)**2                 &
     &                     +Prigte(nve2)**2                 &
     &                     +Prigte(nve3)**2)                & ! kinetic
     &               +0.5d0*                    (                 &
     &                     +Prigte(nbm1)**2                 &
     &                     +Prigte(nbm2)**2                 &
     &                     +Prigte(nbm3)**2) ! magnetic
         rigtco(muet) = etot
         rigtco(mubw)=Prigte(nbm1)  ! b_x
         rigtco(mubu)=Prigte(nbm2)  ! b_y
         rigtco(mubv)=Prigte(nbm3)  ! b_z
         rigtco(mubp)=Prigte(nbps)  ! psi
         rigtco(must:mued)=Prigte(nden)*Prigte(nst:ned) ! rho X v
         
         ptl = Prigte(npre) + ( Prigte(nbm1)**2 &
     &                         +Prigte(nbm2)**2 &
     &                         +Prigte(nbm3)**2)/2.0d0 

         rigtco(mfdn)=Prigte(nden)             *Prigte(nve2) ! rho v
         rigtco(mfvw)=Prigte(nden)*Prigte(nve1)*Prigte(nve2) &
     &                            -Prigte(nbm1)*Prigte(nbm2)
         rigtco(mfvu)=Prigte(nden)*Prigte(nve2)*Prigte(nve2) &
     &                        +ptl-Prigte(nbm2)*Prigte(nbm2)
         rigtco(mfvv)=Prigte(nden)*Prigte(nve3)*Prigte(nve2) &
     &                            -Prigte(nbm3)*Prigte(nbm2)
         rigtco(mfet)=(etot+ptl)*Prigte(nve2) &
     &                     -( Prigte(nbm1)*Prigte(nve1)     &
     &                       +Prigte(nbm2)*Prigte(nve2)     &
     &                       +Prigte(nbm3)*Prigte(nve3))*Prigte(nbm2)
         rigtco(mfbw) =  Prigte(nbm1)*Prigte(nve2) &
     &                  -Prigte(nve1)*Prigte(nbm2)
         rigtco(mfbu) =  0.0d0
         rigtco(mfbv) =  Prigte(nbm3)*Prigte(nve2) &
     &                        -Prigte(nve3)*Prigte(nbm2)
         rigtco(mfbp) = 0.0d0  ! psi
         rigtco(mfst:mfed)=Prigte(nden)*Prigte(nst:ned)*Prigte(nve2) ! rho X v
         
         css = Prigte(ncsp)**2
         cts =  css  & !c_s^2*c_a^2
     &                       +( Prigte(nbm1)**2 &
     &                         +Prigte(nbm2)**2 &
     &                         +Prigte(nbm3)**2)/Prigte(nden) 

         rigtco(mcsp)= sqrt((cts +sqrt(cts**2                  &
     &                             -4.0d0*css*Prigte(nbm2)**2  &
     &                                          /Prigte(nden)) &
     &                            )/2.0d0)
         rigtco(mvel)= Prigte(nve2)
         rigtco(mpre)= ptl

!         call HLLE(leftco,rigtco,nflux)
!         call HLLC(leftco,rigtco,nflux)
         call HLLD(leftco,rigtco,nflux)

         nflux2(mden,i,j,k)=nflux(mden)
         nflux2(mrv1,i,j,k)=nflux(mrvw)
         nflux2(mrv2,i,j,k)=nflux(mrvu) ! mrv2=3, mrvu=2
         nflux2(mrv3,i,j,k)=nflux(mrvv)
         nflux2(meto,i,j,k)=nflux(meto)
         nflux2(mbm1,i,j,k)=nflux(mbmw)
         nflux2(mbm2,i,j,k)=nflux(mbmu)
         nflux2(mbm3,i,j,k)=nflux(mbmv)
         nflux2(mst:med,i,j,k)=nflux(mst:med)

         nflux2(mbm2,i,j,k) =  0.5d0*(leftco(mubp)+rigtco(mubp)) &
     &                    -0.5d0*chg*(rigtco(mubu)-leftco(mubu))        ! finite volume
         nflux2(mbps,i,j,k) = (0.5d0*(leftco(mubu)+rigtco(mubu)) &
     &                    -0.5d0/chg*(rigtco(mubp)-leftco(mubp)))*chg**2 ! finite volume
!         print *, "bpf2",nflux2(mbps,i,j,k)

      enddo
      enddo
      enddo
!$acc end kernels
      
      return
      end subroutine Numericalflux2

      subroutine NumericalFlux3
      use basicmod, only: is,ie,in,js,je,jn,ks,ke,kn
      use fluxmod
      implicit none
      integer::i,j,k
      real(8),dimension(nhyd):: dsvp,dsvm,dsvc,dsv
      real(8),dimension(nhyd):: Pleftc1, Pleftc2, Plefte
      real(8),dimension(nhyd):: Prigtc1, Prigtc2, Prigte
      real(8),dimension(2*mflx+madd):: leftco,rigtco
      real(8),dimension(mflx):: nflux
      real(8):: ptl,css,cts,etot

!$acc kernels
!$acc loop collapse(2) independent private(k, Pleftc1, Pleftc2, Plefte,Prigtc1, Prigtc2, Prigte,dsv,dsvp,dsvm,etot,ptl,css,cts,leftco,rigtco,nflux)
      do j=js,je
      do i=is,ie
      do k=ks,ke+1
            
         Pleftc1(:) = svc(:,i,j,k-2)
         Pleftc2(:) = svc(:,i,j,k-1)
         Prigtc1(:) = svc(:,i,j,k  )
         Prigtc2(:) = svc(:,i,j,k+1)
                       
! | Pleftc1   | Pleftc2 =>| Prigtc1   | Prigtc2   |        
!                     You are here  
!====================
! Left
!====================
         dsvp(:) = Prigtc1(:) - Pleftc2(:) 
         dsvm(:) = Pleftc2(:) - Pleftc1(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Plefte(:) = Pleftc2(:) + 0.5d0*dsv(:)
         
! Consvative variables
         leftco(mudn)=Plefte(nden)
         leftco(muvv)=Plefte(nve1)*Plefte(nden)
         leftco(muvw)=Plefte(nve2)*Plefte(nden) ! rho v
         leftco(muvu)=Plefte(nve3)*Plefte(nden)
         etot =Plefte(nene)*Plefte(nden) & ! thermal
     &               +0.5d0*Plefte(nden)*(                  & ! kinetic
     &                     +Plefte(nve1)**2                 &
     &                     +Plefte(nve2)**2                 &
     &                     +Plefte(nve3)**2)                &
     &               +0.5d0*                    (                 & ! magnetic 
     &                     +Plefte(nbm1)**2                 &
     &                     +Plefte(nbm2)**2                 &
     &                     +Plefte(nbm3)**2)                 
         leftco(muet) = etot 
         leftco(mubv)=Plefte(nbm1)  ! b_x
         leftco(mubw)=Plefte(nbm2)  ! b_y
         leftco(mubu)=Plefte(nbm3)  ! b_z
         leftco(mubp)=Plefte(nbps)  ! psi
         leftco(must:mued)=Plefte(nden)*Plefte(nst:ned)
         
         ptl = Plefte(npre) + ( Plefte(nbm1)**2        &
     &                         +Plefte(nbm2)**2        &
     &                         +Plefte(nbm3)**2)/2.0d0 

         leftco(mfdn)=Plefte(nden)             *Plefte(nve3) ! rho v
         leftco(mfvv)=Plefte(nden)*Plefte(nve1)*Plefte(nve3) &
     &                            -Plefte(nbm1)*Plefte(nbm3)
         leftco(mfvw)=Plefte(nden)*Plefte(nve2)*Plefte(nve3) &
     &                            -Plefte(nbm2)*Plefte(nbm3)
         leftco(mfvu)=Plefte(nden)*Plefte(nve3)*Plefte(nve3) &
     &                     +ptl   -Plefte(nbm3)*Plefte(nbm3)
         leftco(mfet)=(etot+ptl                )*Plefte(nve3) &
     &                     -( Plefte(nbm1)*Plefte(nve1)     &
     &                       +Plefte(nbm2)*Plefte(nve2)     &
     &                       +Plefte(nbm3)*Plefte(nve3))*Plefte(nbm3)

         leftco(mfbv) =  Plefte(nbm1)*Plefte(nve3) &
     &                  -Plefte(nve1)*Plefte(nbm3)
         leftco(mfbw) =  Plefte(nbm2)*Plefte(nve3) &
     &                  -Plefte(nve2)*Plefte(nbm3)
         leftco(mfbu) = 0.d0
         leftco(mfbp) = 0.0d0  ! psi
         leftco(mfst:mfed)=Plefte(nden)*Plefte(nst:ned)*Plefte(nve3)

         css = Plefte(ncsp)**2
         cts =  css  & !c_s^2*c_a^2
     &                       +( Plefte(nbm1)**2  &
     &                         +Plefte(nbm2)**2  &
     &                         +Plefte(nbm3)**2)/Plefte(nden) 

         leftco(mcsp)= sqrt((cts +sqrt(cts**2                  &
     &                             -4.0d0*css*Plefte(nbm3)**2  &
     &                                          /Plefte(nden)) &
     &                            )/2.0d0)
         leftco(mvel)= Plefte(nve3)
         leftco(mpre)= ptl
     
! | Pleftc1   | Pleftc2 |<= Prigtc1   | Prigtc2   |        
!                     You are here  
!==============================
! Right 
!==============================
         dsvp = Prigtc2(:) - Prigtc1(:) 
         dsvm = Prigtc1(:) - Pleftc2(:)
         call vanLeer(dsvp,dsvm,dsv)
         !         call minmod(dsvp,dsvm,dsv)
         Prigte(:) = Prigtc1(:) - 0.5d0*dsv(:)
         
         rigtco(mudn)=Prigte(nden)
         rigtco(muvv)=Prigte(nve1)*Prigte(nden)
         rigtco(muvw)=Prigte(nve2)*Prigte(nden)
         rigtco(muvu)=Prigte(nve3)*Prigte(nden)
         etot =Prigte(nene)*Prigte(nden) & ! internal
     &               +0.5d0*Prigte(nden)*(                  & ! kinetic
     &                     +Prigte(nve1)**2                 &
     &                     +Prigte(nve2)**2                 &
     &                     +Prigte(nve3)**2)                & 
     &               +0.5d0*                    (                 & ! magnetic
     &                     +Prigte(nbm1)**2                 &
     &                     +Prigte(nbm2)**2                 &
     &                     +Prigte(nbm3)**2)
         rigtco(muet) = etot
         rigtco(mubv)=Prigte(nbm1)  ! b_x
         rigtco(mubw)=Prigte(nbm2)  ! b_y
         rigtco(mubu)=Prigte(nbm3)  ! b_z
         rigtco(mubp)=Prigte(nbps)  ! psi
         rigtco(must:mued)=Prigte(nden)*Prigte(nst:ned)

         ptl = Prigte(npre) + ( Prigte(nbm1)**2 &
     &                         +Prigte(nbm2)**2 &
     &                         +Prigte(nbm3)**2)/2.0d0 

         rigtco(mfdn)=Prigte(nden)             *Prigte(nve3)
         rigtco(mfvv)=Prigte(nden)*Prigte(nve1)*Prigte(nve3) &
     &                            -Prigte(nbm1)*Prigte(nbm3)
         rigtco(mfvw)=Prigte(nden)*Prigte(nve2)*Prigte(nve3) &
     &                            -Prigte(nbm2)*Prigte(nbm3)
         rigtco(mfvu)=Prigte(nden)*Prigte(nve3)*Prigte(nve3) &
     &                     +ptl   -Prigte(nbm3)*Prigte(nbm3)
         rigtco(mfet)=(etot+ptl                )*Prigte(nve3) &
     &                     -( Prigte(nbm1)*Prigte(nve1)      &
     &                       +Prigte(nbm2)*Prigte(nve2)      &
     &                       +Prigte(nbm3)*Prigte(nve3))*Prigte(nbm3)
         rigtco(mfbv) =  Prigte(nbm1)*Prigte(nve3) &
     &                  -Prigte(nve1)*Prigte(nbm3)
         rigtco(mfbw) =  Prigte(nbm2)*Prigte(nve3) &
     &                  -Prigte(nve2)*Prigte(nbm3)
         rigtco(mfbu) = 0.0d0
         rigtco(mfbp) = 0.0d0  ! psi
         rigtco(mfst:mfed)=Prigte(nden)*Prigte(nst:ned)*Prigte(nve3)
     
         css = Prigte(ncsp)**2
         cts =  css  & !c_s^2*c_a^2
     &                       +( Prigte(nbm1)**2 &
     &                         +Prigte(nbm2)**2 &
     &                         +Prigte(nbm3)**2)/Prigte(nden) 

         rigtco(mcsp)= sqrt((cts +sqrt(cts**2                  &
     &                             -4.0d0*css*Prigte(nbm3)**2  &
     &                                          /Prigte(nden)) &
     &                            )/2.0d0)
         rigtco(mvel)= Prigte(nve3)
         rigtco(mpre)= ptl

!         call HLLE(leftco,rigtco,nflux)
!         call HLLC(leftco,rigtco,nflux)
         call HLLD(leftco,rigtco,nflux)
         nflux3(mden,i,j,k)=nflux(mden)
         nflux3(mrv1,i,j,k)=nflux(mrvv)
         nflux3(mrv2,i,j,k)=nflux(mrvw)
         nflux3(mrv3,i,j,k)=nflux(mrvu)
         nflux3(meto,i,j,k)=nflux(meto)
         nflux3(mbm1,i,j,k)=nflux(mbmv)
         nflux3(mbm2,i,j,k)=nflux(mbmw)
         nflux3(mbm3,i,j,k)=nflux(mbmu)
         nflux3(mst:med,i,j,k)=nflux(mst:med)

         nflux3(mbm3,i,j,k) =  0.5d0*(leftco(mubp)+rigtco(mubp)) &
     &                    -0.5d0*chg*(rigtco(mubu)-leftco(mubu))        ! finite volume
         nflux3(mbps,i,j,k) = (0.5d0*(leftco(mubu)+rigtco(mubu)) &
     &                    -0.5d0/chg*(rigtco(mubp)-leftco(mubp)))*chg**2 ! finite volume

      enddo
      enddo
      enddo
!$acc end kernels

      return
      end subroutine Numericalflux3
      
      subroutine HLLE(leftst,rigtst,nflux)
!$acc routine seq
      use fluxmod
      implicit none
      real(8),dimension(2*mflx+madd),intent(in)::leftst,rigtst
      real(8),dimension(mflx),intent(out)::nflux
      real(8),dimension(mflx)::ul,ur,fl,fr
      real(8)::csl,csr
      real(8):: vl, vr
      real(8):: sl, sr

      ul(1:mflx) = leftst(1:mflx)
      fl(1:mflx) = leftst(mflx+1:2*mflx)
      ur(1:mflx) = rigtst(1:mflx)
      fr(1:mflx) = rigtst(mflx+1:2*mflx)
      csl=leftst(mcsp)
      csr=rigtst(mcsp)
       vl=leftst(mvel)
       vr=rigtst(mvel)

       sl = min(vl,vr) - max(csl,csr)
       sl = min(0.0d0,sl)
       sr = max(vl,vr) + max(csl,csr)
       sr = max(0.0d0,sr)

       nflux(:) = (sr*fl(:)-sl*fr(:) +sl*sr*(ur(:)-ul(:)))/(sr-sl)

      return
      end subroutine HLLE

      subroutine HLLC(leftst,rigtst,nflux)
!$acc routine seq
!=====================================================================
!
! HLLC Scheme
!
! Purpose
! Calculation of Numerical Flux by HLLC method
!
! Reference
!  Toro EF, Spruce M, Speares W. (1992,1994)
!
! Input
! Output
!=====================================================================
      use fluxmod, only: mflx,madd                &
     &                 , mudn,muvu,muvv,muvw,muet &
     &                 , mfdn,mfvu,mfvv,mfvw,mfet &
     &                 , mcsp,mvel,mpre           &
     &                 , mden,mrvu,mrvv,mrvw,meto &
     &                 , mubu,mubv,mubw,mubp      &
     &                 , mfbu,mfbv,mfbw,mfbp      &
     &                 , mbmu,mbmv,mbmw &
     &                 , ncomp, mst, med

      implicit none
      real(8),dimension(2*mflx+madd),intent(in)::leftst,rigtst
      real(8),dimension(mflx),intent(out)::nflux

!----- U -----
! qql :: left state
! qqr :: right state
      real(8) :: rol,vxl,vyl,vzl,ptl,eel
      real(8) :: ror,vxr,vyr,vzr,ptr,eer
      real(8) :: rxl,ryl,rzl
      real(8) :: rxr,ryr,rzr
      real(8) ::     byl,bzl
      real(8) ::     byr,bzr
      real(8) :: ptst
      real(8),dimension(ncomp) :: scl,scr

!----- U* ----
! qqlst ::  left state
! qqrst :: right state
      real(8) :: rolst,vxlst,vylst,vzlst,eelst
      real(8) :: rorst,vxrst,vyrst,vzrst,eerst
      real(8) :: rxlst,rylst,rzlst
      real(8) :: rxrst,ryrst,rzrst
      real(8) ::       bylst,bzlst
      real(8) ::       byrst,bzrst
      real(8),dimension(ncomp) :: sclst,scrst

!----- flux ---
! fqql ::  left physical flux
! fqqr :: right physical flux
      real(8) :: frol,frxl,fryl,frzl,feel
      real(8) ::           fbyl,fbzl
      real(8) :: fror,frxr,fryr,frzr,feer
      real(8) ::           fbyr,fbzr
      real(8),dimension(ncomp) :: fscl,fscr

!----- wave speed ---
! sl ::  left-going fastest signal velocity
! sr :: right-going fastest signal velocity
! sm :: contact discontinuity velocity
! slst ::  left-going alfven velocity
! srst :: right-going alfven velocity
      real(8) :: sm,sl,sr

! cfl :: left-state Fast wave velocity
! cfr :: right-sate Fast wave velocity
      real(8) :: cfl,cfr

!--------------------
! temporary variables
      real(8) :: sdl,sdr,sdml,sdmr,isdml,isdmr,rosdl,rosdr
      real(8) :: temp
      real(8),dimension(ncomp) :: scsdl,scsdr
! no if
      real(8) :: sign1,maxs1,mins1
      real(8) :: msl,msr

!----- Step 0. ----------------------------------------------------------|

!---- Left state
        
        rol = leftst(mudn)
        eel = leftst(muet)
        rxl = leftst(muvu)
        ryl = leftst(muvv)
        rzl = leftst(muvw)
        vxl = leftst(muvu)/leftst(mudn)
        vyl = leftst(muvv)/leftst(mudn)
        vzl = leftst(muvw)/leftst(mudn)
        scl(1:ncomp) = leftst(mst:med)
        byl = leftst(mubv)
        bzl = leftst(mubw)
        ptl = leftst(mpre)

!---- Right state
        
        ror = rigtst(mudn)
        eer = rigtst(muet)
        rxr = rigtst(muvu)
        ryr = rigtst(muvv)
        rzr = rigtst(muvw)
        vxr = rigtst(muvu)/rigtst(mudn)
        vyr = rigtst(muvv)/rigtst(mudn)
        vzr = rigtst(muvw)/rigtst(mudn)
        scr(1:ncomp) = rigtst(mst:med)
        byr = rigtst(mubv)
        bzr = rigtst(mubw)
        ptr = rigtst(mpre)
!----- Step 1. ----------------------------------------------------------|
! Compute wave left & right wave speed
!
         
        cfl = leftst(mcsp)
        cfr = rigtst(mcsp)

        sl = min(vxl,vxr)-max(cfl,cfr) ! note sl is negative
        sr = max(vxl,vxr)+max(cfl,cfr)
!----- Step 2. ----------------------------------------------------------|
! compute L/R fluxs
!
! Left value
        frol = leftst(mfdn)
        feel = leftst(mfet)
        frxl = leftst(mfvu)
        fryl = leftst(mfvv)
        frzl = leftst(mfvw)
        fbyl = leftst(mfbv)
        fbzl = leftst(mfbw)    
        fscl(1:ncomp) = leftst(mflx+mst:mflx+med)

! Right value
! Left value
        fror = rigtst(mfdn)
        feer = rigtst(mfet)
        frxr = rigtst(mfvu)
        fryr = rigtst(mfvv) 
        frzr = rigtst(mfvw)
        fbyr = rigtst(mfbv)
        fbzr = rigtst(mfbw)
        fscr(1:ncomp) = rigtst(mflx+mst:mflx+med)

!----- Step 4. ----------------------------------------------------------|
! compute middle and alfven wave
!
        sdl = sl - vxl
        sdr = sr - vxr
        rosdl = rol*sdl
        rosdr = ror*sdr
        scsdl(1:ncomp) = scl(1:ncomp)*sdl
        scsdr(1:ncomp) = scr(1:ncomp)*sdr

        temp = 1.0d0/(rosdr - rosdl)
! Eq. 45
        sm = (rosdr*vxr - rosdl*vxl - ptr + ptl)*temp
           
        sdml = sl - sm; isdml = 1.0d0/sdml
        sdmr = sr - sm; isdmr = 1.0d0/sdmr
        
!----- Step 5. ----------------------------------------------------------|
! compute intermediate states
!
! Eq. 49
        ptst = (rosdr*ptl-rosdl*ptr+rosdl*rosdr*(vxr-vxl))*temp

!----- Step 5A. ----------------------------------------------------------|
! compute Ul*
!

        rolst = rol*sdl   *isdml
        vxlst = sm
        rxlst = rolst*vxlst
           
        vylst = vyl
        rylst = rolst*vylst
        vzlst = vzl
        rzlst = rolst*vzlst

        bylst = rolst/rol * byl
        bzlst = rolst/rol * bzl

        eelst =(sdl*eel - ptl*vxl + ptst*sm  )*isdml

!----- Step 5B. ----------------------------------------------------------|
! compute Ur*
!

        rorst   = rosdr   *isdmr
        vxrst = sm
        rxrst = rorst*vxrst
        vyrst = vyr
        ryrst = rorst*vyrst
        vzrst = vzr
        rzrst = rorst*vzrst

        byrst = rorst/ror * byr
        bzrst = rorst/ror * bzr
           
        eerst = (sdr*eer - ptr*vxr  + ptst*sm  )*isdmr
              
!----- Step 6. ----------------------------------------------------------|
! compute flux
        sign1 = sign(1.0d0,sm)    ! 1 for sm>0, -1 for sm<0
        maxs1 =  max(0.0d0,sign1) ! 1 sm>0, 0 for sm<0
        mins1 = -min(0.0d0,sign1) ! 0 sm>0,-1 for sm<0

        msl   = min(sl  ,0.0d0)   ! 0 for sl > 0, sl for sl < 0
        msr   = max(sr  ,0.0d0)   ! S_R > 0

        nflux(mden) = (frol+msl*(rolst-rol))*maxs1 &
     &               +(fror+msr*(rorst-ror))*mins1
        nflux(meto) = (feel+msl*(eelst-eel))*maxs1 &
     &               +(feer+msr*(eerst-eer))*mins1
        nflux(mrvu) = (frxl+msl*(rxlst-rxl))*maxs1 &
     &               +(frxr+msr*(rxrst-rxr))*mins1
        nflux(mrvv) = (fryl+msl*(rylst-ryl))*maxs1 &
     &               +(fryr+msr*(ryrst-ryr))*mins1
        nflux(mrvw) = (frzl+msl*(rzlst-rzl))*maxs1 &
     &               +(frzr+msr*(rzrst-rzr))*mins1
        nflux(mbmu) = 0.0d0
        nflux(mbmv) = (fbyl+msl*(bylst-byl))*maxs1 &
     &               +(fbyr+msr*(byrst-byr))*mins1
        nflux(mbmw) = (fbzl+msl*(bzlst-bzl))*maxs1 &
     &               +(fbzr+msr*(bzrst-bzr))*mins1

        nflux(mst:med) = &
     &     (fscl(1:ncomp)+msl*(sclst(1:ncomp)-scl(1:ncomp)))*maxs1 &
     &    +(fscr(1:ncomp)+msr*(scrst(1:ncomp)-scr(1:ncomp)))*mins1
      return
      end subroutine HLLC

      subroutine HLLD(leftst,rigtst,nflux)
!$acc routine seq
!=====================================================================
!
! HLLD Scheme
!
! Purpose
! Calculation of Numerical Flux by HLLD method
!
! Reference
!
! Input
! Output
!=====================================================================
      use fluxmod, only: mflx,madd                &
     &                 , mudn,muvu,muvv,muvw,muet &
     &                 , mfdn,mfvu,mfvv,mfvw,mfet &
     &                 , mcsp,mvel,mpre           &
     &                 , mden,mrvu,mrvv,mrvw,meto &
     &                 , mubu,mubv,mubw,mubp      &
     &                 , mfbu,mfbv,mfbw,mfbp      &
     &                 , mbmu,mbmv,mbmw           &
     &                 , ncomp,mst,med

      implicit none
      real(8),dimension(2*mflx+madd),intent(in)::leftst,rigtst
      real(8),dimension(mflx),intent(out)::nflux

!----- U -----
! qql :: left state
! qqr :: right state
      real(8) :: rol,vxl,vyl,vzl,ptl,eel
      real(8) :: ror,vxr,vyr,vzr,ptr,eer
      real(8) :: rxl,ryl,rzl
      real(8) :: rxr,ryr,rzr
      real(8) :: bxs,byl,bzl
      real(8) ::     byr,bzr
      real(8) :: ptst
      real(8),dimension(ncomp) :: scl,scr

!----- U* ----
! qqlst ::  left state
! qqrst :: right state
      real(8) :: rolst,vxlst,vylst,vzlst,eelst
      real(8) :: rorst,vxrst,vyrst,vzrst,eerst
      real(8) :: rxlst,rylst,rzlst
      real(8) :: rxrst,ryrst,rzrst
      real(8) ::       bylst,bzlst
      real(8) ::       byrst,bzrst
      real(8),dimension(ncomp) :: sclst,scrst

!----- U** ----
! qqlst ::  left state
! qqrst :: right state
      real(8) :: vyldst,vzldst,eeldst
      real(8) :: vyrdst,vzrdst,eerdst
      real(8) :: ryldst,rzldst
      real(8) :: ryrdst,rzrdst
      real(8) ::       byldst,bzldst
      real(8) ::       byrdst,bzrdst

!----- flux ---
! fqql ::  left physical flux
! fqqr :: right physical flux
      real(8) :: frol,frxl,fryl,frzl,feel
      real(8) ::           fbyl,fbzl
      real(8) :: fror,frxr,fryr,frzr,feer
      real(8) ::           fbyr,fbzr
      real(8),dimension(ncomp) :: fscl,fscr

!----- wave speed ---
! sl ::  left-going fastest signal velocity
! sr :: right-going fastest signal velocity
! sm :: contact discontinuity velocity
! slst ::  left-going alfven velocity
! srst :: right-going alfven velocity
      real(8) :: sm,sl,sr,slst,srst

! cfl :: left-state Fast wave velocity
! cfr :: right-sate Fast wave velocity
      real(8) :: cfl,cfr

!--------------------
! temporary variables
      real(8) :: sdl,sdr,sdml,sdmr,isdml,isdmr,rosdl,rosdr
      real(8) :: temp
  
! no if
      real(8) :: sign1,maxs1,mins1
      real(8) :: msl,msr,mslst,msrst,temp1,invsumro,sqrtror,sqrtrol,abbx
      real(8) :: bxsq,temp_fst,eps,itf,vdbstl,vdbstr,signbx

!----- Step 0. ----------------------------------------------------------|
      eps = 1d-30
!---- Left state
        
        rol = leftst(mudn)
        eel = leftst(muet)
        rxl = leftst(muvu)
        ryl = leftst(muvv)
        rzl = leftst(muvw)
        vxl = leftst(muvu)/leftst(mudn)
        vyl = leftst(muvv)/leftst(mudn)
        vzl = leftst(muvw)/leftst(mudn)
        byl = leftst(mubv)
        bzl = leftst(mubw)
        ptl = leftst(mpre)
        scl(1:ncomp) = leftst(mst:med)

!---- Right state
        
        ror = rigtst(mudn)
        eer = rigtst(muet)
        rxr = rigtst(muvu)
        ryr = rigtst(muvv)
        rzr = rigtst(muvw)
        vxr = rigtst(muvu)/rigtst(mudn)
        vyr = rigtst(muvv)/rigtst(mudn)
        vzr = rigtst(muvw)/rigtst(mudn)
        byr = rigtst(mubv)
        bzr = rigtst(mubw)
        ptr = rigtst(mpre)
        scr(1:ncomp) = rigtst(mst:med)

!----- Step 1. ----------------------------------------------------------|
! Compute wave left & right wave speed
!
        cfl = leftst(mcsp)
        cfr = rigtst(mcsp)

        sl = min(vxl,vxr)-max(cfl,cfr) ! note sl is negative
        sr = max(vxl,vxr)+max(cfl,cfr)
!----- Step 2. ----------------------------------------------------------|
! compute L/R fluxs
!
! Left value
        frol = leftst(mfdn)
        feel = leftst(mfet)
        frxl = leftst(mfvu)
        fryl = leftst(mfvv)
        frzl = leftst(mfvw)
        fbyl = leftst(mfbv)
        fbzl = leftst(mfbw)
        fscl(1:ncomp) = leftst(mflx+mst:mflx+med)

! Right value
        fror = rigtst(mfdn)
        feer = rigtst(mfet)
        frxr = rigtst(mfvu)
        fryr = rigtst(mfvv) 
        frzr = rigtst(mfvw)
        fbyr = rigtst(mfbv)
        fbzr = rigtst(mfbw)
        fscr(1:ncomp) = rigtst(mflx+mst:mflx+med)


!----- Step 4. ----------------------------------------------------------|
! compute middle and alfven wave
!
        sdl = min(-1d-20,sl - vxl)
        sdr = max(1d-20,sr - vxr)
        rosdl = rol*sdl
        rosdr = ror*sdr

        temp = 1.0d0/(rosdr - rosdl)
! Eq. 45
        sm = (rosdr*vxr - rosdl*vxl - ptr + ptl)*temp
           
        sdml = min(-1d-20,sl - sm); isdml = 1.0d0/sdml
        sdmr = max(1d-20,sr - sm); isdmr = 1.0d0/sdmr

!----- Step 5. ----------------------------------------------------------|
! compute intermediate states
!
! Eq. 49
        ptst = (rosdr*ptl-rosdl*ptr+rosdl*rosdr*(vxr-vxl))*temp
		
!----- Step 5A. ----------------------------------------------------------|
! compute Ul*
!
           bxs = 0.5d0*(leftst(mubu)+rigtst(mubu))
           bxsq = bxs*bxs
           temp_fst = rosdl*sdml - bxsq
           sign1 = sign(1.0d0,abs(temp_fst)-eps)

           maxs1 = max(0.0d0,sign1)
           mins1 = min(0.0d0,sign1)

           itf = 1.0d0/(temp_fst+mins1)
           isdml = 1.0d0/sdml

           temp = bxs*(sdl-sdml)*itf
           rolst = maxs1*(rosdl*isdml) - mins1*rol
           sclst(1:ncomp) = rolst/rol*scl(1:ncomp)
           vxlst = maxs1*sm - mins1*vxl
           rxlst = rolst*vxlst
           
           vylst = maxs1*(vyl-byl*temp) - mins1*vyl
           rylst = rolst*vylst
           vzlst = maxs1*(vzl-bzl*temp) - mins1*vzl
           rzlst = rolst*vzlst
           
           temp = (rosdl*sdl-bxsq)*itf
           bylst = maxs1*(byl*temp) - mins1*byl
           bzlst = maxs1*(bzl*temp) - mins1*bzl

           vdbstl = vxlst*bxs+vylst*bylst+vzlst*bzlst
           eelst = maxs1*(sdl*eel - ptl*vxl + ptst*sm +      &
     &          bxs*(vxl*bxs+vyl*byl+vzl*bzl-vdbstl))*isdml  &
     &          - mins1*eel		
           
!----- Step 5B. ----------------------------------------------------------|
! compute Ur*
!
           temp_fst = rosdr*sdmr - bxsq
           sign1 = sign(1.0d0,abs(temp_fst)-eps)
           maxs1 = max(0.0d0,sign1)
           mins1 = min(0.0d0,sign1)

           itf = 1.0d0/(temp_fst+mins1)
           isdmr = 1.0d0/sdmr
           
           temp = bxs*(sdr-sdmr)*itf
           rorst = maxs1*(rosdr*isdmr) - mins1*ror
           scrst(1:ncomp) = rorst/ror*scr(1:ncomp)
           vxrst = maxs1*sm - mins1*vxr
           rxrst = rorst*vxrst
           
           vyrst = maxs1*(vyr-byr*temp) - mins1*vyr
           ryrst = rorst*vyrst
           vzrst = maxs1*(vzr-bzr*temp) - mins1*vzr
           rzrst = rorst*vzrst
           
           temp = (rosdr*sdr-bxsq)*itf
           byrst = maxs1*(byr*temp) - mins1*byr
           bzrst = maxs1*(bzr*temp) - mins1*bzr
				
           vdbstr = vxrst*bxs+vyrst*byrst+vzrst*bzrst
           eerst = maxs1*((sdr*eer - ptr*vxr  + ptst*sm)*isdmr +   & 
     &          bxs*(vxr*bxs+vyr*byr+vzr*bzr-vdbstr)*isdmr)        &
     &          - mins1*eer

!----- Step 5C. ----------------------------------------------------------|
! compute Ul** and Ur**
!
           sqrtrol = sqrt(rolst)
           sqrtror = sqrt(rorst)

           abbx = abs(bxs)
           signbx = sign(1.0d0,bxs)           
           sign1 = sign(1.0d0,abbx-eps)

           maxs1 = max(0d0,sign1)
           mins1 = -min(0d0,sign1)
           invsumro = maxs1/(sqrtrol + sqrtror)

           temp = invsumro*(sqrtrol*vylst + sqrtror*vyrst  &
     &          + signbx*(byrst-bylst))
           vyldst = vylst*mins1 + temp
           vyrdst = vyrst*mins1 + temp
           ryldst = rylst*mins1 + rolst * temp
           ryrdst = ryrst*mins1 + rorst * temp

           temp = invsumro*(sqrtrol*vzlst + sqrtror*vzrst  &
     &          + signbx*(bzrst-bzlst))
           vzldst = vzlst*mins1 + temp
           vzrdst = vzrst*mins1 + temp
           rzldst = rzlst*mins1 + rolst * temp
           rzrdst = rzrst*mins1 + rorst * temp

           temp = invsumro*(sqrtrol*byrst + sqrtror*bylst  &
     &          + signbx*sqrtrol*sqrtror*(vyrst-vylst))
           byldst = bylst*mins1 + temp
           byrdst = byrst*mins1 + temp
              
           temp = invsumro*(sqrtrol*bzrst + sqrtror*bzlst  &
     &           + signbx*sqrtrol*sqrtror*(vzrst-vzlst))
           bzldst = bzlst*mins1 + temp
           bzrdst = bzrst*mins1 + temp
              
           temp = sm*bxs + vyldst*byldst + vzldst*bzldst
           eeldst = eelst - sqrtrol*signbx*(vdbstl - temp)*maxs1
           eerdst = eerst + sqrtror*signbx*(vdbstr - temp)*maxs1
              
!----- Step 6. ----------------------------------------------------------|
! compute flux
           slst = (sm - abbx/sqrtrol)*maxs1
           srst = (sm + abbx/sqrtror)*maxs1

           sign1 = sign(1.0d0,sm)
           maxs1 = max(0.0d0,sign1)
           mins1 = -min(0.0d0,sign1)

           msl = min(sl,0.0d0)
           msr = max(sr,0.0d0)
           mslst = min(slst,0.0d0)
           msrst = max(srst,0.0d0)

           temp = mslst-msl
           temp1 = msrst-msr

           nflux(mden) = (frol+(rolst-rol)*msl)*maxs1   &
     &                  +(fror+(rorst-ror)*msr)*mins1
           nflux(meto) = (feel+(eelst-eel)*msl+(eeldst-eelst)*mslst)*maxs1  & 
     &                  +(feer+(eerst-eer)*msr+(eerdst-eerst)*msrst)*mins1
           nflux(mrvu) = (frxl+(rxlst-rxl)*msl)*maxs1   &
     &                  +(frxr+(rxrst-rxr)*msr)*mins1
           nflux(mrvv) = (fryl+(rylst-ryl)*msl+(ryldst-rylst)*mslst)*maxs1  & 
     &                  +(fryr+(ryrst-ryr)*msr+(ryrdst-ryrst)*msrst)*mins1
           nflux(mrvw) = (frzl+(rzlst-rzl)*msl+(rzldst-rzlst)*mslst)*maxs1  &
     &                  +(frzr+(rzrst-rzr)*msr+(rzrdst-rzrst)*msrst)*mins1
           nflux(mbmu) = 0d0
           nflux(mbmv) = (fbyl+(bylst-byl)*msl+(byldst-bylst)*mslst)*maxs1  &
     &                  +(fbyr+(byrst-byr)*msr+(byrdst-byrst)*msrst)*mins1
           nflux(mbmw) = (fbzl+(bzlst-bzl)*msl+(bzldst-bzlst)*mslst)*maxs1  &
     &                  +(fbzr+(bzrst-bzr)*msr+(bzrdst-bzrst)*msrst)*mins1

        nflux(mst:med) = &
     &     (fscl(1:ncomp)+msl*(sclst(1:ncomp)-scl(1:ncomp)))*maxs1 &
     &    +(fscr(1:ncomp)+msr*(scrst(1:ncomp)-scr(1:ncomp)))*mins1
      return
      end subroutine HLLD

      subroutine GravForce
      use basicmod
      use fluxmod
      implicit none
      integer :: i,j,k,n

!$acc kernels
!$acc loop collapse(3) independent
      do k=ks,ke
      do j=js,je
      do i=is,ie+1
         gp1a(i  ,j,k) = gp(i,j,k) &
     & - 0.5d0*(gp(i  ,j,k)-gp(i-1,j,k))

         gp1a(i+1,j,k) = gp(i,j,k) &
     & + 0.5d0*(gp(i+1,j,k)-gp(i  ,j,k))

       grvsrc1(i,j,k) = (gp1a(i+1,j,k)-gp1a(i,j,k))/(x1a(i+1)-x1a(i))*d(i,j,k)

      enddo
      enddo
      enddo
!$acc end kernels
      
!$acc kernels
!$acc loop collapse(3) independent
      do k=ks,ke
      do i=is,ie
      do j=js,je+1
       grvsrc2(i,j,k) = 0.0d0
      enddo
      enddo
      enddo
!$acc end kernels
      
!$acc kernels
!$acc loop collapse(3) independent
      do j=js,je
      do i=is,ie
      do k=ks,ke+1
         grvsrc3(i,j,k) = 0.0d0
      enddo
      enddo
      enddo
!$acc end kernels
      return
      end subroutine  GravForce
    
      subroutine UpdateConsv
      use basicmod
      use fluxmod
      implicit none
      integer::i,j,k

!$acc kernels
!$acc loop collapse(3) independent
      do k=ks,ke
      do j=js,je
      do i=is,ie
         
         d(i,j,k) = d(i,j,k)  &
     & +dt*(                  &
     &  (- nflux1(mden,i+1,j,k) &
     &   + nflux1(mden,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mden,i,j+1,k) &
     &   + nflux2(mden,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mden,i,j,k+1) &
     &   + nflux3(mden,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

         mv1(i,j,k) = mv1(i,j,k) &
     & +dt*( &
     &      +  grvsrc1(i,j,k)                      &
     & +(- nflux1(mrv1,i+1,j,k) &
     &   + nflux1(mrv1,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mrv1,i,j+1,k) &
     &   + nflux2(mrv1,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mrv1,i,j,k+1) &
     &   + nflux3(mrv1,i,j,k  ))/(x3a(k+1)-x3a(k)) & 
     &      )
         mv2(i,j,k) = mv2(i,j,k) &
     & +dt*( &
     &      +  grvsrc2(i,j,k)                      &
     & +(- nflux1(mrv2,i+1,j,k) &
     &   + nflux1(mrv2,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mrv2,i,j+1,k) &
     &   + nflux2(mrv2,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mrv2,i,j,k+1) &
     &   + nflux3(mrv2,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

         mv3(i,j,k) = mv3(i,j,k)  &
     & +dt*( &
     &      +  grvsrc3(i,j,k)                      &
     & +(- nflux1(mrv3,i+1,j,k) &
     &   + nflux1(mrv3,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mrv3,i,j+1,k) &
     &   + nflux2(mrv3,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mrv3,i,j,k+1) &
     &   + nflux3(mrv3,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

          et(i,j,k) = et(i,j,k) &
     & +dt*( &
     &  (- nflux1(meto,i+1,j,k) &
     &   + nflux1(meto,i  ,j,k))/(x1a(i+1)-x1a(i)) &
     & +(- nflux2(meto,i,j+1,k) &
     &   + nflux2(meto,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(meto,i,j,k+1) &
     &   + nflux3(meto,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

          b1(i,j,k) = b1(i,j,k) &
     & +dt*( &
     &  (- nflux1(mbm1,i+1,j,k) &
     &   + nflux1(mbm1,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mbm1,i,j+1,k) &
     &   + nflux2(mbm1,i,j  ,k))/(x2a(j+1)-x2a(j)) &  
     & +(- nflux3(mbm1,i,j,k+1) &
     &   + nflux3(mbm1,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

          b2(i,j,k) = b2(i,j,k) &
     & +dt*( &
     &  (- nflux1(mbm2,i+1,j,k) &
     &   + nflux1(mbm2,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mbm2,i,j+1,k) &
     &   + nflux2(mbm2,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mbm2,i,j,k+1) &
     &   + nflux3(mbm2,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

          b3(i,j,k) = b3(i,j,k) &
     & +dt*( &
     &  (- nflux1(mbm3,i+1,j,k) &
     &   + nflux1(mbm3,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mbm3,i,j+1,k) &
     &   + nflux2(mbm3,i,j  ,k))/(x2a(j+1)-x2a(j)) &  
     & +(- nflux3(mbm3,i,j,k+1) &
     &   + nflux3(mbm3,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

          bp(i,j,k) = bp(i,j,k) &
     & +dt*( &
     &  (- nflux1(mbps,i+1,j,k) &
     &   + nflux1(mbps,i  ,j,k))/(x1a(i+1)-x1a(i)) & 
     & +(- nflux2(mbps,i,j+1,k) &
     &   + nflux2(mbps,i,j  ,k))/(x2a(j+1)-x2a(j)) & 
     & +(- nflux3(mbps,i,j,k+1) &
     &   + nflux3(mbps,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )

!          print *, i,j,k,bp(i,j,k)
         DXcomp(1:ncomp,i,j,k) = DXcomp(1:ncomp,i,j,k) &
     & +dt*( &
     & +(- nflux1(mst:med,i+1,j,k) &
     &   + nflux1(mst:med,i  ,j,k))/(x1a(i+1)-x1a(i)) &
     & +(- nflux2(mst:med,i,j+1,k) &
     &   + nflux2(mst:med,i,j  ,k))/(x2a(j+1)-x2a(j)) &
     & +(- nflux3(mst:med,i,j,k+1) &
     &   + nflux3(mst:med,i,j,k  ))/(x3a(k+1)-x3a(k)) &
     &      )
      enddo
      enddo
      enddo
!$acc end kernels

      return
    end subroutine UpdateConsv
subroutine EvaulateCh
  use basicmod
  use fluxmod
  use mpimod
  implicit none
  integer :: i,j,k,n
  real(8),parameter:: drate=0.1d0 ! 
! local variable
  real(8):: dh1l,dh2l,dh3l,dhl,dhd
  real(8):: ch1l,ch2l,ch3l,chl,chd
  real(8):: cts,css,cms
  real(8),parameter:: huge=1.0d90
  integer::theid

!$acc kernels
  chd = 0.0d0
  ch1l = 0.0d0; ch2l = 0.0d0; ch3l = 0.0d0
  dhd = huge
  dh1l =  huge; dh2l =  huge; dh3l =  huge
!$acc loop collapse(3) reduction(max:chd)
  do k=ks,ke
  do j=js,je
  do i=is,ie
            css  = svc(ncsp,i,j,k)**2
            cts  = css  &! cs^2+c_a^2
     &          + (svc(nbm1,i,j,k)**2+svc(nbm2,i,j,k)**2+svc(nbm3,i,j,k)**2)/svc(nden,i,j,k)
            cms  = sqrt((cts +sqrt(cts**2 &
     &      -4.0d0*css*svc(nbm1,i,j,k)**2/svc(nden,i,j,k)))/2.0d0)
            ch1l = ( abs(svc(nve1,i,j,k)) + cms )
            dh1l =  (x1a(i+1)-x1a(i))

            cms  = sqrt((cts +sqrt(cts**2 &
     &      -4.0d0*css*svc(nbm2,i,j,k)**2/svc(nden,i,j,k)))/2.0d0)
            ch2l = ( abs(svc(nve2,i,j,k)) + cms )
            dh2l = (x2a(j+1)-x2a(j)) 

            cms  = sqrt((cts +sqrt(cts**2 &
     &      -4.0d0*css*svc(nbm3,i,j,k)**2/svc(nden,i,j,k)))/2.0d0)
            ch3l = ( abs(svc(nve3,i,j,k)) + cms )
            dh3l = (x3a(k+1)-x3a(k))

         chl     = max(ch1l,ch2l,ch3l)
         dhl     = min(dh1l,dh2l,dh3l)
         chd     = max(chl,chd)
         dhd     = min(dhl,dhd)
  enddo
  enddo
  enddo
  bufinpmax(1) = chd
  bufinpmax(2) = dble(myid_w)
!$acc end kernels
  call MPImaxfind
!$acc kernels
  chd = bufoutmax(1)
  theid = int(bufoutmax(2)) 
  chg      =      chd
!$acc end kernels
  
  return
end subroutine  EvaulateCh

      subroutine DampPsi
      use basicmod
      use fluxmod
      implicit none
      integer :: i,j,k,n
      real(8),parameter:: alphabp=0.1d0 !
      real(8):: taui
      real(8):: dhl,dh1l,dh2l,dh3l
      real(8),parameter:: huge=1.0d90 

!$acc kernels  
      dh1l=huge
      dh2l=huge
      dh3l=huge
!$acc loop collapse(3) independent
      do k=ks,ke
      do j=js,je
      do i=is,ie
            dh1l = x1a(i+1)-x1a(i)
            dh2l = x2a(j+1)-x2a(j)
            dh3l = x3a(k+1)-x3a(k)

         dhl = min(dh1l,dh2l,dh3l)
         taui = alphabp * chg /dhl ! cm/s /cm => 1/s
         bp(i,j,k) = bp(i,j,k)*(1.0d0 - dt*taui) ! if dt = dtloc, damping by factor of (1.0-drate)
      enddo
      enddo
      enddo
!$acc end kernels

      return
end subroutine  DampPsi
