diff --git a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F index 42da7e7f5b..787e7719a1 100644 --- a/src/core_atmosphere/dynamics/mpas_atm_boundaries.F +++ b/src/core_atmosphere/dynamics/mpas_atm_boundaries.F @@ -99,9 +99,12 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (mpas_pool_type), pointer :: lbc real (kind=RKIND) :: dt - integer, pointer :: nCells - integer, pointer :: nEdges - integer, pointer :: index_qv + integer, pointer :: nCells_ptr + integer, pointer :: nEdges_ptr + integer, pointer :: nVertLevels_ptr + integer, pointer :: index_qv_ptr + integer, pointer :: nScalars_ptr + integer :: nCells, nEdges, nVertLevels, index_qv, nScalars real (kind=RKIND), dimension(:,:), pointer :: u real (kind=RKIND), dimension(:,:), pointer :: ru @@ -129,7 +132,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr type (MPAS_Time_Type) :: currTime type (MPAS_TimeInterval_Type) :: lbc_interval character(len=StrKIND) :: read_time - integer :: iEdge + integer :: iEdge, iCell, k, j integer :: cell1, cell2 @@ -169,6 +172,7 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_u', u, 2) call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) + call mpas_pool_get_array(lbc, 'lbc_w', w, 2) call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) call mpas_pool_get_array(lbc, 'lbc_rtheta_m', rtheta_m, 2) call mpas_pool_get_array(lbc, 'lbc_rho_zz', rho_zz, 2) @@ -176,26 +180,87 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) call mpas_pool_get_array(mesh, 'cellsOnEdge', cellsOnEdge) - call mpas_pool_get_dimension(mesh, 'nCells', nCells) - call mpas_pool_get_dimension(mesh, 'nEdges', nEdges) - call mpas_pool_get_dimension(lbc, 'index_qv', index_qv) + call mpas_pool_get_dimension(mesh, 'nCells', nCells_ptr) + call mpas_pool_get_dimension(mesh, 'nEdges', nEdges_ptr) + call mpas_pool_get_dimension(mesh, 'nVertLevels', nVertLevels_ptr) + call mpas_pool_get_dimension(state, 'num_scalars', nScalars_ptr) + call mpas_pool_get_dimension(lbc, 'index_qv', index_qv_ptr) call mpas_pool_get_array(mesh, 'zz', zz) + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') + if (.not. firstCall) then + call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) + call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) + call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) + call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) + call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) + call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) + call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) + call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + + !$acc enter data copyin(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + !$acc enter data copyin(u, w, theta, rho, scalars) + !$acc enter data create(ru, rho_edge, rtheta_m, rho_zz) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') + + ! Dereference the pointers to avoid non-array pointer for OpenACC + nCells = nCells_ptr + nEdges = nEdges_ptr + nVertLevels = nVertLevels_ptr + nScalars = nScalars_ptr + index_qv = index_qv_ptr + ! Compute lbc_rho_zz - zz(:,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line - rho_zz(:,:) = rho(:,:) / zz(:,:) + !$acc parallel default(present) + !$acc loop vector + do k=1,nVertLevels + zz(k,nCells+1) = 1.0_RKIND ! Avoid potential division by zero in the following line + end do + !$acc end parallel + + !$acc parallel default(present) + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rho_zz(k,iCell) = rho(k,iCell) / zz(k,iCell) + end do + end do + !$acc end parallel ! Average lbc_rho_zz to edges + !$acc parallel default(present) + !$acc loop gang worker do iEdge=1,nEdges cell1 = cellsOnEdge(1,iEdge) cell2 = cellsOnEdge(2,iEdge) if (cell1 > 0 .and. cell2 > 0) then - rho_edge(:,iEdge) = 0.5_RKIND * (rho_zz(:,cell1) + rho_zz(:,cell2)) + !$acc loop vector + do k = 1, nVertLevels + rho_edge(k,iEdge) = 0.5_RKIND * (rho_zz(k,cell1) + rho_zz(k,cell2)) + end do end if end do + !$acc end parallel - ru(:,:) = u(:,:) * rho_edge(:,:) - rtheta_m(:,:) = theta(:,:) * rho_zz(:,:) * (1.0_RKIND + rvord * scalars(index_qv,:,:)) + !$acc parallel default(present) + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + ru(k,iEdge) = u(k,iEdge) * rho_edge(k,iEdge) + end do + end do + + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + rtheta_m(k,iCell) = theta(k,iCell) * rho_zz(k,iCell) * (1.0_RKIND + rvord * scalars(index_qv,k,iCell)) + end do + end do + !$acc end parallel if (.not. firstCall) then lbc_interval = currTime - LBC_intv_end @@ -203,37 +268,46 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr dt = 86400.0_RKIND * real(dd_intv, kind=RKIND) + real(s_intv, kind=RKIND) & + (real(sn_intv, kind=RKIND) / real(sd_intv, kind=RKIND)) - call mpas_pool_get_array(lbc, 'lbc_u', u, 2) - call mpas_pool_get_array(lbc, 'lbc_ru', ru, 2) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', rho_edge, 2) - call mpas_pool_get_array(lbc, 'lbc_w', w, 2) - call mpas_pool_get_array(lbc, 'lbc_theta', theta, 2) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', rtheta_m, 2) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', rho_zz, 2) - call mpas_pool_get_array(lbc, 'lbc_rho', rho, 2) - call mpas_pool_get_array(lbc, 'lbc_scalars', scalars, 2) - call mpas_pool_get_array(lbc, 'lbc_u', lbc_tend_u, 1) - call mpas_pool_get_array(lbc, 'lbc_ru', lbc_tend_ru, 1) - call mpas_pool_get_array(lbc, 'lbc_rho_edge', lbc_tend_rho_edge, 1) - call mpas_pool_get_array(lbc, 'lbc_w', lbc_tend_w, 1) - call mpas_pool_get_array(lbc, 'lbc_theta', lbc_tend_theta, 1) - call mpas_pool_get_array(lbc, 'lbc_rtheta_m', lbc_tend_rtheta_m, 1) - call mpas_pool_get_array(lbc, 'lbc_rho_zz', lbc_tend_rho_zz, 1) - call mpas_pool_get_array(lbc, 'lbc_rho', lbc_tend_rho, 1) - call mpas_pool_get_array(lbc, 'lbc_scalars', lbc_tend_scalars, 1) + dt = 1.0_RKIND / dt + + !$acc parallel default(present) + !$acc loop gang vector collapse(2) + do iEdge=1,nEdges+1 + do k=1,nVertLevels + lbc_tend_u(k,iEdge) = (u(k,iEdge) - lbc_tend_u(k,iEdge)) * dt + lbc_tend_ru(k,iEdge) = (ru(k,iEdge) - lbc_tend_ru(k,iEdge)) * dt + lbc_tend_rho_edge(k,iEdge) = (rho_edge(k,iEdge) - lbc_tend_rho_edge(k,iEdge)) * dt + end do + end do + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels+1 + lbc_tend_w(k,iCell) = (w(k,iCell) - lbc_tend_w(k,iCell)) * dt + end do + end do - dt = 1.0_RKIND / dt - lbc_tend_u(:,:) = (u(:,:) - lbc_tend_u(:,:)) * dt - lbc_tend_ru(:,:) = (ru(:,:) - lbc_tend_ru(:,:)) * dt - lbc_tend_rho_edge(:,:) = (rho_edge(:,:) - lbc_tend_rho_edge(:,:)) * dt - lbc_tend_w(:,:) = (w(:,:) - lbc_tend_w(:,:)) * dt - lbc_tend_theta(:,:) = (theta(:,:) - lbc_tend_theta(:,:)) * dt - lbc_tend_rtheta_m(:,:) = (rtheta_m(:,:) - lbc_tend_rtheta_m(:,:)) * dt - lbc_tend_rho_zz(:,:) = (rho_zz(:,:) - lbc_tend_rho_zz(:,:)) * dt - lbc_tend_rho(:,:) = (rho(:,:) - lbc_tend_rho(:,:)) * dt - lbc_tend_scalars(:,:,:) = (scalars(:,:,:) - lbc_tend_scalars(:,:,:)) * dt + !$acc loop gang vector collapse(2) + do iCell=1,nCells+1 + do k=1,nVertLevels + lbc_tend_theta(k,iCell) = (theta(k,iCell) - lbc_tend_theta(k,iCell)) * dt + lbc_tend_rtheta_m(k,iCell) = (rtheta_m(k,iCell) - lbc_tend_rtheta_m(k,iCell)) * dt + lbc_tend_rho_zz(k,iCell) = (rho_zz(k,iCell) - lbc_tend_rho_zz(k,iCell)) * dt + lbc_tend_rho(k,iCell) = (rho(k,iCell) - lbc_tend_rho(k,iCell)) * dt + end do + end do + + !$acc loop gang + do iCell=1,nCells+1 + !$acc loop vector collapse(2) + do k=1,nVertLevels + do j = 1,nScalars + lbc_tend_scalars(j,k,iCell) = (scalars(j,k,iCell) - lbc_tend_scalars(j,k,iCell)) * dt + end do + end do + end do + !$acc end parallel ! ! Logging the lbc start and end times appears to be backwards, but @@ -249,6 +323,17 @@ subroutine mpas_atm_update_bdy_tend(clock, streamManager, block, firstCall, ierr end if + MPAS_ACC_TIMER_START('mpas_atm_update_bdy_tend [ACC_data_xfer]') + if (.not. firstCall) then + !$acc exit data copyout(lbc_tend_u, lbc_tend_ru, lbc_tend_rho_edge, lbc_tend_w, & + !$acc lbc_tend_theta, lbc_tend_rtheta_m, lbc_tend_rho_zz, & + !$acc lbc_tend_rho, lbc_tend_scalars) + end if + + !$acc exit data copyout(ru, rho_edge, rtheta_m, rho_zz) + !$acc exit data delete(u, w, theta, rho, scalars) + MPAS_ACC_TIMER_STOP('mpas_atm_update_bdy_tend [ACC_data_xfer]') + LBC_intv_end = currTime end subroutine mpas_atm_update_bdy_tend