parallel processing - Code implementation for openmp -


i want implement following sequential code in openmp (2 core) parallel computing.

which modifications should run openmp? if me appreciated. in advance

      program potential       parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)       data rl2,rl2allow /1.,1.e-7/ c..read input data, generate grid data , initialize solution        call init   c..start iterative solution loop         k = 0       while (k .lt. kmax .and. rl2 .gt. rl2allow)          k = k + 1          call bc c..point iterative solutions           call sor c..line iterative solution  c        call slor c..update phi_k array , evaluate residual           rl2=0.          j = 2,jmax-1          = 2,imax-1            rl2 = rl2 + (phi_kp1(i,j) - phi_k(i,j))**2            phi_k(i,j)  = phi_kp1(i,j)          enddo          enddo          rl2 = sqrt(rl2/((imax-2)*(jmax-2)))          print*, 'residual@k=',k,rl2 c..output intermediate solutions           if( mod(k,kout).eq.0 .and. k.ne.kmax) call io(k)       enddo       call io(k)        stop       end             subroutine sor        parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)        beta2 = (dr/dth)**2 c..solve phi^k+1        = 2,imax-1       c1 = beta2/r(i)**2       c2 = 0.5*dr/r(i)       c3 = 0.5/(1.+c1)       j = 2,jmax-1 c..implement point jacobi, gauss-seidel , sor methods        phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j)      >                     + c1*(phi_k(i,j-1) + phi_k(i,j+1)) )       enddo       enddo       return        end          subroutine  init       parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)       data r1,r2/1.0,5.0/, imax,jmax/101,91/  c..read input parameters imax, jmax        print*, 'enter kmax , kout :'       read(*,*) kmax, kout        pi  = 4.*atan(1.)       dr  = (r2-r1)/float(imax-1)       dth = pi/float(jmax-1) c..grid generation         j=1,jmax       i=1,imax          r(i)   = r1 + dr*(i-1)           th(j)  = dth*(j-1)           x(i,j) = r(i)*cos(th(j))          y(i,j) = r(i)*sin(th(j)) c...initialize phi arrays           phi_k(i,j)   = r(i)*cos(th(j))          phi_kp1(i,j) = 0.       enddo       enddo       call bc       call io(1)        return        end  c-------------------------------------------------------------------        subroutine bc       parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)        = 1,imax          phi_k(i,1)      = phi_k(i,2)          phi_kp1(i,1)    = phi_k(i,2)          phi_k(i,jmax)   = phi_k(i,jmax-1)          phi_kp1(i,jmax) = phi_k(i,jmax-1)       enddo       j = 1,jmax          phi_k(1,j)      = phi_k(2,j)          phi_kp1(1,j)    = phi_k(2,j)           phi_k(imax,j)   = r(imax)*cos(th(j))          phi_kp1(imax,j) = phi_k(imax,j)       enddo        return        end  c-------------------------------------------------------------------        subroutine velocity(vx,vy)       parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)       dimension vx(imx,jmx),vy(imx,jmx)       j = 1,jmax       = 1,imax          if(i .eq. 1)            vr = (phi_k(2,j)-phi_k(1,j))/dr          elseif(i .eq. imax)            vr = (phi_k(imax,j)-phi_k(imax-1,j))/dr          else            vr = 0.5*(phi_k(i+1,j)-phi_k(i-1,j))/dr          endif          if(j .eq. 1)            vth = (phi_k(i,2)-phi_k(i,1))/(dth*r(i))          elseif(j .eq. jmax)            vth = (phi_k(i,jmax)-phi_k(i,jmax-1))/(dth*r(i))          else            vth = 0.5*(phi_k(i,j+1)-phi_k(i,j-1))/(dth*r(i))          endif          vx(i,j) = vr*cos(th(j)) - vth*sin(th(j))          vy(i,j) = vr*sin(th(j)) + vth*cos(th(j))       enddo       enddo       return        end  c-------------------------------------------------------------------        subroutine  io(k) c..output solution in tecplot format        parameter (imx=201, jmx=201)       common /pars/ imax,jmax,kmax,kout       common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)       common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)       dimension vx(imx,jmx),vy(imx,jmx)       character fname*32,string*8,ext*5        call velocity(vx,vy)       write(string,'(f8.5)') float(k)/100000       read(string,'(3x,a5)') ext       fname = 'vars-'//ext//'.tec'        open(1,file=fname,form='formatted')       write(1,*) ' variables="x","y","phi","u","v" '       write(1,*) ' zone i=',imax, ', j=',jmax       j = 1,jmax       = 1,imax          write(1,*) x(i,j),y(i,j),phi_k(i,j),vx(i,j),vy(i,j)       enddo       enddo       close(1)        return       end  c-------------------------------------------------------------------       subroutine thomas(mx,il,iu,a,b,c,r) c............................................................ c solution of tridiagonal system of n equations of form c  a(i)*x(i-1) + b(i)*x(i) + c(i)*x(i+1) = r(i)  i=il,iu c  solution x(i) stored in f(i) c  a(il-1) , c(iu+1) not used. c  a,b,c,r arrays provided user c............................................................        dimension  a(mx),b(mx),c(mx),r(mx),x(mx)       x(il)=c(il)/b(il)       r(il)=r(il)/b(il)       i=il+1,iu          z=1./(b(i)-a(i)*x(i-1))          x(i)=c(i)*z          r(i)=(r(i)-a(i)*r(i-1))*z       enddo       i=iu-1,il,-1          r(i)=r(i)-x(i)*r(i+1)       enddo       return       end 

looking @ iterative part of code (lines 14-37) observe there 2 invocations (bc , sor) execute code in loops. these potential places put openmp directives. in both routines, loops not expose data dependencies prevent execution in parallel apply following transformations.

for bc routine (below) can use 2 times !$omp parallel distribute work of 2 loops (one directive each loop) among available threads.

      subroutine bc                                                                                         parameter (imx=201, jmx=201)                                                                          common /pars/ imax,jmax,kmax,kout                                                                     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)                                             common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)                                                    !$omp parallel shared(imax,phi_k,phi_kp1)                                                                = 1,imax                                                                                            phi_k(i,1)      = phi_k(i,2)                                                                          phi_kp1(i,1)    = phi_k(i,2)                                                                          phi_k(i,jmax)   = phi_k(i,jmax-1)                                                                     phi_kp1(i,jmax) = phi_k(i,jmax-1)                                                                  enddo                                                                                           !$omp end parallel                                                                                  !$omp parallel shared(jmax,phi_k,phi_kp1)                                                                j = 1,jmax                                                                                            phi_k(1,j)      = phi_k(2,j)                                                                          phi_kp1(1,j)    = phi_k(2,j)                                                                          phi_k(imax,j)   = r(imax)*cos(th(j))                                                                  phi_kp1(imax,j) = phi_k(imax,j)                                                                    enddo                                                                                           !$omp end parallel                                                                                        return                                                                                                end                                                                                             

and sor, can distribute work of outer loop among threads. in case, each thread require private copy of temporal variables (c1, c2, c3) , iterative variable (j).

      subroutine sor                                                                                         parameter (imx=201, jmx=201)                                                                          common /pars/ imax,jmax,kmax,kout                                                                     common /grid/ dr,dth,r(imx),th(jmx),x(imx,jmx),y(imx,jmx)                                             common /vars/ phi_k(imx,jmx),phi_kp1(imx,jmx)                                                          beta2 = (dr/dth)**2                                                                             c..solve phi^k+1                                                                                   c$omp parallel private(c1,c2,c3,j) shared(imax,jmax,phi_kp1,phi_k)                                       = 2,imax-1                                                                                       c1 = beta2/r(i)**2                                                                                    c2 = 0.5*dr/r(i)                                                                                      c3 = 0.5/(1.+c1)                                                                                      j = 2,jmax-1                                                                                 c..implement point jacobi, gauss-seidel , sor methods                                                  phi_kp1(i,j) = c3*((1.-c2)*phi_k(i-1,j) + (1.+c2)*phi_k(i+1,j)                                       >                     + c1*(phi_k(i,j-1) + phi_k(i,j+1)) )                                             enddo                                                                                                 enddo                                                                                           c$omp end parallel                                                                                        return                                                                                                end                                                                                             

you may want explore rest of code , see further parallelization opportunities. may use profiler (gprof, tau, vampir, paraver) identify application hotspots , focus parallelization there.


Comments

Popular posts from this blog

SVG stroke-linecap doesn't work for circles in Firefox? -

routes - Laravel 4 Wildcard Routing to Different Controllers -

cross browser - XSLT namespace-alias Not Working in Firefox or Chrome -