! Michael MacMahon - Mech. Aerospace Eng, Syracuse University. ! mmcmahon@npac.syr.edu ! Saleh Elmohamed - NPAC, Syracuse University. ! saleh@npac.syr.edu Summer 1997. !------------------------------------------------------------- ! This program uses HPF/Fortran 90 to simulate two-dimensional ! inviscid flow through a nozzle. The numerical scheme is a finite ! volume cell centered scheme employing a fourth stage ! Runge-Kutta time stepping algorithm. Artificial viscosity ! terms are added to stabilize the scheme in areas of large ! gradients, such as shock waves. The program reads in a mesh ! from file ft03.dat and computes needed Delta x and Delta y values. ! The initial conditions are set up as simply uniform flow everywhere. ! The inlet and outlet boundary conditions are determined from ! the information provided by the user, the inlet mach number, back ! pressure and output condition (super or subsonic). ! ! The structure of the code consists of calls to the flux, ! artificial dissipation, and boundary condition routines ! for each stage of the Runge-Kutta algorithm. ! Data is output for contour and line plots as needed. ! ! The code makes heavy use of FORALL statements, which ! are more succint than repeated DO loops, guarentee no ! dependencies and make conversion to HPF easier. ! These do require more mememory than the traditional ! DO loops, but since this code was only tried on eight processors, ! effective use of the cache was not crucial. ! The solution vector V consists of: ! V(1,i,j)=density at point i,j ! V(2,i,j)=density*(X component of Velocity) at point i,j ! V(3,i,j)=density*(Y component of Velocity) at point i, ! V(4,i,j)=internal energy at point i,j Program nozzle Implicit none Integer, Parameter :: lmax=4 Integer, Parameter :: nx=256 Integer, Parameter :: ny=64 Real V(lmax,nx+2,ny+2) Real xloc(nx+1,ny+2),yloc(nx+1,ny+2) Real dnax(nx+1,ny+2),dnay(nx+1,ny+2) Real dnbx(nx+1,ny+2),dnby(nx+1,ny+2) Real min,start,end Integer mx,my ! The following are the HPF statements. PROCS is the one- ! dimensional abstract processor array. It's size ! (the number of processors) is specified at run time. ! The solution vector V is distributed along its third ! dimension, while the first two dimensions are collapsed (notice the ! * in the line !HPF$ DISTRIBUTE V (*,*,BLOCK) onto PROC) . ! The second dimension of the other arrays ! corresponds to the third dimension of V, and as such is ! aligned with the third dimension of V. This alignment ! guarentees no unnescessary communication in statements ! involving both V and the other variables, of which there ! are many. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V (*,*,BLOCK) onto PROC !HPF$ ALIGN (*,:) with V(*,*,:) :: xloc,yloc,dnax,dnay,dnbx,dnby mx=nx+1 my=ny+1 Call Mesh(V,xloc,yloc,dnax,dnay,dnbx,dnby,lmax,mx,my) Call Init_cons(V,lmax,min,mx,my) Call James(V,xloc,yloc,dnax,dnay,dnbx,dnby, $ min,lmax,mx,my) End program subroutine timer(return_time, initial_time) real, intent(in) :: initial_time real, intent(out) :: return_time integer finish,rate CALL system_clock( COUNT=finish,COUNT_RATE=rate) return_time = FLOAT(finish) / FLOAT(rate) - initial_time end subroutine ! This routine reads in the x and y values of the mesh. ! The Delta x and Delta y values for the cell sides are computed ! and stored in dnax,dnay. Subroutine Mesh(V,xloc,yloc,dnax,dnay,dnbx,dnby,lmax,mx,my) Implicit none Integer mx,my,lmax Real V(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1) Real dnax(mx,my+1),dnay(mx,my+1) Real dnbx(mx,my+1),dnby(mx,my+1) Integer i,j ! Notice the * in front of (*,*,:) and PROC in the ! distribute directive. This tells the compiler that ! any incoming distribution V had is okay, and thus ! no remapping occurs. Similarly, the align statement ! shows *V(*,*,:) indicating incoming alignment is fine. ! If the subroutine is called many times and the programmer ! does not use the * notation, the compiler may ! remap the arrays every time, resulting in poor performance ! You will see this type of distribution in all ! the subroutines from now on. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: xloc,yloc,dnax,dnay,dnbx,dnby open(unit=3,file='ft03.dat',status='old') dnax=0.0 dnay=0.0 dnbx=0.0 dnby=0.0 xloc=0.0 yloc=0.0 Do i=1,mx Do j=1,my read(3,*) xloc(i,j),yloc(i,j) End Do End Do !____________DEBUGGER MESH___________________________________ ! Forall(i=1:mx,j=1:my) xloc(i,j)=0.0133*real(i-1) ! Forall(i=1:mx,j=1:my) yloc(i,j)=0.015*real(j-1) !----------------------------------------------------------- Forall(i=2:mx,j=2:my) dnax(i,j)=yloc(i,j-1)-yloc(i-1,j-1) Forall(i=2:mx,j=2:my) dnay(i,j)=-1.0*(xloc(i,j-1) $ -xloc(i-1,j-1)) Forall(i=2:mx,j=2:my) dnbx(i,j)=yloc(i,j)-yloc(i,j-1) Forall(i=2:mx,j=2:my) dnby(i,j)=-1.0*(xloc(i,j) $ -xloc(i,j-1)) End subroutine ! Just set up the initial conditions of the nozzle, ! such as the density, pressure, velocity and internal energy. ! Subroutine Init_cons(V,lmax,min,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real p(mx+1,my+1) Real min Integer l,i,j ! Again, distributing V *(*,*,BLOCK) insures ! incoming mapping is to be left alone. ! The third dimension of P is aligned with ! the third dimension of V. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with V(*,*,:) :: p write(6,*) 'Enter inlet mach no.' read(5,*) min Forall(j=2:my) P(1,j)=1.0/(min**2/2.0 *(1.4-1.0) $ +1.0)**(1.4/0.4) write(6,*) 'Inlet pressure is:',p(1,2) Forall(j=2:my) V(1,1,j)=P(1,j)*(1.0 $ + (1.4-1.0)/2.0*min**2) Forall(j=2:my) V(2,1,j)=V(1,1,j)*min $ *sqrt(1.4*p(1,j)/V(1,1,j)) Forall(j=2:my) V(3,1,j)=0.0 Forall(j=2:my) V(4,1,j)=p(1,j)/(0.4) + 0.5*V(1,1,j) $ *(V(2,1,j)**2 / V(1,1,j)**2) Forall(l=1:lmax,i=2:mx+1,j=2:my) V(l,i,j)=V(l,1,j) End subroutine ! This is the four stage Runge-Kutta algorithm, which ! solves the necessary differential equations. RES ! is the time derivative of the solution vector, and ! when it approaches zero out solution is converged. ! I have hardwired the appropriate number of iterations ! required into the code. See note above the ! loop for more information. Subroutine James(V,xloc,yloc,dnax,dnay,dnbx,dnby,min, $ lmax,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1),dnbx(mx,my+1),dnby(mx,my+1) Real dnax(mx,my+1),dnay(mx,my+1) Real Fluxab(lmax,mx+1,my+1),Fluxbc(lmax,mx+1,my+1) Real Fluxcd(lmax,mx+1,my+1),Fluxda(lmax,mx+1,my+1) Real Res(lmax,mx,my+1),h(lmax,mx+1,my+1) Real Q(lmax,mx+1,my+1) Real p(mx+1,my+1) Real avab(lmax,mx+1,my+1),avbc(lmax,mx+1,my+1), $ avcd(lmax,mx+1,my+1) Real avda(lmax,mx+1,my+1) Real dt(mx,my+1) Real area(mx,my+1) Real pback,cfl,min,start,end Integer l,i,j,z Integer exit,iter,itermax ! The incoming arrays are not remapped or realigned, a fact ! the * in front of (*,*,BLOCK) and *V(*,*,:) insures. ! The local arrays are then aligned with V so that no ! extra communication occurs when they interact, which ! they do many times. See the line where RES is calculated ! to validate this. The expression for RES is choppy ! because it had to be hacked into small pieces. Originally ! it was one large clean statement but the compiler ! complained it contained too many run time calls, ergo ! the present form. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: xloc,yloc,dnax,dnay,dnbx,dnby !HPF$ ALIGN (*,*,:) with V (*,*,:):: Q,Fluxab,Fluxbc,Fluxcd,Fluxda,h, $ avab,avbc,avcd,avda,res !HPF$ ALIGN (*,:) with V (*,*,:) :: p,dt,area write(6,*) 'Please enter pback' Read(5,*) pback write(6,*) 'Please enter CFl number' read(5,*) cfl write(6,*) 'enter exit condition: 1=subsonic, 2=supersonic' read(5,*) exit iter=1 if (exit .eq. 2) then itermax=1350. end if if (exit .eq. 1) then itermax=4000. end if call timer(start,0.0) res(3,4,5)=0.4 ! These array operations set every element of the array to ! the prescribed value. No communication nessecary. avab=0.0 avbc=0.0 avcd=0.0 avda=0.0 dt=9.9e-3 p=pback open(unit=8,file='err.dat',status='unknown') open(unit=9,file='in.dat',status='unknown') Forall(i=2:mx,j=2:my) area(i,j)=0.5*abs((xloc(i,j) $ -xloc(i-1,j-1))*(yloc(i-1,j)-yloc(i,j-1)) - $ (xloc(i-1,j)-xloc(i,j-1))*(yloc(i,j)-yloc(i-1,j-1))) !-----------Start looping------------- !General form of the loop: ! -Compute time step at each axial location ! -Compute flux vectors for each "volume" ! -Compute dissipation vectors ! -Compute residual ! -Compute stage of Runge Kutta ! -Repeat for all 4 stages ! -Update solution ! Although not used, the code is able to add a source term !(ie heat transfer into the nozzle) to the equations using ! the source subroutine and H vector. Do While(iter .le. itermax) Call time_step(V,dt,p,xloc,yloc,cfl,lmax,mx,my) Call Flux(V,Fluxab,Fluxbc,Fluxcd,Fluxda,xloc,yloc,p,lmax,mx,my) Call Artvisc(V,avab,avbc,avcd,avda,xloc,yloc, $ dnax,dnay,dnbx,dnby,p,lmax,mx,my) ! Call source(V,h,p,yloc,lmax,mx,my) Forall(l=1:lmax,i=2:mx,j=2:my) Res(l,i,j)=-1.0/area(i,j) $ * (Fluxab(l,i,j)+Fluxbc(l,i,j)+Fluxcd(l,i,j)+Fluxda(l,i,j)) Res(l,i,j)=Res(l,i,j) $ + 1.0/area(i,j)*(avbc(l,i,j)*sqrt(dnbx(i,j)**2 $ + dnby(i,j)**2)-avda(l,i,j)*sqrt(dnbx(i-1,j)**2 $ + dnby(i-1,j)**2)) Res(l,i,j)=Res(l,i,j) +1.0/area(i,j)*( avcd(l,i,j) $ *sqrt(dnax(i,j)**2 $ + dnay(i,j)**2)-avab(l,i,j)*sqrt(dnax(i,j-1)**2+dnay(i,j-1) $ **2)) + h(l,i,j) Q(l,i,j)=V(l,i,j) $ + 0.25 *Res(l,i,j)*dt(i,j) End forall Forall(i=2:mx,j=2:my) p(i,j)=0.4*(Q(4,i,j)-0.5/Q(1,i,j) $ *(Q(2,i,j)**2 + Q(3,i,j)**2)) End Forall Call bound(Q,p,min,pback,lmax,mx,my,exit) ! Call time_step(Q,dt,xloc,yloc,lmax,mx,my) Call Flux(Q,Fluxab,Fluxbc,Fluxcd,Fluxda,xloc,yloc,p,lmax,mx,my) Call Artvisc(Q,avab,avbc,avcd,avda,xloc,yloc, $ dnax,dnay,dnbx,dnby,p,lmax,mx,my) ! Call source(Q,h,p,yloc,lmax,mx,my) Forall(l=1:lmax,i=2:mx,j=2:my) Res(l,i,j)=-1.0/area(i,j) $ * (Fluxab(l,i,j)+Fluxbc(l,i,j)+Fluxcd(l,i,j)+Fluxda(l,i,j)) Res(l,i,j)=Res(l,i,j) + $ + 1.0/area(i,j)*(avbc(l,i,j)*sqrt(dnbx(i,j)**2 $ + dnby(i,j)**2)-avda(l,i,j)*sqrt(dnbx(i-1,j)**2 $ + dnby(i-1,j)**2)) Res(l,i,j)=Res(l,i,j) +1.0/area(i,j)*( avcd(l,i,j) $ *sqrt(dnax(i,j)**2 $ + dnay(i,j)**2)-avab(l,i,j)*sqrt(dnax(i,j-1)**2+dnay(i,j-1) $ **2)) + h(l,i,j) Q(l,i,j)=V(l,i,j) $ + 1.0/3.0 *Res(l,i,j)*dt(i,j) End Forall Forall(i=2:mx,j=2:my) p(i,j)=0.4*(Q(4,i,j)-0.5/Q(1,i,j) $ *(Q(2,i,j)**2 + Q(3,i,j)**2)) End Forall Call bound(Q,p,min,pback,lmax,mx,my,exit) ! Call time_step(Q,dt,xloc,yloc,lmax,mx,my) Call Flux(Q,Fluxab,Fluxbc,Fluxcd,Fluxda,xloc,yloc,p,lmax,mx,my) Call Artvisc(Q,avab,avbc,avcd,avda,xloc,yloc, $ dnax,dnay,dnbx,dnby,p,lmax,mx,my) ! Call source(Q,h,p,yloc,lmax,mx,my) Forall(l=1:lmax,i=2:mx,j=2:my) Res(l,i,j)=-1.0/area(i,j) $ * (Fluxab(l,i,j)+Fluxbc(l,i,j)+Fluxcd(l,i,j)+Fluxda(l,i,j)) Res(l,i,j)=Res(l,i,j) + $ + 1.0/area(i,j)*(avbc(l,i,j)*sqrt(dnbx(i,j)**2 $ + dnby(i,j)**2)-avda(l,i,j)*sqrt(dnbx(i-1,j)**2 $ + dnby(i-1,j)**2)) Res(l,i,j)=Res(l,i,j) +1.0/area(i,j)*( avcd(l,i,j) $ *sqrt(dnax(i,j)**2 $ + dnay(i,j)**2)-avab(l,i,j)*sqrt(dnax(i,j-1)**2+dnay(i,j-1) $ **2)) + h(l,i,j) Q(l,i,j)=V(l,i,j) $ + 1.0/2.0 *Res(l,i,j)*dt(i,j) End Forall Forall(i=2:mx,j=2:my) p(i,j)=0.4*(Q(4,i,j)-0.5/Q(1,i,j) $ *(Q(2,i,j)**2 + Q(3,i,j)**2)) End Forall Call bound(Q,p,min,pback,lmax,mx,my,exit) ! Call time_step(Q,dt,xloc,yloc,lmax,mx,my) Call Flux(Q,Fluxab,Fluxbc,Fluxcd,Fluxda,xloc,yloc,p,lmax,mx,my) Call Artvisc(Q,avab,avbc,avcd,avda,xloc,yloc, $ dnax,dnay,dnbx,dnby,p,lmax,mx,my) ! Call source(Q,h,p,yloc,lmax,mx,my) Forall(l=1:lmax,i=2:mx,j=2:my) Res(l,i,j)=-1.0/area(i,j) $ * (Fluxab(l,i,j)+Fluxbc(l,i,j)+Fluxcd(l,i,j)+Fluxda(l,i,j)) Res(l,i,j)=Res(l,i,j) + $ + 1.0/area(i,j)*(avbc(l,i,j)*sqrt(dnbx(i,j)**2 $ + dnby(i,j)**2)-avda(l,i,j)*sqrt(dnbx(i-1,j)**2 $ + dnby(i-1,j)**2)) Res(l,i,j)=Res(l,i,j) +1.0/area(i,j)*( avcd(l,i,j) $ *sqrt(dnax(i,j)**2 $ + dnay(i,j)**2)-avab(l,i,j)*sqrt(dnax(i,j-1)**2+dnay(i,j-1) $ **2)) + h(l,i,j) V(l,i,j)=V(l,i,j) $ + 1.0 *Res(l,i,j)*dt(i,j) End Forall Forall(i=2:mx,j=2:my) p(i,j)=0.4*(V(4,i,j)-0.5/V(1,i,j) $ *(V(2,i,j)**2 + V(3,i,j)**2)) End Forall Call bound(V,p,min,pback,lmax,mx,my,exit) iter=iter+1 End do call timer(end,start) write(6,*) 'Execution took:',end,'seconds' call Contour(V,xloc,yloc,lmax,mx,my ) do i=1,mx write(8,*) xloc(i,8),sqrt((V(2,i,my)/V(1,i,my))**2 $ + (V(3,i,my)/V(1,i,my))**2)/ $ sqrt(1.4*p(i,my)/V(1,i,my)), sqrt((V(2,i,2)/V(1,i,2))**2 $ + (V(3,i,2)/V(1,i,2))**2)/ $ sqrt(1.4*p(i,2)/V(1,i,2)),v(3,i,2)/V(1,i,2) end do write(8,*) 'and this was last residual',maxval(abs(res)) End subroutine ! This routine computes the flux vectors across the edges of ! our finite volume element. The values of the flow variables ! at the cell edges are assumed to be the average of the neighboring ! cell centers, hence the 0.5*( everything). The average ! is multipled by the appropriate edge Deltax or Deltay. ! Forall constructs are used throughout, but some of ! the statements appear choppy because I had to cut them ! into chunks. The compiler complained that too many run-time ! calls were used when these were single statements. Subroutine Flux(V,Fluxab,Fluxbc,Fluxcd,Fluxda,xloc,yloc, $ p,lmax,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real Fluxab(lmax,mx+1,my+1),Fluxbc(lmax,mx+1,my+1), $ Fluxcd(lmax,mx+1,my+1),Fluxda(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1) Real p(mx+1,my+1) Integer l,i,j !All arrays used in this routine have already been distributed ! or aligned in the calling routine, thus the * notation. ! No remapping or realigning should occur. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: xloc,yloc !HPF$ ALIGN (*,*,:) with *V (*,*,:) ::Fluxab,Fluxbc,Fluxcd,Fluxda !HPF$ ALIGN (*,:) with *V (*,*,:) :: p !************Compute Boundary Fluxes*************** ! --------------Top (solid) Boundary--------------- Fluxcd=0.0 Forall(i=2:mx) Fluxcd(2,i,my)=p(i,my)*(yloc(i-1,my)-yloc(i,my)) Fluxcd(3,i,my)=-1.0*p(i,my)*(xloc(i-1,my) $ -xloc(i,my)) End Forall ! -------------Bottom (symmetry) Boundary----------------- Fluxab=0.0 Forall(i=2:mx) Fluxab(1,i,2)=0.0 Fluxab(2,i,2)=( $ p(i,2))*(yloc(i,1)-yloc(i-1,1)) Fluxab(3,i,2)=-1.0*p(i,2) $ *(xloc(i,1)-xloc(i-1,1)) Fluxab(4,i,2)=0.0 End Forall !****************Start Interior Points************* Forall(i=2:mx,j=3:my) Fluxab(1,i,j)=0.5*(V(2,i,j)+ $ V(2,i,j-1))*(yloc(i,j-1)-yloc(i-1,j-1)) $ -0.5*(V(3,i,j)+V(3,i,j-1))*(xloc(i,j-1)-xloc(i-1,j-1)) End forall Forall(i=2:mx,j=3:my) Fluxab(2,i,j)=0.5*(V(2,i,j)**2 $ / V(1,i,j) +p(i,j) + V(2,i,j-1)**2/V(1,i,j-1) +p(i,j-1)) $ *(yloc(i,j-1)-yloc(i-1,j-1)) Fluxab(2,i,j)=Fluxab(2,i,j) -0.5*(V(2,i,j)*V(3,i,j)/V(1,i,j) $ + V(2,i,j-1)*V(3,i,j-1)/V(1,i,j-1)) $ *(xloc(i,j-1)-xloc(i-1,j-1)) End Forall Forall(i=2:mx,j=3:my) Fluxab(3,i,j)=0.5*(V(2,i,j) $ *v(3,i,j)/V(1,i,j) + V(2,i,j-1)*V(3,i,j-1)/V(1,i,j-1) $ )*(yloc(i,j-1)-yloc(i-1,j-1)) Fluxab(3,i,j)=Fluxab(3,i,j) -0.5*(V(3,i,j)**2/V(1,i,j) $ +p(i,j) +p(i,j-1)+ V(3,i,j-1)**2/V(1,i,j-1)) $ *(xloc(i,j-1)-xloc(i-1,j-1)) end forall Forall(i=2:mx,j=3:my) Fluxab(4,i,j)=(V(4,i,j) $ + p(i,j))*V(2,i,j)/V(1,i,j) Fluxab(4,i,j)=Fluxab(4,i,j) + (V(4,i,j-1) + $ p(i,j-1))*V(2,i,j-1)/V(1,i,j-1) Fluxab(4,i,j)=0.5*Fluxab(4,i,j)*(yloc(i,j-1)-yloc(i-1,j-1)) Fluxab(4,i,j)=Fluxab(4,i,j) $ -0.5*(V(4,i,j)+p(i,j))*V(3,i,j)/V(1,i,j) $ *(xloc(i,j-1)-xloc(i-1,j-1)) Fluxab(4,i,j)=Fluxab(4,i,j)- $ 0.5*(V(4,i,j-1)+p(i,j-1))*V(3,i,j-1)/V(1,i,j-1) $ * (xloc(i,j-1)-xloc(i-1,j-1)) End Forall ! Now bc fluxes Forall(i=1:mx,j=2:my) Fluxbc(1,i,j)=0.5*(V(2,i,j) $ + V(2,i+1,j))*(yloc(i,j)-yloc(i,j-1)) -0.5*(V(3,i,j) $ + V(3,i+1,j))*(xloc(i,j)-xloc(i,j-1)) End Forall Forall(i=1:mx,j=2:my) Fluxbc(2,i,j)=0.5*(V(2,i,j)**2 $ /V(1,i,j) + p(i,j)+V(2,i+1,j)**2/V(1,i+1,j)+p(i+1,j)) $ *(yloc(i,j)-yloc(i,j-1)) Fluxbc(2,i,j)=Fluxbc(2,i,j) $ -0.5*(V(3,i,j)*V(2,i,j)/V(1,i,j) $ + V(3,i+1,j)*V(2,i+1,j)/V(1,i+1,j)) $ * (xloc(i,j)-xloc(i,j-1)) end Forall Forall(i=1:mx,j=2:my) Fluxbc(3,i,j)=0.5*(V(2,i,j) $ *V(3,i,j)/V(1,i,j) + V(2,i+1,j)*V(3,i+1,j)/V(1,i+1,j)) $ *(yloc(i,j)-yloc(i,j-1)) Fluxbc(3,i,j)=Fluxbc(3,i,j)-0.5*(V(3,i,j)**2/V(1,i,j) $ + V(3,i+1,j)**2/V(1,i+1,j) +p(i,j) +p(i+1,j)) $ *(xloc(i,j)-xloc(i,j-1)) End Forall Forall(i=1:mx,j=2:my) Fluxbc(4,i,j)=(V(4,i,j) $ + P(i,j))*V(2,i,j)/V(1,i,j) Fluxbc(4,i,j)=Fluxbc(4,i,j) + (V(4,i+1,j)+p(i+1,j)) $ *V(2,i+1,j)/V(1,i+1,j) Fluxbc(4,i,j)=0.5*Fluxbc(4,i,j)*(yloc(i,j)-yloc(i,j-1)) $ -0.5*(V(4,i,j) $ + P(i,j))*V(3,i,j)/V(1,i,j)*(xloc(i,j)-xloc(i,j-1)) Fluxbc(4,i,j)=Fluxbc(4,i,j)-0.5* (V(4,i+1,j)+p(i+1,j)) $ *V(3,i+1,j)/V(1,i+1,j)*(xloc(i,j)-xloc(i,j-1)) End Forall Forall(l=1:lmax,i=2:mx,j=2:my-1) fluxcd(l,i,j)= $ -1.0*fluxab(l,i,j+1) End FORALL Forall(l=1:lmax,i=2:mx,j=2:my) fluxda(l,i,j)= $ -1.0*fluxbc(l,i-1,j) End Forall End Subroutine ! This routine computes the arificial dissipation for the scheme. ! The dissipation vectors avab,avbc,avcd,acda are necessary to ! dampen the numerical oscillations that a shock wave produces. ! The vectors are the sum of a second order term and fourth ! order term. The fourth order term is turned "off" when ! a pressure gradient sensor EPS2 reaches a critical value (kp4). ! Leaving it in would blow the solution up near shock waves. ! Two HPF_LOCAL routines are used because the boundary conditions ! the routine requires (Derivatives are zero at boundaries) ! somehow required communication when they shouldn't have. ! Using HPF_LOCAL insures the procedure is done on a single ! node with no communication. extrinsic (hpf_local) subroutine any(V,lmax,mx,my) integer lmax,mx,my Real V(:,:,:) integer myid,nprocs,ub,lb,i,j,k !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC myid = pghpf_myprocnum() nprocs = pghpf_nprocs() ! lb and ub are the upper and lower bound of the array's 3rd ! dimension lb = lbound(V,3) ub = ubound(V,3) Do i=2,mx !Make derivaties zero at top and lower boundary Do k=1,lmax if (myid == nprocs-1) then V(k,i,ub)=V(k,i,ub-1) else if (myid == 0) then V(k,i,lb)=V(k,i,lb+1) endif End Do End Do end subroutine extrinsic (hpf_local) subroutine notany(V,p,lmax,mx,my) integer mx,my,lmax Real V(:,:,:) Real p(:,:) integer myid,nprocs,ub,lb,i,j,k !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: p myid = pghpf_myprocnum() nprocs = pghpf_nprocs() lb = lbound(p,2) ub = ubound(p,2) ! Make derivatives zero at top and lower boundary Do i=2,mx if (myid == nprocs-1) then p(i,ub)=p(i,ub-1) else if (myid == 0) then p(i,lb)=p(i,lb+1) endif End Do end subroutine Subroutine Artvisc(V,avab,avbc,avcd,avda,xloc,yloc, $ dnax,dnay,dnbx,dnby,p,lmax,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real avab(lmax,mx+1,my+1),avbc(lmax,mx+1,my+1), $ avcd(lmax,mx+1,my+1),avda(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1),dnbx(mx,my+1),dnby(mx,my+1) Real dnax(mx,my+1),dnay(mx,my+1) Real p(mx+1,my+1) Real eps2(mx+1,my+1),eps4(mx+1,my+1) Real kp2,kp4,start,end Integer k, i,j,imy ! Execpt for eps2 and eps4, all arrays have been previously ! distributed and aligned. Again, the * tells ! the compiler not to remap or realign. !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: xloc,yloc,dnax,dnay,dnbx,dnby !HPF$ ALIGN (*,*,:) with *V (*,*,:) ::avab,avbc,avcd,avda !HPF$ ALIGN (*,:) with *V(*,*,:) :: p !HPF$ ALIGN (*,:) with V(*,*,:) :: eps2,eps4 !! HPF_Local routines must be interfaced, and they ! must have assumed shaped (:,:,:) array declarations ! They also use same mapping. interface extrinsic (hpf_local) subroutine any(V,lmax,mx,my) integer lmax,mx,my Real V(:,:,:) integer myid,nprocs,ub,lb,i,j,k !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC end subroutine end interface interface extrinsic (hpf_local) subroutine notany(V,p,lmax,mx,my) integer mx,my,lmax Real V(:,:,:) Real p(:,:) integer myid,nprocs,ub,lb,i,j,k !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: p end subroutine end interface kp2=4.0/4.0 kp4=4.0/256.0 avab=0.0 avbc=0.0 avcd=0.0 avda=0.0 !***********Side AB & CD fluxes********** ! call any(V,lmax,mx,my) ! call notany(V,p,lmax,mx,my) ! What the HPF_LOCAL routines do p(:,my+1) = p(:,my) p(:,1) = p(:,2) V(:,:,my+1)=V(:,:,my) V(:,:,1)=V(:,:,2) Forall(k=1:lmax,i=2:mx) avab(k,i,2)=0.0 avcd(k,i,my)=0.0 End Forall ! Instead of the Forall Constructs, I could have used the ! following Independent DO loops with scalar eps2 and eps4 variables ! !HPF$ INDEPENDENT, new(i) ! DO j=3,my ! !HPF$ INDEPENDENT, new(k,eps2,eps4) ! DO i=2,mx ! !HPF$ INDEPENDENT ! DO k=1,4 ! eps2=abs(p(i,j+1) ! $-2.0*p(i,j)+p(i,j-1))/abs(p(i,j+1)+2.0*p(i,j) + ! $ p(i,j-1)) ! eps2=kp2*max(eps2,abs(p(i,j)-2.0*p(i,j-1)+p(i,j-2)) ! $ /abs( p(i,j)+2.0*p(i,j-1)+p(i,j-2))) ! ! eps4=max(0.0,(kp4-eps2)) ! avab(k,i,j)=0.5*(V(2,i,j)/V(1,i,j)+V(2,i,j-1)/V(1,i,j-1)) ! $ *dnax(i,j) ! ! avab(k,i,j)=avab(k,i,j)+0.5*(V(3,i,j)/V(1,i,j)+ ! $V(3,i,j-1)/V(1,i,j-1))* ! $ dnay(i,j) ! avab(k,i,j)=abs(avab(k,i,j)) ! avab(k,i,j)=avab(k,i,j)/sqrt(dnax(i,j)**2 + dnay(i,j)**2) ! avab(k,i,j)=avab(k,i,j)+sqrt(1.4*(p(i,j)+p(i,j-1))/ ! $ (V(1,i,j)+V(1,i,j-1))) ! avab(k,i,j)=avab(k,i,j)*(eps2(i,j)*(V(k,i,j)-V(k,i,j-1)) ! $ -eps4(i,j)*(V(k,i,j+1)-V(k,i,j)-V(k,i,j) ! End DO ! End DO ! End DO Forall(i=2:mx,j=3:my) eps2(i,j)=abs(p(i,j+1) $-2.0*p(i,j)+p(i,j-1))/abs(p(i,j+1)+2.0*p(i,j) + $ p(i,j-1)) eps2(I,J)=kp2*MAX(eps2(i,j),abs(p(i,j)-2.0*p(i,j-1)+p(i,j-2)) $ /abs( p(i,j)+2.0*p(i,j-1)+p(i,j-2))) eps4(i,j)=max(0.0,(kp4-eps2(i,j))) End Forall Forall(k=1:lmax,i=2:mx,j=3:my) avab(k,i,j)=0.5*(V(2,i,j)/V(1,i,j)+V(2,i,j-1)/V(1,i,j-1)) $ *dnax(i,j) avab(k,i,j)=avab(k,i,j)+0.5*(V(3,i,j)/V(1,i,j)+ $V(3,i,j-1)/V(1,i,j-1))* $ dnay(i,j) avab(k,i,j)=abs(avab(k,i,j)) avab(k,i,j)=avab(k,i,j)/sqrt(dnax(i,j)**2 + dnay(i,j)**2) avab(k,i,j)=avab(k,i,j)+sqrt(1.4*(p(i,j)+p(i,j-1))/ $ (V(1,i,j)+V(1,i,j-1))) avab(k,i,j)=avab(k,i,j)*(eps2(i,j)*(V(k,i,j)-V(k,i,j-1)) $ -eps4(i,j)*(V(k,i,j+1)-V(k,i,j)-V(k,i,j)-V(k,i,j) $ +V(k,i,j-1)+V(k,i,j-1)+V(k,i,j-1)-V(k,i,j-2))) End Forall Forall(k=1:lmax,i=2:mx,j=2:my-1) avcd(k,i,j)=1.0*avab(k,i,j+1) !***********now BC and DA dissipation ********* Forall(k=1:lmax,j=1:my+1) avda(k,2,j)=0.0 avbc(k,mx,j)=0.0 avbc(k,mx+1,j)=0.0 V(k,mx+1,j)=V(k,mx,j) V(k,1,j)=V(k,2,j) End Forall Forall(j=1:my+1) p(mx+1,j)=p(mx,j) p(1,j)=p(2,j) End Forall Forall(i=2:mx-1,j=2:my) eps2(i,j)=abs(p(i+1,j) $ -2.0*p(i,j)+p(i-1,j))/abs(p(i+1,j)+2.0*p(i,j)+p(i-1,j)) eps2(i,j)=kp2*max(eps2(i,j), $ abs(p(i+2,j)-2.0*p(i+1,j)+p(i,j))/abs(p(i+2,j)+2.0*p(i+1,j) $ +p(i,j))) eps4(i,j)=max(0.0,(kp4-eps2(i,j))) End Forall Forall(k=1:lmax,i=2:mx-1,j=2:my) avbc(k,i,j)= $ 0.5* $ (V(2,i,j)/V(1,i,j)+V(2,i+1,j)/V(1,i+1,j))*dnbx(i,j) avbc(k,i,j)=avbc(k,i,j)+ $ 0.5*(V(3,i,j)/V(1,i,j)+V(3,i+1,j)/V(1,i+1,j))*dnby(i,j) avbc(k,i,j)=abs(avbc(k,i,j)) avbc(k,i,j)=avbc(k,i,j) $ /sqrt(dnbx(i,j)**2+dnby(i,j)**2) avbc(k,i,j)=avbc(k,i,j)+ sqrt(1.4*(p(i,j) $ + p(i+1,j))/(V(1,i,j)+V(1,i+1,j))) avbc(k,i,j)=avbc(k,i,j)*(eps2(i,j)*(V(k,i+1,j) $ -V(k,i,j)) -eps4(i,j)*(V(k,i+2,j)-V(k,i+1,j)-V(k,i+1,j) $ - V(k,i+1,j)+V(k,i,j)+V(k,i,j)+V(k,i,j) $ -V(k,i-1,j))) End Forall Forall(k=1:lmax,i=3:mx,j=2:my) avda(k,i,j)=1.0*avbc(k,i-1,j) End subroutine ! This routine computes the local time step, which varies ! varies with mesh spacing. This provides much quicker ! convergence than the global time step. Subroutine time_step(V,dt,p,xloc,yloc,cfl,lmax,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1) Real p(mx+1,my+1) Real dt(mx,my+1) Real cfl Integer i,j,l ! Simply distribute and align as done in the calling routine !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto *PROC !HPF$ ALIGN (*,:) with *V(*,*,:) :: xloc,yloc,dt !HPF$ ALIGN (*,:) with *V(*,*,:) :: p Forall(i=2:mx,j=2:my) dt(i,j)=cfl/(abs(V(2,i,j)/V(1,i,j))/(xloc(i,j)-xloc(i-1,j)) $ + abs(V(3,i,j)/V(1,i,j))/(yloc(i,j)-yloc(i,j-1)) + $ sqrt(1.4*p(i,j)/v(1,i,j))*sqrt(1.0/(xloc(i,j)-xloc(i-1,j))**2 $ +1.0/(yloc(i,j)-yloc(i,j-1))**2)) End Forall End subroutine !This routine computes the necessary boundary conditions ! at the nozzle exit and entrance. For a subsonic inlet, ! pressure is simply extrapolated backwards from the ! upstream. Supersonic inlet pressure is computed ! on the basis of given mach number. ! A supersonic outlet extrapolates the pressure, while ! a subsonic inlet sets the back pressure to the value ! specified by the user. Subroutine Bound(V,p,min,pback,lmax,mx,my,exit) Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real p(mx+1,my+1) Real mach(my+1) Real min,pback Integer exit ! Simple leave incoming distribution and alignment alone !HPF$ PROCESSORS PROC(NUMBER_OF_PROCESSORS()) !HPF$ DISTRIBUTE V *(*,*,BLOCK) onto * PROC !HPF$ ALIGN (*,:) with *V (*,*,:) :: p If (min .gt. 1.0) then Forall(j=1:my+1) P(1,j)=1.0/((min**2)/2.0 *(1.4-1.0) $ +1.0)**(1.4/0.4) V(1,1,j)=P(1,j)*(1.0 $ + (1.4-1.0)/2.0*min**2) V(2,1,j)=V(1,1,j)*min* $ sqrt(1.4*p(1,j)/V(1,1,j)) V(3,1,j)=0.0 V(4,1,j)=p(1,j)/(0.4) + 0.5*V(1,1,j) $ *(V(2,1,j)**2 / V(1,1,j)**2) end Forall End IF If (min .le. 1.0) then Forall(j=1:my+1) p(1,j)=2.0*p(2,j)-p(3,j) mach(j)=2.0/(1.4-1.0)*(p(1,j)** $ ((1.0-1.4)/1.4) -1.0) V(1,1,j)=p(1,j)* $ (1.0+0.2*mach(j)) V(2,1,j)=sqrt(mach(j))*sqrt(1.4/(1.0+0.2*mach(j))) $ *V(1,1,j) V(3,1,j)=0.0 V(4,1,j)=p(1,j)/(0.4) + 0.5*V(1,1,j) $ *(V(2,1,j)**2 / V(1,1,j)**2) end Forall End IF If (exit .eq. 1) then Forall(j=1:my+1) p(mx+1,j)=pback V(1,mx+1,j)=2.0*V(1,mx,j)-V(1,mx-1,j) V(2,mx+1,j)=2.0*V(2,mx,j)-V(2,mx-1,j) V(3,mx+1,j)=2.0*V(3,mx,j)-V(3,mx-1,j) V(4,mx+1,j)=pback/0.4 $ + 0.5*V(1,mx+1,j)*(V(2,mx+1,j)**2/v(1,mx+1,j)**2 + $ V(3,mx+1,j)**2/V(1,mx+1,j)**2) end Forall End if If (exit .eq. 2 ) then Forall(j=1:my+1) p(mx+1,j) =p(mx,j) V(1,mx+1,j)=V(1,mx,j) V(2,mx+1,j)=V(2,mx,j) V(3,mx+1,j)=V(3,mx,j) V(4,mx+1,j)=p(mx+1,j)/0.4 $ + 0.5*V(1,mx+1,j)*(V(2,mx+1,j)**2/v(1,mx+1,j)**2 + $ V(3,mx+1,j)**2/V(1,mx+1,j)**2) end Forall End if End subroutine Subroutine Contour(V,xloc,yloc,lmax,mx,my) Implicit none Integer lmax,mx,my Real V(lmax,mx+1,my+1) Real xloc(mx,my+1),yloc(mx,my+1) Real p(mx,my) Real wcell(mx,my) Real wnode(mx,my) Integer i,j open(unit=10,file='info.dat',status='unknown') write(10,*) 'VARIABLES= "x","y","u","v","mach","entropy","enthalpy"' write(10,*) 'ZONE I=',mx,' J=',my,'F=BLOCK' 1 Format(6f12.6) Forall(i=2:mx,j=2:my) p(i,j)=0.4*(V(4,i,j) $ -0.5/V(1,i,j)*(V(2,i,j)**2 + V(3,i,j)**2)) write(10,1) ((xloc(i,j),i=1,mx),j=1,my) Write(10,1) ((yloc(i,j),i=1,mx),j=1,my) !---------Put u component of velocity into file----- Forall(i=2:mx,j=2:my) wcell(i,j)=V(2,i,j)/V(1,i,j) Forall(i=2:mx-1,j=2:my-1) wnode(i,j)=0.25*(wcell(i+1,j) $ + wcell(i+1,j+1) + wcell(i,j+1) + wcell(i,j)) wnode(1,1)=wcell(2,2) wnode(1,my)=wcell(2,my) wnode(mx,my)=wcell(mx,my) Wnode(mx,1)=wcell(mx,2) Forall(j=2:my-1) wnode(1,j)=0.5*(wcell(2,j+1)+ $ wcell(2,j)) Forall(j=2:my-1) wnode(mx,j)=0.5*(wcell(mx,j+1) $ + wcell(mx,j)) Forall(i=2:mx-1) wnode(i,1)=0.5*(wcell(i,2)+ $ wcell(i+1,2)) Forall(i=2:mx-1) wnode(i,my)=0.5*(wcell(i,my)+wcell(i+1,my)) write(10,1) ((wnode(i,j),i=1,mx),j=1,my) !--------Put v component of velocity into file-------- Forall(i=2:mx,j=2:my) wcell(i,j)=V(3,i,j)/V(1,i,j) Forall(i=2:mx-1,j=2:my-1) wnode(i,j)=0.25*(wcell(i+1,j) $ + wcell(i+1,j+1) + wcell(i,j+1) + wcell(i,j)) wnode(1,1)=wcell(2,2) wnode(1,my)=wcell(2,my) wnode(mx,my)=wcell(mx,my) Wnode(mx,1)=wcell(mx,2) Forall(j=2:my-1) wnode(1,j)=0.5*(wcell(2,j+1)+ $ wcell(2,j)) Forall(j=2:my-1) wnode(mx,j)=0.5*(wcell(mx,j+1) $ + wcell(mx,j)) Forall(i=2:mx-1) wnode(i,1)=0.5*(wcell(i,2)+ $ wcell(i+1,2)) Forall(i=2:mx-1) wnode(i,my)=0.5*(wcell(i,my)+wcell(i+1,my)) write(10,1) ((wnode(i,j),i=1,mx),j=1,my) !----------Put mach number into file---------------- Forall(i=2:mx,j=2:my) wcell(i,j)=sqrt((V(2,i,j)/V(1,i,j))**2 $ + (V(3,i,j)/V(1,i,j))**2)/sqrt(1.4*p(i,j)/V(1,i,j)) Forall(i=2:mx-1,j=2:my-1) wnode(i,j)=0.25*(wcell(i+1,j) $ + wcell(i+1,j+1) + wcell(i,j+1) + wcell(i,j)) wnode(1,1)=wcell(2,2) wnode(1,my)=wcell(2,my) wnode(mx,my)=wcell(mx,my) Wnode(mx,1)=wcell(mx,2) Forall(j=2:my-1) wnode(1,j)=0.5*(wcell(2,j+1)+ $ wcell(2,j)) Forall(j=2:my-1) wnode(mx,j)=0.5*(wcell(mx,j+1) $ + wcell(mx,j)) Forall(i=2:mx-1) wnode(i,1)=0.5*(wcell(i,2)+ $ wcell(i+1,2)) Forall(i=2:mx-1) wnode(i,my)=0.5*(wcell(i,my)+wcell(i+1,my)) write(10,1) ((wnode(i,j),i=1,mx),j=1,my) !--------Stick entropy in there------- Forall(i=2:mx,j=2:my) wcell(i,j)=1.4/(1.4-1.0)*log(p(i,j) $ /V(1,i,j)) - log(p(i,j)) Forall(i=2:mx-1,j=2:my-1) wnode(i,j)=0.25*(wcell(i+1,j) $ + wcell(i+1,j+1) + wcell(i,j+1) + wcell(i,j)) wnode(1,1)=wcell(2,2) wnode(1,my)=wcell(2,my) wnode(mx,my)=wcell(mx,my) Wnode(mx,1)=wcell(mx,2) Forall(j=2:my-1) wnode(1,j)=0.5*(wcell(2,j+1)+ $ wcell(2,j)) Forall(j=2:my-1) wnode(mx,j)=0.5*(wcell(mx,j+1) $ + wcell(mx,j)) Forall(i=2:mx-1) wnode(i,1)=0.5*(wcell(i,2)+ $ wcell(i+1,2)) Forall(i=2:mx-1) wnode(i,my)=0.5*(wcell(i,my)+wcell(i+1,my)) write(10,1) ((wnode(i,j),i=1,mx),j=1,my) !----------Enthalpy----------------- Forall(i=2:mx,j=2:my) wcell(i,j)=V(4,i,j)/V(1,i,j) +p(i,j)/V(1,i,j) Forall(i=2:mx-1,j=2:my-1) wnode(i,j)=0.25*(wcell(i+1,j) $ + wcell(i+1,j+1) + wcell(i,j+1) + wcell(i,j)) wnode(1,1)=wcell(2,2) wnode(1,my)=wcell(2,my) wnode(mx,my)=wcell(mx,my) Wnode(mx,1)=wcell(mx,2) Forall(j=2:my-1) wnode(1,j)=0.5*(wcell(2,j+1)+ $ wcell(2,j)) Forall(j=2:my-1) wnode(mx,j)=0.5*(wcell(mx,j+1) $ + wcell(mx,j)) Forall(i=2:mx-1) wnode(i,1)=0.5*(wcell(i,2)+ $ wcell(i+1,2)) Forall(i=2:mx-1) wnode(i,my)=0.5*(wcell(i,my)+wcell(i+1,my)) write(10,1) ((wnode(i,j),i=1,mx),j=1,my) close(unit=10) End subroutine