Long-Range N-Body Code


An overview of the N-body code is available.
c
c  This program finds the force on each of a set of particles interacting
c  via a long-range 1/r**2 force law.
c
c  The number of processes must be even, and the total number of points
c  must be exactly divisible by the number of processes.
c
c  This is the MPI version.
c
c  Author: David W. Walker
c  Date:   March 10, 1995
c
        program nbody
	implicit none
	include 'mpif.h'
	integer myrank, ierr, nprocs, npts, nlocal
	integer pseudohost, NN, MM, PX, PY, PZ, FX, FY, FZ
	real G
	parameter (pseudohost = 0)
        parameter (NN=10000, G = 1.0)
	parameter (MM=0, PX=1, PY=2, PZ=3, FX=4, FY=5, FZ=6)
        real dx(0:NN-1), dy(0:NN-1), dz(0:NN-1)
	real dist(0:NN-1), sq(0:NN-1)
        real fac(0:NN-1), tx(0:NN-1), ty(0:NN-1), tz(0:NN-1)
        real p(0:6,0:NN-1), q(0:6,0:NN-1) 
	integer i, j, k, dest, src
	double precision timebegin, timeend
	integer status(MPI_STATUS_SIZE)
	integer newtype
	double precision ran
	integer iran
c
c  Initialize MPI, find rank of each process, and the number of processes
c
        call mpi_init (ierr)
        call mpi_comm_rank (MPI_COMM_WORLD, myrank, ierr)
	call mpi_comm_size (MPI_COMM_WORLD, nprocs, ierr)
c
c  One process acts as the host and reads in the number of particles
c
	if (myrank .eq. pseudohost) then
           open (4,file='nbody.input')
	   if (mod(nprocs,2) .eq. 0) then
	      read  (4,*) npts
	      if (npts .gt. nprocs*NN) then
           	 print *,'Warning!! Size out of bounds!!'
		 npts = -1
              else if (mod(npts,nprocs) .ne. 0) then
		 print *,'Number of processes must divide npts'
		 npts = -1
              end if
           else
	      print *,'Number of processes must be even'
	      npts = -1
	   end if
	end if
c
c  The number of particles is broadcast to all processes
c
	call mpi_bcast (npts, 1, MPI_INTEGER, pseudohost, 
     #            	MPI_COMM_WORLD, ierr)
c
c  Abort if number of processes and/or particles is incorrect
c
	if (npts .eq. -1) goto 999
c
c  Work out number of particles in each process
c
	nlocal  = npts/nprocs
c
c  The pseudocode hosts initializes the particle data and sends each 
c  process its particles.
c
        if (myrank .eq. pseudohost) then
	   iran = myrank + 111
	   do i=0,nlocal-1
	      p(MM,i) = sngl(ran(iran))
	      p(PX,i) = sngl(ran(iran))
	      p(PY,i) = sngl(ran(iran))
	      p(PZ,i) = sngl(ran(iran))
	      p(FX,i) = 0.0
	      p(FY,i) = 0.0
	      p(FZ,i) = 0.0
           end do
	   do k=0,nprocs-1
	      if (k .ne. pseudohost) then
	         do i=0,nlocal-1
		    q(MM,i) = sngl(ran(iran))
		    q(PX,i) = sngl(ran(iran))
		    q(PY,i) = sngl(ran(iran))
		    q(PZ,i) = sngl(ran(iran))
		    q(FX,i) = 0.0
		    q(FY,i) = 0.0
		    q(FZ,i) = 0.0
                 end do
		 call mpi_send (q, 7*nlocal, MPI_REAL, 
     #			        k, 100, MPI_COMM_WORLD, ierr)
	      end if
           end do
        else
	   call mpi_recv (p, 7*nlocal, MPI_REAL, 
     #  	          pseudohost, 100, MPI_COMM_WORLD, status, ierr)
        end if
c
c  Initialization is now complete. Start the clock and begin work.
c  First each process makes a copy of its particles.
c
        timebegin = mpi_wtime ()
        do i= 0,nlocal-1
            q(MM,i) = p(MM,i)
            q(PX,i) = p(PX,i)
            q(PY,i) = p(PY,i)
            q(PZ,i) = p(PZ,i)
            q(FX,i) = 0.0
            q(FY,i) = 0.0
            q(FZ,i) = 0.0
        end do
c
c  Now the interactions between the particles in a single process are
c  computed.
c
        do i=0,nlocal-1
          do j=i+1,nlocal-1
            dx(i) = p(PX,i) - q(PX,j)
            dy(i) = p(PY,i) - q(PY,j)
            dz(i) = p(PZ,i) - q(PZ,j)
            sq(i) = dx(i)**2+dy(i)**2+dz(i)**2
            dist(i) = sqrt(sq(i))
            fac(i) = p(MM,i) * q(MM,j) / (dist(i) * sq(i))
            tx(i) = fac(i) * dx(i)
            ty(i) = fac(i) * dy(i)
            tz(i) = fac(i) * dz(i)
            p(FX,i) = p(FX,i)-tx(i)
            q(FX,j) = q(FX,j)+tx(i)
            p(FY,i) = p(FY,i)-ty(i)
            q(FY,j) = q(FY,j)+ty(i)
            p(FZ,i) = p(FZ,i)-tz(i)
            q(FZ,j) = q(FZ,j)+tz(i)
          end do
        end do
c
c  The processes are arranged in a ring. Data will be passed in an
C  anti-clockwise direction around the ring.
c
	dest = mod (nprocs+myrank-1, nprocs)
	src  = mod (myrank+1, nprocs)
c
c  Each process interacts with the particles from its nprocs/2-1
c  anti-clockwise neighbors. At the end of this loop p(i) in each
c  process has accumulated the force from interactions with particles
c  i+1, ...,nlocal-1 in its own process, plus all the particles from its
c  nprocs/2-1 anti-clockwise neighbors. The "home" of the q array is
C  regarded as the process from which it originated. At the end of
c  this loop q(i) has accumulated the force from interactions with 
C  particles 0,...,i-1 in its home process, plus all the particles from the
C  nprocs/2-1 processes it has rotated to.
c
        do k=0,nprocs/2-2
	  call mpi_sendrecv_replace (q, 7*nlocal, MPI_REAL, dest, 200,
     #                         src, 200, MPI_COMM_WORLD, status, ierr)
          do i=0,nlocal-1
            do j=0,nlocal-1
              dx(i) = p(PX,i) - q(PX,j)
              dy(i) = p(PY,i) - q(PY,j)
              dz(i) = p(PZ,i) - q(PZ,j)
              sq(i) = dx(i)**2+dy(i)**2+dz(i)**2
              dist(i) = sqrt(sq(i))
              fac(i) = p(MM,i) * q(MM,j) / (dist(i) * sq(i))
              tx(i) = fac(i) * dx(i)
              ty(i) = fac(i) * dy(i)
              tz(i) = fac(i) * dz(i)
              p(FX,i) = p(FX,i)-tx(i)
              q(FX,j) = q(FX,j)+tx(i)
              p(FY,i) = p(FY,i)-ty(i)
              q(FY,j) = q(FY,j)+ty(i)
              p(FZ,i) = p(FZ,i)-tz(i)
              q(FZ,j) = q(FZ,j)+tz(i)
            end do
          end do
        end do
c
c  Now q is rotated once more so it is diametrically opposite its home
c  process. p(i) accumulates forces from the interaction with particles
c  0,..,i-1 from its opposing process. q(i) accumulates force from the
c  interaction of its home particles with particles i+1,...,nlocal-1 in
c  its current location.
c
        if (nprocs .gt. 1) then
	  call mpi_sendrecv_replace (q, 7*nlocal, MPI_REAL, dest, 300,
     #                          src, 300, MPI_COMM_WORLD, status, ierr)
          do i=nlocal-1,0,-1
            do j=i-1,0,-1
              dx(i) = p(PX,i) - q(PX,j)
              dy(i) = p(PY,i) - q(PY,j)
              dz(i) = p(PZ,i) - q(PZ,j)
              sq(i) = dx(i)**2+dy(i)**2+dz(i)**2
              dist(i) = sqrt(sq(i))
              fac(i) = p(MM,i) * q(MM,j) / (dist(i) * sq(i))
              tx(i) = fac(i) * dx(i)
              ty(i) = fac(i) * dy(i)
              tz(i) = fac(i) * dz(i)
              p(FX,i) = p(FX,i)-tx(i)
              q(FX,j) = q(FX,j)+tx(i)
              p(FY,i) = p(FY,i)-ty(i)
              q(FY,j) = q(FY,j)+ty(i)
              p(FZ,i) = p(FZ,i)-tz(i)
              q(FZ,j) = q(FZ,j)+tz(i)
            end do
          end do
c
c  In half the processes we include the interaction of each particle with
c  the corresponding particle in the opposing process.
c
          if (myrank .lt. nprocs/2) then
            do i=0,nlocal-1
              dx(i) = p(PX,i) - q(PX,i)
              dy(i) = p(PY,i) - q(PY,i)
              dz(i) = p(PZ,i) - q(PZ,i)
              sq(i) = dx(i)**2+dy(i)**2+dz(i)**2
              dist(i) = sqrt(sq(i))
              fac(i) = p(MM,i) * q(MM,i) / (dist(i) * sq(i))
              tx(i) = fac(i) * dx(i)
              ty(i) = fac(i) * dy(i)
              tz(i) = fac(i) * dz(i)
              p(FX,i) = p(FX,i)-tx(i)
              q(FX,i) = q(FX,i)+tx(i)
              p(FY,i) = p(FY,i)-ty(i)
              q(FY,i) = q(FY,i)+ty(i)
              p(FZ,i) = p(FZ,i)-tz(i)
              q(FZ,i) = q(FZ,i)+tz(i)
            end do
          endif
c
c  Now the q array is returned to its home process.
c
	  dest = mod (nprocs+myrank-nprocs/2, nprocs)
	  src  = mod (myrank+nprocs/2, nprocs)
	  call mpi_sendrecv_replace (q, 7*nlocal, MPI_REAL, dest, 400,
     #                         src, 400, MPI_COMM_WORLD, status, ierr)
        end if
c
c  The p and q arrays are summed to give the total force on each particle.
c
        do i=0,nlocal-1
          p(FX,i) = p(FX,i) + q(FX,i)
          p(FY,i) = p(FY,i) + q(FY,i)
          p(FZ,i) = p(FZ,i) + q(FZ,i)
        end do
c
c  Stop clock and write out timings
c
        timeend = mpi_wtime ()
        print *,'Node', myrank,'  Elapsed time: ',
     #          timeend-timebegin,' seconds'
c
c  Do a barrier to make sure the timings are written out first
c
        call mpi_barrier (MPI_COMM_WORLD, ierr)
c
c  Each process returns its forces to the pseudohost which prints them out.
c
        if (myrank .eq. pseudohost) then
           open (7,file='nbody.output')
	   write (7,100) (p(FX,i),p(FY,i),p(FZ,i),i=0,nlocal-1)
	   call mpi_type_vector (nlocal, 3, 7, MPI_REAL, newtype, ierr)
	   call mpi_type_commit (newtype, ierr)
	   do k=0,nprocs-1
	      if (k .ne. pseudohost) then
		 call mpi_recv (q(FX,0), 1, newtype,
     #			        k, 100, MPI_COMM_WORLD, status, ierr)
		 write (7,100) (q(FX,i),q(FY,i),q(FZ,i),i=0,nlocal-1)  
	      end if
           end do
        else
	   call mpi_type_vector (nlocal, 3, 7, MPI_REAL, newtype, ierr)
	   call mpi_type_commit (newtype, ierr)
	   call mpi_send (p(FX,0), 1, newtype, 
     #  	          pseudohost, 100, MPI_COMM_WORLD, ierr)
        end if
c
c  Close MPI
c
  999   call mpi_finalize (ierr)

        stop
  100   format(3e15.6)
        end

If you want to save this program without the HTML stuff in it, and you don't have a web browser that will take it out, then save the file as is to nbody.html and run the following sed command:

sed -e 's/\<\/a\>//' -e 's/\<.*\>//' -e '/^If/,$d' nbody.html >nbody.f
This is not a general prescription for removing HTML from a file, but it works in this case.
Click here to return to the MPI home page.
http://www.epm.ornl.gov/~walker/mpi/examples/nbody.html
David W. Walker, Oak Ridge National Laboratory / (walkerdw@ornl.gov)
Last Modified April 7, 1995