2011 PLSI 병렬컴퓨팅경진대회문제 01. 대학원팀 02. 학부팀 - 경진내용 - 경진환경 주어진순차코드를병렬화하여성능향상도 ( 획득점수 ) 를측정 점수 = ( 순차코드수행시간 ) / ( 병렬화코드수행시간 ) 프로그래밍언어 : C, Fortran 순차코드는 50 라인내외로작성하여제공됨 참가자들은컴파일러 (intel, gnu, pgi) 선택가능 시간제한 6 일오후 1 시 ~6 시 (5 시간 2 문제 ), 7 일오전 9 시 ~12 시 (3 시간 1 문제 ) 제한시간내병렬화코드를제출 제출파일 : 병렬화코드, 병렬화코드수행을위한 Makefile 특정라이브러리및컴파일러최적화옵션사용불가 - 출제문제유형 지난해의체험대회를바탕으로일반적인문제출제를원칙으로하며, 문제해결을통해병렬화기법을익힐수있는데초점을맞춤 예상문제 Monte Carlo Method 를이용한과학문제해결 Molecular Dynamics(MD) 문제 - gnuplot 등을통한 visualization Finite Difference Method(FDM) 문제 - 경진내용및경진환경은대학원팀과동일 - 출제문제유형 예상문제 sin 적분문제 Taylor expansion : sin Matrix 곱하기
01. [ 학부팀유형 1] - 수치적분 아래적분식의수치적분을이용하여 PI 의근사값을구하는순차코드가주어져있다. 이코드를병렬화 (MPI) 하여최대성능을얻으시오. (CPU 는최대 16 개까지사용 ) sin => sin 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간 - 순차코드 : 347.908 초 - 병렬코드 : 42.206 초 - 성능향상도 : 8.243
02. [ 학부팀유형 2] - 행렬곱 주어진 (n,n) 행렬에대하여 A B를구하고모든원소의합을구하는순차코드를병렬화 (MPI) 하여최대성능을얻으시오. (CPU는최대 16개까지사용 ) 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간 - 순차코드 : 247.670 초 - 병렬코드 : 39.488 초 - 성능향상도 : 6.272 03. [ 학부팀유형 3] - Taylor 급수를이용한 sin(x) 구하기 아래의 sin(x) 에대한 Taylor 급수를이용하여근사값을구하는순차코드를병렬화 (MPI) 하여최대성능을얻으시오. (CPU는최대 16개까지사용 ) 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간 - 순차코드 : 108.717 초 - 병렬코드 : 8.772 초 - 성능향상도 : 12.394
04. [ 대학원팀유형 1] - 2D Random Walk 1000000개입자에대한 1000번의 2D Random Walk를수행하고분포를구하는순차코드를병렬화 (MPI) 하여최대성능을얻으시오. (CPU는최대 16개까지사용 ) 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간 - 순차코드 : 28.370 초 - 병렬코드 : 4.993 초 - 성능향상도 : 5.682 05. [ 대학원팀유형 2] - Molecular Dynamics 주어진 1000개입자의초기위치에대하여각각의입자는 1/R( x1-x2 ) 의힘을받는다고하면, 10000번의 step 후의위치를구할수있다. 순차코드에대하여 MPI 병렬화를이용하여최대성능을얻으시오. (CPU는최대 16개까지사용 ) 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간 - 순차코드 : 29.153 초 - 병렬코드 : 9.761 초 - 성능향상도 : 2.987 06. [ 대학원팀유형 3] - 2D FDM 주어진 2D 경계조건에서 FDM을이용하면유일해를구할수있다. 300X300 grid를이용하여 FDM의해를구하는순차코드에대하여 MPI 병렬화를이용하여최대성능을얻으시오. (CPU는최대 16개까지사용 ) 평가방법 - 병렬코드, 실행파일, 컴파일환경 ( 컴파일러, 옵션, 사용 MPI 라이브러리 ) 제출. - 순차코드와결과가일치하는병렬코드에대하여성능향상도를점수화. 예제수행시간
- 순차코드 : 44.202 초 - 병렬코드 : 7.554 초 - 성능향상도 : 5.851
01. 학부팀유형 1 코드 순차코드 (Fortran) program integration real(8) :: pi, x, x_max integer(8) :: num_step, i pi=0.0d0 x=0.0d0 x_max=100000.0d0 num_step=5000000000 do i = 1, num_step x = x_max*real(i,8)/real(num_step,8) pi = pi + 2.0d0*sin(x)/x*x_max/real(num_step,8) print *,'PI = ',pi end program integration
병렬코드 (Fortran) program integration include 'mpif.h' real(8) :: pi, x, x_max, sum integer(8) :: num_step, i, is, ie integer(4) :: myrank, nprocs, ierr call MPI_INIT(ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs,ierr) sum=0.0d0 x=0.0d0 x_max=100000.0d0 num_step=5000000000 is = 1 ; ie = num_step call para_range(is,ie,nprocs,myrank,is,ie) do i = is, ie x = x_max*real(i,8)/real(num_step,8) sum = sum + 2.0d0*sin(x)/x*x_max/real(num_step,8) call MPI_REDUCE(sum,pi,1,MPI_DOUBLE_PRECISION,MPI_SUM,0,MPI_COMM_WORLD,ierr) if(myrank.eq.0) then print *, 'PI = ',pi endif call MPI_FINALIZE(ierr) stop contains subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: nprocs,irank integer(8) :: np,ir integer(8) :: n1,n2,ista,iend integer(8) :: iwork1,iwork2 np=int(nprocs,8) ir=int(irank,8) iwork1 = (n2 - n1 + 1) / np iwork2 = mod(n2 - n1 + 1, np) ista = ir * iwork1 + n1 + min(ir,iwork2)
iend = ista + iwork1-1 if(iwork2.gt. ir) iend = iend + 1 return end subroutine end program integration
02. 학부팀유형 2 코드 순차코드 (Fortran) program matrix integer :: i,j,k,n real,dimension(:,:),allocatable :: a,b,c real :: sum n=5000 sum=0.0 allocate (a(n,n)) allocate (b(n,n)) allocate (c(n,n)) c=0.0 do j=1,n do i=1,n a(i,j)=real(i)/real( j) b(i,j)=real( j)/real(i) do k=1,n do j=1,n do i=1,n c(i,j)=c(i,j)+a(i,k)*b(k,j) do j=1,n do i=1,n sum=sum+c(i,j) print *, 'SUM = ',sum deallocate(a,b,c) end program matrix
병렬코드 (Fortran) program matrix include 'mpif.h' integer :: i,j,k,n,is,ie integer(4) :: myrank, nprocs, ierr real,dimension(:,:),allocatable :: a,b,c real :: sum,psum call MPI_INIT(ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs,ierr) n=5000 sum=0.0 psum=0.0 allocate (a(n,n)) allocate (b(n,n)) allocate (c(n,n)) c=0.0 is=1;ie=n call para_range(is,ie,nprocs,myrank,is,ie) do j=1,n do i=1,n a(i,j)=real(i)/real( j) b(i,j)=real( j)/real(i) do k=1,n do j=is,ie do i=1,n c(i,j)=c(i,j)+a(i,k)*b(k,j) do j=is,ie do i=1,n psum=psum+c(i,j) call MPI_REDUCE(psum,sum,1,MPI_REAL,MPI_SUM,0,MPI_COMM_WORLD,ierr)
if(myrank.eq.0) then print *, 'SUM = ',sum endif deallocate(a,b,c) call MPI_FINALIZE(ierr) stop contains subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: nprocs,irank integer :: np,ir integer :: n1,n2,ista,iend integer :: iwork1,iwork2 np=nprocs ir=irank iwork1 = (n2 - n1 + 1) / np iwork2 = mod(n2 - n1 + 1, np) ista = ir * iwork1 + n1 + min(ir,iwork2) iend = ista + iwork1-1 if(iwork2.gt. ir) iend = iend + 1 return end subroutine end program matrix
03. 학부팀유형 3 코드 순차코드 (Fortran) program taylor real(8) :: x, mysin, tmp, x_max, step integer(4) :: i, j, k, n, num_step x_max=4.0d0 x=0.0d0 num_step=10000 n=1000 step=x_max/real(num_step,8) do i=0,num_step mysin=0.0d0 do j=1,n tmp=1.0d0 do k=1,2*j-1 tmp=tmp*x/real(k,8) if(mod( j,2)) then mysin=mysin+tmp else mysin=mysin-tmp endif print *, x, mysin, sin(x) x=x+step end program taylor
병렬코드 (Fortran) program taylor include 'mpif.h' real(8) :: x, mysin, tmp, x_max, step integer(4) :: i, j, k, n, num_step integer(4) :: myrank, nprocs, ierr, is, ie call MPI_INIT(ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs,ierr) x_max=4.0d0 num_step=10000 n=1000 step=x_max/real(num_step,8) is=0;ie=num_step call para_range(is,ie,nprocs,myrank,is,ie) do i=is,ie x=real(i,8)*step mysin=0.0d0 do j=1,n tmp=1.0d0 do k=1,2*j-1 tmp=tmp*x/real(k,8) if(mod( j,2)) then mysin=mysin+tmp else mysin=mysin-tmp endif print *, x, mysin, sin(x) call MPI_FINALIZE(ierr) contains subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: nprocs,irank integer :: np,ir integer :: n1,n2,ista,iend integer :: iwork1,iwork2
np=nprocs ir=irank iwork1 = (n2 - n1 + 1) / np iwork2 = mod(n2 - n1 + 1, np) ista = ir * iwork1 + n1 + min(ir,iwork2) iend = ista + iwork1-1 if(iwork2.gt. ir) iend = iend + 1 return end subroutine end program taylor
04. 대학원팀유형 1 코드 순차코드 (Fortran) program randomwalk integer :: i,j,rtc integer, parameter :: N_P=1000000, N_W=1000 integer, dimension(n_p) :: part_x, part_y integer, dimension(:,:),allocatable :: pos real :: seed, rand,temp part_x=0 part_y=0 call srand(0.5) do i=1, N_P do j=1,n_w temp=rand(0) if(temp <= 0.25) then part_x(i)=part_x(i)+1 elseif(temp <= 0.5) then part_y(i)=part_y(i)+1 elseif(temp <=0.75) then part_x(i)=part_x(i)-1 else part_y(i)=part_y(i)-1 endif allocate(pos(-n_w:n_w,-n_w:n_w)) pos=0 do i=1,n_p pos(part_x(i),part_y(i))=pos(part_x(i),part_y(i))+1 do i=-n_w,n_w,2 do j=-n_w,n_w,2 if(pos(i,j).ne. 0) print*, i,j, pos(i,j) deallocate(pos)
end program randomwalk
병렬코드 (Fortran) program randomwalk include 'mpif.h' integer :: mysize, myrank, ierr, pos_count, ista, iend integer :: i,j,d,min_p,max_p,rtc integer, parameter :: N_P=1000000, N_W=1000 integer, dimension(n_p) :: part_x, part_y integer, dimension(:,:),allocatable :: pos,pos_all real :: seed, rand,temp part_x=0 part_y=0 call MPI_INIT(ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,mysize, ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,myrank, ierr) call srand(0.5+myrank*0.1) call para_range(1,n_p,mysize,myrank,ista,iend) do i=ista,iend do j=1,n_w temp=rand(0) if(temp <= 0.25) then part_x(i)=part_x(i)+1 elseif(temp <= 0.5) then part_y(i)=part_y(i)+1 elseif(temp <=0.75) then part_x(i)=part_x(i)-1 else part_y(i)=part_y(i)-1 endif allocate(pos(-n_w:n_w,-n_w:n_w)) allocate(pos_all(-n_w:n_w,-n_w:n_w)) pos=0 do i=1,n_p pos(part_x(i),part_y(i))=pos(part_x(i),part_y(i))+1 pos_count=size(pos) call MPI_REDUCE(pos,pos_all,pos_count,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ierr) if(myrank==0) then pos=pos_all do i=-n_w,n_w,2 do j=-n_w,n_w,2
if(pos(i,j).ne. 0) print*, i,j, pos(i,j) endif deallocate(pos) deallocate(pos_all) call MPI_FINALIZE(ierr) CONTAINS subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: n1,n2,nprocs,irank,ista,iend integer :: iwork1,iwork2 iwork1 = (n2 - n1 + 1) / nprocs iwork2 = mod(n2 - n1 + 1, nprocs) ista = irank * iwork1 + n1 + min(irank,iwork2) iend = ista + iwork1-1 if(iwork2.gt. irank) iend = iend + 1 return end subroutine end program randomwalk
05. 대학원팀유형 2 코드 순차코드 (Fortran) program md integer :: i,j,k integer, parameter :: N_P=1000, step=10000 real, dimension(n_p) :: x, force real :: f_jk do i=1,n_p read *, x(i) x(i)=x(i)*10000000.0 do i=1,step force=0.0 do j=1,n_p-1 do k= j+1,n_p f_jk = 1.0/(x(k)-x( j)) force( j)=force( j)+f_jk force(k)=force(k)-f_jk do j=1,n_p x( j)=x( j)+force( j) do i=1,n_p print *, i,x(i) end program md
병렬코드 (Fortran) program md include 'mpif.h' integer :: mysize, myrank, ierr, ista, iend integer :: i,j,k integer, parameter :: N_P=1000, step=10000 real, dimension(n_p) :: x, force, force_all real :: f_jk call MPI_INIT(ierr) call MPI_COMM_SIZE(MPI_COMM_WORLD,mysize, ierr) call MPI_COMM_RANK(MPI_COMM_WORLD,myrank, ierr) if(myrank==0) then do i=1,n_p read *, x(i) x(i)=x(i)*10000000.0 print *,'start' endif call MPI_BCAST(x,N_P,MPI_REAL,0,MPI_COMM_WORLD,ierr) do i=1,step force=0.0 call para_range(1,n_p-1,mysize,myrank,ista,iend) do j=ista,iend do k= j+1,n_p f_jk = 1.0/(x(k)-x( j)) force( j)=force( j)+f_jk force(k)=force(k)-f_jk call MPI_REDUCE(force,force_all,N_P,MPI_REAL,MPI_SUM,0,MPI_COMM_WORLD,ierr) if(myrank==0) then do j=1,n_p x( j)=x( j)+force_all( j) endif if(myrank==0) then do i=1,n_p print *, i,x(i)
endif call MPI_FINALIZE(ierr) CONTAINS subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: n1,n2,nprocs,irank,ista,iend integer :: iwork1,iwork2 iwork1 = (n2 - n1 + 1) / nprocs iwork2 = mod(n2 - n1 + 1, nprocs) ista = irank * iwork1 + n1 + min(irank,iwork2) iend = ista + iwork1-1 if(iwork2.gt. irank) iend = iend + 1 return end subroutine end program md
06. 대학원팀유형 3 코드 순차코드 (Fortran) program FDM2D integer im,jm,im1,jm1 parameter(im=300,jm=300) integer is,ie,js,je integer iter,itermax,nprt real tolerance real bc(4)!left,right,bottom,top real u(0:im+1,0:jm+1) real error! read input data itermax=100000 nprt=500 tolerance = 1.0e-7 bc(1) = 10.0 bc(2) = 10.0 bc(3) = 10.0 bc(4) = 20.0! initialize im1=im+1 jm1=jm+1 do j=0,jm1 do i=0,im1 u(i,j) = 0.0! boundary conditions do j=0,jm1 u(0,j) = bc(1)!left u(im1,j) = bc(2)!right do i=0,im1 u(i,0 ) = bc(3)!bottom u(i,jm1) = bc(4)!top! set computation range is = 1 ie = im
js = 1 je = jm! main routine iter = 0 error = 1000.! ********************************************************************* do while(iter.le.itermax.and.error.gt.tolerance) call jacobi(u,im,jm,is,ie,js,je,error) iter = iter + 1 if(mod(iter,nprt).eq.0) write(*,100) iter,error! ********************************************************************* print*,'error=',error print*,'converged after ',iter,'iteration' 100 format('iteration=',i6,' Error=',e9.4)! OPEN(UNIT=10, FILE='outFDM2Seq.dat', STATUS='REPLACE', ACTION='WRITE') DO j=1,jm! WRITE (*, '128(F7.4,1x)'), (u(i,j), i=1,im)! WRITE (10, '128(F7.4,1x)'), (u(i,j), i=1,im) ENDDO! CLOSE(10) stop end subroutine jacobi(u,im,jm,is,ie,js,je,error) integer im,jm,is,ie,js,je integer i,j real error real u(0:im+1,0:jm+1), uo(0:im+1,0:jm+1)! store old data do j=0,jm+1 do i=0,im+1 uo(i,j) = u(i,j)! jacobi do j=js,je do i=is,ie u(i,j) = (uo(i-1,j)+uo(i+1,j)+uo(i,j-1)+uo(i,j+1))/4.0
! error error = 0.0 do j=js,je do i=is,ie error = error + (u(i,j) - uo(i,j))**2 return end
병렬코드 (Fortran) PROGRAM FDM2D IMPLICIT NONE INCLUDE 'mpif.h' INTEGER :: nprocs, myrank, viewrank, ista, iend, jsta, jend, ierr, status(mpi_status_size), inext, iprev, isend1, isend2, irecv1, irecv2 INTEGER :: i, j, k INTEGER :: im,jm,im1,jm1, njm PARAMETER(im=300,jm=300) INTEGER :: is,ie,js,je INTEGER :: iter,itermax,nprt REAL :: tolerance REAL :: bc(4)!left,right,bottom,top REAL :: u(0:im+1,0:jm+1), newu(0:im+1, 0:jm+1), works1( jm), works2( jm), workr1( jm), workr2( jm) REAL :: error, errorsum CHARACTER*2 :: number CHARACTER*30 :: filename(0:10) INTEGER :: irecv(0:3), iscnt, ircnt(0:3), idisp(0:3)=(/1,33,65,97/) viewrank = 3! mpi ready CALL MPI_INIT(ierr) CALL MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr) CALL MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr) CALL para_range(1,im, nprocs, myrank, ista, iend)! read input data itermax=100000 nprt=500 tolerance = 1.0e-7 bc(1) = 10.0 bc(2) = 10.0 bc(3) = 10.0 bc(4) = 20.0! initialize im1=im+1 jm1=jm+1 DO j=0,jm1 DO i=0,im1 u(i,j) = 0.0 END DO END DO! boundary conditions
IF(myrank == 0) THEN DO j=0, jm+1 u(0,j) = bc(1)!left ENDDO ENDIF IF(myrank == nprocs-1) THEN DO j=0, jm+1 u(im1,j) = bc(2)!right END DO ENDIF DO i=ista, iend u(i,0 ) = bc(3)!bottom u(i,jm1) = bc(4)!top END DO! set computation range is = ista ie = iend js = 1 je = jm inext = myrank + 1 iprev = myrank - 1 IF(myrank == 0) iprev = MPI_PROC_NULL IF(myrank == nprocs -1 ) inext = MPI_PROC_NULL! main routine iter = 0 error = 1000. errorsum = 1000.! ********************************************************************* DO WHILE(iter.LE.itermax.AND.errorsum.GT.tolerance)! do while(iter.le.itermax) IF(myrank /= nprocs-1) THEN DO j=1, jm works1( j) = u(iend, j) ENDDO ENDIF IF(myrank /= 0) THEN DO j=1, jm works2( j) = u(ista, j) ENDDO ENDIF CALL MPI_ISEND(works1, jm, MPI_REAL, inext, 1, MPI_COMM_WORLD, isend1, ierr) CALL MPI_ISEND(works2, jm, MPI_REAL, iprev, 1, MPI_COMM_WORLD, isend2, ierr)
CALL MPI_IRECV(workr1, jm, MPI_REAL, iprev, 1, MPI_COMM_WORLD, irecv1, ierr) CALL MPI_IRECV(workr2, jm, MPI_REAL, inext, 1, MPI_COMM_WORLD, irecv2, ierr) CALL MPI_WAIT(isend1, status, ierr) CALL MPI_WAIT(isend2, status, ierr) CALL MPI_WAIT(irecv1, status, ierr) CALL MPI_WAIT(irecv2, status, ierr) IF(myrank /= 0)THEN DO j=1, jm u(ista-1, j)=workr1( j) ENDDO ENDIF IF(myrank /= nprocs-1) THEN DO j=1, jm u(iend+1, j)= workr2( j) ENDDO ENDIF call jacobi(u,im,jm,is,ie,js,je,error) iter = iter + 1 CALL MPI_ALLREDUCE(error, errorsum, 1, MPI_REAL, MPI_SUM, MPI_COMM_WORLD, ierr) if((mod(iter,nprt).eq.0).and. myrank == viewrank) write(*,100) iter,errorsum! ********************************************************************* 100 format('iteration=',i6,' Error=',e9.4) IF(myrank == 0) THEN print*,'error=',errorsum print*,'converged after ',iter,'iteration' 200 format('iteration=',i6,' Error=',e9.4) ENDIF! delete communication data IF(myrank /= 0)THEN DO j=1, jm u(ista-1, j)=0.0 ENDDO ENDIF IF(myrank /= nprocs-1) THEN DO j=1, jm u(iend+1, j)=0.0 ENDDO ENDIF CALL MPI_REDUCE(u, newu, (im+2)*( jm+2), MPI_REAL, MPI_SUM, 0, MPI_COMM_WORLD, ierr) CALL MPI_FINALIZE(ierr) STOP
CONTAINS subroutine para_range(n1,n2,nprocs,irank,ista,iend) integer :: n1,n2,nprocs,irank,ista,iend integer :: iwork1,iwork2 iwork1 = (n2 - n1 + 1) / nprocs iwork2 = mod(n2 - n1 + 1, nprocs) ista = irank * iwork1 + n1 + min(irank,iwork2) iend = ista + iwork1-1 if(iwork2.gt. irank) iend = iend + 1 return end subroutine! jacobi subroutine subroutine jacobi(u,im,jm,is,ie,js,je,error) integer im,jm,is,ie,js,je,njm integer i,j real error real u(0:im+1,0:jm+1), uo(0:im+1,0:jm+1) do j=js-1,je+1 do i=is-1,ie+1 uo(i,j) = u(i,j)! jacobi do j=js,je do i=is,ie u(i,j) = (uo(i-1,j)+uo(i+1,j)+uo(i,j-1)+uo(i,j+1))/4.0! error error = 0.0 do j=js,je do i=is,ie error = error + (u(i,j) - uo(i,j))**2 return end subroutine end program FDM2D