Commit 65186b5b authored by jg548024's avatar jg548024

initial commit

parents
## Configure and build with the GNU compilers.
## The MPI compiler wrappers named below (mpicc, mpic++, mpif90) must exist in $PATH.
export CC=gcc
export CXX=g++
export FC=gfortran
export MPICC=mpicc
export MPICXX=mpic++
export MPIFC=mpif90
## Only start the build when configure succeeded; otherwise make would run
## against a missing or stale configuration and hide the real error.
./configure && make
## Configure and build with the Intel compilers.
## The MPI compiler wrappers named below (mpicc, mpicxx, mpifort) must exist in $PATH.
export CC=icc
export CXX=icpc
export FC=ifort
export MPICC=mpicc
export MPICXX=mpicxx
export MPIFC=mpifort
## Only start the build when configure succeeded; otherwise make would run
## against a missing or stale configuration and hide the real error.
./configure && make
This diff is collapsed.
compile:
--------
1) Extract nb3dFFT-x.x.x.tar.gz to any location.
2) All compile settings are defined in 'Makefile.in' located in the folder 'Makefiles'.
Copy or link the appropriate Makefile.in.* (e.g. Makefile.in.intel) from 'Makefiles' to the base directory of nb3dFFT as 'Makefile.in'.
3) Check the settings in this Makefile.in
- set FFT library (USE_FFT = FFTW, ESSL, NONE)
- enable/disable hybrid parallelization (USE_OPENMP)
- set double/single precision (USE_SINGLE_PRECISION)
- enable print timer to stdout (PRINT_TIMING)
4) Compile library
- 'cd nb3dFFT/src'
- 'make release' or 'make debug'
5) Compile test program
- 'cd nb3dFFT/test'
- 'make release' or 'make debug'
run job:
--------
1) run mpi test program
'mpirun -np <mpiranks> test_nb3dfft <n1> <n2> <n3> <loops> <do_filter> <do_check> <iproc>'
- n1,n2,n3 : size of computational domain in x,y and z direction
- loops : number of 3d forward and backward FFTs to perform before the test ends
- do_filter: if > 0 domain is filtered in all directions by 1/3 while transforming to spectral space
- do_check : do time consuming test to check the computational error
- iproc : size of 2d processor grid in i-direction(ranks close to each other), choose no. cores per node.
Makefiles/Makefile.in.intel
\ No newline at end of file
########################################################################
#
# general commands
#
########################################################################
### File operations
# Shell command shortcuts. All are recursive (=) variables, so the order of
# the definitions is irrelevant.
# -- create
MKDIR          = mkdir -p
SYMLINK        = ln -sf
# -- copy / move (COPY_FILE and COPY_DIR are derived from COPY)
COPY           = cp -f
COPY_FILE      = $(COPY)
COPY_DIR       = $(COPY) -r
MOVE           = mv -f
# -- delete
DEL_FILE       = rm -f
DEL_DIR        = rm -rf
# -- existence probes
CHK_DIR_EXISTS = test -d
CHK_APP_EXISTS = command -v
# -- message output
RL             = echo
########################################################################
#
# directories
#
########################################################################
# Project root: the working directory with everything from its first
# '/nb3dFFT' onwards removed, then '/nb3dFFT' appended again.
# ':=' runs the $(shell ...) call exactly once at parse time; with '=' a new
# shell would be forked for every expansion of HOMEDIR and of each variable
# derived from it.
HOMEDIR := $(shell pwd | sed -e 's/\/nb3dFFT.*//')/nb3dFFT
# Stop immediately if the project root has no Makefile.in (file or link).
ifeq ($(wildcard $(HOMEDIR)/Makefile.in),)
define errortxt
ERROR in Makefile.in:
Project directory $(HOMEDIR) does not exist
or link to Makefile.in is missing in $(HOMEDIR).
endef
$(error $(errortxt))
endif
# Hidden directories below the project root. MODDIR is the module directory
# passed to the compiler via MODFLAGS; OBJDIR/LIBDIR are presumably used for
# object files and the library by the including makefiles.
OBJDIR = $(HOMEDIR)/.obj
MODDIR = $(HOMEDIR)/.mod
LIBDIR = $(HOMEDIR)/.lib
# Glob patterns of temporary files (presumably removed by a clean target
# defined in the including makefiles).
TMPFILES= *.mod.F90 *.mod.f90 *.opari.* pompregions_* *~
########################################################################
#
# settings
#
########################################################################
# Select the FFT library
# FLAGS: ESSL - for IBM BlueGene architecture (JUGENE,JUQUEEN etc.)
# FFTW - Intel Architecture
# NONE - disable FFT (only for debug)
USE_FFT = ESSL
# YES will enable hybrid parallelization (default = YES)
# INFORMATION: Depending on the system variable OMP_NUM_THREADS at runtime,
# multiple threads will be used by the fft routines, if the library supports it.
# Routines for packing and unpacking the messages sent with mpi_alltoall()
# are implemented with different OpenMP strategies and can be enabled.
# FFTW manual: "As a general rule, you don't want to use more threads than you have processors."
USE_OPENMP = YES
# more: PACKMEMCP, PACKLOOP
# YES will enable single precision (default = NO)
# ATTENTION: this affects all input and output arrays and all fft functions
USE_SINGLE_PRECISION = NO
# Use non-blocking collectives like MPI_IALLTOALLV.
# They are part of MPI 3.0,
# but are also implemented in libNBC (http://htor.inf.ethz.ch/research/nbcoll/libnbc/) by Torsten Hoefler.
# IBM provides its own implementation in its extended MPI library "MPE".
# nb3dFFT supports all three possible libraries by the flag USE_NBC = 1,2,3
# 0 - disable non-blocking communication
# 1 - MPI 3.0 (no extra lib needed)
# 2 - libNBC (compile libNBC located in directory '3rdparty' using GCC)
# 3 - MPIX - IBM's extension (only usable with IBM MPI)
USE_NBC = 3
# YES will enable printing of detailed timing information to stdout (default = NO)
# INFORMATION: only for debug or speed/scaling tests
# PRINT_TIMING = YES forces CALC_TIMING = YES further below in this file.
CALC_TIMING = YES
PRINT_TIMING = NO
# YES will result in global MPI ranks being mapped onto the 2d processor grid in such a way
# that they increase first in J instead of I direction. (default = NO)
# This results in MPI ranks being close for the mpi_alltoall() of the Y<->Z transpose
# instead of for the mpi_alltoall() of the X<->Y transpose.
# In general it makes no sense to enable this !!
# With a real and spectral grid of the same size it should have no impact,
# but with a smaller spectral grid (filtering) do NOT use REVERSE_DIMS.
# In this case it is in general slower, because the X<->Y transform sends more data
# than the Y<->Z transform - and close global MPI ranks have in general faster network connectivity.
# => Do not change USE_REVERSE_DIMS without detailed knowledge of the MPI process mapping on your cluster.
# => Processor mapping is very important: nb3dFFT expects that close MPI ranks have highest network bandwidth.
USE_REVERSE_DIMS = NO
# YES will result in settings of interest when developing nb3dFFT (default = NO)
USE_DEVELOP = NO
# YES if 'use mpi' shall be used instead of '#include "mpif.h"'
USE_MPI_MODULE = YES
########################################################################
#
# libraries flags
#
########################################################################
### FFT ###
ifeq ($(USE_FFT),FFTW)
# The FFTW installation prefix is the same for both precisions.
FFT_BASE = /bgsys/local/fftw3/3.3.3/fftw
FFT_INCDIR = -I$(FFT_BASE)/include
# "-DFFTW_ESTIMATE", "-DFFTW_PATIENT" or "-DFFTW_MEASURE"
# Chooses how much time FFTW is allowed to spend on finding the best setup on initialization.
FFT_FLAGS = -WF,"-DFFT_FFTW","-DFFTW_MEASURE"
# Serial (_SER) and OpenMP (_OMP) link lines; only the library names depend
# on the precision.
ifneq ($(USE_SINGLE_PRECISION),YES)
# double precision
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lfftw3
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lfftw3_omp -lfftw3
else
# single precision
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lfftw3f
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lfftw3f_omp -lfftw3f
endif
endif
#------
# ESSL: preprocessor define, include path and the serial / OpenMP link lines
# (FFT_LIBS_OMP is the one picked when USE_OPENMP = YES).
ifeq ($(USE_FFT),ESSL)
FFT_FLAGS = -WF,"-DFFT_ESSL"
FFT_BASE = /bgsys/local
FFT_INCDIR = -I$(FFT_BASE)/include
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lesslsmpbg -lmass_simd
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lesslbg
endif
#------
# NONE: no FFT library. FFT_BASE points to the project root, no include path
# and no FFT_LIBS_* are defined.
ifeq ($(USE_FFT),NONE)
FFT_FLAGS = -WF,"-DFFT_NONE"
FFT_INCDIR =
FFT_BASE = $(HOMEDIR)
endif
# Abort when the selected FFT installation directory is missing.
# FFT_BASE is also empty when USE_FFT holds none of the supported values
# (FFTW, ESSL, NONE), so the message reports USE_FFT as well.
ifeq ($(wildcard $(FFT_BASE)),)
define errortxt
ERROR in Makefile.in:
FFT directory '$(FFT_BASE)' does not exist (USE_FFT = $(USE_FFT)).
endef
$(error $(errortxt))
endif
### OPENMP ###
# Pick the FFT link line matching the threading mode; the OpenMP compiler
# flags are only defined for the hybrid build.
ifneq ($(USE_OPENMP),YES)
FFT_LIBS = $(FFT_LIBS_SER)
else
FFT_LIBS = $(FFT_LIBS_OMP)
OMP_FLAGS = -WF,"-DOPENMP" -qsmp=omp -qthreaded
# -parallel -par-report1
endif
### NBC - non-blocking communication ###
# One chain instead of three independent tests; the cases are mutually
# exclusive. ('else ifeq' needs GNU Make >= 3.81.)
# Any other value of USE_NBC (including 0) leaves NBC_FLAGS and NBC_LIBS untouched.
ifeq ($(USE_NBC),1)
# MPI 3.0: no extra library needed
NBC_FLAGS = -WF,"-DUSE_NBC=1"
else ifeq ($(USE_NBC),2)
# Torsten Hoefler's libNBC, linked statically from the 3rdparty directory
NBC_BASE = $(HOMEDIR)/3rdparty/libNBC-1.1.1_gcc/.libs
NBC_LIBS = $(NBC_BASE)/libnbc.a -lstdc++ -lmpi_cxx
NBC_FLAGS = -WF,"-DUSE_NBC=2"
else ifeq ($(USE_NBC),3)
# IBM's MPE
NBC_FLAGS = -WF,"-DUSE_NBC=3"
endif
### NON-BLOCKING THREAD ###
# Every USE_NBC value other than 0 switches the non-blocking thread on.
# USE_NBC_THREAD is not reset for USE_NBC = 0, so a value of YES coming from
# elsewhere (e.g. the make command line) still takes effect in that case.
ifneq ($(USE_NBC),0)
USE_NBC_THREAD = YES
endif
# Append the thread define and the threading compiler options to NBC_FLAGS.
# The ':=' appends turn NBC_FLAGS into a simply expanded variable.
# -qsmp=omp -qthreaded are the same options OMP_FLAGS carries when USE_OPENMP = YES.
ifeq ($(USE_NBC_THREAD),YES)
NBC_FLAGS := $(NBC_FLAGS) -WF,"-DUSE_NBC_THREAD" \
-qsmp=omp -qthreaded
# With hybrid parallelization additionally pass -qsmp=nested_par.
ifeq ($(USE_OPENMP),YES)
NBC_FLAGS := $(NBC_FLAGS) -qsmp=nested_par
endif
endif
########################################################################
#
# additional flags
#
########################################################################
# Extra preprocessor defines / compiler options collected in EXT_FLAGS.
# Each section below appends to EXT_FLAGS when its switch from the settings
# section is YES; the order of the sections determines the order of the flags.
# Always-on platform defines:
EXT_FLAGS = -WF,-D__bg__ -WF,-DIBM_BG
### SINGLE PRECISION ###
ifeq ($(USE_SINGLE_PRECISION),YES)
EXT_FLAGS := $(EXT_FLAGS) -WF,"-DSINGLE_PREC"
endif
### PRINT TIMING ###
# Printing timings forces CALC_TIMING to YES; this must happen before the
# CALC_TIMING test that follows.
ifeq ($(PRINT_TIMING),YES)
EXT_FLAGS := $(EXT_FLAGS) -WF,"-DPRINT_TIMING"
CALC_TIMING = YES
endif
ifeq ($(CALC_TIMING),YES)
EXT_FLAGS := $(EXT_FLAGS) -WF,"-DCALC_TIMING"
endif
### REVERSE DIMS ###
ifeq ($(USE_REVERSE_DIMS),YES)
EXT_FLAGS := $(EXT_FLAGS) -WF,"-DREVERSE_DIMS"
endif
### DEVELOP EXTRAS ###
ifeq ($(USE_DEVELOP),YES)
# Direct the preprocessor to expand your source module and store the result in a .i file.
# Also defines USE_DEBMODE and adds the -d -qreport -qlist -qsource compiler options.
EXT_FLAGS := $(EXT_FLAGS) \
-WF,"-P" \
-WF,"-DUSE_DEBMODE" \
-d -qreport -qlist -qsource
endif
### include of MPI Definitions ###
ifeq ($(USE_MPI_MODULE),YES)
EXT_FLAGS := $(EXT_FLAGS) -WF,"-DUSE_MPI_MODULE"
endif
########################################################################
#
# compile settings
#
########################################################################
# Root of the MPI installation; its bin/ provides the compiler wrapper (MPIFC)
# and its include/ is added to INCFLAGS.
BGQ_SYS = /bgsys/drivers/ppcfloor/comm/xl
#BGQ_SYS = /bgsys/drivers/ppcfloor/comm/xl.ndebug !! DO NOT USE IF USE_NBC_THREAD=YES
# Target architecture, passed to the compiler as -qarch=$(ARCH).
ARCH = qp
# Optional instrumentation prefix for the compiler command (disabled here, so
# $(PREP) is empty unless it is set elsewhere).
#PREP := scalasca -instrument -v
#MPIFC := $(PREP) $(BGQ_SYS)/bin/mpixlf90 !! DO NOT USE IF USE_NBC_THREAD=YES
MPIFC := $(PREP) $(BGQ_SYS)/bin/mpixlf90_r
# The compiler wrapper is also used as the linker.
MPILD := ${MPIFC}
AR = ar -v -r -u
# Link line: selected FFT and NBC libraries plus the math libraries.
LDFLAGS = $(FFT_LIBS) $(NBC_LIBS) \
-lmass -lmassv -lm -Wl,--allow-multiple-definition
# Compiler flags: fixed architecture/source-form/aliasing options followed by
# the flag groups assembled in the sections above.
F90FLAGS = -qarch=$(ARCH) -qtune=qp -qalign=4k \
-qsuffix=f=F90 -qfree=f90 \
-qalias=noaryovrlp:nopteovrlp -qassert=refalign:contig \
$(OMP_FLAGS) \
$(FFT_FLAGS) \
$(EXT_FLAGS) \
$(NBC_FLAGS)
# Include search path: current directory, MPI headers, FFT headers.
INCFLAGS = -I. -I$(BGQ_SYS)/include \
$(FFT_INCDIR)
# Module handling: MODDIR is both searched (-I) and used as the output
# directory for module files (-qmoddir).
MODFLAGS = -I$(MODDIR) \
$(FFT_INCDIR) \
-qmoddir=$(MODDIR)
# Flag sets presumably selected by the 'debug' / 'release' targets of the
# including makefiles - TODO confirm.
DBGFLAGS = -g
OPTFLAGS := -O3 \
-qunroll \
-qhot=simd:vector:level=1 \
-qsimd=auto \
-qprefetch=aggressive \
-qinline \
-qipa
# Notes on further / alternative optimization options:
# -qessl
# -qhot=simd:vector:level=1 (simd needs -qsimd=auto)
# -qsimd: automatic generation of vector instructions (default: -qsimd=noauto),(disable if scalasca?)
# -qunroll: bodies of inner loops will be unrolled, or duplicated (default: -qunroll=auto)
# -qassert=contiguous
# -qprefetch=aggressive
# bg_shape
########################################################################
#
# general commands
#
########################################################################
### File operations
# Shell command shortcuts. All are recursive (=) variables, so the order of
# the definitions is irrelevant.
# -- create
MKDIR          = mkdir -p
SYMLINK        = ln -sf
# -- copy / move (COPY_FILE and COPY_DIR are derived from COPY)
COPY           = cp -f
COPY_FILE      = $(COPY)
COPY_DIR       = $(COPY) -r
MOVE           = mv -f
# -- delete
DEL_FILE       = rm -f
DEL_DIR        = rm -rf
# -- existence probes
CHK_DIR_EXISTS = test -d
CHK_APP_EXISTS = command -v
# -- message output
RL             = echo
########################################################################
#
# directories
#
########################################################################
# Project root: the working directory with everything from its first
# '/nb3dFFT' onwards removed, then '/nb3dFFT' appended again.
# ':=' runs the $(shell ...) call exactly once at parse time; with '=' a new
# shell would be forked for every expansion of HOMEDIR and of each variable
# derived from it.
HOMEDIR := $(shell pwd | sed -e 's/\/nb3dFFT.*//')/nb3dFFT
# Stop immediately if the project root has no Makefile.in (file or link).
ifeq ($(wildcard $(HOMEDIR)/Makefile.in),)
define errortxt
ERROR in Makefile.in:
Project directory $(HOMEDIR) does not exist
or link to Makefile.in is missing in $(HOMEDIR).
endef
$(error $(errortxt))
endif
# Hidden directories below the project root, presumably for object files,
# module files and the library - TODO confirm against the including makefiles.
OBJDIR = $(HOMEDIR)/.obj
MODDIR = $(HOMEDIR)/.mod
LIBDIR = $(HOMEDIR)/.lib
# Glob patterns of temporary files (presumably removed by a clean target
# defined in the including makefiles).
TMPFILES= *.i90 *~
########################################################################
#
# settings
#
########################################################################
# Select the FFT library
# FLAGS: ESSL - for IBM BlueGene architecture (JUGENE,JUQUEEN etc.)
# FFTW - Intel Architecture
# NONE - disable FFT (only for debug)
USE_FFT = FFTW
# YES will enable hybrid parallelization (default = YES)
# INFORMATION: Depending on the system variable OMP_NUM_THREADS at runtime,
# multiple threads will be used by the fft routines, if the library supports it.
# Routines for packing and unpacking the messages sent with mpi_alltoall()
# are implemented with different OpenMP strategies and can be enabled.
# FFTW manual: "As a general rule, you don't want to use more threads than you have processors."
USE_OPENMP = YES
# more: PACKMEMCP, PACKLOOP
# YES will enable single precision (default = NO)
# ATTENTION: this affects all input and output arrays and all fft functions
USE_SINGLE_PRECISION = NO
# Use non-blocking collectives like MPI_IALLTOALLV.
# They are part of MPI 3.0,
# but are also implemented in libNBC (http://htor.inf.ethz.ch/research/nbcoll/libnbc/) by Torsten Hoefler.
# IBM provides its own implementation in its extended MPI library "MPE".
# nb3dFFT supports all three possible libraries by the flag USE_NBC = 1,2,3
# 0 - disable non-blocking communication
# 1 - MPI 3.0 (no extra lib needed)
# 2 - libNBC (compile libNBC located in directory '3rdparty' using GCC)
# 3 - MPIX - IBM's extension (only usable with IBM MPI)
USE_NBC = 1
# YES will enable printing of detailed timing information to stdout (default = NO)
# INFORMATION: only for debug or speed/scaling tests
CALC_TIMING = YES
PRINT_TIMING = NO
# YES will result in global MPI ranks being mapped onto the 2d processor grid in such a way
# that they increase first in J instead of I direction. (default = NO)
# This results in MPI ranks being close for the mpi_alltoall() of the Y<->Z transpose
# instead of for the mpi_alltoall() of the X<->Y transpose.
# In general it makes no sense to enable this !!
# With a real and spectral grid of the same size it should have no impact,
# but with a smaller spectral grid (filtering) do NOT use REVERSE_DIMS.
# In this case it is in general slower, because the X<->Y transform sends more data
# than the Y<->Z transform - and close global MPI ranks have in general faster network connectivity.
# => Do not change USE_REVERSE_DIMS without detailed knowledge of the MPI process mapping on your cluster.
# => Processor mapping is very important: nb3dFFT expects that close MPI ranks have highest network bandwidth.
USE_REVERSE_DIMS = NO
# YES will result in settings of interest when developing nb3dFFT (default = NO)
USE_DEVELOP = YES
# YES if 'use mpi' shall be used instead of '#include "mpif.h"'
USE_MPI_MODULE = YES
########################################################################
#
# libraries flags
#
########################################################################
### FFT ###
# FFTW: installation prefix and serial (_SER) / OpenMP (_OMP) link lines per precision.
ifeq ($(USE_FFT),FFTW)
ifeq ($(USE_SINGLE_PRECISION),YES)
# single precision
# FFT_BASE = /opt/fftw/3.3.3sp_gcc44_openmp
FFT_BASE = /home/itv/lib/fftw/3.3.0sp_gcc44
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lfftw3f
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lfftw3f_omp -lfftw3f
else
# double precision
# FFT_BASE = /opt/fftw/3.3.3_gcc44_openmp
FFT_BASE = /home/itv/lib/fftw/3.3.0_gcc44
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lfftw3
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lfftw3_omp -lfftw3
endif
FFT_INCDIR = -I$(FFT_BASE)/include
# Each macro needs its own, space-separated -D option. The comma-separated
# form "-DA","-DB" is only meaningful behind IBM XL's -WF, prefix; without it
# the shell hands over the single argument -DFFT_FFTW,-DFFTW_MEASURE and
# FFTW_MEASURE is never defined as a macro of its own.
FFT_FLAGS = "-DFFT_FFTW" "-DFFTW_MEASURE"
# "-DFFTW_ESTIMATE", "-DFFTW_PATIENT" or "-DFFTW_MEASURE"
# Chooses how much time FFTW is allowed to spend on finding the best setup on initialization.
endif
#------
# ESSL: FFT_BASE is only a placeholder in this file ("[only for IBM]");
# preprocessor define, include path and serial / OpenMP link lines follow.
ifeq ($(USE_FFT),ESSL)
FFT_FLAGS = "-DFFT_ESSL"
FFT_BASE = [only for IBM]
FFT_INCDIR = -I$(FFT_BASE)/include
FFT_LIBS_OMP = -L$(FFT_BASE)/lib -lmass_simd -lmassv -lesslsmpbg -lmass_simd -lmassv
FFT_LIBS_SER = -L$(FFT_BASE)/lib -lmass -lmassv -lesslbg -lmass -lmassv
endif
#------
# NONE: no FFT library. FFT_BASE points to the project root, no include path
# and no FFT_LIBS_* are defined.
ifeq ($(USE_FFT),NONE)
FFT_FLAGS = "-DFFT_NONE"
FFT_INCDIR =
FFT_BASE = $(HOMEDIR)
endif
# Abort when the selected FFT installation directory is missing.
# FFT_BASE is also empty when USE_FFT holds none of the supported values
# (FFTW, ESSL, NONE), so the message reports USE_FFT as well.
ifeq ($(wildcard $(FFT_BASE)),)
define errortxt
ERROR in Makefile.in:
FFT directory '$(FFT_BASE)' does not exist (USE_FFT = $(USE_FFT)).
endef
$(error $(errortxt))
endif
### OPENMP ###
ifeq ($(USE_OPENMP),YES)
O