Hello everyone,
I compiled vasp-5.4.4 with aocc4.0 and openmpi4.1.4 on an amd-zen2-rocky8.6 platform, and as suggested my config file is:
# Compiler settings as first posted. FUFFIX, SUFFIX and CPP_OPTIONS are not
# defined in this excerpt.
# NOTE(review): the post describes a Zen 2 host, but FFLAGS here targets
# -march=znver3; the full makefile.include posted later in this thread uses
# -march=znver2 instead. Confirm which setting the failing binary was built with.
CPP = flang -E -P -C -w -Mfree $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)
FC = mpif90 -fopenmp
FCL = mpif90 -fopenmp
FREE = -ffree-form -ffree-line-length-none
FFLAGS = -w -march=znver3 -fno-fortran-main $(CPP_OPTIONS) -Mbackslash -ffunc-args-alias
But when I run the single-process calculation example, the following error occurs:
[gb1703:452077:0:452077] Caught signal 11 (Segmentation fault: Sent by the kernel at address (nil))
==== backtrace (tid: 452077) ====
0 0x0000000000012ce0 __funlockfile() :0
1 0x000000000126c540 dlansy_() ???:0
2 0x0000000001270100 dsyev_() ???:0
3 0x00000000007db995 diag_and_sort() /public1/home/yaoming/vasp-src/vasp.5.4.4/build/std/paw.f90:257
4 0x00000000007db995 set_opt_proj() /public1/home/yaoming/vasp-src/vasp.5.4.4/build/std/paw.f90:225
5 0x00000000007db995 set_paw_aug() /public1/home/yaoming/vasp-src/vasp.5.4.4/build/std/paw.f90:112
6 0x0000000000f9586f MAIN_() /public1/home/yaoming/vasp-src/vasp.5.4.4/build/std/main.f90:669
7 0x0000000002191403 main() /home/amd/JENKINS/workspace/AOCC_4_0_0_INTERIM/flang/runtime/flangmain/flangmain.c:59
8 0x000000000003aca3 __libc_start_main() ???:0
9 0x000000000058e64e _start() ???:0
=================================
[gb1703:452077] *** Process received signal ***
[gb1703:452077] Signal: Segmentation fault (11)
[gb1703:452077] Signal code: (-6)
[gb1703:452077] Failing at address: 0xa3b0006e5ed
[gb1703:452077] [ 0] /usr/lib64/libpthread.so.0(+0x12ce0)[0x147ca9328ce0]
[gb1703:452077] [ 1] ../vasp-src/vasp.5.4.4/bin/vasp_std[0x126c540]
[gb1703:452077] [ 2] ../vasp-src/vasp.5.4.4/bin/vasp_std[0x1270100]
[gb1703:452077] [ 3] ../vasp-src/vasp.5.4.4/bin/vasp_std[0x7db995]
[gb1703:452077] [ 4] ../vasp-src/vasp.5.4.4/bin/vasp_std[0xf9586f]
[gb1703:452077] [ 5] ../vasp-src/vasp.5.4.4/bin/vasp_std[0x2191403]
[gb1703:452077] [ 6] /usr/lib64/libc.so.6(__libc_start_main+0xf3)[0x147ca8d73ca3]
[gb1703:452077] [ 7] ../vasp-src/vasp.5.4.4/bin/vasp_std[0x58e64e]
[gb1703:452077] *** End of error message ***
I am working on reproducing this issue and will get back to you.
Hi,
Can you share some more details about the error, such as which input files you are using, the complete "makefile.include" file, and the run script?
Sorry for replying to you so late. The content of makefile.include is as follows:
# Precompiler options
# CPP_OPTIONS is a single backslash-continued list of -D defines. It is
# expanded twice further down: at the end of the CPP command and inside FFLAGS.
# No comment may be placed between the continued lines, or the list is cut short.
CPP_OPTIONS= -DHOST=\"LinuxGNU\" \
-DMPI -DMPI_BLOCK=8000 \
-Duse_collective \
-DscaLAPACK \
-DCACHE_SIZE=4000 \
-Davoidalloc \
-Duse_bse_te \
-Dtbdyn \
-Duse_shmem \
-DNGZhalf
# Preprocessing command: flang is invoked with -E and its output is redirected
# from <stem>$(FUFFIX) to <stem>$(SUFFIX), where $* is the stem of the rule that
# uses CPP. FUFFIX and SUFFIX are not defined in this file.
CPP = flang -E -P -C -w -Mfree $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)
# Disabled alternative: the same command without -Mfree.
#CPP = flang -E -P -C -w $*$(FUFFIX) >$*$(SUFFIX) $(CPP_OPTIONS)
# Compiler (FC) and linker (FCL) are the same MPI wrapper, both with -fopenmp.
FC = mpif90 -fopenmp
FCL = mpif90 -fopenmp
FREE = -ffree-form -ffree-line-length-none
# Disabled earlier FFLAGS; the active line below differs only in dropping
# -fno-fortran-main.
#FFLAGS = -w -fno-fortran-main $(CPP_OPTIONS) -Mbackslash -ffunc-args-alias
FFLAGS = -w $(CPP_OPTIONS) -Mbackslash -ffunc-args-alias
# Optimisation flags; the target architecture (-march=znver2) is set here
# rather than in FFLAGS.
OFLAG = -Ofast -march=znver2 -ffp-contract=fast -fopenmp
# OFLAG_IN is defined recursively, so it follows whatever OFLAG expands to.
OFLAG_IN = $(OFLAG)
DEBUG = -O0
# AOCL library directory. The 3.0-6 path is kept only as a disabled
# alternative; the 4.0 path is the active one.
#LIBDIR = /public1/home/yaoming/amd/aocl/3.0-6/lib
LIBDIR = /public1/home/yaoming/amd/aocl/4.0/lib
# All numerical libraries are referenced as static archives by full path.
BLAS = -fopenmp $(LIBDIR)/libblis-mt.a
LAPACK = $(LIBDIR)/libalm.a $(LIBDIR)/libflame.a -lm
# BLACS is intentionally empty, so SCALAPACK expands to the archive alone.
BLACS =
SCALAPACK = $(LIBDIR)/libscalapack.a $(BLACS)
# Link order matters for static archives: ScaLAPACK, then LAPACK, then BLAS;
# the FFTW archive is appended after them.
LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS)
# ?= keeps an FFTW value already set in the environment or on the command line.
FFTW ?= /public1/home/yaoming/amd/aocl/4.0
LLIBS += $(FFTW)/lib/libfftw3.a
INCS = -I$(FFTW)/include
# FFT object lists. OBJECTS is assigned outright, while OBJECTS_O1 and
# OBJECTS_O2 use += and therefore extend any value set before this file is read.
OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
OBJECTS_O1 += fftw3d.o fftmpi.o fftmpiw.o
OBJECTS_O2 += fft3dlib.o
# For what used to be vasp.5.lib
# The *_LIB variables reuse the main preprocessor, Fortran compiler and
# free-form flags; C sources use clang, and both languages are built with -O2.
CPP_LIB = $(CPP)
FC_LIB = $(FC)
CC_LIB = clang
CFLAGS_LIB = -O2
FFLAGS_LIB = -O2
FREE_LIB = $(FREE)
OBJECTS_LIB= linpack_double.o getshmem.o
# For the parser library
# The parser is built with clang++ and linked from the local "parser"
# directory together with the C++ runtime (-lstdc++).
CXX_PARS = clang++
LIBS += parser
LLIBS += -Lparser -lparser -lstdc++
# Normally no need to change this
# Source and binary directories, given as relative paths.
SRCDIR = ../../src
BINDIR = ../../bin
#================================================
# GPU Stuff
# Variables for the CUDA/GPU port.
# CPP_GPU carries GPU-specific defines; its -UscaLAPACK is the undefine
# counterpart of the -DscaLAPACK that CPP_OPTIONS sets. The trailing
# -DUSE_PINNED_MEMORY is commented out and therefore inactive.
CPP_GPU = -DCUDA_GPU -DRPROMU_CPROJ_OVERLAP -DCUFFT_MIN=28 -UscaLAPACK # -DUSE_PINNED_MEMORY
OBJECTS_GPU= fftmpiw.o fftmpi_map.o fft3dlib.o fftw3d_gpu.o fftmpiw_gpu.o
CC = gcc
CXX = g++
# CC is gcc, whose OpenMP switch is -fopenmp. The former spelling "-openmp"
# is read by gcc as "-o penmp" (an output file name), so it never enabled
# OpenMP for the C sources.
CFLAGS = -fPIC -DADD_ -fopenmp -DMAGMA_WITH_MKL -DMAGMA_SETAFFINITY -DGPUSHMEM=300 -DHAVE_CUBLAS
# CUDA_ROOT may be preset in the environment or on the command line (?=);
# NVCC and CUDA_LIB are expanded immediately (:=) from that value.
CUDA_ROOT ?= /usr/local/cuda
NVCC := $(CUDA_ROOT)/bin/nvcc
CUDA_LIB := -L$(CUDA_ROOT)/lib64 -lnvToolsExt -lcudart -lcuda -lcufft -lcublas
# Code-generation targets passed to nvcc: compute capability 3.0, 3.5 and 6.0.
GENCODE_ARCH := -gencode=arch=compute_30,code=\"sm_30,compute_30\" \
-gencode=arch=compute_35,code=\"sm_35,compute_35\" \
-gencode=arch=compute_60,code=\"sm_60,compute_60\"
# MPI_INC is left empty; the Open MPI include path is commented out.
MPI_INC = #/public1/home/deploy/yaom/openmpi-4.1.1/build/include
The content of INCAR is as follows:
PREC = High
ENCUT = 650
EDIFF = 0.001000
IBRION = 2
ISIF = 3
NSW = 10
IALGO=48
ISMEAR = 0; SIGMA = 0.020
POTIM = 0.100000
SYMPREC = 0.000010
KSPACING = 0.500000
KGAMMA = .TRUE.
ISTART = 0
LCHARG = FALSE
LWAVE = FALSE
NPAR = 1
PSTRESS=0
LUSE_VDW = .TRUE.
AGGAC = 0.0000
GGA = OR
And the content of run script is as follows:
# Starts ./vasp_std-aocc4.0-aocl4.0 under mpirun with 64 processes (-n 64) and
# passes OMP_NUM_THREADS=1 to them via -x.
mpirun --mca btl_openib_allow_ib 1 --map-by l3 -n 64 -x OMP_NUM_THREADS=1 ./vasp_std-aocc4.0-aocl4.0
I am still not able to reproduce the issue, even with your makefile and input files. The error message you shared might have two causes:
1. __funlockfile(): try increasing the open-file limit.
2. dlansy_() is part of the libflame package, so please try the latest libflame package in aocl-4.0.