# Invocation command line:
# /local/home/mcolgrove/HPC2021/bin/harness/runhpc -c nvhpc_acc --reportable tiny
# output_root was not used for this run
############################################################################
teeout = yes
makeflags = -j 40
flagsurl000 = http://www.spec.org/hpc2021/flags/nv2021_flags_v1.0.3.xml
envars = 1
license_num = 019
test_sponsor = NVIDIA Corporation
tester = NVIDIA Corporation

######################################################
# SUT Section
######################################################
#include: snow.inc
# ----- Begin inclusion of 'snow.inc'
############################################################################
######################################################
# Example configuration information for a
# system under test (SUT) Section
######################################################

# General SUT info
system_vendor = GIGA-BYTE TECHNOLOGY CO., LTD
system_name = GIGA-BYTE G242-P31 (Ampere Altra Q80-33, Tesla A100-PCIE-40GB)
node_compute_sw_accel_driver = NVIDIA UNIX aarch64 Kernel Module 460.32.03
hw_avail = Jun-2021
sw_avail = Sep-2021
prepared_by = Mathew Colgrove (mcolgrove@nvidia.com)

# Computation node info
# [Node_Description: Hardware]
node_compute_syslbl = Ampere Altra
node_compute_order = 1
node_compute_count = 1
node_compute_purpose = compute
node_compute_hw_vendor = GIGA-BYTE TECHNOLOGY CO., LTD
node_compute_hw_model = G242-P31
node_compute_hw_cpu_name = Ampere Altra Q80-33
node_compute_hw_ncpuorder = 1 chip
node_compute_hw_nchips = 1
node_compute_hw_ncores = 80
node_compute_hw_ncoresperchip = 80
node_compute_hw_nthreadspercore = 1
node_compute_hw_cpu_char = Max Frequency 3300 MHz
node_compute_hw_cpu_mhz = 3000
node_compute_hw_pcache = 64 KB I + 64 KB D on chip per core
node_compute_hw_scache = 1 MB I+D on chip per core
node_compute_hw_tcache = 32 MB I+D on chip per chip
node_compute_hw_ocache = None
node_compute_hw_memory = 256 GB (16 x 16 GB 2Rx8 PC4-3200AA-R)
node_compute_hw_disk = 1 x 960 GB, NVMe, M.2, PCIe Gen3
node_compute_hw_other = None

# [Node_Description: Accelerator]
node_compute_hw_accel_model = Tesla A100-PCIE-40GB
node_compute_hw_accel_count = 2
node_compute_hw_accel_vendor = NVIDIA Corporation
node_compute_hw_accel_type = GPU
node_compute_hw_accel_connect = PCIe 3.0 16x
node_compute_hw_accel_ecc = Yes
node_compute_hw_accel_desc = See Notes

# [Node_Description: Software]
node_compute_hw_adapter_fs_model = None
node_compute_hw_adapter_fs_count = 0
node_compute_hw_adapter_fs_slot_type = None
node_compute_hw_adapter_fs_data_rate = None
node_compute_hw_adapter_fs_ports_used = 0
node_compute_hw_adapter_fs_interconnect = None
node_compute_hw_adapter_fs_driver = None
node_compute_hw_adapter_fs_firmware = None
node_compute_sw_os = CentOS 8.3-2011
node_compute_sw_localfile = xfs
node_compute_sw_sharedfile = None
node_compute_sw_state = Multi-user, run level 3
node_compute_sw_other = None

# [Interconnect]
interconnect_fs_syslbl = None
interconnect_fs_order = 0
interconnect_fs_purpose = N/A
interconnect_fs_hw_vendor = N/A
interconnect_fs_hw_model = N/A
interconnect_fs_hw_switch_fs_model = N/A
interconnect_fs_hw_switch_fs_count = 0
interconnect_fs_hw_switch_fs_ports = 0
interconnect_fs_hw_topo = N/A
interconnect_fs_hw_switch_fs_data_rate = 0
interconnect_fs_hw_switch_fs_firmware = 0

#######################################################################
# End of SUT section
# If this config file were to be applied to several SUTs, edits would
# be needed only ABOVE this point.
######################################################################
# ---- End inclusion of '/local/home/mcolgrove/HPC2021/config/snow.inc'

# [Software]
sw_compiler000 = C/C++/Fortran: Version 21.9 of
sw_compiler001 = NVIDIA HPC SDK for Linux
sw_mpi_library = OpenMPI Version 4.0.5, included with NVHPC SDK
sw_mpi_other = None
system_class = SMP
sw_other = None

# [General notes]

#######################################################################
# End of SUT section
######################################################################

label = nv_acc
tune = base,peak
output_format = text,html,pdf
use_submit_for_speed = 1
reportable = 1

# Compiler Settings
default:
CC = mpicc
CXX = mpicxx
FC = mpif90

# Compiler Version Flags
CC_VERSION_OPTION = -V
CXX_VERSION_OPTION = -V
FC_VERSION_OPTION = -V

MPIRUN_OPTS = --bind-to none
submit = mpirun ${MPIRUN_OPTS} -np $ranks $command

#######################################################################
# Optimization
#######################################################################
default:
pmodel = ACC

default=base=default:
ranks = 2
OPTIMIZE = -w -Mfprelaxed -Mnouniform -Mstack_arrays -fast -acc=gpu
CXXPORTABILITY = --c++17

505.lbm_t=peak=default:
basepeak = 1

513.soma_t=peak=default:
ranks = 2
OPTIMIZE = -w -fast -O3 -acc=gpu -gpu=pinned

518.tealeaf_t=peak=default:
ranks = 2
OPTIMIZE = -w -fast -Msafeptr -acc=gpu

519.clvleaf_t=peak=default:
ranks = 2
OPTIMIZE = -w -Mfprelaxed -fast -acc=gpu -gpu=pinned

521.miniswp_t=peak=default:
ranks = 2
OPTIMIZE = -w -Mfprelaxed -Mnouniform -Mstack_arrays -fast -acc=gpu -gpu=pinned

528.pot3d_t=peak=default:
ranks = 2
OPTIMIZE = -w -Mstack_arrays -fast -acc=gpu

532.sph_exa_t=peak=default:
ranks = 16
OPTIMIZE = -w -Mfprelaxed -Mnouniform -Mstack_arrays -fast -acc=gpu

534.hpgmgfv_t=peak=default:
ranks = 2
OPTIMIZE = -w -fast -acc=gpu -gpu=pinned -static-nvidia

535.weather_t=peak=default:
ranks = 2
OPTIMIZE = -w -Mfprelaxed -Mnouniform -Mstack_arrays -fast -acc=gpu

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_submit_000 = MPI startup command:
notes_submit_005 =   mpirun command was used to start MPI jobs.
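# For illustration only (not part of the original run): because
# use_submit_for_speed = 1, the harness launches every benchmark through the
# submit line defined above, which expands to approximately
#
#   mpirun --bind-to none -np <ranks> <benchmark command>
#
# where <ranks> is 2 for base and for most peak runs (16 for the
# 532.sph_exa_t peak run), and <benchmark command> is the per-benchmark
# executable invocation generated by runhpc.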
notes_plat_000 =
notes_plat_005 = Information from nvaccelinfo
notes_plat_010 = CUDA Driver Version: 11020
notes_plat_015 = NVRM version: NVIDIA UNIX aarch64 Kernel Module 460.32.03
notes_plat_020 = Device Number: 0
notes_plat_025 = Device Name: A100-PCIE-40GB
notes_plat_030 = Device Revision Number: 8.0
notes_plat_035 = Global Memory Size: 42505273344
notes_plat_040 = Number of Multiprocessors: 108
notes_plat_045 = Concurrent Copy and Execution: Yes
notes_plat_050 = Total Constant Memory: 65536
notes_plat_055 = Total Shared Memory per Block: 49152
notes_plat_060 = Registers per Block: 65536
notes_plat_065 = Warp Size: 32
notes_plat_070 = Maximum Threads per Block: 1024
notes_plat_075 = Maximum Block Dimensions: 1024, 1024, 64
notes_plat_080 = Maximum Grid Dimensions: 2147483647 x 65535 x 65535
notes_plat_085 = Maximum Memory Pitch: 2147483647B
notes_plat_090 = Texture Alignment: 512B
notes_plat_095 = Clock Rate: 1410 MHz
notes_plat_100 = Execution Timeout: No
notes_plat_105 = Integrated Device: No
notes_plat_110 = Can Map Host Memory: Yes
notes_plat_115 = Compute Mode: default
notes_plat_120 = Concurrent Kernels: Yes
notes_plat_125 = ECC Enabled: Yes
notes_plat_130 = Memory Clock Rate: 1215 MHz
notes_plat_135 = Memory Bus Width: 5120 bits
notes_plat_140 = L2 Cache Size: 41943040 bytes
notes_plat_145 = Max Threads Per SMP: 2048
notes_plat_150 = Async Engines: 3
notes_plat_155 = Unified Addressing: Yes
notes_plat_160 = Managed Memory: Yes
notes_plat_165 = Concurrent Managed Memory: Yes
notes_plat_170 = Preemption Supported: Yes
notes_plat_175 = Cooperative Launch: Yes
notes_plat_180 = Multi-Device: Yes
notes_plat_185 = Default Target: cc80
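# For illustration only (not part of the original run): the platform notes
# above were captured with the NVHPC SDK's nvaccelinfo utility. On the SUT,
# the same per-device data can be regenerated, and the driver version and
# ECC state cross-checked against the NVIDIA driver's own query output, with
#
#   nvaccelinfo
#   nvidia-smi -q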