# Invocation command line: # /mnt/ramdisk/cpu2017-1.1.7-aocc300-B2/bin/harness/runcpu --configfile amd_speed_aocc300_milan_B2.cfg --tune all --reportable --iterations 2 --nopower --runmode speed --tune base:peak --size test:train:refspeed intspeed # output_root was not used for this run ############################################################################ ################################################################################ # AMD AOCC 300 SPEC CPU2017 V1.1.5 Speed Configuration File for 64-bit Linux # # File name : amd_speed_aocc300_milan_B2.cfg # Creation Date : May 4, 2021 # CPU2017 Version : 1.1.8 # Supported benchmarks : All Speed benchmarks (intspeed, fpspeed) # Compiler name/version : AOCC 3.0.0 # Operating system version : OpenSUSE 15.2 # Supported OS's : Ubuntu 20.04, RHEL 8.3, SLES 15 SP2 # Hardware : AMD Milan, Rome, Naples (AMD64) # FP Base Pointer Size : 64-bit # FP Peak Pointer Size : 64-bit # INT Base Pointer Size : 64-bit # INT Peak Pointer Size : 64-bit # Auto Parallelization : No # # Note: DO NOT EDIT THIS FILE, the only edits required to properly run these # binaries are made in the ini Python file. Please consult Readme.amd_speed_aocc300_milan_B2.txt # for a few uncommon exceptions which require edits to this file. # # Description: # # This binary package automates away many of the complexities necessary to set # up and run SPEC CPU2017 under optimized conditions on AMD Milan/Rome/Naples-based # server platforms within Linux (AMD64). # # The binary package was built specifically for AMD Milan/Rome/Naples microprocessors and # is not intended to run on other products. # # Please install the binary package by following the instructions in # "Readme.amd_speed_aocc300_milan_B2.txt" under the "How To Use the Binaries" section.
# # The binary package is designed to work without alteration on two socket AMD # Milan/Rome/Naples-based servers with 64 cores per socket, SMT enabled and 1 TiB of DDR4 # memory distributed evenly among all 16 channels using 32 GiB DIMMs. # # To run the binary package on other Milan/Rome/Naples configurations, please review # "Readme.amd_speed_aocc300_milan_B2.txt". In general, Milan/Rome or Naples CPUs # should be autodetected with no action required by the user. # # In most cases, it should be unnecessary to edit "amd_speed_aocc300_milan_B2.cfg" or any # other file besides "ini_amd_speed_aocc300_milan_B2.py" where reporting fields # and run conditions are set. # # The run script automatically sets the optimal number of speed copies and binds # them appropriately. # # The run script and accompanying binary package are designed to work on Ubuntu # 20.04, RHEL 8.3 and SLES 15 SP2. # # Important! If you write your own run script, please set the stack size to # "unlimited" when executing this binary package. Failure to do so may cause # some benchmarks to overflow the stack. For example, to set stack size within # the bash shell, include the following line somewhere at the top of your run # script before the runcpu invocation: # # ulimit -s unlimited # # Modification of this config file should only be necessary if you intend to # rebuild the binaries. General instructions for rebuilding the binaries are # found in-line below. # ################################################################################ # Modifiable macros: ################################################################################ # Change the following line to true if you intend to REBUILD the binaries (AMD # does not support this). Valid values are "true" or "false" (no quotes). 
%define allow_build false # Only change these macros if you are rebuilding the binary package: %define compiler_name aocc300 %define binary_package_name amd_speed_%{compiler_name}_milan_B %define binary_package_revision 2 %define build_path /sppo/bin/cpu2017v115aocc3-b1/ %define flags_file_name %{compiler_name}-flags-B2.xml # To enable the platform file, be sure to uncomment the flagsurl02 header line # below. %define platform_file_name INVALID_platform_%{binary_package_name}.xml # You should never have to change binary_package_full_name: %define binary_package_full_name %{binary_package_name}%{binary_package_revision} ################################################################################ # Include file name ################################################################################ # The include file contains fields that are commonly changed. This file is auto- # generated based upon INI file settings and should not need user modification # for runs. %define inc_file_name %{binary_package_full_name}.inc ################################################################################ # Binary label extension and "allow_build" switch ################################################################################ # Only modify the binary label extension if you plan to rebuild the binaries. %define ext %{binary_package_name} # If you plan to recompile these CPU2017 binaries, please choose a new extension # name (ext above) to avoid confusion with the current binary set on your system # under test, and to avoid confusion for SPEC submission reviewers. You will # also need to set "allow_build" to true above. Finally, you must modify the # Paths section below to point to your library locations if the paths are not # already set up in your build environment.
################################################################################ # Paths and Environment Variables # ** MODIFY AS NEEDED (modification should not be necessary for runs) ** ################################################################################ # Allow environment variables to be set before runs: preenv = 1 # Necessary to avoid gcc out-of-memory exceptions on certain SUTs: preENV_MALLOC_CONF = retain:true # OpenMP environment variables: preENV_OMP_SCHEDULE = static preENV_OMP_DYNAMIC = false preENV_OMP_STACKSIZE = 128M # Define the name of the directory that holds AMD library files: %define lib_dir %{binary_package_name}_lib %define build_lib_dir %{binary_package_name}_lib # Set the shared object library path for runs and builds: preENV_LD_LIBRARY_PATH = $[top]/%{lib_dir}/lib;$[top]/%{lib_dir}/lib32:%{ENV_LD_LIBRARY_PATH} # Define 32-bit library build paths: # Do not use $[top] with the 32-bit libraries because doing so will cause an # options checksum error triggering a xalanc recompile attempt on SUTs having # different file paths. # NOTE: no 32-bit libraries are currently needed with Speed. JEMALLOC_LIB32_PATH = %{build_path}%{build_lib_dir}/lib32 %if '%{allow_build}' eq 'false' # The include file is only needed for runs, but not for builds. # include: %{inc_file_name} # ----- Begin inclusion of 'amd_speed_aocc300_milan_B2.inc' ############################################################################ ################################################################################ ################################################################################ # File name: amd_speed_aocc300_milan_B2.inc # File generation code date: May 4, 2021 # File generation date/time: May 12, 2021 / 04:19:17 # # This file is automatically generated during a SPEC CPU2017 run. # # To modify inc file generation, please consult the readme file or the run # script. 
################################################################################ ################################################################################ ################################################################################ ################################################################################ # The following macros are generated for use in the cfg file. ################################################################################ ################################################################################ %define logical_core_count 128 %define physical_core_count 64 %define physical_core_max 63 %define logical_core_max 127 ################################################################################ ################################################################################ # The following inc blocks set the speed thread counts and affinity settings. # # intspeed benchmarks: 600.perlbench_s,602.gcc_s,605.mcf_s,620.omnetpp_s, # 623.xalancbmk_s,625.x264_s,631.deepsjeng_s,641.leela_s,648.exchange2_s, # 657.xz_s # fpspeed benchmarks: 603.bwaves_s,607.cactuBSSN_s,619.lbm_s,621.wrf_s, # 627.cam4_s,628.pop2_s,638.imagick_s,644.nab_s,649.fotonik3d_s, # 654.roms_s # # Selected thread counts from '32p' section of CPU info ################################################################################ # default preENV thread settings: default: preENV_OMP_THREAD_LIMIT = 128 preENV_GOMP_CPU_AFFINITY = 0-127 ################################################################################ ################################################################################ # intspeed base thread counts: intspeed=base: threads = 64 ENV_GOMP_CPU_AFFINITY = 0-63 bind0 = numactl --physcpubind=0-63 submit = echo "$command" > run.sh ; $BIND bash run.sh ################################################################################ ################################################################################ # fpspeed base thread counts: 
fpspeed=base: threads = 64 ENV_GOMP_CPU_AFFINITY = 0-63 bind0 = numactl --physcpubind=0-63 submit = echo "$command" > run.sh ; $BIND bash run.sh ################################################################################ ################################################################################ # peak thread counts: 1 600.perlbench_s,602.gcc_s,605.mcf_s,620.omnetpp_s,623.xalancbmk_s,625.x264_s,631.deepsjeng_s,641.leela_s,648.exchange2_s=peak: threads = 1 ENV_GOMP_CPU_AFFINITY = 0 bind0 = numactl --physcpubind=0 submit = echo "$command" > run.sh ; $BIND bash run.sh ################################################################################ ################################################################################ # peak thread counts: 128 603.bwaves_s,619.lbm_s,627.cam4_s,638.imagick_s,644.nab_s=peak: threads = 128 ENV_GOMP_CPU_AFFINITY = 0 64 1 65 2 66 3 67 4 68 5 69 6 70 7 71 8 72 9 73 10 74 11 75 12 76 13 77 14 78 15 79 16 80 17 81 18 82 19 83 20 84 21 85 22 86 23 87 24 88 25 89 26 90 27 91 28 92 29 93 30 94 31 95 32 96 33 97 34 98 35 99 36 100 37 101 38 102 39 103 40 104 41 105 42 106 43 107 44 108 45 109 46 110 47 111 48 112 49 113 50 114 51 115 52 116 53 117 54 118 55 119 56 120 57 121 58 122 59 123 60 124 61 125 62 126 63 127 bind0 = numactl --physcpubind=0-127 submit = echo "$command" > run.sh ; $BIND bash run.sh ################################################################################ ################################################################################ # peak thread counts: 64 607.cactuBSSN_s,621.wrf_s,628.pop2_s,649.fotonik3d_s,654.roms_s,657.xz_s=peak: threads = 64 ENV_GOMP_CPU_AFFINITY = 0-63 bind0 = numactl --physcpubind=0-63 submit = echo "$command" > run.sh ; $BIND bash run.sh ################################################################################ ################################################################################ 
################################################################################ # Switch back to default: default: ################################################################################ ################################################################################ ################################################################################ # The remainder of this file defines CPU2017 report parameters. ################################################################################ ################################################################################ ################################################################################ # SPEC CPU 2017 report header ################################################################################ license_num =000 # (Your SPEC license number) tester =unknown tester test_sponsor =unknown sponsor hw_vendor =unknown vendor #--------- If you install new compilers, edit this section -------------------- sw_compiler =C/C++/Fortran: Version 3.0.0 of AOCC ################################################################################ ################################################################################ # Hardware, firmware and software information ################################################################################ hw_avail =Mar-2021 sw_avail =Mar-2021 hw_cpu_name =AMD EPYC 7513 hw_cpu_nominal_mhz =2600 hw_cpu_max_mhz =3650 hw_ncores =64 hw_nthreadspercore =2 hw_ncpuorder =1,2 chips hw_other =None # Other perf-relevant hw, or "None" fw_bios =unknown bios sw_base_ptrsize =64-bit hw_pcache =32 KB I + 32 KB D on chip per core hw_scache =512 KB I+D on chip per core hw_tcache000 =128 MB I+D on chip per chip, 32 MB shared / 8 hw_tcache001 = cores hw_ocache =None ################################################################################ # Notes ################################################################################ # Enter notes_000 through notes_100 here. 
notes_000 =Binaries were compiled on a system with 2x AMD EPYC 7742 CPU + 1TiB Memory using openSUSE 15.2 notes_005 = notes_010 =NA: The test sponsor attests, as of date of publication, that CVE-2017-5754 (Meltdown) notes_015 =is mitigated in the system as tested and documented. notes_020 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5753 (Spectre variant 1) notes_025 =is mitigated in the system as tested and documented. notes_030 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5715 (Spectre variant 2) notes_035 =is mitigated in the system as tested and documented. notes_040 = notes_submit_000 ='numactl' was used to bind copies to the cores. notes_submit_005 =See the configuration file for details. notes_os_000 ='ulimit -s unlimited' was used to set environment stack size limit notes_os_005 ='ulimit -l 2097152' was used to set environment locked pages in memory limit notes_os_010 = notes_os_015 =runcpu command invoked through numactl i.e.: notes_os_020 =numactl --interleave=all runcpu notes_os_025 = notes_os_030 ='echo 8 > /proc/sys/vm/dirty_ratio' run as root to limit dirty cache to 8% of notes_os_035 =memory. notes_os_040 ='echo 1 > /proc/sys/vm/swappiness' run as root to limit swap usage to minimum notes_os_045 =necessary. notes_os_050 ='echo 1 > /proc/sys/vm/zone_reclaim_mode' run as root to free node-local memory notes_os_055 =and avoid remote memory usage. notes_os_060 ='sync; echo 3 > /proc/sys/vm/drop_caches' run as root to reset filesystem caches. notes_os_065 ='sysctl -w kernel.randomize_va_space=0' run as root to disable address space layout notes_os_070 =randomization (ASLR) to reduce run-to-run variability. notes_os_075 = notes_os_thp_000 =To enable Transparent Hugepages (THP) for all allocations, notes_os_thp_005 ='echo always > /sys/kernel/mm/transparent_hugepage/enabled' and notes_os_thp_010 ='echo always > /sys/kernel/mm/transparent_hugepage/defrag' run as root. 
notes_comp_000 =The AMD64 AOCC Compiler Suite is available at notes_comp_005 =http://developer.amd.com/amd-aocc/ notes_comp_010 = notes_jemalloc_000 = notes_jemalloc_005 =jemalloc: configured and built with GCC v4.8.2 in RHEL 7.4 (No options specified) notes_jemalloc_010 =jemalloc 5.1.0 is available here: notes_jemalloc_015 =https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2 notes_jemalloc_020 = sw_other =jemalloc: jemalloc memory allocator library v5.1.0 ################################################################################ # The following note fields describe platform settings. ################################################################################ # example: (uncomment as necessary) # notes_plat_000 =BIOS settings: # notes_plat_002 = cTDP: 280 # notes_plat_004 = Determinism Slider set to Power # notes_plat_006 = Package Power: 280 # notes_plat_008 = EDC: 300 # notes_plat_010 = NPS: 1 # notes_plat_014 = 4-link xGMI max speed: 16Gbps # notes_plat_015 = Fan Speed: Maximum ################################################################################ # The following are custom fields: ################################################################################ # Use custom_fields to enter lines that are not listed here. For example: # notes_plat_100 = Energy Bias set to Max Performance # new_field = Ambient temperature set to 10C ################################################################################ # The following fields must be set here for only Int benchmarks. ################################################################################ intspeed: sw_peak_ptrsize =64-bit notes_os_thp_015 = ################################################################################ # The following fields must be set here for FP benchmarks.
################################################################################ fpspeed: sw_peak_ptrsize =64-bit notes_os_thp_003 =To enable THP only on request for peak runs of 628.pop2_s, and 638.imagick_s, notes_os_thp_004 ='echo madvise > /sys/kernel/mm/transparent_hugepage/enabled' run as root. notes_os_thp_005 =To disable THP for peak runs of 627.cam4_s, 644.nab_s, 649.fotonik3d_s, and 654.roms_s, notes_os_thp_006 ='echo never > /sys/kernel/mm/transparent_hugepage/enabled' run as root. notes_os_thp_007 = ################################################################################ # The following fields must be set here or they will be overwritten by sysinfo. ################################################################################ intspeed,fpspeed: hw_disk =unknown hw_nchips =2 prepared_by =prepared by unknown sw_file =unknown file sw_state =Run level 3 (multi-user) ################################################################################ # End of inc file ################################################################################ # Switch back to the default block after the include file: default: # ---- End inclusion of '/mnt/ramdisk/cpu2017-1.1.7-aocc300-B2/config/amd_speed_aocc300_milan_B2.inc' # Switch back to default block after the include file: default: fail_build = 1 %elif '%{allow_build}' eq 'true' # If you intend to rebuild, be sure to set the library paths either in the # build script or here (LIBRARY_PATH entries are colon-separated on Linux): preENV_LIBRARY_PATH = $[top]/%{build_lib_dir}/lib:$[top]/%{build_lib_dir}/lib32:%{ENV_LIBRARY_PATH} % define build_ncpus 64 # controls number of simultaneous compiles fail_build = 0 makeflags = --jobs=%{build_ncpus} --load-average=%{build_ncpus} %else % error The value of "allow_build" is %{allow_build}, but it can only be "true" or "false".
This error was generated %endif ################################################################################ # Enable automated data collection per benchmark ################################################################################ # Data collection is not enabled for reportable runs. # teeout is necessary to get data collection stdout into the logs. Best # practices for the individual data collection items would be to have # them store important output in separate files. Filenames could be # constructed from $SPEC (environment), $lognum (result number from runcpu), # and benchmark name/number. teeout = yes # Run runcpu with '-v 35' (or greater) to log lists of variables which can # be used in substitutions as below. # For CPU2006, change $label to $ext %define data-collection-parameters benchname='$name' benchnum='$num' benchmark='$benchmark' iteration=$iter size='$size' tune='$tune' label='$label' log='$log' lognum='$lognum' from_runcpu='$from_runcpu' %define data-collection-start $[top]/data-collection/data-collection start %{data-collection-parameters} %define data-collection-stop $[top]/data-collection/data-collection stop %{data-collection-parameters} monitor_specrun_wrapper = %{data-collection-start} ; $command ; %{data-collection-stop} ################################################################################ # Header settings ################################################################################ backup_config = 0 # set to 0 if you do not want backup files bench_post_setup = sync # command_add_redirect: If set, the generated ${command} will include # redirection operators (stdout, stderr), which are passed along to the shell # that executes the command. If this variable is not set, specinvoke does the # redirection. 
command_add_redirect = yes env_vars = yes flagsurl000 = http://www.spec.org/cpu2017/flags/aocc300-flags-A1.xml #flagsurl02 = $[top]/%{platform_file_name} # label: User defined extension string that tags your binaries & directories: label = %{ext} line_width = 1020 log_line_width = 1020 mean_anyway = yes output_format = all reportable = yes size = test,train,ref teeout = yes teerunout = yes tune = base,peak use_submit_for_speed = yes ################################################################################ # Compilers ################################################################################ default: CROSSPLAT_PORT_OPTS = -mno-adx -mno-sse4a CC = clang -m64 $[CROSSPLAT_PORT_OPTS] CXX = clang++ -m64 -std=c++98 $[CROSSPLAT_PORT_OPTS] FC = flang -m64 $[CROSSPLAT_PORT_OPTS] CLD = clang -m64 CXXLD = clang++ -m64 FLD = flang -m64 CC_VERSION_OPTION = --version CXX_VERSION_OPTION = --version FC_VERSION_OPTION = --version ################################################################################ # Portability Flags ################################################################################ default:# data model applies to all benchmarks ################################################################################ # Default Flags ################################################################################ EXTRA_LIBS = -fopenmp=libomp -lomp -ljemalloc -lamdlibm -lm MATHLIBOPT = #clearing this variable or else SPEC will set it to -lm VECMATHLIB = -fveclib=AMDLIBM OPT_ROOT = -march=znver3 $(VECMATHLIB) -ffast-math OPT_ROOT_BASE = -O3 $(OPT_ROOT) OPT_ROOT_PEAK = -Ofast $(OPT_ROOT) -flto ################################################################################ # Portability Flags ################################################################################ default: EXTRA_PORTABILITY = -DSPEC_LP64 # *** Benchmark-specific portability *** # Anything other than the data model is only allowed where a need is proven. 
# (ordered by last 2 digits of benchmark number) 600.perlbench_s: #lang='C' PORTABILITY = -DSPEC_LINUX_X64 621.wrf_s: #lang='F,C' CPORTABILITY = -DSPEC_CASE_FLAG FPORTABILITY = -Mbyteswapio 623.xalancbmk_s: #lang='CXX' PORTABILITY = -DSPEC_LINUX 627.cam4_s: #lang='F,C' PORTABILITY = -DSPEC_CASE_FLAG 628.pop2_s: #lang='F,C' CPORTABILITY = -DSPEC_CASE_FLAG FPORTABILITY = -Mbyteswapio ################################################################################ # Tuning Flags ################################################################################ ##################### # Base tuning flags # ##################### default=base: COPTIMIZE = $(OPT_ROOT_BASE) -flto -fstruct-layout=5 \ -mllvm -unroll-threshold=50 \ -mllvm -inline-threshold=1000 -fremap-arrays \ -mllvm -function-specialize -flv-function-specialization \ -mllvm -enable-gvn-hoist \ -mllvm -global-vectorize-slp=true \ -mllvm -enable-licm-vrp \ -mllvm -reduce-array-computations=3 \ -Wno-unused-command-line-argument CXXOPTIMIZE = $(OPT_ROOT_BASE) -flto \ -mllvm -enable-partial-unswitch \ -mllvm -unroll-threshold=100 \ -finline-aggressive -flv-function-specialization \ -mllvm -loop-unswitch-threshold=200000 \ -mllvm -reroll-loops \ -mllvm -aggressive-loop-unswitch \ -mllvm -extra-vectorizer-passes \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -Wno-unused-command-line-argument \ -mllvm -convert-pow-exp-to-int=false FOPTIMIZE = -Hz,1,0x1 $(OPT_ROOT_BASE) -Mrecursive \ -mllvm -fuse-tile-inner-loop -funroll-loops \ -mllvm -extra-vectorizer-passes \ -mllvm -lsr-in-nested-loop \ -mllvm -enable-licm-vrp \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -Wno-unused-command-line-argument LDCXXFLAGS = -Wl,-mllvm -Wl,-x86-use-vzeroupper=false EXTRA_LDFLAGS = -Wl,-mllvm -Wl,-region-vectorize \ -Wl,-mllvm -Wl,-function-specialize \ -Wl,-mllvm -Wl,-align-all-nofallthru-blocks=6 \ -Wl,-mllvm -Wl,-reduce-array-computations=3 LDFFLAGS = -Wl,-mllvm 
-Wl,-enable-X86-prefetching \ -Wl,-mllvm -Wl,-enable-licm-vrp #other libraries # Put OpenMP and math libraries here: # -lm needed at the end for some transcendental functions: EXTRA_LIBS = -fopenmp=libomp -lomp -lamdlibm -ljemalloc -lflang -lflangrti -lm EXTRA_FLIBS = # Don't put the AMD and mvec math libraries in MATHLIBOPT because it will trigger a reporting issue # because GCC won't use them. Forcefeed all benchmarks the math libraries in EXTRA_LIBS and clear # out MATHLIBOPT. MATHLIBOPT = # The following is necessary for 502/602 gcc: LDOPTIMIZE = -z muldefs # The following is necessary for 502/602 gcc: EXTRA_OPTIMIZE = -DSPEC_OPENMP -fopenmp -Wno-return-type ######################## # intspeed tuning flags # ######################## intspeed: FOPTIMIZE = $(OPT_ROOT_BASE) -flto EXTRA_FFLAGS = -mllvm -unroll-aggressive \ -mllvm -unroll-threshold=150 EXTRA_CXXFLAGS = -mllvm -do-block-reorder=aggressive \ -fvirtual-function-elimination -fvisibility=hidden LDCFLAGS = -Wl,-allow-multiple-definition -Wl,-mllvm \ -Wl,-enable-licm-vrp LDCXXFLAGS = -Wl,-mllvm -Wl,-do-block-reorder=aggressive LDFFLAGS = -Wl,-mllvm -Wl,-inline-recursion=4 \ -Wl,-mllvm -Wl,-lsr-in-nested-loop \ -Wl,-mllvm -Wl,-enable-iv-split intspeed=base: submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command intspeed=peak: submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command ######################## # fpspeed tuning flags # ######################## fpspeed: CXX = clang++ -m64 -std=c++98 $[CROSSPLAT_PORT_OPTS] fpspeed=base: submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command fpspeed=peak: submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command ##################### # Peak tuning flags # ##################### default=peak: COPTIMIZE = $(OPT_ROOT_PEAK) -fstruct-layout=5 \ -mllvm -unroll-threshold=50 -fremap-arrays \ -flv-function-specialization -mllvm \ -inline-threshold=1000 -mllvm 
-enable-gvn-hoist \ -mllvm -global-vectorize-slp=true -mllvm \ -function-specialize -mllvm -enable-licm-vrp \ -mllvm -reduce-array-computations=3 \ -Wno-unused-command-line-argument CXXOPTIMIZE = $(OPT_ROOT_PEAK) -finline-aggressive \ -mllvm -unroll-threshold=100 \ -flv-function-specialization -mllvm -enable-licm-vrp \ -mllvm -reroll-loops -mllvm \ -aggressive-loop-unswitch -mllvm \ -reduce-array-computations=3 -mllvm \ -global-vectorize-slp=true \ -Wno-unused-command-line-argument FOPTIMIZE = $(OPT_ROOT_PEAK) -Mrecursive \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -mllvm -enable-licm-vrp \ -Wno-unused-command-line-argument EXTRA_LDFLAGS = -Wl,-mllvm -Wl,-function-specialize \ -Wl,-mllvm -Wl,-align-all-nofallthru-blocks=6 \ -Wl,-mllvm -Wl,-reduce-array-computations=3 LDFFLAGS = -Wl,-mllvm -Wl,-enable-X86-prefetching \ -Wl,-mllvm -Wl,-enable-licm-vrp LDCXXFLAGS = -Wl,-mllvm -Wl,-x86-use-vzeroupper=false \ -Wl,-mllvm -Wl,-enable-licm-vrp EXTRA_LIBS = -fopenmp=libomp -lomp -lamdlibm -ljemalloc -lflang -lm EXTRA_OPTIMIZE = -DSPEC_OPENMP -fopenmp -Wno-return-type feedback = 0 PASS1_CFLAGS = -fprofile-instr-generate PASS2_CFLAGS = -fprofile-instr-use PASS1_FFLAGS = -fprofile-generate PASS2_FFLAGS = -fprofile-use PASS1_CXXFLAGS = -fprofile-instr-generate PASS2_CXXFLAGS = -fprofile-instr-use PASS1_LDFLAGS = -fprofile-instr-generate PASS2_LDFLAGS = -fprofile-instr-use fdo_run1 = $command ; llvm-profdata merge --output=default.profdata *.profraw # Int benchmark specific peak tuning flags: # FP benchmark specific peak tuning flags: 603.bwaves_s=peak: FOPTIMIZE = -Ofast $(OPT_ROOT) -Mrecursive \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -mllvm -enable-licm-vrp \ -Wno-unused-command-line-argument submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 607.cactuBSSN_s=peak: submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 621.wrf_s=peak: FOPTIMIZE = 
-Hz,1,0x1 $(OPT_ROOT_BASE) -Mrecursive \ -mllvm -fuse-tile-inner-loop -funroll-loops \ -mllvm -extra-vectorizer-passes \ -mllvm -lsr-in-nested-loop \ -mllvm -enable-licm-vrp \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -Wno-unused-command-line-argument submit = echo always > /sys/kernel/mm/transparent_hugepage/enabled; numactl --interleave=all --physcpubind=0-%{physical_core_max} $command 627.cam4_s=peak: submit = echo never > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 628.pop2_s=peak: FOPTIMIZE = $(OPT_ROOT) -Ofast -Mrecursive \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -mllvm -enable-licm-vrp \ -Wno-unused-command-line-argument submit = echo madvise > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 638.imagick_s=peak: submit = echo madvise > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 644.nab_s=peak: EXTRA_LDFLAGS = -Wl,-mllvm -Wl,-region-vectorize \ -Wl,-mllvm -Wl,-function-specialize submit = echo never > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 649.fotonik3d_s=peak: ENV_PGHPF_ZMEM =yes submit = echo never > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command 654.roms_s=peak: FOPTIMIZE = -Ofast $(OPT_ROOT) -Mrecursive \ -mllvm -reduce-array-computations=3 \ -mllvm -global-vectorize-slp=true \ -mllvm -enable-licm-vrp \ -Wno-unused-command-line-argument submit = echo never > /sys/kernel/mm/transparent_hugepage/enabled; $BIND $command # Dell Custom #include: Dell-info-AMD.inc # ----- Begin inclusion of 'Dell-info-AMD.inc' ############################################################################ #------------------------------------------------------- # Dell EMC (Dell Inc.) 
# # AMD #------------------------------------------------------- default: flagsurl001=http://www.spec.org/cpu2017/flags/Dell-Platform-Flags-PowerEdge-AMD-Milan-rev2.2.xml fprate,fpspeed,intspeed,intrate: # # Make sure Hardware availability and Software availability are correct for this platform - Mmm-yyyy # hw_avail = Jun-2021 # Date of LAST hardware component to ship sw_avail = Mar-2021 # Date of LAST software component to ship # # No need to edit these if you use a temporary file system (like ramdisk). hw_disk = 1 x 1.6 TB SATA SSD # Size, type, other perf-relevant info sw_file = xfs # File system # # BIOS Settings # - It is rare that these settings would need to be changed. # fprate,fpspeed,intspeed,intrate: fprate,intrate: #notes_plat_form_100 = Memory Interleaving : Disabled notes_plat_form_105 = NUMA Nodes per Socket : 4 fpspeed,intspeed: fprate,fpspeed,intspeed,intrate: #notes_plat_form_280 = apbdis : Enabled P0 power_management000 = BIOS and OS set to prefer performance power_management001 = at the cost of additional power usage. #------------------------------------------------------------------------------- # **** DO NOT EDIT BELOW HERE!!! #------------------------------------------------------------------------------- # Company Name, license, etc fprate,fpspeed,intspeed,intrate: hw_vendor = Dell Inc. tester = Dell Inc. test_sponsor = Dell Inc. license_num = 55 prepared_by = Dell Inc.
# cleanup - sysinfo/AMD scripts #sw_os000 = %undef% #sw_os001 = %undef% sw_os002 = %undef% #hw_memory000 = %undef% hw_memory001 = %undef% hw_memory002 = %undef% #hw_model000 = %undef% #hw_model001 = %undef% #include: dell-system-info.inc # ----- Begin inclusion of 'dell-system-info.inc' ############################################################################ fprate,fpspeed,intrate,intspeed: hw_model = PowerEdge R6525 (AMD EPYC 7513 32-Core Processor) hw_cpu_name = AMD EPYC 7513 hw_nchips = 2 hw_ncpuorder = 1,2 chips hw_ncores = 64 hw_nthreadspercore = 2 hw_pcache = 32 KB I + 32 KB D on chip per core hw_scache = 512 KB I+D on chip per core fw_bios = Version 2.2.5 released Apr-2021 sw_state = Run level 3 (multi-user) sw_file = tmpfs hw_disk = 128 GB on tmpfs notes_tmpfs_000 = notes_tmpfs_005 = Benchmark run from a 128 GB ramdisk created with the cmd: "mount -t tmpfs -o size=128G tmpfs /mnt/ramdisk" sw_os000 = Red Hat Enterprise Linux 8.3 (Ootpa) sw_os001 = 4.18.0-240.el8.x86_64 hw_cpu_nominal_mhz = 2600 hw_memory000 = 2 TB (16 x 128 GB 4Rx4 PC4-3200AA-L) # ---- End inclusion of '/mnt/ramdisk/cpu2017-1.1.7-aocc300-B2/config/dell-system-info.inc' # ---- End inclusion of '/mnt/ramdisk/cpu2017-1.1.7-aocc300-B2/config/Dell-info-AMD.inc' # The following settings were obtained by running the sysinfo_program # 'specperl $[top]/bin/sysinfo' (sysinfo:SHA:60a26e139a7df7ba5521c983304469c762a79f3394ac112dddae4bac7d1a4f55) default: notes_plat_sysinfo_000 = notes_plat_sysinfo_005 = Sysinfo program /mnt/ramdisk/cpu2017-1.1.7-aocc300-B2/bin/sysinfo notes_plat_sysinfo_010 = Rev: r6538 of 2020-09-24 e8664e66d2d7080afeaa89d4b38e2f1c notes_plat_sysinfo_015 = running on rhel-8-3-amd Wed May 12 04:19:27 2021 notes_plat_sysinfo_020 = notes_plat_sysinfo_025 = SUT (System Under Test) info as seen by some common utilities. 
# sysinfo notes 030-345: captured /proc/cpuinfo, lscpu and numactl header text.
# One note per line; a literal '#' inside a value is written as '\#' because an
# unescaped '#' starts a comment in this config dialect.
notes_plat_sysinfo_030 = For more information on this section, see
notes_plat_sysinfo_035 = https://www.spec.org/cpu2017/Docs/config.html\#sysinfo
notes_plat_sysinfo_040 =
notes_plat_sysinfo_045 = From /proc/cpuinfo
notes_plat_sysinfo_050 = model name : AMD EPYC 7513 32-Core Processor
notes_plat_sysinfo_055 = 2 "physical id"s (chips)
notes_plat_sysinfo_060 = 128 "processors"
notes_plat_sysinfo_065 = cores, siblings (Caution: counting these is hw and system dependent. The following
notes_plat_sysinfo_070 = excerpts from /proc/cpuinfo might not be reliable. Use with caution.)
notes_plat_sysinfo_075 = cpu cores : 32
notes_plat_sysinfo_080 = siblings : 64
notes_plat_sysinfo_085 = physical 0: cores 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
notes_plat_sysinfo_090 = 25 26 27 28 29 30 31
notes_plat_sysinfo_095 = physical 1: cores 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
notes_plat_sysinfo_100 = 25 26 27 28 29 30 31
notes_plat_sysinfo_105 =
notes_plat_sysinfo_110 = From lscpu:
notes_plat_sysinfo_115 = Architecture: x86_64
notes_plat_sysinfo_120 = CPU op-mode(s): 32-bit, 64-bit
notes_plat_sysinfo_125 = Byte Order: Little Endian
notes_plat_sysinfo_130 = CPU(s): 128
notes_plat_sysinfo_135 = On-line CPU(s) list: 0-127
notes_plat_sysinfo_140 = Thread(s) per core: 2
notes_plat_sysinfo_145 = Core(s) per socket: 32
notes_plat_sysinfo_150 = Socket(s): 2
notes_plat_sysinfo_155 = NUMA node(s): 8
notes_plat_sysinfo_160 = Vendor ID: AuthenticAMD
notes_plat_sysinfo_165 = CPU family: 25
notes_plat_sysinfo_170 = Model: 1
notes_plat_sysinfo_175 = Model name: AMD EPYC 7513 32-Core Processor
notes_plat_sysinfo_180 = Stepping: 1
notes_plat_sysinfo_185 = CPU MHz: 2936.898
notes_plat_sysinfo_190 = BogoMIPS: 5190.12
notes_plat_sysinfo_195 = Virtualization: AMD-V
notes_plat_sysinfo_200 = L1d cache: 32K
notes_plat_sysinfo_205 = L1i cache: 32K
notes_plat_sysinfo_210 = L2 cache: 512K
notes_plat_sysinfo_215 = L3 cache: 32768K
notes_plat_sysinfo_220 = NUMA node0 CPU(s): 0-7,64-71
notes_plat_sysinfo_225 = NUMA node1 CPU(s): 8-15,72-79
notes_plat_sysinfo_230 = NUMA node2 CPU(s): 16-23,80-87
notes_plat_sysinfo_235 = NUMA node3 CPU(s): 24-31,88-95
notes_plat_sysinfo_240 = NUMA node4 CPU(s): 32-39,96-103
notes_plat_sysinfo_245 = NUMA node5 CPU(s): 40-47,104-111
notes_plat_sysinfo_250 = NUMA node6 CPU(s): 48-55,112-119
notes_plat_sysinfo_255 = NUMA node7 CPU(s): 56-63,120-127
notes_plat_sysinfo_260 = Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov
notes_plat_sysinfo_265 = pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm
notes_plat_sysinfo_270 = constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq
notes_plat_sysinfo_275 = monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c
notes_plat_sysinfo_280 = rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch
notes_plat_sysinfo_285 = osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb
notes_plat_sysinfo_290 = cat_l3 cdp_l3 invpcid_single hw_pstate sme ssbd mba sev ibrs ibpb stibp vmmcall
notes_plat_sysinfo_295 = fsgsbase bmi1 avx2 smep bmi2 invpcid cqm rdt_a rdseed adx smap clflushopt clwb
notes_plat_sysinfo_300 = sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total
notes_plat_sysinfo_305 = cqm_mbm_local clzero irperf xsaveerptr wbnoinvd amd_ppin arat npt lbrv svm_lock
notes_plat_sysinfo_310 = nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold
notes_plat_sysinfo_315 = v_vmsave_vmload vgif umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca
notes_plat_sysinfo_320 =
notes_plat_sysinfo_325 = /proc/cpuinfo cache data
notes_plat_sysinfo_330 = cache size : 512 KB
notes_plat_sysinfo_335 =
notes_plat_sysinfo_340 = From numactl --hardware WARNING: a numactl 'node' might or might not correspond to a
notes_plat_sysinfo_345 = physical chip.
# sysinfo notes 350-855: captured numactl, /proc/meminfo, tuned-adm, OS release,
# uname, kernel vulnerability status, filesystem and DMI text.
# One note per line; a literal '#' inside a value is written as '\#' because an
# unescaped '#' starts a comment in this config dialect.
notes_plat_sysinfo_350 = available: 8 nodes (0-7)
notes_plat_sysinfo_355 = node 0 cpus: 0 1 2 3 4 5 6 7 64 65 66 67 68 69 70 71
notes_plat_sysinfo_360 = node 0 size: 257373 MB
notes_plat_sysinfo_365 = node 0 free: 257345 MB
notes_plat_sysinfo_370 = node 1 cpus: 8 9 10 11 12 13 14 15 72 73 74 75 76 77 78 79
notes_plat_sysinfo_375 = node 1 size: 257927 MB
notes_plat_sysinfo_380 = node 1 free: 257928 MB
notes_plat_sysinfo_385 = node 2 cpus: 16 17 18 19 20 21 22 23 80 81 82 83 84 85 86 87
notes_plat_sysinfo_390 = node 2 size: 257952 MB
notes_plat_sysinfo_395 = node 2 free: 257934 MB
notes_plat_sysinfo_400 = node 3 cpus: 24 25 26 27 28 29 30 31 88 89 90 91 92 93 94 95
notes_plat_sysinfo_405 = node 3 size: 257945 MB
notes_plat_sysinfo_410 = node 3 free: 257901 MB
notes_plat_sysinfo_415 = node 4 cpus: 32 33 34 35 36 37 38 39 96 97 98 99 100 101 102 103
notes_plat_sysinfo_420 = node 4 size: 257909 MB
notes_plat_sysinfo_425 = node 4 free: 253451 MB
notes_plat_sysinfo_430 = node 5 cpus: 40 41 42 43 44 45 46 47 104 105 106 107 108 109 110 111
notes_plat_sysinfo_435 = node 5 size: 257999 MB
notes_plat_sysinfo_440 = node 5 free: 257761 MB
notes_plat_sysinfo_445 = node 6 cpus: 48 49 50 51 52 53 54 55 112 113 114 115 116 117 118 119
notes_plat_sysinfo_450 = node 6 size: 257981 MB
notes_plat_sysinfo_455 = node 6 free: 257863 MB
notes_plat_sysinfo_460 = node 7 cpus: 56 57 58 59 60 61 62 63 120 121 122 123 124 125 126 127
notes_plat_sysinfo_465 = node 7 size: 257969 MB
notes_plat_sysinfo_470 = node 7 free: 257682 MB
notes_plat_sysinfo_475 = node distances:
notes_plat_sysinfo_480 = node 0 1 2 3 4 5 6 7
notes_plat_sysinfo_485 = 0: 10 12 12 12 32 32 32 32
notes_plat_sysinfo_490 = 1: 12 10 12 12 32 32 32 32
notes_plat_sysinfo_495 = 2: 12 12 10 12 32 32 32 32
notes_plat_sysinfo_500 = 3: 12 12 12 10 32 32 32 32
notes_plat_sysinfo_505 = 4: 32 32 32 32 10 12 12 12
notes_plat_sysinfo_510 = 5: 32 32 32 32 12 10 12 12
notes_plat_sysinfo_515 = 6: 32 32 32 32 12 12 10 12
notes_plat_sysinfo_520 = 7: 32 32 32 32 12 12 12 10
notes_plat_sysinfo_525 =
notes_plat_sysinfo_530 = From /proc/meminfo
notes_plat_sysinfo_535 = MemTotal: 2113259036 kB
notes_plat_sysinfo_540 = HugePages_Total: 0
notes_plat_sysinfo_545 = Hugepagesize: 2048 kB
notes_plat_sysinfo_550 =
notes_plat_sysinfo_555 = /sbin/tuned-adm active
notes_plat_sysinfo_560 = Current active profile: throughput-performance
notes_plat_sysinfo_565 =
notes_plat_sysinfo_570 = From /etc/*release* /etc/*version*
notes_plat_sysinfo_575 = os-release:
notes_plat_sysinfo_580 = NAME="Red Hat Enterprise Linux"
notes_plat_sysinfo_585 = VERSION="8.3 (Ootpa)"
notes_plat_sysinfo_590 = ID="rhel"
notes_plat_sysinfo_595 = ID_LIKE="fedora"
notes_plat_sysinfo_600 = VERSION_ID="8.3"
notes_plat_sysinfo_605 = PLATFORM_ID="platform:el8"
notes_plat_sysinfo_610 = PRETTY_NAME="Red Hat Enterprise Linux 8.3 (Ootpa)"
notes_plat_sysinfo_615 = ANSI_COLOR="0;31"
notes_plat_sysinfo_620 = redhat-release: Red Hat Enterprise Linux release 8.3 (Ootpa)
notes_plat_sysinfo_625 = system-release: Red Hat Enterprise Linux release 8.3 (Ootpa)
notes_plat_sysinfo_630 = system-release-cpe: cpe:/o:redhat:enterprise_linux:8.3:ga
notes_plat_sysinfo_635 =
notes_plat_sysinfo_640 = uname -a:
notes_plat_sysinfo_645 = Linux rhel-8-3-amd 4.18.0-240.el8.x86_64 \#1 SMP Wed Sep 23 05:13:10 EDT 2020 x86_64
notes_plat_sysinfo_650 = x86_64 x86_64 GNU/Linux
notes_plat_sysinfo_655 =
notes_plat_sysinfo_660 = Kernel self-reported vulnerability status:
notes_plat_sysinfo_665 =
notes_plat_sysinfo_670 = CVE-2018-12207 (iTLB Multihit): Not affected
notes_plat_sysinfo_675 = CVE-2018-3620 (L1 Terminal Fault): Not affected
notes_plat_sysinfo_680 = Microarchitectural Data Sampling: Not affected
notes_plat_sysinfo_685 = CVE-2017-5754 (Meltdown): Not affected
notes_plat_sysinfo_690 = CVE-2018-3639 (Speculative Store Bypass): Mitigation: Speculative Store
notes_plat_sysinfo_695 = Bypass disabled via prctl and
notes_plat_sysinfo_700 = seccomp
notes_plat_sysinfo_705 = CVE-2017-5753 (Spectre variant 1): Mitigation: usercopy/swapgs
notes_plat_sysinfo_710 = barriers and __user pointer
notes_plat_sysinfo_715 = sanitization
notes_plat_sysinfo_720 = CVE-2017-5715 (Spectre variant 2): Mitigation: Full AMD retpoline,
notes_plat_sysinfo_725 = IBPB: conditional, IBRS_FW, STIBP:
notes_plat_sysinfo_730 = always-on, RSB filling
notes_plat_sysinfo_735 = CVE-2020-0543 (Special Register Buffer Data Sampling): Not affected
notes_plat_sysinfo_740 = CVE-2019-11135 (TSX Asynchronous Abort): Not affected
notes_plat_sysinfo_745 =
notes_plat_sysinfo_750 = run-level 3 May 12 04:09
notes_plat_sysinfo_755 =
notes_plat_sysinfo_760 = SPEC is set to: /mnt/ramdisk/cpu2017-1.1.7-aocc300-B2
notes_plat_sysinfo_765 = Filesystem Type Size Used Avail Use% Mounted on
notes_plat_sysinfo_770 = tmpfs tmpfs 128G 4.0G 125G 4% /mnt/ramdisk
notes_plat_sysinfo_775 =
notes_plat_sysinfo_780 = From /sys/devices/virtual/dmi/id
notes_plat_sysinfo_785 = Vendor: Dell Inc.
notes_plat_sysinfo_790 = Product: PowerEdge R6525
notes_plat_sysinfo_795 = Product Family: PowerEdge
notes_plat_sysinfo_800 = Serial: 1234567
notes_plat_sysinfo_805 =
notes_plat_sysinfo_810 = Additional information from dmidecode follows. WARNING: Use caution when you interpret
notes_plat_sysinfo_815 = this section. The 'dmidecode' program reads system data which is "intended to allow
notes_plat_sysinfo_820 = hardware to be accurately determined", but the intent may not be met, as there are
notes_plat_sysinfo_825 = frequent changes to hardware, firmware, and the "DMTF SMBIOS" standard.
notes_plat_sysinfo_830 = Memory:
notes_plat_sysinfo_835 = 16x 802C8632802C 72ASS16G72LZ-3G2B3 128 GB 4 rank 3200
notes_plat_sysinfo_840 = 16x Not Specified Not Specified
notes_plat_sysinfo_845 =
notes_plat_sysinfo_850 = BIOS:
notes_plat_sysinfo_855 = BIOS Vendor: Dell Inc.
# sysinfo notes 860-880: remaining BIOS identification text.
notes_plat_sysinfo_860 = BIOS Version: 2.2.5
notes_plat_sysinfo_865 = BIOS Date: 04/08/2021
notes_plat_sysinfo_870 = BIOS Revision: 2.2
notes_plat_sysinfo_875 =
notes_plat_sysinfo_880 = (End of data from sysinfo program)
# Reporting fields as emitted by the sysinfo program; several still carry its
# placeholder text ("add more disk info here", "add definition here").
hw_cpu_name = AMD EPYC 7513 32-Core
hw_disk = 128 GB add more disk info here
hw_nchips = 2
prepared_by = root (is never output, only tags rawfile)
sw_file = tmpfs
sw_os001 = Red Hat Enterprise Linux release 8.3 (Ootpa)
sw_state = Run level 3 (add definition here)
# End of settings added by sysinfo_program

657.xz_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

648.exchange2_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

641.leela_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

631.deepsjeng_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

623.xalancbmk_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

620.omnetpp_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

605.mcf_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

600.perlbench_s:
# The following setting was inserted automatically as a result of
# post-run basepeak application.
basepeak = 1

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
# Platform notes listing the BIOS settings, one note per line.
default:
notes_plat_form_000 =
notes_plat_form_005 =BIOS settings:
notes_plat_form_010 = L3 Cache as NUMA Domain : Enabled
notes_plat_form_015 = Virtualization Technology : Disabled
notes_plat_form_020 = DRAM Refresh Delay : Performance
notes_plat_form_025 =
notes_plat_form_030 = System Profile : Custom
notes_plat_form_035 = CPU Power Management : Maximum Performance
notes_plat_form_040 = Memory Patrol Scrub : Disabled
notes_plat_form_045 = PCI ASPM L1 Link
notes_plat_form_050 = Power Management : Disabled
notes_plat_form_055 = Algorithm Performance
notes_plat_form_060 = Boost Disable (ApbDis): Enabled