# Invocation command line:
# /home/amd/CPU2017/bin/harness/runcpu --configfile amd1704na-rate-revC.cfg --tune all --nopower --runmode rate --tune base:peak --size test:train:refrate intrate
# output_root was not used for this run
############################################################################
################################################################################
# AMD1704 SPEC CPU2017 V1.0.2 Rate Configuration File for 64-bit Linux
#
# File name                : amd1704na-rate-revC.cfg
# Creation Date            : July 31, 2017
# CPU2017 Version          : 1.0.2
# Supported benchmarks     : All Rate benchmarks (intrate, fprate)
# Compiler name/version    : AOCC/1.0.0
# Operating system version : Ubuntu 16.04
# Supported OS's           : Ubuntu 16.04/17.04, RHEL 7.4
# Hardware                 : AMD Naples (AMD64)
# FP Base Pointer Size     : 64-bit
# FP Peak Pointer Size     : 32/64-bit
# INT Base Pointer Size    : 64-bit
# INT Peak Pointer Size    : 32/64-bit
# Auto Parallelization     : No
#
# Note: DO NOT EDIT THIS FILE, the only edits required to properly run these
# binaries are made in AMD1704na-rate-revC.inc. Please consult Readme.amd1704na_revC.txt
# for a few uncommon exceptions which require edits to this file.
#
# Description:
#
# This binary package automates away many of the complexities necessary to set
# up and run SPEC CPU2017 under optimized conditions on AMD Naples-based
# server platforms within Linux (AMD64).
#
# The binary package was built specifically for AMD Naples microprocessors and
# is not intended to run on other products.
#
# Please install the binary package by following the instructions in
# "Readme.amd1704na_revC.txt" under the "How To Use the Binaries" section.
#
# The binary package is designed to work without alteration on two socket AMD
# Naples-based servers with 32 cores per socket, SMT enabled and 512 GiB of DDR4
# memory distributed evenly among all 16 channels using 16 GiB DIMMs.
#
# To run the binary package on other Naples configurations, please review
# "Readme.amd1704na_revC.txt" for instructions on how to easily modify
# the CPU2017 configuration include file "AMD1704na-rate-revC.inc". Typically,
# the number of sockets, number of cores per socket, SMT state and memory
# size values will need to be changed in amd1704na-rate-revX.inc. Furthermore,
# modify necessary system specific and test specific documentation within this
# file. Again, refer to Readme.amd1704na-rate-revX.txt for more information.
#
# In most cases, it should be unnecessary to edit "AMD1704na-rate-revC.cfg" or any
# other file besides "AMD1704na-rate-revC.inc".
#
# The config file automatically sets the number of rate copies equal to the
# number of logical processors and automatically binds each copy to a unique
# logical core.
#
# The run script and accompanying binary package are designed to work on Ubuntu
# 17.04/16.04 and RHEL 7.4.
#
# Important! If you write your own run script, please set the stack size to
# "unlimited" when executing this binary package. Failure to do so may cause
# some benchmarks to overflow the stack. For example, to set stack size within
# the bash shell, include the following line somewhere at the top of your run
# script before the runcpu invocation:
#
#     ulimit -s unlimited
#
# Modification of this config file should only be necessary if you intend to
# rebuild the binaries. General instructions for rebuilding the binaries are
# found in-line below.
#
################################################################################
# Binary label extension and "allow_build" switch
################################################################################
# Only modify the binary label extension if you plan to rebuild the binaries.
# Binary label extension; also used as the "label" header setting below.
%define ext amd1704-rate-revC

# If you plan to recompile these CPU2017 binaries, please choose a new extension
# name (ext above) to avoid confusion with the current binary set on your system
# under test, and to avoid confusion for SPEC submission reviewers. You will
# also need to set "allow_build" to true below. Finally, you must modify the
# Paths section below to point to your library locations if the paths are not
# already set up in your build environment.

# Change the following line to true if you intend to REBUILD the binaries (AMD
# does not support this). Valid values are "true" or "false" (no quotes).
%define allow_build false
#
################################################################################
# Paths -- MODIFY AS NEEDED (modification should not be necessary for runs)
################################################################################

# Set location of runtime libraries for runs or builds.
preenv = 1
%define lib_dir amd1704-rate-libs-revC

# The following path will have to be changed on the build system if different:
JEMALLOC_LIB32_PATH = /root/work/lib/jemalloc/lib32

# allow_build selects between a run-only setup (builds fail, bundled runtime
# libraries are put on the library path) and a rebuild setup (parallel make).
# Any other value stops config processing with an error.
%if '%{allow_build}' eq 'false'
   fail_build = 1
   # Runtime libraries:
   # NOTE(review): the 64-bit and 32-bit directories are separated by ';' while
   # the inherited path is appended with ':'. Kept exactly as found; glibc's
   # ld.so documents both characters as list separators -- confirm intentional.
   preENV_LD_LIBRARY_PATH = $[top]/%{lib_dir}/64;$[top]/%{lib_dir}/32:%{ENV_LD_LIBRARY_PATH}
%elif '%{allow_build}' eq 'true'
   # If you intend to rebuild, be sure to set the library paths either in the
   # build script or here:
%  define build_ncpus 16 # controls number of simultaneous compiles
   fail_build = 0
   makeflags = --jobs=%{build_ncpus} --load-average=%{build_ncpus}
%else
%  error The value of "allow_build" is %{allow_build}, but it can only be "true" or "false". This error was generated
%endif

################################################################################
# Header settings
################################################################################
backup_config = 1 # set to 0 if you do not want backup files
bench_post_setup = sync
# command_add_redirect: If set, the generated ${command} will include
# redirection operators (stdout, stderr), which are passed along to the shell
# that executes the command. If this variable is not set, specinvoke does the
# redirection. NOTE: this value must be "yes" for the affinity generation
# section of this config file.
command_add_redirect = yes
check_md5 = yes
env_vars = no
flagsurl000 = http://www.spec.org/cpu2017/flags/gcc.2018-02-16.xml
flagsurl001 = http://www.spec.org/cpu2017/flags/aocc100-flags-revC-I.2018-02-16.xml
#flagsurl03 = $[top]/amd1704-INVALID-platform-revC-I.xml
ignore_errors = no
# label: Arbitrary string, tags your binaries & directories.
label = %{ext}
line_width = 1020
log_line_width = 1020
mean_anyway = yes
output_format = all
reportable = yes
size = test,train,ref
teeout = yes
teerunout = yes
tune = base,peak

################################################################################
# Include file name
################################################################################
# The include file contains fields that are commonly changed. This is usually
# the only file that needs modification for runs.
%define inc_file_name AMD1704na-rate-revC.inc
#include: %{inc_file_name}
# ----- Begin inclusion of 'AMD1704na-rate-revC.inc'
############################################################################
################################################################################
# Machine Specific Settings
################################################################################
# This binary package is designed to work without alteration on two socket AMD
# Naples-based servers with 32 cores per socket, SMT enabled and 512 GiB of DDR4
# memory distributed evenly among all 16 channels using 16 GiB DIMMs.
#
# To run the binary package on other Naples configurations, please review
# "Readme.amd1704na_revC.txt" for instructions on how to easily modify
# this include file ("AMD1704na-rate-revC.inc"). Typically, the number of sockets,
# number of cores and NUMA nodes per socket, SMT state and memory size values
# will need to be changed in this file. Furthermore, modify necessary system
# specific and test specific documentation within this file. Again, refer to
# "Readme.amd1704na-rate-revX.txt" for more information.
#
# Note that the submit command is written to dynamically generate bash scripts
# which assume that each socket has the same number of nodes and that each node
# services the same number of logical cores and that both logical cores and NUMA
# nodes are enumerated sequentially. If these assumptions are not correct for
# your system, process affinity for each benchmark copy might not get set
# correctly, which can lead to suboptimal results or even run errors.
################################################################################
# ****************************** Required fields *******************************
# The following fields must be updated to reflect SUT configuration for
# functional purposes. If these fields are incorrect, CPU2017 might not run or
# might run suboptimally.
################################################################################
# How many sockets does your test system have? (Naples supports 1 or 2)
%define num_sockets 2

# How many physical cores per socket does your system have?
# (1, 2, 4, 8, 16, 24 or 32)
%define cores_per_socket 32

# All Naples parts have four NUMA nodes, but the core count differs from 8, 16,
# 24 to 32. Ryzen 7 parts have 1 node and 8 cores.
%define nodes_per_socket 4

# Is SMT enabled for the upcoming SPEC CPU2017 run? (disabled or enabled)
# Setting enabled will account for and configure two cpu2017 copies per
# physical core.
%define smt enabled

# Approximately how much memory (in GiB) do you have in your system?
# Typical values are 8, 16, 32, 64, 128, 256 or 512. If your SUT's memory
# capacity is between two of those values then round down. If this value is
# over 512 GiB, choose 512.
#
# Update the line below if your SUT's memory capacity is below 512 GiB:
%define memory_size 512

################################################################################
# ****************************** Optional fields *******************************
# The following optional fields can be updated to simplify memory documentation.
# Alternatively, you can edit the fields hw_memory000 and hw_memory001 as
# required in the documentation section at the end of this file. See:
# https://www.jedec.org/standards-documents/docs/module4_20_26
################################################################################
# What is the rated frequency (MT/s) of the DDR4 memory in your system?
# (2400, 2667)
%define memory_freq 2667

# At what speed is your DDR4 memory running? For example, its rated frequency
# might be 2667 MT/s, but it is running at a down-clocked speed of 2400.
# (2400, 2667)
%define memory_freq_actual 2667

# Specify the number of DDR4 memory sticks in your SUT:
%define mem_sticks 16

# What is your DDR4 memory rank? (1, 2 or 4)
%define memory_rank 4

# What is the DDR4 device organization bit width? (4 or 8)
%define dram_bus_width 4

# What is the DDR4 speed grade (CL-tRCD-tRP)?
# J = 10-10-10
# K = 11-11-11
# L = 12-12-12
# M = 13-13-13
# N = 14-14-14
# P = 15-15-15
# R = 16-16-16
# T = 17-17-17
# U = 18-18-18
# V = 19-19-19
%define memory_speed_grade V

# What is the DDR4 module type?
%define mem_module_type R

################################################################################
# You should not need to change anything in this section unless you have a non-
# standard configuration. See Readme.amd1704na-rate-revC for more information.
# You will likely have to edit the Test Information further down.
################################################################################
%define mem_stick_size %{memory_size} / %{mem_sticks}
%define cpucores %{cores_per_socket} * %{num_sockets}

# Naples is only available in one or two socket configurations:
%if ((%{num_sockets} != 1) && (%{num_sockets} != 2))
%  error Please define "num_sockets" to either 1 or 2 in %{inc_file_name}. This error was generated
%endif

# This config file supports 1, 2, 4, 8, 16, 24 and 32 physical cores / socket.
# The value validated here is cores_per_socket, so the error names that macro.
%if ((%{cores_per_socket} == 1) || (%{cores_per_socket} == 2) || (%{cores_per_socket} == 4) || (%{cores_per_socket}==8) || (%{cores_per_socket}==16) || (%{cores_per_socket}==24) || (%{cores_per_socket}==32))
%  define cores_per_node %{cores_per_socket} / %{nodes_per_socket}
%else
%  error Please define "cores_per_socket" to 1, 2, 4, 8, 16, 24 or 32 in %{inc_file_name}. This error was generated
%endif

# Calculate the total number of logical cores:
%define physical_core_count %{cores_per_socket} * %{num_sockets}
%if ('%{smt}' eq 'enabled')
%  define threadspercore 2
%  define logical_core_count 2 * %{physical_core_count}
%elif ('%{smt}' eq 'disabled')
%  define threadspercore 1
%  define logical_core_count %{physical_core_count}
%else
%  error Please define "smt" to either "enabled" or "disabled" in %{inc_file_name}. This error was generated
%endif

# Logical cores served by each NUMA node (at least 1):
%define total_nodes %{nodes_per_socket} * %{num_sockets}
%if (%{logical_core_count} >= %{total_nodes})
%  define logical_cores_per_node %{logical_core_count} / %{total_nodes}
%else
%  define logical_cores_per_node 1
%endif

# Calculate the amount of memory per logical core:
%define GB_mem_per_core %{memory_size} / %{logical_core_count}

# CPU2017 requires at least 4 GiB per logical core:
%if %{GB_mem_per_core} < 4
%  error These SPEC CPU binaries require at least 4 GiB of memory per copy. The value for "memory_size" in %{inc_file_name} indicates you have not met this requirement. This error was generated
%endif

# Do you want to use hugectl? Define the hugectl command here:
#%define hugectl_cmd hugectl -heap
%define hugectl_cmd

# %if %{GB_mem_per_core} >= 4
# %  define huge_pages_per_copy 896 # Number of huge pages / copy
# %  define num_system_huge_pages %{logical_core_count} * %{huge_pages_per_copy}
# %  define thp_setting disabled
# %elif %{GB_mem_per_core} >= 3
# %  define huge_pages_per_copy 480 # Number of huge pages / copy
# %  define num_system_huge_pages %{logical_core_count} * %{huge_pages_per_copy}
# %  define thp_setting disabled
# %else
# %  define num_system_huge_pages 0
# %  define thp_setting enabled
# %endif
%define num_system_huge_pages 0
%define thp_setting enabled

# Naples has 64kB L1i / physical core:
%define chip_total_L1i 64 * %{cores_per_socket}

# Naples has 32kB L1d / physical core:
%define chip_total_L1d 32 * %{cores_per_socket}

# Naples has 512 kB L2 / core:
%define chip_total_L2_MB %{cores_per_socket} / 2

################################################################################
# Test information
#
# Most of the lines below will need to be modified to reflect your test
# conditions.
################################################################################
# Tester information
# To understand the difference between hw_vendor/sponsor/tester, see:
# www.spec.org/auto/cpu2017/Docs/config.html#test_sponsor
################################################################################
license_num =4872 # (Your SPEC license number)
tester =GIGA-BYTE TECHNOLOGY CO., LTD.
test_sponsor =GIGA-BYTE TECHNOLOGY CO., LTD.
hw_vendor =GIGA-BYTE TECHNOLOGY CO., LTD.
hw_model000 =R281-Z91
hw_model001 =(AMD EPYC 7601, 2.20 GHz)

#--------- If you install new compilers, edit this section --------------------
sw_compiler000 =C/C++: Version 1.0.0 of AOCC
sw_compiler001 =Fortran: Version 4.8.2 of GCC

################################################################################
# Hardware information
################################################################################
# Example # Brief info about field
hw_avail =Mar-2018 # Date of LAST hardware component to ship
sw_avail =Jun-2018 # Date of LAST software component to ship
#hw_cpu_name =AMD EPYC 7601 # Usually set correctly by sysinfo
hw_cpu_nominal_mhz =2200 # Nominal chip frequency, in MHz
hw_cpu_max_mhz =3200 # Max chip frequency, in MHz
hw_ncores =64
hw_nthreadspercore =2
# The chip-order text follows the socket count set by num_sockets.
%if %{num_sockets} == 1
hw_ncpuorder =1 chip
%elif %{num_sockets} == 2
hw_ncpuorder =1,2 chips
%endif #num_sockets
hw_other =None # Other perf-relevant hw, or "None"
sw_other000 =jemalloc general malloc implementation # Other perf-relevant sw, or "None"
sw_other001 =V4.5.0
fw_bios =Version F07 released Jun-2018 # Ex: BIOS American Megatrends Inc. 0205 Jul-2017
sw_base_ptrsize =64-bit
hw_pcache =64 KB I + 32 KB D on chip per core
hw_scache =512 KB I+D on chip per core
hw_tcache =64 MB I+D on chip per chip, 8 MB shared / 4 cores
hw_ocache =None

################################################################################
# Notes
################################################################################
notes_020 =The AMD64 AOCC Compiler Suite is available at
notes_025 =http://developer.amd.com/amd-aocc/
notes_030 =
notes_035 =Binaries were compiled on a system with 2x AMD EPYC 7601 CPU + 512GB Memory using RHEL 7.4
notes_040 =
notes_045 =jemalloc, a general purpose malloc implementation, was obtained at
notes_050 =https://github.com/jemalloc/jemalloc/releases/download/4.5.0/jemalloc-4.5.0.tar.bz2
notes_055 =jemalloc was built with GCC v4.8.5 in RHEL v7.2 under default conditions.
# General notes, continued: jemalloc environment settings, the linker and
# Fortran plugins used for the build, CVE mitigation attestations, and how
# benchmark copies were bound to cores.
notes_060 =jemalloc uses environment variable MALLOC_CONF with values narenas and lg_chunk:
notes_065 = narenas: sets the maximum number of arenas to use for automatic multiplexing
notes_070 = of threads and arenas.
notes_075 = lg_chunk: set the virtual memory chunk size (log base 2). For example,
notes_080 = lg_chunk:21 sets the default chunk size to 2^21 = 2MiB.
notes_085 =
notes_090 =The AOCC Gold Linker plugin was installed and used for the link stage.
notes_095 =
notes_100 =The AOCC Fortran Plugin version 1.0 was used to leverage AOCC optimizers
notes_105 =with gfortran. It is available here:
notes_110 =http://developer.amd.com/amd-aocc/
notes_115 =NA: The test sponsor attests, as of date of publication, that CVE-2017-5754 (Meltdown)
notes_120 =is mitigated in the system as tested and documented.
notes_125 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5753 (Spectre variant 1)
notes_130 =is mitigated in the system as tested and documented.
notes_135 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5715 (Spectre variant 2)
notes_140 =is mitigated in the system as tested and documented.
notes_submit_000 ='numactl' was used to bind copies to the cores.
notes_submit_005 =See the configuration file for details.
# Operating-system notes. The tool named here is runcpu (the CPU2017 harness
# shown in the invocation line at the top of this file).
notes_os_000 ='ulimit -s unlimited' was used to set environment stack size
notes_os_005 ='ulimit -l 2097152' was used to set environment locked pages in memory limit
notes_os_010 =
notes_os_015 =runcpu command invoked through numactl i.e.:
notes_os_020 =numactl --interleave=all runcpu
notes_os_025 =
notes_os_030 =Set dirty_ratio=8 to limit dirty cache to 8% of memory
notes_os_035 =Set swappiness=1 to swap only if necessary
notes_os_040 =Set zone_reclaim_mode=1 to free local node memory and avoid remote memory
notes_os_045 =sync then drop_caches=3 to reset caches before invoking runcpu
notes_os_050 =
# notes_os_055 and notes_os_060 form one sentence, so both are emitted
# unconditionally; only the THP line depends on thp_setting.
notes_os_055 =dirty_ratio, swappiness, zone_reclaim_mode and drop_caches were
notes_os_060 =all set using privileged echo (e.g. echo 1 > /proc/sys/vm/swappiness).
%if ('%{thp_setting}' eq 'enabled')
notes_os_065 =Transparent huge pages were enabled for this run (OS default)
%else
%  warning Transparent huge pages define not set properly, so its setting was not included in this run's documentation. This warning was generated
%endif #thp_setting

# Both branches use notes_os_070..080 so that the huge-page notes never
# overwrite the numactl notes (notes_os_020..030) above.
%if %{num_system_huge_pages} != 0
notes_os_070 =
notes_os_075 =Set vm/nr_hugepages=%{num_system_huge_pages} in /etc/sysctl.conf
notes_os_080 =mount -t hugetlbfs nodev /mnt/hugepages
%else
notes_os_070 =
notes_os_075 =Huge pages were not configured for this run.
notes_os_080 =
%endif #num_system_huge_pages

intrate,fprate:
# the following fields must be set here or they will be overwritten by sysinfo:
hw_disk =1 x 512 GB AHCI PCIe M.2 SSD # ex: 1 x 1 TB SSD

#%if (('%{memory_freq}' eq '2400') || ('%{memory_freq}' eq '2667'))
#%  if (%{memory_freq_actual} != %{memory_freq})
# hw_memory =%{memory_size} GB (%{mem_sticks} x %{mem_stick_size} GB %{memory_rank}Rx%{dram_bus_width} PC4-%{memory_freq}%{memory_speed_grade}-%{mem_module_type})
# hw_memory000 =1 TB (16 x 64 GB 4Rx4 PC4-2667V-L)
#%  else
# hw_memory001 =
#%  endif
#%else
#%  error Please define "memory_freq" to a valid value in %{inc_file_name}. This error was generated
#%endif #memory_freq

# If you do not want to use the memory information constructed above, override
# them here by uncommenting the following example fields and fill them out
# according to: nnn GB (i x j GB kRxl PC4-mmmmn-o). See:
# https://www.jedec.org/standards-documents/docs/module4_20_26
# Example:
hw_memory000 =1 TB (16 x 64 GB 4Rx4 PC4-2667V-L)
hw_memory001 =
hw_memory002 =

# The following field must be made blank or sysinfo will output placeholder
# information in it:
hw_nchips =2
prepared_by =GIGA-BYTE TECHNOLOGY CO., LTD. # ex: Advanced Micro Devices
sw_file =xfs # ex1: ext4, ex2: ntfs
sw_os000 =SUSE Linux Enterprise Server 12 SP3 (x86_64) # ex: Ubuntu 16.04 LTS,
sw_os001 =kernel 4.4.73-5-default # ex: Kernel 4.4.0-87-generic
sw_os002 =
sw_state =Run Level 3 (multi-user) # ex: Run level 3 (Full multiuser with network)

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
notes_000 =Environment variables set by runcpu before the start of the run:
notes_005 =LD_LIBRARY_PATH = "/home/amd/CPU2017/amd1704-rate-libs-revC/64;/home/amd/CPU2017/amd1704-rate-libs-revC/32:"
notes_010 =MALLOC_CONF = "lg_chunk:26"
notes_015 =
notes_plat_000 =BIOS Settings:
notes_plat_005 =Determinism Slider = Power
notes_plat_010 =cTDP Control = Manual
notes_plat_015 =cTDP = 200
# ---- End inclusion of '/home/amd/CPU2017/config/AMD1704na-rate-revC.inc'

# Switch back to default block after the include file:
default:
################################################################################
# Auto-configured settings based on your .inc file
################################################################################
# "copies" applies only to intrate and fprate:
copies = %{logical_core_count}

# Verify computed logical core count (must be between 1 and 128 inclusive):
%if (%{logical_core_count} < 1) || (%{logical_core_count} > 128)
%  error The number of logical cores is calculated to be %{logical_core_count}, which is invalid. Check "num_sockets" and "cores_per_socket". This error was generated
%endif

#%if %{num_system_huge_pages} > 0
# preENV_HUGETLB_LIMIT = %{huge_pages_per_copy}
#%endif

###############################################################################
###############################################################################
# The following lines create a bash script that is used to calculate the NUMA
# node number and the physical core number for setting the affinity for each
# benchmark copy. Be careful to protect (with "\") all "$" characters that need
# to be in the shell script. Also, do not use single quotes since that character
# is used during dynamic script creation.
#
# Note that the generated script assumes that each socket has the same number of
# nodes and that each node services the same number of logical cores and that
# both logical cores and NUMA nodes are enumerated sequentially.
#
# Name the script files uniquely for each copy:
%define bindcmdname run.$SPECCOPYNUM.sh

# Start the script:
%define bindcmd01 \#!/bin/bash

# The first argument passed into the script is the current thread number. Read this
# argument and skip to the next:
%define bindcmd02 threadnum=\$1; shift

# Set the script variable threads_per_node based upon logical_cores_per_node
# in this config file:
%define bindcmd03 threads_per_node=%{logical_cores_per_node}

# Calculate the node for the thread (not used with --localalloc):
%define bindcmd04 ((nodenum=threadnum/threads_per_node))

# Search for the delimiter "--" that precedes $command
%define bindcmd05 \# Skip to command...
%define bindcmd06 while [[ \$1 != -- ]]; do
%define bindcmd07 shift
%define bindcmd08 done
%define bindcmd09 shift
%define bindcmd10 \# What is left in \$* are the commands to execute

# Create the command to launch the executable:
%define run_command exec numactl --localalloc --physcpubind=\$threadnum %{hugectl_cmd} \$*

# echo this command to a file for debug purposes:
%define bindcmd11 echo %{run_command} > run.$SPECCOPYNUM.out 2>&1

# This line executes the fully formed command for the current copy:
%define bindcmd12 %{run_command}

# The submit writes out the script, provides it with arguments calculated above,
# and runs it.
#
# We are building our submit command based upon the bindcmd# lines that we
# created above. Note that the numbers have to match exactly or the script will
# not run properly.
#
# NOTE(review): in this copy the text from "Also note that" up to the first
# echo line was lost. The "submit = <<EOS" opener and the bindcmd01 line below
# are reconstructed from the closing "EOS", the otherwise unused bindcmd01
# define, and the ">>" appends that follow -- confirm against the distributed
# amd1704na-rate-revC.cfg.
submit = <<EOS
echo '%{bindcmd01}' > %{bindcmdname}
echo '%{bindcmd02}' >> %{bindcmdname}
echo '%{bindcmd03}' >> %{bindcmdname}
echo '%{bindcmd04}' >> %{bindcmdname}
echo '%{bindcmd05}' >> %{bindcmdname}
echo '%{bindcmd06}' >> %{bindcmdname}
echo '%{bindcmd07}' >> %{bindcmdname}
echo '%{bindcmd08}' >> %{bindcmdname}
echo '%{bindcmd09}' >> %{bindcmdname}
echo '%{bindcmd10}' >> %{bindcmdname}
echo '%{bindcmd11}' >> %{bindcmdname}
echo '%{bindcmd12}' >> %{bindcmdname}
# Now we echo the command to execute the script to a debug file:
%define script_run_command exec /bin/bash ./%{bindcmdname} $SPECCOPYNUM -- $command
echo %{script_run_command} > run.$SPECCOPYNUM.cmd
# And now we execute the script we just created:
%{script_run_command}
EOS

################################################################################
################################################################################

################################
# End auto-configured settings #
################################

################################################################################
# Compilers
################################################################################
default:
   CC = clang
   CXX = clang++
   FC = gfortran
   CLD = clang
   FLD = clang
   CC_VERSION_OPTION = --version
   CXX_VERSION_OPTION = --version
   FC_VERSION_OPTION = --version

################################################################################
# Portability Flags
################################################################################
default: # data model applies to all benchmarks
   EXTRA_PORTABILITY = -DSPEC_LP64

# Benchmark-specific portability (anything other than data model
# is allowed only where need is proven)
500.perlbench_r,600.perlbench_s: #lang='C'
   PORTABILITY = -DSPEC_LINUX_X64

521.wrf_r,621.wrf_s: #lang='F,C'
   CPORTABILITY = -DSPEC_CASE_FLAG
   FPORTABILITY = -fconvert=big-endian

523.xalancbmk_r,623.xalancbmk_s: #lang='CXX'
   PORTABILITY = -DSPEC_LINUX

526.blender_r: #lang='CXX,C'
   CPORTABILITY = -funsigned-char
   CXXPORTABILITY = -D__BOOL_DEFINED

527.cam4_r,627.cam4_s: #lang='F,C'
   PORTABILITY = -DSPEC_CASE_FLAG

628.pop2_s: #lang='F,C'
   CPORTABILITY = -DSPEC_CASE_FLAG
   FPORTABILITY = -fconvert=big-endian

################################################################################
# Tuning Flags
################################################################################

#####################
# Base tuning flags #
#####################
default=base:
   COPTIMIZE = -O3 -flto -ffast-math -march=znver1 -fstruct-layout=2 \
               -mllvm -unroll-threshold=100 -fremap-arrays \
               -mno-avx2 -mllvm -inline-threshold=1000 \
               -mllvm -disable-vect-cmp
   CXXOPTIMIZE = -O3 -flto -march=znver1 \
                 -mllvm -unroll-threshold=100 -finline-aggressive \
                 -fremap-arrays -mllvm -inline-threshold=1000 \
                 -mllvm -disable-vect-cmp
   FOPTIMIZE = -O3 -mavx -madx -funroll-loops -ffast-math
   EXTRA_FFLAGS = -fplugin=dragonegg.so \
                  -fplugin-arg-dragonegg-llvm-option="-merge-constant -disable-vect-cmp"
   EXTRA_FLIBS = -lgfortran -lamdlibm -lm
   EXTRA_LDFLAGS = -flto -Wl,-plugin-opt=-merge-constant \
                   -Wl,-plugin-opt=-lsr-in-nested-loop \
                   -Wl,-plugin-opt=-disable-vect-cmp
   EXTRA_LIBS = -ljemalloc
   # The following is necessary for 502/602 gcc:
   LDOPTIMIZE = -z muldefs

########################
# intrate tuning flags #
########################
intrate:
   EXTRA_FFLAGS = -Ofast -fdefault-integer-8 -fplugin=dragonegg.so \
                  -fplugin-arg-dragonegg-llvm-option="-lsr-in-nested-loop \
                  -enable-iv-split -merge-constant -inline-threshold:1000 -disable-vect-cmp"
   preENV_MALLOC_CONF = lg_chunk:26
   sw_peak_ptrsize =32/64-bit

#######################
# fprate tuning flags #
#######################
fprate:
   preENV_MALLOC_CONF = lg_chunk:28
   sw_peak_ptrsize =64-bit

#####################
# Peak tuning flags #
#####################
default=peak:
   COPTIMIZE = -Ofast -flto -march=znver1 -fstruct-layout=3 \
               -mllvm -vectorize-memory-aggressively -mno-avx2 \
               -mllvm -unroll-threshold=100 -fremap-arrays \
               -mllvm -inline-threshold=1000
   CXXOPTIMIZE = -Ofast -flto -march=znver1 -finline-aggressive \
                 -mllvm -unroll-threshold=100 -fremap-arrays \
                 -mllvm -inline-threshold=1000
   FOPTIMIZE = -O3 -mavx2 -madx -funroll-loops -ffast-math
   EXTRA_FFLAGS = -fplugin=dragonegg.so \
                  -fplugin-arg-dragonegg-llvm-option="-merge-constant -inline-threshold:1000"
   EXTRA_FLIBS = -lgfortran -lamdlibm -lm
   EXTRA_LDFLAGS = -flto -Wl,-plugin-opt=-merge-constant \
                   -Wl,-plugin-opt=-lsr-in-nested-loop
   EXTRA_LIBS = -ljemalloc
   # Feedback-directed optimization is off by default and switched on per
   # benchmark below; the PASS1/PASS2 flags apply only where feedback = 1.
   feedback = 0
   PASS1_CFLAGS = -fprofile-instr-generate
   PASS2_CFLAGS = -fprofile-instr-use
   PASS1_FFLAGS = -fprofile-generate
   PASS2_FFLAGS = -fprofile-use
   PASS1_CXXFLAGS = -fprofile-instr-generate
   PASS2_CXXFLAGS = -fprofile-instr-use
   PASS1_LDFLAGS = -fprofile-instr-generate
   PASS2_LDFLAGS = -fprofile-instr-use
   fdo_run1 = $command ; llvm-profdata merge -output=default.profdata *.profraw

500.perlbench_r=peak: #lang='C'
   feedback = 1

502.gcc_r=peak: #lang='C'
   EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64
   EXTRA_COPTIMIZE = -fgnu89-inline
   CC = clang -m32
   CLD = clang -m32
   EXTRA_LIBS = -L$[JEMALLOC_LIB32_PATH] -ljemalloc
   copies = %{physical_core_count}

503.bwaves_r=peak:
   copies = %{physical_core_count}

510.parest_r=peak:
   copies = %{physical_core_count}

519.lbm_r=peak:
   copies = %{physical_core_count}

521.wrf_r,621.wrf_s=peak: #lang='F,C'
   COPTIMIZE = -O3 -mavx -ffast-math
   FOPTIMIZE = -O3 -mavx -funroll-loops -ffast-math
   copies = %{physical_core_count}

523.xalancbmk_r=peak: #lang='CXX'
   EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64
   CXX = clang++ -m32
   CXXLD = clang++ -m32
   EXTRA_LIBS = -L$[JEMALLOC_LIB32_PATH] -ljemalloc

525.x264_r=peak: #lang='C'
   feedback = 1

541.leela_r=peak: #lang='CXX'
   CXXOPTIMIZE = -Ofast -flto -march=znver1 -mllvm -unroll-count=8 \
                 -mllvm -unroll-threshold=100
   feedback = 1

549.fotonik3d_r=peak:
   copies = %{physical_core_count}

554.roms_r=peak:
   copies = %{physical_core_count}

# The following settings were obtained by running the sysinfo_program
# 'specperl $[top]/bin/sysinfo' (sysinfo:SHA:ecd2bef08f316af97f5a7768b641e2a3307c1b4b68efb5a57fa76367d790d233) default: notes_plat_sysinfo_000 = Sysinfo program /home/amd/CPU2017/bin/sysinfo notes_plat_sysinfo_005 = Rev: r5797 of 2017-06-14 96c45e4568ad54c135fd618bcc091c0f notes_plat_sysinfo_010 = running on linux-ojy8 Fri Jun 29 21:02:06 2018 notes_plat_sysinfo_015 = notes_plat_sysinfo_020 = SUT (System Under Test) info as seen by some common utilities. notes_plat_sysinfo_025 = For more information on this section, see notes_plat_sysinfo_030 = https://www.spec.org/cpu2017/Docs/config.html#sysinfo notes_plat_sysinfo_035 = notes_plat_sysinfo_040 = From /proc/cpuinfo notes_plat_sysinfo_045 = model name : AMD EPYC 7601 32-Core Processor notes_plat_sysinfo_050 = 2 "physical id"s (chips) notes_plat_sysinfo_055 = 128 "processors" notes_plat_sysinfo_060 = cores, siblings (Caution: counting these is hw and system dependent. The following notes_plat_sysinfo_065 = excerpts from /proc/cpuinfo might not be reliable. Use with caution.) 
notes_plat_sysinfo_070 = cpu cores : 32 notes_plat_sysinfo_075 = siblings : 64 notes_plat_sysinfo_080 = physical 0: cores 0 1 2 3 4 5 6 7 notes_plat_sysinfo_085 = physical 1: cores 0 1 2 3 4 5 6 7 notes_plat_sysinfo_090 = notes_plat_sysinfo_095 = From lscpu: notes_plat_sysinfo_100 = Architecture: x86_64 notes_plat_sysinfo_105 = CPU op-mode(s): 32-bit, 64-bit notes_plat_sysinfo_110 = Byte Order: Little Endian notes_plat_sysinfo_115 = CPU(s): 128 notes_plat_sysinfo_120 = On-line CPU(s) list: 0-127 notes_plat_sysinfo_125 = Thread(s) per core: 2 notes_plat_sysinfo_130 = Core(s) per socket: 32 notes_plat_sysinfo_135 = Socket(s): 2 notes_plat_sysinfo_140 = NUMA node(s): 8 notes_plat_sysinfo_145 = Vendor ID: AuthenticAMD notes_plat_sysinfo_150 = CPU family: 23 notes_plat_sysinfo_155 = Model: 1 notes_plat_sysinfo_160 = Model name: AMD EPYC 7601 32-Core Processor notes_plat_sysinfo_165 = Stepping: 2 notes_plat_sysinfo_170 = CPU MHz: 2200.000 notes_plat_sysinfo_175 = CPU max MHz: 2200.0000 notes_plat_sysinfo_180 = CPU min MHz: 1200.0000 notes_plat_sysinfo_185 = BogoMIPS: 4400.22 notes_plat_sysinfo_190 = Virtualization: AMD-V notes_plat_sysinfo_195 = L1d cache: 32K notes_plat_sysinfo_200 = L1i cache: 64K notes_plat_sysinfo_205 = L2 cache: 512K notes_plat_sysinfo_210 = L3 cache: 8192K notes_plat_sysinfo_215 = NUMA node0 CPU(s): 0-7,64-71 notes_plat_sysinfo_220 = NUMA node1 CPU(s): 8-15,72-79 notes_plat_sysinfo_225 = NUMA node2 CPU(s): 16-23,80-87 notes_plat_sysinfo_230 = NUMA node3 CPU(s): 24-31,88-95 notes_plat_sysinfo_235 = NUMA node4 CPU(s): 32-39,96-103 notes_plat_sysinfo_240 = NUMA node5 CPU(s): 40-47,104-111 notes_plat_sysinfo_245 = NUMA node6 CPU(s): 48-55,112-119 notes_plat_sysinfo_250 = NUMA node7 CPU(s): 56-63,120-127 notes_plat_sysinfo_255 = Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov notes_plat_sysinfo_260 = pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm notes_plat_sysinfo_265 = constant_tsc rep_good 
nopl nonstop_tsc extd_apicid amd_dcm aperfmperf eagerfpu pni notes_plat_sysinfo_270 = pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c notes_plat_sysinfo_275 = rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch notes_plat_sysinfo_280 = osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_l2 mwaitx arat cpb notes_plat_sysinfo_285 = hw_pstate npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists notes_plat_sysinfo_290 = pausefilter pfthreshold vmmcall avic fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap notes_plat_sysinfo_295 = clflushopt sha_ni xsaveopt xsavec xgetbv1 clzero irperf overflow_recov succor smca notes_plat_sysinfo_300 = notes_plat_sysinfo_305 = /proc/cpuinfo cache data notes_plat_sysinfo_310 = cache size : 512 KB notes_plat_sysinfo_315 = notes_plat_sysinfo_320 = From numactl --hardware WARNING: a numactl 'node' might or might not correspond to a notes_plat_sysinfo_325 = physical chip. 
notes_plat_sysinfo_330 = available: 8 nodes (0-7) notes_plat_sysinfo_335 = node 0 cpus: 0 1 2 3 4 5 6 7 64 65 66 67 68 69 70 71 notes_plat_sysinfo_340 = node 0 size: 128896 MB notes_plat_sysinfo_345 = node 0 free: 128677 MB notes_plat_sysinfo_350 = node 1 cpus: 8 9 10 11 12 13 14 15 72 73 74 75 76 77 78 79 notes_plat_sysinfo_355 = node 1 size: 129021 MB notes_plat_sysinfo_360 = node 1 free: 128843 MB notes_plat_sysinfo_365 = node 2 cpus: 16 17 18 19 20 21 22 23 80 81 82 83 84 85 86 87 notes_plat_sysinfo_370 = node 2 size: 129021 MB notes_plat_sysinfo_375 = node 2 free: 128858 MB notes_plat_sysinfo_380 = node 3 cpus: 24 25 26 27 28 29 30 31 88 89 90 91 92 93 94 95 notes_plat_sysinfo_385 = node 3 size: 129021 MB notes_plat_sysinfo_390 = node 3 free: 128834 MB notes_plat_sysinfo_395 = node 4 cpus: 32 33 34 35 36 37 38 39 96 97 98 99 100 101 102 103 notes_plat_sysinfo_400 = node 4 size: 129021 MB notes_plat_sysinfo_405 = node 4 free: 128867 MB notes_plat_sysinfo_410 = node 5 cpus: 40 41 42 43 44 45 46 47 104 105 106 107 108 109 110 111 notes_plat_sysinfo_415 = node 5 size: 129021 MB notes_plat_sysinfo_420 = node 5 free: 128876 MB notes_plat_sysinfo_425 = node 6 cpus: 48 49 50 51 52 53 54 55 112 113 114 115 116 117 118 119 notes_plat_sysinfo_430 = node 6 size: 129021 MB notes_plat_sysinfo_435 = node 6 free: 128873 MB notes_plat_sysinfo_440 = node 7 cpus: 56 57 58 59 60 61 62 63 120 121 122 123 124 125 126 127 notes_plat_sysinfo_445 = node 7 size: 116924 MB notes_plat_sysinfo_450 = node 7 free: 116771 MB notes_plat_sysinfo_455 = node distances: notes_plat_sysinfo_460 = node 0 1 2 3 4 5 6 7 notes_plat_sysinfo_465 = 0: 10 16 16 16 32 32 32 32 notes_plat_sysinfo_470 = 1: 16 10 16 16 32 32 32 32 notes_plat_sysinfo_475 = 2: 16 16 10 16 32 32 32 32 notes_plat_sysinfo_480 = 3: 16 16 16 10 32 32 32 32 notes_plat_sysinfo_485 = 4: 32 32 32 32 10 16 16 16 notes_plat_sysinfo_490 = 5: 32 32 32 32 16 10 16 16 notes_plat_sysinfo_495 = 6: 32 32 32 32 16 16 10 16 notes_plat_sysinfo_500 = 
7: 32 32 32 32 16 16 16 10 notes_plat_sysinfo_505 = notes_plat_sysinfo_510 = From /proc/meminfo notes_plat_sysinfo_515 = MemTotal: 1044426224 kB notes_plat_sysinfo_520 = HugePages_Total: 0 notes_plat_sysinfo_525 = Hugepagesize: 2048 kB notes_plat_sysinfo_530 = notes_plat_sysinfo_535 = From /etc/*release* /etc/*version* notes_plat_sysinfo_540 = SuSE-release: notes_plat_sysinfo_545 = SUSE Linux Enterprise Server 12 (x86_64) notes_plat_sysinfo_550 = VERSION = 12 notes_plat_sysinfo_555 = PATCHLEVEL = 3 notes_plat_sysinfo_560 = # This file is deprecated and will be removed in a future service pack or release. notes_plat_sysinfo_565 = # Please check /etc/os-release for details about this release. notes_plat_sysinfo_570 = os-release: notes_plat_sysinfo_575 = NAME="SLES" notes_plat_sysinfo_580 = VERSION="12-SP3" notes_plat_sysinfo_585 = VERSION_ID="12.3" notes_plat_sysinfo_590 = PRETTY_NAME="SUSE Linux Enterprise Server 12 SP3" notes_plat_sysinfo_595 = ID="sles" notes_plat_sysinfo_600 = ANSI_COLOR="0;32" notes_plat_sysinfo_605 = CPE_NAME="cpe:/o:suse:sles:12:sp3" notes_plat_sysinfo_610 = notes_plat_sysinfo_615 = uname -a: notes_plat_sysinfo_620 = Linux linux-ojy8 4.4.73-5-default #1 SMP Tue Jul 4 15:33:39 UTC 2017 (b7ce4e4) x86_64 notes_plat_sysinfo_625 = x86_64 x86_64 GNU/Linux notes_plat_sysinfo_630 = notes_plat_sysinfo_635 = run-level 3 Jun 29 20:55 notes_plat_sysinfo_640 = notes_plat_sysinfo_645 = SPEC is set to: /home/amd/CPU2017 notes_plat_sysinfo_650 = Filesystem Type Size Used Avail Use% Mounted on notes_plat_sysinfo_655 = /dev/sda4 xfs 435G 4.7G 430G 2% /home notes_plat_sysinfo_660 = notes_plat_sysinfo_665 = Additional information from dmidecode follows. WARNING: Use caution when you interpret notes_plat_sysinfo_670 = this section. 
The 'dmidecode' program reads system data which is "intended to allow notes_plat_sysinfo_675 = hardware to be accurately determined", but the intent may not be met, as there are notes_plat_sysinfo_680 = frequent changes to hardware, firmware, and the "DMTF SMBIOS" standard. notes_plat_sysinfo_685 = BIOS GIGABYTE F07 06/13/2018 notes_plat_sysinfo_690 = Memory: notes_plat_sysinfo_695 = 16x Samsung M386A8K40BM2-CTD 64 GB 4 rank 2667 notes_plat_sysinfo_700 = 16x Unknown Unknown notes_plat_sysinfo_705 = notes_plat_sysinfo_710 = (End of data from sysinfo program) hw_cpu_name = AMD EPYC 7601 hw_disk = 435 GB add more disk info here hw_memory001 = 996.042 GB fixme: If using DDR3, format is: hw_memory002 = 'N GB (M x N GB nRxn PCn-nnnnnR-n, ECC)' hw_nchips = 2 prepared_by = root (is never output, only tags rawfile) sw_file = xfs sw_os001 = SUSE Linux Enterprise Server 12 (x86_64) sw_os002 = 4.4.73-5-default sw_state = Run level 3 (add definition here) # End of settings added by sysinfo_program