# Invocation command line: # /home/spec2017/bin/harness/runcpu --configfile BiSheng.cfg --rebuild --copies 128 --reportable --define fastmath=0 --define jemalloc=1 --define hugepages=0 --nopower --runmode rate --tune base --size refrate intrate # output_root was not used for this run ############################################################################ #------------------------------------------------------------------------------ # SPEC CPU2017 config file for: LLVM / Linux / AArch64 #------------------------------------------------------------------------------ #--------- System include file ------------------------------------------------ # If the user specifies an include file, include it now to override default # values. %ifdef %{include} #include: %{include}.inc %endif # IMPORTANT: Reset to the default scope explicitly after an inclusion. default: #--------- CPU tuning --------------------------------------------------------- %ifndef %{mcpu} %define mcpu native %endif #--------- OpenMP ------------------------------------------------------------- %ifndef %{openmp} % define openmp 1 %endif #--------- Core Affinity Style ------------------------------------------------ # For SPECrate only, if you want to bind processes to cores that are spaced # apart evenly (e.g. when using --copies=4 on a 16-core system, bind to cores # 0, 4, 8, and 12), set spreadcopies to 1. Set to 0 if you just want to bind # to the first N cores. For backward compatibility, this value defaults to 1. 
%ifndef %{spreadcopies} % define spreadcopies 1 %endif #--------- Extra Compiler Flags ----------------------------------------------- %ifdef %{cflags} % define extra_cflags %{cflags} %else % define extra_cflags %endif #--------- Extra Linker Flags ------------------------------------------------- %ifdef %{ldflags} % define extra_ldflags %{ldflags} %else % define extra_ldflags %endif #--------- LTO settings ------------------------------------------------------- %ifndef %{LTO} % define LTO full %endif %if %{LTO} eq "full" % define lto_compiler_opt -flto=full % define lto_label _full_lto % ifdef %{gold} % define lto_linker_opt -fuse-ld=gold -flto=full % else % define lto_linker_opt -fuse-ld=lld -flto=full % endif %elif %{LTO} eq "thin" % define lto_compiler_opt -flto=thin % define lto_label _thin_lto % ifdef %{gold} % define lto_linker_opt -fuse-ld=gold -flto=thin % else % define lto_linker_opt -fuse-ld=lld -flto=thin % endif %elif %{LTO} eq "none" % define lto_compiler_opt % define lto_linker_opt % define lto_label %else % error Unexpected LTO mode: "%{LTO}" %endif #--------- Static linking ----------------------------------------------------- %ifndef %{static} % define static 0 %endif %if %{static} == 1 % define static_opt -static % define static_label _static %else % define static_opt % define static_label %endif #--------- jemalloc ----------------------------------------------------------- %define jemalloc_opt -ljemalloc %ifndef %{jemalloc} % define jemalloc_label %elif %{jemalloc} == 0 % define jemalloc_label _nojemalloc %else % define jemalloc_label _jemalloc %endif #--------- Debugging symbols -------------------------------------------------- %ifdef %{debug} % define debug_opt -g %else % define debug_opt %endif #--------- -ffast-math -------------------------------------------------------- # Enable -ffast-math by default. 
%ifndef %{fastmath} % define fastmath 1 %endif #--------- dev_12x ------------------------------------------------------------ %ifndef %{dev_12x} % define dev_12x 0 %endif %if %{dev_12x} == 0 dev_10x_cflags = -mllvm -enable-struct-padding=false -mllvm -enable-struct-repacking=true dev_10x_ldcflags = -Wl,-mllvm,-enable-struct-padding=false -Wl,-mllvm,-enable-struct-repacking=true %endif #--------- Optimized math library --------------------------------------------- # Use "-S mathlib=1" if you prefer libmathlib over libm (e.g. on CentOS 7). # When testing Arm HPC Compiler, use "-S libm=amath". %if defined(%{libm}) % define libm_linker_opt -l%{libm} % define libm_label _%{libm} %elif defined(%{mathlib}) && %{mathlib} == 0 % define libm_linker_opt % define libm_label %else % define libm_linker_opt -lmathlib % define libm_label _mathlib %endif #--------- Instrumented Profile Guided Optimization --------------------------- # Disable PGO by default. Use "-S pgo=1" if you want to enable PGO only for # benchmarks where it is profitable. Use "-S pgo=2" to force PGO everywhere. %ifndef %{pgo} % define pgo 0 %endif %if %{pgo} == 0 % define pgo_label %elif %{pgo} == 1 % define pgo_label _pgo %elif %{pgo} == 2 % define pgo_label _forcepgo %else % error Valid values for "pgo": 0, 1, 2. %endif #--------- Label -------------------------------------------------------------- # Arbitrary string to tag binaries (no spaces allowed) # Two Suggestions: # (1) EDIT this label as you try new ideas. # (2) Use a label meaningful to *you*. 
%ifndef %{label} % if %{openmp} == 1 % if %{fastmath} == 1 % define label llvm%{pgo_label}%{lto_label}%{static_label}%{libm_label}%{jemalloc_label}_fastmath_openmp % else % define label llvm%{pgo_label}%{lto_label}%{static_label}%{libm_label}%{jemalloc_label}_openmp % endif % else % if %{fastmath} == 1 % define label llvm%{pgo_label}%{lto_label}%{static_label}%{libm_label}%{jemalloc_label}_fastmath % else % define label llvm%{pgo_label}%{lto_label}%{static_label}%{libm_label}%{jemalloc_label} % endif % endif %endif %if %{label} =~ m/ / % error Your label "%{label}" contains spaces. Please try underscores instead. %endif %if %{label} !~ m/^[a-zA-Z0-9._-]+$/ % error Illegal character in label "%{label}". Please use only alphanumerics, underscore, hyphen, and period. %endif #--------- Profile Directory -------------------------------------------------- # Location to store profiles for instrumented PGO/FDO %ifndef %{profdir} % ifndef %{ENV_USER} % define profdir /tmp/profs % else % define profdir /tmp/%{ENV_USER}/profs % endif %endif # TODO test to see if supplied directory is valid #--------- Preprocessor ------------------------------------------------------- %ifndef %{bits} # EDIT to control 32 or 64 bit compilation. Or, % define bits 64 # you can set it on the command line using: %endif # 'runcpu --define bits=nn' %ifndef %{build_ncpus} # EDIT to adjust number of simultaneous compiles. % define build_ncpus 24 # Or, you can set it on the command line: %endif # 'runcpu --define build_ncpus=nn' # Don't change this part. 
%define os LINUX %if %{bits} == 64 % define model %elif %{bits} == 32 % define model -mabi=ilp32 %else % error Please define number of bits - see instructions in config file %endif #--------- Global Settings ---------------------------------------------------- # For info, see: # https://www.spec.org/cpu2017/Docs/config.html#fieldname # Example: https://www.spec.org/cpu2017/Docs/config.html#tune # rebuild = 1 backup_config = 0 command_add_redirect = 1 flagsurl000 = http://www.spec.org/cpu2017/flags/Bisheng-compiler-flags.xml flagsurl001 = http://www.spec.org/cpu2017/flags/PCL-Platform-Settings-Kunpeng-V1.0-revF.xml ignore_errors = 1 iterations = 4 label = %{hostname}_%{label}_m%{bits} line_width = 1020 log_line_width = 1020 makeflags = --jobs=%{build_ncpus} mean_anyway = 1 output_format = txt,html,cfg,pdf,csv reportable = 0 preenv = 1 tune = base %ifdef %{verify_binaries} verify_binaries = %{verify_binaries} %endif # ----------------- Run with perf stat monitoring --------------------------- # Specify events for perf stat to sample %ifdef %{events} % define perf_events %{events} %else % define perf_events task-clock,instructions,r0010,cycles,r001b,r0027,r0028,r002e,r0030,r1001,r2013,r2014,r201d,r7000,r7001,r7002,r7003,r7004,r7005,r7006,r7007 %endif %ifdef %{perf} monitor_wrapper = perf stat -x , -e %{perf_events} -o ${top}/result/CPU2017.${lognum}.perf_${baseexe}.csv --append -- ${command} %endif # ----------------- Run with perf stat monitoring --------------------------- #--------- How Many CPUs? ----------------------------------------------------- # Both SPECrate and SPECspeed can test multiple chips / cores / hw threads # - For SPECrate, you set the number of copies. # - For SPECspeed, you set the number of threads. # See: https://www.spec.org/cpu2017/Docs/system-requirements.html#MultipleCPUs # # q. How many should I set? # a. Unknown, you will have to try it and see! 
#
# To get you started:
#
#     copies  - Set this value to the number of cores available on the system.
#               Please be sure you have enough memory; if you do not, you might
#               need to run a smaller number of copies. See:
#               https://www.spec.org/cpu2017/Docs/system-requirements.html#memory
#
#     threads - Set this value to the number of hardware threads available.
#               Higher thread counts are much more likely to be useful for
#               fpspeed than for intspeed.
#
# You can also use the --copies and --threads options on the runcpu command
# line to override these settings.
intrate,fprate:
   copies = 128
intspeed,fpspeed:
   threads = 128

# Use numactl to bind core for intspeed and fpspeed, but only when OpenMP is
# not enabled. 657.xz_s is the only intspeed benchmark that can use OpenMP.
# The first intspeed setting is overridden by the second one when openmp == 1.
intspeed:
   use_submit_for_speed = yes
%if %{openmp} == 1
intspeed:
   use_submit_for_speed = no
%else
fpspeed:
   use_submit_for_speed = yes
%endif

default:
%if %{spreadcopies} == 0
# Simple binding: copy N runs on physical CPU N with node-local memory.
   submit = numactl --localalloc --physcpubind=$SPECCOPYNUM $command
%else
###############################################################################
# The following lines create a bash script that is used to calculate the NUMA
# node number and the physical core number for setting the affinity for each
# benchmark copy. Be careful to protect (with "\") all "$" characters that need
# to be in the shell script. Also, do not use single quotes since that character
# is used during dynamic script creation.
#
# Note that the generated script assumes that each socket has the same number of
# nodes and that each node services the same number of logical cores and that
# both logical cores and NUMA nodes are enumerated sequentially.
#
# The total number of copies must be a multiple of numa node count (usually 4)
# in order to spread evenly.
#
# Name the script files uniquely for each copy:
%define bindcmdname run.$SPECCOPYNUM.sh
# Start the script:
%define bindcmd01 \#!/bin/bash
%define bindcmd02 numcore=`nproc --all`; thiscpy=$SPECCOPYNUM; numcpy=$copies;
%define bindcmd03 if [[ \$numcpy -gt 1 ]]; then
%define bindcmd04 numa=`lscpu |grep "node(s)" | tr -dc 0-9`; if [[ \$[numcpy%numa] -ne 0 ]]; then echo bad copies; exit -1; fi
%define bindcmd05 if [[ \$numcpy -gt \$numcore ]]; then
# NOTE(review): when the copy count is an exact multiple of the core count (or
# the remainder is smaller than the node count), numcpy%numcore makes grpsz
# evaluate to 0 in bindcmd10 and the following division fails - confirm that
# such copy counts are never used with this script.
%define bindcmd06 if [[ \$thiscpy -ge \$numcore ]]; then numcpy=\$[numcpy%numcore]; thiscpy=\$[thiscpy%numcore];
%define bindcmd07 else numcpy=\$numcore;
%define bindcmd08 fi
%define bindcmd09 fi
# numasz = cores per node, grpsz = copies per node, tgtnuma = node for this
# copy, offset = evenly spaced core position of this copy inside that node.
%define bindcmd10 numasz=\$[numcore/numa]; grpsz=\$[numcpy/numa]; tgtnuma=\$[thiscpy/grpsz]
%define bindcmd11 grpid=\$[thiscpy-tgtnuma*grpsz]; offset=\$[grpid*numasz/grpsz]
%define bindcmd12 tgtcore=\$[tgtnuma*numasz + offset];
%define bindcmd13 else
%define bindcmd14 tgtcore=0;
%define bindcmd15 fi;
# The first argument passed into the script is the current thread number. Read this
# argument and skip to the next:
# Search for the delimiter "--" that precedes $command
%define bindcmd16 \# Skip to command...
%define bindcmd17 shift; while [[ \$1 != -- ]]; do
%define bindcmd18 shift
%define bindcmd19 done
%define bindcmd20 shift
%define bindcmd21 \# What is left in \$* are the commands to execute
# Create the command to launch the executable:
%define run_command exec numactl --localalloc --physcpubind=\$tgtcore \$*
# echo this command to a file for debug purposes:
%define bindcmd22 echo %{run_command} > run.$SPECCOPYNUM.out 2>&1
# This line executes the fully formed command for the current copy:
%define bindcmd23 %{run_command}
# The submit writes out the script, provides it with arguments calculated above,
# and runs it.
#
# We are building our submit command based upon the bindcmd# lines that we
# created above. Note that the numbers have to match exactly or the script will
# not run properly. Also note that the first echo below uses ">" so that it
# creates (or truncates) the per-copy script file, while every following echo
# uses ">>" to append to it; the multi-line value ends at the EOS marker.
submit = <<EOS
echo '%{bindcmd01}' > %{bindcmdname}
echo '%{bindcmd02}' >> %{bindcmdname}
echo '%{bindcmd03}' >> %{bindcmdname}
echo '%{bindcmd04}' >> %{bindcmdname}
echo '%{bindcmd05}' >> %{bindcmdname}
echo '%{bindcmd06}' >> %{bindcmdname}
echo '%{bindcmd07}' >> %{bindcmdname}
echo '%{bindcmd08}' >> %{bindcmdname}
echo '%{bindcmd09}' >> %{bindcmdname}
echo '%{bindcmd10}' >> %{bindcmdname}
echo '%{bindcmd11}' >> %{bindcmdname}
echo '%{bindcmd12}' >> %{bindcmdname}
echo '%{bindcmd13}' >> %{bindcmdname}
echo '%{bindcmd14}' >> %{bindcmdname}
echo '%{bindcmd15}' >> %{bindcmdname}
echo '%{bindcmd16}' >> %{bindcmdname}
echo '%{bindcmd17}' >> %{bindcmdname}
echo '%{bindcmd18}' >> %{bindcmdname}
echo '%{bindcmd19}' >> %{bindcmdname}
echo '%{bindcmd20}' >> %{bindcmdname}
echo '%{bindcmd21}' >> %{bindcmdname}
echo '%{bindcmd22}' >> %{bindcmdname}
echo '%{bindcmd23}' >> %{bindcmdname}
# Now we echo the command to execute the script to a debug file:
%define script_run_command exec /bin/bash ./%{bindcmdname} $SPECCOPYNUM -- $command
echo %{script_run_command} > run.$SPECCOPYNUM.cmd
# And now we execute the script we just created:
%{script_run_command}
EOS
%endif

#------- Compilers ------------------------------------------------------------
# LLVM_DIR specifies the directory where LLVM and libraries are installed.
# The user should set this with the `-S LLVM_DIR=/path/to/install` runcpu
# option, or set it via the LLVM_DIR environment variable.
#
%ifndef %{LLVM_DIR}
%ifndef %{ENV_LLVM_DIR}
% error Please define LLVM_DIR to point to your LLVM installation.
%else
% define LLVM_DIR %{ENV_LLVM_DIR}
%endif
%endif

default:
LLVM_BIN_DIR = %{LLVM_DIR}/bin
LLVM_LIB_DIR = %{LLVM_DIR}/lib
LLVM_INCLUDE_DIR = %{LLVM_DIR}/include

# EDIT if needed: the preENV line adds library directories to the runtime
# path. You can adjust it, or add lines for other environment variables.
# See: https://www.spec.org/cpu2017/Docs/config.html#preenv # and: https://gcc.gnu.org/onlinedocs/gcc/Environment-Variables.html %if !defined(%{hugepages}) || %{hugepages} == 1 preENV_HUGETLB_MORECORE = yes preENV_LD_PRELOAD = /usr/lib64/libhugetlbfs.so %endif preENV_LD_LIBRARY_PATH = $[LLVM_LIB_DIR]:%{ENV_LD_LIBRARY_PATH} preENV_PATH = $[LLVM_BIN_DIR]:%{ENV_PATH} CC = $(LLVM_BIN_DIR)/clang %{debug_opt} %{model} %{lto_compiler_opt} CXX = $(LLVM_BIN_DIR)/clang++ -std=c++03 %{debug_opt} %{model} %{lto_compiler_opt} # -Mallocatable=03 is equivalent to GCC's -frealloc-lhs, which is enabled by default. FC = $(LLVM_BIN_DIR)/flang %{debug_opt} -Mallocatable=03 %{model} %{lto_compiler_opt} CLD = $(LLVM_BIN_DIR)/clang %{model} FLD = $(LLVM_BIN_DIR)/flang %{model} LDFLAGS = -v %{static_opt} -Wl,--build-id %{lto_linker_opt} %{extra_ldflags} # MATHLIBOPT defaults to "-lm". It is inserted before OPTIMIZATION_*LIBS, # when a C benchmark defines NEED_MATH. Both clang++ and flang adds -lm by # default (after -lstdc++ and -lpgmath, respectively). Setting all variables # explicitly ensures that libmathlib is always preferred over libm. 
# Reference: https://www.spec.org/cpu2017/Docs/makevars.html MATHLIBOPT = %{libm_linker_opt} -lm OPTIMIZATION_CXXLIBS = %{libm_linker_opt} OPTIMIZATION_FLIBS = %{libm_linker_opt} # How to say "Show me your version, please" CC_VERSION_OPTION = -v CXX_VERSION_OPTION = -v FC_VERSION_OPTION = -v intrate,intspeed: EXTRA_CLIBS = ${MATHLIBOPT} %if !defined(%{jemalloc}) || %{jemalloc} != 0 EXTRA_LDFLAGS = %{jemalloc_opt} %endif fprate,fpspeed: %if defined(%{jemalloc}) && %{jemalloc} != 0 EXTRA_LDFLAGS = %{jemalloc_opt} %endif default: %if %{bits} == 64 sw_base_ptrsize = 64-bit sw_peak_ptrsize = Not Applicable sw_other = jemalloc memory allocator V5.2.1 %else sw_base_ptrsize = 32-bit sw_peak_ptrsize = 32-bit %endif #--------- Portability -------------------------------------------------------- default: # data model applies to all benchmarks %if %{bits} == 32 # Strongly recommended because at run-time, operations using modern file # systems may fail spectacularly and frequently (or, worse, quietly and # randomly) if a program does not accommodate 64-bit metadata. EXTRA_PORTABILITY = -D_FILE_OFFSET_BITS=64 %else EXTRA_PORTABILITY = -DSPEC_LP64 %endif # Benchmark-specific portability (ordered by last 2 digits of bmark number) 500.perlbench_r: #lang='C' %if %{bits} == 32 % define suffix AARCH32 %else % define suffix AARCH64 %endif PORTABILITY = -DSPEC_%{os}_%{suffix} 523.xalancbmk_r: #lang='CXX' PORTABILITY = -DSPEC_%{os} #-------- Tuning Flags common to Base and Peak -------------------------------- # # Speed (OpenMP and Autopar allowed); enable with `-S openmp=1`. 
# %if %{bits} == 32 intspeed,fpspeed: # # Many of the speed benchmarks (6nn.benchmark_s) do not fit in 32 bits # If you wish to run SPECint2017_speed or SPECfp2017_speed, please use # # runcpu --define bits=64 # fail_build = 1 %elif %{openmp} == 1 intspeed,fpspeed: EXTRA_OPTIMIZE = -DSPEC_OPENMP -fopenmp -Wno-return-type preENV_OMP_THREAD_LIMIT = $[threads] preENV_OMP_PROC_BIND = spread # # 627.cam4 needs a big stack; the preENV will apply it to all # benchmarks in the set, as required by the rules. # preENV_OMP_STACKSIZE = 128M %endif #-------- Baseline Tuning Flags ---------------------------------------------- default=base: %if %{fastmath} == 1 COPTIMIZE = -O3 -mcpu=%{mcpu} -ffast-math CXXOPTIMIZE = -O3 -mcpu=%{mcpu} -ffast-math FOPTIMIZE = -O3 -mcpu=%{mcpu} -ffast-math %else COPTIMIZE = -O3 -mcpu=%{mcpu} CXXOPTIMIZE = -O3 -mcpu=%{mcpu} FOPTIMIZE = -O3 -mcpu=%{mcpu} -Kieee %endif EXTRA_CFLAGS = -mllvm -enable-loopinterchange-boole=true -mllvm -licm-safe-hoist=true \ -mllvm -enable-diamond-load-hoist=true %{extra_cflags} EXTRA_FFLAGS = -mllvm -enable-loopinterchange-boole=true %{extra_cflags} EXTRA_CXXFLAGS = -mllvm -enable-loopinterchange-boole=true -mllvm -licm-safe-hoist=true %{extra_cflags} intrate,intspeed=base: %if %{bits} == 32 EXTRA_COPTIMIZE = -fgnu89-inline -fno-strict-aliasing -mllvm -disable-extra-gate-for-loop-heuristic=false $(dev_10x_cflags) %else EXTRA_COPTIMIZE = -fno-strict-aliasing -mllvm -enable-loop-load-widen=true -mllvm -disable-extra-gate-for-loop-heuristic=false $(dev_10x_cflags) EXTRA_FOPTIMIZE = -mllvm -enable-large-loop-bp-enhancement=true LDCFLAGS = -z muldefs -Wl,-mllvm,-licm-safe-hoist=true -Wl,-mllvm,-enable-diamond-load-hoist=true -Wl,-mllvm,-enable-loop-load-widen=true \ -Wl,-mllvm,-disable-extra-gate-for-loop-heuristic=false -Wl,-mllvm,-aarch64-optimize-vector-mul=true $(dev_10x_ldcflags) LDFFLAGS = -Wl,-mllvm,-enable-large-loop-bp-enhancement=true LDCXXFLAGS = -Wl,-mllvm,-licm-safe-hoist=true %endif fprate,fpspeed=base: %if 
%{fastmath} == 1 EXTRA_CXXOPTIMIZE = -mrecip=div %endif LDFFLAGS = -Wl,-mllvm,-nonconstant-prefetch-stride=true # Notes about the above: # - 500.perlbench_r/600.perlbench_s needs -fno-strict-aliasing. # - 502.gcc_r/602.gcc_s also needs -fgnu89-inline or -z muldefs. # - For 'base', all benchmarks in a set must use the same options. # - Therefore, all base benchmarks get the above. See: # www.spec.org/cpu2017/Docs/runrules.html#BaseFlags # www.spec.org/cpu2017/Docs/benchmarks/500.perlbench_r.html # www.spec.org/cpu2017/Docs/benchmarks/502.gcc_r.html #-------- Peak Tuning Flags ---------------------------------------------- default=peak: %if %{pgo} != 0 # instrumented PGO benchprofdir = %{profdir}/${num} profdata = ${benchprofdir}/${num}.profdata fdo_pre0 = rm -rf ${benchprofdir}; mkdir -p ${benchprofdir} PASS1_OPTIMIZE = -fprofile-generate=${benchprofdir} fdo_run1 = ${command} fdo_post1 = ${LLVM_BIN_DIR}/llvm-profdata merge -output=${profdata} ${benchprofdir}/* PASS2_OPTIMIZE = -fprofile-use=${profdata} %endif EXTRA_CFLAGS = -mllvm -enable-loopinterchange-boole=true -mllvm -licm-safe-hoist=true \ -mllvm -enable-diamond-load-hoist=true %{extra_cflags} EXTRA_FFLAGS = -mllvm -enable-loopinterchange-boole=true %{extra_cflags} EXTRA_CXXFLAGS = -mllvm -enable-loopinterchange-boole=true -mllvm -licm-safe-hoist=true %{extra_cflags} intrate,intspeed=peak: OPTIMIZE = -Ofast -mcpu=%{mcpu} EXTRA_COPTIMIZE = -fno-strict-aliasing -mllvm -enable-loop-load-widen=true -mllvm -disable-extra-gate-for-loop-heuristic=false $(dev_10x_cflags) EXTRA_FOPTIMIZE = -mllvm -enable-large-loop-bp-enhancement=true LDCFLAGS = -z muldefs -Wl,-mllvm,-licm-safe-hoist=true -Wl,-mllvm,-enable-diamond-load-hoist=true -Wl,-mllvm,-enable-loop-load-widen=true \ -Wl,-mllvm,-disable-extra-gate-for-loop-heuristic=false -Wl,-mllvm,-aarch64-optimize-vector-mul=true $(dev_10x_ldcflags) LDFFLAGS = -Wl,-mllvm,-enable-large-loop-bp-enhancement=true LDCXXFLAGS = -Wl,-mllvm,-licm-safe-hoist=true fprate,fpspeed=peak: 
%if %{fastmath} == 1 COPTIMIZE = -Ofast -mcpu=%{mcpu} CXXOPTIMIZE = -Ofast -mcpu=%{mcpu} FOPTIMIZE = -Ofast -mcpu=%{mcpu} EXTRA_CXXOPTIMIZE = -mrecip=div %else COPTIMIZE = -O3 -mcpu=%{mcpu} CXXOPTIMIZE = -O3 -mcpu=%{mcpu} FOPTIMIZE = -O3 -mcpu=%{mcpu} -Kieee %endif LDFFLAGS = -Wl,-mllvm,-nonconstant-prefetch-stride=true # Specify extra options (or override defaults) for individual benchmarks. # Some are required for functional correctness; see SPEC CPU2017 documentation. 500.perlbench_r,600.perlbench_s=peak: EXTRA_COPTIMIZE = -fno-strict-aliasing -fno-unsafe-math-optimizations -fno-finite-math-only 502.gcc_r,602.gcc_s=peak: EXTRA_COPTIMIZE = -fgnu89-inline 505.mcf_r,605.mcf_s=peak: EXTRA_COPTIMIZE = -fno-strict-aliasing 531.deepsjeng_r,631.deepsjeng_s=peak: EXTRA_CXXOPTIMIZE = -mllvm -inline-threshold=1000 554.roms_r=peak: EXTRA_OPTIMIZE = -mllvm -force-vector-interleave=1 -mllvm -unroll-force-peel-count=3 623.xalancbmk_s=peak: EXTRA_CXXOPTIMIZE = -mllvm -inline-threshold=1000 # Turn off PGO for benchmarks where it hurts performance, unless it is forced. 
%if %{pgo} != 2 505.mcf_r,520.omnetpp_r,525.x264_r,541.leela_r=peak: feedback = no 525.x264_r=peak: EXTRA_COPTIMIZE = -fcommon 605.mcf_s,620.omnetpp_s,625.x264_s,641.leela_s=peak: feedback = no 503.bwaves_r,519.lbm_r,538.imagick_r=peak: feedback = no 603.bwaves_s,619.lbm_s,638.imagick_s=peak: feedback = no %endif #------------------------------------------------------------------------------ # Tester and System Descriptions - EDIT all sections below this point #------------------------------------------------------------------------------ # For info about any field, see # https://www.spec.org/cpu2017/Docs/config.html#fieldname # Example: https://www.spec.org/cpu2017/Docs/config.html#hw_memory #------------------------------------------------------------------------------- #--------- EDIT to match your version ----------------------------------------- default: sw_compiler000 = C/C++/Fortran: Version 1.3.3 of BiSheng #--------- EDIT info about you ------------------------------------------------ # To understand the difference between hw_vendor/sponsor/tester, see: # https://www.spec.org/cpu2017/Docs/config.html#test_sponsor intrate,intspeed,fprate,fpspeed: # Important: keep this line hw_vendor = Huawei hw_nthreadspercore = 1 hw_ncores = 128 tester = Peng Cheng Laboratory test_sponsor = Peng Cheng Laboratory license_num = 5036 # prepared_by = # Ima Pseudonym # Whatever you like: is never output #--------- EDIT system availability dates ------------------------------------- intrate,intspeed,fprate,fpspeed: # Important: keep this line # Example # Brief info about field hw_avail = Sep-2019 sw_avail = Jul-2021 # Note: Delete the __HASH__ section before committing changes to GitLab! 
# The following settings were obtained by running the sysinfo_program # 'specperl $[top]/bin/sysinfo' (sysinfo:SHA:679c83684f6f4fc369a093999b6661d0a378911de2a006d3245423ad80d3fb9a) default: notes_plat_sysinfo_000 = notes_plat_sysinfo_005 = Sysinfo program /home/spec2017/bin/sysinfo notes_plat_sysinfo_010 = Rev: r6622 of 2021-04-07 982a61ec0915b55891ef0e16acafc64d notes_plat_sysinfo_015 = running on localhost.localdomain Tue Oct 12 19:21:47 2021 notes_plat_sysinfo_020 = notes_plat_sysinfo_025 = SUT (System Under Test) info as seen by some common utilities. notes_plat_sysinfo_030 = For more information on this section, see notes_plat_sysinfo_035 = https://www.spec.org/cpu2017/Docs/config.html#sysinfo notes_plat_sysinfo_040 = notes_plat_sysinfo_045 = From /proc/cpuinfo notes_plat_sysinfo_050 = * notes_plat_sysinfo_055 = * Did not identify cpu model. If you would notes_plat_sysinfo_060 = * like to write your own sysinfo program, see notes_plat_sysinfo_065 = * www.spec.org/cpu2017/config.html#sysinfo notes_plat_sysinfo_070 = * notes_plat_sysinfo_075 = * notes_plat_sysinfo_080 = * 0 "physical id" tags found. Perhaps this is an older system, notes_plat_sysinfo_085 = * or a virtualized system. Not attempting to guess how to notes_plat_sysinfo_090 = * count chips/cores for this system. notes_plat_sysinfo_095 = * notes_plat_sysinfo_100 = 128 "processors" notes_plat_sysinfo_105 = cores, siblings (Caution: counting these is hw and system dependent. The following notes_plat_sysinfo_110 = excerpts from /proc/cpuinfo might not be reliable. Use with caution.) 
notes_plat_sysinfo_115 = notes_plat_sysinfo_120 = From lscpu from util-linux 2.35.2: notes_plat_sysinfo_125 = Architecture: aarch64 notes_plat_sysinfo_130 = CPU op-mode(s): 64-bit notes_plat_sysinfo_135 = Byte Order: Little Endian notes_plat_sysinfo_140 = CPU(s): 128 notes_plat_sysinfo_145 = On-line CPU(s) list: 0-127 notes_plat_sysinfo_150 = Thread(s) per core: 1 notes_plat_sysinfo_155 = Core(s) per socket: 64 notes_plat_sysinfo_160 = Socket(s): 2 notes_plat_sysinfo_165 = NUMA node(s): 4 notes_plat_sysinfo_170 = Vendor ID: HiSilicon notes_plat_sysinfo_175 = Model: 0 notes_plat_sysinfo_180 = Model name: Kunpeng-920 notes_plat_sysinfo_185 = Stepping: 0x1 notes_plat_sysinfo_190 = BogoMIPS: 200.00 notes_plat_sysinfo_195 = L1d cache: 8 MiB notes_plat_sysinfo_200 = L1i cache: 8 MiB notes_plat_sysinfo_205 = L2 cache: 64 MiB notes_plat_sysinfo_210 = L3 cache: 128 MiB notes_plat_sysinfo_215 = NUMA node0 CPU(s): 0-31 notes_plat_sysinfo_220 = NUMA node1 CPU(s): 32-63 notes_plat_sysinfo_225 = NUMA node2 CPU(s): 64-95 notes_plat_sysinfo_230 = NUMA node3 CPU(s): 96-127 notes_plat_sysinfo_235 = Vulnerability Itlb multihit: Not affected notes_plat_sysinfo_240 = Vulnerability L1tf: Not affected notes_plat_sysinfo_245 = Vulnerability Mds: Not affected notes_plat_sysinfo_250 = Vulnerability Meltdown: Not affected notes_plat_sysinfo_255 = Vulnerability Spec store bypass: Not affected notes_plat_sysinfo_260 = Vulnerability Spectre v1: Mitigation; __user pointer sanitization notes_plat_sysinfo_265 = Vulnerability Spectre v2: Not affected notes_plat_sysinfo_270 = Vulnerability Srbds: Not affected notes_plat_sysinfo_275 = Vulnerability Tsx async abort: Not affected notes_plat_sysinfo_280 = Flags: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics notes_plat_sysinfo_285 = fphp asimdhp cpuid asimdrdm jscvt fcma dcpop asimddp asimdfhm notes_plat_sysinfo_290 = notes_plat_sysinfo_295 = From lscpu --cache: notes_plat_sysinfo_300 = NAME ONE-SIZE ALL-SIZE WAYS TYPE LEVEL SETS PHY-LINE 
COHERENCY-SIZE notes_plat_sysinfo_305 = L1d 64K 8M 4 Data 1 256 64 notes_plat_sysinfo_310 = L1i 64K 8M 4 Instruction 1 256 64 notes_plat_sysinfo_315 = L2 512K 64M 8 Unified 2 1024 64 notes_plat_sysinfo_320 = L3 32M 128M 15 Unified 3 2048 128 notes_plat_sysinfo_325 = notes_plat_sysinfo_330 = From numactl --hardware notes_plat_sysinfo_335 = WARNING: a numactl 'node' might or might not correspond to a physical chip. notes_plat_sysinfo_340 = available: 4 nodes (0-3) notes_plat_sysinfo_345 = node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 notes_plat_sysinfo_350 = 28 29 30 31 notes_plat_sysinfo_355 = node 0 size: 130329 MB notes_plat_sysinfo_360 = node 0 free: 101996 MB notes_plat_sysinfo_365 = node 1 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 notes_plat_sysinfo_370 = 57 58 59 60 61 62 63 notes_plat_sysinfo_375 = node 1 size: 130937 MB notes_plat_sysinfo_380 = node 1 free: 110916 MB notes_plat_sysinfo_385 = node 2 cpus: 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 notes_plat_sysinfo_390 = 89 90 91 92 93 94 95 notes_plat_sysinfo_395 = node 2 size: 130937 MB notes_plat_sysinfo_400 = node 2 free: 114246 MB notes_plat_sysinfo_405 = node 3 cpus: 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 notes_plat_sysinfo_410 = 115 116 117 118 119 120 121 122 123 124 125 126 127 notes_plat_sysinfo_415 = node 3 size: 129912 MB notes_plat_sysinfo_420 = node 3 free: 113872 MB notes_plat_sysinfo_425 = node distances: notes_plat_sysinfo_430 = node 0 1 2 3 notes_plat_sysinfo_435 = 0: 10 16 32 33 notes_plat_sysinfo_440 = 1: 16 10 25 32 notes_plat_sysinfo_445 = 2: 32 25 10 16 notes_plat_sysinfo_450 = 3: 33 32 16 10 notes_plat_sysinfo_455 = notes_plat_sysinfo_460 = From /proc/meminfo notes_plat_sysinfo_465 = MemTotal: 534648768 kB notes_plat_sysinfo_470 = HugePages_Total: 0 notes_plat_sysinfo_475 = Hugepagesize: 524288 kB notes_plat_sysinfo_480 = 
notes_plat_sysinfo_485 = /sbin/tuned-adm active notes_plat_sysinfo_490 = Current active profile: throughput-performance notes_plat_sysinfo_495 = notes_plat_sysinfo_500 = From /etc/*release* /etc/*version* notes_plat_sysinfo_505 = openEuler-release: openEuler release 20.03 (LTS-SP2) notes_plat_sysinfo_510 = os-release: notes_plat_sysinfo_515 = NAME="openEuler" notes_plat_sysinfo_520 = VERSION="20.03 (LTS-SP2)" notes_plat_sysinfo_525 = ID="openEuler" notes_plat_sysinfo_530 = VERSION_ID="20.03" notes_plat_sysinfo_535 = PRETTY_NAME="openEuler 20.03 (LTS-SP2)" notes_plat_sysinfo_540 = ANSI_COLOR="0;31" notes_plat_sysinfo_545 = notes_plat_sysinfo_550 = system-release: openEuler release 20.03 (LTS-SP2) notes_plat_sysinfo_555 = system-release-cpe: cpe:/o:openEuler:openEuler:20.03LTS_SP2:ga:server notes_plat_sysinfo_560 = notes_plat_sysinfo_565 = uname -a: notes_plat_sysinfo_570 = Linux localhost.localdomain 4.19.90-2106.3.0.0095.oe1.aarch64 #1 SMP Wed Jun 23 notes_plat_sysinfo_575 = 14:51:58 UTC 2021 aarch64 aarch64 aarch64 GNU/Linux notes_plat_sysinfo_580 = notes_plat_sysinfo_585 = Kernel self-reported vulnerability status: notes_plat_sysinfo_590 = notes_plat_sysinfo_595 = CVE-2018-12207 (iTLB Multihit): Not affected notes_plat_sysinfo_600 = CVE-2018-3620 (L1 Terminal Fault): Not affected notes_plat_sysinfo_605 = Microarchitectural Data Sampling: Not affected notes_plat_sysinfo_610 = CVE-2017-5754 (Meltdown): Not affected notes_plat_sysinfo_615 = CVE-2018-3639 (Speculative Store Bypass): Not affected notes_plat_sysinfo_620 = CVE-2017-5753 (Spectre variant 1): Mitigation: __user pointer notes_plat_sysinfo_625 = sanitization notes_plat_sysinfo_630 = CVE-2017-5715 (Spectre variant 2): Not affected notes_plat_sysinfo_635 = CVE-2020-0543 (Special Register Buffer Data Sampling): Not affected notes_plat_sysinfo_640 = CVE-2019-11135 (TSX Asynchronous Abort): Not affected notes_plat_sysinfo_645 = notes_plat_sysinfo_650 = run-level 3 Oct 9 12:48 notes_plat_sysinfo_655 = 
notes_plat_sysinfo_660 = SPEC is set to: /home/spec2017 notes_plat_sysinfo_665 = Filesystem Type Size Used Avail Use% Mounted on notes_plat_sysinfo_670 = /dev/mapper/openeuler-home ext4 15T 134G 14T 1% /home notes_plat_sysinfo_675 = notes_plat_sysinfo_680 = From /sys/devices/virtual/dmi/id notes_plat_sysinfo_685 = Vendor: Huawei notes_plat_sysinfo_690 = Product: TaiShan 200 (Model 2280) notes_plat_sysinfo_695 = Serial: 2102312PRNN0KC001136 notes_plat_sysinfo_700 = notes_plat_sysinfo_705 = Additional information from dmidecode 3.2 follows. WARNING: Use caution when you notes_plat_sysinfo_710 = interpret this section. The 'dmidecode' program reads system data which is "intended to notes_plat_sysinfo_715 = allow hardware to be accurately determined", but the intent may not be met, as there are notes_plat_sysinfo_720 = frequent changes to hardware, firmware, and the "DMTF SMBIOS" standard. notes_plat_sysinfo_725 = Memory: notes_plat_sysinfo_730 = 16x NO DIMM NO DIMM notes_plat_sysinfo_735 = 16x Samsung M393A4K40CB2-CVF 32 GB 2 rank 2933 notes_plat_sysinfo_740 = notes_plat_sysinfo_745 = BIOS: notes_plat_sysinfo_750 = BIOS Vendor: Huawei Corp. notes_plat_sysinfo_755 = BIOS Version: 1.80 notes_plat_sysinfo_760 = BIOS Date: 09/23/2021 notes_plat_sysinfo_765 = BIOS Revision: 1.80 notes_plat_sysinfo_770 = notes_plat_sysinfo_775 = (End of data from sysinfo program) hw_cpu_name = Huawei Kunpeng 920 7260 hw_cpu_max_mhz = 2600 hw_cpu_nominal_mhz = 2600 hw_ncpuorder = 1,2 chips hw_pcache = 64 KB I + 64 KB D on chip per core hw_scache = 512 KB I+D on chip per core hw_tcache = 64 MB I+D on chip per chip hw_ocache = None hw_other = None hw_memory000 = 512 GB (16 x 32 GB 2Rx8 PC4-2933P-R) hw_nchips = 2 prepared_by = Peng Cheng Laboratory fw_bios = Huawei Corp. 
Version 1.80 released Sep-2021 sw_file = ext4 sw_os000 = openEuler release 20.03 (LTS-SP2) sw_os001 = 4.19.90-2106.3.0.0095.oe1.aarch64 sw_state000 = Run level 3 # End of settings added by sysinfo_program # The following section was added automatically, and contains settings that # did not appear in the original configuration file, but were added to the # raw file after the run. default: hw_disk001 = 15 TB LVM RAID-0 stripe on hw_disk002 = 4 x 3.638 TB SATA HDD, 7200 RPM hw_model000 = Huawei TaiShan 200 Server (Model 2280) hw_model001 = (2.6 GHz,Huawei Kunpeng 920 7260) power_management000 = BIOS and OS set to prefer performance at the cost power_management001 = of additional power usage notes_plat_000 = BIOS configuration: notes_plat_005 = Power Policy Set to Performance notes_plat_010 = Custom Refresh Rate Set to 64ms notes_plat_015 = CPU Prefetcher Set to Enabled notes_os_000 = Stack size set to unlimited using "ulimit -s unlimited" notes_000 =Binaries were compiled on a system with 2x ARM Kunpeng 920 7260 CPU + 512 GB notes_005 =memory using 16x Samsung M393A4K40CB2-CVF 32 GB 2 rank 2933 notes_010 =Transparent Huge Pages enabled by default notes_015 =Prior to runcpu invocation notes_020 =Filesystem page cache synced and cleared with: notes_025 =sync; echo 3> /proc/sys/vm/drop_caches notes_030 =jemalloc: configured and built at default for 64bit targets notes_035 =jemalloc: built with the openEuler V20.03, and the system compiler gcc 7.3.0 notes_040 =jemalloc: sources available via jemalloc.net notes_045 =NA: The test sponsor attests, as of date of publication, that CVE-2017-5754 (Meltdown) notes_050 =is mitigated in the system as tested and documented. notes_055 =Yes: The test sponsor attests, as of date of publication, that CVE-2017-5753 (Spectre variant 1) notes_060 =is mitigated in the system as tested and documented.
notes_065 =NA: The test sponsor attests, as of date of publication, that CVE-2017-5715 (Spectre variant 2) notes_070 =is mitigated in the system as tested and documented. notes_submit_000 = The config file option 'submit' was used. notes_submit_005 = 'numactl' was used to bind copies to the cores. notes_submit_010 = For details, please see the config file. notes_comp_iler_000 = The BiSheng Compiler Suite is available at. notes_comp_iler_005 = https://www.hikunpeng.com/en/developer/devkit/compiler notes_plat_update_000 =The sysinfo is missing the cpu name, the processor under test is Huawei Kunpeng 920 7260.