# Invocation command line:
# /cpu2006/bin/runspec --reportable -c amd123GH.cfg -F amd123GH-flags.xml --rate 16 -T all int
# output_root was not used for this run
############################################################################
# AMD64 SPEC CPU2006 Configuration File for 64-bit Linux (Quad-Core)
#
# Compiler name/version: PGI 7.2, Pathscale 3.1
# Operating system version: 64-bit SLES10 SP1
# Hardware: AMD Opteron (Quad-core)
# FP Base Pointer Size: 64-bit only
# FP Peak Pointer Size: 32/64-bit
# INT Base Pointer Size: 32/64-bit
# INT Peak Pointer Size: 32/64-bit
# Auto Parallelization: Not Used
#
# Important! Please run with your stack size set to 'unlimited'.
# Failure to do so may cause 483.xalancbmk to get a stack overflow during execution.
# Using csh: unlimit
# Using bash: ulimit -s unlimited
#
# Set your LD_LIBRARY_PATH to the location of the dependency runtime libraries
#
# Please adjust the SHL_DIR variable to the directory containing the Smartheap library.
#####################################################################
# Header Section
#####################################################################
ext = amd123GH
ignore_errors = no
tune = base,peak
output_format = asc,pdf,raw,flags,cfg,html,csv
size = test,train,ref
check_md5 = yes
reportable = yes
env_vars = no
teeout = yes
teerunout = yes
mean_anyway = yes

# Adjust the make jobs flag for the number of cores.
makeflags = -j 4

#####################################################################
# Macro section
####################################################################
# Modify this section to use the appropriate architecture flags
%define pgi_tp64 -tp barcelona-64
%define pgi_tp32 -tp barcelona

# Adjust the build jobs to the number of concurrent build processes
%define build_jobs 4

####################################################################
# Include file containing the SUT hardware information
# as well as the submit command, tester information and notes
####################################################################
#include: AMD123sut.inc
# ----- Begin inclusion of 'AMD123sut.inc'
############################################################################
#####################################################################
# Submit Section
#####################################################################
# RATE
# This config file is set to run these binaries on a 4P, 16 core system
# (16 bind entries spread over memory nodes 0-3). If this does not match
# the description of your system, change this config file, specifically
# "bind0","bind1","bind2", etc. to match your system. Use "man numactl"
# and "numactl --hardware" to better understand how to use this command.
#
# Each bindN entry names CPU N via --physcpubind and memory node (N mod 4)
# via -m.
bind0 = numactl -m 0 --physcpubind=0
bind1 = numactl -m 1 --physcpubind=1
bind2 = numactl -m 2 --physcpubind=2
bind3 = numactl -m 3 --physcpubind=3
bind4 = numactl -m 0 --physcpubind=4
bind5 = numactl -m 1 --physcpubind=5
bind6 = numactl -m 2 --physcpubind=6
bind7 = numactl -m 3 --physcpubind=7
bind8 = numactl -m 0 --physcpubind=8
bind9 = numactl -m 1 --physcpubind=9
bind10 = numactl -m 2 --physcpubind=10
bind11 = numactl -m 3 --physcpubind=11
bind12 = numactl -m 0 --physcpubind=12
bind13 = numactl -m 1 --physcpubind=13
bind14 = numactl -m 2 --physcpubind=14
bind15 = numactl -m 3 --physcpubind=15

# SPEED
# Below is an example numactl command for an auto-parallel speed run using
# 4 threads on a single Quad-core chip.
# Please adjust as needed for your system
#
# bind0 = numactl -l --physcpubind=0,1,2,3
use_submit_for_speed = 1
# Each run command is written to run.sh and launched under the $BIND prefix.
submit = echo "$command" > run.sh ; $BIND bash run.sh

####################################################################
# Tester information
####################################################################
license_num = 11
prepared_by = IBM Corporation
tester = IBM Corporation
test_sponsor = IBM Corporation
test_date = Jun-2008
hw_vendor = IBM Corporation
hw_model = IBM BladeCenter LS42 (AMD Opteron 8347 HE)

####################################################################
# Hardware information
####################################################################
default=default=default=default:
hw_avail = Sep-2008
hw_cpu_name = AMD Opteron 8347 HE
hw_cpu_mhz = 1900
hw_fpu = Integrated
hw_nchips = 4
hw_ncores = 16
hw_ncoresperchip = 4
hw_nthreadspercore = 1
hw_ncpuorder = 1,2,3,4 chips
hw_pcache = 64 KB I + 64 KB D on chip per core
hw_scache = 512 KB I+D on chip per core
hw_tcache = 2 MB I+D on chip per chip
hw_ocache = None
hw_disk = 1 x 73 GB SAS, 10000 RPM
hw_memory = 64 GB (16 x 4 GB DDR2-6400 ECC)
hw_other = None
sw_file = ext2
sw_os000 = SuSE Linux Enterprise Server 10 (x86_64) SP1,
sw_os001 = Kernel 2.6.16.46-0.12-smp
sw_state = Run level 3 (Full multiuser with network)

#####################################################################
# Notes
#####################################################################
notes_os_000 = 'numactl' was used to bind copies to the cores
notes_os_005 = 'ulimit -s unlimited' was used to set environment stack size
notes_os_010 = 'ulimit -l 4915200' was used to set environment locked pages in memory limit
# Uncomment the section you need
# 1P configuration
# notes_os_015 = 'ulimit -l 1228800' was used to set environment locked pages in memory quantity
# notes_os_020 = Set vm/nr_hugepages=600 in /etc/sysctl.conf
# 2P configuration
# notes_os_015 = 'ulimit -l 2457600' was used to set environment locked pages in memory quantity
# notes_os_020 = Set vm/nr_hugepages=1200 in /etc/sysctl.conf
# 4P configuration
# notes_os_015 = 'ulimit -l 4915200' was used to set environment locked pages in memory quantity
# notes_os_020 = Set vm/nr_hugepages=2400 in /etc/sysctl.conf
notes_os_015 = Environment variable PGI_HUGE_PAGES set to 896
notes_os_020 = Set vm/nr_hugepages=14336 in /etc/sysctl.conf
notes_os_025 = mount -t hugetlbfs nodev /mnt/hugepages
notes_os_030 = Processor Performance States Disabled in BIOS
notes_os_035 = Memory ChipKill Disabled in BIOS
# ---- End inclusion of '/cpu2006/config/AMD123sut.inc'

############################ Software Info ############################
fp=default=default=default:
sw_peak_ptrsize = 32/64-bit
sw_base_ptrsize = 64-bit
sw_auto_parallel = No
sw_other = None
sw_avail = May-2008
sw_compiler1 = PGI Server Complete Version 7.2
sw_compiler2 = PathScale Compiler Suite Version 3.1

int=default=default=default:
sw_peak_ptrsize = 32/64-bit
sw_base_ptrsize = 32/64-bit
sw_auto_parallel = No
sw_other = SmartHeap 8.0 32-bit Library for Linux
sw_avail = May-2008
sw_compiler000 = PGI Server Complete Version 7.2
sw_compiler001 = PathScale Compiler Suite Version 3.1

default=default=default=default:
#####################################################################
# Compiler selection
# default compiler is PGI
#
CC = pgcc -w
CXX = pgcpp -w
FC = pgf95 -w
# Directory searched (-L) for the SmartHeap library by the peak sections
# that link -lsmartheap.
SHL_DIR = /root/work/cpu2006/amd123GH.libs/32

#####################################################################
# Optimization
#####################################################################
default=base=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi

fp=base=default=default:
FOPTIMIZE = -fast -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 %{pgi_tp64}
COPTIMIZE = -fast -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 %{pgi_tp64}
CXXOPTIMIZE = -fast -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 --zc_eh %{pgi_tp64}
# INT base: C is built with the 64-bit target macro, C++ with the 32-bit one.
int=base=default=default:
COPTIMIZE = -fast -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 %{pgi_tp64}
CXXOPTIMIZE = -fastsse -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 --zc_eh %{pgi_tp32}

#####################################################################
## FP Peak Flags
#####################################################################
# Suite-wide peak defaults; individual benchmarks below set feedback = 1
# where they supply PASS1_*/PASS2_* flags.
fp=peak=default=default:
EXTRA_LIBS =
feedback = 0
basepeak = 0

410.bwaves=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95 -w
FOPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc -Mprefetch=distance:12,nta %{pgi_tp64}
PASS1_FFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_FFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo
PASS2_LDFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo
feedback = 1

416.gamess=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -O2 -OPT:Ofast:ro=3:unroll_size=256
PASS1_FFLAGS = -fb_create fbdata
PASS2_FFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

433.milc=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Msmartalloc=huge:150 -Msafeptr -Mfprelaxed -Mipa=jobs:%{build_jobs},inline,arg,const,ptr,shape %{pgi_tp64}

434.zeusmp=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95 -w
FOPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc=huge:150 -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}

435.gromacs=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
FC = pgf95 -w
CC = pgcc -w
COPTIMIZE = -fast -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 %{pgi_tp64}
FOPTIMIZE = -fast -Mfpapprox=rsqrt -Mipa=jobs:%{build_jobs},fast,inline -Mfprelaxed -Msmartalloc=huge:150 %{pgi_tp64}

436.cactusADM=peak=default=default:
CC = pathcc -march=barcelona
FC = pathf95 -march=barcelona
COPTIMIZE = -Ofast -WOPT:aggstr=0
FOPTIMIZE = -Ofast -WOPT:aggstr=0
PASS1_CFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS1_FFLAGS = -fb_create fbdata
PASS2_FFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

437.leslie3d=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -Ofast -m3dnow -OPT:unroll_size=256 -CG:load_exe=0 -OPT:malloc_alg=1

444.namd=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CXX = pgcpp -w
CXXOPTIMIZE = -fast -Mfprelaxed -Msmartalloc=huge:150 --zc_eh -Mnodepchk -Munroll=n:4,m:8 %{pgi_tp64}
PASS1_CXXFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CXXFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo
PASS2_LDFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo
feedback = 1

447.dealII=peak=default=default:
CXX = pathCC -march=barcelona
# Needed to avoid -DSPEC_CPU_LP64
PORTABILITY =
%ifdef %{gnu3_fe}
CXXOPTIMIZE = -Ofast -INLINE:aggressive=on -LNO:opt=0 -OPT:alias=disjoint -m32 -fno-exceptions
%else
CXXOPTIMIZE = -Ofast -static -INLINE:aggressive=on -OPT:malloc_alg=1 -m32 -fno-exceptions
LDCXXFLAGS = -lm
%endif

450.soplex=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -m32 -O3 -TENV:frame_pointer=off -LNO:prefetch=1
# Needed to avoid -DSPEC_CPU_LP64
PORTABILITY =
PASS1_CXXFLAGS = -fb_create fbdata
PASS2_CXXFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

453.povray=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast -CG:load_exe=0
PASS1_CXXFLAGS = -fb_create fbdata
PASS2_CXXFLAGS = -fb_opt fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

454.calculix=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
FC = pgf95 -w
COPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc=huge:150 -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}
FOPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc=huge:150 -Mipa=jobs:%{build_jobs},fast,inline %{pgi_tp64}

459.GemsFDTD=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -Ofast -LNO:fission=2:simd=2 -OPT:malloc_alg=1

465.tonto=peak=default=default:
FC = pathf95 -march=barcelona
FOPTIMIZE = -Ofast -OPT:malloc_alg=1 -OPT:alias=no_f90_pointer_alias -LNO:blocking=off -CG:load_exe=1 -IPA:plimit=525

470.lbm=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -Ofast -m3dnow

481.wrf=peak=default=default:
CC = pathcc -march=barcelona
FC = pathf95 -march=barcelona
CPORTABILITY = -DSPEC_CPU_LINUX
FPORTABILITY = -fno-second-underscore
COPTIMIZE = -Ofast
FOPTIMIZE = -Ofast -LNO:blocking=off:prefetch_ahead=10 -OPT:malloc_alg=1 -m3dnow -LANG:copyinout=off -IPA:callee_limit=5000

482.sphinx3=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -Ofast

#####################################################################
## INT Peak Flags
#####################################################################
int=peak=default=default:
EXTRA_LIBS =
feedback = 0
basepeak = 0

400.perlbench=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -Ofast -IPA:plimit=20000 -LNO:opt=0 -WOPT:if_conv=0 -CG:local_sched_alg=1
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

401.bzip2=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
COPTIMIZE = -fast -O4 -Msmartalloc=huge:150 -Mnounroll %{pgi_tp64}
PASS1_CFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS = -Mpfo
PASS2_LDFLAGS = -Mpfo
feedback = 1

403.gcc=peak=default=default:
CC = pathcc -march=barcelona
PORTABILITY =
COPTIMIZE = -m32 -O3 -OPT:Ofast
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

429.mcf=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Mipa=jobs:%{build_jobs},fast,inline:1 -Msmartalloc=huge:150 %{pgi_tp32}

445.gobmk=peak=default=default:
CC = pathcc -march=barcelona
COPTIMIZE = -O3 -OPT:alias=restrict -LNO:opt=0 -CG:p2align=on
PASS1_CFLAGS = -fb_create fbdata
PASS1_LDFLAGS = -fb_create fbdata
PASS2_CFLAGS = -fb_opt fbdata
PASS2_LDFLAGS = -fb_opt fbdata
feedback = 1

456.hmmer=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Munroll=n:8 -Msmartalloc=huge:150 -Mfprelaxed -Mvect=partial -Msafeptr -Mipa=jobs:%{build_jobs},const,ptr,arg,inline %{pgi_tp64}

458.sjeng=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Msmartalloc=huge:150 -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS = -Mpfi
PASS1_LDFLAGS = -Mpfi
PASS2_CFLAGS = -Mipa=jobs:%{build_jobs},fast,inline:1,noarg -Mpfo
PASS2_LDFLAGS = -Mipa=jobs:%{build_jobs},fast,inline:1,noarg -Mpfo
feedback = 1

462.libquantum=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Mfprelaxed -Msmartalloc=huge:150 -Munroll=m:8 -Mipa=jobs:%{build_jobs},fast,inline,noarg %{pgi_tp64}

464.h264ref=peak=default=default:
EXTRA_LDFLAGS = -Bstatic_pgi
CC = pgcc -w
COPTIMIZE = -fastsse -Msmartalloc=huge:150 -Mfprelaxed %{pgi_tp64}
PASS1_CFLAGS = -Mpfi=indirect
PASS1_LDFLAGS = -Mpfi=indirect
PASS2_CFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo=indirect
PASS2_LDFLAGS = -Mipa=jobs:%{build_jobs},fast,inline -Mpfo=indirect
feedback = 1

471.omnetpp=peak=default=default:
basepeak = 1

473.astar=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast -TENV:frame_pointer=off -WOPT:if_conv=0 -GRA:optimize_boundary=on -IPA:plimit=525 -m32
EXTRA_CXXLIBS = -L$(SHL_DIR) -lsmartheap

483.xalancbmk=peak=default=default:
CXX = pathCC -march=barcelona
CXXOPTIMIZE = -Ofast -m32 -OPT:unroll_times_max=8 -CG:push_pop_int_saved_regs=off:ptr_load_use=0
EXTRA_CXXLIBS = -L$(SHL_DIR) -lsmartheap

#####################################################################
# Portability
#####################################################################
fp=default=default=default:
PORTABILITY = -DSPEC_CPU_LP64

int=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64

400.perlbench=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX_X64

# The peak builds of 403.gcc (-m32) and 429.mcf (%{pgi_tp32}) use 32-bit
# targets, so the -DSPEC_CPU_LP64 set for int above is cleared for them.
403.gcc=peak=default=default:
CPORTABILITY =

429.mcf=peak=default=default:
CPORTABILITY =

435.gromacs=default=default=default:
LDPORTABILITY = -Mnomain

436.cactusADM=base=default=default:
LDPORTABILITY = -Mnomain

436.cactusADM=peak=default=default:
FPORTABILITY = -fno-second-underscore

454.calculix=default=default=default:
LDPORTABILITY = -Mnomain

447.dealII=peak=default=default:
%ifdef %{gnu3_fe}
447.dealII=default=default:
CXXPORTABILITY = -DSPEC_CPU_TABLE_WORKAROUND
%else
447.dealII=default=default:
CXXPORTABILITY =
%endif

450.soplex=peak=default=default:
# Needed to avoid -DSPEC_CPU_LP64
PORTABILITY =

462.libquantum=default=default=default:
CPORTABILITY = -DSPEC_CPU_LP64 -DSPEC_CPU_LINUX

471.omnetpp=peak=default=default:
# Needed to avoid -DSPEC_CPU_LP64 on -m32 C++ base codes
PORTABILITY =

473.astar=default=default:
# Needed to avoid -DSPEC_CPU_LP64 on -m32 C++ base codes
PORTABILITY =

481.wrf=default=default=default:
CPORTABILITY = -DSPEC_CPU_CASE_FLAG -DSPEC_CPU_LINUX

483.xalancbmk=default=default:
CXXPORTABILITY = -DSPEC_CPU_LINUX
# Needed to avoid -DSPEC_CPU_LP64 on -m32 C++ base codes
PORTABILITY =

# The following section was added automatically, and contains settings that
# did not appear in the original configuration file, but were added to the
# raw file after the run.
default:
flagsurl000 = http://www.spec.org/cpu2006/flags/amd123GH-flags.20090714.01.xml