View Single Post
Old 2020-01-24, 22:59   #1806
kriesel
 
kriesel's Avatar
 
"TF79LL86GIMPS96gpu17"
Mar 2017
US midwest

1001011101111₂ Posts
Default Gpuowl -use options tune on RX480 for 4.5M fft length

The RX480 likes a somewhat different combination of -use options at 4.5M fft length than it does at 5M.
Quote:
gpuowl v6.11-134-g1e0ce1d
RX480 8GB
Win7 x64
exponent 82053239 PRP
4.5M fft
-iters 10000 -time
all timings below are in microsec/iteration; * marks the option selected from each group (the fastest, or nearly so)


NO_ASM 3021
NO_ASM 3022
NO_ASM,UNROLL_ALL 3010 *
NO_ASM,UNROLL_NONE 3039
NO_ASM,UNROLL_WIDTH 3035
NO_ASM,UNROLL_HEIGHT 3038
NO_ASM,UNROLL_MIDDLEMUL1 3036
NO_ASM,UNROLL_MIDDLEMUL2 3027

NO_ASM,UNROLL_WIDTH,UNROLL_MIDDLEMUL1 3028
NO_ASM,UNROLL_WIDTH,UNROLL_MIDDLEMUL2 3019, 3028
NO_ASM,UNROLL_MIDDLEMUL2,UNROLL_MIDDLEMUL1 2989 *
NO_ASM,UNROLL_WIDTH,UNROLL_MIDDLEMUL1,UNROLL_MIDDLEMUL2 2996

NO_ASM,MERGED_MIDDLE,WORKINGIN 5309
NO_ASM,MERGED_MIDDLE,WORKINGIN 5306
NO_ASM,MERGED_MIDDLE,WORKINGIN1 3032
NO_ASM,MERGED_MIDDLE,WORKINGIN1A 3052
NO_ASM,MERGED_MIDDLE,WORKINGIN2 3111
NO_ASM,MERGED_MIDDLE,WORKINGIN3 3133
NO_ASM,MERGED_MIDDLE,WORKINGIN4 3454
NO_ASM,MERGED_MIDDLE,WORKINGIN5 2995 *

NO_ASM,MERGED_MIDDLE,WORKINGOUT 5224
NO_ASM,MERGED_MIDDLE,WORKINGOUT0 4036
NO_ASM,MERGED_MIDDLE,WORKINGOUT1 2984 *
NO_ASM,MERGED_MIDDLE,WORKINGOUT1A 3012/2982
NO_ASM,MERGED_MIDDLE,WORKINGOUT2 3353
NO_ASM,MERGED_MIDDLE,WORKINGOUT3 2986
NO_ASM,MERGED_MIDDLE,WORKINGOUT4 3137
NO_ASM,MERGED_MIDDLE,WORKINGOUT5 2995

NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout% 2973
NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout%,T2_SHUFFLE_WIDTH 2957 *
NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout%,T2_SHUFFLE_MIDDLE 3026
NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout%,T2_SHUFFLE_HEIGHT 2966
NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout%,T2_SHUFFLE_REVERSELINE 2972
NO_ASM,MERGED_MIDDLE,%wkgin%,%wkgout%,T2_SHUFFLE 2992

set allotheroptions=NO_ASM,WORKINGIN5,WORKINGOUT1,UNROLL_MIDDLEMUL2,UNROLL_MIDDLEMUL1
%allotheroptions%,T2_SHUFFLE_WIDTH,T2_SHUFFLE_HEIGHT 2938 *
%allotheroptions%,T2_SHUFFLE_HEIGHT,T2_SHUFFLE_MIDDLE,T2_SHUFFLE_WIDTH 2989
%allotheroptions%,T2_SHUFFLE_HEIGHT,T2_SHUFFLE_MIDDLE,T2_SHUFFLE_WIDTH,T2_SHUFFLE_REVERSELINE 2987

set allotheroptions=NO_ASM,MERGED_MIDDLE,UNROLL_HEIGHT,UNROLL_WIDTH,WORKINGIN1,WORKINGOUT1,T2_SHUFFLE_WIDTH,T2_SHUFFLE_HEIGHT
%allotheroptions%,CARRY32 2940 *
%allotheroptions%,CARRY64 3054

set allotheroptions=NO_ASM,MERGED_MIDDLE,WORKINGIN5,WORKINGOUT1,T2_SHUFFLE_WIDTH,T2_SHUFFLE_HEIGHT,UNROLL_MIDDLEMUL2,UNROLL_MIDDLEMUL1,CARRY32
%allotheroptions%,FANCY_MIDDLEMUL1 "error: Clang front-end compilation failed!"
%allotheroptions%,MORE_SQUARES_MIDDLEMUL1 2985
%allotheroptions%,CHEBYSHEV_METHOD 2919
%allotheroptions%,CHEBYSHEV_METHOD_FMA 2911 *
%allotheroptions%,ORIGINAL_METHOD 2942
%allotheroptions%,ORIGINAL_TWEAKED 2937

set allotheroptions=NO_ASM,MERGED_MIDDLE,WORKINGIN5,WORKINGOUT1,T2_SHUFFLE_WIDTH,T2_SHUFFLE_HEIGHT,UNROLL_MIDDLEMUL2,UNROLL_MIDDLEMUL1,CARRY32,CHEBYSHEV_METHOD_FMA
%allotheroptions%,ORIG_MIDDLEMUL2 2926
%allotheroptions%,CHEBYSHEV_MIDDLEMUL2 2916 *

%allotheroptions%,ORIG_SLOWTRIG 3058
%allotheroptions%,NEW_SLOWTRIG 2910
%allotheroptions%,MORE_ACCURATE 2921
%allotheroptions%,LESS_ACCURATE 2909 *

NO_ASM,MERGED_MIDDLE,WORKINGIN5,WORKINGOUT1,T2_SHUFFLE_WIDTH,T2_SHUFFLE_HEIGHT,UNROLL_MIDDLEMUL2,UNROLL_MIDDLEMUL1,CARRY32,CHEBYSHEV_METHOD_FMA,CHEBYSHEV_MIDDLEMUL2,LESS_ACCURATE

base 3021.5
repeatability ±1.5/5307.5 ≈ ±0.028%
best 2909
ratio 3021.5/2909 = 1.039
kriesel is online now   Reply With Quote