Translate C constant to assembly
spectatora opened this issue · 0 comments
spectatora commented
We have the following C constant:
/* Four doubles packed together (4 * 8 = 32 bytes), aligned to 32 bytes. */
typedef struct {
    double v[4];
} __attribute__((aligned(32))) limb;

/* Twelve consecutive limbs. */
typedef struct {
    limb v[12];
} gfe4x;

/* Expands to a brace-enclosed initializer containing four copies of x. */
#define repeat4x(x) {x, x, x, x}

/*
 * Gk: constant table of 12 limbs. Within each limb the same value is
 * repeated in all four double slots.
 */
const gfe4x Gk = {
    {
        /* v[ 0] */ { repeat4x(3338585.0) },
        /* v[ 1] */ { repeat4x(3934835965952.0) },
        /* v[ 2] */ { repeat4x(16993937369696567296.0) },
        /* v[ 3] */ { repeat4x(4464222746302153748381696.0) },
        /* v[ 4] */ { repeat4x(93371163235585075216663357423616.0) },
        /* v[ 5] */ { repeat4x(1163399014865459815517614333765877760.0) },
        /* v[ 6] */ { repeat4x(441936960085431936284569284157504919873519616.0) },
        /* v[ 7] */ { repeat4x(355047131404459050871642921761149483359549389799424.0) },
        /* v[ 8] */ { repeat4x(626647004757192365988092839070681114614100044180388577280.0) },
        /* v[ 9] */ { repeat4x(13159058716893486699394031679446200360393917757201178927420145664.0) },
        /* v[10] */ { repeat4x(12842070454865951878207543570322902610654944894655310136406629955928064.0) },
        /* v[11] */ { repeat4x(16295354408597167049195255459117446390458785936524946835293367493552880222208.0) }
    }
};
We need to translate it into an assembly constant that can then be used as follows:
# Excerpt: multiply the four lanes of mul_x0 by limbs 0..7 of the constant
# table Gk. Syntax: GAS / AT&T, x86-64, AVX.
#
# Register roles within this excerpt:
#   %rcx          input_3 = base address of Gk
#   %ymm1         mul_x0 (set before this excerpt; not written here)
#   %ymm2         mul_y0 = Gk limb 0
#   %ymm3-%ymm10  r0..r7 = mul_x0 * Gk limb 0..7
#
# Each limb is four doubles (32 bytes), so limb i lives at offset 32*i
# from the start of Gk.

# qhasm: input_3 = Gk
# asm 1: lea Gk(%rip),>input_3=int64#4
# asm 2: lea Gk(%rip),>input_3=%rcx
# RIP-relative address load instead of the absolute immediate `mov $Gk,%rcx`:
# the immediate form needs an R_X86_64_32S relocation, which cannot be
# linked into position-independent executables or shared objects.
lea Gk(%rip),%rcx
# qhasm: mul_y0 aligned= mem256[input_3 + 0]
# asm 1: vmovupd 0(<input_3=int64#4),>mul_y0=reg256#3
# asm 2: vmovupd 0(<input_3=%rcx),>mul_y0=%ymm2
# vmovupd is the unaligned-load form, so this does not fault even if the
# assembly definition of Gk ends up without 32-byte alignment.
vmovupd 0(%rcx),%ymm2
# qhasm: 4x r0 = approx mul_x0 * mul_y0
# asm 1: vmulpd <mul_x0=reg256#2,<mul_y0=reg256#3,>r0=reg256#4
# asm 2: vmulpd <mul_x0=%ymm1,<mul_y0=%ymm2,>r0=%ymm3
vmulpd %ymm1,%ymm2,%ymm3
# qhasm: 4x r1 = approx mul_x0 * mem256[input_3 + 32]
# asm 1: vmulpd 32(<input_3=int64#4),<mul_x0=reg256#2,>r1=reg256#5
# asm 2: vmulpd 32(<input_3=%rcx),<mul_x0=%ymm1,>r1=%ymm4
vmulpd 32(%rcx),%ymm1,%ymm4
# qhasm: 4x r2 = approx mul_x0 * mem256[input_3 + 64]
# asm 1: vmulpd 64(<input_3=int64#4),<mul_x0=reg256#2,>r2=reg256#6
# asm 2: vmulpd 64(<input_3=%rcx),<mul_x0=%ymm1,>r2=%ymm5
vmulpd 64(%rcx),%ymm1,%ymm5
# qhasm: 4x r3 = approx mul_x0 * mem256[input_3 + 96]
# asm 1: vmulpd 96(<input_3=int64#4),<mul_x0=reg256#2,>r3=reg256#7
# asm 2: vmulpd 96(<input_3=%rcx),<mul_x0=%ymm1,>r3=%ymm6
vmulpd 96(%rcx),%ymm1,%ymm6
# qhasm: 4x r4 = approx mul_x0 * mem256[input_3 + 128]
# asm 1: vmulpd 128(<input_3=int64#4),<mul_x0=reg256#2,>r4=reg256#8
# asm 2: vmulpd 128(<input_3=%rcx),<mul_x0=%ymm1,>r4=%ymm7
vmulpd 128(%rcx),%ymm1,%ymm7
# qhasm: 4x r5 = approx mul_x0 * mem256[input_3 + 160]
# asm 1: vmulpd 160(<input_3=int64#4),<mul_x0=reg256#2,>r5=reg256#9
# asm 2: vmulpd 160(<input_3=%rcx),<mul_x0=%ymm1,>r5=%ymm8
vmulpd 160(%rcx),%ymm1,%ymm8
# qhasm: 4x r6 = approx mul_x0 * mem256[input_3 + 192]
# asm 1: vmulpd 192(<input_3=int64#4),<mul_x0=reg256#2,>r6=reg256#10
# asm 2: vmulpd 192(<input_3=%rcx),<mul_x0=%ymm1,>r6=%ymm9
vmulpd 192(%rcx),%ymm1,%ymm9
# qhasm: 4x r7 = approx mul_x0 * mem256[input_3 + 224]
# asm 1: vmulpd 224(<input_3=int64#4),<mul_x0=reg256#2,>r7=reg256#11
# asm 2: vmulpd 224(<input_3=%rcx),<mul_x0=%ymm1,>r7=%ymm10
vmulpd 224(%rcx),%ymm1,%ymm10
...