Skip to content

Commit

Permalink
dh/sidh: Avoid reference to global variable with MULX for rdcP434.
Browse files Browse the repository at this point in the history
See bug in the compiler (issue #58735).
  • Loading branch information
armfazh committed Feb 27, 2023
1 parent be39e0e commit 98ff19f
Showing 1 changed file with 42 additions and 22 deletions.
64 changes: 42 additions & 22 deletions dh/sidh/internal/p434/arith_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,36 @@
// |-128-| x |--- 256 ---| = |------ 384 ------|
// Assuming the first digit multiplication was already performed.
// Operands (Go assembler order: MULXQ src, lo, hi):
//   I1  register holding the second 64-bit multiplier digit; overwritten.
//   M1  symbol expression of the 256-bit multiplicand; limbs are read at
//       M1+0(SB), M1+8(SB), M1+16(SB) and M1+24(SB).
//   T1  in/out: running limb that the low half of DX*M1[1] is added to.
//   T2..T5  outputs/temporaries; overwritten.
// On entry DX must hold the first multiplier digit (implicit MULX source).
// Clobbers AX, DX, CF and OF.
//
// Every limb of M1 is first loaded into AX and MULXQ is then given the
// register, so MULXQ never takes the global symbol as a memory operand
// (see issue #58735).
#define MULX128x256(I1, M1, T1, T2, T3, T4, T5) \
    MOVQ  M1+ 8(SB), AX         \
    MULXQ AX, T4, T2            \
    XORQ  AX, AX                \ // clears CF and OF before the ADOX chain
    MOVQ  M1+16(SB), AX         \ // MOVQ leaves the flags untouched
    MULXQ AX, T5, T3            \
    ADOXQ T4, T1                \ // T1: interm1
    ADOXQ T5, T2                \ // T2: interm2
    MOVQ  M1+24(SB), AX         \
    MULXQ AX, T5, T4            \
    ADOXQ T5, T3                \ // T3: interm3
    MOVL  $0, AX                \ // zero AX again; MOVL keeps OF alive, XORQ would not
    ADOXQ AX, T4                \ // T4: interm4
                                \
    XORQ  AX, AX                \ // restart both carry chains (CF and OF = 0)
    MOVQ  I1, DX                \ // second multiplier digit becomes the MULX source
    MOVQ  M1+ 0(SB), AX         \
    MULXQ AX, T5, I1            \ // T0 <- C0
    ADCXQ T5, T1                \
    ADCXQ I1, T2                \ // T1 <- C1
    MOVQ  M1+ 8(SB), AX         \
    MULXQ AX, I1, T5            \
    ADCXQ T5, T3                \
    ADOXQ I1, T2                \ // T2 <- C2
    MOVQ  M1+16(SB), AX         \
    MULXQ AX, I1, T5            \
    ADCXQ T5, T4                \
    ADOXQ I1, T3                \ // T3 <- C3
    MOVQ  M1+24(SB), AX         \
    MULXQ AX, I1, T5            \
    MOVL  $0, AX                \ // AX = 0 for the final carry folds, flags preserved
    ADCXQ AX, T5                \
    ADOXQ I1, T4                \ // T4 <- C4
    ADOXQ AX, T5                   // T5 <- C5
Expand All @@ -63,13 +72,17 @@
// |64| x |--- 256 ---| = |----- 320 ----|
// Assuming the first digit multiplication was already performed.
// Operands (Go assembler order: MULXQ src, lo, hi):
//   M1  symbol expression of the 256-bit multiplicand; limbs are read at
//       M1+8(SB), M1+16(SB) and M1+24(SB).
//   T1  in/out: running limb that the low half of DX*M1[1] is added to.
//   T2..T4  outputs; T5 is a temporary. All are overwritten.
// On entry DX must hold the 64-bit multiplier digit (implicit MULX source).
// Clobbers AX and the flags (XORQ resets them, ADOXQ then updates OF).
//
// Every limb of M1 is first loaded into AX and MULXQ is then given the
// register, so MULXQ never takes the global symbol as a memory operand
// (see issue #58735).
#define MULX64x256(M1, T1, T2, T3, T4, T5) \
    MOVQ  M1+ 8(SB), AX         \
    MULXQ AX, T4, T2            \
    XORQ  AX, AX                \ // clears CF and OF before the ADOX chain
    MOVQ  M1+16(SB), AX         \ // MOVQ leaves the flags untouched
    MULXQ AX, T5, T3            \
    ADOXQ T4, T1                \ // T1 <- C1
    ADOXQ T5, T2                \ // T2 <- C2
    MOVQ  M1+24(SB), AX         \
    MULXQ AX, T5, T4            \
    ADOXQ T5, T3                \ // T3 <- C3
    MOVL  $0, AX                \ // zero AX again; MOVL keeps OF alive, XORQ would not
    ADOXQ AX, T4                   // T4 <- C4

// Performs schoolbook multiplication of two 192-bit numbers
Expand Down Expand Up @@ -284,7 +297,8 @@
#define REDC_MULX(P1, MUL01, MUL23, MUL45, MUL67) \
MOVQ 0x0(DI), DX \
MOVQ 0x8(DI), R14 \
MULXQ P1, R8, R9 \
MOVQ P1, AX \
MULXQ AX, R8, R9 \
MUL01 \
MOVQ 0x10(DI), DX \
MOVQ 0x48(DI), CX \
Expand All @@ -295,12 +309,14 @@
ADCQ 0x38(DI), R12 \
ADCQ 0x40(DI), R13 \
ADCQ $0, CX \
MULXQ P1, BX, BP \
MOVQ P1, AX \
MULXQ AX, BX, BP \
MOVQ R9, 0x0(SI) \
MOVQ R10, 0x8(SI) \
MOVQ R11, 0x10(SI) \
MOVQ R12, 0x18(SI) \
MOVQ R13, 0x20(SI) \
MOVQ CX, 0x28(SI) \
MOVQ 0x50(DI), R9 \
MOVQ 0x58(DI), R10 \
MOVQ 0x60(DI), R11 \
Expand All @@ -315,11 +331,14 @@
ADCQ 0x10(SI), BP \
ADCQ 0x18(SI), R12 \
ADCQ 0x20(SI), R13 \
ADCQ CX, R14 \
ADCQ 0x28(SI), R14 \
MOVQ R14, 0x18(SI) \
MOVQ CX, R14 \
MOVQ $0, CX \
ADCQ R9, R15 \
ADCQ R9, R14 \
ADCQ R10, CX \
MULXQ P1, R8, R9 \
MOVQ P1, AX \
MULXQ AX, R8, R9 \
MOVQ BP, 0x0(SI) \
MOVQ R12, 0x8(SI) \
MOVQ R13, 0x10(SI) \
Expand All @@ -329,22 +348,23 @@
MOVQ 0x0(SI), DX \
ADDQ 0x8(SI), R8 \
ADCQ 0x10(SI), R9 \
ADCQ R14, R10 \
ADCQ R15, BP \
ADCQ 0x18(SI), R10 \
ADCQ R14, BP \
ADCQ CX, R12 \
ADCQ R11, R13 \
ADCQ $0, DI \
MULXQ P1, R14, R15 \
MOVQ P1, AX \
MULXQ AX, R14, BX \
MOVQ R8, 0x0(SI) \
MOVQ R9, 0x8(SI) \
MUL67 \
ADDQ R10, R14 \
ADCQ BP, R15 \
ADCQ BP, BX \
ADCQ R12, R8 \
ADCQ R13, R9 \
ADCQ DI, R11 \
MOVQ R14, 0x10(SI) \
MOVQ R15, 0x18(SI) \
MOVQ BX, 0x18(SI) \
MOVQ R8, 0x20(SI) \
MOVQ R9, 0x28(SI) \
MOVQ R11, 0x30(SI)
Expand Down Expand Up @@ -1314,9 +1334,9 @@ TEXT ·rdcP434(SB),$0-16
// available on Broadwell micro-architectures and newer.
redc_bdw:
#define MULX01 MULX128x256(R14,·P434p1+(8*P434_P1_ZEROS),R9 ,R10,R11,R12,R13)
#define MULX23 MULX128x256(R8 ,·P434p1+(8*P434_P1_ZEROS),BP ,R12,R13,R14,R15)
#define MULX23 MULX128x256(R8 ,·P434p1+(8*P434_P1_ZEROS),BP ,R12,R13,R14,CX )
#define MULX45 MULX128x256(BX ,·P434p1+(8*P434_P1_ZEROS),R9 ,R10,BP ,R12,R13)
#define MULX67 MULX64x256 ( ·P434p1+(8*P434_P1_ZEROS),R15,R8 ,R9 ,R11,CX )
#define MULX67 MULX64x256 ( ·P434p1+(8*P434_P1_ZEROS),BX ,R8 ,R9 ,R11,CX )
REDC_MULX(·P434p1+(8*P434_P1_ZEROS)+0(SB), MULX01, MULX23, MULX45, MULX67)
#undef MULX01
#undef MULX23
Expand Down

0 comments on commit 98ff19f

Please sign in to comment.