Skip to content

Commit aa61f98

Browse files
committed
Kyber: Improve performance
Unroll loops and use larger types. Allow the benchmark to run each Kyber parameter set separately (-kyber512, -kyber768, -kyber1024). Allow -ml-dsa to be specified to the benchmark, which runs all ML-DSA parameter sets. Fix Thumb2 ASM C code so that it does not have duplicate includes and ifdef checks. Fix Thumb2 ASM C code to include error-crypt.h so that the translation unit is never empty. Check for WOLFSSL_SHA3 before including Thumb2 SHA-3 assembly code.
1 parent 5793f62 commit aa61f98

10 files changed

Lines changed: 224 additions & 151 deletions

wolfcrypt/benchmark/benchmark.c

Lines changed: 43 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -654,7 +654,6 @@
654654
#define BENCH_RSA 0x00000002
655655
#define BENCH_RSA_SZ 0x00000004
656656
#define BENCH_DH 0x00000010
657-
#define BENCH_KYBER 0x00000020
658657
#define BENCH_ECC_MAKEKEY 0x00001000
659658
#define BENCH_ECC 0x00002000
660659
#define BENCH_ECC_ENCRYPT 0x00004000
@@ -681,11 +680,22 @@
681680
#define BENCH_SAKKE 0x80000000
682681

683682
/* Post-Quantum Asymmetric algorithms. */
683+
#define BENCH_KYBER512 0x00000020
684+
#define BENCH_KYBER768 0x00000040
685+
#define BENCH_KYBER1024 0x00000080
686+
#define BENCH_KYBER (BENCH_KYBER512 | BENCH_KYBER768 | \
687+
BENCH_KYBER1024)
684688
#define BENCH_FALCON_LEVEL1_SIGN 0x00000001
685689
#define BENCH_FALCON_LEVEL5_SIGN 0x00000002
686690
#define BENCH_DILITHIUM_LEVEL2_SIGN 0x04000000
687691
#define BENCH_DILITHIUM_LEVEL3_SIGN 0x08000000
688692
#define BENCH_DILITHIUM_LEVEL5_SIGN 0x10000000
693+
#define BENCH_ML_DSA_44_SIGN 0x04000000
694+
#define BENCH_ML_DSA_65_SIGN 0x08000000
695+
#define BENCH_ML_DSA_87_SIGN 0x10000000
696+
#define BENCH_ML_DSA_SIGN (BENCH_ML_DSA_44_SIGN | \
697+
BENCH_ML_DSA_65_SIGN | \
698+
BENCH_ML_DSA_87_SIGN)
689699

690700
/* Post-Quantum Asymmetric algorithms. (Part 2) */
691701
#define BENCH_SPHINCS_FAST_LEVEL1_SIGN 0x00000001
@@ -959,9 +969,6 @@ static const bench_alg bench_asym_opt[] = {
959969
#ifndef NO_DH
960970
{ "-dh", BENCH_DH },
961971
#endif
962-
#ifdef WOLFSSL_HAVE_KYBER
963-
{ "-kyber", BENCH_KYBER },
964-
#endif
965972
#ifdef HAVE_ECC
966973
{ "-ecc-kg", BENCH_ECC_MAKEKEY },
967974
{ "-ecc", BENCH_ECC },
@@ -1060,7 +1067,8 @@ static const bench_pq_hash_sig_alg bench_pq_hash_sig_opt[] = {
10601067
};
10611068
#endif /* BENCH_PQ_STATEFUL_HBS */
10621069

1063-
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
1070+
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
1071+
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
10641072
/* The post-quantum-specific mapping of command line option to bit values and
10651073
* OQS name. */
10661074
typedef struct bench_pq_alg {
@@ -1073,18 +1081,25 @@ typedef struct bench_pq_alg {
10731081
/* All recognized post-quantum asymmetric algorithm choosing command line
10741082
* options. */
10751083
static const bench_pq_alg bench_pq_asym_opt[] = {
1076-
{ "-pq", 0xffffffff },
1084+
{ "-pq", 0xffffffff },
1085+
#ifdef WOLFSSL_HAVE_KYBER
1086+
{ "-kyber", BENCH_KYBER },
1087+
{ "-kyber512", BENCH_KYBER512 },
1088+
{ "-kyber768", BENCH_KYBER768 },
1089+
{ "-kyber1024", BENCH_KYBER1024 },
1090+
#endif
10771091
#if defined(HAVE_FALCON)
1078-
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
1079-
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
1092+
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
1093+
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
10801094
#endif
10811095
#if defined(HAVE_DILITHIUM)
1082-
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
1083-
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
1084-
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
1085-
{ "-ml-dsa-44", BENCH_DILITHIUM_LEVEL2_SIGN },
1086-
{ "-ml-dsa-65", BENCH_DILITHIUM_LEVEL3_SIGN },
1087-
{ "-ml-dsa-87", BENCH_DILITHIUM_LEVEL5_SIGN },
1096+
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
1097+
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
1098+
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
1099+
{ "-ml-dsa", BENCH_ML_DSA_SIGN },
1100+
{ "-ml-dsa-44", BENCH_ML_DSA_44_SIGN },
1101+
{ "-ml-dsa-65", BENCH_ML_DSA_65_SIGN },
1102+
{ "-ml-dsa-87", BENCH_ML_DSA_87_SIGN },
10881103
#endif
10891104
{ NULL, 0 }
10901105
};
@@ -3576,15 +3591,21 @@ static void* benchmarks_do(void* args)
35763591
#endif
35773592

35783593
#ifdef WOLFSSL_HAVE_KYBER
3579-
if (bench_all || (bench_asym_algs & BENCH_KYBER)) {
3594+
if (bench_all || (bench_pq_asym_algs & BENCH_KYBER)) {
35803595
#ifdef WOLFSSL_KYBER512
3581-
bench_kyber(KYBER512);
3596+
if (bench_pq_asym_algs & BENCH_KYBER512) {
3597+
bench_kyber(KYBER512);
3598+
}
35823599
#endif
35833600
#ifdef WOLFSSL_KYBER768
3584-
bench_kyber(KYBER768);
3601+
if (bench_pq_asym_algs & BENCH_KYBER768) {
3602+
bench_kyber(KYBER768);
3603+
}
35853604
#endif
35863605
#ifdef WOLFSSL_KYBER1024
3587-
bench_kyber(KYBER1024);
3606+
if (bench_pq_asym_algs & BENCH_KYBER1024) {
3607+
bench_kyber(KYBER1024);
3608+
}
35883609
#endif
35893610
}
35903611
#endif
@@ -14523,7 +14544,8 @@ static void Usage(void)
1452314544
print_alg(bench_asym_opt[i].str, &line);
1452414545
for (i=0; bench_other_opt[i].str != NULL; i++)
1452514546
print_alg(bench_other_opt[i].str, &line);
14526-
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
14547+
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
14548+
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
1452714549
for (i=0; bench_pq_asym_opt[i].str != NULL; i++)
1452814550
print_alg(bench_pq_asym_opt[i].str, &line);
1452914551
#if defined(HAVE_SPHINCS)
@@ -14799,8 +14821,8 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
1479914821
optMatched = 1;
1480014822
}
1480114823
}
14802-
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || \
14803-
defined(HAVE_SPHINCS)
14824+
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
14825+
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
1480414826
/* Known asymmetric post-quantum algorithms */
1480514827
for (i=0; !optMatched && bench_pq_asym_opt[i].str != NULL; i++) {
1480614828
if (string_matches(argv[1], bench_pq_asym_opt[i].str)) {

wolfcrypt/src/port/arm/armv8-32-sha3-asm.S

Lines changed: 56 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
#ifdef WOLFSSL_ARMASM
3333
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
3434
#ifndef WOLFSSL_ARMASM_INLINE
35+
#ifdef WOLFSSL_SHA3
36+
#ifndef WOLFSSL_ARMASM_NO_NEON
3537
.text
3638
.type L_sha3_arm2_neon_rt, %object
3739
.size L_sha3_arm2_neon_rt, 192
@@ -85,60 +87,6 @@ L_sha3_arm2_neon_rt:
8587
.word 0x0
8688
.word 0x80008008
8789
.word 0x80000000
88-
.text
89-
.type L_sha3_arm2_rt, %object
90-
.size L_sha3_arm2_rt, 192
91-
.align 4
92-
L_sha3_arm2_rt:
93-
.word 0x1
94-
.word 0x0
95-
.word 0x8082
96-
.word 0x0
97-
.word 0x808a
98-
.word 0x80000000
99-
.word 0x80008000
100-
.word 0x80000000
101-
.word 0x808b
102-
.word 0x0
103-
.word 0x80000001
104-
.word 0x0
105-
.word 0x80008081
106-
.word 0x80000000
107-
.word 0x8009
108-
.word 0x80000000
109-
.word 0x8a
110-
.word 0x0
111-
.word 0x88
112-
.word 0x0
113-
.word 0x80008009
114-
.word 0x0
115-
.word 0x8000000a
116-
.word 0x0
117-
.word 0x8000808b
118-
.word 0x0
119-
.word 0x8b
120-
.word 0x80000000
121-
.word 0x8089
122-
.word 0x80000000
123-
.word 0x8003
124-
.word 0x80000000
125-
.word 0x8002
126-
.word 0x80000000
127-
.word 0x80
128-
.word 0x80000000
129-
.word 0x800a
130-
.word 0x0
131-
.word 0x8000000a
132-
.word 0x80000000
133-
.word 0x80008081
134-
.word 0x80000000
135-
.word 0x8080
136-
.word 0x80000000
137-
.word 0x80000001
138-
.word 0x0
139-
.word 0x80008008
140-
.word 0x80000000
141-
#ifndef WOLFSSL_ARMASM_NO_NEON
14290
.text
14391
.align 4
14492
.globl BlockSha3
@@ -407,6 +355,59 @@ L_sha3_arm32_neon_begin:
407355
.size BlockSha3,.-BlockSha3
408356
#endif /* WOLFSSL_ARMASM_NO_NEON */
409357
#ifdef WOLFSSL_ARMASM_NO_NEON
358+
.text
359+
.type L_sha3_arm2_rt, %object
360+
.size L_sha3_arm2_rt, 192
361+
.align 4
362+
L_sha3_arm2_rt:
363+
.word 0x1
364+
.word 0x0
365+
.word 0x8082
366+
.word 0x0
367+
.word 0x808a
368+
.word 0x80000000
369+
.word 0x80008000
370+
.word 0x80000000
371+
.word 0x808b
372+
.word 0x0
373+
.word 0x80000001
374+
.word 0x0
375+
.word 0x80008081
376+
.word 0x80000000
377+
.word 0x8009
378+
.word 0x80000000
379+
.word 0x8a
380+
.word 0x0
381+
.word 0x88
382+
.word 0x0
383+
.word 0x80008009
384+
.word 0x0
385+
.word 0x8000000a
386+
.word 0x0
387+
.word 0x8000808b
388+
.word 0x0
389+
.word 0x8b
390+
.word 0x80000000
391+
.word 0x8089
392+
.word 0x80000000
393+
.word 0x8003
394+
.word 0x80000000
395+
.word 0x8002
396+
.word 0x80000000
397+
.word 0x80
398+
.word 0x80000000
399+
.word 0x800a
400+
.word 0x0
401+
.word 0x8000000a
402+
.word 0x80000000
403+
.word 0x80008081
404+
.word 0x80000000
405+
.word 0x8080
406+
.word 0x80000000
407+
.word 0x80000001
408+
.word 0x0
409+
.word 0x80008008
410+
.word 0x80000000
410411
.text
411412
.align 4
412413
.globl BlockSha3
@@ -2391,6 +2392,7 @@ L_sha3_arm32_begin:
23912392
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
23922393
.size BlockSha3,.-BlockSha3
23932394
#endif /* WOLFSSL_ARMASM_NO_NEON */
2395+
#endif /* WOLFSSL_SHA3 */
23942396
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
23952397
#endif /* WOLFSSL_ARMASM */
23962398

wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
#define __asm__ __asm
5252
#define __volatile__ volatile
5353
#endif /* __KEIL__ */
54+
#ifdef WOLFSSL_SHA3
55+
#ifndef WOLFSSL_ARMASM_NO_NEON
5456
static const uint64_t L_sha3_arm2_neon_rt[] = {
5557
0x0000000000000001UL, 0x0000000000008082UL,
5658
0x800000000000808aUL, 0x8000000080008000UL,
@@ -66,29 +68,12 @@ static const uint64_t L_sha3_arm2_neon_rt[] = {
6668
0x0000000080000001UL, 0x8000000080008008UL,
6769
};
6870

69-
static const uint64_t L_sha3_arm2_rt[] = {
70-
0x0000000000000001UL, 0x0000000000008082UL,
71-
0x800000000000808aUL, 0x8000000080008000UL,
72-
0x000000000000808bUL, 0x0000000080000001UL,
73-
0x8000000080008081UL, 0x8000000000008009UL,
74-
0x000000000000008aUL, 0x0000000000000088UL,
75-
0x0000000080008009UL, 0x000000008000000aUL,
76-
0x000000008000808bUL, 0x800000000000008bUL,
77-
0x8000000000008089UL, 0x8000000000008003UL,
78-
0x8000000000008002UL, 0x8000000000000080UL,
79-
0x000000000000800aUL, 0x800000008000000aUL,
80-
0x8000000080008081UL, 0x8000000000008080UL,
81-
0x0000000080000001UL, 0x8000000080008008UL,
82-
};
83-
8471
#include <wolfssl/wolfcrypt/sha3.h>
8572

86-
#ifndef WOLFSSL_ARMASM_NO_NEON
8773
void BlockSha3(word64* state_p)
8874
{
8975
register word64* state asm ("r0") = (word64*)state_p;
9076
register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt;
91-
register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt;
9277

9378
__asm__ __volatile__ (
9479
"sub sp, sp, #16\n\t"
@@ -348,16 +333,31 @@ void BlockSha3(word64* state_p)
348333
"vst1.8 {d20-d23}, [%[state]]!\n\t"
349334
"vst1.8 {d24}, [%[state]]\n\t"
350335
"add sp, sp, #16\n\t"
351-
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
336+
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
352337
:
353-
: "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
338+
: "memory", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
354339
);
355340
}
356341

357342
#endif /* WOLFSSL_ARMASM_NO_NEON */
343+
#ifdef WOLFSSL_ARMASM_NO_NEON
344+
static const uint64_t L_sha3_arm2_rt[] = {
345+
0x0000000000000001UL, 0x0000000000008082UL,
346+
0x800000000000808aUL, 0x8000000080008000UL,
347+
0x000000000000808bUL, 0x0000000080000001UL,
348+
0x8000000080008081UL, 0x8000000000008009UL,
349+
0x000000000000008aUL, 0x0000000000000088UL,
350+
0x0000000080008009UL, 0x000000008000000aUL,
351+
0x000000008000808bUL, 0x800000000000008bUL,
352+
0x8000000000008089UL, 0x8000000000008003UL,
353+
0x8000000000008002UL, 0x8000000000000080UL,
354+
0x000000000000800aUL, 0x800000008000000aUL,
355+
0x8000000080008081UL, 0x8000000000008080UL,
356+
0x0000000080000001UL, 0x8000000080008008UL,
357+
};
358+
358359
#include <wolfssl/wolfcrypt/sha3.h>
359360

360-
#ifdef WOLFSSL_ARMASM_NO_NEON
361361
void BlockSha3(word64* state_p)
362362
{
363363
register word64* state asm ("r0") = (word64*)state_p;
@@ -2348,6 +2348,7 @@ void BlockSha3(word64* state_p)
23482348
}
23492349

23502350
#endif /* WOLFSSL_ARMASM_NO_NEON */
2351+
#endif /* WOLFSSL_SHA3 */
23512352
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
23522353
#endif /* WOLFSSL_ARMASM */
23532354
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */

wolfcrypt/src/port/arm/thumb2-aes-asm_c.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,12 @@
2828
#include <config.h>
2929
#endif /* HAVE_CONFIG_H */
3030
#include <wolfssl/wolfcrypt/settings.h>
31+
#include <wolfssl/wolfcrypt/error-crypt.h>
3132

3233
#ifdef WOLFSSL_ARMASM
3334
#if !defined(__aarch64__) && defined(__thumb__)
34-
#include <stdint.h>
35-
#ifdef HAVE_CONFIG_H
36-
#include <config.h>
37-
#endif /* HAVE_CONFIG_H */
38-
#include <wolfssl/wolfcrypt/settings.h>
3935
#ifdef WOLFSSL_ARMASM_INLINE
4036

41-
#ifdef WOLFSSL_ARMASM
42-
#if !defined(__aarch64__) && defined(__thumb__)
43-
4437
#ifdef __IAR_SYSTEMS_ICC__
4538
#define __asm__ asm
4639
#define __volatile__ volatile
@@ -3056,7 +3049,4 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long
30563049
#endif /* !NO_AES */
30573050
#endif /* !__aarch64__ && __thumb__ */
30583051
#endif /* WOLFSSL_ARMASM */
3059-
#endif /* !defined(__aarch64__) && defined(__thumb__) */
3060-
#endif /* WOLFSSL_ARMASM */
3061-
30623052
#endif /* WOLFSSL_ARMASM_INLINE */

0 commit comments

Comments
 (0)