Skip to content

Commit e9e856a

Browse files
mscdex authored and ruyadorno committed on Sep 23, 2022
deps: upgrade base64 to dc6a41ce36e
Fixes: #43894 PR-URL: #44032 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: Mestery <mestery@protonmail.com> Reviewed-By: Tobias Nießen <tniessen@tnie.de> Reviewed-By: Feng Yu <F3n67u@outlook.com> Reviewed-By: Chengzhong Wu <legendecas@gmail.com>
1 parent 8ea9a71 commit e9e856a

File tree

4 files changed

+536
-69
lines changed

4 files changed

+536
-69
lines changed
 

‎deps/base64/base64/defines.txt

+362
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
#define __DBL_MIN_EXP__ (-1021)
2+
#define __LDBL_MANT_DIG__ 113
3+
#define STOR(A,B,C,D) "st4 {"A".16b, "B".16b, "C".16b, "D".16b}, [%[dst]], #64 \n\t"
4+
#define ROUND_A() SHUF("v2", "v3", "v4") LOAD("v12", "v13", "v14") TRAN("v2", "v3", "v4", "v5") STOR("v2", "v3", "v4", "v5")
5+
#define __UINT_LEAST16_MAX__ 0xffff
6+
#define __ARM_SIZEOF_WCHAR_T 4
7+
#define __DBL_DECIMAL_DIG__ 17
8+
#define __ATOMIC_ACQUIRE 2
9+
#define __FLT128_MAX_10_EXP__ 4932
10+
#define __FLT_MIN__ 1.17549435082228750796873653722224568e-38F
11+
#define __GCC_IEC_559_COMPLEX 2
12+
#define __UINT_LEAST8_TYPE__ unsigned char
13+
#define __FLT128_DIG__ 33
14+
#define __INTMAX_C(c) c ## L
15+
#define __CHAR_BIT__ 8
16+
#define __UINT8_MAX__ 0xff
17+
#define __WCHAR_MAX__ 0xffffffffU
18+
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
19+
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
20+
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
21+
#define __GCC_ATOMIC_CHAR_LOCK_FREE 2
22+
#define __GCC_IEC_559 2
23+
#define __FLT32X_DECIMAL_DIG__ 17
24+
#define __FLT_EVAL_METHOD__ 0
25+
#define HAVE_NEON64 1
26+
#define __FLT64_DECIMAL_DIG__ 17
27+
#define __GCC_ATOMIC_CHAR32_T_LOCK_FREE 2
28+
#define __UINT_FAST32_TYPE__ long unsigned int
29+
#define __UINT_FAST64_MAX__ 0xffffffffffffffffUL
30+
#define __SIG_ATOMIC_TYPE__ int
31+
#define __DBL_MIN_10_EXP__ (-307)
32+
#define __FINITE_MATH_ONLY__ 0
33+
#define __FLT32X_MAX_EXP__ 1024
34+
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
35+
#define __GNUC_PATCHLEVEL__ 1
36+
#define __FLT32_HAS_DENORM__ 1
37+
#define __UINT_FAST8_MAX__ 0xff
38+
#define __INT8_C(c) c
39+
#define __INT_LEAST8_WIDTH__ 8
40+
#define __INTMAX_TYPE__ long int
41+
#define __UINT_LEAST64_MAX__ 0xffffffffffffffffUL
42+
#define __SHRT_MAX__ 0x7fff
43+
#define __STDC_ISO_10646__ 201706L
44+
#define __LDBL_MAX__ 1.18973149535723176508575932662800702e+4932L
45+
#define __ARM_FEATURE_IDIV 1
46+
#define __FLT64X_MAX_10_EXP__ 4932
47+
#define __ARM_FP 14
48+
#define __FLT64X_HAS_QUIET_NAN__ 1
49+
#define __WINT_TYPE__ unsigned int
50+
#define __UINT_LEAST8_MAX__ 0xff
51+
#define __FLT128_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966F128
52+
#define __UINTMAX_TYPE__ long unsigned int
53+
#define _STDC_PREDEF_H 1
54+
#define __linux 1
55+
#define __FLT_EVAL_METHOD_TS_18661_3__ 0
56+
#define __CHAR_UNSIGNED__ 1
57+
#define __UINT32_MAX__ 0xffffffffU
58+
#define __DBL_DENORM_MIN__ ((double)4.94065645841246544176568792868221372e-324L)
59+
#define __AARCH64_CMODEL_SMALL__ 1
60+
#define __LDBL_MAX_EXP__ 16384
61+
#define __INT_FAST32_WIDTH__ 64
62+
#define __FLT128_MIN_EXP__ (-16381)
63+
#define __FLT128_MIN_10_EXP__ (-4931)
64+
#define __INT_LEAST16_WIDTH__ 16
65+
#define __FLT64X_MIN_EXP__ (-16381)
66+
#define __SCHAR_MAX__ 0x7f
67+
#define __FLT128_MANT_DIG__ 113
68+
#define __DBL_MAX__ ((double)1.79769313486231570814527423731704357e+308L)
69+
#define __WCHAR_MIN__ 0U
70+
#define __INT64_C(c) c ## L
71+
#define __GCC_ATOMIC_POINTER_LOCK_FREE 2
72+
#define __SIZEOF_INT__ 4
73+
#define __INT_FAST64_WIDTH__ 64
74+
#define __PRAGMA_REDEFINE_EXTNAME 1
75+
#define __FLT32X_MANT_DIG__ 53
76+
#define __USER_LABEL_PREFIX__
77+
#define __FLT32_MAX_10_EXP__ 38
78+
#define __FLT64X_EPSILON__ 1.92592994438723585305597794258492732e-34F64x
79+
#define __STDC_HOSTED__ 1
80+
#define __DBL_DIG__ 15
81+
#define __FLT32_DIG__ 6
82+
#define __FLT_EPSILON__ 1.19209289550781250000000000000000000e-7F
83+
#define __SHRT_WIDTH__ 16
84+
#define __LDBL_MIN__ 3.36210314311209350626267781732175260e-4932L
85+
#define __STDC_UTF_16__ 1
86+
#define __FLT16_HAS_QUIET_NAN__ 1
87+
#define __ARM_SIZEOF_MINIMAL_ENUM 4
88+
#define __FLT64X_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966F64x
89+
#define __FP_FAST_FMA 1
90+
#define __FLT32X_HAS_INFINITY__ 1
91+
#define __INT32_MAX__ 0x7fffffff
92+
#define __FLT16_DIG__ 3
93+
#define __INT_WIDTH__ 32
94+
#define __SIZEOF_LONG__ 8
95+
#define __STDC_IEC_559__ 1
96+
#define __UINT16_C(c) c
97+
#define __DECIMAL_DIG__ 36
98+
#define __STDC_IEC_559_COMPLEX__ 1
99+
#define __FLT64_EPSILON__ 2.22044604925031308084726333618164062e-16F64
100+
#define __gnu_linux__ 1
101+
#define __INT16_MAX__ 0x7fff
102+
#define LOAD(A,B,C) "ld3 {"A".16b, "B".16b, "C".16b}, [%[src]], #48 \n\t"
103+
#define __FLT64X_MIN_10_EXP__ (-4931)
104+
#define __LDBL_HAS_QUIET_NAN__ 1
105+
#define __FLT16_MIN_EXP__ (-13)
106+
#define __FLT64_MANT_DIG__ 53
107+
#define __FLT64X_MANT_DIG__ 113
108+
#define __GNUC__ 10
109+
#define __FLT_HAS_DENORM__ 1
110+
#define __SIZEOF_LONG_DOUBLE__ 16
111+
#define __LDBL_MIN_EXP__ (-16381)
112+
#define __FLT64_MAX_10_EXP__ 308
113+
#define __FLT16_MAX_10_EXP__ 4
114+
#define __INT_FAST32_MAX__ 0x7fffffffffffffffL
115+
#define __DBL_HAS_INFINITY__ 1
116+
#define __HAVE_SPECULATION_SAFE_VALUE 1
117+
#define __INTPTR_WIDTH__ 64
118+
#define __FLT32X_HAS_DENORM__ 1
119+
#define __INT_FAST16_TYPE__ long int
120+
#define __LDBL_HAS_DENORM__ 1
121+
#define __FLT128_HAS_INFINITY__ 1
122+
#define __FLT32_DECIMAL_DIG__ 9
123+
#define __DBL_MAX_EXP__ 1024
124+
#define __WCHAR_WIDTH__ 32
125+
#define __FLT32_MAX__ 3.40282346638528859811704183484516925e+38F32
126+
#define __GCC_ATOMIC_LONG_LOCK_FREE 2
127+
#define __FLT16_DECIMAL_DIG__ 5
128+
#define __FLT32_HAS_QUIET_NAN__ 1
129+
#define __LONG_LONG_MAX__ 0x7fffffffffffffffLL
130+
#define __SIZEOF_SIZE_T__ 8
131+
#define __SIG_ATOMIC_WIDTH__ 32
132+
#define __ARM_ALIGN_MAX_PWR 28
133+
#define __SIZEOF_WINT_T__ 4
134+
#define __LONG_LONG_WIDTH__ 64
135+
#define __FLT32_MAX_EXP__ 128
136+
#define __ARM_FP16_FORMAT_IEEE 1
137+
#define __FLT_MIN_EXP__ (-125)
138+
#define __FLT64_NORM_MAX__ 1.79769313486231570814527423731704357e+308F64
139+
#define __GCC_HAVE_DWARF2_CFI_ASM 1
140+
#define __FLT32X_MIN_EXP__ (-1021)
141+
#define __INT_FAST64_TYPE__ long int
142+
#define __ARM_FP16_ARGS 1
143+
#define __FP_FAST_FMAF 1
144+
#define __FLT128_NORM_MAX__ 1.18973149535723176508575932662800702e+4932F128
145+
#define __FLT64_DENORM_MIN__ 4.94065645841246544176568792868221372e-324F64
146+
#define __DBL_MIN__ ((double)2.22507385850720138309023271733240406e-308L)
147+
#define __ARM_FEATURE_CLZ 1
148+
#define __FLT16_DENORM_MIN__ 5.96046447753906250000000000000000000e-8F16
149+
#define __unix__ 1
150+
#define __FLT64X_NORM_MAX__ 1.18973149535723176508575932662800702e+4932F64x
151+
#define __SIZEOF_POINTER__ 8
152+
#define __GXX_ABI_VERSION 1014
153+
#define __LP64__ 1
154+
#define __DBL_HAS_QUIET_NAN__ 1
155+
#define __FLT_EVAL_METHOD_C99__ 0
156+
#define __FLT32X_EPSILON__ 2.22044604925031308084726333618164062e-16F32x
157+
#define __FLT64_MIN_EXP__ (-1021)
158+
#define __UINT64_MAX__ 0xffffffffffffffffUL
159+
#define __LDBL_DECIMAL_DIG__ 36
160+
#define __FLT_MAX__ 3.40282346638528859811704183484516925e+38F
161+
#define __aarch64__ 1
162+
#define __FLT64_MIN_10_EXP__ (-307)
163+
#define __FLT64X_DECIMAL_DIG__ 36
164+
#define __REGISTER_PREFIX__
165+
#define __UINT16_MAX__ 0xffff
166+
#define __INTMAX_WIDTH__ 64
167+
#define __LDBL_HAS_INFINITY__ 1
168+
#define __FLT32_MIN__ 1.17549435082228750796873653722224568e-38F32
169+
#define __FLT_DIG__ 6
170+
#define __NO_INLINE__ 1
171+
#define __DEC_EVAL_METHOD__ 2
172+
#define __FLT_MANT_DIG__ 24
173+
#define __FLT16_MIN_10_EXP__ (-4)
174+
#define __VERSION__ "10.3.1 20210621"
175+
#define __UINT64_C(c) c ## UL
176+
#define __WINT_MAX__ 0xffffffffU
177+
#define __INT_LEAST32_MAX__ 0x7fffffff
178+
#define __GCC_ATOMIC_INT_LOCK_FREE 2
179+
#define __FLT32X_MIN__ 2.22507385850720138309023271733240406e-308F32x
180+
#define __FLT128_MAX_EXP__ 16384
181+
#define __FLT32_MANT_DIG__ 24
182+
#define __FLOAT_WORD_ORDER__ __ORDER_LITTLE_ENDIAN__
183+
#define __FLT16_MAX_EXP__ 16
184+
#define __BIGGEST_ALIGNMENT__ 16
185+
#define __INT32_C(c) c
186+
#define __FLT128_HAS_DENORM__ 1
187+
#define __SCHAR_WIDTH__ 8
188+
#define __ORDER_PDP_ENDIAN__ 3412
189+
#define __ARM_64BIT_STATE 1
190+
#define __INT_FAST32_TYPE__ long int
191+
#define ROUND() LOAD("v12", "v13", "v14") SHUF("v12", "v13", "v14") TRAN("v12", "v13", "v14", "v15") STOR("v12", "v13", "v14", "v15")
192+
#define __UINT_LEAST16_TYPE__ short unsigned int
193+
#define __SIZE_TYPE__ long unsigned int
194+
#define __FLT64X_DIG__ 33
195+
#define __ARM_FEATURE_FMA 1
196+
#define __INT8_TYPE__ signed char
197+
#define __ELF__ 1
198+
#define __GCC_ASM_FLAG_OUTPUTS__ 1
199+
#define __GCC_ATOMIC_TEST_AND_SET_TRUEVAL 1
200+
#define __FLT_RADIX__ 2
201+
#define __INT_LEAST16_TYPE__ short int
202+
#define __ARM_ARCH_PROFILE 65
203+
#define __LDBL_EPSILON__ 1.92592994438723585305597794258492732e-34L
204+
#define __UINTMAX_C(c) c ## UL
205+
#define __ARM_PCS_AAPCS64 1
206+
#define __SIG_ATOMIC_MAX__ 0x7fffffff
207+
#define __GCC_ATOMIC_WCHAR_T_LOCK_FREE 2
208+
#define __SIZEOF_PTRDIFF_T__ 8
209+
#define __ATOMIC_RELAXED 0
210+
#define __LDBL_DIG__ 33
211+
#define __AARCH64EL__ 1
212+
#define __INT_FAST16_MAX__ 0x7fffffffffffffffL
213+
#define __FLT64_DIG__ 15
214+
#define __UINT_FAST32_MAX__ 0xffffffffffffffffUL
215+
#define __UINT_LEAST64_TYPE__ long unsigned int
216+
#define __FLT16_EPSILON__ 9.76562500000000000000000000000000000e-4F16
217+
#define __FLT_HAS_QUIET_NAN__ 1
218+
#define __FLT_MAX_10_EXP__ 38
219+
#define __LONG_MAX__ 0x7fffffffffffffffL
220+
#define ROUND_A_FIRST() LOAD("v2", "v3", "v4") ROUND_A()
221+
#define __FLT64X_HAS_DENORM__ 1
222+
#define __FLT_HAS_INFINITY__ 1
223+
#define ROUND_B() SHUF("v12", "v13", "v14") LOAD("v2", "v3", "v4") TRAN("v12", "v13", "v14", "v15") STOR("v12", "v13", "v14", "v15")
224+
#define __unix 1
225+
#define __DBL_HAS_DENORM__ 1
226+
#define __UINT_FAST16_TYPE__ long unsigned int
227+
#define __FLT32X_HAS_QUIET_NAN__ 1
228+
#define __CHAR16_TYPE__ short unsigned int
229+
#define __FLT64X_MAX_EXP__ 16384
230+
#define __SIZE_WIDTH__ 64
231+
#define __INT_LEAST16_MAX__ 0x7fff
232+
#define __FLT16_NORM_MAX__ 6.55040000000000000000000000000000000e+4F16
233+
#define __INT64_MAX__ 0x7fffffffffffffffL
234+
#define __FLT32_DENORM_MIN__ 1.40129846432481707092372958328991613e-45F32
235+
#define __INT_LEAST64_TYPE__ long int
236+
#define __INT16_TYPE__ short int
237+
#define __INT_LEAST8_TYPE__ signed char
238+
#define __FLT16_MAX__ 6.55040000000000000000000000000000000e+4F16
239+
#define __STDC_VERSION__ 201710L
240+
#define __INT_FAST8_MAX__ 0x7f
241+
#define __ARM_ARCH 8
242+
#define __FLT128_MAX__ 1.18973149535723176508575932662800702e+4932F128
243+
#define __INTPTR_MAX__ 0x7fffffffffffffffL
244+
#define linux 1
245+
#define __ARM_FEATURE_UNALIGNED 1
246+
#define __FLT64_HAS_QUIET_NAN__ 1
247+
#define __FLT32_MIN_10_EXP__ (-37)
248+
#define __FLT32X_DIG__ 15
249+
#define __UINT8_TYPE__ unsigned char
250+
#define __PTRDIFF_WIDTH__ 64
251+
#define __FLT64_HAS_INFINITY__ 1
252+
#define __FLT64X_MAX__ 1.18973149535723176508575932662800702e+4932F64x
253+
#define __FLT16_HAS_INFINITY__ 1
254+
#define __SIG_ATOMIC_MIN__ (-__SIG_ATOMIC_MAX__ - 1)
255+
#define __PTRDIFF_MAX__ 0x7fffffffffffffffL
256+
#define __FLT16_MANT_DIG__ 11
257+
#define __INTPTR_TYPE__ long int
258+
#define __UINT16_TYPE__ short unsigned int
259+
#define __WCHAR_TYPE__ unsigned int
260+
#define __UINTPTR_MAX__ 0xffffffffffffffffUL
261+
#define __ARM_ARCH_8A 1
262+
#define __INT_FAST64_MAX__ 0x7fffffffffffffffL
263+
#define __FLT_NORM_MAX__ 3.40282346638528859811704183484516925e+38F
264+
#define __FLT32_HAS_INFINITY__ 1
265+
#define __UINT_FAST64_TYPE__ long unsigned int
266+
#define __INT_MAX__ 0x7fffffff
267+
#define __INT64_TYPE__ long int
268+
#define __FLT_MAX_EXP__ 128
269+
#define __ORDER_BIG_ENDIAN__ 4321
270+
#define __DBL_MANT_DIG__ 53
271+
#define __INT_LEAST64_MAX__ 0x7fffffffffffffffL
272+
#define __GCC_ATOMIC_CHAR16_T_LOCK_FREE 2
273+
#define __FP_FAST_FMAF32 1
274+
#define __UINT_LEAST32_TYPE__ unsigned int
275+
#define __SIZEOF_SHORT__ 2
276+
#define __FLT32_NORM_MAX__ 3.40282346638528859811704183484516925e+38F32
277+
#define __GCC_ATOMIC_BOOL_LOCK_FREE 2
278+
#define __FLT64_MAX__ 1.79769313486231570814527423731704357e+308F64
279+
#define __WINT_WIDTH__ 32
280+
#define __FP_FAST_FMAF64 1
281+
#define __INT_LEAST8_MAX__ 0x7f
282+
#define __INT_LEAST64_WIDTH__ 64
283+
#define __FLT32X_MAX_10_EXP__ 308
284+
#define __SIZEOF_INT128__ 16
285+
#define __FLT16_MIN__ 6.10351562500000000000000000000000000e-5F16
286+
#define __LDBL_MAX_10_EXP__ 4932
287+
#define __DBL_EPSILON__ ((double)2.22044604925031308084726333618164062e-16L)
288+
#define __FLT32_MIN_EXP__ (-125)
289+
#define __FLT128_MIN__ 3.36210314311209350626267781732175260e-4932F128
290+
#define _LP64 1
291+
#define __UINT8_C(c) c
292+
#define __FLT64_MAX_EXP__ 1024
293+
#define __INT_LEAST32_TYPE__ int
294+
#define __UINT64_TYPE__ long unsigned int
295+
#define __ARM_NEON 1
296+
#define __FLT128_HAS_QUIET_NAN__ 1
297+
#define __INTMAX_MAX__ 0x7fffffffffffffffL
298+
#define __UINT_FAST8_TYPE__ unsigned char
299+
#define __INT_FAST8_TYPE__ signed char
300+
#define __FLT64X_MIN__ 3.36210314311209350626267781732175260e-4932F64x
301+
#define __GNUC_STDC_INLINE__ 1
302+
#define __FLT64_HAS_DENORM__ 1
303+
#define __FLT32_EPSILON__ 1.19209289550781250000000000000000000e-7F32
304+
#define __FP_FAST_FMAF32x 1
305+
#define __FLT16_HAS_DENORM__ 1
306+
#define __STDC_UTF_32__ 1
307+
#define __INT_FAST8_WIDTH__ 8
308+
#define __FLT32X_MAX__ 1.79769313486231570814527423731704357e+308F32x
309+
#define __DBL_NORM_MAX__ ((double)1.79769313486231570814527423731704357e+308L)
310+
#define __FLT64X_HAS_INFINITY__ 1
311+
#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
312+
#define __ARM_ALIGN_MAX_STACK_PWR 16
313+
#define __LDBL_DENORM_MIN__ 6.47517511943802511092443895822764655e-4966L
314+
#define __SIZEOF_WCHAR_T__ 4
315+
#define unix 1
316+
#define __UINT32_C(c) c ## U
317+
#define __FLT_DENORM_MIN__ 1.40129846432481707092372958328991613e-45F
318+
#define __WINT_MIN__ 0U
319+
#define __INT8_MAX__ 0x7f
320+
#define __LONG_WIDTH__ 64
321+
#define __FLT32X_NORM_MAX__ 1.79769313486231570814527423731704357e+308F32x
322+
#define __CHAR32_TYPE__ unsigned int
323+
#define __ARM_FEATURE_NUMERIC_MAXMIN 1
324+
#define __INT32_TYPE__ int
325+
#define __SIZEOF_DOUBLE__ 8
326+
#define __FLT_MIN_10_EXP__ (-37)
327+
#define __FLT64_MIN__ 2.22507385850720138309023271733240406e-308F64
328+
#define __INT_LEAST32_WIDTH__ 32
329+
#define __SIZEOF_FLOAT__ 4
330+
#define __ATOMIC_CONSUME 1
331+
#define __GNUC_MINOR__ 3
332+
#define __INT_FAST16_WIDTH__ 64
333+
#define __UINTMAX_MAX__ 0xffffffffffffffffUL
334+
#define __FLT32X_DENORM_MIN__ 4.94065645841246544176568792868221372e-324F32x
335+
#define SHUF(A,B,C) "ushr %[t0].16b, "A".16b, #2 \n\t" "ushr %[t1].16b, "B".16b, #4 \n\t" "ushr %[t2].16b, "C".16b, #6 \n\t" "sli %[t1].16b, "A".16b, #4 \n\t" "sli %[t2].16b, "B".16b, #2 \n\t" "and %[t1].16b, %[t1].16b, %[n63].16b \n\t" "and %[t2].16b, %[t2].16b, %[n63].16b \n\t" "and %[t3].16b, "C".16b, %[n63].16b \n\t"
336+
#define __DBL_MAX_10_EXP__ 308
337+
#define __INT16_C(c) c
338+
#define __ARM_ARCH_ISA_A64 1
339+
#define __STDC__ 1
340+
#define __PTRDIFF_TYPE__ long int
341+
#define TRAN(A,B,C,D) "tbl "A".16b, {v8.16b-v11.16b}, %[t0].16b \n\t" "tbl "B".16b, {v8.16b-v11.16b}, %[t1].16b \n\t" "tbl "C".16b, {v8.16b-v11.16b}, %[t2].16b \n\t" "tbl "D".16b, {v8.16b-v11.16b}, %[t3].16b \n\t"
342+
#define __ATOMIC_SEQ_CST 5
343+
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1
344+
#define ROUND_B_LAST() SHUF("v12", "v13", "v14") TRAN("v12", "v13", "v14", "v15") STOR("v12", "v13", "v14", "v15")
345+
#define __UINT32_TYPE__ unsigned int
346+
#define __FLT32X_MIN_10_EXP__ (-307)
347+
#define __UINTPTR_TYPE__ long unsigned int
348+
#define __linux__ 1
349+
#define __LDBL_MIN_10_EXP__ (-4931)
350+
#define __FLT128_EPSILON__ 1.92592994438723585305597794258492732e-34F128
351+
#define __SIZEOF_LONG_LONG__ 8
352+
#define __FLT128_DECIMAL_DIG__ 36
353+
#define __GCC_ATOMIC_LLONG_LOCK_FREE 2
354+
#define __FLT_DECIMAL_DIG__ 9
355+
#define __UINT_FAST16_MAX__ 0xffffffffffffffffUL
356+
#define __LDBL_NORM_MAX__ 1.18973149535723176508575932662800702e+4932L
357+
#define __GCC_ATOMIC_SHORT_LOCK_FREE 2
358+
#define __ORDER_LITTLE_ENDIAN__ 1234
359+
#define __SIZE_MAX__ 0xffffffffffffffffUL
360+
#define __UINT_LEAST32_MAX__ 0xffffffffU
361+
#define __ATOMIC_ACQ_REL 4
362+
#define __ATOMIC_RELEASE 3

‎deps/base64/base64/lib/arch/neon64/codec.c

+7-2
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,13 @@ load_64byte_table (const uint8_t *p)
5858
#include "../generic/32/dec_loop.c"
5959
#include "../generic/64/enc_loop.c"
6060
#include "dec_loop.c"
61-
#include "enc_reshuffle.c"
62-
#include "enc_loop.c"
61+
62+
#ifdef BASE64_NEON64_USE_ASM
63+
# include "enc_loop_asm.c"
64+
#else
65+
# include "enc_reshuffle.c"
66+
# include "enc_loop.c"
67+
#endif
6368

6469
#endif // BASE64_USE_NEON64
6570

‎deps/base64/base64/lib/arch/neon64/enc_loop.c

-67
Original file line numberDiff line numberDiff line change
@@ -1,72 +1,6 @@
1-
#ifdef BASE64_NEON64_USE_ASM
2-
static inline void
3-
enc_loop_neon64_inner_asm (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
4-
{
5-
// This function duplicates the functionality of enc_loop_neon64_inner,
6-
// but entirely with inline assembly. This gives a significant speedup
7-
// over using NEON intrinsics, which do not always generate very good
8-
// code. The logic of the assembly is directly lifted from the
9-
// intrinsics version, so it can be used as a guide to this code.
10-
11-
// Temporary registers, used as scratch space.
12-
uint8x16_t tmp0, tmp1, tmp2, tmp3;
13-
14-
// Numeric constant.
15-
const uint8x16_t n63 = vdupq_n_u8(63);
16-
17-
__asm__ (
18-
19-
// Load 48 bytes and deinterleave. The bytes are loaded to
20-
// hard-coded registers v12, v13 and v14, to ensure that they
21-
// are contiguous. Increment the source pointer.
22-
"ld3 {v12.16b, v13.16b, v14.16b}, [%[src]], #48 \n\t"
23-
24-
// Reshuffle the bytes using temporaries.
25-
"ushr %[t0].16b, v12.16b, #2 \n\t"
26-
"ushr %[t1].16b, v13.16b, #4 \n\t"
27-
"ushr %[t2].16b, v14.16b, #6 \n\t"
28-
"sli %[t1].16b, v12.16b, #4 \n\t"
29-
"sli %[t2].16b, v13.16b, #2 \n\t"
30-
"and %[t1].16b, %[t1].16b, %[n63].16b \n\t"
31-
"and %[t2].16b, %[t2].16b, %[n63].16b \n\t"
32-
"and %[t3].16b, v14.16b, %[n63].16b \n\t"
33-
34-
// Translate the values to the Base64 alphabet.
35-
"tbl v12.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t0].16b \n\t"
36-
"tbl v13.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t1].16b \n\t"
37-
"tbl v14.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t2].16b \n\t"
38-
"tbl v15.16b, {%[l0].16b, %[l1].16b, %[l2].16b, %[l3].16b}, %[t3].16b \n\t"
39-
40-
// Store 64 bytes and interleave. Increment the dest pointer.
41-
"st4 {v12.16b, v13.16b, v14.16b, v15.16b}, [%[dst]], #64 \n\t"
42-
43-
// Outputs (modified).
44-
: [src] "+r" (*s),
45-
[dst] "+r" (*o),
46-
[t0] "=&w" (tmp0),
47-
[t1] "=&w" (tmp1),
48-
[t2] "=&w" (tmp2),
49-
[t3] "=&w" (tmp3)
50-
51-
// Inputs (not modified).
52-
: [n63] "w" (n63),
53-
[l0] "w" (tbl_enc.val[0]),
54-
[l1] "w" (tbl_enc.val[1]),
55-
[l2] "w" (tbl_enc.val[2]),
56-
[l3] "w" (tbl_enc.val[3])
57-
58-
// Clobbers.
59-
: "v12", "v13", "v14", "v15"
60-
);
61-
}
62-
#endif
63-
641
static inline void
652
enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_enc)
663
{
67-
#ifdef BASE64_NEON64_USE_ASM
68-
enc_loop_neon64_inner_asm(s, o, tbl_enc);
69-
#else
704
// Load 48 bytes and deinterleave:
715
uint8x16x3_t src = vld3q_u8(*s);
726

@@ -86,7 +20,6 @@ enc_loop_neon64_inner (const uint8_t **s, uint8_t **o, const uint8x16x4_t tbl_en
8620

8721
*s += 48;
8822
*o += 64;
89-
#endif
9023
}
9124

9225
static inline void
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// Apologies in advance for combining the preprocessor with inline assembly,
2+
// two notoriously gnarly parts of C, but it was necessary to avoid a lot of
3+
// code repetition. The preprocessor is used to template large sections of
4+
// inline assembly that differ only in the registers used. If the code was
5+
// written out by hand, it would become very large and hard to audit.
6+
7+
// Generate a block of inline assembly that loads three user-defined registers
8+
// A, B, C from memory and deinterleaves them, post-incrementing the src
9+
// pointer. The register set should be sequential.
10+
#define LOAD(A, B, C) \
11+
"ld3 {"A".16b, "B".16b, "C".16b}, [%[src]], #48 \n\t"
12+
13+
// Generate a block of inline assembly that takes three deinterleaved registers
14+
// and shuffles the bytes. The output is in temporary registers t0..t3.
15+
// Per byte lane, with A, B, C being the three deinterleaved input bytes and
// n63 holding the constant 63 in every lane, the sequence below computes the
// four 6-bit table indices:
//
//   t0 =   A >> 2                        (already fits in 6 bits, no mask)
//   t1 = ((A << 4) | (B >> 4)) & 63
//   t2 = ((B << 2) | (C >> 6)) & 63
//   t3 =   C & 63
//
// `ushr` first places the high bits of B (resp. C) in the low bits of t1
// (resp. t2); `sli` (shift left and insert) then writes the shifted A (resp.
// B) above them while preserving those low bits, so no separate `orr` is
// needed. The final `and` discards the two surplus top bits.
#define SHUF(A, B, C) \
16+
"ushr %[t0].16b, "A".16b, #2 \n\t" \
17+
"ushr %[t1].16b, "B".16b, #4 \n\t" \
18+
"ushr %[t2].16b, "C".16b, #6 \n\t" \
19+
"sli %[t1].16b, "A".16b, #4 \n\t" \
20+
"sli %[t2].16b, "B".16b, #2 \n\t" \
21+
"and %[t1].16b, %[t1].16b, %[n63].16b \n\t" \
22+
"and %[t2].16b, %[t2].16b, %[n63].16b \n\t" \
23+
"and %[t3].16b, "C".16b, %[n63].16b \n\t"
24+
25+
// Generate a block of inline assembly that takes temporary registers t0..t3
26+
// and translates them to the base64 alphabet, using a table loaded into
27+
// v8..v11. The output is in user-defined registers A..D.
28+
#define TRAN(A, B, C, D) \
29+
"tbl "A".16b, {v8.16b-v11.16b}, %[t0].16b \n\t" \
30+
"tbl "B".16b, {v8.16b-v11.16b}, %[t1].16b \n\t" \
31+
"tbl "C".16b, {v8.16b-v11.16b}, %[t2].16b \n\t" \
32+
"tbl "D".16b, {v8.16b-v11.16b}, %[t3].16b \n\t"
33+
34+
// Generate a block of inline assembly that interleaves four registers and
35+
// stores them, post-incrementing the destination pointer.
36+
#define STOR(A, B, C, D) \
37+
"st4 {"A".16b, "B".16b, "C".16b, "D".16b}, [%[dst]], #64 \n\t"
38+
39+
// Generate a block of inline assembly that generates a single self-contained
40+
// encoder round: fetch the data, process it, and store the result.
41+
#define ROUND() \
42+
LOAD("v12", "v13", "v14") \
43+
SHUF("v12", "v13", "v14") \
44+
TRAN("v12", "v13", "v14", "v15") \
45+
STOR("v12", "v13", "v14", "v15")
46+
47+
// Generate a block of assembly that generates a type A interleaved encoder
48+
// round. It uses registers that were loaded by the previous type B round, and
49+
// in turn loads registers for the next type B round.
50+
#define ROUND_A() \
51+
SHUF("v2", "v3", "v4") \
52+
LOAD("v12", "v13", "v14") \
53+
TRAN("v2", "v3", "v4", "v5") \
54+
STOR("v2", "v3", "v4", "v5")
55+
56+
// Type B interleaved encoder round. Same as type A, but register sets swapped.
57+
#define ROUND_B() \
58+
SHUF("v12", "v13", "v14") \
59+
LOAD("v2", "v3", "v4") \
60+
TRAN("v12", "v13", "v14", "v15") \
61+
STOR("v12", "v13", "v14", "v15")
62+
63+
// The first type A round needs to load its own registers.
64+
#define ROUND_A_FIRST() \
65+
LOAD("v2", "v3", "v4") \
66+
ROUND_A()
67+
68+
// The last type B round omits the load for the next step.
69+
#define ROUND_B_LAST() \
70+
SHUF("v12", "v13", "v14") \
71+
TRAN("v12", "v13", "v14", "v15") \
72+
STOR("v12", "v13", "v14", "v15")
73+
74+
// Suppress clang's warning that the literal string in the asm statement is
75+
// overlong (longer than the ISO-mandated minimum size of 4095 bytes for C99
76+
// compilers). It may be true, but the goal here is not C99 portability.
77+
#pragma GCC diagnostic push
78+
#pragma GCC diagnostic ignored "-Woverlength-strings"
79+
80+
static inline void
81+
enc_loop_neon64 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
82+
{
83+
size_t rounds = *slen / 48;
84+
85+
if (rounds == 0) {
86+
return;
87+
}
88+
89+
*slen -= rounds * 48; // 48 bytes consumed per round.
90+
*olen += rounds * 64; // 64 bytes produced per round.
91+
92+
// Number of times to go through the 8x loop.
93+
size_t loops = rounds / 8;
94+
95+
// Number of rounds remaining after the 8x loop.
96+
rounds %= 8;
97+
98+
// Temporary registers, used as scratch space.
99+
uint8x16_t tmp0, tmp1, tmp2, tmp3;
100+
101+
__asm__ volatile (
102+
103+
// Load the encoding table into v8..v11.
104+
" ld1 {v8.16b-v11.16b}, [%[tbl]] \n\t"
105+
106+
// If there are eight rounds or more, enter an 8x unrolled loop
107+
// of interleaved encoding rounds. The rounds interleave memory
108+
// operations (load/store) with data operations to maximize
109+
// pipeline throughput.
110+
" cbz %[loops], 4f \n\t"
111+
112+
// The SIMD instructions do not touch the flags.
113+
"88: subs %[loops], %[loops], #1 \n\t"
114+
" " ROUND_A_FIRST()
115+
" " ROUND_B()
116+
" " ROUND_A()
117+
" " ROUND_B()
118+
" " ROUND_A()
119+
" " ROUND_B()
120+
" " ROUND_A()
121+
" " ROUND_B_LAST()
122+
" b.ne 88b \n\t"
123+
124+
// Enter a 4x unrolled loop for rounds of 4 or more.
125+
"4: cmp %[rounds], #4 \n\t"
126+
" b.lt 30f \n\t"
127+
" " ROUND_A_FIRST()
128+
" " ROUND_B()
129+
" " ROUND_A()
130+
" " ROUND_B_LAST()
131+
" sub %[rounds], %[rounds], #4 \n\t"
132+
133+
// Dispatch the remaining rounds 0..3.
134+
"30: cbz %[rounds], 0f \n\t"
135+
" cmp %[rounds], #2 \n\t"
136+
" b.eq 2f \n\t"
137+
" b.lt 1f \n\t"
138+
139+
// Block of non-interlaced encoding rounds, which can each
140+
// individually be jumped to. Rounds fall through to the next.
141+
"3: " ROUND()
142+
"2: " ROUND()
143+
"1: " ROUND()
144+
"0: \n\t"
145+
146+
// Outputs (modified).
147+
: [loops] "+r" (loops),
148+
[src] "+r" (*s),
149+
[dst] "+r" (*o),
150+
[t0] "=&w" (tmp0),
151+
[t1] "=&w" (tmp1),
152+
[t2] "=&w" (tmp2),
153+
[t3] "=&w" (tmp3)
154+
155+
// Inputs (not modified).
156+
: [rounds] "r" (rounds),
157+
[tbl] "r" (base64_table_enc_6bit),
158+
[n63] "w" (vdupq_n_u8(63))
159+
160+
// Clobbers.
161+
: "v2", "v3", "v4", "v5",
162+
"v8", "v9", "v10", "v11",
163+
"v12", "v13", "v14", "v15"
164+
);
165+
}
166+
167+
#pragma GCC diagnostic pop

0 commit comments

Comments
 (0)
Please sign in to comment.