Skip to content

Commit 52d2c03

Browse files
nodejs-github-bot aduh95
authored and committed Nov 3, 2024
deps: update simdutf to 5.3.4
PR-URL: #54312 Reviewed-By: Marco Ippolito <marcoippolito54@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent dd882ac commit 52d2c03

File tree

2 files changed

+568
-409
lines changed

2 files changed

+568
-409
lines changed
 

‎deps/simdutf/simdutf.cpp

+485-366
Large diffs are not rendered by default.

‎deps/simdutf/simdutf.h

+83-43
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-07-31 17:22:10 -0400. Do not edit! */
1+
/* auto-generated on 2024-08-09 09:52:29 -0400. Do not edit! */
22
/* begin file include/simdutf.h */
33
#ifndef SIMDUTF_H
44
#define SIMDUTF_H
@@ -433,8 +433,11 @@
433433
#define SIMDUTF_POP_DISABLE_WARNINGS __pragma(warning( pop ))
434434

435435
#else // SIMDUTF_REGULAR_VISUAL_STUDIO
436-
436+
#if defined(__OPTIMIZE__) || defined(NDEBUG)
437437
#define simdutf_really_inline inline __attribute__((always_inline))
438+
#else
439+
#define simdutf_really_inline inline
440+
#endif
438441
#define simdutf_never_inline inline __attribute__((noinline))
439442

440443
#define simdutf_unused __attribute__((unused))
@@ -594,7 +597,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
594597
#define SIMDUTF_SIMDUTF_VERSION_H
595598

596599
/** The version of simdutf being used (major.minor.revision) */
597-
#define SIMDUTF_VERSION "5.3.1"
600+
#define SIMDUTF_VERSION "5.3.4"
598601

599602
namespace simdutf {
600603
enum {
@@ -609,7 +612,7 @@ enum {
609612
/**
610613
* The revision (major.minor.REVISION) of simdutf being used.
611614
*/
612-
SIMDUTF_VERSION_REVISION = 1
615+
SIMDUTF_VERSION_REVISION = 4
613616
};
614617
} // namespace simdutf
615618

@@ -1314,7 +1317,7 @@ simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input,
13141317
/**
13151318
* Convert valid UTF-8 string into latin1 string.
13161319
*
1317-
* This function assumes that the input string is valid UTF-8.
1320+
* This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
13181321
*
13191322
* This function is not BOM-aware.
13201323
*
@@ -1387,7 +1390,8 @@ simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t le
13871390
/**
13881391
* Compute the number of bytes that this UTF-8 string would require in Latin1 format.
13891392
*
1390-
* This function does not validate the input.
1393+
* This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1394+
* the result is implementation defined.
13911395
*
13921396
* This function is not BOM-aware.
13931397
*
@@ -1400,7 +1404,8 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t le
14001404
/**
14011405
* Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
14021406
*
1403-
* This function does not validate the input.
1407+
* This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1408+
* the result is implementation defined.
14041409
*
14051410
* This function is not BOM-aware.
14061411
*
@@ -1415,7 +1420,8 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t len
14151420
*
14161421
* This function is equivalent to count_utf8
14171422
*
1418-
* This function does not validate the input.
1423+
* This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1424+
* the result is implementation defined.
14191425
*
14201426
* This function is not BOM-aware.
14211427
*
@@ -1628,7 +1634,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, s
16281634
/**
16291635
* Using native endianness, convert UTF-16 string into Latin1 string.
16301636
*
1631-
* This function assumes that the input string is valid UTF-8.
1637+
* This function assumes that the input string is valid UTF-16 and that it can be represented as Latin1.
16321638
*
16331639
* This function is not BOM-aware.
16341640
*
@@ -1642,7 +1648,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input,
16421648
/**
16431649
* Convert valid UTF-16LE string into Latin1 string.
16441650
*
1645-
* This function assumes that the input string is valid UTF-16LE.
1651+
* This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
16461652
*
16471653
* This function is not BOM-aware.
16481654
*
@@ -1656,7 +1662,7 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * inpu
16561662
/**
16571663
* Convert valid UTF-16BE string into Latin1 string.
16581664
*
1659-
* This function assumes that the input string is valid UTF-16BE.
1665+
* This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
16601666
*
16611667
* This function is not BOM-aware.
16621668
*
@@ -1671,7 +1677,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * inpu
16711677
/**
16721678
* Convert valid UTF-16LE string into UTF-8 string.
16731679
*
1674-
* This function assumes that the input string is valid UTF-16LE.
1680+
* This function assumes that the input string is valid UTF-16LE.
16751681
*
16761682
* This function is not BOM-aware.
16771683
*
@@ -1833,7 +1839,8 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input
18331839
/*
18341840
* Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
18351841
*
1836-
* This function does not validate the input.
1842+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1843+
* the result is implementation defined.
18371844
*
18381845
* This function is not BOM-aware.
18391846
*
@@ -1847,7 +1854,8 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
18471854
* Using native endianness; Compute the number of bytes that this UTF-16
18481855
* string would require in UTF-8 format.
18491856
*
1850-
* This function does not validate the input.
1857+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1858+
* the result is implementation defined.
18511859
*
18521860
* @param input the UTF-16 string to convert
18531861
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1858,7 +1866,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t
18581866
/**
18591867
* Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
18601868
*
1861-
* This function does not validate the input.
1869+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1870+
* the result is implementation defined.
18621871
*
18631872
* @param input the UTF-16LE string to convert
18641873
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1869,7 +1878,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size
18691878
/**
18701879
* Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
18711880
*
1872-
* This function does not validate the input.
1881+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1882+
* the result is implementation defined.
18731883
*
18741884
* @param input the UTF-16BE string to convert
18751885
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1986,7 +1996,7 @@ simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *
19861996
/**
19871997
* Convert valid UTF-32 string into Latin1 string.
19881998
*
1989-
* This function assumes that the input string is valid UTF-32.
1999+
* This function assumes that the input string is valid UTF-32 and that it can be represented as Latin1.
19902000
*
19912001
* This function is not BOM-aware.
19922002
*
@@ -2117,7 +2127,8 @@ void change_endianness_utf16(const char16_t * input, size_t length, char16_t * o
21172127
/**
21182128
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
21192129
*
2120-
* This function does not validate the input.
2130+
* This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
2131+
* the result is implementation defined.
21212132
*
21222133
* @param input the UTF-32 string to convert
21232134
* @param length the length of the string in 4-byte code units (char32_t)
@@ -2128,7 +2139,8 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t
21282139
/**
21292140
* Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
21302141
*
2131-
* This function does not validate the input.
2142+
* This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
2143+
* the result is implementation defined.
21322144
*
21332145
* @param input the UTF-32 string to convert
21342146
* @param length the length of the string in 4-byte code units (char32_t)
@@ -2142,7 +2154,8 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_
21422154
*
21432155
* This function is equivalent to count_utf16.
21442156
*
2145-
* This function does not validate the input.
2157+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2158+
* the result is implementation defined.
21462159
*
21472160
* This function is not BOM-aware.
21482161
*
@@ -2157,7 +2170,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_
21572170
*
21582171
* This function is equivalent to count_utf16le.
21592172
*
2160-
* This function does not validate the input.
2173+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2174+
* the result is implementation defined.
21612175
*
21622176
* This function is not BOM-aware.
21632177
*
@@ -2172,7 +2186,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, siz
21722186
*
21732187
* This function is equivalent to count_utf16be.
21742188
*
2175-
* This function does not validate the input.
2189+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2190+
* the result is implementation defined.
21762191
*
21772192
* This function is not BOM-aware.
21782193
*
@@ -2187,6 +2202,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
21872202
* it is valid.
21882203
*
21892204
* This function assumes that the input string is valid UTF-16 (native endianness).
2205+
* It is acceptable to pass invalid UTF-16 strings but in such cases
2206+
* the result is implementation defined.
21902207
*
21912208
* This function is not BOM-aware.
21922209
*
@@ -2201,6 +2218,8 @@ simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) no
22012218
* it is valid.
22022219
*
22032220
* This function assumes that the input string is valid UTF-16LE.
2221+
* It is acceptable to pass invalid UTF-16 strings but in such cases
2222+
* the result is implementation defined.
22042223
*
22052224
* This function is not BOM-aware.
22062225
*
@@ -2215,6 +2234,8 @@ simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length)
22152234
* it is valid.
22162235
*
22172236
* This function assumes that the input string is valid UTF-16BE.
2237+
* It is acceptable to pass invalid UTF-16 strings but in such cases
2238+
* the result is implementation defined.
22182239
*
22192240
* This function is not BOM-aware.
22202241
*
@@ -2229,6 +2250,8 @@ simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length)
22292250
* it is valid.
22302251
*
22312252
* This function assumes that the input string is valid UTF-8.
2253+
* It is acceptable to pass invalid UTF-8 strings but in such cases
2254+
* the result is implementation defined.
22322255
*
22332256
* @param input the UTF-8 string to process
22342257
* @param length the length of the string in bytes
@@ -2739,10 +2762,10 @@ class implementation {
27392762
*/
27402763
simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
27412764

2742-
/**
2765+
/**
27432766
* Convert valid UTF-8 string into latin1 string.
27442767
*
2745-
* This function assumes that the input string is valid UTF-8.
2768+
* This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
27462769
*
27472770
* This function is not BOM-aware.
27482771
*
@@ -2871,7 +2894,8 @@ class implementation {
28712894
/**
28722895
* Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
28732896
*
2874-
* This function does not validate the input.
2897+
* This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
2898+
* the result is implementation defined.
28752899
*
28762900
* @param input the UTF-8 string to process
28772901
* @param length the length of the string in bytes
@@ -2882,7 +2906,8 @@ class implementation {
28822906
/**
28832907
* Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
28842908
*
2885-
* This function is equivalent to count_utf8.
2909+
* This function is equivalent to count_utf8. It is acceptable to pass invalid UTF-8 strings but in such cases
2910+
* the result is implementation defined.
28862911
*
28872912
* This function does not validate the input.
28882913
*
@@ -2957,7 +2982,7 @@ class implementation {
29572982
/**
29582983
* Convert valid UTF-16LE string into Latin1 string.
29592984
*
2960-
* This function assumes that the input string is valid UTF-8.
2985+
* This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
29612986
29622987
* This function is not BOM-aware.
29632988
*
@@ -2971,7 +2996,7 @@ class implementation {
29712996
/**
29722997
* Convert valid UTF-16BE string into Latin1 string.
29732998
*
2974-
* This function assumes that the input string is valid UTF-8.
2999+
* This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
29753000
*
29763001
* This function is not BOM-aware.
29773002
*
@@ -3161,7 +3186,8 @@ class implementation {
31613186
/**
31623187
* Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
31633188
*
3164-
* This function does not validate the input.
3189+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3190+
* the result is implementation defined.
31653191
*
31663192
* This function is not BOM-aware.
31673193
*
@@ -3174,7 +3200,8 @@ class implementation {
31743200
/**
31753201
* Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
31763202
*
3177-
* This function does not validate the input.
3203+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3204+
* the result is implementation defined.
31783205
*
31793206
* This function is not BOM-aware.
31803207
*
@@ -3275,7 +3302,7 @@ class implementation {
32753302
simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
32763303

32773304

3278-
/**
3305+
/**
32793306
* Return the number of bytes that this UTF-16 string would require in Latin1 format.
32803307
*
32813308
*
@@ -3399,7 +3426,8 @@ class implementation {
33993426
/**
34003427
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
34013428
*
3402-
* This function does not validate the input.
3429+
* This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3430+
* the result is implementation defined.
34033431
*
34043432
* @param input the UTF-32 string to convert
34053433
* @param length the length of the string in 4-byte code units (char32_t)
@@ -3410,7 +3438,8 @@ class implementation {
34103438
/**
34113439
* Compute the number of bytes that this UTF-32 string would require in Latin1 format.
34123440
*
3413-
* This function does not validate the input.
3441+
* This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3442+
* the result is implementation defined.
34143443
*
34153444
* @param length the length of the string in 4-byte code units (char32_t)
34163445
* @return the number of bytes required to encode the UTF-32 string as Latin1
@@ -3420,7 +3449,8 @@ class implementation {
34203449
/**
34213450
* Compute the number of bytes that this UTF-8 string would require in Latin1 format.
34223451
*
3423-
* This function does not validate the input.
3452+
* This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
3453+
* the result is implementation defined.
34243454
*
34253455
* @param input the UTF-8 string to convert
34263456
* @param length the length of the string in bytes
@@ -3431,7 +3461,8 @@ class implementation {
34313461
/*
34323462
* Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
34333463
*
3434-
* This function does not validate the input.
3464+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3465+
* the result is implementation defined.
34353466
*
34363467
* This function is not BOM-aware.
34373468
*
@@ -3444,7 +3475,8 @@ class implementation {
34443475
/**
34453476
* Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
34463477
*
3447-
* This function does not validate the input.
3478+
* This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3479+
* the result is implementation defined.
34483480
*
34493481
* @param input the UTF-32 string to convert
34503482
* @param length the length of the string in 4-byte code units (char32_t)
@@ -3453,11 +3485,9 @@ class implementation {
34533485
simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
34543486

34553487

3456-
/**
3488+
/**
34573489
* Return the number of bytes that this UTF-32 string would require in Latin1 format.
34583490
*
3459-
* This function does not validate the input.
3460-
*
34613491
* @param input the UTF-32 string to convert
34623492
* @param length the length of the string in 4-byte code units (char32_t)
34633493
* @return the number of bytes required to encode the UTF-32 string as Latin1
@@ -3469,7 +3499,8 @@ class implementation {
34693499
*
34703500
* This function is equivalent to count_utf16le.
34713501
*
3472-
* This function does not validate the input.
3502+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3503+
* the result is implementation defined.
34733504
*
34743505
* This function is not BOM-aware.
34753506
*
@@ -3484,7 +3515,8 @@ class implementation {
34843515
*
34853516
* This function is equivalent to count_utf16be.
34863517
*
3487-
* This function does not validate the input.
3518+
* This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3519+
* the result is implementation defined.
34883520
*
34893521
* This function is not BOM-aware.
34903522
*
@@ -3499,6 +3531,8 @@ class implementation {
34993531
* it is valid.
35003532
*
35013533
* This function assumes that the input string is valid UTF-16LE.
3534+
* It is acceptable to pass invalid UTF-16 strings but in such cases
3535+
* the result is implementation defined.
35023536
*
35033537
* This function is not BOM-aware.
35043538
*
@@ -3513,6 +3547,8 @@ class implementation {
35133547
* it is valid.
35143548
*
35153549
* This function assumes that the input string is valid UTF-16BE.
3550+
* It is acceptable to pass invalid UTF-16 strings but in such cases
3551+
* the result is implementation defined.
35163552
*
35173553
* This function is not BOM-aware.
35183554
*
@@ -3528,6 +3564,8 @@ class implementation {
35283564
* it is valid.
35293565
*
35303566
* This function assumes that the input string is valid UTF-8.
3567+
* It is acceptable to pass invalid UTF-8 strings but in such cases
3568+
* the result is implementation defined.
35313569
*
35323570
* @param input the UTF-8 string to process
35333571
* @param length the length of the string in bytes
@@ -3538,7 +3576,8 @@ class implementation {
35383576
/**
35393577
* Provide the maximal binary length in bytes given the base64 input.
35403578
* In general, if the input contains ASCII spaces, the result will be less than
3541-
* the maximum length.
3579+
* the maximum length. It is acceptable to pass invalid base64 strings but in such cases
3580+
* the result is implementation defined.
35423581
*
35433582
* @param input the base64 input to process
35443583
* @param length the length of the base64 input in bytes
@@ -3549,7 +3588,8 @@ class implementation {
35493588
/**
35503589
* Provide the maximal binary length in bytes given the base64 input.
35513590
* In general, if the input contains ASCII spaces, the result will be less than
3552-
* the maximum length.
3591+
* the maximum length. It is acceptable to pass invalid base64 strings but in such cases
3592+
* the result is implementation defined.
35533593
*
35543594
* @param input the base64 input to process, in ASCII stored as 16-bit units
35553595
* @param length the length of the base64 input in 16-bit units

0 commit comments

Comments
 (0)
Please sign in to comment.