1
- /* auto-generated on 2024-07-31 17:22:10 -0400. Do not edit! */
1
+ /* auto-generated on 2024-08-09 09:52:29 -0400. Do not edit! */
2
2
/* begin file include/simdutf.h */
3
3
#ifndef SIMDUTF_H
4
4
#define SIMDUTF_H
433
433
#define SIMDUTF_POP_DISABLE_WARNINGS __pragma (warning( pop ))
434
434
435
435
#else // SIMDUTF_REGULAR_VISUAL_STUDIO
436
-
436
+ # if defined(__OPTIMIZE__) || defined(NDEBUG)
437
437
#define simdutf_really_inline inline __attribute__ ((always_inline))
438
+ #else
439
+ #define simdutf_really_inline inline
440
+ #endif
438
441
#define simdutf_never_inline inline __attribute__ ((noinline))
439
442
440
443
#define simdutf_unused __attribute__ ((unused))
@@ -594,7 +597,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
594
597
#define SIMDUTF_SIMDUTF_VERSION_H
595
598
596
599
/* * The version of simdutf being used (major.minor.revision) */
597
- #define SIMDUTF_VERSION " 5.3.1 "
600
+ #define SIMDUTF_VERSION " 5.3.4 "
598
601
599
602
namespace simdutf {
600
603
enum {
@@ -609,7 +612,7 @@ enum {
609
612
/* *
610
613
* The revision (major.minor.REVISION) of simdutf being used.
611
614
*/
612
- SIMDUTF_VERSION_REVISION = 1
615
+ SIMDUTF_VERSION_REVISION = 4
613
616
};
614
617
} // namespace simdutf
615
618
@@ -1314,7 +1317,7 @@ simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input,
1314
1317
/* *
1315
1318
* Convert valid UTF-8 string into Latin1 string.
1316
1319
*
1317
- * This function assumes that the input string is valid UTF-8.
1320
+ * This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
1318
1321
*
1319
1322
* This function is not BOM-aware.
1320
1323
*
@@ -1387,7 +1390,8 @@ simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t le
1387
1390
/* *
1388
1391
* Compute the number of bytes that this UTF-8 string would require in Latin1 format.
1389
1392
*
1390
- * This function does not validate the input.
1393
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1394
+ * the result is implementation defined.
1391
1395
*
1392
1396
* This function is not BOM-aware.
1393
1397
*
@@ -1400,7 +1404,8 @@ simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t le
1400
1404
/* *
1401
1405
* Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
1402
1406
*
1403
- * This function does not validate the input.
1407
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1408
+ * the result is implementation defined.
1404
1409
*
1405
1410
* This function is not BOM-aware.
1406
1411
*
@@ -1415,7 +1420,8 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t len
1415
1420
*
1416
1421
* This function is equivalent to count_utf8
1417
1422
*
1418
- * This function does not validate the input.
1423
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
1424
+ * the result is implementation defined.
1419
1425
*
1420
1426
* This function is not BOM-aware.
1421
1427
*
@@ -1628,7 +1634,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, s
1628
1634
/* *
1629
1635
* Using native endianness, convert UTF-16 string into Latin1 string.
1630
1636
*
1631
- * This function assumes that the input string is valid UTF-8 .
1637
+ * This function assumes that the input string is valid UTF-16 and that it can be represented as Latin1.
1632
1638
*
1633
1639
* This function is not BOM-aware.
1634
1640
*
@@ -1642,7 +1648,7 @@ simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input,
1642
1648
/* *
1643
1649
* Convert valid UTF-16LE string into Latin1 string.
1644
1650
*
1645
- * This function assumes that the input string is valid UTF-16LE.
1651
+ * This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
1646
1652
*
1647
1653
* This function is not BOM-aware.
1648
1654
*
@@ -1656,7 +1662,7 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * inpu
1656
1662
/* *
1657
1663
* Convert valid UTF-16BE string into Latin1 string.
1658
1664
*
1659
- * This function assumes that the input string is valid UTF-16BE.
1665
+ * This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
1660
1666
*
1661
1667
* This function is not BOM-aware.
1662
1668
*
@@ -1671,7 +1677,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * inpu
1671
1677
/* *
1672
1678
* Convert valid UTF-16LE string into UTF-8 string.
1673
1679
*
1674
- * This function assumes that the input string is valid UTF-16LE.
1680
+ * This function assumes that the input string is valid UTF-16LE.
1675
1681
*
1676
1682
* This function is not BOM-aware.
1677
1683
*
@@ -1833,7 +1839,8 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input
1833
1839
/*
1834
1840
* Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
1835
1841
*
1836
- * This function does not validate the input.
1842
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1843
+ * the result is implementation defined.
1837
1844
*
1838
1845
* This function is not BOM-aware.
1839
1846
*
@@ -1847,7 +1854,8 @@ simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
1847
1854
* Using native endianness, compute the number of bytes that this UTF-16
1848
1855
* string would require in UTF-8 format.
1849
1856
*
1850
- * This function does not validate the input.
1857
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1858
+ * the result is implementation defined.
1851
1859
*
1852
1860
* @param input the UTF-16 string to convert
1853
1861
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1858,7 +1866,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t
1858
1866
/* *
1859
1867
* Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
1860
1868
*
1861
- * This function does not validate the input.
1869
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1870
+ * the result is implementation defined.
1862
1871
*
1863
1872
* @param input the UTF-16LE string to convert
1864
1873
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1869,7 +1878,8 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size
1869
1878
/* *
1870
1879
* Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
1871
1880
*
1872
- * This function does not validate the input.
1881
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
1882
+ * the result is implementation defined.
1873
1883
*
1874
1884
* @param input the UTF-16BE string to convert
1875
1885
* @param length the length of the string in 2-byte code units (char16_t)
@@ -1986,7 +1996,7 @@ simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *
1986
1996
/* *
1987
1997
* Convert valid UTF-32 string into Latin1 string.
1988
1998
*
1989
- * This function assumes that the input string is valid UTF-32.
1999
+ * This function assumes that the input string is valid UTF-32 and that it can be represented as Latin1.
1990
2000
*
1991
2001
* This function is not BOM-aware.
1992
2002
*
@@ -2117,7 +2127,8 @@ void change_endianness_utf16(const char16_t * input, size_t length, char16_t * o
2117
2127
/* *
2118
2128
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
2119
2129
*
2120
- * This function does not validate the input.
2130
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
2131
+ * the result is implementation defined.
2121
2132
*
2122
2133
* @param input the UTF-32 string to convert
2123
2134
* @param length the length of the string in 4-byte code units (char32_t)
@@ -2128,7 +2139,8 @@ simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t
2128
2139
/* *
2129
2140
* Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
2130
2141
*
2131
- * This function does not validate the input.
2142
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
2143
+ * the result is implementation defined.
2132
2144
*
2133
2145
* @param input the UTF-32 string to convert
2134
2146
* @param length the length of the string in 4-byte code units (char32_t)
@@ -2142,7 +2154,8 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_
2142
2154
*
2143
2155
* This function is equivalent to count_utf16.
2144
2156
*
2145
- * This function does not validate the input.
2157
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2158
+ * the result is implementation defined.
2146
2159
*
2147
2160
* This function is not BOM-aware.
2148
2161
*
@@ -2157,7 +2170,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_
2157
2170
*
2158
2171
* This function is equivalent to count_utf16le.
2159
2172
*
2160
- * This function does not validate the input.
2173
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2174
+ * the result is implementation defined.
2161
2175
*
2162
2176
* This function is not BOM-aware.
2163
2177
*
@@ -2172,7 +2186,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, siz
2172
2186
*
2173
2187
* This function is equivalent to count_utf16be.
2174
2188
*
2175
- * This function does not validate the input.
2189
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
2190
+ * the result is implementation defined.
2176
2191
*
2177
2192
* This function is not BOM-aware.
2178
2193
*
@@ -2187,6 +2202,8 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
2187
2202
* it is valid.
2188
2203
*
2189
2204
* This function assumes that the input string is valid UTF-16 (native endianness).
2205
+ * It is acceptable to pass invalid UTF-16 strings but in such cases
2206
+ * the result is implementation defined.
2190
2207
*
2191
2208
* This function is not BOM-aware.
2192
2209
*
@@ -2201,6 +2218,8 @@ simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) no
2201
2218
* it is valid.
2202
2219
*
2203
2220
* This function assumes that the input string is valid UTF-16LE.
2221
+ * It is acceptable to pass invalid UTF-16 strings but in such cases
2222
+ * the result is implementation defined.
2204
2223
*
2205
2224
* This function is not BOM-aware.
2206
2225
*
@@ -2215,6 +2234,8 @@ simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length)
2215
2234
* it is valid.
2216
2235
*
2217
2236
* This function assumes that the input string is valid UTF-16BE.
2237
+ * It is acceptable to pass invalid UTF-16 strings but in such cases
2238
+ * the result is implementation defined.
2218
2239
*
2219
2240
* This function is not BOM-aware.
2220
2241
*
@@ -2229,6 +2250,8 @@ simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length)
2229
2250
* it is valid.
2230
2251
*
2231
2252
* This function assumes that the input string is valid UTF-8.
2253
+ * It is acceptable to pass invalid UTF-8 strings but in such cases
2254
+ * the result is implementation defined.
2232
2255
*
2233
2256
* @param input the UTF-8 string to process
2234
2257
* @param length the length of the string in bytes
@@ -2739,10 +2762,10 @@ class implementation {
2739
2762
*/
2740
2763
simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors (const char * input, size_t length, char * latin1_output) const noexcept = 0;
2741
2764
2742
- /* *
2765
+ /* *
2743
2766
* Convert valid UTF-8 string into Latin1 string.
2744
2767
*
2745
- * This function assumes that the input string is valid UTF-8.
2768
+ * This function assumes that the input string is valid UTF-8 and that it can be represented as Latin1.
2746
2769
*
2747
2770
* This function is not BOM-aware.
2748
2771
*
@@ -2871,7 +2894,8 @@ class implementation {
2871
2894
/* *
2872
2895
* Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
2873
2896
*
2874
- * This function does not validate the input.
2897
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
2898
+ * the result is implementation defined.
2875
2899
*
2876
2900
* @param input the UTF-8 string to process
2877
2901
* @param length the length of the string in bytes
@@ -2882,7 +2906,8 @@ class implementation {
2882
2906
/* *
2883
2907
* Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
2884
2908
*
2885
- * This function is equivalent to count_utf8.
2909
+ * This function is equivalent to count_utf8. It is acceptable to pass invalid UTF-8 strings but in such cases
2910
+ * the result is implementation defined.
2886
2911
*
2887
2912
* This function does not validate the input.
2888
2913
*
@@ -2957,7 +2982,7 @@ class implementation {
2957
2982
/* *
2958
2983
* Convert valid UTF-16LE string into Latin1 string.
2959
2984
*
2960
- * This function assumes that the input string is valid UTF-8 .
2985
+ * This function assumes that the input string is valid UTF-16LE and that it can be represented as Latin1.
2961
2986
2962
2987
* This function is not BOM-aware.
2963
2988
*
@@ -2971,7 +2996,7 @@ class implementation {
2971
2996
/* *
2972
2997
* Convert valid UTF-16BE string into Latin1 string.
2973
2998
*
2974
- * This function assumes that the input string is valid UTF-8 .
2999
+ * This function assumes that the input string is valid UTF-16BE and that it can be represented as Latin1.
2975
3000
*
2976
3001
* This function is not BOM-aware.
2977
3002
*
@@ -3161,7 +3186,8 @@ class implementation {
3161
3186
/* *
3162
3187
* Compute the number of bytes that this UTF-16LE string would require in UTF-8 format.
3163
3188
*
3164
- * This function does not validate the input.
3189
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3190
+ * the result is implementation defined.
3165
3191
*
3166
3192
* This function is not BOM-aware.
3167
3193
*
@@ -3174,7 +3200,8 @@ class implementation {
3174
3200
/* *
3175
3201
* Compute the number of bytes that this UTF-16BE string would require in UTF-8 format.
3176
3202
*
3177
- * This function does not validate the input.
3203
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3204
+ * the result is implementation defined.
3178
3205
*
3179
3206
* This function is not BOM-aware.
3180
3207
*
@@ -3275,7 +3302,7 @@ class implementation {
3275
3302
simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8 (const char32_t * input, size_t length, char * utf8_buffer) const noexcept = 0;
3276
3303
3277
3304
3278
- /* *
3305
+ /* *
3279
3306
* Return the number of bytes that this UTF-16 string would require in Latin1 format.
3280
3307
*
3281
3308
*
@@ -3399,7 +3426,8 @@ class implementation {
3399
3426
/* *
3400
3427
* Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
3401
3428
*
3402
- * This function does not validate the input.
3429
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3430
+ * the result is implementation defined.
3403
3431
*
3404
3432
* @param input the UTF-32 string to convert
3405
3433
* @param length the length of the string in 4-byte code units (char32_t)
@@ -3410,7 +3438,8 @@ class implementation {
3410
3438
/* *
3411
3439
* Compute the number of bytes that this UTF-32 string would require in Latin1 format.
3412
3440
*
3413
- * This function does not validate the input.
3441
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3442
+ * the result is implementation defined.
3414
3443
*
3415
3444
* @param length the length of the string in 4-byte code units (char32_t)
3416
3445
* @return the number of bytes required to encode the UTF-32 string as Latin1
@@ -3420,7 +3449,8 @@ class implementation {
3420
3449
/* *
3421
3450
* Compute the number of bytes that this UTF-8 string would require in Latin1 format.
3422
3451
*
3423
- * This function does not validate the input.
3452
+ * This function does not validate the input. It is acceptable to pass invalid UTF-8 strings but in such cases
3453
+ * the result is implementation defined.
3424
3454
*
3425
3455
* @param input the UTF-8 string to convert
3426
3456
* @param length the length of the string in bytes
@@ -3431,7 +3461,8 @@ class implementation {
3431
3461
/*
3432
3462
* Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
3433
3463
*
3434
- * This function does not validate the input.
3464
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3465
+ * the result is implementation defined.
3435
3466
*
3436
3467
* This function is not BOM-aware.
3437
3468
*
@@ -3444,7 +3475,8 @@ class implementation {
3444
3475
/* *
3445
3476
* Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
3446
3477
*
3447
- * This function does not validate the input.
3478
+ * This function does not validate the input. It is acceptable to pass invalid UTF-32 strings but in such cases
3479
+ * the result is implementation defined.
3448
3480
*
3449
3481
* @param input the UTF-32 string to convert
3450
3482
* @param length the length of the string in 4-byte code units (char32_t)
@@ -3453,11 +3485,9 @@ class implementation {
3453
3485
simdutf_warn_unused virtual size_t utf16_length_from_utf32 (const char32_t * input, size_t length) const noexcept = 0;
3454
3486
3455
3487
3456
- /* *
3488
+ /* *
3457
3489
* Return the number of bytes that this UTF-32 string would require in Latin1 format.
3458
3490
*
3459
- * This function does not validate the input.
3460
- *
3461
3491
* @param input the UTF-32 string to convert
3462
3492
* @param length the length of the string in 4-byte code units (char32_t)
3463
3493
* @return the number of bytes required to encode the UTF-32 string as Latin1
@@ -3469,7 +3499,8 @@ class implementation {
3469
3499
*
3470
3500
* This function is equivalent to count_utf16le.
3471
3501
*
3472
- * This function does not validate the input.
3502
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3503
+ * the result is implementation defined.
3473
3504
*
3474
3505
* This function is not BOM-aware.
3475
3506
*
@@ -3484,7 +3515,8 @@ class implementation {
3484
3515
*
3485
3516
* This function is equivalent to count_utf16be.
3486
3517
*
3487
- * This function does not validate the input.
3518
+ * This function does not validate the input. It is acceptable to pass invalid UTF-16 strings but in such cases
3519
+ * the result is implementation defined.
3488
3520
*
3489
3521
* This function is not BOM-aware.
3490
3522
*
@@ -3499,6 +3531,8 @@ class implementation {
3499
3531
* it is valid.
3500
3532
*
3501
3533
* This function assumes that the input string is valid UTF-16LE.
3534
+ * It is acceptable to pass invalid UTF-16 strings but in such cases
3535
+ * the result is implementation defined.
3502
3536
*
3503
3537
* This function is not BOM-aware.
3504
3538
*
@@ -3513,6 +3547,8 @@ class implementation {
3513
3547
* it is valid.
3514
3548
*
3515
3549
* This function assumes that the input string is valid UTF-16BE.
3550
+ * It is acceptable to pass invalid UTF-16 strings but in such cases
3551
+ * the result is implementation defined.
3516
3552
*
3517
3553
* This function is not BOM-aware.
3518
3554
*
@@ -3528,6 +3564,8 @@ class implementation {
3528
3564
* it is valid.
3529
3565
*
3530
3566
* This function assumes that the input string is valid UTF-8.
3567
+ * It is acceptable to pass invalid UTF-8 strings but in such cases
3568
+ * the result is implementation defined.
3531
3569
*
3532
3570
* @param input the UTF-8 string to process
3533
3571
* @param length the length of the string in bytes
@@ -3538,7 +3576,8 @@ class implementation {
3538
3576
/* *
3539
3577
* Provide the maximal binary length in bytes given the base64 input.
3540
3578
* In general, if the input contains ASCII spaces, the result will be less than
3541
- * the maximum length.
3579
+ * the maximum length. It is acceptable to pass invalid base64 strings but in such cases
3580
+ * the result is implementation defined.
3542
3581
*
3543
3582
* @param input the base64 input to process
3544
3583
* @param length the length of the base64 input in bytes
@@ -3549,7 +3588,8 @@ class implementation {
3549
3588
/* *
3550
3589
* Provide the maximal binary length in bytes given the base64 input.
3551
3590
* In general, if the input contains ASCII spaces, the result will be less than
3552
- * the maximum length.
3591
+ * the maximum length. It is acceptable to pass invalid base64 strings but in such cases
3592
+ * the result is implementation defined.
3553
3593
*
3554
3594
* @param input the base64 input to process, in ASCII stored as 16-bit units
3555
3595
* @param length the length of the base64 input in 16-bit units
0 commit comments