From 9be5a3534c8488a6a4741e8e6664675eb70a5434 Mon Sep 17 00:00:00 2001 From: dcode Date: Fri, 7 Apr 2023 00:14:57 +0200 Subject: [PATCH 1/3] Implement relaxed SIMD operations --- src/builtins.ts | 622 ++++++++++++++++-- std/assembly/builtins.ts | 114 +++- std/assembly/index.d.ts | 274 +++++++- .../compiler/features/relaxed-simd.debug.wat | 175 +++++ tests/compiler/features/relaxed-simd.json | 9 + .../features/relaxed-simd.release.wat | 4 + tests/compiler/features/relaxed-simd.ts | 83 +++ tests/compiler/simd.debug.wat | 185 +++--- tests/compiler/simd.release.wat | 12 +- tests/compiler/simd.ts | 5 - tests/features.json | 16 + 11 files changed, 1321 insertions(+), 178 deletions(-) create mode 100644 tests/compiler/features/relaxed-simd.debug.wat create mode 100644 tests/compiler/features/relaxed-simd.json create mode 100644 tests/compiler/features/relaxed-simd.release.wat create mode 100644 tests/compiler/features/relaxed-simd.ts diff --git a/src/builtins.ts b/src/builtins.ts index 8b8dd638cf..20cf2e044d 100644 --- a/src/builtins.ts +++ b/src/builtins.ts @@ -450,6 +450,17 @@ export namespace BuiltinNames { export const v128_q15mulr_sat = "~lib/builtins/v128.q15mulr_sat"; export const v128_extmul_low = "~lib/builtins/v128.extmul_low"; export const v128_extmul_high = "~lib/builtins/v128.extmul_high"; + export const v128_relaxed_swizzle = "~lib/builtins/v128.relaxed_swizzle"; + export const v128_relaxed_trunc = "~lib/builtins/v128.relaxed_trunc"; + export const v128_relaxed_trunc_zero = "~lib/builtins/v128.relaxed_trunc_zero"; + export const v128_relaxed_madd = "~lib/builtins/v128.relaxed_madd"; + export const v128_relaxed_nmadd = "~lib/builtins/v128.relaxed_nmadd"; + export const v128_relaxed_laneselect = "~lib/builtins/v128.relaxed_laneselect"; + export const v128_relaxed_min = "~lib/builtins/v128.relaxed_min"; + export const v128_relaxed_max = "~lib/builtins/v128.relaxed_max"; + export const v128_relaxed_q15mulr = "~lib/builtins/v128.relaxed_q15mulr"; + export const 
v128_relaxed_dot = "~lib/builtins/v128.relaxed_dot"; + export const v128_relaxed_dot_add = "~lib/builtins/v128.relaxed_dot_add"; export const i8x16 = "~lib/builtins/i8x16"; export const i16x8 = "~lib/builtins/i16x8"; @@ -544,7 +555,6 @@ export namespace BuiltinNames { export const i16x8_extmul_high_i8x16_s = "~lib/builtins/i16x8.extmul_high_i8x16_s"; export const i16x8_extmul_high_i8x16_u = "~lib/builtins/i16x8.extmul_high_i8x16_u"; export const i16x8_shuffle = "~lib/builtins/i16x8.shuffle"; - export const i16x8_swizzle = "~lib/builtins/i16x8.swizzle"; export const i32x4_splat = "~lib/builtins/i32x4.splat"; export const i32x4_extract_lane = "~lib/builtins/i32x4.extract_lane"; @@ -589,7 +599,6 @@ export namespace BuiltinNames { export const i32x4_extmul_high_i16x8_s = "~lib/builtins/i32x4.extmul_high_i16x8_s"; export const i32x4_extmul_high_i16x8_u = "~lib/builtins/i32x4.extmul_high_i16x8_u"; export const i32x4_shuffle = "~lib/builtins/i32x4.shuffle"; - export const i32x4_swizzle = "~lib/builtins/i32x4.swizzle"; export const i64x2_splat = "~lib/builtins/i64x2.splat"; export const i64x2_extract_lane = "~lib/builtins/i64x2.extract_lane"; @@ -623,7 +632,6 @@ export namespace BuiltinNames { export const i64x2_extmul_high_i32x4_s = "~lib/builtins/i64x2.extmul_high_i32x4_s"; export const i64x2_extmul_high_i32x4_u = "~lib/builtins/i64x2.extmul_high_i32x4_u"; export const i64x2_shuffle = "~lib/builtins/i64x2.shuffle"; - export const i64x2_swizzle = "~lib/builtins/i64x2.swizzle"; export const f32x4_splat = "~lib/builtins/f32x4.splat"; export const f32x4_extract_lane = "~lib/builtins/f32x4.extract_lane"; @@ -653,7 +661,6 @@ export namespace BuiltinNames { export const f32x4_convert_i32x4_u = "~lib/builtins/f32x4.convert_i32x4_u"; export const f32x4_demote_f64x2_zero = "~lib/builtins/f32x4.demote_f64x2_zero"; export const f32x4_shuffle = "~lib/builtins/f32x4.shuffle"; - export const f32x4_swizzle = "~lib/builtins/f32x4.swizzle"; export const f64x2_splat = 
"~lib/builtins/f64x2.splat"; export const f64x2_extract_lane = "~lib/builtins/f64x2.extract_lane"; @@ -683,7 +690,27 @@ export namespace BuiltinNames { export const f64x2_convert_low_i32x4_u = "~lib/builtins/f64x2.convert_low_i32x4_u"; export const f64x2_promote_low_f32x4 = "~lib/builtins/f64x2.promote_low_f32x4"; export const f64x2_shuffle = "~lib/builtins/f64x2.shuffle"; - export const f64x2_swizzle = "~lib/builtins/f64x2.swizzle"; + + export const i8x16_relaxed_swizzle = "~lib/builtins/i8x16.relaxed_swizzle"; + export const i32x4_relaxed_trunc_f32x4_s = "~lib/builtins/i32x4.relaxed_trunc_f32x4_s"; + export const i32x4_relaxed_trunc_f32x4_u = "~lib/builtins/i32x4.relaxed_trunc_f32x4_u"; + export const i32x4_relaxed_trunc_f64x2_s_zero = "~lib/builtins/i32x4.relaxed_trunc_f64x2_s_zero"; + export const i32x4_relaxed_trunc_f64x2_u_zero = "~lib/builtins/i32x4.relaxed_trunc_f64x2_u_zero"; + export const f32x4_relaxed_madd = "~lib/builtins/f32x4.relaxed_madd"; + export const f32x4_relaxed_nmadd = "~lib/builtins/f32x4.relaxed_nmadd"; + export const f64x2_relaxed_madd = "~lib/builtins/f64x2.relaxed_madd"; + export const f64x2_relaxed_nmadd = "~lib/builtins/f64x2.relaxed_nmadd"; + export const i8x16_relaxed_laneselect = "~lib/builtins/i8x16.relaxed_laneselect"; + export const i16x8_relaxed_laneselect = "~lib/builtins/i16x8.relaxed_laneselect"; + export const i32x4_relaxed_laneselect = "~lib/builtins/i32x4.relaxed_laneselect"; + export const i64x2_relaxed_laneselect = "~lib/builtins/i64x2.relaxed_laneselect"; + export const f32x4_relaxed_min = "~lib/builtins/f32x4.relaxed_min"; + export const f32x4_relaxed_max = "~lib/builtins/f32x4.relaxed_max"; + export const f64x2_relaxed_min = "~lib/builtins/f64x2.relaxed_min"; + export const f64x2_relaxed_max = "~lib/builtins/f64x2.relaxed_max"; + export const i16x8_relaxed_q15mulr_s = "~lib/builtins/i16x8.relaxed_q15mulr_s"; + export const i16x8_relaxed_dot_i8x16_i7x16_s = "~lib/builtins/i16x8.relaxed_dot_i8x16_i7x16_s"; + export 
const i32x4_relaxed_dot_i8x16_i7x16_add_s = "~lib/builtins/i32x4.relaxed_dot_i8x16_i7x16_add_s"; export const i31_new = "~lib/builtins/i31.new"; export const i31_get = "~lib/builtins/i31.get"; @@ -6346,7 +6373,8 @@ function builtin_v128_not(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.v128_not, builtin_v128_not); -function builtin_v128_bitwise_ternary(ctx: BuiltinFunctionContext, op: SIMDTernaryOp): ExpressionRef { +// v128.bitselect(v1: v128, v2: v128, c: v128) -> v128 +function builtin_v128_bitselect(ctx: BuiltinFunctionContext): ExpressionRef { let compiler = ctx.compiler; let module = compiler.module; if ( @@ -6361,12 +6389,7 @@ function builtin_v128_bitwise_ternary(ctx: BuiltinFunctionContext, op: SIMDTerna let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit); - return module.simd_ternary(op, arg0, arg1, arg2); -} - -// v128.bitselect(v1: v128, v2: v128, c: v128) -> v128 -function builtin_v128_bitselect(ctx: BuiltinFunctionContext): ExpressionRef { - return builtin_v128_bitwise_ternary(ctx, SIMDTernaryOp.Bitselect); + return module.simd_ternary(SIMDTernaryOp.Bitselect, arg0, arg1, arg2); } builtinFunctions.set(BuiltinNames.v128_bitselect, builtin_v128_bitselect); @@ -6701,6 +6724,368 @@ function builtin_v128_extmul_high(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.v128_extmul_high, builtin_v128_extmul_high); +// === Relaxed SIMD =========================================================================== + +// v128.relaxed_swizzle(a: v128, s: v128) -> v128 +function builtin_v128_relaxed_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeAbsent(ctx) 
| + checkArgsRequired(ctx, 2) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + return module.binary(BinaryOp.RelaxedSwizzleI8x16, arg0, arg1); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_swizzle, builtin_v128_relaxed_swizzle); + +// v128.relaxed_trunc(a: v128) -> v128 +function builtin_v128_relaxed_trunc(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 1) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.Isize: { + if (compiler.options.isWasm64) break; + // fall-through + } + case TypeKind.I32: return module.unary(UnaryOp.RelaxedTruncF32x4ToI32x4, arg0); + case TypeKind.Usize: { + if (compiler.options.isWasm64) break; + // fall-through + } + case TypeKind.U32: return module.unary(UnaryOp.RelaxedTruncF32x4ToU32x4, arg0); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_trunc", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_trunc, builtin_v128_relaxed_trunc); + +// v128.relaxed_trunc_zero(a: v128) -> v128 +function builtin_v128_relaxed_trunc_zero(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + 
checkArgsRequired(ctx, 1) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.Isize: { + if (compiler.options.isWasm64) break; + // fall-through + } + case TypeKind.I32: return module.unary(UnaryOp.RelaxedTruncF64x2ToI32x4Zero, arg0); + case TypeKind.Usize: { + if (compiler.options.isWasm64) break; + // fall-through + } + case TypeKind.U32: return module.unary(UnaryOp.RelaxedTruncF64x2ToU32x4Zero, arg0); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_trunc_zero", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_trunc_zero, builtin_v128_relaxed_trunc_zero); + +// v128.relaxed_madd(a: v128, b: v128, c: v128) -> v128 +function builtin_v128_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 3) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.F32: return module.simd_ternary(SIMDTernaryOp.RelaxedMaddF32x4, arg0, arg1, arg2); + case TypeKind.F64: return module.simd_ternary(SIMDTernaryOp.RelaxedMaddF64x2, arg0, arg1, arg2); 
+ } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_madd", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_madd, builtin_v128_relaxed_madd); + +// v128.relaxed_nmadd(a: v128, b: v128, c: v128) -> v128 +function builtin_v128_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 3) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.F32: return module.simd_ternary(SIMDTernaryOp.RelaxedNmaddF32x4, arg0, arg1, arg2); + case TypeKind.F64: return module.simd_ternary(SIMDTernaryOp.RelaxedNmaddF64x2, arg0, arg1, arg2); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_nmadd", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_nmadd, builtin_v128_relaxed_nmadd); + +// v128.relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 +function builtin_v128_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 3) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = 
ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.I8: + case TypeKind.U8: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI8x16, arg0, arg1, arg2); + case TypeKind.I16: + case TypeKind.U16: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI16x8, arg0, arg1, arg2); + case TypeKind.I32: + case TypeKind.U32: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI32x4, arg0, arg1, arg2); + case TypeKind.I64: + case TypeKind.U64: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI64x2, arg0, arg1, arg2); + case TypeKind.Isize: + case TypeKind.Usize: { + return module.simd_ternary( + compiler.options.isWasm64 + ? 
SIMDTernaryOp.RelaxedLaneselectI64x2 + : SIMDTernaryOp.RelaxedLaneselectI32x4, + arg0, arg1, arg2 + ); + } + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_laneselect", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_laneselect, builtin_v128_relaxed_laneselect); + +// v128.relaxed_min(a: v128, b: v128) -> v128 +function builtin_v128_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 2) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.F32: return module.binary(BinaryOp.RelaxedMinF32x4, arg0, arg1); + case TypeKind.F64: return module.binary(BinaryOp.RelaxedMinF64x2, arg0, arg1); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_min", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_min, builtin_v128_relaxed_min); + +// v128.relaxed_max(a: v128, b: v128) -> v128 +function builtin_v128_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 2) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = 
ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.F32: return module.binary(BinaryOp.RelaxedMaxF32x4, arg0, arg1); + case TypeKind.F64: return module.binary(BinaryOp.RelaxedMaxF64x2, arg0, arg1); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_max", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_max, builtin_v128_relaxed_max); + +// v128.relaxed_q15mulr(a: v128, b: v128) -> v128 +function builtin_v128_relaxed_q15mulr(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkTypeRequired(ctx) | + checkArgsRequired(ctx, 2) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + if (type.isValue) { + switch (type.kind) { + case TypeKind.I16: return module.binary(BinaryOp.RelaxedQ15MulrI16x8, arg0, arg1); + } + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_q15mulr", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_q15mulr, builtin_v128_relaxed_q15mulr); + +// v128.relaxed_dot(a: v128, b: v128) -> v128 +function builtin_v128_relaxed_dot(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = 
compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkArgsRequired(ctx, 2) | + checkTypeRequired(ctx) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + switch (type.kind) { + case TypeKind.I16: return module.binary(BinaryOp.RelaxedDotI8x16I7x16ToI16x8, arg0, arg1); + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_dot", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_dot, builtin_v128_relaxed_dot); + +// v128.relaxed_dot_add(a: v128, b: v128, c: v128) -> v128 +function builtin_v128_relaxed_dot_add(ctx: BuiltinFunctionContext): ExpressionRef { + let compiler = ctx.compiler; + let module = compiler.module; + if ( + checkFeatureEnabled(ctx, Feature.RelaxedSimd) | + checkArgsRequired(ctx, 3) | + checkTypeRequired(ctx) + ) { + compiler.currentType = Type.v128; + return module.unreachable(); + } + let operands = ctx.operands; + let typeArguments = ctx.typeArguments!; + let type = typeArguments[0]; + let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit); + let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit); + let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit); + switch (type.kind) { + // TODO: emulate relaxed_dot_add of i16 with multiple instructions? 
+ case TypeKind.Isize: { + if (compiler.options.isWasm64) break; + // fall-through + } + case TypeKind.I32: return module.simd_ternary(SIMDTernaryOp.RelaxedDotI8x16I7x16AddToI32x4, arg0, arg1, arg2); + } + compiler.error( + DiagnosticCode.Operation_0_cannot_be_applied_to_type_1, + ctx.reportNode.typeArgumentsRange, "v128.relaxed_dot_add", type.toString() + ); + return module.unreachable(); +} +builtinFunctions.set(BuiltinNames.v128_relaxed_dot_add, builtin_v128_relaxed_dot_add); + // === Internal runtime ======================================================================= // __visit_globals(cookie: u32) -> void @@ -8688,6 +9073,7 @@ builtinFunctions.set(BuiltinNames.i8x16_shuffle, builtin_i8x16_shuffle); // i8x16.swizzle -> v128.swizzle function builtin_i8x16_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); ctx.typeArguments = null; ctx.contextualType = Type.v128; return builtin_v128_swizzle(ctx); @@ -9117,14 +9503,6 @@ function builtin_i16x8_shuffle(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.i16x8_shuffle, builtin_i16x8_shuffle); -// i16x8.swizzle -> v128.swizzle -function builtin_i16x8_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { - ctx.typeArguments = null; - ctx.contextualType = Type.v128; - return builtin_v128_swizzle(ctx); -} -builtinFunctions.set(BuiltinNames.i16x8_swizzle, builtin_i16x8_swizzle); - // i32x4.splat -> v128.splat function builtin_i32x4_splat(ctx: BuiltinFunctionContext): ExpressionRef { checkTypeAbsent(ctx); @@ -9512,14 +9890,6 @@ function builtin_i32x4_shuffle(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.i32x4_shuffle, builtin_i32x4_shuffle); -// i32x4.swizzle -> v128.swizzle -function builtin_i32x4_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { - ctx.typeArguments = null; - ctx.contextualType = Type.v128; - return builtin_v128_swizzle(ctx); -} -builtinFunctions.set(BuiltinNames.i32x4_swizzle, builtin_i32x4_swizzle); - // 
i64x2.splat -> v128.splat function builtin_i64x2_splat(ctx: BuiltinFunctionContext): ExpressionRef { checkTypeAbsent(ctx); @@ -9772,14 +10142,6 @@ function builtin_i64x2_shuffle(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.i64x2_shuffle, builtin_i64x2_shuffle); -// i64x2.swizzle -> v128.swizzle -function builtin_i64x2_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { - ctx.typeArguments = null; - ctx.contextualType = Type.v128; - return builtin_v128_swizzle(ctx); -} -builtinFunctions.set(BuiltinNames.i64x2_swizzle, builtin_i64x2_swizzle); - // f32x4.splat -> v128.splat function builtin_f32x4_splat(ctx: BuiltinFunctionContext): ExpressionRef { checkTypeAbsent(ctx); @@ -10032,14 +10394,6 @@ function builtin_f32x4_shuffle(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.f32x4_shuffle, builtin_f32x4_shuffle); -// f32x4.swizzle -> v128.swizzle -function builtin_f32x4_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { - ctx.typeArguments = null; - ctx.contextualType = Type.v128; - return builtin_v128_swizzle(ctx); -} -builtinFunctions.set(BuiltinNames.f32x4_swizzle, builtin_f32x4_swizzle); - // f64x2.splat -> v128.splat function builtin_f64x2_splat(ctx: BuiltinFunctionContext): ExpressionRef { checkTypeAbsent(ctx); @@ -10292,13 +10646,185 @@ function builtin_f64x2_shuffle(ctx: BuiltinFunctionContext): ExpressionRef { } builtinFunctions.set(BuiltinNames.f64x2_shuffle, builtin_f64x2_shuffle); -// f64x2.swizzle -> v128.swizzle -function builtin_f64x2_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { +// i8x16.relaxed_swizzle -> v128.relaxed_swizzle +function builtin_i8x16_relaxed_swizzle(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); ctx.typeArguments = null; ctx.contextualType = Type.v128; - return builtin_v128_swizzle(ctx); + return builtin_v128_relaxed_swizzle(ctx); +} +builtinFunctions.set(BuiltinNames.i8x16_relaxed_swizzle, builtin_i8x16_relaxed_swizzle); + +// 
i32x4.relaxed_trunc_f32x4_s -> v128.relaxed_trunc +function builtin_i32x4_relaxed_trunc_f32x4_s(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_trunc(ctx); +} +builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f32x4_s, builtin_i32x4_relaxed_trunc_f32x4_s); + +// i32x4.relaxed_trunc_f32x4_u -> v128.relaxed_trunc +function builtin_i32x4_relaxed_trunc_f32x4_u(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.u32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_trunc(ctx); +} +builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f32x4_u, builtin_i32x4_relaxed_trunc_f32x4_u); + +// i32x4.relaxed_trunc_f64x2_s_zero -> v128.relaxed_trunc_zero +function builtin_i32x4_relaxed_trunc_f64x2_s_zero(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_trunc_zero(ctx); +} +builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f64x2_s_zero, builtin_i32x4_relaxed_trunc_f64x2_s_zero); + +// i32x4.relaxed_trunc_f64x2_u_zero -> v128.relaxed_trunc_zero +function builtin_i32x4_relaxed_trunc_f64x2_u_zero(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.u32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_trunc_zero(ctx); +} +builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f64x2_u_zero, builtin_i32x4_relaxed_trunc_f64x2_u_zero); + +// f32x4.relaxed_madd -> v128.relaxed_madd +function builtin_f32x4_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_madd(ctx); +} +builtinFunctions.set(BuiltinNames.f32x4_relaxed_madd, builtin_f32x4_relaxed_madd); + +// f32x4.relaxed_nmadd -> 
v128.relaxed_nmadd +function builtin_f32x4_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_nmadd(ctx); +} +builtinFunctions.set(BuiltinNames.f32x4_relaxed_nmadd, builtin_f32x4_relaxed_nmadd); + +// f64x2.relaxed_madd -> v128.relaxed_madd +function builtin_f64x2_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f64 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_madd(ctx); +} +builtinFunctions.set(BuiltinNames.f64x2_relaxed_madd, builtin_f64x2_relaxed_madd); + +// f64x2.relaxed_nmadd -> v128.relaxed_nmadd +function builtin_f64x2_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f64 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_nmadd(ctx); +} +builtinFunctions.set(BuiltinNames.f64x2_relaxed_nmadd, builtin_f64x2_relaxed_nmadd); + +// i8x16.relaxed_laneselect -> v128.relaxed_laneselect +function builtin_i8x16_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i8 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_laneselect(ctx); +} +builtinFunctions.set(BuiltinNames.i8x16_relaxed_laneselect, builtin_i8x16_relaxed_laneselect); + +// i16x8.relaxed_laneselect -> v128.relaxed_laneselect +function builtin_i16x8_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i16 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_laneselect(ctx); +} +builtinFunctions.set(BuiltinNames.i16x8_relaxed_laneselect, builtin_i16x8_relaxed_laneselect); + +// i32x4.relaxed_laneselect -> v128.relaxed_laneselect +function builtin_i32x4_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments 
= [ Type.i32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_laneselect(ctx); +} +builtinFunctions.set(BuiltinNames.i32x4_relaxed_laneselect, builtin_i32x4_relaxed_laneselect); + +// i64x2.relaxed_laneselect -> v128.relaxed_laneselect +function builtin_i64x2_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i64 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_laneselect(ctx); +} +builtinFunctions.set(BuiltinNames.i64x2_relaxed_laneselect, builtin_i64x2_relaxed_laneselect); + +// f32x4.relaxed_min -> v128.relaxed_min +function builtin_f32x4_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_min(ctx); +} +builtinFunctions.set(BuiltinNames.f32x4_relaxed_min, builtin_f32x4_relaxed_min); + +// f32x4.relaxed_max -> v128.relaxed_max +function builtin_f32x4_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_max(ctx); +} +builtinFunctions.set(BuiltinNames.f32x4_relaxed_max, builtin_f32x4_relaxed_max); + +// f64x2.relaxed_min -> v128.relaxed_min +function builtin_f64x2_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f64 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_min(ctx); +} +builtinFunctions.set(BuiltinNames.f64x2_relaxed_min, builtin_f64x2_relaxed_min); + +// f64x2.relaxed_max -> v128.relaxed_max +function builtin_f64x2_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.f64 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_max(ctx); +} +builtinFunctions.set(BuiltinNames.f64x2_relaxed_max, builtin_f64x2_relaxed_max); + +// i16x8.relaxed_q15mulr_s -> 
v128.relaxed_q15mulr +function builtin_i16x8_relaxed_q15mulr_s(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i16 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_q15mulr(ctx); +} +builtinFunctions.set(BuiltinNames.i16x8_relaxed_q15mulr_s, builtin_i16x8_relaxed_q15mulr_s); + +// i16x8.relaxed_dot_i8x16_i7x16_s -> v128.relaxed_dot +function builtin_i16x8_relaxed_dot_i8x16_i7x16_s(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i16 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_dot(ctx); +} +builtinFunctions.set(BuiltinNames.i16x8_relaxed_dot_i8x16_i7x16_s, builtin_i16x8_relaxed_dot_i8x16_i7x16_s); + +// i32x4.relaxed_dot_i8x16_i7x16_add_s -> v128.relaxed_dot_add +function builtin_i32x4_relaxed_dot_i8x16_i7x16_add_s(ctx: BuiltinFunctionContext): ExpressionRef { + checkTypeAbsent(ctx); + ctx.typeArguments = [ Type.i32 ]; + ctx.contextualType = Type.v128; + return builtin_v128_relaxed_dot_add(ctx); } -builtinFunctions.set(BuiltinNames.f64x2_swizzle, builtin_f64x2_swizzle); +builtinFunctions.set(BuiltinNames.i32x4_relaxed_dot_i8x16_i7x16_add_s, builtin_i32x4_relaxed_dot_i8x16_i7x16_add_s); // === Internal helpers ======================================================================= diff --git a/std/assembly/builtins.ts b/std/assembly/builtins.ts index 05238edc03..12f0dfd574 100644 --- a/std/assembly/builtins.ts +++ b/std/assembly/builtins.ts @@ -1562,6 +1562,50 @@ export namespace v128 { // @ts-ignore: decorator @builtin export declare function extmul_high(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_swizzle(a: v128, s: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_trunc(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_trunc_zero(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export 
declare function relaxed_madd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_min(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_max(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_q15mulr(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_dot(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_dot_add(a: v128, b: v128, c: v128): v128; } // @ts-ignore: decorator @@ -1724,6 +1768,14 @@ export namespace i8x16 { // @ts-ignore: decorator @builtin export declare function swizzle(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_swizzle(a: v128, s: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128; } // @ts-ignore: decorator @@ -1925,7 +1977,15 @@ export namespace i16x8 { // @ts-ignore: decorator @builtin - export declare function swizzle(a: v128, b: v128): v128; + export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_q15mulr_s(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_dot_i8x16_i7x16_s(a: v128, b: v128): v128; } // @ts-ignore: decorator @@ -2108,7 +2168,27 @@ export namespace i32x4 { // @ts-ignore: decorator @builtin - export declare function swizzle(a: v128, b: v128): v128; + export declare function relaxed_trunc_f32x4_s(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare 
function relaxed_trunc_f32x4_u(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_trunc_f64x2_s_zero(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_trunc_f64x2_u_zero(a: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_dot_i8x16_i7x16_add_s(a: v128, b: v128, c: v128): v128; } // @ts-ignore: decorator @@ -2231,7 +2311,7 @@ export namespace i64x2 { // @ts-ignore: decorator @builtin - export declare function swizzle(a: v128, b: v128): v128; + export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128; } // @ts-ignore: decorator @@ -2354,7 +2434,19 @@ export namespace f32x4 { // @ts-ignore: decorator @builtin - export declare function swizzle(a: v128, b: v128): v128; + export declare function relaxed_madd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_min(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_max(a: v128, b: v128): v128; } // @ts-ignore: decorator @@ -2477,7 +2569,19 @@ export namespace f64x2 { // @ts-ignore: decorator @builtin - export declare function swizzle(a: v128, b: v128): v128; + export declare function relaxed_madd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_min(a: v128, b: v128): v128; + + // @ts-ignore: decorator + @builtin + export declare function relaxed_max(a: v128, b: v128): v128; } @final diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts index ecf389936e..e71cd018de 100644 --- 
a/std/assembly/index.d.ts +++ b/std/assembly/index.d.ts @@ -859,7 +859,7 @@ declare namespace v128 { export function andnot(a: v128, b: v128): v128; /** Performs the bitwise NOT operation on a vector. */ export function not(a: v128): v128; - /** Selects bits of either vector according to the specified mask. */ + /** Selects bits of either vector according to the specified mask. Selects from `v1` if the bit in `mask` is `1`, otherwise from `v2`. */ export function bitselect(v1: v128, v2: v128, mask: v128): v128; /** Reduces a vector to a scalar indicating whether any lane is considered `true`. */ export function any_true(a: v128): bool; @@ -925,12 +925,104 @@ declare namespace v128 { export function demote_zero(a: v128): v128; /** Promotes the lower float lanes to higher precision. */ export function promote_low(a: v128): v128; - /** Performs the line-wise saturating rounding multiplication in Q15 format. */ + /** Performs the lane-wise saturating rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`). */ export function q15mulr_sat(a: v128, b: v128): v128; /** Performs the lane-wise integer extended multiplication of the lower lanes producing a twice wider result than the inputs. */ export function extmul_low(a: v128, b: v128): v128; /** Performs the lane-wise integer extended multiplication of the higher lanes producing a twice wider result than the inputs. */ export function extmul_high(a: v128, b: v128): v128; + /** + * Selects 8-bit lanes from `a` using indices in `s`. Indices in the range [0-15] select the i-th element of `a`. + * + * Unlike {@link v128.swizzle}, the result of an out of bounds index is implementation-defined, depending on hardware + * capabilities: Either `0` or `a[s[i]%16]`. + */ + export function relaxed_swizzle(a: v128, s: v128): v128; + /** + * Truncates each lane of a vector from 32-bit floating point to a 32-bit signed or unsigned integer as indicated by + * `T`.
+ * + * Unlike {@link v128.trunc_sat}, the result of lanes out of bounds of the target type is implementation defined, + * depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc(a: v128): v128; + /** + * Truncates each lane of a vector from 64-bit floating point to a 32-bit signed or unsigned integer as indicated by + * `T`. Unused higher integer lanes of the result are initialized to zero. + * + * Unlike {@link v128.trunc_sat_zero}, the result of lanes out of bounds of the target type is implementation defined, + * depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc_zero(a: v128): v128; + /** + * Performs the fused multiply-add operation (`a * b + c`) on 32- or 64-bit floating point lanes as indicated by + * `T`. + * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_madd(a: v128, b: v128, c: v128): v128; + /** + * Performs the fused negative multiply-add operation (`-(a * b) + c`) on 32- or 64-bit floating point lanes as + * indicated by `T`.
+ * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + /** + * Selects 8-, 16-, 32- or 64-bit integer lanes as indicated by `T` from `a` or `b` based on masks in `m`. + * + * Behaves like {@link v128.bitselect} if masks in `m` do have all bits either set (result is `a[i]`) or unset (result + * is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the most + * significant bit of `m` is set, the result is either `bitselect(a[i], b[i], mask)` or `a[i]`, otherwise the result + * is `b[i]`. + */ + export function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + /** + * Computes the minimum of each 32- or 64-bit floating point lane as indicated by `T`. + * + * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. + */ + export function relaxed_min(a: v128, b: v128): v128; + /** + * Computes the maximum of each 32- or 64-bit floating point lane as indicated by `T`. + * + * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. + */ + export function relaxed_max(a: v128, b: v128): v128; + /** + * Performs the lane-wise rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`). + * + * Unlike {@link v128.q15mulr_sat}, the result is implementation-defined if both inputs are the minimum signed value: + * Either the minimum or maximum signed value. 
+ */ + export function relaxed_q15mulr(a: v128, b: v128): v128; + /** + * Computes the dot product of two 8-bit integer lanes each, yielding lanes one size wider than the input. + * + * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or + * unsigned is implementation-defined. + */ + export function relaxed_dot(a: v128, b: v128): v128; + /** + * Computes the dot product of two 8-bit integer lanes each, yielding lanes two sizes wider than the input with the + * lanes of `c` accumulated into the result. + * + * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or + * unsigned by the intermediate multiplication is implementation-defined. + */ + export function relaxed_dot_add(a: v128, b: v128, c: v128): v128; } /** Initializes a 128-bit vector from sixteen 8-bit integer values. Arguments must be compile-time constants. */ declare function i8x16(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8): v128; @@ -1009,6 +1101,23 @@ declare namespace i8x16 { export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8, l4: u8, l5: u8, l6: u8, l7: u8, l8: u8, l9: u8, l10: u8, l11: u8, l12: u8, l13: u8, l14: u8, l15: u8): v128; /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ export function swizzle(a: v128, s: v128): v128; + /** + * Selects 8-bit integer lanes from `a` using indices in `s`. Indices in the range [0-15] select the i-th element of + * `a`. + * + * Unlike {@link i8x16.swizzle}, the result of an out of bounds index is implementation-defined, depending on hardware + * capabilities: Either `0` or `a[s[i]%16]`. + */ + export function relaxed_swizzle(a: v128, s: v128): v128; + /** + * Selects 8-bit integer lanes from `a` or `b` based on masks in `m`. 
+ * + * Behaves like {@link v128.bitselect} if masks in `m` do have all bits either set (result is `a[i]`) or unset (result + * is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the most + * significant bit of `m` is set, the result is either `bitselect(a[i], b[i], mask)` or `a[i]`, otherwise the result + * is `b[i]`. + */ + export function relaxed_laneselect(a: v128, b: v128, m: v128): v128; } /** Initializes a 128-bit vector from eight 16-bit integer values. Arguments must be compile-time constants. */ declare function i16x8(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16): v128; @@ -1107,8 +1216,29 @@ declare namespace i16x8 { export function extmul_high_i8x16_u(a: v128, b: v128): v128; /** Selects 16-bit lanes from either vector according to the specified [0-7] respectively [8-15] lane indexes. */ export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8, l4: u8, l5: u8, l6: u8, l7: u8): v128; - /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ - export function swizzle(a: v128, s: v128): v128; + /** + * Selects 16-bit integer lanes from `a` or `b` based on masks in `m`. + * + * Behaves like {@link v128.bitselect} if masks in `m` do have all bits either set (result is `a[i]`) or unset (result + * is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the most + * significant bit of `m` is set, the result is either `bitselect(a[i], b[i], mask)` or `a[i]`, otherwise the result + * is `b[i]`. + */ + export function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + /** + * Performs the lane-wise rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`).
+ * + * Some results are implementation-defined: If both inputs are `i16.MIN_VALUE`, the value of the respective + * resulting lane may be either `i16.MIN_VALUE` or `i16.MAX_VALUE`. + */ + export function relaxed_q15mulr_s(a: v128, b: v128): v128; + /** + * Computes the dot product of two 8-bit integer lanes each, yielding lanes one size wider than the input. + * + * Some results are implementation-defined: If the most significant bit of `b[i]` is set, the intermediate + * multiplication may interpret `b[i]` as either signed or unsigned. + */ + export function relaxed_dot_i8x16_i7x16_s(a: v128, b: v128): v128; } /** Initializes a 128-bit vector from four 32-bit integer values. Arguments must be compile-time constants. */ declare function i32x4(a: i32, b: i32, c: i32, d: i32): v128; @@ -1199,8 +1329,65 @@ declare namespace i32x4 { export function extmul_high_i16x8_u(a: v128, b: v128): v128; /** Selects 32-bit lanes from either vector according to the specified [0-3] respectively [4-7] lane indexes. */ export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8): v128; - /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ - export function swizzle(a: v128, s: v128): v128; + /** + * Truncates each lane of a vector from 32-bit floating point to a signed 32-bit integer. + * + * Unlike {@link i32x4.trunc_sat_f32x4_s}, the result of lanes out of bounds of the target type is implementation + * defined, depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc_f32x4_s(a: v128): v128; + /** + * Truncates each lane of a vector from 32-bit floating point to an unsigned 32-bit integer.
+ * + * Unlike {@link i32x4.trunc_sat_f32x4_u}, the result of lanes out of bounds of the target type is implementation + * defined, depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc_f32x4_u(a: v128): v128; + /** + * Truncates each lane of a vector from 64-bit floating point to a signed 32-bit integer. The two higher + * integer lanes of the result are initialized to zero. + * + * Unlike {@link i32x4.trunc_sat_f64x2_s_zero}, the result of lanes out of bounds of the target type is implementation + * defined, depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc_f64x2_s_zero(a: v128): v128; + /** + * Truncates each lane of a vector from 64-bit floating point to an unsigned 32-bit integer. The two higher + * integer lanes of the result are initialized to zero. + * + * Unlike {@link i32x4.trunc_sat_f64x2_u_zero}, the result of lanes out of bounds of the target type is implementation + * defined, depending on hardware capabilities: + * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value. + * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the + * saturated result or maximum integer value. + */ + export function relaxed_trunc_f64x2_u_zero(a: v128): v128; + /** + * Selects 32-bit integer lanes from `a` or `b` based on masks in `m`.
+ * + * Behaves like {@link v128.bitselect} if masks in `m` do have all bits either set (result is `a[i]`) or unset (result + * is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the most + * significant bit of `m` is set, the result is either `bitselect(a[i], b[i], mask)` or `a[i]`, otherwise the result + * is `b[i]`. + */ + export function relaxed_laneselect(a: v128, b: v128, m: v128): v128; + /** + * Computes the dot product of two 8-bit lanes each, yielding lanes two sizes wider than the input with the lanes of + * `c` accumulated into the result. + * + * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or + * unsigned by the intermediate multiplication is implementation-defined. + */ + export function relaxed_dot_i8x16_i7x16_add_s(a: v128, b: v128, c: v128): v128; } /** Initializes a 128-bit vector from two 64-bit integer values. Arguments must be compile-time constants. */ declare function i64x2(a: i64, b: i64): v128; @@ -1261,8 +1448,15 @@ declare namespace i64x2 { export function extmul_high_i32x4_u(a: v128, b: v128): v128; /** Selects 64-bit lanes from either vector according to the specified [0-1] respectively [2-3] lane indexes. */ export function shuffle(a: v128, b: v128, l0: u8, l1: u8): v128; - /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ - export function swizzle(a: v128, s: v128): v128; + /** + * Selects 64-bit integer lanes from `a` or `b` based on masks in `m`. + * + * Behaves like {@link v128.bitselect} if masks in `m` do have all bits either set (result is `a[i]`) or unset (result + * is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the most + * significant bit of `m` is set, the result is either `bitselect(a[i], b[i], mask)` or `a[i]`, otherwise the result + * is `b[i]`. 
+ */ + export function relaxed_laneselect(a: v128, b: v128, m: v128): v128; } /** Initializes a 128-bit vector from four 32-bit float values. Arguments must be compile-time constants. */ declare function f32x4(a: f32, b: f32, c: f32, d: f32): v128; @@ -1323,8 +1517,36 @@ declare namespace f32x4 { export function demote_f64x2_zero(a: v128): v128; /** Selects 32-bit lanes from either vector according to the specified [0-3] respectively [4-7] lane indexes. */ export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8): v128; - /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ - export function swizzle(a: v128, s: v128): v128; + /** + * Performs the fused multiply-add operation (`a * b + c`) on all 32-bit floating point lanes. + * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_madd(a: v128, b: v128, c: v128): v128; + /** + * Performs the fused negative multiply-add operation (`-(a * b) + c`) on all 32-bit floating point lanes. + * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + /** + * Computes the minimum of each 32-bit floating point lane. + * + * Unlike {@link f32x4.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. + */ + export function relaxed_min(a: v128, b: v128): v128; + /** + * Computes the maximum of each 32-bit floating point lane. 
+ * + * Unlike {@link f32x4.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. + */ + export function relaxed_max(a: v128, b: v128): v128; } /** Initializes a 128-bit vector from two 64-bit float values. Arguments must be compile-time constants. */ declare function f64x2(a: f64, b: f64): v128; @@ -1385,8 +1607,36 @@ declare namespace f64x2 { export function promote_low_f32x4(a: v128): v128; /** Selects 64-bit lanes from either vector according to the specified [0-1] respectively [2-3] lane indexes. */ export function shuffle(a: v128, b: v128, l0: u8, l1: u8): v128; - /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */ - export function swizzle(a: v128, s: v128): v128; + /** + * Performs the fused multiply-add operation (`a * b + c`) on all 64-bit floating point lanes. + * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_madd(a: v128, b: v128, c: v128): v128; + /** + * Performs the fused negative multiply-add operation (`-(a * b) + c`) on all 64-bit floating point lanes. + * + * The result is implementation defined, depending on hardware capabilities: + * - Either `a * b` is rounded once and the final result rounded again, or + * - The expression is evaluated with higher precision and only rounded once + */ + export function relaxed_nmadd(a: v128, b: v128, c: v128): v128; + /** + * Computes the minimum of each 64-bit floating point lane. + * + * Unlike {@link f64x2.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. 
+ */ + export function relaxed_min(a: v128, b: v128): v128; + /** + * Computes the maximum of each 64-bit floating point lane. + * + * Unlike {@link f64x2.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, + * depending on hardware capabilities: Either `a[i]` or `b[i]`. + */ + export function relaxed_max(a: v128, b: v128): v128; } declare abstract class i31 { diff --git a/tests/compiler/features/relaxed-simd.debug.wat b/tests/compiler/features/relaxed-simd.debug.wat new file mode 100644 index 0000000000..22c0697e81 --- /dev/null +++ b/tests/compiler/features/relaxed-simd.debug.wat @@ -0,0 +1,175 @@ +(module + (type $none_=>_none (func)) + (global $features/relaxed-simd/v v128 (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)) + (global $features/relaxed-simd/r (mut v128) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000)) + (memory $0 0) + (table $0 1 1 funcref) + (elem $0 (i32.const 1)) + (export "memory" (memory $0)) + (start $~start) + (func $start:features/relaxed-simd + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i8x16.relaxed_swizzle + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i8x16.relaxed_swizzle + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f32x4_s + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f32x4_s + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f32x4_u + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f32x4_u + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f64x2_s_zero + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f64x2_s_zero + global.set $features/relaxed-simd/r + global.get 
$features/relaxed-simd/v + i32x4.relaxed_trunc_f64x2_u_zero + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + i32x4.relaxed_trunc_f64x2_u_zero + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_fma + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_fma + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_fms + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_fms + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_fma + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_fma + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_fms + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_fms + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i8x16.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i8x16.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get 
$features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i32x4.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i32x4.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i64x2.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i64x2.laneselect + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_min + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_min + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_max + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f32x4.relaxed_max + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_min + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_min + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + f64x2.relaxed_max + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + 
f64x2.relaxed_max + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.relaxed_q15mulr_s + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.relaxed_q15mulr_s + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.dot_i8x16_i7x16_s + global.set $features/relaxed-simd/r + global.get $features/relaxed-simd/v + global.get $features/relaxed-simd/v + i16x8.dot_i8x16_i7x16_s + global.set $features/relaxed-simd/r + ) + (func $~start + call $start:features/relaxed-simd + ) +) diff --git a/tests/compiler/features/relaxed-simd.json b/tests/compiler/features/relaxed-simd.json new file mode 100644 index 0000000000..a01579d1e7 --- /dev/null +++ b/tests/compiler/features/relaxed-simd.json @@ -0,0 +1,9 @@ +{ + "asc_flags": [ + "--runtime stub" + ], + "features": [ + "simd", + "relaxed-simd" + ] +} diff --git a/tests/compiler/features/relaxed-simd.release.wat b/tests/compiler/features/relaxed-simd.release.wat new file mode 100644 index 0000000000..23da3862e2 --- /dev/null +++ b/tests/compiler/features/relaxed-simd.release.wat @@ -0,0 +1,4 @@ +(module + (memory $0 0) + (export "memory" (memory $0)) +) diff --git a/tests/compiler/features/relaxed-simd.ts b/tests/compiler/features/relaxed-simd.ts new file mode 100644 index 0000000000..3fc581cc7e --- /dev/null +++ b/tests/compiler/features/relaxed-simd.ts @@ -0,0 +1,83 @@ +const v = v128.splat(0); +let r: v128; + +r = i8x16.relaxed_swizzle(v, v); +// == +r = v128.relaxed_swizzle(v, v); + +r = i32x4.relaxed_trunc_f32x4_s(v); +// == +r = v128.relaxed_trunc(v); + +r = i32x4.relaxed_trunc_f32x4_u(v); +// == +r = v128.relaxed_trunc(v); + +r = i32x4.relaxed_trunc_f64x2_s_zero(v); +// == +r = v128.relaxed_trunc_zero(v); + +r = i32x4.relaxed_trunc_f64x2_u_zero(v); +// == +r = v128.relaxed_trunc_zero(v); + +r = f32x4.relaxed_madd(v, v, v); 
+// == +r = v128.relaxed_madd(v, v, v); + +r = f32x4.relaxed_nmadd(v, v, v); +// == +r = v128.relaxed_nmadd(v, v, v); + +r = f64x2.relaxed_madd(v, v, v); +// == +r = v128.relaxed_madd(v, v, v); + +r = f64x2.relaxed_nmadd(v, v, v); +// == +r = v128.relaxed_nmadd(v, v, v); + +r = i8x16.relaxed_laneselect(v, v, v); +// == +r = v128.relaxed_laneselect(v, v, v); + +r = i16x8.relaxed_laneselect(v, v, v); +// == +r = v128.relaxed_laneselect(v, v, v); + +r = i32x4.relaxed_laneselect(v, v, v); +// == +r = v128.relaxed_laneselect(v, v, v); + +r = i64x2.relaxed_laneselect(v, v, v); +// == +r = v128.relaxed_laneselect(v, v, v); + +r = f32x4.relaxed_min(v, v); +// == +r = v128.relaxed_min(v, v); + +r = f32x4.relaxed_max(v, v); +// == +r = v128.relaxed_max(v, v); + +r = f64x2.relaxed_min(v, v); +// == +r = v128.relaxed_min(v, v); + +r = f64x2.relaxed_max(v, v); +// == +r = v128.relaxed_max(v, v); + +r = i16x8.relaxed_q15mulr_s(v, v); +// == +r = v128.relaxed_q15mulr(v, v); + +r = i16x8.relaxed_dot_i8x16_i7x16_s(v, v); +// == +r = v128.relaxed_dot(v, v); + +// TODO: not yet implemented in binaryen/src/wasm-interpreter.h +// r = i32x4.relaxed_dot_i8x16_i7x16_add_s(v, v, v); +// // == +// r = v128.relaxed_dot_add(v, v, v); diff --git a/tests/compiler/simd.debug.wat b/tests/compiler/simd.debug.wat index f8a950898b..3d765bb1eb 100644 --- a/tests/compiler/simd.debug.wat +++ b/tests/compiler/simd.debug.wat @@ -4186,25 +4186,6 @@ call $~lib/builtins/abort unreachable end - local.get $a - local.get $b - i8x16.swizzle - local.get $a - local.get $b - i8x16.swizzle - i8x16.eq - i8x16.all_true - i32.const 0 - i32.ne - i32.eqz - if - i32.const 0 - i32.const 528 - i32.const 584 - i32.const 3 - call $~lib/builtins/abort - unreachable - end i32.const 16 call $~lib/rt/tlsf/__alloc local.set $ptr @@ -4243,7 +4224,7 @@ if i32.const 0 i32.const 528 - i32.const 599 + i32.const 594 i32.const 5 call $~lib/builtins/abort unreachable @@ -4259,7 +4240,7 @@ if i32.const 0 i32.const 528 - i32.const 604 + 
i32.const 599 i32.const 5 call $~lib/builtins/abort unreachable @@ -4361,7 +4342,7 @@ if i32.const 0 i32.const 528 - i32.const 672 + i32.const 667 i32.const 3 call $~lib/builtins/abort unreachable @@ -4379,7 +4360,7 @@ if i32.const 0 i32.const 528 - i32.const 674 + i32.const 669 i32.const 3 call $~lib/builtins/abort unreachable @@ -4398,7 +4379,7 @@ if i32.const 0 i32.const 528 - i32.const 676 + i32.const 671 i32.const 3 call $~lib/builtins/abort unreachable @@ -4415,7 +4396,7 @@ if i32.const 0 i32.const 528 - i32.const 677 + i32.const 672 i32.const 3 call $~lib/builtins/abort unreachable @@ -4432,7 +4413,7 @@ if i32.const 0 i32.const 528 - i32.const 678 + i32.const 673 i32.const 3 call $~lib/builtins/abort unreachable @@ -4493,7 +4474,7 @@ if i32.const 0 i32.const 528 - i32.const 719 + i32.const 714 i32.const 3 call $~lib/builtins/abort unreachable @@ -4506,7 +4487,7 @@ if i32.const 0 i32.const 528 - i32.const 724 + i32.const 719 i32.const 3 call $~lib/builtins/abort unreachable @@ -4519,7 +4500,7 @@ if i32.const 0 i32.const 528 - i32.const 725 + i32.const 720 i32.const 3 call $~lib/builtins/abort unreachable @@ -4536,7 +4517,7 @@ if i32.const 0 i32.const 528 - i32.const 726 + i32.const 721 i32.const 3 call $~lib/builtins/abort unreachable @@ -4553,7 +4534,7 @@ if i32.const 0 i32.const 528 - i32.const 731 + i32.const 726 i32.const 3 call $~lib/builtins/abort unreachable @@ -4633,7 +4614,7 @@ if i32.const 0 i32.const 528 - i32.const 745 + i32.const 740 i32.const 3 call $~lib/builtins/abort unreachable @@ -4650,7 +4631,7 @@ if i32.const 0 i32.const 528 - i32.const 746 + i32.const 741 i32.const 3 call $~lib/builtins/abort unreachable @@ -4667,7 +4648,7 @@ if i32.const 0 i32.const 528 - i32.const 747 + i32.const 742 i32.const 3 call $~lib/builtins/abort unreachable @@ -4684,7 +4665,7 @@ if i32.const 0 i32.const 528 - i32.const 748 + i32.const 743 i32.const 3 call $~lib/builtins/abort unreachable @@ -4701,7 +4682,7 @@ if i32.const 0 i32.const 528 - i32.const 749 + 
i32.const 744 i32.const 3 call $~lib/builtins/abort unreachable @@ -4718,7 +4699,7 @@ if i32.const 0 i32.const 528 - i32.const 750 + i32.const 745 i32.const 3 call $~lib/builtins/abort unreachable @@ -4735,7 +4716,7 @@ if i32.const 0 i32.const 528 - i32.const 751 + i32.const 746 i32.const 3 call $~lib/builtins/abort unreachable @@ -4752,7 +4733,7 @@ if i32.const 0 i32.const 528 - i32.const 752 + i32.const 747 i32.const 3 call $~lib/builtins/abort unreachable @@ -4769,7 +4750,7 @@ if i32.const 0 i32.const 528 - i32.const 753 + i32.const 748 i32.const 3 call $~lib/builtins/abort unreachable @@ -4786,7 +4767,7 @@ if i32.const 0 i32.const 528 - i32.const 754 + i32.const 749 i32.const 3 call $~lib/builtins/abort unreachable @@ -4873,7 +4854,7 @@ if i32.const 0 i32.const 528 - i32.const 769 + i32.const 764 i32.const 3 call $~lib/builtins/abort unreachable @@ -4904,7 +4885,7 @@ if i32.const 0 i32.const 528 - i32.const 780 + i32.const 775 i32.const 5 call $~lib/builtins/abort unreachable @@ -4920,7 +4901,7 @@ if i32.const 0 i32.const 528 - i32.const 785 + i32.const 780 i32.const 5 call $~lib/builtins/abort unreachable @@ -5020,7 +5001,7 @@ if i32.const 0 i32.const 528 - i32.const 819 + i32.const 814 i32.const 3 call $~lib/builtins/abort unreachable @@ -5038,7 +5019,7 @@ if i32.const 0 i32.const 528 - i32.const 821 + i32.const 816 i32.const 3 call $~lib/builtins/abort unreachable @@ -5057,7 +5038,7 @@ if i32.const 0 i32.const 528 - i32.const 823 + i32.const 818 i32.const 3 call $~lib/builtins/abort unreachable @@ -5074,7 +5055,7 @@ if i32.const 0 i32.const 528 - i32.const 824 + i32.const 819 i32.const 3 call $~lib/builtins/abort unreachable @@ -5091,7 +5072,7 @@ if i32.const 0 i32.const 528 - i32.const 825 + i32.const 820 i32.const 3 call $~lib/builtins/abort unreachable @@ -5107,7 +5088,7 @@ if i32.const 0 i32.const 528 - i32.const 826 + i32.const 821 i32.const 3 call $~lib/builtins/abort unreachable @@ -5120,7 +5101,7 @@ if i32.const 0 i32.const 528 - i32.const 831 + 
i32.const 826 i32.const 3 call $~lib/builtins/abort unreachable @@ -5133,7 +5114,7 @@ if i32.const 0 i32.const 528 - i32.const 832 + i32.const 827 i32.const 3 call $~lib/builtins/abort unreachable @@ -5150,7 +5131,7 @@ if i32.const 0 i32.const 528 - i32.const 833 + i32.const 828 i32.const 3 call $~lib/builtins/abort unreachable @@ -5167,7 +5148,7 @@ if i32.const 0 i32.const 528 - i32.const 838 + i32.const 833 i32.const 3 call $~lib/builtins/abort unreachable @@ -5240,7 +5221,7 @@ if i32.const 0 i32.const 528 - i32.const 852 + i32.const 847 i32.const 5 call $~lib/builtins/abort unreachable @@ -5256,7 +5237,7 @@ if i32.const 0 i32.const 528 - i32.const 857 + i32.const 852 i32.const 5 call $~lib/builtins/abort unreachable @@ -5637,7 +5618,7 @@ if i32.const 0 i32.const 528 - i32.const 930 + i32.const 925 i32.const 3 call $~lib/builtins/abort unreachable @@ -5655,7 +5636,7 @@ if i32.const 0 i32.const 528 - i32.const 932 + i32.const 927 i32.const 3 call $~lib/builtins/abort unreachable @@ -5674,7 +5655,7 @@ if i32.const 0 i32.const 528 - i32.const 934 + i32.const 929 i32.const 3 call $~lib/builtins/abort unreachable @@ -5691,7 +5672,7 @@ if i32.const 0 i32.const 528 - i32.const 935 + i32.const 930 i32.const 3 call $~lib/builtins/abort unreachable @@ -5708,7 +5689,7 @@ if i32.const 0 i32.const 528 - i32.const 936 + i32.const 931 i32.const 3 call $~lib/builtins/abort unreachable @@ -5729,7 +5710,7 @@ if i32.const 0 i32.const 528 - i32.const 938 + i32.const 933 i32.const 3 call $~lib/builtins/abort unreachable @@ -5746,7 +5727,7 @@ if i32.const 0 i32.const 528 - i32.const 939 + i32.const 934 i32.const 3 call $~lib/builtins/abort unreachable @@ -5762,7 +5743,7 @@ if i32.const 0 i32.const 528 - i32.const 940 + i32.const 935 i32.const 3 call $~lib/builtins/abort unreachable @@ -5775,7 +5756,7 @@ if i32.const 0 i32.const 528 - i32.const 941 + i32.const 936 i32.const 3 call $~lib/builtins/abort unreachable @@ -5788,7 +5769,7 @@ if i32.const 0 i32.const 528 - i32.const 942 + 
i32.const 937 i32.const 3 call $~lib/builtins/abort unreachable @@ -5805,7 +5786,7 @@ if i32.const 0 i32.const 528 - i32.const 943 + i32.const 938 i32.const 3 call $~lib/builtins/abort unreachable @@ -5822,7 +5803,7 @@ if i32.const 0 i32.const 528 - i32.const 948 + i32.const 943 i32.const 3 call $~lib/builtins/abort unreachable @@ -5853,7 +5834,7 @@ if i32.const 0 i32.const 528 - i32.const 957 + i32.const 952 i32.const 3 call $~lib/builtins/abort unreachable @@ -5870,7 +5851,7 @@ if i32.const 0 i32.const 528 - i32.const 958 + i32.const 953 i32.const 3 call $~lib/builtins/abort unreachable @@ -5887,7 +5868,7 @@ if i32.const 0 i32.const 528 - i32.const 959 + i32.const 954 i32.const 3 call $~lib/builtins/abort unreachable @@ -5904,7 +5885,7 @@ if i32.const 0 i32.const 528 - i32.const 960 + i32.const 955 i32.const 3 call $~lib/builtins/abort unreachable @@ -5921,7 +5902,7 @@ if i32.const 0 i32.const 528 - i32.const 961 + i32.const 956 i32.const 3 call $~lib/builtins/abort unreachable @@ -5938,7 +5919,7 @@ if i32.const 0 i32.const 528 - i32.const 962 + i32.const 957 i32.const 3 call $~lib/builtins/abort unreachable @@ -5955,7 +5936,7 @@ if i32.const 0 i32.const 528 - i32.const 963 + i32.const 958 i32.const 3 call $~lib/builtins/abort unreachable @@ -5972,7 +5953,7 @@ if i32.const 0 i32.const 528 - i32.const 964 + i32.const 959 i32.const 3 call $~lib/builtins/abort unreachable @@ -5988,7 +5969,7 @@ if i32.const 0 i32.const 528 - i32.const 965 + i32.const 960 i32.const 3 call $~lib/builtins/abort unreachable @@ -6055,7 +6036,7 @@ if i32.const 0 i32.const 528 - i32.const 992 + i32.const 987 i32.const 5 call $~lib/builtins/abort unreachable @@ -6073,7 +6054,7 @@ if i32.const 0 i32.const 528 - i32.const 996 + i32.const 991 i32.const 5 call $~lib/builtins/abort unreachable @@ -6118,7 +6099,7 @@ if i32.const 0 i32.const 528 - i32.const 1012 + i32.const 1007 i32.const 3 call $~lib/builtins/abort unreachable @@ -6136,7 +6117,7 @@ if i32.const 0 i32.const 528 - i32.const 1014 + 
i32.const 1009 i32.const 3 call $~lib/builtins/abort unreachable @@ -6155,7 +6136,7 @@ if i32.const 0 i32.const 528 - i32.const 1016 + i32.const 1011 i32.const 3 call $~lib/builtins/abort unreachable @@ -6172,7 +6153,7 @@ if i32.const 0 i32.const 528 - i32.const 1017 + i32.const 1012 i32.const 3 call $~lib/builtins/abort unreachable @@ -6189,7 +6170,7 @@ if i32.const 0 i32.const 528 - i32.const 1018 + i32.const 1013 i32.const 3 call $~lib/builtins/abort unreachable @@ -6210,7 +6191,7 @@ if i32.const 0 i32.const 528 - i32.const 1020 + i32.const 1015 i32.const 3 call $~lib/builtins/abort unreachable @@ -6227,7 +6208,7 @@ if i32.const 0 i32.const 528 - i32.const 1021 + i32.const 1016 i32.const 3 call $~lib/builtins/abort unreachable @@ -6243,7 +6224,7 @@ if i32.const 0 i32.const 528 - i32.const 1022 + i32.const 1017 i32.const 3 call $~lib/builtins/abort unreachable @@ -6256,7 +6237,7 @@ if i32.const 0 i32.const 528 - i32.const 1023 + i32.const 1018 i32.const 3 call $~lib/builtins/abort unreachable @@ -6269,7 +6250,7 @@ if i32.const 0 i32.const 528 - i32.const 1024 + i32.const 1019 i32.const 3 call $~lib/builtins/abort unreachable @@ -6286,7 +6267,7 @@ if i32.const 0 i32.const 528 - i32.const 1025 + i32.const 1020 i32.const 3 call $~lib/builtins/abort unreachable @@ -6303,7 +6284,7 @@ if i32.const 0 i32.const 528 - i32.const 1030 + i32.const 1025 i32.const 3 call $~lib/builtins/abort unreachable @@ -6334,7 +6315,7 @@ if i32.const 0 i32.const 528 - i32.const 1039 + i32.const 1034 i32.const 3 call $~lib/builtins/abort unreachable @@ -6351,7 +6332,7 @@ if i32.const 0 i32.const 528 - i32.const 1040 + i32.const 1035 i32.const 3 call $~lib/builtins/abort unreachable @@ -6368,7 +6349,7 @@ if i32.const 0 i32.const 528 - i32.const 1041 + i32.const 1036 i32.const 3 call $~lib/builtins/abort unreachable @@ -6385,7 +6366,7 @@ if i32.const 0 i32.const 528 - i32.const 1042 + i32.const 1037 i32.const 3 call $~lib/builtins/abort unreachable @@ -6402,7 +6383,7 @@ if i32.const 0 
i32.const 528 - i32.const 1043 + i32.const 1038 i32.const 3 call $~lib/builtins/abort unreachable @@ -6419,7 +6400,7 @@ if i32.const 0 i32.const 528 - i32.const 1044 + i32.const 1039 i32.const 3 call $~lib/builtins/abort unreachable @@ -6436,7 +6417,7 @@ if i32.const 0 i32.const 528 - i32.const 1045 + i32.const 1040 i32.const 3 call $~lib/builtins/abort unreachable @@ -6453,7 +6434,7 @@ if i32.const 0 i32.const 528 - i32.const 1046 + i32.const 1041 i32.const 3 call $~lib/builtins/abort unreachable @@ -6469,7 +6450,7 @@ if i32.const 0 i32.const 528 - i32.const 1047 + i32.const 1042 i32.const 3 call $~lib/builtins/abort unreachable @@ -6522,7 +6503,7 @@ if i32.const 0 i32.const 528 - i32.const 1066 + i32.const 1061 i32.const 5 call $~lib/builtins/abort unreachable @@ -6540,7 +6521,7 @@ if i32.const 0 i32.const 528 - i32.const 1070 + i32.const 1065 i32.const 5 call $~lib/builtins/abort unreachable diff --git a/tests/compiler/simd.release.wat b/tests/compiler/simd.release.wat index 1103cb213f..7cd8b7e52d 100644 --- a/tests/compiler/simd.release.wat +++ b/tests/compiler/simd.release.wat @@ -2297,7 +2297,7 @@ if i32.const 0 i32.const 1552 - i32.const 599 + i32.const 594 i32.const 5 call $~lib/builtins/abort unreachable @@ -2311,7 +2311,7 @@ if i32.const 0 i32.const 1552 - i32.const 604 + i32.const 599 i32.const 5 call $~lib/builtins/abort unreachable @@ -2349,7 +2349,7 @@ if i32.const 0 i32.const 1552 - i32.const 780 + i32.const 775 i32.const 5 call $~lib/builtins/abort unreachable @@ -2363,7 +2363,7 @@ if i32.const 0 i32.const 1552 - i32.const 785 + i32.const 780 i32.const 5 call $~lib/builtins/abort unreachable @@ -2395,7 +2395,7 @@ if i32.const 0 i32.const 1552 - i32.const 852 + i32.const 847 i32.const 5 call $~lib/builtins/abort unreachable @@ -2409,7 +2409,7 @@ if i32.const 0 i32.const 1552 - i32.const 857 + i32.const 852 i32.const 5 call $~lib/builtins/abort unreachable diff --git a/tests/compiler/simd.ts b/tests/compiler/simd.ts index 2a68d5cb8b..f9672912c5 100644 
--- a/tests/compiler/simd.ts +++ b/tests/compiler/simd.ts @@ -581,11 +581,6 @@ function test_i16x8(): void { == v128.shuffle(a, b, 0, 1, 2, 3, 12, 13, 14, 15) ); - assert( - i16x8.swizzle(a, b) - == - v128.swizzle(a, b) - ); { let ptr = __alloc(16); store(ptr, 1); diff --git a/tests/features.json b/tests/features.json index 1ac3d7867a..4ba0c0b4c5 100644 --- a/tests/features.json +++ b/tests/features.json @@ -32,5 +32,21 @@ "v8_flags": [ "--experimental-wasm-eh" ] + }, + "simd": { + "asc_flags": [ + "--enable simd" + ], + "v8_flags": [ + "--experimental-wasm-simd" + ] + }, + "relaxed-simd": { + "asc_flags": [ + "--enable relaxed-simd" + ], + "v8_flags": [ + "--experimental-wasm-relaxed-simd" + ] } } From 903e4bd6ad264a40c3631c91a9ac4007bad62aac Mon Sep 17 00:00:00 2001 From: dcode Date: Fri, 7 Apr 2023 00:26:16 +0200 Subject: [PATCH 2/3] list relaxed-simd in cli help --- cli/options.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/options.json b/cli/options.json index 6590a92138..f6776dbe26 100644 --- a/cli/options.json +++ b/cli/options.json @@ -221,6 +221,7 @@ " reference-types Reference types and operations.", " gc Garbage collection (WIP).", " stringref String reference types.", + " relaxed-simd Relaxed SIMD operations.", "" ], "TODO_doesNothingYet": [ @@ -228,7 +229,6 @@ " tail-calls Tail call operations.", " multi-value Multi value types.", " memory64 Memory64 operations.", - " relaxed-simd Relaxed SIMD operations.", " extended-const Extended const expressions." 
], "type": "S", From 7e855dbdda41815a42eb1ff36625ae0f80325d7d Mon Sep 17 00:00:00 2001 From: dcode Date: Tue, 11 Apr 2023 14:18:08 +0200 Subject: [PATCH 3/3] clarify --- std/assembly/index.d.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts index e71cd018de..13e59d0886 100644 --- a/std/assembly/index.d.ts +++ b/std/assembly/index.d.ts @@ -990,15 +990,15 @@ declare namespace v128 { /** * Computes the minimum of each 32- or 64-bit floating point lane as indicated by `T`. * - * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, - * depending on hardware capabilities: Either `a[i]` or `b[i]`. + * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN` or both are `-0.0` and + * `+0.0`, depending on hardware capabilities: Either `a[i]` or `b[i]`. */ export function relaxed_min(a: v128, b: v128): v128; /** * Computes the maximum of each 32- or 64-bit floating point lane as indicated by `T`. * - * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`, - * depending on hardware capabilities: Either `a[i]` or `b[i]`. + * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN` or both are `-0.0` and + * `+0.0`, depending on hardware capabilities: Either `a[i]` or `b[i]`. */ export function relaxed_max(a: v128, b: v128): v128; /**