From 9be5a3534c8488a6a4741e8e6664675eb70a5434 Mon Sep 17 00:00:00 2001
From: dcode <dcode@dcode.io>
Date: Fri, 7 Apr 2023 00:14:57 +0200
Subject: [PATCH 1/3] Implement relaxed SIMD operations

---
 src/builtins.ts                               | 622 ++++++++++++++++--
 std/assembly/builtins.ts                      | 114 +++-
 std/assembly/index.d.ts                       | 274 +++++++-
 .../compiler/features/relaxed-simd.debug.wat  | 175 +++++
 tests/compiler/features/relaxed-simd.json     |   9 +
 .../features/relaxed-simd.release.wat         |   4 +
 tests/compiler/features/relaxed-simd.ts       |  83 +++
 tests/compiler/simd.debug.wat                 | 185 +++---
 tests/compiler/simd.release.wat               |  12 +-
 tests/compiler/simd.ts                        |   5 -
 tests/features.json                           |  16 +
 11 files changed, 1321 insertions(+), 178 deletions(-)
 create mode 100644 tests/compiler/features/relaxed-simd.debug.wat
 create mode 100644 tests/compiler/features/relaxed-simd.json
 create mode 100644 tests/compiler/features/relaxed-simd.release.wat
 create mode 100644 tests/compiler/features/relaxed-simd.ts

diff --git a/src/builtins.ts b/src/builtins.ts
index 8b8dd638cf..20cf2e044d 100644
--- a/src/builtins.ts
+++ b/src/builtins.ts
@@ -450,6 +450,17 @@ export namespace BuiltinNames {
   export const v128_q15mulr_sat = "~lib/builtins/v128.q15mulr_sat";
   export const v128_extmul_low = "~lib/builtins/v128.extmul_low";
   export const v128_extmul_high = "~lib/builtins/v128.extmul_high";
+  export const v128_relaxed_swizzle = "~lib/builtins/v128.relaxed_swizzle";
+  export const v128_relaxed_trunc = "~lib/builtins/v128.relaxed_trunc";
+  export const v128_relaxed_trunc_zero = "~lib/builtins/v128.relaxed_trunc_zero";
+  export const v128_relaxed_madd = "~lib/builtins/v128.relaxed_madd";
+  export const v128_relaxed_nmadd = "~lib/builtins/v128.relaxed_nmadd";
+  export const v128_relaxed_laneselect = "~lib/builtins/v128.relaxed_laneselect";
+  export const v128_relaxed_min = "~lib/builtins/v128.relaxed_min";
+  export const v128_relaxed_max = "~lib/builtins/v128.relaxed_max";
+  export const v128_relaxed_q15mulr = "~lib/builtins/v128.relaxed_q15mulr";
+  export const v128_relaxed_dot = "~lib/builtins/v128.relaxed_dot";
+  export const v128_relaxed_dot_add = "~lib/builtins/v128.relaxed_dot_add";
 
   export const i8x16 = "~lib/builtins/i8x16";
   export const i16x8 = "~lib/builtins/i16x8";
@@ -544,7 +555,6 @@ export namespace BuiltinNames {
   export const i16x8_extmul_high_i8x16_s = "~lib/builtins/i16x8.extmul_high_i8x16_s";
   export const i16x8_extmul_high_i8x16_u = "~lib/builtins/i16x8.extmul_high_i8x16_u";
   export const i16x8_shuffle = "~lib/builtins/i16x8.shuffle";
-  export const i16x8_swizzle = "~lib/builtins/i16x8.swizzle";
 
   export const i32x4_splat = "~lib/builtins/i32x4.splat";
   export const i32x4_extract_lane = "~lib/builtins/i32x4.extract_lane";
@@ -589,7 +599,6 @@ export namespace BuiltinNames {
   export const i32x4_extmul_high_i16x8_s = "~lib/builtins/i32x4.extmul_high_i16x8_s";
   export const i32x4_extmul_high_i16x8_u = "~lib/builtins/i32x4.extmul_high_i16x8_u";
   export const i32x4_shuffle = "~lib/builtins/i32x4.shuffle";
-  export const i32x4_swizzle = "~lib/builtins/i32x4.swizzle";
 
   export const i64x2_splat = "~lib/builtins/i64x2.splat";
   export const i64x2_extract_lane = "~lib/builtins/i64x2.extract_lane";
@@ -623,7 +632,6 @@ export namespace BuiltinNames {
   export const i64x2_extmul_high_i32x4_s = "~lib/builtins/i64x2.extmul_high_i32x4_s";
   export const i64x2_extmul_high_i32x4_u = "~lib/builtins/i64x2.extmul_high_i32x4_u";
   export const i64x2_shuffle = "~lib/builtins/i64x2.shuffle";
-  export const i64x2_swizzle = "~lib/builtins/i64x2.swizzle";
 
   export const f32x4_splat = "~lib/builtins/f32x4.splat";
   export const f32x4_extract_lane = "~lib/builtins/f32x4.extract_lane";
@@ -653,7 +661,6 @@ export namespace BuiltinNames {
   export const f32x4_convert_i32x4_u = "~lib/builtins/f32x4.convert_i32x4_u";
   export const f32x4_demote_f64x2_zero = "~lib/builtins/f32x4.demote_f64x2_zero";
   export const f32x4_shuffle = "~lib/builtins/f32x4.shuffle";
-  export const f32x4_swizzle = "~lib/builtins/f32x4.swizzle";
 
   export const f64x2_splat = "~lib/builtins/f64x2.splat";
   export const f64x2_extract_lane = "~lib/builtins/f64x2.extract_lane";
@@ -683,7 +690,27 @@ export namespace BuiltinNames {
   export const f64x2_convert_low_i32x4_u = "~lib/builtins/f64x2.convert_low_i32x4_u";
   export const f64x2_promote_low_f32x4 = "~lib/builtins/f64x2.promote_low_f32x4";
   export const f64x2_shuffle = "~lib/builtins/f64x2.shuffle";
-  export const f64x2_swizzle = "~lib/builtins/f64x2.swizzle";
+
+  export const i8x16_relaxed_swizzle = "~lib/builtins/i8x16.relaxed_swizzle";
+  export const i32x4_relaxed_trunc_f32x4_s = "~lib/builtins/i32x4.relaxed_trunc_f32x4_s";
+  export const i32x4_relaxed_trunc_f32x4_u = "~lib/builtins/i32x4.relaxed_trunc_f32x4_u";
+  export const i32x4_relaxed_trunc_f64x2_s_zero = "~lib/builtins/i32x4.relaxed_trunc_f64x2_s_zero";
+  export const i32x4_relaxed_trunc_f64x2_u_zero = "~lib/builtins/i32x4.relaxed_trunc_f64x2_u_zero";
+  export const f32x4_relaxed_madd = "~lib/builtins/f32x4.relaxed_madd";
+  export const f32x4_relaxed_nmadd = "~lib/builtins/f32x4.relaxed_nmadd";
+  export const f64x2_relaxed_madd = "~lib/builtins/f64x2.relaxed_madd";
+  export const f64x2_relaxed_nmadd = "~lib/builtins/f64x2.relaxed_nmadd";
+  export const i8x16_relaxed_laneselect = "~lib/builtins/i8x16.relaxed_laneselect";
+  export const i16x8_relaxed_laneselect = "~lib/builtins/i16x8.relaxed_laneselect";
+  export const i32x4_relaxed_laneselect = "~lib/builtins/i32x4.relaxed_laneselect";
+  export const i64x2_relaxed_laneselect = "~lib/builtins/i64x2.relaxed_laneselect";
+  export const f32x4_relaxed_min = "~lib/builtins/f32x4.relaxed_min";
+  export const f32x4_relaxed_max = "~lib/builtins/f32x4.relaxed_max";
+  export const f64x2_relaxed_min = "~lib/builtins/f64x2.relaxed_min";
+  export const f64x2_relaxed_max = "~lib/builtins/f64x2.relaxed_max";
+  export const i16x8_relaxed_q15mulr_s = "~lib/builtins/i16x8.relaxed_q15mulr_s";
+  export const i16x8_relaxed_dot_i8x16_i7x16_s = "~lib/builtins/i16x8.relaxed_dot_i8x16_i7x16_s";
+  export const i32x4_relaxed_dot_i8x16_i7x16_add_s = "~lib/builtins/i32x4.relaxed_dot_i8x16_i7x16_add_s";
 
   export const i31_new = "~lib/builtins/i31.new";
   export const i31_get = "~lib/builtins/i31.get";
@@ -6346,7 +6373,8 @@ function builtin_v128_not(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.v128_not, builtin_v128_not);
 
-function builtin_v128_bitwise_ternary(ctx: BuiltinFunctionContext, op: SIMDTernaryOp): ExpressionRef {
+// v128.bitselect(v1: v128, v2: v128, c: v128) -> v128
+function builtin_v128_bitselect(ctx: BuiltinFunctionContext): ExpressionRef {
   let compiler = ctx.compiler;
   let module = compiler.module;
   if (
@@ -6361,12 +6389,7 @@ function builtin_v128_bitwise_ternary(ctx: BuiltinFunctionContext, op: SIMDTerna
   let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
   let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
   let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit);
-  return module.simd_ternary(op, arg0, arg1, arg2);
-}
-
-// v128.bitselect(v1: v128, v2: v128, c: v128) -> v128
-function builtin_v128_bitselect(ctx: BuiltinFunctionContext): ExpressionRef {
-  return builtin_v128_bitwise_ternary(ctx, SIMDTernaryOp.Bitselect);
+  return module.simd_ternary(SIMDTernaryOp.Bitselect, arg0, arg1, arg2);
 }
 builtinFunctions.set(BuiltinNames.v128_bitselect, builtin_v128_bitselect);
 
@@ -6701,6 +6724,368 @@ function builtin_v128_extmul_high(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.v128_extmul_high, builtin_v128_extmul_high);
 
+// === Relaxed SIMD ===========================================================================
+
+// v128.relaxed_swizzle(a: v128, s: v128) -> v128 (takes no type argument; emits BinaryOp.RelaxedSwizzleI8x16)
+function builtin_v128_relaxed_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeAbsent(ctx) |
+    checkArgsRequired(ctx, 2)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  return module.binary(BinaryOp.RelaxedSwizzleI8x16, arg0, arg1);
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_swizzle, builtin_v128_relaxed_swizzle);
+
+// v128.relaxed_trunc<T!>(a: v128) -> v128 (T: i32 or u32; isize/usize accepted only when not targeting wasm64)
+function builtin_v128_relaxed_trunc(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 1)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.Isize: {
+        if (compiler.options.isWasm64) break; // leaves the switch so the type error below is reported
+        // fall-through: when not wasm64, isize is handled by the i32 case
+      }
+      case TypeKind.I32: return module.unary(UnaryOp.RelaxedTruncF32x4ToI32x4, arg0);
+      case TypeKind.Usize: {
+        if (compiler.options.isWasm64) break; // leaves the switch so the type error below is reported
+        // fall-through: when not wasm64, usize is handled by the u32 case
+      }
+      case TypeKind.U32: return module.unary(UnaryOp.RelaxedTruncF32x4ToU32x4, arg0);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_trunc", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_trunc, builtin_v128_relaxed_trunc);
+
+// v128.relaxed_trunc_zero<T!>(a: v128) -> v128 (T: i32 or u32; isize/usize accepted only when not targeting wasm64)
+function builtin_v128_relaxed_trunc_zero(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 1)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.Isize: {
+        if (compiler.options.isWasm64) break; // leaves the switch so the type error below is reported
+        // fall-through: when not wasm64, isize is handled by the i32 case
+      }
+      case TypeKind.I32: return module.unary(UnaryOp.RelaxedTruncF64x2ToI32x4Zero, arg0);
+      case TypeKind.Usize: {
+        if (compiler.options.isWasm64) break; // leaves the switch so the type error below is reported
+        // fall-through: when not wasm64, usize is handled by the u32 case
+      }
+      case TypeKind.U32: return module.unary(UnaryOp.RelaxedTruncF64x2ToU32x4Zero, arg0);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_trunc_zero", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_trunc_zero, builtin_v128_relaxed_trunc_zero);
+
+// v128.relaxed_madd<T!>(a: v128, b: v128, c: v128) -> v128 (T: f32 or f64; any other T is reported as an error)
+function builtin_v128_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 3)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.F32: return module.simd_ternary(SIMDTernaryOp.RelaxedMaddF32x4, arg0, arg1, arg2);
+      case TypeKind.F64: return module.simd_ternary(SIMDTernaryOp.RelaxedMaddF64x2, arg0, arg1, arg2);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_madd", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_madd, builtin_v128_relaxed_madd);
+
+// v128.relaxed_nmadd<T!>(a: v128, b: v128, c: v128) -> v128 (T: f32 or f64; any other T is reported as an error)
+function builtin_v128_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 3)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.F32: return module.simd_ternary(SIMDTernaryOp.RelaxedNmaddF32x4, arg0, arg1, arg2);
+      case TypeKind.F64: return module.simd_ternary(SIMDTernaryOp.RelaxedNmaddF64x2, arg0, arg1, arg2);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_nmadd", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_nmadd, builtin_v128_relaxed_nmadd);
+
+// v128.relaxed_laneselect<T!>(a: v128, b: v128, m: v128) -> v128 (T: i8/u8, i16/u16, i32/u32, i64/u64, isize/usize)
+function builtin_v128_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 3)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) { // signed and unsigned kinds of the same width share one op
+      case TypeKind.I8:
+      case TypeKind.U8: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI8x16, arg0, arg1, arg2);
+      case TypeKind.I16:
+      case TypeKind.U16: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI16x8, arg0, arg1, arg2);
+      case TypeKind.I32:
+      case TypeKind.U32: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI32x4, arg0, arg1, arg2);
+      case TypeKind.I64:
+      case TypeKind.U64: return module.simd_ternary(SIMDTernaryOp.RelaxedLaneselectI64x2, arg0, arg1, arg2);
+      case TypeKind.Isize:
+      case TypeKind.Usize: { // pointer-sized T: I64x2 op on wasm64, I32x4 op otherwise
+        return module.simd_ternary(
+          compiler.options.isWasm64
+            ? SIMDTernaryOp.RelaxedLaneselectI64x2
+            : SIMDTernaryOp.RelaxedLaneselectI32x4,
+          arg0, arg1, arg2
+        );
+      }
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_laneselect", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_laneselect, builtin_v128_relaxed_laneselect);
+
+// v128.relaxed_min<T!>(a: v128, b: v128) -> v128 (T: f32 or f64; any other T is reported as an error)
+function builtin_v128_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 2)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.F32: return module.binary(BinaryOp.RelaxedMinF32x4, arg0, arg1);
+      case TypeKind.F64: return module.binary(BinaryOp.RelaxedMinF64x2, arg0, arg1);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_min", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_min, builtin_v128_relaxed_min);
+
+// v128.relaxed_max<T!>(a: v128, b: v128) -> v128 (T: f32 or f64; any other T is reported as an error)
+function builtin_v128_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 2)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.F32: return module.binary(BinaryOp.RelaxedMaxF32x4, arg0, arg1);
+      case TypeKind.F64: return module.binary(BinaryOp.RelaxedMaxF64x2, arg0, arg1);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_max", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_max, builtin_v128_relaxed_max);
+
+// v128.relaxed_q15mulr<T!>(a: v128, b: v128) -> v128 (T: i16 only; any other T is reported as an error)
+function builtin_v128_relaxed_q15mulr(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkTypeRequired(ctx) |
+    checkArgsRequired(ctx, 2)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  if (type.isValue) {
+    switch (type.kind) {
+      case TypeKind.I16: return module.binary(BinaryOp.RelaxedQ15MulrI16x8, arg0, arg1);
+    }
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_q15mulr", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_q15mulr, builtin_v128_relaxed_q15mulr);
+
+// v128.relaxed_dot<T!>(a: v128, b: v128) -> v128 (T: i16 only; any other T is reported as an error)
+function builtin_v128_relaxed_dot(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkArgsRequired(ctx, 2) |
+    checkTypeRequired(ctx)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  switch (type.kind) { // NOTE(review): no type.isValue guard here, unlike v128.relaxed_q15mulr - confirm intentional
+    case TypeKind.I16: return module.binary(BinaryOp.RelaxedDotI8x16I7x16ToI16x8, arg0, arg1);
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_dot", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_dot, builtin_v128_relaxed_dot);
+
+// v128.relaxed_dot_add<T!>(a: v128, b: v128, c: v128) -> v128 (T: i32; isize accepted only when not targeting wasm64)
+function builtin_v128_relaxed_dot_add(ctx: BuiltinFunctionContext): ExpressionRef {
+  let compiler = ctx.compiler;
+  let module = compiler.module;
+  if (
+    checkFeatureEnabled(ctx, Feature.RelaxedSimd) |
+    checkArgsRequired(ctx, 3) |
+    checkTypeRequired(ctx)
+  ) { // any check returned non-zero: bail out with unreachable, currentType set to v128
+    compiler.currentType = Type.v128;
+    return module.unreachable();
+  }
+  let operands = ctx.operands;
+  let typeArguments = ctx.typeArguments!;
+  let type = typeArguments[0];
+  let arg0 = compiler.compileExpression(operands[0], Type.v128, Constraints.ConvImplicit);
+  let arg1 = compiler.compileExpression(operands[1], Type.v128, Constraints.ConvImplicit);
+  let arg2 = compiler.compileExpression(operands[2], Type.v128, Constraints.ConvImplicit);
+  switch (type.kind) { // NOTE(review): no type.isValue guard here, unlike v128.relaxed_madd - confirm intentional
+    // TODO: emulate relaxed_dot_add of i16 with multiple instructions?
+    case TypeKind.Isize: {
+      if (compiler.options.isWasm64) break; // leaves the switch so the type error below is reported
+      // fall-through: when not wasm64, isize is handled by the i32 case
+    }
+    case TypeKind.I32: return module.simd_ternary(SIMDTernaryOp.RelaxedDotI8x16I7x16AddToI32x4, arg0, arg1, arg2);
+  }
+  compiler.error( // reached only when T matched no case above
+    DiagnosticCode.Operation_0_cannot_be_applied_to_type_1,
+    ctx.reportNode.typeArgumentsRange, "v128.relaxed_dot_add", type.toString()
+  );
+  return module.unreachable();
+}
+builtinFunctions.set(BuiltinNames.v128_relaxed_dot_add, builtin_v128_relaxed_dot_add);
+
 // === Internal runtime =======================================================================
 
 // __visit_globals(cookie: u32) -> void
@@ -8688,6 +9073,7 @@ builtinFunctions.set(BuiltinNames.i8x16_shuffle, builtin_i8x16_shuffle);
 
 // i8x16.swizzle -> v128.swizzle
 function builtin_i8x16_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
   ctx.typeArguments = null;
   ctx.contextualType = Type.v128;
   return builtin_v128_swizzle(ctx);
@@ -9117,14 +9503,6 @@ function builtin_i16x8_shuffle(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.i16x8_shuffle, builtin_i16x8_shuffle);
 
-// i16x8.swizzle -> v128.swizzle
-function builtin_i16x8_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
-  ctx.typeArguments = null;
-  ctx.contextualType = Type.v128;
-  return builtin_v128_swizzle(ctx);
-}
-builtinFunctions.set(BuiltinNames.i16x8_swizzle, builtin_i16x8_swizzle);
-
 // i32x4.splat -> v128.splat<i32>
 function builtin_i32x4_splat(ctx: BuiltinFunctionContext): ExpressionRef {
   checkTypeAbsent(ctx);
@@ -9512,14 +9890,6 @@ function builtin_i32x4_shuffle(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.i32x4_shuffle, builtin_i32x4_shuffle);
 
-// i32x4.swizzle -> v128.swizzle
-function builtin_i32x4_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
-  ctx.typeArguments = null;
-  ctx.contextualType = Type.v128;
-  return builtin_v128_swizzle(ctx);
-}
-builtinFunctions.set(BuiltinNames.i32x4_swizzle, builtin_i32x4_swizzle);
-
 // i64x2.splat -> v128.splat<i64>
 function builtin_i64x2_splat(ctx: BuiltinFunctionContext): ExpressionRef {
   checkTypeAbsent(ctx);
@@ -9772,14 +10142,6 @@ function builtin_i64x2_shuffle(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.i64x2_shuffle, builtin_i64x2_shuffle);
 
-// i64x2.swizzle -> v128.swizzle
-function builtin_i64x2_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
-  ctx.typeArguments = null;
-  ctx.contextualType = Type.v128;
-  return builtin_v128_swizzle(ctx);
-}
-builtinFunctions.set(BuiltinNames.i64x2_swizzle, builtin_i64x2_swizzle);
-
 // f32x4.splat -> v128.splat<f32>
 function builtin_f32x4_splat(ctx: BuiltinFunctionContext): ExpressionRef {
   checkTypeAbsent(ctx);
@@ -10032,14 +10394,6 @@ function builtin_f32x4_shuffle(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.f32x4_shuffle, builtin_f32x4_shuffle);
 
-// f32x4.swizzle -> v128.swizzle
-function builtin_f32x4_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
-  ctx.typeArguments = null;
-  ctx.contextualType = Type.v128;
-  return builtin_v128_swizzle(ctx);
-}
-builtinFunctions.set(BuiltinNames.f32x4_swizzle, builtin_f32x4_swizzle);
-
 // f64x2.splat -> v128.splat<f64>
 function builtin_f64x2_splat(ctx: BuiltinFunctionContext): ExpressionRef {
   checkTypeAbsent(ctx);
@@ -10292,13 +10646,185 @@ function builtin_f64x2_shuffle(ctx: BuiltinFunctionContext): ExpressionRef {
 }
 builtinFunctions.set(BuiltinNames.f64x2_shuffle, builtin_f64x2_shuffle);
 
-// f64x2.swizzle -> v128.swizzle
-function builtin_f64x2_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
+// i8x16.relaxed_swizzle -> v128.relaxed_swizzle
+function builtin_i8x16_relaxed_swizzle(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
   ctx.typeArguments = null;
   ctx.contextualType = Type.v128;
-  return builtin_v128_swizzle(ctx);
+  return builtin_v128_relaxed_swizzle(ctx);
+}
+builtinFunctions.set(BuiltinNames.i8x16_relaxed_swizzle, builtin_i8x16_relaxed_swizzle);
+
+// i32x4.relaxed_trunc_f32x4_s -> v128.relaxed_trunc<i32>
+function builtin_i32x4_relaxed_trunc_f32x4_s(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_trunc(ctx);
+}
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f32x4_s, builtin_i32x4_relaxed_trunc_f32x4_s);
+
+// i32x4.relaxed_trunc_f32x4_u -> v128.relaxed_trunc<u32>
+function builtin_i32x4_relaxed_trunc_f32x4_u(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.u32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_trunc(ctx);
+}
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f32x4_u, builtin_i32x4_relaxed_trunc_f32x4_u);
+
+// i32x4.relaxed_trunc_f64x2_s_zero -> v128.relaxed_trunc_zero<i32>
+function builtin_i32x4_relaxed_trunc_f64x2_s_zero(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_trunc_zero(ctx);
+}
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f64x2_s_zero, builtin_i32x4_relaxed_trunc_f64x2_s_zero);
+
+// i32x4.relaxed_trunc_f64x2_u_zero -> v128.relaxed_trunc_zero<u32>
+function builtin_i32x4_relaxed_trunc_f64x2_u_zero(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.u32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_trunc_zero(ctx);
+}
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_trunc_f64x2_u_zero, builtin_i32x4_relaxed_trunc_f64x2_u_zero);
+
+// f32x4.relaxed_madd -> v128.relaxed_madd<f32>
+function builtin_f32x4_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_madd(ctx);
+}
+builtinFunctions.set(BuiltinNames.f32x4_relaxed_madd, builtin_f32x4_relaxed_madd);
+
+// f32x4.relaxed_nmadd -> v128.relaxed_nmadd<f32>
+function builtin_f32x4_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_nmadd(ctx);
+}
+builtinFunctions.set(BuiltinNames.f32x4_relaxed_nmadd, builtin_f32x4_relaxed_nmadd);
+
+// f64x2.relaxed_madd -> v128.relaxed_madd<f64>
+function builtin_f64x2_relaxed_madd(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f64 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_madd(ctx);
+}
+builtinFunctions.set(BuiltinNames.f64x2_relaxed_madd, builtin_f64x2_relaxed_madd);
+
+// f64x2.relaxed_nmadd -> v128.relaxed_nmadd<f64>
+function builtin_f64x2_relaxed_nmadd(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f64 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_nmadd(ctx);
+}
+builtinFunctions.set(BuiltinNames.f64x2_relaxed_nmadd, builtin_f64x2_relaxed_nmadd);
+
+// i8x16.relaxed_laneselect -> v128.relaxed_laneselect<i8>
+function builtin_i8x16_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i8 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_laneselect(ctx);
+}
+builtinFunctions.set(BuiltinNames.i8x16_relaxed_laneselect, builtin_i8x16_relaxed_laneselect);
+
+// i16x8.relaxed_laneselect -> v128.relaxed_laneselect<i16>
+function builtin_i16x8_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i16 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_laneselect(ctx);
+}
+builtinFunctions.set(BuiltinNames.i16x8_relaxed_laneselect, builtin_i16x8_relaxed_laneselect);
+
+// i32x4.relaxed_laneselect -> v128.relaxed_laneselect<i32>
+function builtin_i32x4_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_laneselect(ctx);
+}
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_laneselect, builtin_i32x4_relaxed_laneselect);
+
+// i64x2.relaxed_laneselect -> v128.relaxed_laneselect<i64>
+function builtin_i64x2_relaxed_laneselect(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i64 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_laneselect(ctx);
+}
+builtinFunctions.set(BuiltinNames.i64x2_relaxed_laneselect, builtin_i64x2_relaxed_laneselect);
+
+// f32x4.relaxed_min -> v128.relaxed_min<f32>
+function builtin_f32x4_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_min(ctx);
+}
+builtinFunctions.set(BuiltinNames.f32x4_relaxed_min, builtin_f32x4_relaxed_min);
+
+// f32x4.relaxed_max -> v128.relaxed_max<f32>
+function builtin_f32x4_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_max(ctx);
+}
+builtinFunctions.set(BuiltinNames.f32x4_relaxed_max, builtin_f32x4_relaxed_max);
+
+// f64x2.relaxed_min -> v128.relaxed_min<f64>
+function builtin_f64x2_relaxed_min(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f64 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_min(ctx);
+}
+builtinFunctions.set(BuiltinNames.f64x2_relaxed_min, builtin_f64x2_relaxed_min);
+
+// f64x2.relaxed_max -> v128.relaxed_max<f64>
+function builtin_f64x2_relaxed_max(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.f64 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_max(ctx);
+}
+builtinFunctions.set(BuiltinNames.f64x2_relaxed_max, builtin_f64x2_relaxed_max);
+
+// i16x8.relaxed_q15mulr_s -> v128.relaxed_q15mulr<i16>
+function builtin_i16x8_relaxed_q15mulr_s(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i16 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_q15mulr(ctx);
+}
+builtinFunctions.set(BuiltinNames.i16x8_relaxed_q15mulr_s, builtin_i16x8_relaxed_q15mulr_s);
+
+// i16x8.relaxed_dot_i8x16_i7x16_s -> v128.relaxed_dot<i16>
+function builtin_i16x8_relaxed_dot_i8x16_i7x16_s(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i16 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_dot(ctx);
+}
+builtinFunctions.set(BuiltinNames.i16x8_relaxed_dot_i8x16_i7x16_s, builtin_i16x8_relaxed_dot_i8x16_i7x16_s);
+
+// i32x4.relaxed_dot_i8x16_i7x16_add_s -> v128.relaxed_dot_add<i32>
+function builtin_i32x4_relaxed_dot_i8x16_i7x16_add_s(ctx: BuiltinFunctionContext): ExpressionRef {
+  checkTypeAbsent(ctx);
+  ctx.typeArguments = [ Type.i32 ];
+  ctx.contextualType = Type.v128;
+  return builtin_v128_relaxed_dot_add(ctx);
 }
-builtinFunctions.set(BuiltinNames.f64x2_swizzle, builtin_f64x2_swizzle);
+builtinFunctions.set(BuiltinNames.i32x4_relaxed_dot_i8x16_i7x16_add_s, builtin_i32x4_relaxed_dot_i8x16_i7x16_add_s);
 
 // === Internal helpers =======================================================================
 
diff --git a/std/assembly/builtins.ts b/std/assembly/builtins.ts
index 05238edc03..12f0dfd574 100644
--- a/std/assembly/builtins.ts
+++ b/std/assembly/builtins.ts
@@ -1562,6 +1562,50 @@ export namespace v128 {
   // @ts-ignore: decorator
   @builtin
   export declare function extmul_high<T>(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_swizzle(a: v128, s: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_trunc<T>(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_trunc_zero<T>(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_madd<T>(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_nmadd<T>(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_laneselect<T>(a: v128, b: v128, m: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_min<T>(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_max<T>(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_q15mulr<T>(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_dot<T>(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_dot_add<T>(a: v128, b: v128, c: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -1724,6 +1768,14 @@ export namespace i8x16 {
   // @ts-ignore: decorator
   @builtin
   export declare function swizzle(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_swizzle(a: v128, s: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -1925,7 +1977,15 @@ export namespace i16x8 {
 
   // @ts-ignore: decorator
   @builtin
-  export declare function swizzle(a: v128, b: v128): v128;
+  export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_q15mulr_s(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_dot_i8x16_i7x16_s(a: v128, b: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -2108,7 +2168,27 @@ export namespace i32x4 {
 
   // @ts-ignore: decorator
   @builtin
-  export declare function swizzle(a: v128, b: v128): v128;
+  export declare function relaxed_trunc_f32x4_s(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_trunc_f32x4_u(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_trunc_f64x2_s_zero(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_trunc_f64x2_u_zero(a: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_dot_i8x16_i7x16_add_s(a: v128, b: v128, c: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -2231,7 +2311,7 @@ export namespace i64x2 {
 
   // @ts-ignore: decorator
   @builtin
-  export declare function swizzle(a: v128, b: v128): v128;
+  export declare function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -2354,7 +2434,19 @@ export namespace f32x4 {
 
   // @ts-ignore: decorator
   @builtin
-  export declare function swizzle(a: v128, b: v128): v128;
+  export declare function relaxed_madd(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_nmadd(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_min(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_max(a: v128, b: v128): v128;
 }
 
 // @ts-ignore: decorator
@@ -2477,7 +2569,19 @@ export namespace f64x2 {
 
   // @ts-ignore: decorator
   @builtin
-  export declare function swizzle(a: v128, b: v128): v128;
+  export declare function relaxed_madd(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_nmadd(a: v128, b: v128, c: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_min(a: v128, b: v128): v128;
+
+  // @ts-ignore: decorator
+  @builtin
+  export declare function relaxed_max(a: v128, b: v128): v128;
 }
 
 @final
diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts
index ecf389936e..e71cd018de 100644
--- a/std/assembly/index.d.ts
+++ b/std/assembly/index.d.ts
@@ -859,7 +859,7 @@ declare namespace v128 {
   export function andnot(a: v128, b: v128): v128;
   /** Performs the bitwise NOT operation on a vector. */
   export function not(a: v128): v128;
-  /** Selects bits of either vector according to the specified mask. */
+  /** Selects bits of either vector according to the specified mask. Selects from `v1` if the bit in `mask` is `1`, otherwise from `v2`. */
   export function bitselect(v1: v128, v2: v128, mask: v128): v128;
   /** Reduces a vector to a scalar indicating whether any lane is considered `true`. */
   export function any_true(a: v128): bool;
@@ -925,12 +925,104 @@ declare namespace v128 {
   export function demote_zero<T extends f64 = f64>(a: v128): v128;
   /** Promotes the lower float lanes to higher precision. */
   export function promote_low<T extends f32 = f32>(a: v128): v128;
-  /** Performs the line-wise saturating rounding multiplication in Q15 format. */
+  /** Performs the lane-wise saturating rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`). */
   export function q15mulr_sat<T extends i16>(a: v128, b: v128): v128;
   /** Performs the lane-wise integer extended multiplication of the lower lanes producing a twice wider result than the inputs. */
   export function extmul_low<T extends i8 | u8 | i16 | u16 | i32 | u32>(a: v128, b: v128): v128;
   /** Performs the lane-wise integer extended multiplication of the higher lanes producing a twice wider result than the inputs. */
   export function extmul_high<T extends i8 | u8 | i16 | u16 | i32 | u32>(a: v128, b: v128): v128;
+  /**
+   * Selects 8-bit lanes from `a` using indices in `s`. Indices in the range [0-15] select the i-th element of `a`.
+   * 
+   * Unlike {@link v128.swizzle}, the result of an out of bounds index is implementation-defined, depending on hardware
+   * capabilities: Either `0` or `a[s[i]%16]`.
+   */
+  export function relaxed_swizzle(a: v128, s: v128): v128;
+  /**
+   * Truncates each lane of a vector from 32-bit floating point to a 32-bit signed or unsigned integer as indicated by
+   * `T`.
+   *
+   * Unlike {@link v128.trunc_sat}, the result of lanes out of bounds of the target type is implementation defined,
+   * depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc<T extends i32 | u32>(a: v128): v128;
+  /**
+   * Truncates each lane of a vector from 64-bit floating point to a 32-bit signed or unsigned integer as indicated by
+   * `T`. Unused higher integer lanes of the result are initialized to zero.
+   * 
+   * Unlike {@link v128.trunc_sat_zero}, the result of lanes out of bounds of the target type is implementation defined,
+   * depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc_zero<T extends i32 | u32>(a: v128): v128;
+  /**
+   * Performs the fused multiply-add operation (`a * b + c`) on 32- or 64-bit floating point lanes as indicated by
+   * `T`.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_madd<T>(a: v128, b: v128, c: v128): v128;
+  /**
+   * Performs the fused negative multiply-add operation (`-(a * b) + c`) on 32- or 64-bit floating point lanes as
+   * indicated by `T`.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_nmadd<T>(a: v128, b: v128, c: v128): v128;
+  /**
+   * Selects 8-, 16-, 32- or 64-bit integer lanes as indicated by `T` from `a` or `b` based on masks in `m`.
+   * 
+   * Behaves like {@link v128.bitselect} if each mask lane `m[i]` has all bits either set (result is `a[i]`) or unset
+   * (result is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the
+   * most significant bit of `m[i]` is set, the result is either `bitselect(a[i], b[i], m[i])` or `a[i]`, otherwise
+   * the result is `b[i]`.
+   */
+  export function relaxed_laneselect<T>(a: v128, b: v128, m: v128): v128;
+  /**
+   * Computes the minimum of each 32- or 64-bit floating point lane as indicated by `T`.
+   * 
+   * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_min<T>(a: v128, b: v128): v128;
+  /**
+   * Computes the maximum of each 32- or 64-bit floating point lane as indicated by `T`.
+   * 
+   * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_max<T>(a: v128, b: v128): v128;
+  /**
+   * Performs the lane-wise rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`).
+   * 
+   * Unlike {@link v128.q15mulr_sat}, the result is implementation-defined if both inputs are the minimum signed value:
+   * Either the minimum or maximum signed value.
+   */
+  export function relaxed_q15mulr<T>(a: v128, b: v128): v128;
+  /**
+   * Computes the dot product of two 8-bit integer lanes each, yielding lanes one size wider than the input.
+   * 
+   * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or
+   * unsigned is implementation-defined.
+   */
+  export function relaxed_dot<T>(a: v128, b: v128): v128;
+  /**
+   * Computes the dot product of two 8-bit integer lanes each, yielding lanes two sizes wider than the input with the
+   * lanes of `c` accumulated into the result.
+   * 
+   * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or
+   * unsigned by the intermediate multiplication is implementation-defined.
+   */
+  export function relaxed_dot_add<T>(a: v128, b: v128, c: v128): v128;
 }
 /** Initializes a 128-bit vector from sixteen 8-bit integer values. Arguments must be compile-time constants. */
 declare function i8x16(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8): v128;
@@ -1009,6 +1101,23 @@ declare namespace i8x16 {
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8, l4: u8, l5: u8, l6: u8, l7: u8, l8: u8, l9: u8, l10: u8, l11: u8, l12: u8, l13: u8, l14: u8, l15: u8): v128;
   /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
   export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Selects 8-bit integer lanes from `a` using indices in `s`. Indices in the range [0-15] select the i-th element of
+   * `a`.
+   * 
+   * Unlike {@link i8x16.swizzle}, the result of an out of bounds index is implementation-defined, depending on hardware
+   * capabilities: Either `0` or `a[s[i]%16]`.
+   */
+  export function relaxed_swizzle(a: v128, s: v128): v128;
+  /**
+   * Selects 8-bit integer lanes from `a` or `b` based on masks in `m`.
+   * 
+   * Behaves like {@link v128.bitselect} if each mask lane `m[i]` has all bits either set (result is `a[i]`) or unset
+   * (result is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the
+   * most significant bit of `m[i]` is set, the result is either `bitselect(a[i], b[i], m[i])` or `a[i]`, otherwise
+   * the result is `b[i]`.
+   */
+  export function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
 }
 /** Initializes a 128-bit vector from eight 16-bit integer values. Arguments must be compile-time constants. */
 declare function i16x8(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16): v128;
@@ -1107,8 +1216,29 @@ declare namespace i16x8 {
   export function extmul_high_i8x16_u(a: v128, b: v128): v128;
   /** Selects 16-bit lanes from either vector according to the specified [0-7] respectively [8-15] lane indexes. */
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8, l4: u8, l5: u8, l6: u8, l7: u8): v128;
-  /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
-  export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Selects 16-bit integer lanes from `a` or `b` based on masks in `m`.
+   *
+   * Behaves like {@link v128.bitselect} if each mask lane `m[i]` has all bits either set (result is `a[i]`) or unset
+   * (result is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the
+   * most significant bit of `m[i]` is set, the result is either `bitselect(a[i], b[i], m[i])` or `a[i]`, otherwise
+   * the result is `b[i]`.
+   */
+  export function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
+  /**
+   * Performs the lane-wise rounding multiplication in Q15 format (`(a[i] * b[i] + (1 << (Q - 1))) >> Q` where `Q=15`).
+   *
+   * Some results are implementation-defined: If both inputs are `i16.MIN_VALUE`, the value of the respective
+   * resulting lane may be either `i16.MIN_VALUE` or `i16.MAX_VALUE`.
+   */
+  export function relaxed_q15mulr_s(a: v128, b: v128): v128;
+  /**
+   * Computes the dot product of two 8-bit integer lanes each, yielding lanes one size wider than the input.
+   * 
+   * Some results are implementation-defined: If the most significant bit of `b[i]` is set, the intermediate
+   * multiplication may interpret `b[i]` as either signed or unsigned.
+   */
+  export function relaxed_dot_i8x16_i7x16_s(a: v128, b: v128): v128;
 }
 /** Initializes a 128-bit vector from four 32-bit integer values. Arguments must be compile-time constants. */
 declare function i32x4(a: i32, b: i32, c: i32, d: i32): v128;
@@ -1199,8 +1329,65 @@ declare namespace i32x4 {
   export function extmul_high_i16x8_u(a: v128, b: v128): v128;
   /** Selects 32-bit lanes from either vector according to the specified [0-3] respectively [4-7] lane indexes. */
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8): v128;
-  /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
-  export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Truncates each lane of a vector from 32-bit floating point to a signed 32-bit integer.
+   *
+   * Unlike {@link i32x4.trunc_sat_f32x4_s}, the result of lanes out of bounds of the target type is implementation
+   * defined, depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc_f32x4_s(a: v128): v128;
+  /**
+   * Truncates each lane of a vector from 32-bit floating point to an unsigned 32-bit integer.
+   *
+   * Unlike {@link i32x4.trunc_sat_f32x4_u}, the result of lanes out of bounds of the target type is implementation
+   * defined, depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc_f32x4_u(a: v128): v128;
+  /**
+   * Truncates each lane of a vector from 64-bit floating point to a signed 32-bit integer. The two higher
+   * integer lanes of the result are initialized to zero.
+   * 
+   * Unlike {@link i32x4.trunc_sat_f64x2_s_zero}, the result of lanes out of bounds of the target type is implementation
+   * defined, depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc_f64x2_s_zero(a: v128): v128;
+  /**
+   * Truncates each lane of a vector from 64-bit floating point to an unsigned 32-bit integer. The two higher
+   * integer lanes of the result are initialized to zero.
+   * 
+   * Unlike {@link i32x4.trunc_sat_f64x2_u_zero}, the result of lanes out of bounds of the target type is implementation
+   * defined, depending on hardware capabilities:
+   * - If the input lane contains `NaN`, the result is either `0` or the respective maximum integer value.
+   * - If the input lane contains a value otherwise out of bounds of the target type, the result is either the
+   *   saturated result or maximum integer value.
+   */
+  export function relaxed_trunc_f64x2_u_zero(a: v128): v128;
+  /**
+   * Selects 32-bit integer lanes from `a` or `b` based on masks in `m`.
+   *
+   * Behaves like {@link v128.bitselect} if each mask lane `m[i]` has all bits either set (result is `a[i]`) or unset
+   * (result is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the
+   * most significant bit of `m[i]` is set, the result is either `bitselect(a[i], b[i], m[i])` or `a[i]`, otherwise
+   * the result is `b[i]`.
+   */
+  export function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
+  /**
+   * Computes the dot product of two 8-bit lanes each, yielding lanes two sizes wider than the input with the lanes of
+   * `c` accumulated into the result.
+   * 
+   * Unlike {@link v128.dot}, if the most significant bit of `b[i]` is set, whether `b[i]` is interpreted as signed or
+   * unsigned by the intermediate multiplication is implementation-defined.
+   */
+  export function relaxed_dot_i8x16_i7x16_add_s(a: v128, b: v128, c: v128): v128;
 }
 /** Initializes a 128-bit vector from two 64-bit integer values. Arguments must be compile-time constants. */
 declare function i64x2(a: i64, b: i64): v128;
@@ -1261,8 +1448,15 @@ declare namespace i64x2 {
   export function extmul_high_i32x4_u(a: v128, b: v128): v128;
   /** Selects 64-bit lanes from either vector according to the specified [0-1] respectively [2-3] lane indexes. */
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8): v128;
-  /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
-  export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Selects 64-bit integer lanes from `a` or `b` based on masks in `m`.
+   *
+   * Behaves like {@link v128.bitselect} if each mask lane `m[i]` has all bits either set (result is `a[i]`) or unset
+   * (result is `b[i]`). Otherwise the result is implementation-defined, depending on hardware capabilities: If the
+   * most significant bit of `m[i]` is set, the result is either `bitselect(a[i], b[i], m[i])` or `a[i]`, otherwise
+   * the result is `b[i]`.
+   */
+  export function relaxed_laneselect(a: v128, b: v128, m: v128): v128;
 }
 /** Initializes a 128-bit vector from four 32-bit float values. Arguments must be compile-time constants. */
 declare function f32x4(a: f32, b: f32, c: f32, d: f32): v128;
@@ -1323,8 +1517,36 @@ declare namespace f32x4 {
   export function demote_f64x2_zero(a: v128): v128;
   /** Selects 32-bit lanes from either vector according to the specified [0-3] respectively [4-7] lane indexes. */
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8, l2: u8, l3: u8): v128;
-  /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
-  export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Performs the fused multiply-add operation (`a * b + c`) on all 32-bit floating point lanes.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_madd(a: v128, b: v128, c: v128): v128;
+  /**
+   * Performs the fused negative multiply-add operation (`-(a * b) + c`) on all 32-bit floating point lanes.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_nmadd(a: v128, b: v128, c: v128): v128;
+  /**
+   * Computes the minimum of each 32-bit floating point lane.
+   * 
+   * Unlike {@link f32x4.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_min(a: v128, b: v128): v128;
+  /**
+   * Computes the maximum of each 32-bit floating point lane.
+   * 
+   * Unlike {@link f32x4.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_max(a: v128, b: v128): v128;
 }
 /** Initializes a 128-bit vector from two 64-bit float values. Arguments must be compile-time constants. */
 declare function f64x2(a: f64, b: f64): v128;
@@ -1385,8 +1607,36 @@ declare namespace f64x2 {
   export function promote_low_f32x4(a: v128): v128;
   /** Selects 64-bit lanes from either vector according to the specified [0-1] respectively [2-3] lane indexes. */
   export function shuffle(a: v128, b: v128, l0: u8, l1: u8): v128;
-  /** Selects 8-bit lanes from the first vector according to the indexes [0-15] specified by the 8-bit lanes of the second vector. */
-  export function swizzle(a: v128, s: v128): v128;
+  /**
+   * Performs the fused multiply-add operation (`a * b + c`) on all 64-bit floating point lanes.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_madd(a: v128, b: v128, c: v128): v128;
+  /**
+   * Performs the fused negative multiply-add operation (`-(a * b) + c`) on all 64-bit floating point lanes.
+   * 
+   * The result is implementation defined, depending on hardware capabilities:
+   * - Either `a * b` is rounded once and the final result rounded again, or
+   * - The expression is evaluated with higher precision and only rounded once
+   */
+  export function relaxed_nmadd(a: v128, b: v128, c: v128): v128;
+  /**
+   * Computes the minimum of each 64-bit floating point lane.
+   * 
+   * Unlike {@link f64x2.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_min(a: v128, b: v128): v128;
+  /**
+   * Computes the maximum of each 64-bit floating point lane.
+   * 
+   * Unlike {@link f64x2.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
+   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   */
+  export function relaxed_max(a: v128, b: v128): v128;
 }
 
 declare abstract class i31 {
diff --git a/tests/compiler/features/relaxed-simd.debug.wat b/tests/compiler/features/relaxed-simd.debug.wat
new file mode 100644
index 0000000000..22c0697e81
--- /dev/null
+++ b/tests/compiler/features/relaxed-simd.debug.wat
@@ -0,0 +1,175 @@
+(module
+ (type $none_=>_none (func))
+ (global $features/relaxed-simd/v v128 (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+ (global $features/relaxed-simd/r (mut v128) (v128.const i32x4 0x00000000 0x00000000 0x00000000 0x00000000))
+ (memory $0 0)
+ (table $0 1 1 funcref)
+ (elem $0 (i32.const 1))
+ (export "memory" (memory $0))
+ (start $~start)
+ (func $start:features/relaxed-simd
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i8x16.relaxed_swizzle
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i8x16.relaxed_swizzle
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f32x4_s
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f32x4_s
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f32x4_u
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f32x4_u
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f64x2_s_zero
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f64x2_s_zero
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f64x2_u_zero
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  i32x4.relaxed_trunc_f64x2_u_zero
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_fma
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_fma
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_fms
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_fms
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_fma
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_fma
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_fms
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_fms
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i8x16.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i8x16.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i32x4.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i32x4.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i64x2.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i64x2.laneselect
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_min
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_min
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_max
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f32x4.relaxed_max
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_min
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_min
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_max
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  f64x2.relaxed_max
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.relaxed_q15mulr_s
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.relaxed_q15mulr_s
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.dot_i8x16_i7x16_s
+  global.set $features/relaxed-simd/r
+  global.get $features/relaxed-simd/v
+  global.get $features/relaxed-simd/v
+  i16x8.dot_i8x16_i7x16_s
+  global.set $features/relaxed-simd/r
+ )
+ (func $~start
+  call $start:features/relaxed-simd
+ )
+)
diff --git a/tests/compiler/features/relaxed-simd.json b/tests/compiler/features/relaxed-simd.json
new file mode 100644
index 0000000000..a01579d1e7
--- /dev/null
+++ b/tests/compiler/features/relaxed-simd.json
@@ -0,0 +1,9 @@
+{
+  "asc_flags": [
+    "--runtime stub"
+  ],
+  "features": [
+    "simd",
+    "relaxed-simd"
+  ]
+}
diff --git a/tests/compiler/features/relaxed-simd.release.wat b/tests/compiler/features/relaxed-simd.release.wat
new file mode 100644
index 0000000000..23da3862e2
--- /dev/null
+++ b/tests/compiler/features/relaxed-simd.release.wat
@@ -0,0 +1,4 @@
+(module
+ (memory $0 0)
+ (export "memory" (memory $0))
+)
diff --git a/tests/compiler/features/relaxed-simd.ts b/tests/compiler/features/relaxed-simd.ts
new file mode 100644
index 0000000000..3fc581cc7e
--- /dev/null
+++ b/tests/compiler/features/relaxed-simd.ts
@@ -0,0 +1,83 @@
+const v = v128.splat<i32>(0);
+let r: v128;
+
+r = i8x16.relaxed_swizzle(v, v);
+// ==
+r = v128.relaxed_swizzle(v, v);
+
+r = i32x4.relaxed_trunc_f32x4_s(v);
+// ==
+r = v128.relaxed_trunc<i32>(v);
+
+r = i32x4.relaxed_trunc_f32x4_u(v);
+// ==
+r = v128.relaxed_trunc<u32>(v);
+
+r = i32x4.relaxed_trunc_f64x2_s_zero(v);
+// ==
+r = v128.relaxed_trunc_zero<i32>(v);
+
+r = i32x4.relaxed_trunc_f64x2_u_zero(v);
+// ==
+r = v128.relaxed_trunc_zero<u32>(v);
+
+r = f32x4.relaxed_madd(v, v, v);
+// ==
+r = v128.relaxed_madd<f32>(v, v, v);
+
+r = f32x4.relaxed_nmadd(v, v, v);
+// ==
+r = v128.relaxed_nmadd<f32>(v, v, v);
+
+r = f64x2.relaxed_madd(v, v, v);
+// ==
+r = v128.relaxed_madd<f64>(v, v, v);
+
+r = f64x2.relaxed_nmadd(v, v, v);
+// ==
+r = v128.relaxed_nmadd<f64>(v, v, v);
+
+r = i8x16.relaxed_laneselect(v, v, v);
+// ==
+r = v128.relaxed_laneselect<i8>(v, v, v);
+
+r = i16x8.relaxed_laneselect(v, v, v);
+// ==
+r = v128.relaxed_laneselect<i16>(v, v, v);
+
+r = i32x4.relaxed_laneselect(v, v, v);
+// ==
+r = v128.relaxed_laneselect<i32>(v, v, v);
+
+r = i64x2.relaxed_laneselect(v, v, v);
+// ==
+r = v128.relaxed_laneselect<i64>(v, v, v);
+
+r = f32x4.relaxed_min(v, v);
+// ==
+r = v128.relaxed_min<f32>(v, v);
+
+r = f32x4.relaxed_max(v, v);
+// ==
+r = v128.relaxed_max<f32>(v, v);
+
+r = f64x2.relaxed_min(v, v);
+// ==
+r = v128.relaxed_min<f64>(v, v);
+
+r = f64x2.relaxed_max(v, v);
+// ==
+r = v128.relaxed_max<f64>(v, v);
+
+r = i16x8.relaxed_q15mulr_s(v, v);
+// ==
+r = v128.relaxed_q15mulr<i16>(v, v);
+
+r = i16x8.relaxed_dot_i8x16_i7x16_s(v, v);
+// ==
+r = v128.relaxed_dot<i16>(v, v);
+
+// TODO: not yet implemented in binaryen/src/wasm-interpreter.h
+// r = i32x4.relaxed_dot_i8x16_i7x16_add_s(v, v, v);
+// // ==
+// r = v128.relaxed_dot_add<i32>(v, v, v);
diff --git a/tests/compiler/simd.debug.wat b/tests/compiler/simd.debug.wat
index f8a950898b..3d765bb1eb 100644
--- a/tests/compiler/simd.debug.wat
+++ b/tests/compiler/simd.debug.wat
@@ -4186,25 +4186,6 @@
    call $~lib/builtins/abort
    unreachable
   end
-  local.get $a
-  local.get $b
-  i8x16.swizzle
-  local.get $a
-  local.get $b
-  i8x16.swizzle
-  i8x16.eq
-  i8x16.all_true
-  i32.const 0
-  i32.ne
-  i32.eqz
-  if
-   i32.const 0
-   i32.const 528
-   i32.const 584
-   i32.const 3
-   call $~lib/builtins/abort
-   unreachable
-  end
   i32.const 16
   call $~lib/rt/tlsf/__alloc
   local.set $ptr
@@ -4243,7 +4224,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 599
+   i32.const 594
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -4259,7 +4240,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 604
+   i32.const 599
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -4361,7 +4342,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 672
+   i32.const 667
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4379,7 +4360,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 674
+   i32.const 669
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4398,7 +4379,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 676
+   i32.const 671
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4415,7 +4396,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 677
+   i32.const 672
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4432,7 +4413,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 678
+   i32.const 673
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4493,7 +4474,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 719
+   i32.const 714
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4506,7 +4487,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 724
+   i32.const 719
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4519,7 +4500,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 725
+   i32.const 720
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4536,7 +4517,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 726
+   i32.const 721
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4553,7 +4534,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 731
+   i32.const 726
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4633,7 +4614,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 745
+   i32.const 740
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4650,7 +4631,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 746
+   i32.const 741
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4667,7 +4648,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 747
+   i32.const 742
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4684,7 +4665,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 748
+   i32.const 743
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4701,7 +4682,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 749
+   i32.const 744
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4718,7 +4699,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 750
+   i32.const 745
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4735,7 +4716,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 751
+   i32.const 746
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4752,7 +4733,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 752
+   i32.const 747
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4769,7 +4750,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 753
+   i32.const 748
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4786,7 +4767,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 754
+   i32.const 749
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4873,7 +4854,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 769
+   i32.const 764
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -4904,7 +4885,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 780
+   i32.const 775
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -4920,7 +4901,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 785
+   i32.const 780
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -5020,7 +5001,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 819
+   i32.const 814
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5038,7 +5019,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 821
+   i32.const 816
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5057,7 +5038,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 823
+   i32.const 818
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5074,7 +5055,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 824
+   i32.const 819
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5091,7 +5072,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 825
+   i32.const 820
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5107,7 +5088,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 826
+   i32.const 821
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5120,7 +5101,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 831
+   i32.const 826
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5133,7 +5114,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 832
+   i32.const 827
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5150,7 +5131,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 833
+   i32.const 828
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5167,7 +5148,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 838
+   i32.const 833
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5240,7 +5221,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 852
+   i32.const 847
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -5256,7 +5237,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 857
+   i32.const 852
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -5637,7 +5618,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 930
+   i32.const 925
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5655,7 +5636,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 932
+   i32.const 927
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5674,7 +5655,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 934
+   i32.const 929
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5691,7 +5672,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 935
+   i32.const 930
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5708,7 +5689,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 936
+   i32.const 931
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5729,7 +5710,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 938
+   i32.const 933
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5746,7 +5727,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 939
+   i32.const 934
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5762,7 +5743,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 940
+   i32.const 935
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5775,7 +5756,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 941
+   i32.const 936
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5788,7 +5769,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 942
+   i32.const 937
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5805,7 +5786,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 943
+   i32.const 938
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5822,7 +5803,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 948
+   i32.const 943
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5853,7 +5834,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 957
+   i32.const 952
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5870,7 +5851,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 958
+   i32.const 953
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5887,7 +5868,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 959
+   i32.const 954
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5904,7 +5885,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 960
+   i32.const 955
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5921,7 +5902,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 961
+   i32.const 956
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5938,7 +5919,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 962
+   i32.const 957
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5955,7 +5936,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 963
+   i32.const 958
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5972,7 +5953,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 964
+   i32.const 959
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -5988,7 +5969,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 965
+   i32.const 960
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6055,7 +6036,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 992
+   i32.const 987
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -6073,7 +6054,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 996
+   i32.const 991
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -6118,7 +6099,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1012
+   i32.const 1007
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6136,7 +6117,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1014
+   i32.const 1009
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6155,7 +6136,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1016
+   i32.const 1011
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6172,7 +6153,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1017
+   i32.const 1012
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6189,7 +6170,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1018
+   i32.const 1013
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6210,7 +6191,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1020
+   i32.const 1015
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6227,7 +6208,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1021
+   i32.const 1016
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6243,7 +6224,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1022
+   i32.const 1017
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6256,7 +6237,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1023
+   i32.const 1018
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6269,7 +6250,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1024
+   i32.const 1019
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6286,7 +6267,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1025
+   i32.const 1020
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6303,7 +6284,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1030
+   i32.const 1025
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6334,7 +6315,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1039
+   i32.const 1034
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6351,7 +6332,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1040
+   i32.const 1035
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6368,7 +6349,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1041
+   i32.const 1036
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6385,7 +6366,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1042
+   i32.const 1037
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6402,7 +6383,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1043
+   i32.const 1038
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6419,7 +6400,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1044
+   i32.const 1039
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6436,7 +6417,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1045
+   i32.const 1040
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6453,7 +6434,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1046
+   i32.const 1041
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6469,7 +6450,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1047
+   i32.const 1042
    i32.const 3
    call $~lib/builtins/abort
    unreachable
@@ -6522,7 +6503,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1066
+   i32.const 1061
    i32.const 5
    call $~lib/builtins/abort
    unreachable
@@ -6540,7 +6521,7 @@
   if
    i32.const 0
    i32.const 528
-   i32.const 1070
+   i32.const 1065
    i32.const 5
    call $~lib/builtins/abort
    unreachable
diff --git a/tests/compiler/simd.release.wat b/tests/compiler/simd.release.wat
index 1103cb213f..7cd8b7e52d 100644
--- a/tests/compiler/simd.release.wat
+++ b/tests/compiler/simd.release.wat
@@ -2297,7 +2297,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 599
+    i32.const 594
     i32.const 5
     call $~lib/builtins/abort
     unreachable
@@ -2311,7 +2311,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 604
+    i32.const 599
     i32.const 5
     call $~lib/builtins/abort
     unreachable
@@ -2349,7 +2349,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 780
+    i32.const 775
     i32.const 5
     call $~lib/builtins/abort
     unreachable
@@ -2363,7 +2363,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 785
+    i32.const 780
     i32.const 5
     call $~lib/builtins/abort
     unreachable
@@ -2395,7 +2395,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 852
+    i32.const 847
     i32.const 5
     call $~lib/builtins/abort
     unreachable
@@ -2409,7 +2409,7 @@
    if
     i32.const 0
     i32.const 1552
-    i32.const 857
+    i32.const 852
     i32.const 5
     call $~lib/builtins/abort
     unreachable
diff --git a/tests/compiler/simd.ts b/tests/compiler/simd.ts
index 2a68d5cb8b..f9672912c5 100644
--- a/tests/compiler/simd.ts
+++ b/tests/compiler/simd.ts
@@ -581,11 +581,6 @@ function test_i16x8(): void {
     ==
     v128.shuffle<i16>(a, b, 0, 1, 2, 3, 12, 13, 14, 15)
   );
-  assert(
-    i16x8.swizzle(a, b)
-    ==
-    v128.swizzle(a, b)
-  );
   {
     let ptr = __alloc(16);
     store<i8>(ptr, 1);
diff --git a/tests/features.json b/tests/features.json
index 1ac3d7867a..4ba0c0b4c5 100644
--- a/tests/features.json
+++ b/tests/features.json
@@ -32,5 +32,21 @@
     "v8_flags": [
       "--experimental-wasm-eh"
     ]
+  },
+  "simd": {
+    "asc_flags": [
+      "--enable simd"
+    ],
+    "v8_flags": [
+      "--experimental-wasm-simd"
+    ]
+  },
+  "relaxed-simd": {
+    "asc_flags": [
+      "--enable relaxed-simd"
+    ],
+    "v8_flags": [
+      "--experimental-wasm-relaxed-simd"
+    ]
   }
 }

From 903e4bd6ad264a40c3631c91a9ac4007bad62aac Mon Sep 17 00:00:00 2001
From: dcode <dcode@dcode.io>
Date: Fri, 7 Apr 2023 00:26:16 +0200
Subject: [PATCH 2/3] list relaxed-simd in cli help

---
 cli/options.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/options.json b/cli/options.json
index 6590a92138..f6776dbe26 100644
--- a/cli/options.json
+++ b/cli/options.json
@@ -221,6 +221,7 @@
       " reference-types     Reference types and operations.",
       " gc                  Garbage collection (WIP).",
       " stringref           String reference types.",
+      " relaxed-simd        Relaxed SIMD operations.",
       ""
     ],
     "TODO_doesNothingYet": [
@@ -228,7 +229,6 @@
       " tail-calls          Tail call operations.",
       " multi-value         Multi value types.",
       " memory64            Memory64 operations.",
-      " relaxed-simd        Relaxed SIMD operations.",
       " extended-const      Extended const expressions."
     ],
     "type": "S",

From 7e855dbdda41815a42eb1ff36625ae0f80325d7d Mon Sep 17 00:00:00 2001
From: dcode <dcode@dcode.io>
Date: Tue, 11 Apr 2023 14:18:08 +0200
Subject: [PATCH 3/3] clarify

---
 std/assembly/index.d.ts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/std/assembly/index.d.ts b/std/assembly/index.d.ts
index e71cd018de..13e59d0886 100644
--- a/std/assembly/index.d.ts
+++ b/std/assembly/index.d.ts
@@ -990,15 +990,15 @@ declare namespace v128 {
   /**
    * Computes the minimum of each 32- or 64-bit floating point lane as indicated by `T`.
    * 
-   * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
-   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   * Unlike {@link v128.min}, the result is implementation-defined if either value is `NaN` or the two values are
+   * `-0.0` and `+0.0` (in either order), depending on hardware capabilities: Either `a[i]` or `b[i]`.
    */
   export function relaxed_min<T>(a: v128, b: v128): v128;
   /**
    * Computes the maximum of each 32- or 64-bit floating point lane as indicated by `T`.
    * 
-   * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN`, `-0.0` or `+0.0`,
-   * depending on hardware capabilities: Either `a[i]` or `b[i]`.
+   * Unlike {@link v128.max}, the result is implementation-defined if either value is `NaN` or the two values are
+   * `-0.0` and `+0.0` (in either order), depending on hardware capabilities: Either `a[i]` or `b[i]`.
    */
   export function relaxed_max<T>(a: v128, b: v128): v128;
   /**