Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[20.0.0]: Backport fixes from main to the release branch #8331

Merged
merged 4 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
24 changes: 23 additions & 1 deletion cranelift/codegen/src/bitset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,35 @@ use core::mem::size_of;
use core::ops::{Add, BitOr, Shl, Sub};

/// A small bitset built on a single primitive integer type
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[derive(Clone, Copy, Default, PartialEq, Eq)]
#[cfg_attr(
feature = "enable-serde",
derive(serde_derive::Serialize, serde_derive::Deserialize)
)]
pub struct BitSet<T>(pub T);

impl<T> std::fmt::Debug for BitSet<T>
where
    T: Into<u32>
        + From<u8>
        + BitOr<T, Output = T>
        + Shl<u8, Output = T>
        + Sub<T, Output = T>
        + Add<T, Output = T>
        + PartialEq
        + Copy,
{
    /// Format the bitset as a struct named after the concrete `BitSet<T>`
    /// type, with one boolean field per bit index (`"0": true, "1": false,
    /// ...`), so each bit's membership is visible at a glance.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        use std::string::ToString;
        let mut dbg = f.debug_struct(std::any::type_name::<Self>());
        for bit in 0..Self::bits() {
            // `bits()` is bounded by the width of the primitive `T`, so the
            // conversion to the `u32` index that `contains` takes cannot fail.
            let bit = u32::try_from(bit).unwrap();
            dbg.field(&bit.to_string(), &self.contains(bit));
        }
        dbg.finish()
    }
}

impl<T> BitSet<T>
where
T: Into<u32>
Expand Down
21 changes: 20 additions & 1 deletion cranelift/codegen/src/inst_predicates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ pub fn has_side_effect(func: &Function, inst: Inst) -> bool {
trivially_has_side_effects(opcode) || is_load_with_defined_trapping(opcode, data)
}

/// Is the given instruction a bitcast *from* a reference type (e.g. `r64`)?
///
/// That is: is it a `bitcast` whose input operand has a reference type?
/// (Bitcasts *to* a reference type return `false` here.)
pub fn is_bitcast_from_ref(func: &Function, inst: Inst) -> bool {
    if func.dfg.insts[inst].opcode() != ir::Opcode::Bitcast {
        return false;
    }

    // A bitcast has exactly one input value; check whether it is a reference.
    let input = func.dfg.inst_args(inst)[0];
    func.dfg.value_type(input).is_ref()
}

/// Does the given instruction behave as a "pure" node with respect to
/// aegraph semantics?
///
Expand All @@ -58,6 +69,7 @@ pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool {
} => flags.readonly() && flags.notrap(),
_ => false,
};

// Multi-value results do not play nicely with much of the egraph
// infrastructure. They are in practice used only for multi-return
// calls and some other odd instructions (e.g. uadd_overflow) which,
Expand All @@ -70,7 +82,11 @@ pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool {

let op = func.dfg.insts[inst].opcode();

has_one_result && (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op)))
has_one_result
&& (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op)))
// Cannot optimize ref-y bitcasts, as that can interact badly with
// safepoints and stack maps.
&& !is_bitcast_from_ref(func, inst)
}

/// Can the given instruction be merged into another copy of itself?
Expand All @@ -90,6 +106,9 @@ pub fn is_mergeable_for_egraph(func: &Function, inst: Inst) -> bool {
&& !op.can_store()
// Can only have idempotent side-effects.
&& (!has_side_effect(func, inst) || op.side_effects_idempotent())
// Cannot optimize ref-y bitcasts, as that can interact badly with
// safepoints and stack maps.
&& !is_bitcast_from_ref(func, inst)
}

/// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,6 @@ impl ABIMachineSpec for AArch64MachineDeps {
};

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,6 @@ impl ABIMachineSpec for Riscv64MachineDeps {
};

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
1 change: 0 additions & 1 deletion cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,6 @@ impl ABIMachineSpec for X64ABIMachineSpec {
let setup_area_size = 16; // RBP, return address

// Return FrameLayout structure.
debug_assert!(outgoing_args_size == 0);
FrameLayout {
stack_args_size,
setup_area_size,
Expand Down
18 changes: 17 additions & 1 deletion cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1806,9 +1806,25 @@ impl<M: ABIMachineSpec> Callee<M> {
&frame_layout,
));

// The stack limit check needs to cover all the stack adjustments we
// might make, up to the next stack limit check in any function we
// call. Since this happens after frame setup, the current function's
// setup area needs to be accounted for in the caller's stack limit
// check, but we need to account for any setup area that our callees
// might need. Note that s390x may also use the outgoing args area for
// backtrace support even in leaf functions, so that should be accounted
// for unconditionally.
let total_stacksize = frame_layout.clobber_size
+ frame_layout.fixed_frame_storage_size
+ frame_layout.outgoing_args_size
+ if self.is_leaf {
0
} else {
frame_layout.setup_area_size
};

// Leaf functions with zero stack don't need a stack check if one's
// specified, otherwise always insert the stack check.
let total_stacksize = frame_layout.fixed_frame_storage_size;
if total_stacksize > 0 || !self.is_leaf {
if let Some((reg, stack_limit_load)) = &self.stack_limit {
insts.extend(stack_limit_load.clone());
Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/machinst/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1675,7 +1675,7 @@ impl<I: VCodeInst> MachBuffer<I> {
(start_offset, end_offset)
}
};
trace!("Adding stack map for offsets {start:#x}..{end:#x}");
trace!("Adding stack map for offsets {start:#x}..{end:#x}: {stack_map:?}");
self.stack_maps.push(MachStackMap {
offset: start,
offset_end: end,
Expand Down
6 changes: 6 additions & 0 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,12 @@ macro_rules! isle_prelude_method_helpers {
mut caller: $abicaller,
args: ValueSlice,
) -> InstOutput {
let off = self.lower_ctx.sigs()[abi].sized_stack_arg_space()
+ self.lower_ctx.sigs()[abi].sized_stack_ret_space();
self.lower_ctx
.abi_mut()
.accumulate_outgoing_args_size(off as u32);

caller.emit_stack_pre_adjust(self.lower_ctx);

self.gen_call_common_args(&mut caller, args);
Expand Down
71 changes: 71 additions & 0 deletions cranelift/filetests/filetests/egraph/bitcast-ref.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
test optimize precise-output
set opt_level=speed
target x86_64

function %bitcast_from_r64(i64 vmctx, r64) -> i32 fast {
sig0 = (i32) fast
fn0 = colocated %foo sig0

block0(v0: i64, v1: r64):
v2 = bitcast.i64 v1
v3 = ireduce.i32 v2

call fn0(v3)

;; We MUST NOT dedupe the bitcast *from* the reference type, as that would
;; make `v1` no longer live across the `call`, which would mean that it
;; would not show up in the `call`'s safepoint's stack map, which could
;; result in the collector reclaiming an object too early, which is basically
;; a use-after-free bug.
v4 = bitcast.i64 v1
v5 = ireduce.i32 v4

return v5
}

; function %bitcast_from_r64(i64 vmctx, r64) -> i32 fast {
; sig0 = (i32) fast
; fn0 = colocated %foo sig0
;
; block0(v0: i64, v1: r64):
; v2 = bitcast.i64 v1
; v3 = ireduce.i32 v2
; call fn0(v3)
; v4 = bitcast.i64 v1
; v5 = ireduce.i32 v4
; return v5
; }

function %bitcast_to_r64(i64 vmctx, i32) -> r64 fast {
sig0 = (r64) fast
fn0 = colocated %foo sig0

block0(v0: i64, v1: i32):
v2 = uextend.i64 v1
v3 = bitcast.r64 v2

call fn0(v3)

;; On the other hand, it is technically fine to dedupe bitcasts *to*
;; reference types. Doing so extends the live range of the GC reference --
;; potentially adding it to more stack maps than it otherwise would have
;; been in, which means it might unnecessarily survive a GC it otherwise
;; wouldn't have -- but that is okay. Shrinking live ranges of GC
;; references, and removing them from stack maps they otherwise would have
;; been in, is the problematic transformation.
v4 = uextend.i64 v1
v5 = bitcast.r64 v4

return v5
}

; function %bitcast_to_r64(i64 vmctx, i32) -> r64 fast {
; sig0 = (r64) fast
; fn0 = colocated %foo sig0
;
; block0(v0: i64, v1: i32):
; v2 = uextend.i64 v1
; v3 = bitcast.r64 v2
; call fn0(v3)
; return v3
; }
24 changes: 14 additions & 10 deletions cranelift/filetests/filetests/isa/aarch64/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ block0(v0: i64):
; VCode:
; stp fp, lr, [sp, #-16]!
; mov fp, sp
; subs xzr, sp, x0, UXTX
; add x16, x0, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
; load_ext_name x2, TestCase(%foo)+0
Expand All @@ -67,11 +68,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; stp x29, x30, [sp, #-0x10]!
; mov x29, sp
; cmp sp, x0
; b.lo #0x2c
; block1: ; offset 0x10
; ldr x2, #0x18
; b #0x20
; add x16, x0, #0x10
; cmp sp, x16
; b.lo #0x30
; block1: ; offset 0x14
; ldr x2, #0x1c
; b #0x24
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
Expand All @@ -95,6 +97,7 @@ block0(v0: i64):
; mov fp, sp
; ldr x16, [x0]
; ldr x16, [x16, #4]
; add x16, x16, #16
; subs xzr, sp, x16, UXTX
; b.lo #trap=stk_ovf
; block0:
Expand All @@ -109,11 +112,12 @@ block0(v0: i64):
; mov x29, sp
; ldur x16, [x0]
; ldur x16, [x16, #4]
; add x16, x16, #0x10
; cmp sp, x16
; b.lo #0x34
; block1: ; offset 0x18
; ldr x2, #0x20
; b #0x28
; b.lo #0x38
; block1: ; offset 0x1c
; ldr x2, #0x24
; b #0x2c
; .byte 0x00, 0x00, 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00, 0x00, 0x00
; blr x2
Expand Down
12 changes: 8 additions & 4 deletions cranelift/filetests/filetests/isa/riscv64/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ block0(v0: i64):
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; trap_if stk_ovf##(sp ult a0)
; addi t6,a0,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
; callind a2
Expand All @@ -73,9 +74,10 @@ block0(v0: i64):
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; bgeu sp, a0, 8
; addi t6, a0, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x18
; block1: ; offset 0x1c
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc
Expand Down Expand Up @@ -105,6 +107,7 @@ block0(v0: i64):
; mv fp,sp
; ld t6,0(a0)
; ld t6,4(t6)
; addi t6,t6,16
; trap_if stk_ovf##(sp ult t6)
; block0:
; load_sym a2,%foo+0
Expand All @@ -122,9 +125,10 @@ block0(v0: i64):
; mv s0, sp
; ld t6, 0(a0)
; ld t6, 4(t6)
; addi t6, t6, 0x10
; bgeu sp, t6, 8
; .byte 0x00, 0x00, 0x00, 0x00 ; trap: stk_ovf
; block1: ; offset 0x20
; block1: ; offset 0x24
; auipc a2, 0
; ld a2, 0xc(a2)
; j 0xc
Expand Down
16 changes: 10 additions & 6 deletions cranelift/filetests/filetests/isa/s390x/stack-limit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ block0(v0: i64):
}

; VCode:
; clgrtle %r15, %r2
; la %r1, 160(%r2)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
; virtual_sp_offset_adjust 160
Expand All @@ -64,11 +65,12 @@ block0(v0: i64):
;
; Disassembled:
; block0: ; offset 0x0
; clgrtle %r15, %r2 ; trap: stk_ovf
; la %r1, 0xa0(%r2)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0xe
; bras %r1, 0x1a
; block1: ; offset 0x12
; bras %r1, 0x1e
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00
Expand All @@ -92,6 +94,7 @@ block0(v0: i64):
; VCode:
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 160(%r1)
; clgrtle %r15, %r1
; stmg %r14, %r15, 112(%r15)
; aghi %r15, -160
Expand All @@ -106,11 +109,12 @@ block0(v0: i64):
; block0: ; offset 0x0
; lg %r1, 0(%r2)
; lg %r1, 4(%r1)
; la %r1, 0xa0(%r1)
; clgrtle %r15, %r1 ; trap: stk_ovf
; stmg %r14, %r15, 0x70(%r15)
; aghi %r15, -0xa0
; block1: ; offset 0x1a
; bras %r1, 0x26
; block1: ; offset 0x1e
; bras %r1, 0x2a
; .byte 0x00, 0x00 ; reloc_external Abs8 %foo 0
; .byte 0x00, 0x00
; .byte 0x00, 0x00
Expand Down