Clean up in LowerStoreIndirCoalescing #101665

Merged
merged 3 commits on May 1, 2024
Changes from all commits
143 changes: 76 additions & 67 deletions src/coreclr/jit/lower.cpp
@@ -8347,54 +8347,57 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode)
#endif
}

struct StoreCoalescingData
{
var_types targetType;
GenTree* baseAddr;
GenTree* index;
GenTree* value;
uint32_t scale;
int offset;
};

//------------------------------------------------------------------------
// GetStoreCoalescingData: given a STOREIND node, get the data needed to perform
// store coalescing including pointer to the previous node.
// GetLoadStoreCoalescingData: given a STOREIND/IND node, get the data needed to perform
// store/load coalescing including pointer to the previous node.
//
// Arguments:
// comp - the compiler instance
// ind - the STOREIND node
// data - [OUT] the data needed for store coalescing
// ind - the STOREIND/IND node
// data - [OUT] the data needed for store/load coalescing
//
// Return Value:
// true if the data was successfully retrieved, false otherwise.
// Basically, false means that we definitely can't do store coalescing.
//
static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data)
bool Lowering::GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescingData* data) const
{
// Don't merge volatile stores.
// Don't merge volatile load/stores.
if (ind->IsVolatile())
{
return false;
}

// Data has to be INT_CNS, can be also VEC_CNS in future.
if (!ind->Data()->IsCnsIntOrI() && !ind->Data()->IsVectorConst())
{
return false;
}
const bool isStore = ind->OperIs(GT_STOREIND, GT_STORE_BLK);
const bool isLoad = ind->OperIs(GT_IND);

auto isNodeInvariant = [](Compiler* comp, GenTree* node, bool allowNull) {
if (node == nullptr)
{
return allowNull;
}
if (node->OperIsConst())
{
return true;
}
// We can allow bigger trees here, but it's not clear if it's worth it.
return node->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(node->AsLclVar()->GetLclNum());
};

if (isStore)
{
// For stores, Data() is expected to be an invariant node
if (!isNodeInvariant(comp, ind->Data(), false))
{
return false;
}
}
else if (!isLoad)
{
return false;
}

data->targetType = ind->TypeGet();
data->value = ind->Data();
data->value = isStore ? ind->Data() : nullptr;
if (ind->Addr()->OperIs(GT_LEA))
{
GenTree* base = ind->Addr()->AsAddrMode()->Base();
@@ -8430,12 +8433,24 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo
// Address is not LEA or local.
return false;
}

bool isClosedRange = false;
// Make sure there are no other unexpected nodes in-between.
LIR::ReadOnlyRange range = BlockRange().GetTreeRange(ind, &isClosedRange);
if (!isClosedRange)
{
return false;
}

data->rangeStart = range.FirstNode();
data->rangeEnd = range.LastNode();

return true;
}
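The helper now also records the closed LIR range (rangeStart/rangeEnd) of the candidate tree, so callers can later delete the whole tree as a unit. The snippet below is a hedged, standalone sketch of that "closed range" idea, not part of the PR; Node and IsClosedRange are invented stand-ins for the JIT's LIR types: every node between the first and last node of the candidate must belong to the candidate's own tree, with nothing unrelated interleaved.

// Illustrative sketch only: a simplified stand-in for LIR::ReadOnlyRange / GetTreeRange.
#include <unordered_set>

struct Node
{
    Node* next;
};

// Returns true when every node from 'first' to 'last' (inclusive, following
// 'next' links) belongs to 'treeNodes', that is, the tree occupies a closed,
// contiguous range that can safely be removed as a unit.
static bool IsClosedRange(Node* first, Node* last, const std::unordered_set<Node*>& treeNodes)
{
    for (Node* cur = first;; cur = cur->next)
    {
        if ((cur == nullptr) || (treeNodes.count(cur) == 0))
        {
            return false;
        }
        if (cur == last)
        {
            return true;
        }
    }
}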

//------------------------------------------------------------------------
// LowerStoreIndirCoalescing: If the given STOREIND node is followed by a similar
// STOREIND node, try to merge them into a single store of a twice wider type. Example:
// LowerStoreIndirCoalescing: If the given IND/STOREIND node is followed by a similar
// IND/STOREIND node, try to merge them into a single store of a twice wider type. Example:
//
// * STOREIND int
// +--* LCL_VAR byref V00
@@ -8459,7 +8474,7 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo
// Arguments:
// ind - the current STOREIND node
//
void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
{
// LA, RISC-V and ARM32 are more likely to receive a terrible performance hit from
// unaligned accesses, making this optimization questionable.
@@ -8469,12 +8484,9 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
return;
}

// TODO-ARM64-CQ: enable TYP_REF if we find a case where it's beneficial.
// The algorithm does support TYP_REF (with null value), but it seems to be not worth
// it on ARM64 where it's pretty efficient to do "stp xzr, xzr, [addr]" to clear two
// items at once. Although, it may be profitable to do "stp q0, q0, [addr]".
if (!varTypeIsIntegral(ind) && !varTypeIsSIMD(ind))
if (!ind->OperIs(GT_STOREIND, GT_STORE_BLK))
{
// Load coalescing is not yet supported
return;
}

@@ -8492,72 +8504,68 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
// to get a single store of 8 bytes.
do
{
StoreCoalescingData currData;
StoreCoalescingData prevData;
LoadStoreCoalescingData currData;
LoadStoreCoalescingData prevData;

// Get coalescing data for the current STOREIND
if (!GetStoreCoalescingData(comp, ind, &currData))
if (!GetLoadStoreCoalescingData(ind, &currData))
{
return;
}

bool isClosedRange = false;
// Now we need to find the very first LIR node representing the current STOREIND
// and make sure that there are no other unexpected nodes in-between.
LIR::ReadOnlyRange currIndRange = BlockRange().GetTreeRange(ind, &isClosedRange);
if (!isClosedRange)
{
return;
}
GenTree* prevTree = currIndRange.FirstNode()->gtPrev;
GenTree* prevTree = currData.rangeStart->gtPrev;
// Now we need to find the previous STOREIND,
// we can ignore any NOPs or IL_OFFSETs in-between
while ((prevTree != nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET))
{
prevTree = prevTree->gtPrev;
}

// It's not a STOREIND - bail out.
if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND))
// It's not a store - bail out.
if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND, GT_STORE_BLK))
{
return;
}

// Get coalescing data for the previous STOREIND
GenTreeStoreInd* prevInd = prevTree->AsStoreInd();
if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData))
GenTreeIndir* prevInd = prevTree->AsIndir();
if (!GetLoadStoreCoalescingData(prevInd, &prevData))
{
return;
}

// Same for the previous STOREIND, make sure there are no unexpected nodes around.
LIR::ReadOnlyRange prevIndRange = BlockRange().GetTreeRange(prevInd, &isClosedRange);
if (!isClosedRange)
if (!currData.IsAddressEqual(prevData, /*ignoreOffset*/ true))
{
// Non-offset part of the address is not the same - bail out.
return;
}

// STOREIND aren't value nodes.
LIR::Use use;
assert(!BlockRange().TryGetUse(prevInd, &use) && !BlockRange().TryGetUse(ind, &use));
// The same offset means that we're storing to the same location of the same width.
// Just remove the previous store then.
if (prevData.offset == currData.offset)
{
BlockRange().Remove(prevData.rangeStart, prevData.rangeEnd);
continue;
}

// BaseAddr, Index, Scale and Type all have to match.
if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) ||
!GenTree::Compare(prevData.baseAddr, currData.baseAddr) ||
!GenTree::Compare(prevData.index, currData.index))
// TODO-ARM64-CQ: enable TYP_REF if we find a case where it's beneficial.
// The algorithm does support TYP_REF (with null value), but it seems to be not worth
// it on ARM64 where it's pretty efficient to do "stp xzr, xzr, [addr]" to clear two
// items at once. Although, it may be profitable to do "stp q0, q0, [addr]".
if (!varTypeIsIntegral(ind) && !varTypeIsSIMD(ind))
{
return;
}

// At this point we know that we have two consecutive STOREINDs with the same base address,
// index and scale, the only variable thing is the offset (constant)
assert(ind->OperIs(GT_STOREIND));
assert(prevInd->OperIs(GT_STOREIND));
assert(prevData.IsStore());
assert(currData.IsStore());

// The same offset means that we're storing to the same location of the same width.
// Just remove the previous store then.
if (prevData.offset == currData.offset)
// For now, only constants are supported for data.
if (!prevData.value->OperIsConst() || !currData.value->OperIsConst())
{
BlockRange().Remove(std::move(prevIndRange));
continue;
return;
}

// Otherwise, the difference between two offsets has to match the size of the type.
Expand Down Expand Up @@ -8702,11 +8710,11 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
}

// We should not be here for stores requiring write barriers.
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind));
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(prevInd));
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind->AsStoreInd()));
assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(prevInd->AsStoreInd()));

// Delete previous STOREIND entirely
BlockRange().Remove(std::move(prevIndRange));
BlockRange().Remove(prevData.rangeStart, prevData.rangeEnd);

// It's not expected to be contained yet, but just in case...
ind->Data()->ClearContained();
Expand Down Expand Up @@ -9413,6 +9421,7 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
}

LowerBlockStore(blkNode);
LowerStoreIndirCoalescing(blkNode);
}

//------------------------------------------------------------------------
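For reference, here is a hedged, self-contained sketch of the decision the rewritten loop makes once two neighbouring candidates agree on base, index, scale and type. It does not use the JIT's actual types or helpers; Candidate, CoalesceAction and Decide are invented names. Equal offsets mean the previous store is dead and is simply removed, offsets exactly one element apart mean the pair can be fused into a single store of twice the width, and anything else bails out.

#include <cstdint>
#include <cstdlib>

// Which action the coalescing loop would take for a pair of candidates that
// already match on everything except the constant offset.
enum class CoalesceAction
{
    None,       // bail out, keep both stores
    RemovePrev, // same location and width: the previous store is dead
    FuseWider   // adjacent locations: merge into one double-width store
};

struct Candidate
{
    int32_t  offset;    // constant offset from the shared base address
    uint32_t sizeBytes; // width of the stored value
};

static CoalesceAction Decide(const Candidate& prev, const Candidate& curr)
{
    if (prev.sizeBytes != curr.sizeBytes)
    {
        return CoalesceAction::None;
    }
    if (prev.offset == curr.offset)
    {
        return CoalesceAction::RemovePrev;
    }
    if (std::abs(curr.offset - prev.offset) == static_cast<int32_t>(curr.sizeBytes))
    {
        return CoalesceAction::FuseWider;
    }
    return CoalesceAction::None;
}

For example, Decide({0, 4}, {4, 4}) yields FuseWider, matching the case in the doc comment above where two consecutive 4-byte stores become a single 8-byte store.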
31 changes: 30 additions & 1 deletion src/coreclr/jit/lower.h
@@ -315,6 +315,35 @@ class Lowering final : public Phase
}
#endif // defined(TARGET_XARCH)

struct LoadStoreCoalescingData
{
var_types targetType;
GenTree* baseAddr;
GenTree* index;
GenTree* value;
uint32_t scale;
int offset;
GenTree* rangeStart;
GenTree* rangeEnd;

bool IsStore() const
{
return value != nullptr;
}

bool IsAddressEqual(const LoadStoreCoalescingData& other, bool ignoreOffset) const
{
if ((scale != other.scale) || (targetType != other.targetType) ||
!GenTree::Compare(baseAddr, other.baseAddr) || !GenTree::Compare(index, other.index))
{
return false;
}
return ignoreOffset || (offset == other.offset);
}
};

bool GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescingData* data) const;

// Per tree node member functions
void LowerStoreIndirCommon(GenTreeStoreInd* ind);
GenTree* LowerIndir(GenTreeIndir* ind);
@@ -323,7 +352,7 @@ class Lowering final : public Phase
void MarkTree(GenTree* root);
void UnmarkTree(GenTree* root);
void LowerStoreIndir(GenTreeStoreInd* node);
void LowerStoreIndirCoalescing(GenTreeStoreInd* node);
void LowerStoreIndirCoalescing(GenTreeIndir* node);
GenTree* LowerAdd(GenTreeOp* node);
GenTree* LowerMul(GenTreeOp* mul);
bool TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);
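To illustrate how the two new conveniences on the header struct are meant to be used, here is a hedged, standalone mirror. MiniCoalescingData and its fields are invented for this example and deliberately simpler than the real struct, which compares GenTree operands and the target type: IsStore() is keyed off a null versus non-null value, and IsAddressEqual() takes an ignoreOffset switch so callers can first match the non-offset part of the address and only then reason about the offsets.

#include <cstdint>
#include <cstdio>

// Simplified, illustrative mirror of Lowering::LoadStoreCoalescingData.
struct MiniCoalescingData
{
    int         baseLcl;  // stand-in for baseAddr
    int         indexLcl; // stand-in for index, -1 when absent
    uint32_t    scale;
    int32_t     offset;
    const void* value;    // null for loads, non-null for stores

    bool IsStore() const
    {
        return value != nullptr;
    }

    bool IsAddressEqual(const MiniCoalescingData& other, bool ignoreOffset) const
    {
        if ((scale != other.scale) || (baseLcl != other.baseLcl) || (indexLcl != other.indexLcl))
        {
            return false;
        }
        return ignoreOffset || (offset == other.offset);
    }
};

int main()
{
    int data = 42;
    MiniCoalescingData prev{/* baseLcl */ 0, /* indexLcl */ -1, /* scale */ 1, /* offset */ 0, &data};
    MiniCoalescingData curr{/* baseLcl */ 0, /* indexLcl */ -1, /* scale */ 1, /* offset */ 4, &data};

    // Same base/index/scale, different offsets: a candidate pair for fusing.
    std::printf("addresses match ignoring offset: %d\n", curr.IsAddressEqual(prev, /* ignoreOffset */ true));
    std::printf("both are stores: %d\n", prev.IsStore() && curr.IsStore());
    return 0;
}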