Clean up in LowerStoreIndirCoalescing (dotnet#101665)
EgorBo authored and michaelgsharp committed May 8, 2024
1 parent 4616325 commit ac291c5
Showing 2 changed files with 106 additions and 68 deletions.
src/coreclr/jit/lower.cpp (143 changes: 76 additions & 67 deletions)

@@ -8347,54 +8347,57 @@ void Lowering::LowerBlockStoreAsHelperCall(GenTreeBlk* blkNode)
 #endif
 }
 
-struct StoreCoalescingData
-{
-    var_types targetType;
-    GenTree*  baseAddr;
-    GenTree*  index;
-    GenTree*  value;
-    uint32_t  scale;
-    int       offset;
-};
-
 //------------------------------------------------------------------------
-// GetStoreCoalescingData: given a STOREIND node, get the data needed to perform
-//    store coalescing including pointer to the previous node.
+// GetLoadStoreCoalescingData: given a STOREIND/IND node, get the data needed to perform
+//    store/load coalescing including pointer to the previous node.
 //
 // Arguments:
-//    comp - the compiler instance
-//    ind  - the STOREIND node
-//    data - [OUT] the data needed for store coalescing
+//    ind  - the STOREIND/IND node
+//    data - [OUT] the data needed for store/load coalescing
 //
 // Return Value:
 //    true if the data was successfully retrieved, false otherwise.
 //    Basically, false means that we definitely can't do store coalescing.
 //
-static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCoalescingData* data)
+bool Lowering::GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescingData* data) const
 {
-    // Don't merge volatile stores.
+    // Don't merge volatile load/stores.
     if (ind->IsVolatile())
     {
         return false;
     }
 
-    // Data has to be INT_CNS, can be also VEC_CNS in future.
-    if (!ind->Data()->IsCnsIntOrI() && !ind->Data()->IsVectorConst())
-    {
-        return false;
-    }
+    const bool isStore = ind->OperIs(GT_STOREIND, GT_STORE_BLK);
+    const bool isLoad  = ind->OperIs(GT_IND);
 
     auto isNodeInvariant = [](Compiler* comp, GenTree* node, bool allowNull) {
         if (node == nullptr)
         {
             return allowNull;
         }
         if (node->OperIsConst())
         {
             return true;
         }
         // We can allow bigger trees here, but it's not clear if it's worth it.
         return node->OperIs(GT_LCL_VAR) && !comp->lvaVarAddrExposed(node->AsLclVar()->GetLclNum());
     };
 
+    if (isStore)
+    {
+        // For stores, Data() is expected to be an invariant node
+        if (!isNodeInvariant(comp, ind->Data(), false))
+        {
+            return false;
+        }
+    }
+    else if (!isLoad)
+    {
+        return false;
+    }
+
     data->targetType = ind->TypeGet();
-    data->value      = ind->Data();
+    data->value      = isStore ? ind->Data() : nullptr;
     if (ind->Addr()->OperIs(GT_LEA))
     {
         GenTree* base = ind->Addr()->AsAddrMode()->Base();
@@ -8430,12 +8433,24 @@ static bool GetStoreCoalescingData(Compiler* comp, GenTreeStoreInd* ind, StoreCo
         // Address is not LEA or local.
         return false;
     }
 
+    bool isClosedRange = false;
+    // Make sure there are no other unexpected nodes in-between.
+    LIR::ReadOnlyRange range = BlockRange().GetTreeRange(ind, &isClosedRange);
+    if (!isClosedRange)
+    {
+        return false;
+    }
+
+    data->rangeStart = range.FirstNode();
+    data->rangeEnd   = range.LastNode();
+
     return true;
 }
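To make the helper's contract concrete, here is a minimal standalone sketch of the decomposition it performs for an address of the shape base + index * scale + offset. This is purely illustrative: plain strings stand in for the JIT's GenTree operands, and the node names (V00, V01) and the constant 5 are hypothetical.

// Sketch of the fields GetLoadStoreCoalescingData fills in; assumption:
// operands are reduced to identifiers, whereas the real code stores GenTree
// pointers and validates them with isNodeInvariant.
#include <cstdint>
#include <cstdio>

struct CoalescingDataSketch
{
    const char* targetType; // type of the access, e.g. "TYP_INT"
    const char* baseAddr;   // invariant base address node
    const char* index;      // optional index node (may be absent)
    const char* value;      // stored value for stores, null for loads
    uint32_t    scale;
    int         offset;
};

int main()
{
    // For a store shaped like "STOREIND(LEA(V00, V01, 4, 8), CNS_INT 5)"
    // the helper would fill the fields roughly as follows:
    CoalescingDataSketch data{"TYP_INT", "V00", "V01", "CNS_INT 5", 4, 8};
    std::printf("%s store: [%s + %s * %u + %d] = %s\n", data.targetType, data.baseAddr,
                data.index, (unsigned)data.scale, data.offset, data.value);
    return 0;
}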

 //------------------------------------------------------------------------
-// LowerStoreIndirCoalescing: If the given STOREIND node is followed by a similar
-//    STOREIND node, try to merge them into a single store of a twice wider type. Example:
+// LowerStoreIndirCoalescing: If the given IND/STOREIND node is followed by a similar
+//    IND/STOREIND node, try to merge them into a single store of a twice wider type. Example:
 //
 //      * STOREIND int
 //      +--* LCL_VAR byref V00
@@ -8459,7 +8474,7 @@ static bool GetStoreCoalescingData(GenTreeStoreInd* ind, StoreCo
 // Arguments:
 //    ind - the current STOREIND node
 //
-void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
+void Lowering::LowerStoreIndirCoalescing(GenTreeIndir* ind)
 {
     // LA, RISC-V and ARM32 are more likely to receive a terrible performance hit from
     // unaligned accesses, making this optimization questionable.
@@ -8469,12 +8484,9 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
         return;
     }
 
-    // TODO-ARM64-CQ: enable TYP_REF if we find a case where it's beneficial.
-    // The algorithm does support TYP_REF (with null value), but it seems to be not worth
-    // it on ARM64 where it's pretty efficient to do "stp xzr, xzr, [addr]" to clear two
-    // items at once. Although, it may be profitable to do "stp q0, q0, [addr]".
-    if (!varTypeIsIntegral(ind) && !varTypeIsSIMD(ind))
+    if (!ind->OperIs(GT_STOREIND, GT_STORE_BLK))
     {
+        // Load coalescing is not yet supported
         return;
     }
 
@@ -8492,72 +8504,68 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
     // to get a single store of 8 bytes.
     do
     {
-        StoreCoalescingData currData;
-        StoreCoalescingData prevData;
+        LoadStoreCoalescingData currData;
+        LoadStoreCoalescingData prevData;
 
         // Get coalescing data for the current STOREIND
-        if (!GetStoreCoalescingData(comp, ind, &currData))
+        if (!GetLoadStoreCoalescingData(ind, &currData))
        {
             return;
         }
 
-        bool isClosedRange = false;
-        // Now we need to find the very first LIR node representing the current STOREIND
-        // and make sure that there are no other unexpected nodes in-between.
-        LIR::ReadOnlyRange currIndRange = BlockRange().GetTreeRange(ind, &isClosedRange);
-        if (!isClosedRange)
-        {
-            return;
-        }
-        GenTree* prevTree = currIndRange.FirstNode()->gtPrev;
+        GenTree* prevTree = currData.rangeStart->gtPrev;
         // Now we need to find the previous STOREIND,
         // we can ignore any NOPs or IL_OFFSETs in-between
         while ((prevTree != nullptr) && prevTree->OperIs(GT_NOP, GT_IL_OFFSET))
         {
             prevTree = prevTree->gtPrev;
         }
 
-        // It's not a STOREIND - bail out.
-        if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND))
+        // It's not a store - bail out.
+        if ((prevTree == nullptr) || !prevTree->OperIs(GT_STOREIND, GT_STORE_BLK))
         {
             return;
         }
 
         // Get coalescing data for the previous STOREIND
-        GenTreeStoreInd* prevInd = prevTree->AsStoreInd();
-        if (!GetStoreCoalescingData(comp, prevInd->AsStoreInd(), &prevData))
+        GenTreeIndir* prevInd = prevTree->AsIndir();
+        if (!GetLoadStoreCoalescingData(prevInd, &prevData))
         {
             return;
         }
 
-        // Same for the previous STOREIND, make sure there are no unexpected nodes around.
-        LIR::ReadOnlyRange prevIndRange = BlockRange().GetTreeRange(prevInd, &isClosedRange);
-        if (!isClosedRange)
+        if (!currData.IsAddressEqual(prevData, /*ignoreOffset*/ true))
         {
+            // Non-offset part of the address is not the same - bail out.
             return;
         }
 
-        // STOREIND aren't value nodes.
-        LIR::Use use;
-        assert(!BlockRange().TryGetUse(prevInd, &use) && !BlockRange().TryGetUse(ind, &use));
+        // The same offset means that we're storing to the same location of the same width.
+        // Just remove the previous store then.
+        if (prevData.offset == currData.offset)
+        {
+            BlockRange().Remove(prevData.rangeStart, prevData.rangeEnd);
+            continue;
+        }
 
-        // BaseAddr, Index, Scale and Type all have to match.
-        if ((prevData.scale != currData.scale) || (prevData.targetType != currData.targetType) ||
-            !GenTree::Compare(prevData.baseAddr, currData.baseAddr) ||
-            !GenTree::Compare(prevData.index, currData.index))
+        // TODO-ARM64-CQ: enable TYP_REF if we find a case where it's beneficial.
+        // The algorithm does support TYP_REF (with null value), but it seems to be not worth
+        // it on ARM64 where it's pretty efficient to do "stp xzr, xzr, [addr]" to clear two
+        // items at once. Although, it may be profitable to do "stp q0, q0, [addr]".
+        if (!varTypeIsIntegral(ind) && !varTypeIsSIMD(ind))
         {
             return;
         }
 
         // At this point we know that we have two consecutive STOREINDs with the same base address,
         // index and scale, the only variable thing is the offset (constant)
-        assert(ind->OperIs(GT_STOREIND));
-        assert(prevInd->OperIs(GT_STOREIND));
+        assert(prevData.IsStore());
+        assert(currData.IsStore());
 
-        // The same offset means that we're storing to the same location of the same width.
-        // Just remove the previous store then.
-        if (prevData.offset == currData.offset)
+        // For now, only constants are supported for data.
+        if (!prevData.value->OperIsConst() || !currData.value->OperIsConst())
         {
-            BlockRange().Remove(std::move(prevIndRange));
-            continue;
+            return;
         }
 
         // Otherwise, the difference between two offsets has to match the size of the type.
@@ -8702,11 +8710,11 @@ void Lowering::LowerStoreIndirCoalescing(GenTreeStoreInd* ind)
         }
 
         // We should not be here for stores requiring write barriers.
-        assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind));
-        assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(prevInd));
+        assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind->AsStoreInd()));
+        assert(!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(prevInd->AsStoreInd()));
 
         // Delete previous STOREIND entirely
-        BlockRange().Remove(std::move(prevIndRange));
+        BlockRange().Remove(prevData.rangeStart, prevData.rangeEnd);
 
         // It's not expected to be contained yet, but just in case...
         ind->Data()->ClearContained();
Expand Down Expand Up @@ -9413,6 +9421,7 @@ void Lowering::LowerBlockStoreCommon(GenTreeBlk* blkNode)
}

LowerBlockStore(blkNode);
LowerStoreIndirCoalescing(blkNode);
}

//------------------------------------------------------------------------
Expand Down
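The payoff of the transformation above is easiest to see outside the JIT. The following self-contained sketch (hypothetical values, little-endian byte order assumed) shows why two adjacent narrow constant stores can be replaced by one store of a twice wider type with the constants folded together:

// Demonstrates the equivalence LowerStoreIndirCoalescing relies on:
// on a little-endian target, storing byte 0x11 at offset 0 and byte 0x22
// at offset 1 writes the same bytes as a single 16-bit store of 0x2211.
#include <cassert>
#include <cstdint>
#include <cstring>

int main()
{
    alignas(2) uint8_t before[2] = {};
    alignas(2) uint8_t after[2]  = {};

    // Before coalescing: two byte-sized stores at consecutive offsets.
    before[0] = 0x11;
    before[1] = 0x22;

    // After coalescing: one store of the twice wider type carrying the
    // combined constant.
    const uint16_t wide = 0x2211;
    std::memcpy(after, &wide, sizeof(wide));

    assert(std::memcmp(before, after, sizeof(before)) == 0);
    return 0;
}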
src/coreclr/jit/lower.h (31 changes: 30 additions & 1 deletion)

@@ -315,6 +315,35 @@ class Lowering final : public Phase
     }
 #endif // defined(TARGET_XARCH)

+    struct LoadStoreCoalescingData
+    {
+        var_types targetType;
+        GenTree*  baseAddr;
+        GenTree*  index;
+        GenTree*  value;
+        uint32_t  scale;
+        int       offset;
+        GenTree*  rangeStart;
+        GenTree*  rangeEnd;
+
+        bool IsStore() const
+        {
+            return value != nullptr;
+        }
+
+        bool IsAddressEqual(const LoadStoreCoalescingData& other, bool ignoreOffset) const
+        {
+            if ((scale != other.scale) || (targetType != other.targetType) ||
+                !GenTree::Compare(baseAddr, other.baseAddr) || !GenTree::Compare(index, other.index))
+            {
+                return false;
+            }
+            return ignoreOffset || (offset == other.offset);
+        }
+    };
+
+    bool GetLoadStoreCoalescingData(GenTreeIndir* ind, LoadStoreCoalescingData* data) const;
+
     // Per tree node member functions
     void LowerStoreIndirCommon(GenTreeStoreInd* ind);
     GenTree* LowerIndir(GenTreeIndir* ind);
@@ -323,7 +352,7 @@ class Lowering final : public Phase
     void MarkTree(GenTree* root);
     void UnmarkTree(GenTree* root);
     void LowerStoreIndir(GenTreeStoreInd* node);
-    void LowerStoreIndirCoalescing(GenTreeStoreInd* node);
+    void LowerStoreIndirCoalescing(GenTreeIndir* node);
     GenTree* LowerAdd(GenTreeOp* node);
     GenTree* LowerMul(GenTreeOp* mul);
     bool TryLowerAndNegativeOne(GenTreeOp* node, GenTree** nextNode);
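As a rough illustration of how IsAddressEqual drives the two cases in LowerStoreIndirCoalescing, here is a simplified analogue; integer identities stand in for the GenTree::Compare checks, so this is a sketch of the logic, not the JIT's actual types:

#include <cassert>
#include <cstdint>

// Simplified stand-in for LoadStoreCoalescingData: integer ids replace the
// GenTree::Compare checks on baseAddr/index, and targetType is an int tag.
struct AddrKey
{
    int      targetType;
    int      baseId;
    int      indexId;
    uint32_t scale;
    int      offset;

    bool IsAddressEqual(const AddrKey& other, bool ignoreOffset) const
    {
        if ((scale != other.scale) || (targetType != other.targetType) ||
            (baseId != other.baseId) || (indexId != other.indexId))
        {
            return false;
        }
        return ignoreOffset || (offset == other.offset);
    }
};

int main()
{
    AddrKey prev{/*targetType*/ 0, /*baseId*/ 1, /*indexId*/ 2, /*scale*/ 4, /*offset*/ 8};
    AddrKey curr = prev;
    curr.offset  = 12;

    // Same base/index/scale/type: the pair is a coalescing candidate.
    assert(curr.IsAddressEqual(prev, /*ignoreOffset*/ true));
    // Offsets differ, so this is not the "previous store is dead" case;
    // equal offsets would instead let the previous store be removed outright.
    assert(!curr.IsAddressEqual(prev, /*ignoreOffset*/ false));
    return 0;
}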
