From cec9ad950094f6c8a96f1fa5cedb80d77200133c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 22:13:01 -0700 Subject: [PATCH 01/44] Add `high` in regMaskTP --- src/coreclr/jit/target.h | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 6ebe5a5ea5002f..b0cb682063e899 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -219,10 +219,17 @@ struct regMaskTP { private: uint64_t low; + uint64_t high; public: + constexpr regMaskTP(uint64_t lowRegMask, uint64_t highRegMask) + : low(lowRegMask) + , high(highRegMask) + { + } constexpr regMaskTP(uint64_t regMask) : low(regMask) + , high(RBM_NONE) { } @@ -249,23 +256,28 @@ struct regMaskTP { return low; } + + uint64_t getHigh() const + { + return high; + } }; static regMaskTP operator^(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() ^ second.getLow()); + regMaskTP result(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh()); return result; } static regMaskTP operator&(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() & second.getLow()); + regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); return result; } static regMaskTP operator|(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() | second.getLow()); + regMaskTP result(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); return result; } @@ -325,17 +337,17 @@ static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) static bool operator==(regMaskTP first, regMaskTP second) { - return (first.getLow() == second.getLow()); + return (first.getLow() == second.getLow()) && (first.getHigh() == second.getHigh()); } static bool operator!=(regMaskTP first, regMaskTP second) { - return (first.getLow() != second.getLow()); + return !(first == second); } static regMaskTP operator~(regMaskTP first) { - 
regMaskTP result(~first.getLow()); + regMaskTP result(~first.getLow(), ~first.getHigh()); return result; } @@ -346,7 +358,7 @@ typedef unsigned regMaskTP; static uint32_t PopCount(regMaskTP value) { #ifdef TARGET_ARM64 - return BitOperations::PopCount(value.getLow()); + return BitOperations::PopCount(value.getLow()) + BitOperations::PopCount(value.getHigh()); #else return BitOperations::PopCount(value); #endif @@ -355,7 +367,14 @@ static uint32_t PopCount(regMaskTP value) static uint32_t BitScanForward(regMaskTP mask) { #ifdef TARGET_ARM64 + if (mask.getLow() != RBM_NONE) + { return BitOperations::BitScanForward(mask.getLow()); + } + else + { + return 64 + BitOperations::BitScanForward(mask.getHigh()); + } #else return BitOperations::BitScanForward(mask); #endif From 5fa863e56ac4bbce4eb73a4d673ad915ae83cc6e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 22:13:21 -0700 Subject: [PATCH 02/44] Introduce SingleTypeRegSet --- src/coreclr/jit/target.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b0cb682063e899..b1066609f84967 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -398,6 +398,8 @@ typedef unsigned __int64 regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif +typedef regMaskSmall SingleTypeRegSet; + /*****************************************************************************/ #ifdef DEBUG From 7dba61bb4c015c00d16ea9ab14cf421788b53843 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 22:13:59 -0700 Subject: [PATCH 03/44] Use SingleTypeRegSet in few places --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 060f5d497d3778..415a8e91c0b63f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -13656,7 +13656,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current 
unsigned startRegister = BitScanForward(limitConsecutiveResult); - regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; + SingleTypeRegSet registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 264bc70b0a74de..89a46e784529e6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -331,7 +331,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC regMaskTP unprocessedRegs = consecutiveCandidates; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + SingleTypeRegSet registersNeededMask = (1ULL << registersNeeded) - 1; do { // From LSB, find the first available register (bit `1`) From f68cf05dd5390efa5a504cd612f797c9750e6cf0 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 22:14:13 -0700 Subject: [PATCH 04/44] Delete some methods in regMaskTP --- src/coreclr/jit/target.h | 20 +------------------- src/coreclr/jit/unwind.cpp | 3 +-- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b1066609f84967..14fa057db2890f 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -281,24 +281,6 @@ static regMaskTP operator|(regMaskTP first, regMaskTP second) return result; } -static regMaskTP operator<<(regMaskTP first, const int b) -{ - regMaskTP result(first.getLow() << b); - return result; -} - -static regMaskTP operator>>(regMaskTP first, const int b) -{ - regMaskTP result(first.getLow() >> b); - return result; -} - -static regMaskTP& operator>>=(regMaskTP& first, const int b) -{ - first = first >> b; - return first; -} - static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) { first = first | second; @@ -369,7 +351,7 @@ static uint32_t 
BitScanForward(regMaskTP mask) #ifdef TARGET_ARM64 if (mask.getLow() != RBM_NONE) { - return BitOperations::BitScanForward(mask.getLow()); + return BitOperations::BitScanForward(mask.getLow()); } else { diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index a51a52ab21d640..97f05939013906 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -224,12 +224,11 @@ void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat) // because LLVM only know about D0-D31. // As such pairs Sx,Sx+1 are referenced as D0-D15 registers in DWARF // For that we process registers in pairs. - regBit >>= isFloat ? 2 : 1; regNum = isFloat ? REG_PREV(REG_PREV(regNum)) : REG_PREV(regNum); #else - regBit >>= 1; regNum = REG_PREV(regNum); #endif + regBit = genRegMask(regNum); } } From 7f8a9b66bd8ce9f1ce51fc6304b3dbbfeba3ad3f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 May 2024 22:25:39 -0700 Subject: [PATCH 05/44] Delete some more methods of regMaskTP --- src/coreclr/jit/emit.cpp | 2 +- src/coreclr/jit/gcencode.cpp | 2 +- src/coreclr/jit/regset.cpp | 2 +- src/coreclr/jit/target.h | 20 +------------------- 4 files changed, 4 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 3e003a14af83e0..879c0f7c85be5e 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10062,7 +10062,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn // of callee-saved registers only). 
for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; + regMaskSmall calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; if (emitThisGCrefRegs & calleeSavedRbm) { gcrefRegs |= (1 << calleeSavedRegIdx); diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index d592187ed55e9f..78db6026d4b059 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, while (regMask) { // Get hold of the next register bit. - regMaskTP tmpMask = genFindLowestBit(regMask); + regMaskSmall tmpMask = genFindLowestBit(regMask); assert(tmpMask); // Remember the new state of this register. diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp index 2ef2f9c1e17f16..a033e49fcad1fd 100644 --- a/src/coreclr/jit/regset.cpp +++ b/src/coreclr/jit/regset.cpp @@ -950,7 +950,7 @@ regNumber genRegArgNext(regNumber argReg) * are encoded in GC information at call sites. 
*/ -const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; +const regMaskSmall raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER}; regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask) { diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 14fa057db2890f..c5ab0b562e2b7f 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -293,24 +293,6 @@ static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) return first; } -static regMaskSmall operator^=(regMaskSmall& first, regMaskTP second) -{ - first ^= second.getLow(); - return first; -} - -static regMaskSmall operator&=(regMaskSmall& first, regMaskTP second) -{ - first &= second.getLow(); - return first; -} - -static regMaskSmall operator|=(regMaskSmall& first, regMaskTP second) -{ - first |= second.getLow(); - return first; -} - static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) { first = first & second; @@ -820,7 +802,7 @@ inline regMaskTP genRegMask(regNumber regNum, var_types type) * These arrays list the callee-saved register numbers (and bitmaps, respectively) for * the current architecture. */ -extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; +extern const regMaskSmall raRbmCalleeSaveOrder[CNT_CALL_GC_REGS]; // This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask. 
regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short); From 2ede666b18372454a25cfb2aaf9cbe2f8fbf947a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 10:58:25 -0700 Subject: [PATCH 06/44] Fix actualRegistersMask --- src/coreclr/jit/lsrabuild.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 43c75a118b218d..b0a221ef96965b 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2821,6 +2821,9 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } +#ifdef TARGET_ARM64 + actualRegistersMask = regMaskTP(~RBM_NONE, RBM_NONE); +#else if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount @@ -2830,6 +2833,7 @@ void LinearScan::buildIntervals() { actualRegistersMask = ~RBM_NONE; } +#endif #ifdef DEBUG // Make sure we don't have any blocks that were not visited From 4b873d4f67fc657d7dd3024ddc646dff43e2d907 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 11:36:34 -0700 Subject: [PATCH 07/44] Use SingleTypeRegSet in consecutive register code --- src/coreclr/jit/lsraarm64.cpp | 40 ++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 89a46e784529e6..8d9aa4d30fc709 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -180,23 +180,25 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) { - if (PopCount(candidates) < registersNeeded) + SingleTypeRegSet floatCandidates = (candidates & availableFloatRegs) + .getLow(); + if (PopCount(floatCandidates) < registersNeeded) { // There is no way the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. 
return RBM_NONE; } - regMaskTP currAvailableRegs = candidates; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; + SingleTypeRegSet currAvailableRegs = floatCandidates; + SingleTypeRegSet overallResult = RBM_NONE; + SingleTypeRegSet consecutiveResult = RBM_NONE; // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. #define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -206,10 +208,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { // From LSB, find the first available register (bit `1`) regAvailableStartIndex = BitScanForward(currAvailableRegs); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + SingleTypeRegSet startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + SingleTypeRegSet maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). 
if (maskProcessed == RBM_NONE) @@ -225,7 +227,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { regAvailableEndIndex = BitScanForward(maskProcessed); } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + SingleTypeRegSet endMask = (1ULL << regAvailableEndIndex) - 1; // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. @@ -236,8 +238,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; - if ((candidates & v0_v31_mask) == v0_v31_mask) + SingleTypeRegSet v0_v31_mask = RBM_V0 | RBM_V31; + if ((floatCandidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register // round to first register is available. @@ -251,7 +253,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { case 2: { - if ((candidates & v0_v31_mask) != RBM_NONE) + if ((floatCandidates & v0_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v31_mask; @@ -260,15 +262,15 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 3: { - regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; - if ((candidates & v0_v30_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + if ((floatCandidates & v0_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v30_v31_mask; } - regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; - if ((candidates & v0_v1_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + if ((floatCandidates & v0_v1_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v1_v31_mask; @@ -277,21 +279,21 @@ regMaskTP 
LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 4: { - regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v29_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V29; overallResult |= v0_v29_v30_v31_mask; } - regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v1_v30_v31_mask; } - regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + SingleTypeRegSet v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; From 98caccffc8cc43b5525db4f9c5034a2c139ad6e4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 12:30:25 -0700 Subject: [PATCH 08/44] Use SingleTypeRegSet in consecutive registers codebase --- src/coreclr/jit/lsra.cpp | 4 +-- src/coreclr/jit/lsra.h | 7 +++-- src/coreclr/jit/lsraarm64.cpp | 53 +++++++++++++++++------------------ 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 415a8e91c0b63f..e847742633c215 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -13647,8 +13647,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one // such range that is consecutive. Next, append that range to the `candidates`. 
// - regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask; - regMaskTP overallLimitCandidates; + SingleTypeRegSet limitCandidatesForConsecutive = ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs).getLow(); + SingleTypeRegSet overallLimitCandidates; regMaskTP limitConsecutiveResult = linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount, &overallLimitCandidates); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a80bf94c770bec..b6298c38bddbc0 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1233,10 +1233,11 @@ class LinearScan : public LinearScanInterface bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); - regMaskTP filterConsecutiveCandidates(regMaskTP candidates, + SingleTypeRegSet filterConsecutiveCandidates(SingleTypeRegSet candidates, unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates); - regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded); + SingleTypeRegSet* allConsecutiveCandidates); + SingleTypeRegSet filterConsecutiveCandidatesForSpill(SingleTypeRegSet consecutiveCandidates, + unsigned int registersNeeded); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 8d9aa4d30fc709..882cb4811f2cff 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -176,12 +176,12 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // From `candidates`, the mask of series of consecutive registers of 
`registersNeeded` size with just the first-bit // set. // -regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, +SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet floatCandidates, unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates) + SingleTypeRegSet* allConsecutiveCandidates) { - SingleTypeRegSet floatCandidates = (candidates & availableFloatRegs) - .getLow(); + assert((floatCandidates == RBM_NONE) || (floatCandidates & availableFloatRegs) != RBM_NONE); + if (PopCount(floatCandidates) < registersNeeded) { // There is no way the register demanded can be satisfied for this RefPosition @@ -196,10 +196,10 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. -#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ - consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; @@ -280,21 +280,21 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, case 4: { SingleTypeRegSet v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & 
v0_v29_v30_v31_mask) != RBM_NONE) + if ((floatCandidates & v0_v29_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V29; overallResult |= v0_v29_v30_v31_mask; } SingleTypeRegSet v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE) + if ((floatCandidates & v0_v1_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v1_v30_v31_mask; } SingleTypeRegSet v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE) + if ((floatCandidates & v0_v1_v2_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v1_v2_v31_mask; @@ -325,12 +325,13 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // Returns: // Filtered candidates that needs fewer spilling. // -regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded) +SingleTypeRegSet LinearScan::filterConsecutiveCandidatesForSpill(SingleTypeRegSet consecutiveCandidates, + unsigned int registersNeeded) { assert(consecutiveCandidates != RBM_NONE); assert((registersNeeded >= 2) && (registersNeeded <= 4)); - regMaskTP consecutiveResultForBusy = RBM_NONE; - regMaskTP unprocessedRegs = consecutiveCandidates; + SingleTypeRegSet consecutiveResultForBusy = RBM_NONE; + SingleTypeRegSet unprocessedRegs = consecutiveCandidates; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; SingleTypeRegSet registersNeededMask = (1ULL << registersNeeded) - 1; @@ -422,24 +423,26 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); regMaskTP freeCandidates = allCandidates & m_AvailableRegs; + assert((freeCandidates == RBM_NONE) || (freeCandidates & availableFloatRegs) != 0); + SingleTypeRegSet floatFreeCandidates = 
freeCandidates.getLow(); #ifdef DEBUG if (getStressLimitRegs() != LSRA_LIMIT_NONE) { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. - freeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | + floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); } #endif *busyCandidates = RBM_NONE; - regMaskTP overallResult; + SingleTypeRegSet overallResult; unsigned int registersNeeded = refPosition->regCount; - if (freeCandidates != RBM_NONE) + if (floatFreeCandidates != RBM_NONE) { - regMaskTP consecutiveResultForFree = - filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); + SingleTypeRegSet consecutiveResultForFree = + filterConsecutiveCandidates(floatFreeCandidates, registersNeeded, &overallResult); if (consecutiveResultForFree != RBM_NONE) { @@ -451,7 +454,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, regNumber firstRegNum = REG_NA; regNumber prevRegNum = REG_NA; int foundCount = 0; - regMaskTP foundRegMask = RBM_NONE; RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition); assert(consecutiveRefPosition != nullptr); @@ -463,7 +465,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if (!interval->isActive) { - foundRegMask = RBM_NONE; foundCount = 0; continue; } @@ -472,7 +473,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if ((prevRegNum == REG_NA) || (prevRegNum == REG_PREV(currRegNum)) || ((prevRegNum == REG_FP_LAST) && (currRegNum == REG_FP_FIRST))) { - foundRegMask |= genRegMask(currRegNum); if (prevRegNum == REG_NA) { firstRegNum = currRegNum; @@ -482,7 +482,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, continue; } - foundRegMask = RBM_NONE; foundCount = 0; break; } @@ 
-530,9 +529,9 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // try_FAR_NEXT_REF(), etc. here which would complicate things. Instead, we just go with option# 1 and select // registers based on fewer number of registers that has to be spilled. // - regMaskTP overallResultForBusy; - regMaskTP consecutiveResultForBusy = - filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy); + SingleTypeRegSet overallResultForBusy; + SingleTypeRegSet consecutiveResultForBusy = + filterConsecutiveCandidates(floatFreeCandidates, registersNeeded, &overallResultForBusy); *busyCandidates = consecutiveResultForBusy; @@ -543,7 +542,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // If there is an overlap of that with free registers, then try to find a series that will need least // registers spilling as mentioned in #1 above. - regMaskTP optimalConsecutiveResultForBusy = + SingleTypeRegSet optimalConsecutiveResultForBusy = filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded); if (optimalConsecutiveResultForBusy != RBM_NONE) From dbb72eb5b6e452f378a288673d5ffa1721f453fa Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 13:19:16 -0700 Subject: [PATCH 09/44] Change genRegMask*() method to return SingleTypeRegSet --- src/coreclr/jit/target.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index c5ab0b562e2b7f..19c1ff3e3a51f3 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -481,8 +481,8 @@ inline bool isByteReg(regNumber reg) } #endif -inline regMaskTP genRegMask(regNumber reg); -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); +inline SingleTypeRegSet genRegMask(regNumber reg); +inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); 
/***************************************************************************** * Return true if the register number is valid @@ -712,7 +712,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type) extern const regMaskSmall regMasks[REG_COUNT]; -inline regMaskTP genRegMask(regNumber reg) +inline SingleTypeRegSet genRegMask(regNumber reg) { assert((unsigned)reg < ArrLen(regMasks)); #ifdef TARGET_AMD64 @@ -720,7 +720,7 @@ inline regMaskTP genRegMask(regNumber reg) // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] ) // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK // and the result needs to be zero. - regMaskTP result = 1ULL << reg; + SingleTypeRegSet result = 1ULL << reg; assert(result == regMasks[reg]); return result; #else @@ -733,7 +733,7 @@ inline regMaskTP genRegMask(regNumber reg) * Map a register number to a floating-point register mask. */ -inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) +inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */)) { #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \ defined(TARGET_RISCV64) From 36d5919a5f691752311690dfaa910eee60e449c1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 14:34:48 -0700 Subject: [PATCH 10/44] wip --- src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lsra.cpp | 43 +++++++++++++------------- src/coreclr/jit/lsra.h | 58 ++++++++++++++++++----------------- src/coreclr/jit/lsrabuild.cpp | 14 ++++----- src/coreclr/jit/target.h | 50 +++++++++++++++++------------- 5 files changed, 88 insertions(+), 79 deletions(-) diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 11cd55699dc99f..39997dd341c625 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -513,7 +513,7 @@ class GlobalJitOptions #define TRACK_LSRA_STATS 1 // Collect LSRA stats #define 
TRACK_ENREG_STATS 1 // Collect enregistration stats #else -#define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well +#define MEASURE_MEM_ALLOC 1 // You can set this to 1 to get memory stats in retail, as well #define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well #define TRACK_LSRA_STATS 0 // You can set this to 1 to get LSRA stats in retail, as well #define TRACK_ENREG_STATS 0 diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index e847742633c215..36f842cf215a7d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -234,13 +234,13 @@ weight_t LinearScan::getWeight(RefPosition* refPos) // allRegs represents a set of registers that can // be used to allocate the specified type in any point // in time (more of a 'bank' of registers). -regMaskTP LinearScan::allRegs(RegisterType rt) +SingleTypeRegSet LinearScan::allRegs(RegisterType rt) { assert((rt != TYP_UNDEF) && (rt != TYP_STRUCT)); return *availableRegs[rt]; } -regMaskTP LinearScan::allByteRegs() +SingleTypeRegSet LinearScan::allByteRegs() { #ifdef TARGET_X86 return availableIntRegs & RBM_BYTE_REGS; @@ -249,7 +249,7 @@ regMaskTP LinearScan::allByteRegs() #endif } -regMaskTP LinearScan::allSIMDRegs() +SingleTypeRegSet LinearScan::allSIMDRegs() { return availableFloatRegs; } @@ -262,7 +262,7 @@ regMaskTP LinearScan::allSIMDRegs() // Return Value: // Register mask of the SSE/VEX-only SIMD registers // -regMaskTP LinearScan::lowSIMDRegs() +SingleTypeRegSet LinearScan::lowSIMDRegs() { #if defined(TARGET_AMD64) return (availableFloatRegs & RBM_LOWFLOAT); @@ -436,7 +436,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, // that it will select a callee-save register. But to be safe, we restrict // the set of candidates if compFloatingPointUsed is not already set. 
// -regMaskTP LinearScan::internalFloatRegCandidates() +SingleTypeRegSet LinearScan::internalFloatRegCandidates() { needNonIntegerRegisters = true; @@ -481,12 +481,12 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // New regMask that has minRegCount registers after intersection. // Otherwise returns regMaskActual. // -regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstraint, +SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, + SingleTypeRegSet regMaskActual, + SingleTypeRegSet regMaskConstraint, unsigned minRegCount) { - regMaskTP newMask = regMaskActual & regMaskConstraint; + SingleTypeRegSet newMask = regMaskActual & regMaskConstraint; if (genCountBits(newMask) < minRegCount) { // Constrained mask does not have minimum required registers needed. @@ -495,7 +495,7 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); //TODO: Pass the right type if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. 
@@ -532,8 +532,8 @@ static const regMaskTP LsraLimitUpperSimdSet = static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); -static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); @@ -561,7 +561,7 @@ static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 // This is the method used to implement the stress options that limit // the set of registers considered for allocation. // -regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) +SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, SingleTypeRegSet mask) { #ifdef TARGET_ARM64 if ((refPosition != nullptr) && refPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) @@ -831,7 +831,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note: one known reason why we exclude LR is because NativeAOT has dependency on not // using LR as a GPR. See: https://github.com/dotnet/runtime/issues/101932 // Once that is addressed, we may consider allowing LR in availableIntRegs. 
- availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = ((RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd)).getLow(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else @@ -2786,7 +2786,7 @@ void LinearScan::setFrameType() // If we are using FPBASE as the frame register, we cannot also use it for // a local var. - regMaskTP removeMask = RBM_NONE; + SingleTypeRegSet removeMask = RBM_NONE; if (frameType == FT_EBP_FRAME) { removeMask |= RBM_FPBASE; @@ -2986,7 +2986,7 @@ regNumber LinearScan::allocateRegMinimal(Interval* currentInterva { assert(!enregisterLocalVars); regNumber foundReg; - regMaskTP foundRegBit; + SingleTypeRegSet foundRegBit; RegRecord* availablePhysRegRecord; foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) @@ -3501,7 +3501,7 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) // Assign the given physical register interval to the given interval void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) { - regMaskTP assignedRegMask = genRegMask(regRec->regNum); + SingleTypeRegSet assignedRegMask = genRegMask(regRec->regNum); compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); interval->assignedReg = regRec; @@ -5254,7 +5254,7 @@ void LinearScan::allocateRegistersMinimal() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; + SingleTypeRegSet assignedRegBit = RBM_NONE; bool isInRegister = false; if (assignedRegister != REG_NA) { @@ -6186,7 +6186,7 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; + 
SingleTypeRegSet assignedRegBit = RBM_NONE; bool isInRegister = false; if (assignedRegister != REG_NA) { @@ -13319,7 +13319,7 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Register bit selected (a single register) and REG_NA if no register was selected. // template -regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, +SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG @@ -13778,7 +13778,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers // are busy). // -regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval, +SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( + Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!linearScan->enregisterLocalVars); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index b6298c38bddbc0..cf9aaa922c4d33 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -775,11 +775,11 @@ class LinearScan : public LinearScanInterface return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); } - regMaskTP getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstrain, + SingleTypeRegSet getConstrainedRegMask(RefPosition* refPosition, + SingleTypeRegSet regMaskActual, + SingleTypeRegSet regMaskConstrain, unsigned minRegCount); - regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask); + SingleTypeRegSet stressLimitRegs(RefPosition* refPosition, SingleTypeRegSet mask); // This controls the heuristics used to select registers // These can be combined. 
@@ -1103,11 +1103,11 @@ class LinearScan : public LinearScanInterface // Given some tree node add refpositions for all the registers this node kills bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask); - regMaskTP allRegs(RegisterType rt); - regMaskTP allByteRegs(); - regMaskTP allSIMDRegs(); - regMaskTP lowSIMDRegs(); - regMaskTP internalFloatRegCandidates(); + SingleTypeRegSet allRegs(RegisterType rt); + SingleTypeRegSet allByteRegs(); + SingleTypeRegSet allSIMDRegs(); + SingleTypeRegSet lowSIMDRegs(); + SingleTypeRegSet internalFloatRegCandidates(); void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); @@ -1170,11 +1170,11 @@ class LinearScan : public LinearScanInterface LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, - regMaskTP mask, + SingleTypeRegSet mask, unsigned multiRegIdx = 0); RefPosition* newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, SingleTypeRegSet mask); void applyCalleeSaveHeuristics(RefPosition* rp); @@ -1270,10 +1270,10 @@ class LinearScan : public LinearScanInterface // Perform register selection and update currentInterval or refPosition template - FORCEINLINE regMaskTP select(Interval* currentInterval, + FORCEINLINE SingleTypeRegSet select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - FORCEINLINE regMaskTP selectMinimal(Interval* currentInterval, + FORCEINLINE SingleTypeRegSet selectMinimal(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); // If the register is from unassigned set such that it was not already @@ -1710,13 +1710,13 @@ class LinearScan : public LinearScanInterface // A temporary VarToRegMap used during the resolution of critical edges. 
VarToRegMap sharedCriticalVarToRegMap; PhasedVar actualRegistersMask; - PhasedVar availableIntRegs; - PhasedVar availableFloatRegs; - PhasedVar availableDoubleRegs; + PhasedVar availableIntRegs; + PhasedVar availableFloatRegs; + PhasedVar availableDoubleRegs; #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - PhasedVar availableMaskRegs; + PhasedVar availableMaskRegs; #endif - PhasedVar* availableRegs[TYP_COUNT]; + PhasedVar* availableRegs[TYP_COUNT]; #if defined(TARGET_XARCH) || defined(TARGET_ARM64) #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs) @@ -1897,7 +1897,7 @@ class LinearScan : public LinearScanInterface regMaskTP regsInUseThisLocation; regMaskTP regsInUseNextLocation; #ifdef TARGET_ARM64 - regMaskTP consecutiveRegsInUseThisLocation; + SingleTypeRegSet consecutiveRegsInUseThisLocation; #endif bool isRegBusy(regNumber reg, var_types regType) { @@ -2221,10 +2221,10 @@ class Interval : public Referenceable void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l); // Fixed registers for which this Interval has a preference - regMaskTP registerPreferences; + SingleTypeRegSet registerPreferences; // Registers that should be avoided for this interval - regMaskTP registerAversion; + SingleTypeRegSet registerAversion; // The relatedInterval is: // - for any other interval, it is the interval to which this interval @@ -2373,12 +2373,12 @@ class Interval : public Referenceable // definitions. This method will return the current assigned register if any, or // the 'registerPreferences' otherwise. // - regMaskTP getCurrentPreferences() + SingleTypeRegSet getCurrentPreferences() { return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum); } - void mergeRegisterPreferences(regMaskTP preferences) + void mergeRegisterPreferences(SingleTypeRegSet preferences) { // We require registerPreferences to have been initialized. 
assert(registerPreferences != RBM_NONE); @@ -2393,7 +2393,7 @@ class Interval : public Referenceable return; } - regMaskTP commonPreferences = (registerPreferences & preferences); + SingleTypeRegSet commonPreferences = (registerPreferences & preferences); if (commonPreferences != RBM_NONE) { registerPreferences = commonPreferences; @@ -2428,11 +2428,13 @@ class Interval : public Referenceable // Keep only the callee-save preferences, if not empty. // Otherwise, take the union of the preferences. - regMaskTP newPreferences = registerPreferences | preferences; + SingleTypeRegSet newPreferences = registerPreferences | preferences; if (preferCalleeSave) { - regMaskTP calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences); + SingleTypeRegSet calleeSaveMask = + (LinearScan::calleeSaveRegs(this->registerType) & newPreferences).GetRegSetForType(this-registerType); + if (calleeSaveMask != RBM_NONE) { newPreferences = calleeSaveMask; @@ -2447,7 +2449,7 @@ class Interval : public Referenceable // An exception is made in the case where one of the existing or new // preferences are all callee-save, in which case we "prefer" the callee-save - void updateRegisterPreferences(regMaskTP preferences) + void updateRegisterPreferences(SingleTypeRegSet preferences) { // If this interval is preferenced, that interval may have already been assigned a // register, and we want to include that in the preferences. @@ -2485,7 +2487,7 @@ class RefPosition // Prior to the allocation pass, registerAssignment captures the valid registers // for this RefPosition. 
// After the allocation pass, this contains the actual assignment - regMaskTP registerAssignment; + SingleTypeRegSet registerAssignment; RefType refType; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b0a221ef96965b..1c4cf680e93bfb 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -251,8 +251,8 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de assert(!interval->isLocalVar); RefPosition* useRefPosition = defRefPosition->nextRefPosition; - regMaskTP defRegAssignment = defRefPosition->registerAssignment; - regMaskTP useRegAssignment = useRefPosition->registerAssignment; + SingleTypeRegSet defRegAssignment = defRefPosition->registerAssignment; + SingleTypeRegSet useRegAssignment = useRefPosition->registerAssignment; RegRecord* defRegRecord = nullptr; RegRecord* useRegRecord = nullptr; regNumber defReg = REG_NA; @@ -352,7 +352,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de RegisterType regType = interval->registerType; assert((getRegisterType(interval, defRefPosition) == regType) && (getRegisterType(interval, useRefPosition) == regType)); - regMaskTP candidates = allRegs(regType); + SingleTypeRegSet candidates = allRegs(regType); defRefPosition->registerAssignment = candidates; defRefPosition->isFixedRegRef = false; return; @@ -423,8 +423,8 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP) // All defs must have a valid treeNode, but we check it below to be conservative. 
assert(defRP->treeNode != nullptr); - regMaskTP prevAssignment = defRP->registerAssignment; - regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); + SingleTypeRegSet prevAssignment = defRP->registerAssignment; + SingleTypeRegSet newAssignment = (prevAssignment & useRP->registerAssignment); if (newAssignment != RBM_NONE) { if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) @@ -519,7 +519,7 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) // a new RefPosition // RefPosition* LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, SingleTypeRegSet mask) { RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -558,7 +558,7 @@ RefPosition* LinearScan::newRefPosition(Interval* theInterval, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, - regMaskTP mask, + SingleTypeRegSet mask, unsigned multiRegIdx /* = 0 */) { if (theInterval != nullptr) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 19c1ff3e3a51f3..b8aff287e9983a 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -211,10 +211,30 @@ enum _regMask_enum : unsigned typedef _regNumber_enum regNumber; typedef unsigned char regNumberSmall; + +#if REGMASK_BITS == 8 +typedef unsigned char regMaskSmall; +#define REG_MASK_INT_FMT "%02X" +#define REG_MASK_ALL_FMT "%02X" +#elif REGMASK_BITS == 16 +typedef unsigned short regMaskSmall; +#define REG_MASK_INT_FMT "%04X" +#define REG_MASK_ALL_FMT "%04X" +#elif REGMASK_BITS == 32 +typedef unsigned regMaskSmall; +#define REG_MASK_INT_FMT "%08X" +#define REG_MASK_ALL_FMT "%08X" +#else +typedef unsigned __int64 regMaskSmall; +#define REG_MASK_INT_FMT "%04llX" +#define REG_MASK_ALL_FMT "%016llX" +#endif + +typedef regMaskSmall SingleTypeRegSet; + #if defined(TARGET_AMD64) || 
defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) typedef unsigned __int64 regMaskTP; #elif defined(TARGET_ARM64) -typedef unsigned __int64 regMaskSmall; struct regMaskTP { private: @@ -252,15 +272,20 @@ struct regMaskTP return (unsigned int)low; } - uint64_t getLow() const + SingleTypeRegSet getLow() const { return low; } - uint64_t getHigh() const + SingleTypeRegSet getHigh() const { return high; } + + SingleTypeRegSet GetRegSetForType(var_types type) const + { + return getLow(); + } }; static regMaskTP operator^(regMaskTP first, regMaskTP second) @@ -344,25 +369,6 @@ static uint32_t BitScanForward(regMaskTP mask) #endif } -#if REGMASK_BITS == 8 -typedef unsigned char regMaskSmall; -#define REG_MASK_INT_FMT "%02X" -#define REG_MASK_ALL_FMT "%02X" -#elif REGMASK_BITS == 16 -typedef unsigned short regMaskSmall; -#define REG_MASK_INT_FMT "%04X" -#define REG_MASK_ALL_FMT "%04X" -#elif REGMASK_BITS == 32 -typedef unsigned regMaskSmall; -#define REG_MASK_INT_FMT "%08X" -#define REG_MASK_ALL_FMT "%08X" -#else -typedef unsigned __int64 regMaskSmall; -#define REG_MASK_INT_FMT "%04llX" -#define REG_MASK_ALL_FMT "%016llX" -#endif - -typedef regMaskSmall SingleTypeRegSet; /*****************************************************************************/ From a647a65553094ab75ca92d27a1f8c41794efeff1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 19:30:28 -0700 Subject: [PATCH 11/44] another wip --- src/coreclr/jit/codegencommon.cpp | 20 ++++++------ src/coreclr/jit/codegeninterface.h | 12 +++---- src/coreclr/jit/lsra.cpp | 40 +++++++++++------------ src/coreclr/jit/lsra.h | 51 +++++++++++++++--------------- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/lsraarmarch.cpp | 15 +++++---- src/coreclr/jit/lsrabuild.cpp | 18 +++++------ src/coreclr/jit/target.h | 10 ++++++ 8 files changed, 89 insertions(+), 79 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 
31e8f30a48aa6c..e541123d7ba68d 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -74,11 +74,11 @@ NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) // tree - IR node to add internal allocated registers to // regs - Registers to add // -void NodeInternalRegisters::Add(GenTree* tree, regMaskTP regs) +void NodeInternalRegisters::Add(GenTree* tree, SingleTypeRegSet regs) { assert(regs != RBM_NONE); - regMaskTP* result = m_table.LookupPointerOrAdd(tree, RBM_NONE); + SingleTypeRegSet* result = m_table.LookupPointerOrAdd(tree, RBM_NONE); *result |= regs; } @@ -95,12 +95,12 @@ void NodeInternalRegisters::Add(GenTree* tree, regMaskTP regs) // Returns: // Register number. // -regNumber NodeInternalRegisters::Extract(GenTree* tree, regMaskTP mask) +regNumber NodeInternalRegisters::Extract(GenTree* tree, SingleTypeRegSet mask) { - regMaskTP* regs = m_table.LookupPointer(tree); + SingleTypeRegSet* regs = m_table.LookupPointer(tree); assert(regs != nullptr); - regMaskTP availableSet = *regs & mask; + SingleTypeRegSet availableSet = *regs & mask; assert(availableSet != RBM_NONE); regNumber result = genFirstRegNumFromMask(availableSet); @@ -122,7 +122,7 @@ regNumber NodeInternalRegisters::Extract(GenTree* tree, regMaskTP mask) // Returns: // Register number. // -regNumber NodeInternalRegisters::GetSingle(GenTree* tree, regMaskTP mask) +regNumber NodeInternalRegisters::GetSingle(GenTree* tree, SingleTypeRegSet mask) { regMaskTP* regs = m_table.LookupPointer(tree); assert(regs != nullptr); @@ -145,9 +145,9 @@ regNumber NodeInternalRegisters::GetSingle(GenTree* tree, regMaskTP mask) // Returns: // Mask of registers. // -regMaskTP NodeInternalRegisters::GetAll(GenTree* tree) +SingleTypeRegSet NodeInternalRegisters::GetAll(GenTree* tree) { - regMaskTP regs; + SingleTypeRegSet regs; return m_table.Lookup(tree, ®s) ? 
regs : RBM_NONE; } @@ -162,9 +162,9 @@ regMaskTP NodeInternalRegisters::GetAll(GenTree* tree) // Returns: // Count of nodes // -unsigned NodeInternalRegisters::Count(GenTree* tree, regMaskTP mask) +unsigned NodeInternalRegisters::Count(GenTree* tree, SingleTypeRegSet mask) { - regMaskTP regs; + SingleTypeRegSet regs; return m_table.Lookup(tree, ®s) ? genCountBits(regs & mask) : 0; } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 608c72c22d48d0..380bc5ee484e27 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -48,17 +48,17 @@ CodeGenInterface* getCodeGenerator(Compiler* comp); class NodeInternalRegisters { - typedef JitHashTable, regMaskTP> NodeInternalRegistersTable; + typedef JitHashTable, SingleTypeRegSet> NodeInternalRegistersTable; NodeInternalRegistersTable m_table; public: NodeInternalRegisters(Compiler* comp); - void Add(GenTree* tree, regMaskTP reg); - regNumber Extract(GenTree* tree, regMaskTP mask = static_cast(-1)); - regNumber GetSingle(GenTree* tree, regMaskTP mask = static_cast(-1)); - regMaskTP GetAll(GenTree* tree); - unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); + void Add(GenTree* tree, SingleTypeRegSet reg); + regNumber Extract(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + regNumber GetSingle(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + SingleTypeRegSet GetAll(GenTree* tree); + unsigned Count(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); }; class CodeGenInterface diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 36f842cf215a7d..86093f2dc3ae33 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8684,7 +8684,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs) + SingleTypeRegSet terminatorConsumedRegs) { // TODO-Throughput: This would 
be much more efficient if we add RegToVarMaps instead of VarToRegMaps // and they would be more space-efficient as well. @@ -8703,7 +8703,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, freeRegs = allRegs(type); } #else // !TARGET_ARM - regMaskTP freeRegs = allRegs(type); + SingleTypeRegSet freeRegs = allRegs(type); #endif // !TARGET_ARM #ifdef DEBUG @@ -8990,7 +8990,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // // Note: Only switches and JCMP/JTEST (for Arm4) have input regs (and so can be fed by copies), so those // are the only block-ending branches that need special handling. - regMaskTP consumedRegs = RBM_NONE; + SingleTypeRegSet consumedRegs = RBM_NONE; if (block->KindIs(BBJ_SWITCH)) { // At this point, Lowering has transformed any non-switch-table blocks into @@ -9518,7 +9518,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs) + SingleTypeRegSet terminatorConsumedRegs) { VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum); VarToRegMap toVarToRegMap; @@ -12560,9 +12560,9 @@ void LinearScan::RegisterSelection::reset(Interval* interval, RefPosition* refPo // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. // -bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP selectionCandidates) +bool LinearScan::RegisterSelection::applySelection(int selectionScore, SingleTypeRegSet selectionCandidates) { - regMaskTP newCandidates = candidates & selectionCandidates; + SingleTypeRegSet newCandidates = candidates & selectionCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12581,10 +12581,10 @@ bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. 
// -bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate) +bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, SingleTypeRegSet selectionCandidate) { assert(LinearScan::isSingleRegister(selectionCandidate)); - regMaskTP newCandidates = candidates & selectionCandidate; + SingleTypeRegSet newCandidates = candidates & selectionCandidate; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12631,7 +12631,7 @@ void LinearScan::RegisterSelection::try_CONST_AVAILABLE() if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - regMaskTP newCandidates = candidates & matchingConstants; + SingleTypeRegSet newCandidates = candidates & matchingConstants; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12792,7 +12792,7 @@ void LinearScan::RegisterSelection::try_COVERS_FULL() calculateCoversSets(); #endif - regMaskTP newCandidates = candidates & coversFullSet & freeCandidates; + SingleTypeRegSet newCandidates = candidates & coversFullSet & freeCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -13216,11 +13216,11 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate return; } - regMaskTP coversCandidates = candidates; + SingleTypeRegSet coversCandidates = candidates; for (; coversCandidates != RBM_NONE;) { regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // The register is considered unassigned if it has no assignedInterval, OR @@ -13244,11 +13244,11 @@ void LinearScan::RegisterSelection::calculateCoversSets() } preferenceSet = (candidates & preferences); - regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? 
candidates : preferenceSet; + SingleTypeRegSet coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; for (; coversCandidates != RBM_NONE;) { regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // If we have a single candidate we don't need to compute the preference-related sets, but we @@ -13477,11 +13477,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* if (preferCalleeSave) { - regMaskTP calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType); + SingleTypeRegSet calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType); if (currentInterval->isWriteThru) { // We'll only prefer a callee-save register if it's already been used. - regMaskTP unusedCalleeSaves = + SingleTypeRegSet unusedCalleeSaves = calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; @@ -13836,7 +13836,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( #endif // Is this a fixedReg? - regMaskTP fixedRegMask = RBM_NONE; + SingleTypeRegSet fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13854,7 +13854,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; - candidates &= ~busyRegs; + candidates &= ~busyRegs.GetRegSetForType(regType); #ifdef TARGET_ARM // For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half @@ -13871,11 +13871,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index cf9aaa922c4d33..218f685a324118 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -725,7 +725,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs); + SingleTypeRegSet terminatorConsumedRegs); void resolveEdges(); @@ -1129,11 +1129,11 @@ class LinearScan : public LinearScanInterface } // Managing internal registers during the BuildNode process. 
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates); - RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); - RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); + RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, SingleTypeRegSet candidates); + RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); + RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); #if defined(FEATURE_SIMD) - RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); + RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); #endif void buildInternalRegisterUses(); @@ -1240,7 +1240,7 @@ class LinearScan : public LinearScanInterface unsigned int registersNeeded); #endif // TARGET_ARM64 - regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) + SingleTypeRegSet getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1315,34 +1315,34 @@ class LinearScan : public LinearScanInterface RegisterType regType = RegisterType::TYP_UNKNOWN; - regMaskTP candidates; - regMaskTP preferences = RBM_NONE; + SingleTypeRegSet candidates; + SingleTypeRegSet preferences = RBM_NONE; Interval* relatedInterval = nullptr; - regMaskTP relatedPreferences = RBM_NONE; + SingleTypeRegSet relatedPreferences = RBM_NONE; LsraLocation rangeEndLocation; LsraLocation relatedLastLocation; bool preferCalleeSave = false; RefPosition* rangeEndRefPosition; RefPosition* lastRefPosition; - regMaskTP callerCalleePrefs = RBM_NONE; + SingleTypeRegSet callerCalleePrefs = RBM_NONE; LsraLocation lastLocation; - regMaskTP foundRegBit; + SingleTypeRegSet foundRegBit; - regMaskTP 
prevRegBit = RBM_NONE; + SingleTypeRegSet prevRegBit = RBM_NONE; // These are used in the post-selection updates, and must be set for any selection. - regMaskTP freeCandidates; - regMaskTP matchingConstants; - regMaskTP unassignedSet; + SingleTypeRegSet freeCandidates; + SingleTypeRegSet matchingConstants; + SingleTypeRegSet unassignedSet; // Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together, // as they all require similar computation. - regMaskTP coversSet; - regMaskTP preferenceSet; - regMaskTP coversRelatedSet; - regMaskTP coversFullSet; + SingleTypeRegSet coversSet; + SingleTypeRegSet preferenceSet; + SingleTypeRegSet coversRelatedSet; + SingleTypeRegSet coversFullSet; bool coversSetsCalculated = false; bool found = false; bool skipAllocation = false; @@ -1356,8 +1356,8 @@ class LinearScan : public LinearScanInterface return (prevRegBit & preferences) == foundRegBit; } - bool applySelection(int selectionScore, regMaskTP selectionCandidates); - bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate); + bool applySelection(int selectionScore, SingleTypeRegSet selectionCandidates); + bool applySingleRegSelection(int selectionScore, SingleTypeRegSet selectionCandidate); FORCEINLINE void calculateCoversSets(); FORCEINLINE void calculateUnassignedSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); @@ -1418,7 +1418,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs); + SingleTypeRegSet terminatorConsumedRegs); #ifdef TARGET_ARM64 typedef JitHashTable, RefPosition*> NextConsecutiveRefPositionsMap; @@ -2125,9 +2125,9 @@ class LinearScan : public LinearScanInterface // NOTE: we currently don't need a LinearScan `this` pointer for this definition, and some callers // don't have one available, so make is static. 
// - static FORCEINLINE regMaskTP calleeSaveRegs(RegisterType rt) + static FORCEINLINE SingleTypeRegSet calleeSaveRegs(RegisterType rt) { - static const regMaskTP varTypeCalleeSaveRegs[] = { + static const SingleTypeRegSet varTypeCalleeSaveRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) csr, #include "typelist.h" #undef DEF_TP @@ -2432,8 +2432,7 @@ class Interval : public Referenceable if (preferCalleeSave) { - SingleTypeRegSet calleeSaveMask = - (LinearScan::calleeSaveRegs(this->registerType) & newPreferences).GetRegSetForType(this-registerType); + SingleTypeRegSet calleeSaveMask = LinearScan::calleeSaveRegs(this->registerType) & newPreferences; if (calleeSaveMask != RBM_NONE) { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 882cb4811f2cff..9a1a504ad8c728 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -423,7 +423,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); regMaskTP freeCandidates = allCandidates & m_AvailableRegs; - assert((freeCandidates == RBM_NONE) || (freeCandidates & availableFloatRegs) != 0); + assert((freeCandidates.IsEmpty()) || (freeCandidates.getLow() & availableFloatRegs)); SingleTypeRegSet floatFreeCandidates = freeCandidates.getLow(); #ifdef DEBUG diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index d40e91cdb8c545..a01965f91b5206 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -185,7 +185,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. 
- regMaskTP candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; assert(candidates != RBM_NONE); buildInternalIntRegisterDefForNode(call, candidates); } @@ -521,13 +521,14 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) int dstCount = argNode->gtNumRegs; regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } + assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || ((argMask & availableFloatRegs) != RBM_NONE)); if (src->OperGet() == GT_FIELD_LIST) { @@ -585,7 +586,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) if (argNode->gtNumRegs == 1) { // We can use a ldr/str sequence so we need an internal register - buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); + buildInternalIntRegisterDefForNode(argNode, (allRegs(TYP_INT) & ~argMask)); } // We will generate code that loads from the OBJ's address, which must be in a register. @@ -619,9 +620,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { @@ -678,7 +679,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. 
// We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = + SingleTypeRegSet internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 1c4cf680e93bfb..329582772ffb39 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1311,7 +1311,7 @@ bool LinearScan::checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode) // currentLoc - Location of the temp Def position // regMask - register mask of candidates for temp // -RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) +RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, SingleTypeRegSet regMask) { Interval* current = newInterval(regType); current->isInternal = true; @@ -1331,7 +1331,7 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regTy // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only integer registers. assert((internalCands & ~availableIntRegs) == RBM_NONE); @@ -1350,7 +1350,7 @@ RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMa // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only float registers. 
assert((internalCands & ~availableFloatRegs) == RBM_NONE); @@ -1360,7 +1360,7 @@ RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, reg } #if defined(FEATURE_SIMD) && defined(TARGET_XARCH) -RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only float registers. assert((internalCands & ~availableMaskRegs) == RBM_NONE); @@ -1391,7 +1391,7 @@ void LinearScan::buildInternalRegisterUses() for (int i = 0; i < internalCount; i++) { RefPosition* def = internalDefs[i]; - regMaskTP mask = def->registerAssignment; + SingleTypeRegSet mask = def->registerAssignment; RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); if (setInternalRegsDelayFree) { @@ -1858,8 +1858,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc if (newRefPosition->IsActualRef() && doReverseCallerCallee()) { Interval* interval = newRefPosition->getInterval(); - regMaskTP oldAssignment = newRefPosition->registerAssignment; - regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); + SingleTypeRegSet oldAssignment = newRefPosition->registerAssignment; + SingleTypeRegSet calleeSaveMask = calleeSaveRegs(interval->registerType); #ifdef TARGET_ARM64 if (newRefPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) { @@ -2865,7 +2865,7 @@ void LinearScan::buildInitialParamDef(const LclVarDsc* varDsc, regNumber paramRe Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); const var_types regType = varDsc->GetRegisterType(); - regMaskTP mask = allRegs(regType); + SingleTypeRegSet mask = allRegs(regType); if ((paramReg != REG_NA) && !stressInitialParamReg()) { // Set this interval as currently assigned to that register @@ -4121,7 +4121,7 @@ int LinearScan::BuildReturn(GenTree* 
tree) if (srcType != dstType) { hasMismatchedRegTypes = true; - regMaskTP dstRegMask = + SingleTypeRegSet dstRegMask = genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); if (varTypeUsesIntReg(dstType)) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b8aff287e9983a..55f0814efb9272 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -282,6 +282,16 @@ struct regMaskTP return high; } + bool IsEmpty() + { + return (low | high) == RBM_NONE; + } + + bool IsNonEmpty() + { + return !IsEmpty(); + } + SingleTypeRegSet GetRegSetForType(var_types type) const { return getLow(); From 9a15670c4ed2b648801c902a364c2ed95b120bee Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 May 2024 23:01:53 -0700 Subject: [PATCH 12/44] Everything except newRefPosition/killMask --- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/lsra.cpp | 70 +++++++++++++++---------------- src/coreclr/jit/lsra.h | 12 +++--- src/coreclr/jit/lsraarm64.cpp | 6 +-- src/coreclr/jit/lsrabuild.cpp | 5 ++- 5 files changed, 48 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e541123d7ba68d..ec1a050ad9dbc6 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -124,7 +124,7 @@ regNumber NodeInternalRegisters::Extract(GenTree* tree, SingleTypeRegSet mask) // regNumber NodeInternalRegisters::GetSingle(GenTree* tree, SingleTypeRegSet mask) { - regMaskTP* regs = m_table.LookupPointer(tree); + SingleTypeRegSet* regs = m_table.LookupPointer(tree); assert(regs != nullptr); regMaskTP availableSet = *regs & mask; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 86093f2dc3ae33..f19961abe30f55 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -299,15 +299,15 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo nextFixedRef[regRecord->regNum] = nextLocation; } -regMaskTP 
LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition) +SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); - regMaskTP candidates = (mask & m_RegistersWithConstants); - regMaskTP result = RBM_NONE; + SingleTypeRegSet candidates = (mask & m_RegistersWithConstants).GetRegSetForType(currentInterval->registerType); + SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { regNumber regNum = genFirstRegNumFromMask(candidates); - regMaskTP candidateBit = genRegMask(regNum); + SingleTypeRegSet candidateBit = genRegMask(regNum); candidates ^= candidateBit; RegRecord* physRegRecord = getRegisterRecord(regNum); @@ -3047,7 +3047,7 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit = + SingleTypeRegSet foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { @@ -12816,15 +12816,15 @@ void LinearScan::RegisterSelection::try_BEST_FIT() } #endif - regMaskTP bestFitSet = RBM_NONE; + SingleTypeRegSet bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. bool earliestIsBest = coversFullApplied; LsraLocation bestFitLocation = earliestIsBest ? 
MaxLocation : MinLocation; - for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) + for (SingleTypeRegSet bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); - regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); + SingleTypeRegSet bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); bestFitCandidates ^= bestFitCandidateBit; // Find the next RefPosition of the register. @@ -12919,11 +12919,11 @@ void LinearScan::RegisterSelection::try_REG_ORDER() // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. unsigned lowestRegOrder = UINT_MAX; - regMaskTP lowestRegOrderBit = RBM_NONE; - for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) + SingleTypeRegSet lowestRegOrderBit = RBM_NONE; + for (SingleTypeRegSet regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); - regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); + SingleTypeRegSet regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; @@ -12945,7 +12945,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() assert(!found); // The set of registers with the lowest spill weight. - regMaskTP lowestCostSpillSet = RBM_NONE; + SingleTypeRegSet lowestCostSpillSet = RBM_NONE; // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. // The spill weight for 'refPosition' (the one we're allocating now). 
@@ -12956,10 +12956,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST() bool foundLowerSpillWeight = false; LsraLocation thisLocation = refPosition->nodeLocation; - for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;) + for (SingleTypeRegSet spillCandidates = candidates; spillCandidates != RBM_NONE;) { regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); - regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum); + SingleTypeRegSet spillCandidateBit = genRegMask(spillCandidateRegNum); spillCandidates ^= spillCandidateBit; RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; @@ -13080,11 +13080,11 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() assert(!found); LsraLocation farthestLocation = MinLocation; - regMaskTP farthestSet = RBM_NONE; - for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;) + SingleTypeRegSet farthestSet = RBM_NONE; + for (SingleTypeRegSet farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); - regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum); + SingleTypeRegSet farthestCandidateBit = genRegMask(farthestCandidateRegNum); farthestCandidates ^= farthestCandidateBit; // Find the next RefPosition of the register. 
@@ -13113,11 +13113,11 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() { assert(!found); - regMaskTP prevRegOptSet = RBM_NONE; - for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) + SingleTypeRegSet prevRegOptSet = RBM_NONE; + for (SingleTypeRegSet prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); - regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); + SingleTypeRegSet prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); prevRegOptCandidates ^= prevRegOptCandidateBit; Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; bool foundPrevRegOptReg = true; @@ -13407,9 +13407,9 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* nextRelatedInterval = nullptr; // First, get the preferences for this interval - regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); + SingleTypeRegSet thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); // Now, determine if they are compatible and update the relatedPreferences that we'll consider. - regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences; + SingleTypeRegSet newRelatedPreferences = thisRelatedPreferences & relatedPreferences; if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS)) { // TODO-CQ: The following isFree() check doesn't account for the possibility that there's an @@ -13482,7 +13482,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* { // We'll only prefer a callee-save register if it's already been used. 
SingleTypeRegSet unusedCalleeSaves = - calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); + calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; } @@ -13506,7 +13506,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* found = false; // Is this a fixedReg? - regMaskTP fixedRegMask = RBM_NONE; + SingleTypeRegSet fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13522,7 +13522,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* } #ifdef DEBUG - regMaskTP inUseOrBusyRegsMask = RBM_NONE; + SingleTypeRegSet inUseOrBusyRegsMask = RBM_NONE; #endif // Eliminate candidates that are in-use or busy. @@ -13531,7 +13531,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. - regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13553,11 +13553,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. 
- regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13613,7 +13613,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* if (needsConsecutiveRegisters) { #ifdef TARGET_ARM64 - regMaskTP busyConsecutiveCandidates = RBM_NONE; + SingleTypeRegSet busyConsecutiveCandidates = RBM_NONE; if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); @@ -13632,7 +13632,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // refpositions. assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); - freeCandidates = candidates & linearScan->m_AvailableRegs; + freeCandidates = candidates & linearScan->m_AvailableRegs.GetRegSetForType(regType); } if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE)) @@ -13647,7 +13647,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one // such range that is consecutive. Next, append that range to the `candidates`. 
// - SingleTypeRegSet limitCandidatesForConsecutive = ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs).getLow(); + SingleTypeRegSet limitCandidatesForConsecutive = ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs); SingleTypeRegSet overallLimitCandidates; regMaskTP limitConsecutiveResult = linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount, @@ -13677,7 +13677,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); } // If no free candidates, then double check if refPosition is an actual ref. @@ -13853,8 +13853,8 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
- regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; - candidates &= ~busyRegs.GetRegSetForType(regType); + SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); + candidates &= ~busyRegs; #ifdef TARGET_ARM // For TYP_DOUBLE on ARM, we can only use an even floating-point register for which the odd half @@ -13906,7 +13906,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); if (freeCandidates != RBM_NONE) { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 218f685a324118..f9ec5bebd703ee 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1232,7 +1232,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); + SingleTypeRegSet getConsecutiveCandidates(SingleTypeRegSet candidates, RefPosition* refPosition, SingleTypeRegSet* busyCandidates); SingleTypeRegSet filterConsecutiveCandidates(SingleTypeRegSet candidates, unsigned int registersNeeded, SingleTypeRegSet* allConsecutiveCandidates); @@ -1240,7 +1240,7 @@ class LinearScan : public LinearScanInterface unsigned int registersNeeded); #endif // TARGET_ARM64 - SingleTypeRegSet getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) + SingleTypeRegSet getFreeCandidates(regMaskTP candidates, var_types regType) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1251,7 +1251,7 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 
1); } #endif // TARGET_ARM - return result; + return result.GetRegSetForType(regType); } #ifdef DEBUG @@ -1862,7 +1862,7 @@ class LinearScan : public LinearScanInterface regMaskTP regMask = getRegMask(reg, regType); return (m_RegistersWithConstants & regMask) == regMask; } - regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); + SingleTypeRegSet getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition); regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; @@ -2147,10 +2147,10 @@ class LinearScan : public LinearScanInterface //------------------------------------------------------------------------ // callerSaveRegs: Get the set of caller-save registers of the given RegisterType // - FORCEINLINE regMaskTP callerSaveRegs(RegisterType rt) const + FORCEINLINE SingleTypeRegSet callerSaveRegs(RegisterType rt) const { #if !defined(TARGET_XARCH) - static const regMaskTP varTypeCalleeTrashRegs[] = { + static const SingleTypeRegSet varTypeCalleeTrashRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9a1a504ad8c728..f0ebebd768b4df 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -416,9 +416,9 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidatesForSpill(SingleTypeRegSe // allCandidates = 0x1C080D0F00000000, the consecutive register mask returned // will be 0x400000300000000. 
// -regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, +SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandidates, RefPosition* refPosition, - regMaskTP* busyCandidates) + SingleTypeRegSet* busyCandidates) { assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); @@ -555,7 +555,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // `allCandidates` that are mix of free and busy. Since `busyCandidates` just has bit set for first // register of such series, return the mask that starts with free register, if possible. The busy // registers will be spilled during assignment of subsequent RefPosition. - *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy); + *busyCandidates = (m_AvailableRegs.GetRegSetForType(TYP_FLOAT) & consecutiveResultForBusy); } } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 329582772ffb39..21ef62d1398ba7 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3290,8 +3290,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) } #endif - interval->registerAversion |= unpref; - regMaskTP newPreferences = allRegs(interval->registerType) & ~unpref; + SingleTypeRegSet unprefSet = unpref.GetRegSetForType(interval->registerType); + interval->registerAversion |= unprefSet; + SingleTypeRegSet newPreferences = allRegs(interval->registerType) & ~unprefSet; interval->updateRegisterPreferences(newPreferences); } } From 5e739a4d65c04a0e4b45510dbab9f892c23530c8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 13:42:58 -0700 Subject: [PATCH 13/44] refactor code around buildkill --- src/coreclr/jit/lsra.h | 10 +- src/coreclr/jit/lsraarm.cpp | 6 +- src/coreclr/jit/lsraarm64.cpp | 14 +-- src/coreclr/jit/lsraarmarch.cpp | 55 +++++++++-- src/coreclr/jit/lsrabuild.cpp | 163 +++++++++++++++++++++++--------- 
src/coreclr/jit/lsraxarch.cpp | 71 ++++++++------ src/coreclr/jit/target.h | 5 + 7 files changed, 235 insertions(+), 89 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7d1624a578045f..9f822698b92d89 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2009,9 +2009,17 @@ class LinearScan : public LinearScanInterface int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); + RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0); void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE); - void BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); + void BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates); + void BuildKills(GenTree* tree, regMaskTP killMask); +#ifdef TARGET_ARMARCH + void BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMaskTP killMask); +#else + void BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); +#endif + void BuildCallDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); int BuildReturn(GenTree* tree); #ifdef TARGET_XARCH diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 2192265984d68e..e653514d3c14c0 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -373,7 +373,7 @@ int LinearScan::BuildNode(GenTree* tree) assert(dstCount == 0); BuildUse(tree->gtGetOp1()); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; case GT_MUL: @@ -422,7 +422,7 @@ int LinearScan::BuildNode(GenTree* tree) // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - 
BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, RBM_NONE); break; case GT_LONG: @@ -469,7 +469,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETURN: srcCount = BuildReturn(tree); killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; case GT_RETFILT: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index c133a4a6917762..31f4d114d544a6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -666,14 +666,14 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; assert(dstCount == 0); killMask = getKillSetForProfilerHook(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; case GT_START_PREEMPTGC: // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, RBM_NONE); break; case GT_CNS_DBL: @@ -738,7 +738,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETURN: srcCount = BuildReturn(tree); killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; #ifdef SWIFT_SUPPORT @@ -747,7 +747,7 @@ int LinearScan::BuildNode(GenTree* tree) // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; #endif // SWIFT_SUPPORT @@ -839,7 +839,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 1; assert(dstCount == 0); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; case GT_MOD: @@ -1927,11 +1927,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if ((dstCount == 1) || (dstCount == 2)) { - BuildDef(intrinsicTree, dstCandidates); + BuildDef(intrinsicTree); if (dstCount == 2) 
{ - BuildDef(intrinsicTree, dstCandidates, 1); + BuildDef(intrinsicTree, RBM_NONE, 1); } } else diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index d40e91cdb8c545..0a600751a53a67 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,8 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP dstCandidates = RBM_NONE; + regMaskTP multiDstCandidates; + regMaskTP singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -234,19 +235,19 @@ int LinearScan::BuildCall(GenTreeCall* call) if (hasMultiRegRetVal) { assert(retTypeDesc != nullptr); - dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + multiDstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); } else if (varTypeUsesFloatArgReg(registerType)) { - dstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET; } else if (registerType == TYP_LONG) { - dstCandidates = RBM_LNGRET; + singleDstCandidates = RBM_LNGRET; } else { - dstCandidates = RBM_INTRET; + singleDstCandidates = RBM_INTRET; } // First, count reg args @@ -399,7 +400,23 @@ int LinearScan::BuildCall(GenTreeCall* call) // Now generate defs and kills. 
regMaskTP killMask = getKillSetForCall(call); - BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + if (dstCount > 0) + { + if (hasMultiRegRetVal) + { + assert(multiDstCandidates.Count() > 0); + BuildCallDefsWithKills(call, dstCount, multiDstCandidates, killMask); + } + else + { + assert(dstCount == 1); + BuildDefWithKills(call, singleDstCandidates, killMask); + } + } + else + { + BuildKills(call, killMask); + } #ifdef SWIFT_SUPPORT if (call->HasSwiftErrorHandling()) @@ -414,6 +431,30 @@ int LinearScan::BuildCall(GenTreeCall* call) return srcCount; } +//------------------------------------------------------------------------ +// BuildDefWithKills: Build one RefTypeDef RefPositions for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The call node that defines a register +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// save and restores of upper vector registers can be bracketed around the def. 
+// +void LinearScan::BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMaskTP killMask) +{ + assert(!tree->AsCall()->HasMultiRegRetVal()); + assert((int)genCountBits(dstCandidates) == 1); + + // Build the kill RefPositions + BuildKills(tree, killMask); + BuildDef(tree, dstCandidates); +} + //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node // @@ -836,7 +877,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) buildInternalRegisterUses(); regMaskTP killMask = getKillSetForBlockStore(blkNode); - BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + BuildKills(blkNode, killMask); return useCount; } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 43c75a118b218d..f7eea368ed2b27 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3037,7 +3037,7 @@ void setTgtPref(Interval* interval, RefPosition* tgtPrefUse) #endif // !TARGET_ARM //------------------------------------------------------------------------ -// BuildDef: Build a RefTypeDef RefPosition for the given node +// BuildDef: Build one RefTypeDef RefPosition for the given node at given index // // Arguments: // tree - The node that defines a register @@ -3130,7 +3130,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu } //------------------------------------------------------------------------ -// BuildDef: Build one or more RefTypeDef RefPositions for the given node +// BuildDef: Build one or more RefTypeDef RefPositions for the given call node // // Arguments: // tree - The node that defines a register @@ -3140,61 +3140,74 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu // Notes: // Adds the RefInfo for the definitions to the defList. 
// -void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) { - bool fixedReg = false; - if ((dstCount > 1) && (dstCandidates != RBM_NONE) && ((int)genCountBits(dstCandidates) == dstCount)) + assert(dstCount > 0); +#ifdef TARGET_ARM64 + assert((int)dstCandidates.Count() == dstCount); +#endif + assert(tree->IsMultiRegCall()); + + const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + assert(retTypeDesc != nullptr); + + for (int i = 0; i < dstCount; i++) { - fixedReg = true; + // In case of multi-reg call node, we have to query the i'th position return register. + // For all other cases of multi-reg definitions, the registers must be in sequential order. + regMaskTP thisDstCandidates = + genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv())); + + assert((dstCandidates & thisDstCandidates) != RBM_NONE); + dstCandidates &= ~thisDstCandidates; + + BuildDef(tree, thisDstCandidates, i); } - const ReturnTypeDesc* retTypeDesc = nullptr; - if (tree->IsMultiRegCall()) +} + +//------------------------------------------------------------------------ +// BuildDef: Build one or more RefTypeDef RefPositions for the given node +// +// Arguments: +// tree - The node that defines a register +// dstCount - The number of registers defined by the node +// dstCandidates - the candidate registers for the definition +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// Also, the `dstCandidates` is assumed to be of "onlyOne" type. 
If there are +// both gpr and float registers, use `BuildDefs` that takes `AllRegsMask` +// +void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +{ + assert(dstCount > 0); + + if ((dstCandidates == RBM_NONE) || ((int)genCountBits(dstCandidates) != dstCount)) { - retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); + // This is not fixedReg case, so just create definitions based on dstCandidates + for (int i = 0; i < dstCount; i++) + { + BuildDef(tree, dstCandidates, i); + } + return; } + for (int i = 0; i < dstCount; i++) { - regMaskTP thisDstCandidates; - if (fixedReg) - { - // In case of multi-reg call node, we have to query the i'th position return register. - // For all other cases of multi-reg definitions, the registers must be in sequential order. - if (retTypeDesc != nullptr) - { - thisDstCandidates = genRegMask( - tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv())); - assert((dstCandidates & thisDstCandidates) != RBM_NONE); - } - else - { - thisDstCandidates = genFindLowestBit(dstCandidates); - } - dstCandidates &= ~thisDstCandidates; - } - else - { - thisDstCandidates = dstCandidates; - } + regMaskTP thisDstCandidates = genFindLowestBit(dstCandidates); BuildDef(tree, thisDstCandidates, i); + dstCandidates &= ~thisDstCandidates; } } //------------------------------------------------------------------------ -// BuildDef: Build one or more RefTypeDef RefPositions for the given node, -// as well as kills as specified by the given mask. +// BuildDef: Build Kills RefPositions as specified by the given mask. // // Arguments: // tree - The node that defines a register -// dstCount - The number of registers defined by the node -// dstCandidates - The candidate registers for the definition // killMask - The mask of registers killed by this node // -// Notes: -// Adds the RefInfo for the definitions to the defList. 
-// The def and kill functionality is folded into a single method so that the -// save and restores of upper vector registers can be bracketed around the def. -// -void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildKills(GenTree* tree, regMaskTP killMask) { assert(killMask == getKillSetForNode(tree)); @@ -3217,13 +3230,77 @@ void LinearScan::BuildDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCa // if ((killMask & RBM_FLT_CALLEE_TRASH) != RBM_NONE) { - buildUpperVectorSaveRefPositions(tree, currentLoc + 1, killMask); + buildUpperVectorSaveRefPositions(tree, currentLoc + 1 DEBUG_ARG(killMask & RBM_FLT_CALLEE_TRASH)); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } +} + +#ifndef TARGET_ARMARCH +//------------------------------------------------------------------------ +// BuildDefWithKills: Build one or two (for 32-bit) RefTypeDef RefPositions for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The call node that defines a register +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// save and restores of upper vector registers can be bracketed around the def. 
+// +void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +{ + // Build the kill RefPositions + BuildKills(tree, killMask); + +#ifdef TARGET_64BIT + // For 64 bits, + assert(dstCount == 1); + BuildDef(tree, dstCandidates); +#else + if (dstCount == 1) + { + BuildDef(tree, dstCandidates); + } + else + { + assert(dstCount == 2); + BuildDefs(tree, 2, dstCandidates); + } +#endif // TARGET_64BIT +} +#endif + +//------------------------------------------------------------------------ +// BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node, +// as well as kills as specified by the given mask. +// +// Arguments: +// tree - The node that defines a register +// dstCount - The number of registers defined by the node +// dstCandidates - The candidate registers for the definition +// killMask - The mask of registers killed by this node +// +// Notes: +// Adds the RefInfo for the definitions to the defList. +// The def and kill functionality is folded into a single method so that the +// save and restores of upper vector registers can be bracketed around the def. 
+// +void LinearScan::BuildCallDefsWithKills(GenTree* tree, + int dstCount, + regMaskTP dstCandidates, regMaskTP killMask) +{ + assert(dstCount > 0); + assert(dstCandidates != RBM_NONE); + + // Build the kill RefPositions + BuildKills(tree, killMask); - // Now, create the Def(s) - BuildDefs(tree, dstCount, dstCandidates); + // And then the Def(s) + BuildCallDefs(tree, dstCount, dstCandidates); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 3f2865aa50f326..141f1ea2f608f3 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -138,14 +138,14 @@ int LinearScan::BuildNode(GenTree* tree) // This kills GC refs in callee save regs srcCount = 0; assert(dstCount == 0); - BuildDefsWithKills(tree, 0, RBM_NONE, RBM_NONE); + BuildKills(tree, RBM_NONE); break; case GT_PROF_HOOK: srcCount = 0; assert(dstCount == 0); killMask = getKillSetForProfilerHook(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; case GT_CNS_INT: @@ -190,7 +190,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_RETURN: srcCount = BuildReturn(tree); killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; #ifdef SWIFT_SUPPORT @@ -306,7 +306,7 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = BuildOperandUses(tree->gtGetOp1()); buildInternalRegisterUses(); killMask = compiler->compHelperCallKillSet(CORINFO_HELP_STOP_FOR_GC); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); } break; @@ -1151,7 +1151,7 @@ int LinearScan::BuildCall(GenTreeCall* call) const ReturnTypeDesc* retTypeDesc = nullptr; int srcCount = 0; int dstCount = 0; - regMaskTP dstCandidates = RBM_NONE; + regMaskTP singleDstCandidates = RBM_NONE; assert(!call->isContained()); if (call->TypeGet() != TYP_VOID) @@ -1189,32 +1189,29 @@ int LinearScan::BuildCall(GenTreeCall* 
call) } else #endif // TARGET_X86 - if (hasMultiRegRetVal) - { - assert(retTypeDesc != nullptr); - dstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); - assert((int)genCountBits(dstCandidates) == dstCount); - } - else if (varTypeUsesFloatReg(registerType)) + if (!hasMultiRegRetVal) { + if (varTypeUsesFloatReg(registerType)) + { #ifdef TARGET_X86 - // The return value will be on the X87 stack, and we will need to move it. - dstCandidates = allRegs(registerType); + // The return value will be on the X87 stack, and we will need to move it. + singleDstCandidates = allRegs(registerType); #else // !TARGET_X86 - dstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET; #endif // !TARGET_X86 - } - else - { - assert(varTypeUsesIntReg(registerType)); - - if (registerType == TYP_LONG) - { - dstCandidates = RBM_LNGRET; } else { - dstCandidates = RBM_INTRET; + assert(varTypeUsesIntReg(registerType)); + + if (registerType == TYP_LONG) + { + singleDstCandidates = RBM_LNGRET; + } + else + { + singleDstCandidates = RBM_INTRET; + } } } @@ -1375,7 +1372,25 @@ int LinearScan::BuildCall(GenTreeCall* call) // Now generate defs and kills. 
regMaskTP killMask = getKillSetForCall(call); - BuildDefsWithKills(call, dstCount, dstCandidates, killMask); + if (dstCount > 0) + { + if (hasMultiRegRetVal) + { + assert(retTypeDesc != nullptr); + regMaskTP multiDstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + assert((int)genCountBits(multiDstCandidates) == dstCount); + BuildCallDefsWithKills(call, dstCount, multiDstCandidates, killMask); + } + else + { + assert(dstCount == 1); + BuildDefWithKills(call, dstCount, singleDstCandidates, killMask); + } + } + else + { + BuildKills(call, killMask); + } #ifdef SWIFT_SUPPORT if (call->HasSwiftErrorHandling()) @@ -1667,7 +1682,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) buildInternalRegisterUses(); regMaskTP killMask = getKillSetForBlockStore(blkNode); - BuildDefsWithKills(blkNode, 0, RBM_NONE, killMask); + BuildKills(blkNode, killMask); return useCount; } @@ -1959,7 +1974,7 @@ int LinearScan::BuildModDiv(GenTree* tree) buildInternalRegisterUses(); regMaskTP killMask = getKillSetForModDiv(tree->AsOp()); - BuildDefsWithKills(tree, 1, dstCandidates, killMask); + BuildDefWithKills(tree, 1, dstCandidates, killMask); return srcCount; } @@ -3086,7 +3101,7 @@ int LinearScan::BuildMul(GenTree* tree) containedMemOp = op2; } regMaskTP killMask = getKillSetForMul(tree->AsOp()); - BuildDefsWithKills(tree, dstCount, dstCandidates, killMask); + BuildDefWithKills(tree, dstCount, dstCandidates, killMask); return srcCount; } diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 8fdc53c5341613..ae0cfade1c518c 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -249,6 +249,11 @@ struct regMaskTP { return low; } + + unsigned Count() const + { + return BitOperations::PopCount(getLow()); + } }; static regMaskTP operator^(regMaskTP first, regMaskTP second) From e93f5286cb7b6d2a99392af1b70ddf31a81552ab Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 13:48:13 -0700 Subject: [PATCH 14/44] fix build 
errors --- src/coreclr/jit/lsraarmarch.cpp | 35 ++++++++++++++++----------------- src/coreclr/jit/lsrabuild.cpp | 4 +--- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/target.h | 8 ++++---- 4 files changed, 23 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 0a600751a53a67..4a7fcfe2a16393 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,6 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP multiDstCandidates; regMaskTP singleDstCandidates = RBM_NONE; int srcCount = 0; @@ -228,26 +227,24 @@ int LinearScan::BuildCall(GenTreeCall* call) { // The ARM CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. - dstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB; } else #endif // TARGET_ARM - if (hasMultiRegRetVal) - { - assert(retTypeDesc != nullptr); - multiDstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); - } - else if (varTypeUsesFloatArgReg(registerType)) - { - singleDstCandidates = RBM_FLOATRET; - } - else if (registerType == TYP_LONG) - { - singleDstCandidates = RBM_LNGRET; - } - else + if (!hasMultiRegRetVal) { - singleDstCandidates = RBM_INTRET; + if (varTypeUsesFloatArgReg(registerType)) + { + singleDstCandidates = RBM_FLOATRET; + } + else if (registerType == TYP_LONG) + { + singleDstCandidates = RBM_LNGRET; + } + else + { + singleDstCandidates = RBM_INTRET; + } } // First, count reg args @@ -404,7 +401,9 @@ int LinearScan::BuildCall(GenTreeCall* call) { if (hasMultiRegRetVal) { - assert(multiDstCandidates.Count() > 0); + assert(retTypeDesc != nullptr); + regMaskTP multiDstCandidates = retTypeDesc->GetABIReturnRegs(call->GetUnmanagedCallConv()); + assert(genCountBits(multiDstCandidates) > 0); 
BuildCallDefsWithKills(call, dstCount, multiDstCandidates, killMask); } else diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f7eea368ed2b27..aa490e799e4f69 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3143,9 +3143,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) { assert(dstCount > 0); -#ifdef TARGET_ARM64 - assert((int)dstCandidates.Count() == dstCount); -#endif + assert((int)genCountBits(dstCandidates) == dstCount); assert(tree->IsMultiRegCall()); const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 141f1ea2f608f3..6808d8b4da617e 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -199,7 +199,7 @@ int LinearScan::BuildNode(GenTree* tree) // Plus one for error register srcCount = BuildReturn(tree) + 1; killMask = getKillSetForReturn(); - BuildDefsWithKills(tree, 0, RBM_NONE, killMask); + BuildKills(tree, killMask); break; #endif // SWIFT_SUPPORT diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index ae0cfade1c518c..ee0e8228cb5335 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -250,10 +250,10 @@ struct regMaskTP return low; } - unsigned Count() const - { - return BitOperations::PopCount(getLow()); - } + //unsigned Count() const + //{ + // return BitOperations::PopCount(getLow()); + //} }; static regMaskTP operator^(regMaskTP first, regMaskTP second) From 05b09b25c7f4a208ee9c482dff7bafe7f258ada8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 14:40:59 -0700 Subject: [PATCH 15/44] some more errors --- src/coreclr/jit/lsra.h | 3 ++- src/coreclr/jit/lsrabuild.cpp | 3 ++- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/target.h | 5 ----- 4 files changed, 5 insertions(+), 8 deletions(-) 
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 9f822698b92d89..116ab769efc9f6 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1051,7 +1051,8 @@ class LinearScan : public LinearScanInterface void buildRefPositionsForNode(GenTree* tree, LsraLocation loc); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet); + void buildUpperVectorSaveRefPositions(GenTree* tree, + LsraLocation currentLoc DEBUG_ARG(regMaskTP fpCalleeKillSet)); void buildUpperVectorRestoreRefPosition( Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node, bool isUse, unsigned multiRegIdx); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index aa490e799e4f69..58e7b59a8d0d97 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1459,7 +1459,8 @@ Interval* LinearScan::getUpperVectorInterval(unsigned varIndex) // But we will use as a proxy any node that kills floating point registers. // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) // -void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet) +void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, + LsraLocation currentLoc DEBUG_ARG(regMaskTP fpCalleeKillSet)) { if ((tree != nullptr) && tree->IsCall()) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6808d8b4da617e..8b922d024d5a15 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1185,7 +1185,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the // correct argument registers. 
- dstCandidates = RBM_PINVOKE_TCB; + singleDstCandidates = RBM_PINVOKE_TCB; } else #endif // TARGET_X86 diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index ee0e8228cb5335..8fdc53c5341613 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -249,11 +249,6 @@ struct regMaskTP { return low; } - - //unsigned Count() const - //{ - // return BitOperations::PopCount(getLow()); - //} }; static regMaskTP operator^(regMaskTP first, regMaskTP second) From f00f7a84e3f9aad14299eff6c1f14ee7d332a4e5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 14:41:11 -0700 Subject: [PATCH 16/44] jit format --- src/coreclr/jit/lsra.h | 23 +++++++++++------------ src/coreclr/jit/lsraarmarch.cpp | 4 ++-- src/coreclr/jit/lsrabuild.cpp | 6 ++---- src/coreclr/jit/lsraxarch.cpp | 10 +++++----- 4 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 116ab769efc9f6..e567436a994124 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1051,8 +1051,7 @@ class LinearScan : public LinearScanInterface void buildRefPositionsForNode(GenTree* tree, LsraLocation loc); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - void buildUpperVectorSaveRefPositions(GenTree* tree, - LsraLocation currentLoc DEBUG_ARG(regMaskTP fpCalleeKillSet)); + void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc DEBUG_ARG(regMaskTP fpCalleeKillSet)); void buildUpperVectorRestoreRefPosition( Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node, bool isUse, unsigned multiRegIdx); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -2000,16 +1999,16 @@ class LinearScan : public LinearScanInterface void getTgtPrefOperands(GenTree* tree, GenTree* op1, GenTree* op2, bool* prefOp1, bool* prefOp2); bool supportsSpecialPutArg(); - int BuildSimple(GenTree* tree); - int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); - void AddDelayFreeUses(RefPosition* refPosition, GenTree* 
rmwNode); - int BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode = nullptr, - regMaskTP candidates = RBM_NONE, - RefPosition** useRefPosition = nullptr); - int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); - int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); - void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); + int BuildSimple(GenTree* tree); + int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); + void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode); + int BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode = nullptr, + regMaskTP candidates = RBM_NONE, + RefPosition** useRefPosition = nullptr); + int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); + int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); + void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0); void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 4a7fcfe2a16393..c3562dc8a11d0e 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -127,8 +127,8 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) // int LinearScan::BuildCall(GenTreeCall* call) { - bool hasMultiRegRetVal = false; - const ReturnTypeDesc* retTypeDesc = nullptr; + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; regMaskTP singleDstCandidates = RBM_NONE; int srcCount = 0; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 58e7b59a8d0d97..28c9207fbbfda8 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1459,7 +1459,7 @@ Interval* LinearScan::getUpperVectorInterval(unsigned varIndex) // But we will use as a proxy any node 
that kills floating point registers. // (Note that some calls are masquerading as other nodes at this point so we can't just check for calls.) // -void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, +void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc DEBUG_ARG(regMaskTP fpCalleeKillSet)) { if ((tree != nullptr) && tree->IsCall()) @@ -3288,9 +3288,7 @@ void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCan // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. // -void LinearScan::BuildCallDefsWithKills(GenTree* tree, - int dstCount, - regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildCallDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) { assert(dstCount > 0); assert(dstCandidates != RBM_NONE); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 8b922d024d5a15..c636fa34759026 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1147,10 +1147,10 @@ int LinearScan::BuildShiftRotate(GenTree* tree) // int LinearScan::BuildCall(GenTreeCall* call) { - bool hasMultiRegRetVal = false; - const ReturnTypeDesc* retTypeDesc = nullptr; - int srcCount = 0; - int dstCount = 0; + bool hasMultiRegRetVal = false; + const ReturnTypeDesc* retTypeDesc = nullptr; + int srcCount = 0; + int dstCount = 0; regMaskTP singleDstCandidates = RBM_NONE; assert(!call->isContained()); @@ -1197,7 +1197,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. 
singleDstCandidates = allRegs(registerType); #else // !TARGET_X86 - singleDstCandidates = RBM_FLOATRET; + singleDstCandidates = RBM_FLOATRET; #endif // !TARGET_X86 } else From 06bd9e2e5d965d5d4ce06b3d8f16fd195cc851fa Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 16:36:43 -0700 Subject: [PATCH 17/44] fixed build error for arm64 --- src/coreclr/jit/CMakeLists.txt | 1 + src/coreclr/jit/lsra.h | 22 ++++++------ src/coreclr/jit/lsraarm64.cpp | 10 +++--- src/coreclr/jit/lsraarmarch.cpp | 8 ++--- src/coreclr/jit/lsrabuild.cpp | 60 ++++++++++++++++++--------------- src/coreclr/jit/target.h | 4 +++ 6 files changed, 57 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 766a0a0150e873..7932e0d452c43f 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -167,6 +167,7 @@ set( JIT_SOURCES redundantbranchopts.cpp regalloc.cpp registerargconvention.cpp + regMaskTPOps.cpp regset.cpp scev.cpp scopeinfo.cpp diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5a217b0f741eab..84f86170265f68 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1433,7 +1433,7 @@ class LinearScan : public LinearScanInterface return nextConsecutiveRefPositionMap; } FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); - void getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates); + void getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, SingleTypeRegSet* candidates); #endif #ifdef DEBUG @@ -1983,10 +1983,10 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); + RefPosition* BuildUse(GenTree* operand, SingleTypeRegSet candidates = RBM_NONE, int multiRegIdx = 0); void 
setDelayFree(RefPosition* use); - int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); - int BuildCastUses(GenTreeCast* cast, regMaskTP candidates); + int BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates = RBM_NONE); + int BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates); #ifdef TARGET_XARCH int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree); @@ -2001,22 +2001,22 @@ class LinearScan : public LinearScanInterface bool supportsSpecialPutArg(); int BuildSimple(GenTree* tree); - int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildOperandUses(GenTree* node, SingleTypeRegSet candidates = RBM_NONE); void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode); int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, - regMaskTP candidates = RBM_NONE, + SingleTypeRegSet candidates = RBM_NONE, RefPosition** useRefPosition = nullptr); - int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); - int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); + int BuildIndirUses(GenTreeIndir* indirTree, SingleTypeRegSet candidates = RBM_NONE); + int BuildAddrUses(GenTree* addr, SingleTypeRegSet candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); - RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0); - void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE); + RefPosition* BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates = RBM_NONE, int multiRegIdx = 0); + void BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates = RBM_NONE); void BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates); void BuildKills(GenTree* tree, regMaskTP killMask); #ifdef TARGET_ARMARCH - void BuildDefWithKills(GenTree* tree, regMaskTP 
dstCandidates, regMaskTP killMask); + void BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask); #else void BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); #endif diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 5e59f150cfb39e..2469fab4489cec 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1562,7 +1562,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - regMaskTP predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; + SingleTypeRegSet predMask = HWIntrinsicInfo::IsLowMaskedOperation(intrin.id) ? RBM_LOWMASK : RBM_ALLMASK; srcCount += BuildOperandUses(intrin.op1, predMask); } else if (intrinsicTree->OperIsMemoryLoadOrStore()) @@ -1844,7 +1844,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); bool forceOp2DelayFree = false; - regMaskTP lowVectorCandidates = RBM_NONE; + SingleTypeRegSet lowVectorCandidates = RBM_NONE; size_t lowVectorOperandNum = 0; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { @@ -1897,7 +1897,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - regMaskTP candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE; + SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE; if (forceOp2DelayFree) { srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates); @@ -1911,7 +1911,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (intrin.op3 != nullptr) { - regMaskTP candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE; + SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE; srcCount += isRMW ? 
BuildDelayFreeUses(intrin.op3, intrin.op1, candidates) : BuildOperandUses(intrin.op3, candidates); @@ -2221,7 +2221,7 @@ bool RefPosition::isLiveAtConsecutiveRegistersLoc(LsraLocation consecutiveRegist // operandNum (out) - The operand number having the low vector register restriction // candidates (out) - The restricted low vector registers // -void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates) +void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, SingleTypeRegSet* candidates) { assert(HWIntrinsicInfo::IsLowVectorOperation(intrin.id)); unsigned baseElementSize = genTypeSize(intrin.baseType); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 2cadc7b246a6f0..43361a2cef04c5 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -149,7 +149,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } GenTree* ctrlExpr = call->gtControlExpr; - regMaskTP ctrlExprCandidates = RBM_NONE; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -444,7 +444,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. 
// -void LinearScan::BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) { assert(!tree->AsCall()->HasMultiRegRetVal()); assert((int)genCountBits(dstCandidates) == 1); @@ -602,7 +602,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - regMaskTP sourceMask = RBM_NONE; + SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 7d2186cc0217e6..5bb056354498d1 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -708,7 +708,11 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) // modified until codegen, which is too late. compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); + *killTail = pos; + killTail = &pos->nextRefPosition; + + pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getHigh()); *killTail = pos; killTail = &pos->nextRefPosition; @@ -1148,6 +1152,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo } Interval* interval = getIntervalForLocalVar(varIndex); const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); if (isCallKill) { @@ -1160,7 +1165,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo // See the "heuristics for writeThru intervals" in 'buildIntervals()'. 
if (!interval->isWriteThru || !isCallKill) { - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + SingleTypeRegSet newPreferences = allRegs(interval->registerType) & (~regsKillMask); if (newPreferences != RBM_NONE) { @@ -1168,7 +1173,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { // Update the register aversion as long as this is not write-thru vars for // reason mentioned above. - interval->registerAversion |= killMask; + interval->registerAversion |= regsKillMask; } interval->updateRegisterPreferences(newPreferences); } @@ -3056,7 +3061,7 @@ void setTgtPref(Interval* interval, RefPosition* tgtPrefUse) // Notes: // Adds the RefInfo for the definition to the defList. // -RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx) +RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, int multiRegIdx) { assert(!tree->isContained()); @@ -3158,13 +3163,12 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida { // In case of multi-reg call node, we have to query the i'th position return register. // For all other cases of multi-reg definitions, the registers must be in sequential order. - regMaskTP thisDstCandidates = - genRegMask(tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv())); + regNumber thisReg = tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); - assert((dstCandidates & thisDstCandidates) != RBM_NONE); - dstCandidates &= ~thisDstCandidates; + assert(dstCandidates.IsRegNumInMask(thisReg)); + dstCandidates.RemoveRegNumFromMask(thisReg); - BuildDef(tree, thisDstCandidates, i); + BuildDef(tree, genRegMask(thisReg), i); } } @@ -3181,7 +3185,7 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida // Also, the `dstCandidates` is assumed to be of "onlyOne" type. 
If there are // both gpr and float registers, use `BuildDefs` that takes `AllRegsMask` // -void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +void LinearScan::BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates) { assert(dstCount > 0); @@ -3197,7 +3201,7 @@ void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) for (int i = 0; i < dstCount; i++) { - regMaskTP thisDstCandidates = genFindLowestBit(dstCandidates); + SingleTypeRegSet thisDstCandidates = genFindLowestBit(dstCandidates); BuildDef(tree, thisDstCandidates, i); dstCandidates &= ~thisDstCandidates; } @@ -3386,7 +3390,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). // -RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) +RefPosition* LinearScan::BuildUse(GenTree* operand, SingleTypeRegSet candidates, int multiRegIdx) { assert(!operand->isContained()); Interval* interval; @@ -3456,12 +3460,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu // Notes: // This method may only be used if the candidates are the same for all sources. // -int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates) +int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, SingleTypeRegSet candidates) { return BuildAddrUses(indirTree->Addr(), candidates); } -int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) +int LinearScan::BuildAddrUses(GenTree* addr, SingleTypeRegSet candidates) { if (!addr->isContained()) { @@ -3518,7 +3522,7 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) // Return Value: // The number of source registers used by the *parent* of this node. 
// -int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildOperandUses(GenTree* node, SingleTypeRegSet candidates) { if (!node->isContained()) { @@ -3674,7 +3678,7 @@ void LinearScan::AddDelayFreeUses(RefPosition* useRefPosition, GenTree* rmwNode) // int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, - regMaskTP candidates, + SingleTypeRegSet candidates, RefPosition** useRefPositionRef) { RefPosition* use = nullptr; @@ -3771,7 +3775,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, // The operands must already have been processed by buildRefPositionsForNode, and their // RefInfoListNodes placed in the defList. // -int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) +int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2IfPresent(); @@ -3805,7 +3809,7 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) // Return Value: // The number of actual register operands. 
// -int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates) +int LinearScan::BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates) { GenTree* src = cast->CastOp(); @@ -3870,7 +3874,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, } } - regMaskTP defCandidates = RBM_NONE; + SingleTypeRegSet defCandidates = RBM_NONE; var_types type = varDsc->GetRegisterType(); #ifdef TARGET_X86 @@ -3956,7 +3960,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc) if (isMultiRegSrc) { - regMaskTP srcCandidates = RBM_NONE; + SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = fieldVarDsc->TypeGet(); if (varTypeIsByte(type)) @@ -4067,7 +4071,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) else { srcCount = 1; - regMaskTP srcCandidates = RBM_NONE; + SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = varDsc->GetRegisterType(storeLoc); if (varTypeIsByte(type)) @@ -4158,7 +4162,7 @@ int LinearScan::BuildReturn(GenTree* tree) #endif // !defined(TARGET_64BIT) if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) { - regMaskTP useCandidates = RBM_NONE; + SingleTypeRegSet useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET #ifdef TARGET_ARM64 @@ -4351,7 +4355,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. - regMaskTP argMask = genRegMask(argReg); + SingleTypeRegSet argMask = genRegMask(argReg); RefPosition* use = BuildUse(op1, argMask); // Record that this register is occupied by a register now. 
@@ -4448,8 +4452,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // is an indir through an lea, we need to actually instantiate the // lea in a register assert(!addr->isContained() && !src->isContained()); - regMaskTP addrCandidates = RBM_WRITE_BARRIER_DST; - regMaskTP srcCandidates = RBM_WRITE_BARRIER_SRC; + SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST; + SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC; #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS @@ -4496,7 +4500,7 @@ int LinearScan::BuildCmp(GenTree* tree) if (!tree->TypeIs(TYP_VOID)) { - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; #ifdef TARGET_X86 // If the compare is used by a jump, we just need to set the condition codes. If not, then we need @@ -4520,8 +4524,8 @@ int LinearScan::BuildCmp(GenTree* tree) // int LinearScan::BuildCmpOperands(GenTree* tree) { - regMaskTP op1Candidates = RBM_NONE; - regMaskTP op2Candidates = RBM_NONE; + SingleTypeRegSet op1Candidates = RBM_NONE; + SingleTypeRegSet op2Candidates = RBM_NONE; GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index de7c2c91be58bd..ac8a6ad669f233 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -296,6 +296,10 @@ struct regMaskTP { return getLow(); } + + void RemoveRegNumFromMask(regNumber reg); + + bool IsRegNumInMask(regNumber reg); }; static regMaskTP operator^(regMaskTP first, regMaskTP second) From 325bc6e241513b4b9305e747af2eee7c775502e3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 16:44:53 -0700 Subject: [PATCH 18/44] REVERT: temporary add #ifdef TARGET_ARM64 for accessing regMaskTP methods --- src/coreclr/jit/lsra.cpp | 33 ++++++++++++++++++++++++++++++++- src/coreclr/jit/lsra.h | 4 ++++ src/coreclr/jit/lsrabuild.cpp | 18 ++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp 
index f19961abe30f55..726c76164f83ea 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -302,7 +302,11 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); +#ifdef TARGET_ARM64 SingleTypeRegSet candidates = (mask & m_RegistersWithConstants).GetRegSetForType(currentInterval->registerType); +#else + SingleTypeRegSet candidates = (mask & m_RegistersWithConstants); +#endif SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { @@ -495,7 +499,12 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); //TODO: Pass the right type +#ifdef TARGET_ARM64 + // TODO-lsra: Pass the right type + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); +#else + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation); +#endif if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. @@ -13481,8 +13490,14 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* if (currentInterval->isWriteThru) { // We'll only prefer a callee-save register if it's already been used. 
+#ifdef TARGET_ARM64 SingleTypeRegSet unusedCalleeSaves = calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); +#else + SingleTypeRegSet unusedCalleeSaves = + calleeSaveCandidates & + ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); +#endif callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; } @@ -13531,7 +13546,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. +#ifdef TARGET_ARM64 SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); +#else + SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); +#endif candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13553,7 +13572,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. +#ifdef TARGET_ARM64 SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); +#else + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs; +#endif while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); @@ -13853,7 +13876,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
+#ifdef TARGET_ARM64 SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); +#else + SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); +#endif // TARGET_ARM64 candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13871,7 +13898,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. +#ifdef TARGET_ARM64 SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); +#else + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs; +#endif while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 84f86170265f68..876f37b9970e6e 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1251,7 +1251,11 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 1); } #endif // TARGET_ARM +#ifdef TARGET_ARM64 return result.GetRegSetForType(regType); +#else + return result; +#endif } #ifdef DEBUG diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 5bb056354498d1..b5877295c2bf23 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -708,11 +708,15 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) // modified until codegen, which is too late. 
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); +#ifdef TARGET_ARM64 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); *killTail = pos; killTail = &pos->nextRefPosition; pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getHigh()); +#else + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); +#endif *killTail = pos; killTail = &pos->nextRefPosition; @@ -1152,7 +1156,11 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo } Interval* interval = getIntervalForLocalVar(varIndex); const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); +#ifdef TARGET_ARM64 SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); +#else + SingleTypeRegSet regsKillMask = killMask; +#endif if (isCallKill) { @@ -3165,8 +3173,14 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida // For all other cases of multi-reg definitions, the registers must be in sequential order. 
regNumber thisReg = tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); +#ifdef TARGET_ARM64 assert(dstCandidates.IsRegNumInMask(thisReg)); dstCandidates.RemoveRegNumFromMask(thisReg); +#else + assert((dstCandidates & genRegMask(thisReg)) != RBM_NONE); + dstCandidates &= ~genRegMask(thisReg); +#endif // TARGET_ARM64 + BuildDef(tree, genRegMask(thisReg), i); } @@ -3368,7 +3382,11 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) } #endif +#ifdef TARGET_ARM64 SingleTypeRegSet unprefSet = unpref.GetRegSetForType(interval->registerType); +#else + SingleTypeRegSet unprefSet = unpref; +#endif interval->registerAversion |= unprefSet; SingleTypeRegSet newPreferences = allRegs(interval->registerType) & ~unprefSet; interval->updateRegisterPreferences(newPreferences); From e96428f318c8db196a56e2a6993db0413a2f04ea Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 22:28:21 -0700 Subject: [PATCH 19/44] forgot to add the new file --- src/coreclr/jit/regMaskTPOps.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/coreclr/jit/regMaskTPOps.cpp diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp new file mode 100644 index 00000000000000..6c5119054daa47 --- /dev/null +++ b/src/coreclr/jit/regMaskTPOps.cpp @@ -0,0 +1,18 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "target.h" + +#ifdef TARGET_ARM64 +struct regMaskTP; + +void regMaskTP::RemoveRegNumFromMask(regNumber reg) +{ + low &= ~genRegMask(reg); +} + +bool regMaskTP::IsRegNumInMask(regNumber reg) +{ + return (low & genRegMask(reg)) != 0; +} +#endif // TARGET_ARM64 From 1cf6b7a3af5cddd85b105fb0a3049aa63bcb9220 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 22:36:36 -0700 Subject: [PATCH 20/44] make addRegsForKill only on low --- src/coreclr/jit/lsrabuild.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b5877295c2bf23..f7af3d0fb94799 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -710,14 +710,11 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) #ifdef TARGET_ARM64 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); - *killTail = pos; - killTail = &pos->nextRefPosition; - - pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getHigh()); #else RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); #endif + *killTail = pos; killTail = &pos->nextRefPosition; } From bf865157384e7fb1673ff650b4398bc080930317 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 May 2024 22:36:43 -0700 Subject: [PATCH 21/44] jit format --- src/coreclr/jit/codegeninterface.h | 10 ++-- src/coreclr/jit/lsra.cpp | 74 ++++++++++++++++-------------- src/coreclr/jit/lsra.h | 70 ++++++++++++++-------------- src/coreclr/jit/lsraarm64.cpp | 44 +++++++++--------- src/coreclr/jit/lsraarmarch.cpp | 9 ++-- src/coreclr/jit/lsrabuild.cpp | 59 ++++++++++++------------ src/coreclr/jit/target.h | 2 - 7 files changed, 136 insertions(+), 132 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 380bc5ee484e27..55cdadb076800a 100644 --- 
a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -49,16 +49,16 @@ CodeGenInterface* getCodeGenerator(Compiler* comp); class NodeInternalRegisters { typedef JitHashTable, SingleTypeRegSet> NodeInternalRegistersTable; - NodeInternalRegistersTable m_table; + NodeInternalRegistersTable m_table; public: NodeInternalRegisters(Compiler* comp); - void Add(GenTree* tree, SingleTypeRegSet reg); - regNumber Extract(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); - regNumber GetSingle(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + void Add(GenTree* tree, SingleTypeRegSet reg); + regNumber Extract(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + regNumber GetSingle(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); SingleTypeRegSet GetAll(GenTree* tree); - unsigned Count(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + unsigned Count(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); }; class CodeGenInterface diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 726c76164f83ea..91199b3403f28f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -299,7 +299,9 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo nextFixedRef[regRecord->regNum] = nextLocation; } -SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition) +SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, + Interval* currentInterval, + RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); #ifdef TARGET_ARM64 @@ -307,10 +309,10 @@ SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, Interva #else SingleTypeRegSet candidates = (mask & m_RegistersWithConstants); #endif - SingleTypeRegSet result = RBM_NONE; + SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { - regNumber regNum = 
genFirstRegNumFromMask(candidates); + regNumber regNum = genFirstRegNumFromMask(candidates); SingleTypeRegSet candidateBit = genRegMask(regNum); candidates ^= candidateBit; @@ -486,9 +488,9 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // Otherwise returns regMaskActual. // SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, - SingleTypeRegSet regMaskActual, - SingleTypeRegSet regMaskConstraint, - unsigned minRegCount) + SingleTypeRegSet regMaskActual, + SingleTypeRegSet regMaskConstraint, + unsigned minRegCount) { SingleTypeRegSet newMask = regMaskActual & regMaskConstraint; if (genCountBits(newMask) < minRegCount) @@ -2994,9 +2996,9 @@ regNumber LinearScan::allocateRegMinimal(Interval* currentInterva RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!enregisterLocalVars); - regNumber foundReg; + regNumber foundReg; SingleTypeRegSet foundRegBit; - RegRecord* availablePhysRegRecord; + RegRecord* availablePhysRegRecord; foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { @@ -5264,7 +5266,7 @@ void LinearScan::allocateRegistersMinimal() } SingleTypeRegSet assignedRegBit = RBM_NONE; - bool isInRegister = false; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -6196,7 +6198,7 @@ void LinearScan::allocateRegisters() } SingleTypeRegSet assignedRegBit = RBM_NONE; - bool isInRegister = false; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -9527,7 +9529,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - SingleTypeRegSet terminatorConsumedRegs) + SingleTypeRegSet terminatorConsumedRegs) { VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum); VarToRegMap toVarToRegMap; @@ -12832,7 +12834,7 @@ void LinearScan::RegisterSelection::try_BEST_FIT() LsraLocation 
bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; for (SingleTypeRegSet bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { - regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); + regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); SingleTypeRegSet bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); bestFitCandidates ^= bestFitCandidateBit; @@ -12927,11 +12929,11 @@ void LinearScan::RegisterSelection::try_REG_ORDER() // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. - unsigned lowestRegOrder = UINT_MAX; + unsigned lowestRegOrder = UINT_MAX; SingleTypeRegSet lowestRegOrderBit = RBM_NONE; for (SingleTypeRegSet regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { - regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); + regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); SingleTypeRegSet regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; @@ -12967,7 +12969,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() for (SingleTypeRegSet spillCandidates = candidates; spillCandidates != RBM_NONE;) { - regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); + regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); SingleTypeRegSet spillCandidateBit = genRegMask(spillCandidateRegNum); spillCandidates ^= spillCandidateBit; @@ -13088,11 +13090,11 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() { assert(!found); - LsraLocation farthestLocation = MinLocation; - SingleTypeRegSet farthestSet = RBM_NONE; + LsraLocation farthestLocation = MinLocation; + SingleTypeRegSet farthestSet = RBM_NONE; for (SingleTypeRegSet farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { - regNumber 
farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); + regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); SingleTypeRegSet farthestCandidateBit = genRegMask(farthestCandidateRegNum); farthestCandidates ^= farthestCandidateBit; @@ -13125,7 +13127,7 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() SingleTypeRegSet prevRegOptSet = RBM_NONE; for (SingleTypeRegSet prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { - regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); + regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); SingleTypeRegSet prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); prevRegOptCandidates ^= prevRegOptCandidateBit; Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; @@ -13228,7 +13230,7 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate SingleTypeRegSet coversCandidates = candidates; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; @@ -13252,11 +13254,11 @@ void LinearScan::RegisterSelection::calculateCoversSets() return; } - preferenceSet = (candidates & preferences); + preferenceSet = (candidates & preferences); SingleTypeRegSet coversCandidates = (preferenceSet == RBM_NONE) ? 
candidates : preferenceSet; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; @@ -13329,7 +13331,7 @@ void LinearScan::RegisterSelection::calculateCoversSets() // template SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG *registerScore = NONE; @@ -13492,11 +13494,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // We'll only prefer a callee-save register if it's already been used. #ifdef TARGET_ARM64 SingleTypeRegSet unusedCalleeSaves = - calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); + calleeSaveCandidates & + ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); #else SingleTypeRegSet unusedCalleeSaves = - calleeSaveCandidates & - ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); + calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); #endif callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; @@ -13547,7 +13549,8 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
#ifdef TARGET_ARM64 - SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); + SingleTypeRegSet busyRegs = + (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); #else SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); #endif @@ -13579,7 +13582,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* #endif while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; @@ -13670,9 +13673,10 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one // such range that is consecutive. Next, append that range to the `candidates`. // - SingleTypeRegSet limitCandidatesForConsecutive = ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs); + SingleTypeRegSet limitCandidatesForConsecutive = + ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs); SingleTypeRegSet overallLimitCandidates; - regMaskTP limitConsecutiveResult = + regMaskTP limitConsecutiveResult = linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount, &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); @@ -13802,8 +13806,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // are busy). 
// SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( - Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) + Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!linearScan->enregisterLocalVars); #ifdef DEBUG @@ -13877,10 +13880,11 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. #ifdef TARGET_ARM64 - SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); + SingleTypeRegSet busyRegs = + (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); #else SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13905,7 +13909,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( #endif while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 876f37b9970e6e..e513f0db453295 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -775,10 +775,10 @@ class LinearScan : public LinearScanInterface return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); } - SingleTypeRegSet getConstrainedRegMask(RefPosition* refPosition, + SingleTypeRegSet getConstrainedRegMask(RefPosition* refPosition, SingleTypeRegSet regMaskActual, SingleTypeRegSet regMaskConstrain, - unsigned minRegCount); + unsigned minRegCount); SingleTypeRegSet stressLimitRegs(RefPosition* 
refPosition, SingleTypeRegSet mask); // This controls the heuristics used to select registers @@ -1166,12 +1166,12 @@ class LinearScan : public LinearScanInterface RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); - RefPosition* newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, + RefPosition* newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, SingleTypeRegSet mask, - unsigned multiRegIdx = 0); + unsigned multiRegIdx = 0); RefPosition* newRefPosition( regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, SingleTypeRegSet mask); @@ -1230,12 +1230,14 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - SingleTypeRegSet getConsecutiveCandidates(SingleTypeRegSet candidates, RefPosition* refPosition, SingleTypeRegSet* busyCandidates); - SingleTypeRegSet filterConsecutiveCandidates(SingleTypeRegSet candidates, - unsigned int registersNeeded, - SingleTypeRegSet* allConsecutiveCandidates); + bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + SingleTypeRegSet getConsecutiveCandidates(SingleTypeRegSet candidates, + RefPosition* refPosition, + SingleTypeRegSet* busyCandidates); + SingleTypeRegSet filterConsecutiveCandidates(SingleTypeRegSet candidates, + unsigned int registersNeeded, + SingleTypeRegSet* allConsecutiveCandidates); SingleTypeRegSet filterConsecutiveCandidatesForSpill(SingleTypeRegSet consecutiveCandidates, unsigned int registersNeeded); 
#endif // TARGET_ARM64 @@ -1275,10 +1277,10 @@ class LinearScan : public LinearScanInterface // Perform register selection and update currentInterval or refPosition template FORCEINLINE SingleTypeRegSet select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); FORCEINLINE SingleTypeRegSet selectMinimal(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); // If the register is from unassigned set such that it was not already // assigned to the current interval @@ -1321,16 +1323,16 @@ class LinearScan : public LinearScanInterface SingleTypeRegSet candidates; SingleTypeRegSet preferences = RBM_NONE; - Interval* relatedInterval = nullptr; + Interval* relatedInterval = nullptr; SingleTypeRegSet relatedPreferences = RBM_NONE; - LsraLocation rangeEndLocation; - LsraLocation relatedLastLocation; - bool preferCalleeSave = false; - RefPosition* rangeEndRefPosition; - RefPosition* lastRefPosition; + LsraLocation rangeEndLocation; + LsraLocation relatedLastLocation; + bool preferCalleeSave = false; + RefPosition* rangeEndRefPosition; + RefPosition* lastRefPosition; SingleTypeRegSet callerCalleePrefs = RBM_NONE; - LsraLocation lastLocation; + LsraLocation lastLocation; SingleTypeRegSet foundRegBit; @@ -1347,11 +1349,11 @@ class LinearScan : public LinearScanInterface SingleTypeRegSet preferenceSet; SingleTypeRegSet coversRelatedSet; SingleTypeRegSet coversFullSet; - bool coversSetsCalculated = false; - bool found = false; - bool skipAllocation = false; - bool coversFullApplied = false; - bool constAvailableApplied = false; + bool coversSetsCalculated = false; + bool found = false; + bool skipAllocation = false; + bool coversFullApplied = false; + bool constAvailableApplied = false; // If the selected register is already assigned to the current internal 
FORCEINLINE bool isAlreadyAssigned() @@ -1422,7 +1424,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - SingleTypeRegSet terminatorConsumedRegs); + SingleTypeRegSet terminatorConsumedRegs); #ifdef TARGET_ARM64 typedef JitHashTable, RefPosition*> NextConsecutiveRefPositionsMap; @@ -1713,8 +1715,8 @@ class LinearScan : public LinearScanInterface VarToRegMap* outVarToRegMaps; // A temporary VarToRegMap used during the resolution of critical edges. - VarToRegMap sharedCriticalVarToRegMap; - PhasedVar actualRegistersMask; + VarToRegMap sharedCriticalVarToRegMap; + PhasedVar actualRegistersMask; PhasedVar availableIntRegs; PhasedVar availableFloatRegs; PhasedVar availableDoubleRegs; @@ -2007,10 +2009,10 @@ class LinearScan : public LinearScanInterface int BuildSimple(GenTree* tree); int BuildOperandUses(GenTree* node, SingleTypeRegSet candidates = RBM_NONE); void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode); - int BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode = nullptr, - SingleTypeRegSet candidates = RBM_NONE, - RefPosition** useRefPosition = nullptr); + int BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode = nullptr, + SingleTypeRegSet candidates = RBM_NONE, + RefPosition** useRefPosition = nullptr); int BuildIndirUses(GenTreeIndir* indirTree, SingleTypeRegSet candidates = RBM_NONE); int BuildAddrUses(GenTree* addr, SingleTypeRegSet candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 2469fab4489cec..5481803b319c2e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -177,8 +177,8 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // set. 
// SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet floatCandidates, - unsigned int registersNeeded, - SingleTypeRegSet* allConsecutiveCandidates) + unsigned int registersNeeded, + SingleTypeRegSet* allConsecutiveCandidates) { assert((floatCandidates == RBM_NONE) || (floatCandidates & availableFloatRegs) != RBM_NONE); @@ -196,10 +196,10 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. -#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ - consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; @@ -207,7 +207,7 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet float do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitScanForward(currAvailableRegs); + regAvailableStartIndex = BitScanForward(currAvailableRegs); SingleTypeRegSet startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. 
@@ -332,8 +332,8 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidatesForSpill(SingleTypeRegSe assert((registersNeeded >= 2) && (registersNeeded <= 4)); SingleTypeRegSet consecutiveResultForBusy = RBM_NONE; SingleTypeRegSet unprocessedRegs = consecutiveCandidates; - unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; - int maxSpillRegs = registersNeeded; + unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; + int maxSpillRegs = registersNeeded; SingleTypeRegSet registersNeededMask = (1ULL << registersNeeded) - 1; do { @@ -416,9 +416,9 @@ SingleTypeRegSet LinearScan::filterConsecutiveCandidatesForSpill(SingleTypeRegSe // allCandidates = 0x1C080D0F00000000, the consecutive register mask returned // will be 0x400000300000000. // -SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandidates, - RefPosition* refPosition, - SingleTypeRegSet* busyCandidates) +SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandidates, + RefPosition* refPosition, + SingleTypeRegSet* busyCandidates) { assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); @@ -431,13 +431,13 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCan { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. 
floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | - RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); + RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); } #endif *busyCandidates = RBM_NONE; - SingleTypeRegSet overallResult; - unsigned int registersNeeded = refPosition->regCount; + SingleTypeRegSet overallResult; + unsigned int registersNeeded = refPosition->regCount; if (floatFreeCandidates != RBM_NONE) { @@ -451,9 +451,9 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCan // register out of the `consecutiveResult` is available for the first RefPosition, then just use // that. This will avoid unnecessary copies. - regNumber firstRegNum = REG_NA; - regNumber prevRegNum = REG_NA; - int foundCount = 0; + regNumber firstRegNum = REG_NA; + regNumber prevRegNum = REG_NA; + int foundCount = 0; RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition); assert(consecutiveRefPosition != nullptr); @@ -465,7 +465,7 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCan if (!interval->isActive) { - foundCount = 0; + foundCount = 0; continue; } @@ -482,7 +482,7 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCan continue; } - foundCount = 0; + foundCount = 0; break; } @@ -1843,9 +1843,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); - bool forceOp2DelayFree = false; + bool forceOp2DelayFree = false; SingleTypeRegSet lowVectorCandidates = RBM_NONE; - size_t lowVectorOperandNum = 0; + size_t lowVectorOperandNum = 0; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal())) diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 43361a2cef04c5..0c6daac078c0d2 
100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - SingleTypeRegSet singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -148,7 +148,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - GenTree* ctrlExpr = call->gtControlExpr; + GenTree* ctrlExpr = call->gtControlExpr; SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { @@ -560,7 +560,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); + regNumber argReg = argNode->GetRegNum(); SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { @@ -568,7 +568,8 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } - assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || ((argMask & availableFloatRegs) != RBM_NONE)); + assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || + ((argMask & availableFloatRegs) != RBM_NONE)); if (src->OperGet() == GT_FIELD_LIST) { diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f7af3d0fb94799..d323f054d8ac0a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -250,15 +250,15 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de { assert(!interval->isLocalVar); - RefPosition* useRefPosition = defRefPosition->nextRefPosition; + RefPosition* useRefPosition = defRefPosition->nextRefPosition; SingleTypeRegSet defRegAssignment = defRefPosition->registerAssignment; SingleTypeRegSet useRegAssignment = 
useRefPosition->registerAssignment; - RegRecord* defRegRecord = nullptr; - RegRecord* useRegRecord = nullptr; - regNumber defReg = REG_NA; - regNumber useReg = REG_NA; - bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); - bool useRegConflict = defRegConflict; + RegRecord* defRegRecord = nullptr; + RegRecord* useRegRecord = nullptr; + regNumber defReg = REG_NA; + regNumber useReg = REG_NA; + bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); + bool useRegConflict = defRegConflict; // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment // on it, or we will fail to ensure that the fixedReg is busy at the time the target @@ -554,12 +554,12 @@ RefPosition* LinearScan::newRefPosition( // Return Value: // a new RefPosition // -RefPosition* LinearScan::newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, +RefPosition* LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, SingleTypeRegSet mask, - unsigned multiRegIdx /* = 0 */) + unsigned multiRegIdx /* = 0 */) { if (theInterval != nullptr) { @@ -714,7 +714,6 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); #endif - *killTail = pos; killTail = &pos->nextRefPosition; } @@ -1400,9 +1399,9 @@ void LinearScan::buildInternalRegisterUses() assert(internalCount <= MaxInternalCount); for (int i = 0; i < internalCount; i++) { - RefPosition* def = internalDefs[i]; + RefPosition* def = internalDefs[i]; SingleTypeRegSet mask = def->registerAssignment; - RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); + RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); if (setInternalRegsDelayFree) { use->delayRegFree = 
true; @@ -1868,7 +1867,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc newRefPosition->minRegCandidateCount = minRegCountForRef; if (newRefPosition->IsActualRef() && doReverseCallerCallee()) { - Interval* interval = newRefPosition->getInterval(); + Interval* interval = newRefPosition->getInterval(); SingleTypeRegSet oldAssignment = newRefPosition->registerAssignment; SingleTypeRegSet calleeSaveMask = calleeSaveRegs(interval->registerType); #ifdef TARGET_ARM64 @@ -2874,8 +2873,8 @@ void LinearScan::buildInitialParamDef(const LclVarDsc* varDsc, regNumber paramRe { assert(isCandidateVar(varDsc)); - Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); - const var_types regType = varDsc->GetRegisterType(); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + const var_types regType = varDsc->GetRegisterType(); SingleTypeRegSet mask = allRegs(regType); if ((paramReg != REG_NA) && !stressInitialParamReg()) { @@ -3168,7 +3167,8 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida { // In case of multi-reg call node, we have to query the i'th position return register. // For all other cases of multi-reg definitions, the registers must be in sequential order. - regNumber thisReg = tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); + regNumber thisReg = + tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); #ifdef TARGET_ARM64 assert(dstCandidates.IsRegNumInMask(thisReg)); @@ -3178,7 +3178,6 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida dstCandidates &= ~genRegMask(thisReg); #endif // TARGET_ARM64 - BuildDef(tree, genRegMask(thisReg), i); } } @@ -3691,10 +3690,10 @@ void LinearScan::AddDelayFreeUses(RefPosition* useRefPosition, GenTree* rmwNode) // Return Value: // The number of source registers used by the *parent* of this node. 
// -int LinearScan::BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode, - SingleTypeRegSet candidates, - RefPosition** useRefPositionRef) +int LinearScan::BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode, + SingleTypeRegSet candidates, + RefPosition** useRefPositionRef) { RefPosition* use = nullptr; GenTree* addr = nullptr; @@ -3890,7 +3889,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, } SingleTypeRegSet defCandidates = RBM_NONE; - var_types type = varDsc->GetRegisterType(); + var_types type = varDsc->GetRegisterType(); #ifdef TARGET_X86 if (varTypeIsByte(type)) @@ -4085,7 +4084,7 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) } else { - srcCount = 1; + srcCount = 1; SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = varDsc->GetRegisterType(storeLoc); @@ -4370,8 +4369,8 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. - SingleTypeRegSet argMask = genRegMask(argReg); - RefPosition* use = BuildUse(op1, argMask); + SingleTypeRegSet argMask = genRegMask(argReg); + RefPosition* use = BuildUse(op1, argMask); // Record that this register is occupied by a register now. 
placedArgRegs |= argMask; @@ -4541,8 +4540,8 @@ int LinearScan::BuildCmpOperands(GenTree* tree) { SingleTypeRegSet op1Candidates = RBM_NONE; SingleTypeRegSet op2Candidates = RBM_NONE; - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); #ifdef TARGET_X86 bool needByteRegs = false; diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index ac8a6ad669f233..2af40b502abe5d 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -211,7 +211,6 @@ enum _regMask_enum : unsigned typedef _regNumber_enum regNumber; typedef unsigned char regNumberSmall; - #if REGMASK_BITS == 8 typedef unsigned char regMaskSmall; #define REG_MASK_INT_FMT "%02X" @@ -383,7 +382,6 @@ static uint32_t BitScanForward(regMaskTP mask) #endif } - /*****************************************************************************/ #ifdef DEBUG From 7d566b450d3d7b835994d0a8db7ebcc4e1f095dc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 12:48:00 -0700 Subject: [PATCH 22/44] Revert "REVERT: temporary add #ifdef TARGET_ARM64 for accessing regMaskTP methods" This reverts commit 325bc6e241513b4b9305e747af2eee7c775502e3. 
--- src/coreclr/jit/lsra.cpp | 32 +------------------------------- src/coreclr/jit/lsra.h | 4 ---- src/coreclr/jit/lsrabuild.cpp | 18 ------------------ 3 files changed, 1 insertion(+), 53 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 91199b3403f28f..50d60fff613aa4 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -304,11 +304,7 @@ SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); -#ifdef TARGET_ARM64 SingleTypeRegSet candidates = (mask & m_RegistersWithConstants).GetRegSetForType(currentInterval->registerType); -#else - SingleTypeRegSet candidates = (mask & m_RegistersWithConstants); -#endif SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { @@ -501,12 +497,7 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { -#ifdef TARGET_ARM64 - // TODO-lsra: Pass the right type - SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); -#else - SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation); -#endif + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); //TODO: Pass the right type if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. @@ -13492,14 +13483,9 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* if (currentInterval->isWriteThru) { // We'll only prefer a callee-save register if it's already been used. 
-#ifdef TARGET_ARM64 SingleTypeRegSet unusedCalleeSaves = calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); -#else - SingleTypeRegSet unusedCalleeSaves = - calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); -#endif callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; } @@ -13548,12 +13534,8 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. -#ifdef TARGET_ARM64 SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); -#else - SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); -#endif candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13575,11 +13557,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. -#ifdef TARGET_ARM64 SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); -#else - SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs; -#endif while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); @@ -13879,12 +13857,8 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. 
-#ifdef TARGET_ARM64 SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); -#else - SingleTypeRegSet busyRegs = (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation); -#endif // TARGET_ARM64 candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13902,11 +13876,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. -#ifdef TARGET_ARM64 SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); -#else - SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs; -#endif while (checkConflictMask != RBM_NONE) { regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e513f0db453295..96464838ae3864 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1253,11 +1253,7 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 1); } #endif // TARGET_ARM -#ifdef TARGET_ARM64 return result.GetRegSetForType(regType); -#else - return result; -#endif } #ifdef DEBUG diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 5e0f188d430042..da34f449ce91fe 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -708,11 +708,7 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) // modified until codegen, which is too late. 
compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); -#ifdef TARGET_ARM64 RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); -#else - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); -#endif *killTail = pos; killTail = &pos->nextRefPosition; @@ -1152,11 +1148,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo } Interval* interval = getIntervalForLocalVar(varIndex); const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); -#ifdef TARGET_ARM64 SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); -#else - SingleTypeRegSet regsKillMask = killMask; -#endif if (isCallKill) { @@ -3173,14 +3165,8 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida regNumber thisReg = tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); -#ifdef TARGET_ARM64 assert(dstCandidates.IsRegNumInMask(thisReg)); dstCandidates.RemoveRegNumFromMask(thisReg); -#else - assert((dstCandidates & genRegMask(thisReg)) != RBM_NONE); - dstCandidates &= ~genRegMask(thisReg); -#endif // TARGET_ARM64 - BuildDef(tree, genRegMask(thisReg), i); } } @@ -3407,11 +3393,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) } #endif -#ifdef TARGET_ARM64 SingleTypeRegSet unprefSet = unpref.GetRegSetForType(interval->registerType); -#else - SingleTypeRegSet unprefSet = unpref; -#endif interval->registerAversion |= unprefSet; SingleTypeRegSet newPreferences = allRegs(interval->registerType) & ~unprefSet; interval->updateRegisterPreferences(newPreferences); From 1963c98903d3bff4c7442270b3e9a56e0bb09b57 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 13:57:04 -0700 Subject: [PATCH 23/44] Various fixes after merge --- src/coreclr/jit/gcencode.cpp | 2 +- src/coreclr/jit/jit.h | 2 +- src/coreclr/jit/lsra.h 
| 6 +-- src/coreclr/jit/lsrabuild.cpp | 66 +++++++++++++++++--------------- src/coreclr/jit/regMaskTPOps.cpp | 3 +- src/coreclr/jit/target.h | 26 ++++++------- 6 files changed, 52 insertions(+), 53 deletions(-) diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 902029791f20c0..d8ece2112d6fa2 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, while (regMask) { // Get hold of the next register bit. - regMaskSmall tmpMask = genFindLowestBit(regMask); + SingleTypeRegSet tmpMask = genFindLowestBit(regMask); assert(tmpMask); // Remember the new state of this register. diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index 7a282efc1f200e..983103b80cef12 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -513,7 +513,7 @@ class GlobalJitOptions #define TRACK_LSRA_STATS 1 // Collect LSRA stats #define TRACK_ENREG_STATS 1 // Collect enregistration stats #else -#define MEASURE_MEM_ALLOC 1 // You can set this to 1 to get memory stats in retail, as well +#define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well #define LOOP_HOIST_STATS 0 // You can set this to 1 to get loop hoist stats in retail, as well #define TRACK_LSRA_STATS 0 // You can set this to 1 to get LSRA stats in retail, as well #define TRACK_ENREG_STATS 0 diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 96464838ae3864..97670a7ff436e8 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2017,11 +2017,11 @@ class LinearScan : public LinearScanInterface void BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates = RBM_NONE); void BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates); void BuildKills(GenTree* tree, regMaskTP killMask); -#ifdef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) void BuildDefWithKills(GenTree* tree, 
SingleTypeRegSet dstCandidates, regMaskTP killMask); #else - void BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); -#endif + void BuildDefWithKills(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates, regMaskTP killMask); +#endif // TARGET_ARMARCH || TARGET_RISCV64 void BuildCallDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); int BuildReturn(GenTree* tree); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index da34f449ce91fe..1684c805a41d56 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3147,10 +3147,14 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, // Adds the RefInfo for the definitions to the defList. // void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +{ const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); assert(retTypeDesc != nullptr); if (retTypeDesc == nullptr) -{ + { + return; + } + assert(dstCount > 0); assert((int)genCountBits(dstCandidates) == dstCount); assert(tree->IsMultiRegCall()); @@ -3242,9 +3246,10 @@ void LinearScan::BuildKills(GenTree* tree, regMaskTP killMask) } } -#ifndef TARGET_ARMARCH +#if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) + //------------------------------------------------------------------------ -// BuildDefWithKills: Build one or two (for 32-bit) RefTypeDef RefPositions for the given node, +// BuildDefWithKills: Build one RefTypeDef RefPositions for the given node, // as well as kills as specified by the given mask. // // Arguments: @@ -3257,36 +3262,23 @@ void LinearScan::BuildKills(GenTree* tree, regMaskTP killMask) // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. 
// -void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) { + assert(!tree->AsCall()->HasMultiRegRetVal()); + assert((int)genCountBits(dstCandidates) == 1); + // Build the kill RefPositions BuildKills(tree, killMask); - -#ifdef TARGET_64BIT - // For 64 bits, - assert(dstCount == 1); BuildDef(tree, dstCandidates); -#else - if (dstCount == 1) - { - BuildDef(tree, dstCandidates); - } - else - { - assert(dstCount == 2); - BuildDefs(tree, 2, dstCandidates); - } -#endif // TARGET_64BIT } -#endif +#else //------------------------------------------------------------------------ -// BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node, +// BuildDefWithKills: Build one or two (for 32-bit) RefTypeDef RefPositions for the given node, // as well as kills as specified by the given mask. // // Arguments: -// tree - The node that defines a register -// dstCount - The number of registers defined by the node +// tree - The call node that defines a register // dstCandidates - The candidate registers for the definition // killMask - The mask of registers killed by this node // @@ -3295,16 +3287,28 @@ void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCan // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. 
// -void LinearScan::BuildCallDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates, regMaskTP killMask) { - assert(dstCount > 0); - assert(dstCandidates != RBM_NONE); - // Build the kill RefPositions BuildKills(tree, killMask); - // And then the Def(s) - BuildCallDefs(tree, dstCount, dstCandidates); +#ifdef TARGET_64BIT + // For 64 bits, + assert(dstCount == 1); + BuildDef(tree, dstCandidates); +#else + if (dstCount == 1) + { + BuildDef(tree, dstCandidates); + } + else + { + assert(dstCount == 2); + BuildDefs(tree, 2, dstCandidates); + } +#endif // TARGET_64BIT +} +#endif // defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ // BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node, @@ -3377,11 +3381,11 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) { // This local's value is going to be available in this register so // keep it in the preferences. 
- unpref &= ~genRegMask(placedArgLocals[i].Reg); + unpref.RemoveRegNumFromMask(placedArgLocals[i].Reg); } } - if (unpref != RBM_NONE) + if (unpref.IsNonEmpty()) { #ifdef DEBUG if (VERBOSE) diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp index 6c5119054daa47..af503e26d24adc 100644 --- a/src/coreclr/jit/regMaskTPOps.cpp +++ b/src/coreclr/jit/regMaskTPOps.cpp @@ -3,7 +3,6 @@ #include "target.h" -#ifdef TARGET_ARM64 struct regMaskTP; void regMaskTP::RemoveRegNumFromMask(regNumber reg) @@ -15,4 +14,4 @@ bool regMaskTP::IsRegNumInMask(regNumber reg) { return (low & genRegMask(reg)) != 0; } -#endif // TARGET_ARM64 + diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 1c5cd006c00c0b..fd256e3e243eda 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -224,15 +224,15 @@ typedef unsigned regMaskSmall; #define REG_MASK_INT_FMT "%08X" #define REG_MASK_ALL_FMT "%08X" #else -typedef unsigned __int64 regMaskSmall; +typedef uint64_t regMaskSmall; #define REG_MASK_INT_FMT "%04llX" #define REG_MASK_ALL_FMT "%016llX" #endif typedef regMaskSmall SingleTypeRegSet; - -#define REG_MASK_INT_FMT "%04llX" -#define REG_MASK_ALL_FMT "%016llX" +// +//#define REG_MASK_INT_FMT "%04llX" +//#define REG_MASK_ALL_FMT "%016llX" #endif struct regMaskTP @@ -245,10 +245,6 @@ struct regMaskTP : low(lowRegMask) , high(highRegMask) { - } - : low(regMask) - , high(RBM_NONE) - { } regMaskTP() @@ -362,7 +358,7 @@ static bool operator!=(regMaskTP first, regMaskTP second) #ifdef TARGET_ARM static regMaskTP operator-(regMaskTP first, regMaskTP second) { - regMaskTP result(~first.getLow(), ~first.getHigh()); + regMaskTP result(first.getLow() - first.getHigh()); return result; } @@ -380,7 +376,7 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) static regMaskTP operator~(regMaskTP first) { - regMaskTP result(~first.getLow()); + regMaskTP result(~first.getLow(), ~first.getHigh()); return result; } @@ -617,7 +613,7 @@ inline 
regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv) // theFixedRetBuffMask: // Returns the regNumber to use for the fixed return buffer // -inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv) +inline SingleTypeRegSet theFixedRetBuffMask(CorInfoCallConvExtension callConv) { assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method #if defined(TARGET_ARM64) @@ -652,9 +648,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv) // Returns the full mask of all possible integer registers // Note this includes the fixed return buffer register on Arm64 // -inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv) +inline SingleTypeRegSet fullIntArgRegMask(CorInfoCallConvExtension callConv) { - regMaskTP result = RBM_ARG_REGS; + SingleTypeRegSet result = RBM_ARG_REGS; if (hasFixedRetBuffReg(callConv)) { result |= theFixedRetBuffMask(callConv); @@ -813,10 +809,10 @@ inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* // For registers that are used in pairs, the caller will be handling // each member of the pair separately. 
// -inline regMaskTP genRegMask(regNumber regNum, var_types type) +inline SingleTypeRegSet genRegMask(regNumber regNum, var_types type) { #if defined(TARGET_ARM) - regMaskTP regMask = RBM_NONE; + SingleTypeRegSet regMask = RBM_NONE; if (varTypeUsesIntReg(type)) { From 31498ef55ae00f5120f015266ef6fee7589cf2d9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 15:44:41 -0700 Subject: [PATCH 24/44] passing arm64 build --- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/compiler.hpp | 10 +++---- src/coreclr/jit/emit.h | 4 +-- src/coreclr/jit/lsra.cpp | 4 +-- src/coreclr/jit/lsraarmarch.cpp | 44 +++++++++++++++---------------- src/coreclr/jit/lsrabuild.cpp | 7 ++--- src/coreclr/jit/target.h | 16 ++++++++++- 8 files changed, 50 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8657a351899fc9..6cfb84b5859d94 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -165,7 +165,7 @@ SingleTypeRegSet NodeInternalRegisters::GetAll(GenTree* tree) unsigned NodeInternalRegisters::Count(GenTree* tree, SingleTypeRegSet mask) { SingleTypeRegSet regs; - return m_table.Lookup(tree, ®s) ? genCountBits(regs & mask) : 0; + return m_table.Lookup(tree, ®s) ? 
PopCount(regs & mask) : 0; } // CodeGen constructor diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f9c66b63d15afc..85209812b6d3ea 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1016,7 +1016,7 @@ class LclVarDsc regMaskTP lvRegMask() const { - regMaskTP regMask = RBM_NONE; + regMaskTP regMask(RBM_NONE, RBM_NONE); if (GetRegNum() != REG_STK) { if (varTypeUsesFloatReg(this)) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index f393693b792770..9c902a0b00ec24 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -935,7 +935,7 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) inline regNumber genRegNumFromMask(regMaskTP mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ @@ -960,7 +960,7 @@ inline regNumber genRegNumFromMask(regMaskTP mask) inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ @@ -983,7 +983,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) inline regNumber genFirstRegNumFromMask(regMaskTP mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ @@ -3504,7 +3504,7 @@ inline regMaskTP genIntAllRegArgMask(unsigned numRegs) { assert(numRegs <= MAX_REG_ARG); - regMaskTP result = RBM_NONE; + regMaskTP result(RBM_NONE, RBM_NONE); for (unsigned i = 0; i < numRegs; i++) { result |= intArgMasks[i]; @@ -3517,7 +3517,7 @@ inline regMaskTP genFltAllRegArgMask(unsigned numRegs) #ifndef TARGET_X86 
assert(numRegs <= MAX_FLOAT_REG_ARG); - regMaskTP result = RBM_NONE; + regMaskTP result(RBM_NONE, RBM_NONE); for (unsigned i = 0; i < numRegs; i++) { result |= fltArgMasks[i]; diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 9f722d32e91182..b13d62a00758c1 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3526,8 +3526,8 @@ class emitter VarSetOps::AssignNoCopy(emitComp, debugPrevGCrefVars, VarSetOps::MakeEmpty(emitComp)); VarSetOps::AssignNoCopy(emitComp, debugThisGCrefVars, VarSetOps::MakeEmpty(emitComp)); debugPrevRegPtrDsc = nullptr; - debugPrevGCrefRegs = RBM_NONE; - debugPrevByrefRegs = RBM_NONE; + debugPrevGCrefRegs = regMaskTP(RBM_NONE, RBM_NONE); + debugPrevByrefRegs = regMaskTP(RBM_NONE, RBM_NONE); #endif } }; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 50d60fff613aa4..ae088d62cd997a 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -489,7 +489,7 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, unsigned minRegCount) { SingleTypeRegSet newMask = regMaskActual & regMaskConstraint; - if (genCountBits(newMask) < minRegCount) + if (PopCount(newMask) < minRegCount) { // Constrained mask does not have minimum required registers needed. return regMaskActual; @@ -13634,7 +13634,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* { // We should have a single candidate that will be used for subsequent // refpositions. 
- assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); + assert((refPosition->refType == RefTypeUpperVectorRestore) || genMaxOneBit(candidates)); freeCandidates = candidates & linearScan->m_AvailableRegs.GetRegSetForType(regType); } diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 0c6daac078c0d2..65d2a022035498 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -430,29 +430,29 @@ int LinearScan::BuildCall(GenTreeCall* call) return srcCount; } -//------------------------------------------------------------------------ -// BuildDefWithKills: Build one RefTypeDef RefPositions for the given node, -// as well as kills as specified by the given mask. -// -// Arguments: -// tree - The call node that defines a register -// dstCandidates - The candidate registers for the definition -// killMask - The mask of registers killed by this node -// -// Notes: -// Adds the RefInfo for the definitions to the defList. -// The def and kill functionality is folded into a single method so that the -// save and restores of upper vector registers can be bracketed around the def. +////------------------------------------------------------------------------ +//// BuildDefWithKills: Build one RefTypeDef RefPositions for the given node, +//// as well as kills as specified by the given mask. +//// +//// Arguments: +//// tree - The call node that defines a register +//// dstCandidates - The candidate registers for the definition +//// killMask - The mask of registers killed by this node +//// +//// Notes: +//// Adds the RefInfo for the definitions to the defList. +//// The def and kill functionality is folded into a single method so that the +//// save and restores of upper vector registers can be bracketed around the def. 
+//// +//void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) +//{ +// assert(!tree->AsCall()->HasMultiRegRetVal()); +// assert((int)PopCount(dstCandidates) == 1); // -void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) -{ - assert(!tree->AsCall()->HasMultiRegRetVal()); - assert((int)genCountBits(dstCandidates) == 1); - - // Build the kill RefPositions - BuildKills(tree, killMask); - BuildDef(tree, dstCandidates); -} +// // Build the kill RefPositions +// BuildKills(tree, killMask); +// BuildDef(tree, dstCandidates); +//} //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 1684c805a41d56..7170d0aa64f05a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3159,9 +3159,6 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida assert((int)genCountBits(dstCandidates) == dstCount); assert(tree->IsMultiRegCall()); - const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); - assert(retTypeDesc != nullptr); - for (int i = 0; i < dstCount; i++) { // In case of multi-reg call node, we have to query the i'th position return register. 
@@ -3192,7 +3189,7 @@ void LinearScan::BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCand { assert(dstCount > 0); - if ((dstCandidates == RBM_NONE) || ((int)genCountBits(dstCandidates) != dstCount)) + if ((dstCandidates == RBM_NONE) || ((int)PopCount(dstCandidates) != dstCount)) { // This is not fixedReg case, so just create definitions based on dstCandidates for (int i = 0; i < dstCount; i++) @@ -3265,7 +3262,7 @@ void LinearScan::BuildKills(GenTree* tree, regMaskTP killMask) void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) { assert(!tree->AsCall()->HasMultiRegRetVal()); - assert((int)genCountBits(dstCandidates) == 1); + assert((int)PopCount(dstCandidates) == 1); // Build the kill RefPositions BuildKills(tree, killMask); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index fd256e3e243eda..46cacc79849f63 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -233,7 +233,7 @@ typedef regMaskSmall SingleTypeRegSet; // //#define REG_MASK_INT_FMT "%04llX" //#define REG_MASK_ALL_FMT "%016llX" -#endif +//#endif struct regMaskTP { @@ -247,6 +247,14 @@ struct regMaskTP { } + + constexpr regMaskTP(uint64_t lowRegMask) + : low(lowRegMask) + , high(RBM_NONE) + { + // intentionally do not initialize high + } + regMaskTP() { } @@ -321,6 +329,12 @@ static regMaskTP operator&(regMaskTP first, regMaskTP second) return result; } +//static SingleTypeRegSet operator&(regMaskTP first, SingleTypeRegSet second) +//{ +// regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); +// return result; +//} + static regMaskTP operator|(regMaskTP first, regMaskTP second) { regMaskTP result(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); From f17257dbb1714aafa53805ed66edc778061f246d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 16:29:10 -0700 Subject: [PATCH 25/44] clrjit build works --- src/coreclr/jit/codegeninterface.h | 
16 +++++----- src/coreclr/jit/compiler.h | 24 +++++++-------- src/coreclr/jit/lsra.cpp | 28 +++++++++--------- src/coreclr/jit/lsra.h | 22 +++++++------- src/coreclr/jit/lsraxarch.cpp | 47 +++++++++++++++--------------- 5 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 55cdadb076800a..9b96092c1c60b8 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -75,31 +75,31 @@ class CodeGenInterface } #if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet rbmMskCalleeTrash; // Call this function after the equivalent fields in Compiler have been initialized. 
void CopyRegisterInfo(); - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 85209812b6d3ea..95bfcb1625d07a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11246,8 +11246,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLFLOAT(); - // regMaskTP get_RBM_FLT_CALLEE_TRASH(); + // SingleTypeRegSet get_RBM_ALLFLOAT(); + // SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_FLOAT(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11256,16 +11256,16 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_AMD64 requires one. 
// - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; unsigned cntCalleeTrashFloat; public: - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } @@ -11284,8 +11284,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLMASK(); - // regMaskTP get_RBM_MSK_CALLEE_TRASH(); + // SingleTypeRegSet get_RBM_ALLMASK(); + // SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_MASK(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11294,17 +11294,17 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_XARCH requires one. // - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet rbmMskCalleeTrash; unsigned cntCalleeTrashMask; regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; public: - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ae088d62cd997a..6eb9674124f72b 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -519,32 +519,34 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. 
-static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); +static const SingleTypeRegSet LsraLimitSmallIntSet = + (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. -static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); +static const SingleTypeRegSet LsraLimitSmallIntSet = + (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI -static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -static const regMaskTP LsraLimitUpperSimdSet = +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const SingleTypeRegSet LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. 
-static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) -static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); -static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); #elif defined(TARGET_LOONGARCH64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #elif defined(TARGET_RISCV64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -837,7 +839,7 @@ LinearScan::LinearScan(Compiler* 
theCompiler) #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); #endif #if ETW_EBP_FRAMED diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 97670a7ff436e8..b4e370f1f11e43 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1990,8 +1990,8 @@ class LinearScan : public LinearScanInterface int BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates = RBM_NONE); int BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates); #ifdef TARGET_XARCH - int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); - inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree); + int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates = RBM_NONE); + inline SingleTypeRegSet BuildEvexIncompatibleMask(GenTree* tree); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); // This is the main entry point for building the RefPositions for a node. 
@@ -2096,28 +2096,28 @@ class LinearScan : public LinearScanInterface int BuildLclHeap(GenTree* tree); #if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet rbmMskCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -2152,7 +2152,7 @@ class LinearScan : public LinearScanInterface // Not all of the callee trash values are constant, so don't declare this as a method local static // doing so results in significantly more complex codegen and we'd rather just initialize this once // as part of initializing LSRA instead - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; #endif // TARGET_XARCH //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index a3d7647ad36512..54de135a1e00b1 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -442,7 +442,7 @@ int LinearScan::BuildNode(GenTree* tree) // Comparand is preferenced to RAX. // The remaining two operands can be in any reg other than RAX. 
- const regMaskTP nonRaxCandidates = availableIntRegs & ~RBM_RAX; + const SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX; BuildUse(addr, nonRaxCandidates); BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS) : nonRaxCandidates); BuildUse(comparand, RBM_RAX); @@ -778,11 +778,11 @@ bool LinearScan::isRMWRegOper(GenTree* tree) } // Support for building RefPositions for RMW nodes. -int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates) +int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates) { int srcCount = 0; - regMaskTP op1Candidates = candidates; - regMaskTP op2Candidates = candidates; + SingleTypeRegSet op1Candidates = candidates; + SingleTypeRegSet op2Candidates = candidates; #ifdef TARGET_X86 if (varTypeIsByte(node)) @@ -1042,8 +1042,8 @@ int LinearScan::BuildShiftRotate(GenTree* tree) int srcCount = 0; GenTree* shiftBy = tree->gtGetOp2(); GenTree* source = tree->gtGetOp1(); - regMaskTP srcCandidates = RBM_NONE; - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet srcCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. @@ -1151,7 +1151,7 @@ int LinearScan::BuildCall(GenTreeCall* call) const ReturnTypeDesc* retTypeDesc = nullptr; int srcCount = 0; int dstCount = 0; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; assert(!call->isContained()); if (call->TypeGet() != TYP_VOID) @@ -1321,7 +1321,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // set reg requirements on call target represented as control sequence. 
if (ctrlExpr != nullptr) { - regMaskTP ctrlExprCandidates = RBM_NONE; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into appropriate registers. @@ -1422,9 +1422,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; RefPosition* internalIntDef = nullptr; #ifdef TARGET_X86 @@ -1543,7 +1543,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // or if are but the remainder is a power of 2 and less than the // size of a register - regMaskTP regMask = availableIntRegs; + SingleTypeRegSet regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) { @@ -1915,7 +1915,7 @@ int LinearScan::BuildModDiv(GenTree* tree) { GenTree* op1 = tree->gtGetOp1(); GenTree* op2 = tree->gtGetOp2(); - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; int srcCount = 0; if (varTypeIsFloating(tree->TypeGet())) @@ -2131,7 +2131,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou dstCount = 0; } - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; if (intrinsicTree->GetOperandCount() == 0) { @@ -2721,7 +2721,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (buildUses) { - regMaskTP op1RegCandidates = RBM_NONE; + SingleTypeRegSet op1RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2746,7 +2746,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op2 != nullptr) { - regMaskTP op2RegCandidates = RBM_NONE; + SingleTypeRegSet op2RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2792,7 +2792,7 @@ 
int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op3 != nullptr) { - regMaskTP op3RegCandidates = RBM_NONE; + SingleTypeRegSet op3RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) if (!isEvexCompatible) @@ -2806,7 +2806,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (op4 != nullptr) { - regMaskTP op4RegCandidates = RBM_NONE; + SingleTypeRegSet op4RegCandidates = RBM_NONE; #if defined(TARGET_AMD64) assert(isEvexCompatible); @@ -2872,7 +2872,7 @@ int LinearScan::BuildCast(GenTreeCast* cast) buildInternalIntRegisterDefForNode(cast); } - regMaskTP candidates = RBM_NONE; + SingleTypeRegSet candidates = RBM_NONE; #ifdef TARGET_X86 if (varTypeIsByte(castType)) { @@ -2922,8 +2922,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #endif // FEATURE_SIMD - regMaskTP indirCandidates = RBM_NONE; - int srcCount = BuildIndirUses(indirTree, indirCandidates); + int srcCount = BuildIndirUses(indirTree); if (indirTree->gtOper == GT_STOREIND) { GenTree* source = indirTree->gtGetOp2(); @@ -2939,7 +2938,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } else { - regMaskTP srcCandidates = RBM_NONE; + SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 // Determine if we need byte regs for the non-mem source, if any. @@ -3046,7 +3045,7 @@ int LinearScan::BuildMul(GenTree* tree) int srcCount = BuildBinaryUses(tree->AsOp()); int dstCount = 1; - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0); bool requiresOverflowCheck = tree->gtOverflowEx(); @@ -3144,7 +3143,7 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/) // RBM_NONE if compatible with EVEX (or not a floating/SIMD register), // lowSIMDRegs() (XMM0-XMM16) otherwise. 
// -inline regMaskTP LinearScan::BuildEvexIncompatibleMask(GenTree* tree) +inline SingleTypeRegSet LinearScan::BuildEvexIncompatibleMask(GenTree* tree) { #if defined(TARGET_AMD64) assert(!varTypeIsMask(tree)); From a85312db9d166e0e2b10a10a4914f1d30cb08aab Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 18:20:34 -0700 Subject: [PATCH 26/44] clrjit_universal_arm_x64 build works --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsraarm.cpp | 2 +- src/coreclr/jit/lsraarmarch.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/target.h | 12 ++++++++++++ 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 6eb9674124f72b..ddc67dab08b0a7 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8696,7 +8696,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, VarToRegMap toVarToRegMap = toBlock == nullptr ? nullptr : getInVarToRegMap(toBlock->bbNum); #ifdef TARGET_ARM - regMaskTP freeRegs; + SingleTypeRegSet freeRegs; if (type == TYP_DOUBLE) { // We have to consider all float registers for TYP_DOUBLE diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index e653514d3c14c0..41509416c41a6f 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -667,7 +667,7 @@ int LinearScan::BuildNode(GenTree* tree) { assert(dstCount == 1); regNumber argReg = tree->GetRegNum(); - regMaskTP argMask = RBM_NONE; + SingleTypeRegSet argMask = RBM_NONE; if (argReg != REG_COUNT) { argMask = genRegMask(argReg); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 65d2a022035498..1fb69ee7b04b97 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -213,7 +213,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // the target. 
We do not handle these constraints on the same // refposition too well so we help ourselves a bit here by forcing the // null check with LR. - regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; + SingleTypeRegSet candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; buildInternalIntRegisterDefForNode(call, candidates); } #endif // TARGET_ARM diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 7170d0aa64f05a..0020a41c22a85a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4413,7 +4413,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) if (node->TypeGet() == TYP_LONG) { srcCount++; - regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg)); + SingleTypeRegSet argMaskHi = genRegMask(REG_NEXT(argReg)); assert(genRegArgNext(argReg) == REG_NEXT(argReg)); use = BuildUse(op1, argMaskHi, 1); BuildDef(node, argMask, 0); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 46cacc79849f63..b5532e1d95dadb 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -381,6 +381,18 @@ static bool operator>(regMaskTP first, regMaskTP second) return first.getLow() > second.getLow(); } +static regMaskTP operator<<(regMaskTP& first, const int b) +{ + regMaskTP result(first.getLow() << b); + return result; +} + +static regMaskTP operator>>(regMaskTP& first, const int b) +{ + regMaskTP result(first.getLow() >> b); + return result; +} + static regMaskTP& operator<<=(regMaskTP& first, const int b) { first = first << b; From d7c10101f75acda303eec8c8440070426a2bc49d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 18:21:39 -0700 Subject: [PATCH 27/44] clrjit_unix_x64_x64 build works --- src/coreclr/jit/lsraxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 54de135a1e00b1..f63c94ad40c2cb 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ 
-1807,7 +1807,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. if ((loadSize % XMM_REGSIZE_BYTES) != 0) { - regMaskTP regMask = availableIntRegs; + SingleTypeRegSet regMask = availableIntRegs; #ifdef TARGET_X86 // Storing at byte granularity requires a byteable register. if ((loadSize & 1) != 0) From ba4ef0d6438d2a323e6729ca730642dc4569f0d4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 18:29:15 -0700 Subject: [PATCH 28/44] clrjit_win_x86_x64 build works --- src/coreclr/jit/codegencommon.cpp | 32 +++++++++++++++---------------- src/coreclr/jit/lsraxarch.cpp | 2 +- src/coreclr/jit/target.h | 8 ++++---- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 6cfb84b5859d94..581cd13c2ece90 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6252,26 +6252,26 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* regMaskTP pushedRegs = regs; - for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskTP regMask = genRegMask(reg); - if ((regBit & regs) == RBM_NONE) + if ((regMask & pushedRegs) == RBM_NONE) continue; var_types type; - if (regBit & gcInfo.gcRegGCrefSetCur) + if (regMask & gcInfo.gcRegGCrefSetCur) { type = TYP_REF; } - else if (regBit & gcInfo.gcRegByrefSetCur) + else if (regMask & gcInfo.gcRegByrefSetCur) { - *byrefRegs |= regBit; + *byrefRegs |= regMask; type = TYP_BYREF; } else if (noRefRegs != NULL) { - *noRefRegs |= regBit; + *noRefRegs |= regMask; type = TYP_I_IMPL; } else @@ -6282,9 +6282,7 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* inst_RV(INS_push, reg, type); genSinglePush(); - 
gcInfo.gcMarkRegSetNpt(regBit); - - regs &= ~regBit; + gcInfo.gcMarkRegSetNpt(regMask); } return pushedRegs; @@ -6323,20 +6321,22 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); + regMaskTP popedRegs = regs; + // Walk the registers in the reverse order as genPushRegs() - for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) + for (regNumber reg = REG_INT_LAST; reg >= REG_INT_LAST; reg = REG_PREV(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskTP regMask = genRegMask(reg); - if ((regBit & regs) == RBM_NONE) + if ((regMask & popedRegs) == RBM_NONE) continue; var_types type; - if (regBit & byrefRegs) + if (regMask & byrefRegs) { type = TYP_BYREF; } - else if (regBit & noRefRegs) + else if (regMask & noRefRegs) { type = TYP_INT; } @@ -6350,8 +6350,6 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg if (type != TYP_INT) gcInfo.gcMarkRegPtrVal(reg, type); - - regs &= ~regBit; } #endif // FEATURE_FIXED_OUT_ARGS diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index f63c94ad40c2cb..c037000164cd2f 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -787,7 +787,7 @@ int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTy #ifdef TARGET_X86 if (varTypeIsByte(node)) { - regMaskTP byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs()); + SingleTypeRegSet byteCandidates = (candidates == RBM_NONE) ? 
allByteRegs() : (candidates & allByteRegs()); if (!op1->isContained()) { assert(byteCandidates != RBM_NONE); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index b5532e1d95dadb..de73f237ec0763 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -241,14 +241,14 @@ struct regMaskTP regMaskSmall low; uint64_t high; public: - constexpr regMaskTP(uint64_t lowRegMask, uint64_t highRegMask) + constexpr regMaskTP(regMaskSmall lowRegMask, uint64_t highRegMask) : low(lowRegMask) , high(highRegMask) { } - constexpr regMaskTP(uint64_t lowRegMask) + constexpr regMaskTP(regMaskSmall lowRegMask) : low(lowRegMask) , high(RBM_NONE) { @@ -287,12 +287,12 @@ struct regMaskTP } #endif - SingleTypeRegSet getLow() const + regMaskSmall getLow() const { return low; } - SingleTypeRegSet getHigh() const + uint64_t getHigh() const { return high; } From 18684242f7014413d5804e896b89e9d2c7038d12 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 21:11:06 -0700 Subject: [PATCH 29/44] fix a bug in size --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lsra.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 95bfcb1625d07a..3f17f46c745122 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11297,7 +11297,7 @@ class Compiler SingleTypeRegSet rbmAllMask; SingleTypeRegSet rbmMskCalleeTrash; unsigned cntCalleeTrashMask; - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; public: FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ddc67dab08b0a7..cdb56242d2b1cb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -782,7 +782,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_XARCH) rbmAllMask = compiler->rbmAllMask; rbmMskCalleeTrash = compiler->rbmMskCalleeTrash; - 
memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); if (!compiler->canUseEvexEncoding()) { From 384fef8673fb9aa8f9c5733f95e57c5126d220bf Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 21:11:14 -0700 Subject: [PATCH 30/44] delete unwanted method --- src/coreclr/jit/compiler.hpp | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 9c902a0b00ec24..648e2acc65acdc 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -3496,39 +3496,6 @@ inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type, CorInf } } -/*****************************************************************************/ -/* Return a register mask with the first 'numRegs' argument registers set. - */ - -inline regMaskTP genIntAllRegArgMask(unsigned numRegs) -{ - assert(numRegs <= MAX_REG_ARG); - - regMaskTP result(RBM_NONE, RBM_NONE); - for (unsigned i = 0; i < numRegs; i++) - { - result |= intArgMasks[i]; - } - return result; -} - -inline regMaskTP genFltAllRegArgMask(unsigned numRegs) -{ -#ifndef TARGET_X86 - assert(numRegs <= MAX_FLOAT_REG_ARG); - - regMaskTP result(RBM_NONE, RBM_NONE); - for (unsigned i = 0; i < numRegs; i++) - { - result |= fltArgMasks[i]; - } - return result; -#else - assert(!"no x86 float arg regs\n"); - return RBM_NONE; -#endif -} - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX From f0ef2bc4a04d0f671a530a72d774a02599a02bdd Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 May 2024 22:02:40 -0700 Subject: [PATCH 31/44] jit format --- src/coreclr/jit/compiler.cpp | 4 ++-- src/coreclr/jit/compiler.h | 4 ++-- src/coreclr/jit/lsra.cpp | 5 +++-- 
src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm.cpp | 2 +- src/coreclr/jit/lsraarmarch.cpp | 14 +++++++------- src/coreclr/jit/lsrabuild.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 22 +++++++++++----------- src/coreclr/jit/regMaskTPOps.cpp | 1 - src/coreclr/jit/target.h | 17 ++++++++--------- 10 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 54be15ed7f20bc..1b5592a5b59e5a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3485,12 +3485,12 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // Make sure we copy the register info and initialize the // trash regs after the underlying fields are initialized - const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{ + const SingleTypeRegSet vtCalleeTrashRegs[TYP_COUNT]{ #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP }; - memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); codeGen->CopyRegisterInfo(); #endif // TARGET_XARCH diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ed6fd73b39ede0..6db5378625679e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11258,7 +11258,7 @@ class Compiler // SingleTypeRegSet rbmAllFloat; SingleTypeRegSet rbmFltCalleeTrash; - unsigned cntCalleeTrashFloat; + unsigned cntCalleeTrashFloat; public: FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const @@ -11296,7 +11296,7 @@ class Compiler // SingleTypeRegSet rbmAllMask; SingleTypeRegSet rbmMskCalleeTrash; - unsigned cntCalleeTrashMask; + unsigned cntCalleeTrashMask; SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; public: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index cdb56242d2b1cb..df3326d338c9b9 100644 --- a/src/coreclr/jit/lsra.cpp +++ 
b/src/coreclr/jit/lsra.cpp @@ -305,7 +305,7 @@ SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); SingleTypeRegSet candidates = (mask & m_RegistersWithConstants).GetRegSetForType(currentInterval->registerType); - SingleTypeRegSet result = RBM_NONE; + SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { regNumber regNum = genFirstRegNumFromMask(candidates); @@ -497,7 +497,8 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); //TODO: Pass the right type + SingleTypeRegSet busyRegs = + (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); // TODO: Pass the right type if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. 
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index b4e370f1f11e43..43232954877895 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1990,7 +1990,7 @@ class LinearScan : public LinearScanInterface int BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates = RBM_NONE); int BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates); #ifdef TARGET_XARCH - int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates = RBM_NONE); + int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates = RBM_NONE); inline SingleTypeRegSet BuildEvexIncompatibleMask(GenTree* tree); #endif // !TARGET_XARCH int BuildSelect(GenTreeOp* select); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 41509416c41a6f..f2c60cde13eb04 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -666,7 +666,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_BITCAST: { assert(dstCount == 1); - regNumber argReg = tree->GetRegNum(); + regNumber argReg = tree->GetRegNum(); SingleTypeRegSet argMask = RBM_NONE; if (argReg != REG_COUNT) { diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 1fb69ee7b04b97..4e47080fed9cc4 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -444,15 +444,15 @@ int LinearScan::BuildCall(GenTreeCall* call) //// The def and kill functionality is folded into a single method so that the //// save and restores of upper vector registers can be bracketed around the def. 
//// -//void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) +// void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) //{ -// assert(!tree->AsCall()->HasMultiRegRetVal()); -// assert((int)PopCount(dstCandidates) == 1); +// assert(!tree->AsCall()->HasMultiRegRetVal()); +// assert((int)PopCount(dstCandidates) == 1); // -// // Build the kill RefPositions -// BuildKills(tree, killMask); -// BuildDef(tree, dstCandidates); -//} +// // Build the kill RefPositions +// BuildKills(tree, killMask); +// BuildDef(tree, dstCandidates); +// } //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 7867f46ffdf836..cb083b73d31784 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1147,8 +1147,8 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { continue; } - Interval* interval = getIntervalForLocalVar(varIndex); - const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + Interval* interval = getIntervalForLocalVar(varIndex); + const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); if (isCallKill) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index c037000164cd2f..fe21be0ec8938d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -780,7 +780,7 @@ bool LinearScan::isRMWRegOper(GenTree* tree) // Support for building RefPositions for RMW nodes. 
int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates) { - int srcCount = 0; + int srcCount = 0; SingleTypeRegSet op1Candidates = candidates; SingleTypeRegSet op2Candidates = candidates; @@ -1039,9 +1039,9 @@ int LinearScan::BuildShiftRotate(GenTree* tree) // For shift operations, we need that the number // of bits moved gets stored in CL in case // the number of bits to shift is not a constant. - int srcCount = 0; - GenTree* shiftBy = tree->gtGetOp2(); - GenTree* source = tree->gtGetOp1(); + int srcCount = 0; + GenTree* shiftBy = tree->gtGetOp2(); + GenTree* source = tree->gtGetOp1(); SingleTypeRegSet srcCandidates = RBM_NONE; SingleTypeRegSet dstCandidates = RBM_NONE; @@ -1151,7 +1151,7 @@ int LinearScan::BuildCall(GenTreeCall* call) const ReturnTypeDesc* retTypeDesc = nullptr; int srcCount = 0; int dstCount = 0; - SingleTypeRegSet singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; assert(!call->isContained()); if (call->TypeGet() != TYP_VOID) @@ -1913,10 +1913,10 @@ int LinearScan::BuildLclHeap(GenTree* tree) // int LinearScan::BuildModDiv(GenTree* tree) { - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); SingleTypeRegSet dstCandidates = RBM_NONE; - int srcCount = 0; + int srcCount = 0; if (varTypeIsFloating(tree->TypeGet())) { @@ -2922,7 +2922,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree) } #endif // FEATURE_SIMD - int srcCount = BuildIndirUses(indirTree); + int srcCount = BuildIndirUses(indirTree); if (indirTree->gtOper == GT_STOREIND) { GenTree* source = indirTree->gtGetOp2(); @@ -3043,8 +3043,8 @@ int LinearScan::BuildMul(GenTree* tree) return BuildSimple(tree); } - int srcCount = BuildBinaryUses(tree->AsOp()); - int dstCount = 1; + int srcCount = BuildBinaryUses(tree->AsOp()); + int dstCount = 1; SingleTypeRegSet dstCandidates = RBM_NONE; bool isUnsignedMultiply = ((tree->gtFlags & 
GTF_UNSIGNED) != 0); diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp index af503e26d24adc..c9f2acad46e3df 100644 --- a/src/coreclr/jit/regMaskTPOps.cpp +++ b/src/coreclr/jit/regMaskTPOps.cpp @@ -14,4 +14,3 @@ bool regMaskTP::IsRegNumInMask(regNumber reg) { return (low & genRegMask(reg)) != 0; } - diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index de73f237ec0763..69bf2bbc46f445 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -231,15 +231,15 @@ typedef uint64_t regMaskSmall; typedef regMaskSmall SingleTypeRegSet; // -//#define REG_MASK_INT_FMT "%04llX" -//#define REG_MASK_ALL_FMT "%016llX" -//#endif +// #define REG_MASK_INT_FMT "%04llX" +// #define REG_MASK_ALL_FMT "%016llX" +// #endif struct regMaskTP { private: regMaskSmall low; - uint64_t high; + uint64_t high; public: constexpr regMaskTP(regMaskSmall lowRegMask, uint64_t highRegMask) : low(lowRegMask) @@ -247,7 +247,6 @@ struct regMaskTP { } - constexpr regMaskTP(regMaskSmall lowRegMask) : low(lowRegMask) , high(RBM_NONE) @@ -329,11 +328,11 @@ static regMaskTP operator&(regMaskTP first, regMaskTP second) return result; } -//static SingleTypeRegSet operator&(regMaskTP first, SingleTypeRegSet second) +// static SingleTypeRegSet operator&(regMaskTP first, SingleTypeRegSet second) //{ -// regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); -// return result; -//} +// regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); +// return result; +// } static regMaskTP operator|(regMaskTP first, regMaskTP second) { From db03c23009567926ebc93ee546650d321ddf34af Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 23 May 2024 11:13:34 -0700 Subject: [PATCH 32/44] Remove high --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/emit.h | 4 ++-- src/coreclr/jit/lsrabuild.cpp | 4 ---- src/coreclr/jit/target.h | 45 +++++++---------------------------- 4 files changed, 12 insertions(+), 
43 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6db5378625679e..428c817c751c78 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1016,7 +1016,7 @@ class LclVarDsc regMaskTP lvRegMask() const { - regMaskTP regMask(RBM_NONE, RBM_NONE); + regMaskTP regMask = RBM_NONE; if (GetRegNum() != REG_STK) { if (varTypeUsesFloatReg(this)) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index b13d62a00758c1..9f722d32e91182 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -3526,8 +3526,8 @@ class emitter VarSetOps::AssignNoCopy(emitComp, debugPrevGCrefVars, VarSetOps::MakeEmpty(emitComp)); VarSetOps::AssignNoCopy(emitComp, debugThisGCrefVars, VarSetOps::MakeEmpty(emitComp)); debugPrevRegPtrDsc = nullptr; - debugPrevGCrefRegs = regMaskTP(RBM_NONE, RBM_NONE); - debugPrevByrefRegs = regMaskTP(RBM_NONE, RBM_NONE); + debugPrevGCrefRegs = RBM_NONE; + debugPrevByrefRegs = RBM_NONE; #endif } }; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index cb083b73d31784..24eb824907db7f 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2824,9 +2824,6 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } -#ifdef TARGET_ARM64 - actualRegistersMask = regMaskTP(~RBM_NONE, RBM_NONE); -#else if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount @@ -2836,7 +2833,6 @@ void LinearScan::buildIntervals() { actualRegistersMask = ~RBM_NONE; } -#endif #ifdef DEBUG // Make sure we don't have any blocks that were not visited diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 69bf2bbc46f445..2cb4c55c5139a6 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -239,19 +239,10 @@ struct regMaskTP { private: regMaskSmall low; - uint64_t high; public: - constexpr regMaskTP(regMaskSmall lowRegMask, uint64_t highRegMask) - : low(lowRegMask) - , 
high(highRegMask) - { - } - constexpr regMaskTP(regMaskSmall lowRegMask) : low(lowRegMask) - , high(RBM_NONE) { - // intentionally do not initialize high } regMaskTP() @@ -291,14 +282,9 @@ struct regMaskTP return low; } - uint64_t getHigh() const - { - return high; - } - bool IsEmpty() { - return (low | high) == RBM_NONE; + return low == RBM_NONE; } bool IsNonEmpty() @@ -318,25 +304,19 @@ struct regMaskTP static regMaskTP operator^(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() ^ second.getLow(), first.getHigh() ^ second.getHigh()); + regMaskTP result(first.getLow() ^ second.getLow()); return result; } static regMaskTP operator&(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); + regMaskTP result(first.getLow() & second.getLow()); return result; } -// static SingleTypeRegSet operator&(regMaskTP first, SingleTypeRegSet second) -//{ -// regMaskTP result(first.getLow() & second.getLow(), first.getHigh() & second.getHigh()); -// return result; -// } - static regMaskTP operator|(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() | second.getLow(), first.getHigh() | second.getHigh()); + regMaskTP result(first.getLow() | second.getLow()); return result; } @@ -360,7 +340,7 @@ static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) static bool operator==(regMaskTP first, regMaskTP second) { - return (first.getLow() == second.getLow()) && (first.getHigh() == second.getHigh()); + return (first.getLow() == second.getLow()); } static bool operator!=(regMaskTP first, regMaskTP second) @@ -371,7 +351,7 @@ static bool operator!=(regMaskTP first, regMaskTP second) #ifdef TARGET_ARM static regMaskTP operator-(regMaskTP first, regMaskTP second) { - regMaskTP result(first.getLow() - first.getHigh()); + regMaskTP result(first.getLow() - second.getLow()); return result; } @@ -401,25 +381,18 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) 
static regMaskTP operator~(regMaskTP first) { - regMaskTP result(~first.getLow(), ~first.getHigh()); + regMaskTP result(~first.getLow()); return result; } static uint32_t PopCount(regMaskTP value) { - return BitOperations::PopCount(value.getLow()) + BitOperations::PopCount(value.getHigh()); + return BitOperations::PopCount(value.getLow()); } static uint32_t BitScanForward(regMaskTP mask) { - if (mask.getLow() != RBM_NONE) - { - return BitOperations::BitScanForward(mask.getLow()); - } - else - { - return 64 + BitOperations::BitScanForward(mask.getHigh()); - } + return BitOperations::BitScanForward(mask.getLow()); } /*****************************************************************************/ From d0941097f9b57d59f8846debdc197edc17d47066 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 23 May 2024 14:14:00 -0700 Subject: [PATCH 33/44] Continue using regMaskTP for NodeInternalRegisters --- src/coreclr/jit/codegencommon.cpp | 22 +++++++++++----------- src/coreclr/jit/codegeninterface.h | 12 ++++++------ src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/target.h | 8 ++------ 4 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 581cd13c2ece90..89944fd1f51977 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -74,11 +74,11 @@ NodeInternalRegisters::NodeInternalRegisters(Compiler* comp) // tree - IR node to add internal allocated registers to // regs - Registers to add // -void NodeInternalRegisters::Add(GenTree* tree, SingleTypeRegSet regs) +void NodeInternalRegisters::Add(GenTree* tree, regMaskTP regs) { assert(regs != RBM_NONE); - SingleTypeRegSet* result = m_table.LookupPointerOrAdd(tree, RBM_NONE); + regMaskTP* result = m_table.LookupPointerOrAdd(tree, RBM_NONE); *result |= regs; } @@ -95,12 +95,12 @@ void NodeInternalRegisters::Add(GenTree* tree, SingleTypeRegSet regs) // Returns: // Register number. 
// -regNumber NodeInternalRegisters::Extract(GenTree* tree, SingleTypeRegSet mask) +regNumber NodeInternalRegisters::Extract(GenTree* tree, regMaskTP mask) { - SingleTypeRegSet* regs = m_table.LookupPointer(tree); + regMaskTP* regs = m_table.LookupPointer(tree); assert(regs != nullptr); - SingleTypeRegSet availableSet = *regs & mask; + regMaskTP availableSet = *regs & mask; assert(availableSet != RBM_NONE); regNumber result = genFirstRegNumFromMask(availableSet); @@ -122,9 +122,9 @@ regNumber NodeInternalRegisters::Extract(GenTree* tree, SingleTypeRegSet mask) // Returns: // Register number. // -regNumber NodeInternalRegisters::GetSingle(GenTree* tree, SingleTypeRegSet mask) +regNumber NodeInternalRegisters::GetSingle(GenTree* tree, regMaskTP mask) { - SingleTypeRegSet* regs = m_table.LookupPointer(tree); + regMaskTP* regs = m_table.LookupPointer(tree); assert(regs != nullptr); regMaskTP availableSet = *regs & mask; @@ -145,9 +145,9 @@ regNumber NodeInternalRegisters::GetSingle(GenTree* tree, SingleTypeRegSet mask) // Returns: // Mask of registers. // -SingleTypeRegSet NodeInternalRegisters::GetAll(GenTree* tree) +regMaskTP NodeInternalRegisters::GetAll(GenTree* tree) { - SingleTypeRegSet regs; + regMaskTP regs; return m_table.Lookup(tree, ®s) ? regs : RBM_NONE; } @@ -162,9 +162,9 @@ SingleTypeRegSet NodeInternalRegisters::GetAll(GenTree* tree) // Returns: // Count of nodes // -unsigned NodeInternalRegisters::Count(GenTree* tree, SingleTypeRegSet mask) +unsigned NodeInternalRegisters::Count(GenTree* tree, regMaskTP mask) { - SingleTypeRegSet regs; + regMaskTP regs; return m_table.Lookup(tree, ®s) ? 
PopCount(regs & mask) : 0; } diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 9b96092c1c60b8..68a313812622a1 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -48,17 +48,17 @@ CodeGenInterface* getCodeGenerator(Compiler* comp); class NodeInternalRegisters { - typedef JitHashTable, SingleTypeRegSet> NodeInternalRegistersTable; + typedef JitHashTable, regMaskTP> NodeInternalRegistersTable; NodeInternalRegistersTable m_table; public: NodeInternalRegisters(Compiler* comp); - void Add(GenTree* tree, SingleTypeRegSet reg); - regNumber Extract(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); - regNumber GetSingle(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); - SingleTypeRegSet GetAll(GenTree* tree); - unsigned Count(GenTree* tree, SingleTypeRegSet mask = static_cast(-1)); + void Add(GenTree* tree, regMaskTP reg); + regNumber Extract(GenTree* tree, regMaskTP mask = static_cast(-1)); + regNumber GetSingle(GenTree* tree, regMaskTP mask = static_cast(-1)); + regMaskTP GetAll(GenTree* tree); + unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); }; class CodeGenInterface diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index df3326d338c9b9..65268c7ef4cdf0 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -9003,7 +9003,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) GenTree* switchTable = LIR::AsRange(block).LastNode(); assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE); - consumedRegs = compiler->codeGen->internalRegisters.GetAll(switchTable); + consumedRegs = compiler->codeGen->internalRegisters.GetAll(switchTable).GetRegSetForType(IntRegisterType); GenTree* op1 = switchTable->gtGetOp1(); GenTree* op2 = switchTable->gtGetOp2(); noway_assert(op1 != nullptr && op2 != nullptr); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 2cb4c55c5139a6..7e144eb9c5b213 
100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -230,18 +230,14 @@ typedef uint64_t regMaskSmall; #endif typedef regMaskSmall SingleTypeRegSet; -// -// #define REG_MASK_INT_FMT "%04llX" -// #define REG_MASK_ALL_FMT "%016llX" -// #endif struct regMaskTP { private: regMaskSmall low; public: - constexpr regMaskTP(regMaskSmall lowRegMask) - : low(lowRegMask) + constexpr regMaskTP(regMaskSmall regMask) + : low(regMask) { } From ca77f79511447c6cfabc3ced5f3b4b43e8f2251f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 23 May 2024 14:46:08 -0700 Subject: [PATCH 34/44] Pass regType to getConstrainedRegMask() --- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/gcencode.cpp | 2 +- src/coreclr/jit/lsra.cpp | 22 ++++++++++++---------- src/coreclr/jit/lsra.h | 3 ++- src/coreclr/jit/lsraarmarch.cpp | 24 ------------------------ src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/regMaskTPOps.cpp | 12 ++++++++++++ 7 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 89944fd1f51977..f8963becfe419a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -165,7 +165,7 @@ regMaskTP NodeInternalRegisters::GetAll(GenTree* tree) unsigned NodeInternalRegisters::Count(GenTree* tree, regMaskTP mask) { regMaskTP regs; - return m_table.Lookup(tree, ®s) ? PopCount(regs & mask) : 0; + return m_table.Lookup(tree, ®s) ? genCountBits(regs & mask) : 0; } // CodeGen constructor diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index d8ece2112d6fa2..902029791f20c0 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, while (regMask) { // Get hold of the next register bit. 
- SingleTypeRegSet tmpMask = genFindLowestBit(regMask); + regMaskSmall tmpMask = genFindLowestBit(regMask); assert(tmpMask); // Remember the new state of this register. diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 65268c7ef4cdf0..d6e68157d838cb 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -473,6 +473,7 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // // Arguments: // refPosition - RefPosition which we want to constrain. +// regType - Type of register for which we want constrained mask // regMaskActual - regMask that needs to be constrained // regMaskConstraint - regMask constraint that needs to be // applied to regMaskActual @@ -484,6 +485,7 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // Otherwise returns regMaskActual. // SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, SingleTypeRegSet regMaskActual, SingleTypeRegSet regMaskConstraint, unsigned minRegCount) @@ -498,7 +500,7 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { SingleTypeRegSet busyRegs = - (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(TYP_VOID); // TODO: Pass the right type + (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(regType); if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. @@ -566,7 +568,7 @@ static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | // This is the method used to implement the stress options that limit // the set of registers considered for allocation. 
// -SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, SingleTypeRegSet mask) +SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterType regType, SingleTypeRegSet mask) { #ifdef TARGET_ARM64 if ((refPosition != nullptr) && refPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) @@ -590,24 +592,24 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, SingleTyp case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED, minRegCount); } break; case LSRA_LIMIT_CALLER: { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); } break; case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet, minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallFPSet, minRegCount); } break; @@ -615,7 +617,7 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, SingleTyp case LSRA_LIMIT_UPPER_SIMD_SET: if ((mask & LsraLimitUpperSimdSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitUpperSimdSet, minRegCount); } break; #endif @@ -8717,7 +8719,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, return REG_NA; } #endif // DEBUG - INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs)); + INDEBUG(freeRegs = 
stressLimitRegs(nullptr, type, freeRegs)); freeRegs &= ~terminatorConsumedRegs; @@ -13377,7 +13379,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, regType, candidates); #endif assert(candidates != RBM_NONE); @@ -13830,7 +13832,7 @@ SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, regType, candidates); #endif assert(candidates != RBM_NONE); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 43232954877895..814e85fad17c5f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -776,10 +776,11 @@ class LinearScan : public LinearScanInterface } SingleTypeRegSet getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, SingleTypeRegSet regMaskActual, SingleTypeRegSet regMaskConstrain, unsigned minRegCount); - SingleTypeRegSet stressLimitRegs(RefPosition* refPosition, SingleTypeRegSet mask); + SingleTypeRegSet stressLimitRegs(RefPosition* refPosition, RegisterType regType, SingleTypeRegSet mask); // This controls the heuristics used to select registers // These can be combined. diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 4e47080fed9cc4..0341ee7c6e24a6 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -430,30 +430,6 @@ int LinearScan::BuildCall(GenTreeCall* call) return srcCount; } -////------------------------------------------------------------------------ -//// BuildDefWithKills: Build one RefTypeDef RefPositions for the given node, -//// as well as kills as specified by the given mask. 
-//// -//// Arguments: -//// tree - The call node that defines a register -//// dstCandidates - The candidate registers for the definition -//// killMask - The mask of registers killed by this node -//// -//// Notes: -//// Adds the RefInfo for the definitions to the defList. -//// The def and kill functionality is folded into a single method so that the -//// save and restores of upper vector registers can be bracketed around the def. -//// -// void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) -//{ -// assert(!tree->AsCall()->HasMultiRegRetVal()); -// assert((int)PopCount(dstCandidates) == 1); -// -// // Build the kill RefPositions -// BuildKills(tree, killMask); -// BuildDef(tree, dstCandidates); -// } - //------------------------------------------------------------------------ // BuildPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node // diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 24eb824907db7f..e657b3cf288f27 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1876,7 +1876,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc #endif // TARGET_ARM64 { newRefPosition->registerAssignment = - getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef); + getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask, minRegCountForRef); } if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp index c9f2acad46e3df..86de50a08cb95c 100644 --- a/src/coreclr/jit/regMaskTPOps.cpp +++ b/src/coreclr/jit/regMaskTPOps.cpp @@ -5,11 +5,23 @@ struct regMaskTP; +//------------------------------------------------------------------------ +// RemoveRegNumFromMask: Removes `reg` from the mask +// +// Parameters: +// reg - Register to remove +// 
void regMaskTP::RemoveRegNumFromMask(regNumber reg) { low &= ~genRegMask(reg); } +//------------------------------------------------------------------------ +// IsRegNumInMask: Checks if `reg` is in the mask +// +// Parameters: +// reg - Register to check +// bool regMaskTP::IsRegNumInMask(regNumber reg) { return (low & genRegMask(reg)) != 0; From e0b87d425aeaa75b43efba3a008fa3ef5bbe0167 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 23 May 2024 14:52:23 -0700 Subject: [PATCH 35/44] jit format --- src/coreclr/jit/codegeninterface.h | 10 +++++----- src/coreclr/jit/lsra.cpp | 3 +-- src/coreclr/jit/lsrabuild.cpp | 3 ++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 68a313812622a1..a025285cbc0917 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -49,16 +49,16 @@ CodeGenInterface* getCodeGenerator(Compiler* comp); class NodeInternalRegisters { typedef JitHashTable, regMaskTP> NodeInternalRegistersTable; - NodeInternalRegistersTable m_table; + NodeInternalRegistersTable m_table; public: NodeInternalRegisters(Compiler* comp); - void Add(GenTree* tree, regMaskTP reg); - regNumber Extract(GenTree* tree, regMaskTP mask = static_cast(-1)); - regNumber GetSingle(GenTree* tree, regMaskTP mask = static_cast(-1)); + void Add(GenTree* tree, regMaskTP reg); + regNumber Extract(GenTree* tree, regMaskTP mask = static_cast(-1)); + regNumber GetSingle(GenTree* tree, regMaskTP mask = static_cast(-1)); regMaskTP GetAll(GenTree* tree); - unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); + unsigned Count(GenTree* tree, regMaskTP mask = static_cast(-1)); }; class CodeGenInterface diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d6e68157d838cb..650ed9314cb56d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -499,8 +499,7 @@ SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* 
refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - SingleTypeRegSet busyRegs = - (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(regType); + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(regType); if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e657b3cf288f27..bfabc42cf91295 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1876,7 +1876,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc #endif // TARGET_ARM64 { newRefPosition->registerAssignment = - getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask, minRegCountForRef); + getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask, + minRegCountForRef); } if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && From 5b65ddc41ed1ec8eff5cf05a41b3287a8d354f7e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 10:33:52 -0700 Subject: [PATCH 36/44] fix a wrong parameter for consecutive register --- src/coreclr/jit/lsraarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4853a94950f725..5283d2fc00fff7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -531,7 +531,7 @@ SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandi // SingleTypeRegSet overallResultForBusy; SingleTypeRegSet consecutiveResultForBusy = - filterConsecutiveCandidates(floatFreeCandidates, registersNeeded, &overallResultForBusy); + filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy); *busyCandidates = consecutiveResultForBusy; From 
494d862ec1e6ae24c6a6e1b2143107e18e7f1691 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 10:44:34 -0700 Subject: [PATCH 37/44] fix riscv64 build errors --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsraarmarch.cpp | 2 +- src/coreclr/jit/lsraloongarch64.cpp | 6 +++--- src/coreclr/jit/lsrariscv64.cpp | 20 +++++++++++--------- 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 650ed9314cb56d..b642d59bca05a2 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -839,7 +839,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // Once that is addressed, we may consider allowing LR in availableIntRegs. availableIntRegs = ((RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd)).getLow(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); #else availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); #endif diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 0341ee7c6e24a6..85f1f664424046 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -603,7 +603,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) if (argNode->gtNumRegs == 1) { // We can use a ldr/str sequence so we need an internal register - buildInternalIntRegisterDefForNode(argNode, (allRegs(TYP_INT) & ~argMask)); + buildInternalIntRegisterDefForNode(argNode, allRegs(TYP_INT) & ~argMask); } // We will generate code that loads from the OBJ's address, which must be in a register. 
diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 5fda31befc69fa..f246de4347ac0a 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1062,9 +1062,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index d495246f9d1ea4..889f201f0608f8 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -840,7 +840,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -860,7 +860,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } GenTree* ctrlExpr = call->gtControlExpr; - regMaskTP ctrlExprCandidates = RBM_NONE; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -894,7 +894,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. 
- regMaskTP candidates = RBM_NONE; + SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; @@ -1145,13 +1145,15 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) int dstCount = argNode->gtNumRegs; regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } + assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || + ((argMask & availableFloatRegs) != RBM_NONE)); if (src->OperGet() == GT_FIELD_LIST) { @@ -1176,7 +1178,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - regMaskTP sourceMask = RBM_NONE; + SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); @@ -1234,9 +1236,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { @@ -1294,7 +1296,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. 
// We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = + SingleTypeRegSet internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); From eb4515d81d1f17557688e6089c1b98f4e1662194 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 10:51:06 -0700 Subject: [PATCH 38/44] jit format --- src/coreclr/jit/lsrariscv64.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 889f201f0608f8..ebea9cce714727 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -840,7 +840,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - SingleTypeRegSet singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -859,7 +859,7 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - GenTree* ctrlExpr = call->gtControlExpr; + GenTree* ctrlExpr = call->gtControlExpr; SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { @@ -1144,7 +1144,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); + regNumber argReg = argNode->GetRegNum(); SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { From 95abd7c09b5abaad3d8b2d3fc32f2dfc64e6943f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 14:24:08 -0700 Subject: [PATCH 39/44] Add high field --- src/coreclr/jit/compiler.hpp | 42 ++++++++++++++++++++++++------ src/coreclr/jit/lsra.cpp | 8 +++--- src/coreclr/jit/lsrabuild.cpp | 4 +++ src/coreclr/jit/regset.h | 2 +- 
src/coreclr/jit/target.h | 49 +++++++++++++++++++++++------------ 5 files changed, 75 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 648e2acc65acdc..975422682d70c8 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -933,7 +933,7 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) // Assumptions: // The mask contains one and only one register. -inline regNumber genRegNumFromMask(regMaskTP mask) +inline regNumber genRegNumFromMask(const regMaskTP& mask) { assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero @@ -947,6 +947,27 @@ inline regNumber genRegNumFromMask(regMaskTP mask) return regNum; } +//------------------------------------------------------------------------------ +// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. +// +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask. +// + +inline regNumber genFirstRegNumFromMask(const regMaskTP& mask) +{ + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + + regNumber regNum = (regNumber)BitScanForward(mask); + + return regNum; +} + //------------------------------------------------------------------------------ // genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a // register number and also toggle the bit in the `mask`. 
@@ -964,34 +985,39 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitScanForward(mask); + regNumber regNum = (regNumber)genFirstRegNumFromMask(mask); - mask ^= genRegMask(regNum); + mask ^= regNum; return regNum; } //------------------------------------------------------------------------------ -// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. -// +// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a +// register number and also toggle the bit in the `mask`. // Arguments: // mask - the register mask // // Return Value: -// The number of the first register contained in the mask. +// The number of the first register contained in the mask and updates the `mask` to toggle +// the bit. // -inline regNumber genFirstRegNumFromMask(regMaskTP mask) +inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) { - assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero + assert(mask != RBM_NONE); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ regNumber regNum = (regNumber)BitScanForward(mask); + mask ^= genRegMask(regNum); + return regNum; } + + /***************************************************************************** * * Return the size in bytes of the given type. 
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b642d59bca05a2..82357f2007bb2d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4022,7 +4022,7 @@ void LinearScan::processKills(RefPosition* killRefPosition) { RefPosition* nextKill = killRefPosition->nextRefPosition; - regMaskTP killedRegs = killRefPosition->registerAssignment; + SingleTypeRegSet killedRegs = killRefPosition->registerAssignment; while (killedRegs != RBM_NONE) { regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); @@ -4064,7 +4064,7 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) { // For each physical register that can hold a GC type, // if it is occupied by an interval of a GC type, spill that interval. - regMaskTP candidateRegs = killRefPosition->registerAssignment; + SingleTypeRegSet candidateRegs = killRefPosition->registerAssignment; INDEBUG(bool killedRegs = false); while (candidateRegs != RBM_NONE) { @@ -4157,7 +4157,7 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar { // If we're rotating the register locations at block boundaries, try to use // the next higher register number of the appropriate register type. - regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs; + SingleTypeRegSet candidateRegs = allRegs(interval->registerType) & availableRegs.GetRegSetForType(interval->registerType); regNumber firstReg = REG_NA; regNumber newReg = REG_NA; while (candidateRegs != RBM_NONE) @@ -12182,7 +12182,7 @@ void LinearScan::verifyFinalAllocation() // However, we will assert that, at resolution time, no registers contain GC refs. 
{ DBEXEC(VERBOSE, printf(" ")); - regMaskTP candidateRegs = currentRefPosition.registerAssignment; + SingleTypeRegSet candidateRegs = currentRefPosition.registerAssignment; while (candidateRegs != RBM_NONE) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3be436c53b0187..565f70a3e11c56 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2825,6 +2825,9 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } +#ifdef HAS_MORE_THAN_64_REGISTERS + actualRegistersMask = regMaskTP(~RBM_NONE); +#else if (availableRegCount < (sizeof(regMaskTP) * 8)) { // Mask out the bits that are between 64 ~ availableRegCount @@ -2834,6 +2837,7 @@ void LinearScan::buildIntervals() { actualRegistersMask = ~RBM_NONE; } +#endif #ifdef DEBUG // Make sure we don't have any blocks that were not visited diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h index 99c7f8be6bc556..49cd4dd2e6ecda 100644 --- a/src/coreclr/jit/regset.h +++ b/src/coreclr/jit/regset.h @@ -119,7 +119,7 @@ class RegSet bool rsRegsModified(regMaskTP mask) const { assert(rsModifiedRegsMaskInitialized); - return (rsModifiedRegsMask & mask) != 0; + return (rsModifiedRegsMask & mask).IsNonEmpty(); } void verifyRegUsed(regNumber reg); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 7e144eb9c5b213..c0d211f7cc1ead 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -229,16 +229,28 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif +#ifdef TARGET_ARM64 +#define HAS_MORE_THAN_64_REGISTERS 1 +#define MORE_THAN_64_REGISTERS(x) x +#else +#define MORE_THAN_64_REGISTERS(x) +#endif // TARGET_ARM64 + typedef regMaskSmall SingleTypeRegSet; +inline SingleTypeRegSet genRegMask(regNumber reg); +inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); struct regMaskTP { private: 
regMaskSmall low; + MORE_THAN_64_REGISTERS(regMaskSmall high); + public: - constexpr regMaskTP(regMaskSmall regMask) + regMaskTP(regMaskSmall regMask) : low(regMask) { + } regMaskTP() @@ -278,12 +290,12 @@ struct regMaskTP return low; } - bool IsEmpty() + bool IsEmpty() const { return low == RBM_NONE; } - bool IsNonEmpty() + bool IsNonEmpty() const { return !IsEmpty(); } @@ -298,48 +310,54 @@ struct regMaskTP bool IsRegNumInMask(regNumber reg); }; -static regMaskTP operator^(regMaskTP first, regMaskTP second) +static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() ^ second.getLow()); return result; } -static regMaskTP operator&(regMaskTP first, regMaskTP second) +static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() & second.getLow()); return result; } -static regMaskTP operator|(regMaskTP first, regMaskTP second) +static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() | second.getLow()); return result; } -static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) { first = first | second; return first; } -static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) { first = first ^ second; return first; } -static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator^=(regMaskTP& first, const regNumber reg) +{ + first = first ^ genRegMask(reg); + return first; +} + +static regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) { first = first & second; return first; } -static bool operator==(regMaskTP first, regMaskTP second) +static bool operator==(const regMaskTP& first, const regMaskTP& second) { return (first.getLow() == second.getLow()); } -static bool operator!=(regMaskTP first, regMaskTP second) 
+static bool operator!=(const regMaskTP& first, const regMaskTP& second) { return !(first == second); } @@ -375,18 +393,18 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) } #endif -static regMaskTP operator~(regMaskTP first) +static regMaskTP operator~(const regMaskTP& first) { regMaskTP result(~first.getLow()); return result; } -static uint32_t PopCount(regMaskTP value) +static uint32_t PopCount(const regMaskTP& value) { return BitOperations::PopCount(value.getLow()); } -static uint32_t BitScanForward(regMaskTP mask) +static uint32_t BitScanForward(const regMaskTP& mask) { return BitOperations::BitScanForward(mask.getLow()); } @@ -508,9 +526,6 @@ inline bool isByteReg(regNumber reg) } #endif -inline SingleTypeRegSet genRegMask(regNumber reg); -inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); - /***************************************************************************** * Return true if the register number is valid */ From d53b62053d2abe4e0f82050aabb09328c740e667 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 14:34:28 -0700 Subject: [PATCH 40/44] Do not use `const regMaskTP&` as parameter --- src/coreclr/jit/target.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index c0d211f7cc1ead..368052ca6342b1 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -310,31 +310,31 @@ struct regMaskTP bool IsRegNumInMask(regNumber reg); }; -static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator^(regMaskTP first, regMaskTP second) { regMaskTP result(first.getLow() ^ second.getLow()); return result; } -static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) +static regMaskTP operator&(regMaskTP first, regMaskTP second) { regMaskTP result(first.getLow() & second.getLow()); return result; } -static regMaskTP operator|(const 
regMaskTP& first, const regMaskTP& second) +static regMaskTP operator|(regMaskTP first, regMaskTP second) { regMaskTP result(first.getLow() | second.getLow()); return result; } -static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) { first = first | second; return first; } -static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) { first = first ^ second; return first; @@ -346,18 +346,18 @@ static regMaskTP& operator^=(regMaskTP& first, const regNumber reg) return first; } -static regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) +static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) { first = first & second; return first; } -static bool operator==(const regMaskTP& first, const regMaskTP& second) +static bool operator==(regMaskTP first, regMaskTP second) { return (first.getLow() == second.getLow()); } -static bool operator!=(const regMaskTP& first, const regMaskTP& second) +static bool operator!=(regMaskTP first, regMaskTP second) { return !(first == second); } @@ -393,18 +393,18 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) } #endif -static regMaskTP operator~(const regMaskTP& first) +static regMaskTP operator~(regMaskTP first) { regMaskTP result(~first.getLow()); return result; } -static uint32_t PopCount(const regMaskTP& value) +static uint32_t PopCount(regMaskTP value) { return BitOperations::PopCount(value.getLow()); } -static uint32_t BitScanForward(const regMaskTP& mask) +static uint32_t BitScanForward(regMaskTP mask) { return BitOperations::BitScanForward(mask.getLow()); } From e2c0f038cab12b6dca2cf88eb7e5ae5a59f5836c Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 14:34:33 -0700 Subject: [PATCH 41/44] Revert "Do not use `const regMaskTP&` as parameter" This reverts commit d53b62053d2abe4e0f82050aabb09328c740e667. 
By not passing `regMaskTP` using constant reference, there is a small cost we have to pay: Without constant reference: Overall: 1.80% MinOpts: 2.05% FullOpts: 1.62% With constant reference: Overall: 1.74% MinOpts: 1.94% FullOpts: 1.6% --- src/coreclr/jit/target.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index 368052ca6342b1..c0d211f7cc1ead 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -310,31 +310,31 @@ struct regMaskTP bool IsRegNumInMask(regNumber reg); }; -static regMaskTP operator^(regMaskTP first, regMaskTP second) +static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() ^ second.getLow()); return result; } -static regMaskTP operator&(regMaskTP first, regMaskTP second) +static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() & second.getLow()); return result; } -static regMaskTP operator|(regMaskTP first, regMaskTP second) +static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) { regMaskTP result(first.getLow() | second.getLow()); return result; } -static regMaskTP& operator|=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) { first = first | second; return first; } -static regMaskTP& operator^=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) { first = first ^ second; return first; @@ -346,18 +346,18 @@ static regMaskTP& operator^=(regMaskTP& first, const regNumber reg) return first; } -static regMaskTP& operator&=(regMaskTP& first, regMaskTP second) +static regMaskTP& operator&=(regMaskTP& first, const regMaskTP& second) { first = first & second; return first; } -static bool operator==(regMaskTP first, regMaskTP second) +static bool operator==(const regMaskTP& first, const regMaskTP& 
second) { return (first.getLow() == second.getLow()); } -static bool operator!=(regMaskTP first, regMaskTP second) +static bool operator!=(const regMaskTP& first, const regMaskTP& second) { return !(first == second); } @@ -393,18 +393,18 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b) } #endif -static regMaskTP operator~(regMaskTP first) +static regMaskTP operator~(const regMaskTP& first) { regMaskTP result(~first.getLow()); return result; } -static uint32_t PopCount(regMaskTP value) +static uint32_t PopCount(const regMaskTP& value) { return BitOperations::PopCount(value.getLow()); } -static uint32_t BitScanForward(regMaskTP mask) +static uint32_t BitScanForward(const regMaskTP& mask) { return BitOperations::BitScanForward(mask.getLow()); } From 9086ad85b9f03d4ab8930b69481db2735626c6b3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 14:57:47 -0700 Subject: [PATCH 42/44] Further reduce the TP cost Moved the *= operators as instance of `regMaskTP` so the `.low` private field can directly be manipulated instead of converting the `64-bit` value in `regMaskTP` before doing any operation.
Overall: 0.74% MinOpts: 0.82% FullOpts: 0.68% --- src/coreclr/jit/compiler.hpp | 2 +- src/coreclr/jit/target.h | 48 +++++++++++++++++------------------- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 975422682d70c8..b4d79335bb2eb9 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1009,7 +1009,7 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitScanForward(mask); + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); mask ^= genRegMask(regNum); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index c0d211f7cc1ead..c73d948210aa74 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -249,8 +249,7 @@ struct regMaskTP public: regMaskTP(regMaskSmall regMask) : low(regMask) - { - + { } regMaskTP() @@ -308,6 +307,27 @@ struct regMaskTP void RemoveRegNumFromMask(regNumber reg); bool IsRegNumInMask(regNumber reg); + + + void operator|=(const regMaskTP& second) + { + low |= second.getLow(); + } + + void operator^=(const regMaskTP& second) + { + low ^= second.getLow(); + } + + void operator^=(const regNumber reg) + { + low ^= genRegMask(reg); + } + + void operator&=(const regMaskTP& second) + { + low &= second.getLow(); + } }; static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second) @@ -328,30 +348,6 @@ static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second) return result; } -static regMaskTP& operator|=(regMaskTP& first, const regMaskTP& second) -{ - first = first | second; - return first; -} - -static regMaskTP& operator^=(regMaskTP& first, const regMaskTP& second) -{ - first = first ^ second; - return first; -} - -static regMaskTP& operator^=(regMaskTP& first, const regNumber reg) -{ - first = first ^ genRegMask(reg); - return first; -} - -static regMaskTP& 
operator&=(regMaskTP& first, const regMaskTP& second) -{ - first = first & second; - return first; -} - static bool operator==(const regMaskTP& first, const regMaskTP& second) { return (first.getLow() == second.getLow()); From aff022f1a6034b0022808ef3f1f8b7a670bcbe87 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 15:09:58 -0700 Subject: [PATCH 43/44] jit format --- src/coreclr/jit/compiler.hpp | 2 -- src/coreclr/jit/lsra.cpp | 7 ++++--- src/coreclr/jit/target.h | 7 +++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index b4d79335bb2eb9..be01a5095bc33b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1016,8 +1016,6 @@ inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) return regNum; } - - /***************************************************************************** * * Return the size in bytes of the given type. diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 82357f2007bb2d..b437c34dac874f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -4157,9 +4157,10 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar { // If we're rotating the register locations at block boundaries, try to use // the next higher register number of the appropriate register type. 
- SingleTypeRegSet candidateRegs = allRegs(interval->registerType) & availableRegs.GetRegSetForType(interval->registerType); - regNumber firstReg = REG_NA; - regNumber newReg = REG_NA; + SingleTypeRegSet candidateRegs = + allRegs(interval->registerType) & availableRegs.GetRegSetForType(interval->registerType); + regNumber firstReg = REG_NA; + regNumber newReg = REG_NA; while (candidateRegs != RBM_NONE) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index c73d948210aa74..a0de3339a99e7a 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -231,12 +231,12 @@ typedef uint64_t regMaskSmall; #ifdef TARGET_ARM64 #define HAS_MORE_THAN_64_REGISTERS 1 -#define MORE_THAN_64_REGISTERS(x) x +#define MORE_THAN_64_REGISTERS(x) x #else #define MORE_THAN_64_REGISTERS(x) #endif // TARGET_ARM64 -typedef regMaskSmall SingleTypeRegSet; +typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genRegMask(regNumber reg); inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); @@ -249,7 +249,7 @@ struct regMaskTP public: regMaskTP(regMaskSmall regMask) : low(regMask) - { + { } regMaskTP() @@ -308,7 +308,6 @@ struct regMaskTP bool IsRegNumInMask(regNumber reg); - void operator|=(const regMaskTP& second) { low |= second.getLow(); From 2d2e4573a9c1867289132fee54c07bc207397564 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 May 2024 15:52:13 -0700 Subject: [PATCH 44/44] Add high to all platforms --- src/coreclr/jit/lsrabuild.cpp | 21 +++++++++++++-------- src/coreclr/jit/target.h | 16 ++++++++-------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 565f70a3e11c56..e416f5e40c98aa 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2825,19 +2825,24 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } 
-#ifdef HAS_MORE_THAN_64_REGISTERS - actualRegistersMask = regMaskTP(~RBM_NONE); -#else - if (availableRegCount < (sizeof(regMaskTP) * 8)) + static_assert(sizeof(regMaskTP) == 2 * sizeof(regMaskSmall)); + + if (availableRegCount < (sizeof(regMaskSmall) * 8)) { - // Mask out the bits that are between 64 ~ availableRegCount - actualRegistersMask = (1ULL << availableRegCount) - 1; + // Mask out the bits that are between (8 * regMaskSmall) ~ availableRegCount + actualRegistersMask = regMaskTP((1ULL << availableRegCount) - 1); + } + else if (availableRegCount < (sizeof(regMaskTP) * 8)) + { + // Mask out the bits that are between (8 * regMaskTP) ~ availableRegCount + // Subtract one extra for stack. + unsigned topRegCount = availableRegCount - sizeof(regMaskSmall) * 8 - 1; + actualRegistersMask = regMaskTP(~RBM_NONE, (1ULL << topRegCount) - 1); } else { - actualRegistersMask = ~RBM_NONE; + actualRegistersMask = regMaskTP(~RBM_NONE, ~RBM_NONE); } -#endif #ifdef DEBUG // Make sure we don't have any blocks that were not visited diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h index a0de3339a99e7a..3e3e4591281372 100644 --- a/src/coreclr/jit/target.h +++ b/src/coreclr/jit/target.h @@ -229,13 +229,6 @@ typedef uint64_t regMaskSmall; #define REG_MASK_ALL_FMT "%016llX" #endif -#ifdef TARGET_ARM64 -#define HAS_MORE_THAN_64_REGISTERS 1 -#define MORE_THAN_64_REGISTERS(x) x -#else -#define MORE_THAN_64_REGISTERS(x) -#endif // TARGET_ARM64 - typedef regMaskSmall SingleTypeRegSet; inline SingleTypeRegSet genRegMask(regNumber reg); inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE)); @@ -244,9 +237,16 @@ struct regMaskTP { private: regMaskSmall low; - MORE_THAN_64_REGISTERS(regMaskSmall high); + regMaskSmall high; public: + + regMaskTP(regMaskSmall lowMask, regMaskSmall highMask) + : low(lowMask) + , high(highMask) + { + } + regMaskTP(regMaskSmall regMask) : low(regMask) {