diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 766a0a0150e873..7932e0d452c43f 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -167,6 +167,7 @@ set( JIT_SOURCES redundantbranchopts.cpp regalloc.cpp registerargconvention.cpp + regMaskTPOps.cpp regset.cpp scev.cpp scopeinfo.cpp diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a4c4bb9b45e902..f8963becfe419a 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -6252,26 +6252,26 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* regMaskTP pushedRegs = regs; - for (regNumber reg = REG_INT_FIRST; regs != RBM_NONE; reg = REG_NEXT(reg)) + for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskTP regMask = genRegMask(reg); - if ((regBit & regs) == RBM_NONE) + if ((regMask & pushedRegs) == RBM_NONE) continue; var_types type; - if (regBit & gcInfo.gcRegGCrefSetCur) + if (regMask & gcInfo.gcRegGCrefSetCur) { type = TYP_REF; } - else if (regBit & gcInfo.gcRegByrefSetCur) + else if (regMask & gcInfo.gcRegByrefSetCur) { - *byrefRegs |= regBit; + *byrefRegs |= regMask; type = TYP_BYREF; } else if (noRefRegs != NULL) { - *noRefRegs |= regBit; + *noRefRegs |= regMask; type = TYP_I_IMPL; } else @@ -6282,9 +6282,7 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP* inst_RV(INS_push, reg, type); genSinglePush(); - gcInfo.gcMarkRegSetNpt(regBit); - - regs &= ~regBit; + gcInfo.gcMarkRegSetNpt(regMask); } return pushedRegs; @@ -6323,20 +6321,22 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg noway_assert(genTypeStSz(TYP_REF) == genTypeStSz(TYP_INT)); noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_INT)); + regMaskTP popedRegs = regs; + // Walk the registers in the reverse order as genPushRegs() - for (regNumber reg = REG_INT_LAST; regs != RBM_NONE; reg = REG_PREV(reg)) + for (regNumber reg = REG_INT_LAST; reg >= REG_INT_FIRST; reg = REG_PREV(reg)) { - regMaskTP regBit = regMaskTP(1) << reg; + regMaskTP regMask = genRegMask(reg); - if ((regBit & regs) == RBM_NONE) + if ((regMask & popedRegs) == RBM_NONE) continue; var_types type; - if (regBit & byrefRegs) + if (regMask & byrefRegs) { type = TYP_BYREF; } - else if (regBit & noRefRegs) + else if (regMask & noRefRegs) { type = TYP_INT; } @@ -6350,8 +6350,6 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg if (type != TYP_INT) gcInfo.gcMarkRegPtrVal(reg, type); - - regs &= ~regBit; } #endif // FEATURE_FIXED_OUT_ARGS diff --git a/src/coreclr/jit/codegeninterface.h b/src/coreclr/jit/codegeninterface.h index 608c72c22d48d0..a025285cbc0917 100644 --- a/src/coreclr/jit/codegeninterface.h +++ b/src/coreclr/jit/codegeninterface.h @@ -75,31 +75,31 @@ class CodeGenInterface } #if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet 
rbmMskCalleeTrash; // Call this function after the equivalent fields in Compiler have been initialized. void CopyRegisterInfo(); - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 54be15ed7f20bc..1b5592a5b59e5a 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3485,12 +3485,12 @@ void Compiler::compInitOptions(JitFlags* jitFlags) // Make sure we copy the register info and initialize the // trash regs after the underlying fields are initialized - const regMaskTP vtCalleeTrashRegs[TYP_COUNT]{ + const SingleTypeRegSet vtCalleeTrashRegs[TYP_COUNT]{ #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP }; - memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, vtCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); codeGen->CopyRegisterInfo(); #endif // TARGET_XARCH diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d7a0086ed9bb90..428c817c751c78 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -11246,8 +11246,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLFLOAT(); - // regMaskTP get_RBM_FLT_CALLEE_TRASH(); + // SingleTypeRegSet get_RBM_ALLFLOAT(); + // SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_FLOAT(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11256,16 +11256,16 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_AMD64 requires one. // - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; - unsigned cntCalleeTrashFloat; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; + unsigned cntCalleeTrashFloat; public: - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } @@ -11284,8 +11284,8 @@ class Compiler // // Users of these values need to define four accessor functions: // - // regMaskTP get_RBM_ALLMASK(); - // regMaskTP get_RBM_MSK_CALLEE_TRASH(); + // SingleTypeRegSet get_RBM_ALLMASK(); + // SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH(); // unsigned get_CNT_CALLEE_TRASH_MASK(); // unsigned get_AVAILABLE_REG_COUNT(); // @@ -11294,17 +11294,17 @@ class Compiler // This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only // TARGET_XARCH requires one. 
// - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; - unsigned cntCalleeTrashMask; - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet rbmMskCalleeTrash; + unsigned cntCalleeTrashMask; + SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; public: - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index f393693b792770..be01a5095bc33b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -933,9 +933,9 @@ inline unsigned Compiler::funGetFuncIdx(BasicBlock* block) // Assumptions: // The mask contains one and only one register. -inline regNumber genRegNumFromMask(regMaskTP mask) +inline regNumber genRegNumFromMask(const regMaskTP& mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ @@ -947,6 +947,27 @@ inline regNumber genRegNumFromMask(regMaskTP mask) return regNum; } +//------------------------------------------------------------------------------ +// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. +// +// Arguments: +// mask - the register mask +// +// Return Value: +// The number of the first register contained in the mask. +// + +inline regNumber genFirstRegNumFromMask(const regMaskTP& mask) +{ + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero + + /* Convert the mask to a register number */ + + regNumber regNum = (regNumber)BitScanForward(mask); + + return regNum; +} + //------------------------------------------------------------------------------ // genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a // register number and also toggle the bit in the `mask`. @@ -960,34 +981,37 @@ inline regNumber genRegNumFromMask(regMaskTP mask) inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask.IsNonEmpty()); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitScanForward(mask); + regNumber regNum = (regNumber)genFirstRegNumFromMask(mask); - mask ^= genRegMask(regNum); + mask ^= regNum; return regNum; } //------------------------------------------------------------------------------ -// genFirstRegNumFromMask : Maps first bit set in the register mask to a register number. -// +// genFirstRegNumFromMaskAndToggle : Maps first bit set in the register mask to a +// register number and also toggle the bit in the `mask`. // Arguments: // mask - the register mask // // Return Value: -// The number of the first register contained in the mask. +// The number of the first register contained in the mask and updates the `mask` to toggle +// the bit. 
// -inline regNumber genFirstRegNumFromMask(regMaskTP mask) +inline regNumber genFirstRegNumFromMaskAndToggle(SingleTypeRegSet& mask) { - assert(mask != 0); // Must have one bit set, so can't have a mask of zero + assert(mask != RBM_NONE); // Must have one bit set, so can't have a mask of zero /* Convert the mask to a register number */ - regNumber regNum = (regNumber)BitScanForward(mask); + regNumber regNum = (regNumber)BitOperations::BitScanForward(mask); + + mask ^= genRegMask(regNum); return regNum; } @@ -3496,39 +3520,6 @@ inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type, CorInf } } -/*****************************************************************************/ -/* Return a register mask with the first 'numRegs' argument registers set. - */ - -inline regMaskTP genIntAllRegArgMask(unsigned numRegs) -{ - assert(numRegs <= MAX_REG_ARG); - - regMaskTP result = RBM_NONE; - for (unsigned i = 0; i < numRegs; i++) - { - result |= intArgMasks[i]; - } - return result; -} - -inline regMaskTP genFltAllRegArgMask(unsigned numRegs) -{ -#ifndef TARGET_X86 - assert(numRegs <= MAX_FLOAT_REG_ARG); - - regMaskTP result = RBM_NONE; - for (unsigned i = 0; i < numRegs; i++) - { - result |= fltArgMasks[i]; - } - return result; -#else - assert(!"no x86 float arg regs\n"); - return RBM_NONE; -#endif -} - /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 9e68a57805e42a..022e3aa492a0c0 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -10062,7 +10062,7 @@ void emitter::emitStackPopLargeStk(BYTE* addr, bool isCall, unsigned char callIn // of callee-saved registers only). for (unsigned calleeSavedRegIdx = 0; calleeSavedRegIdx < CNT_CALL_GC_REGS; calleeSavedRegIdx++) { - regMaskTP calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; + regMaskSmall calleeSavedRbm = raRbmCalleeSaveOrder[calleeSavedRegIdx]; if (emitThisGCrefRegs & calleeSavedRbm) { gcrefRegs |= (1 << calleeSavedRegIdx); diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index d1721f7896bb61..902029791f20c0 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -4620,7 +4620,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder, while (regMask) { // Get hold of the next register bit. - regMaskTP tmpMask = genFindLowestBit(regMask); + regMaskSmall tmpMask = genFindLowestBit(regMask); assert(tmpMask); // Remember the new state of this register. diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 060f5d497d3778..b437c34dac874f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -234,13 +234,13 @@ weight_t LinearScan::getWeight(RefPosition* refPos) // allRegs represents a set of registers that can // be used to allocate the specified type in any point // in time (more of a 'bank' of registers). 
-regMaskTP LinearScan::allRegs(RegisterType rt) +SingleTypeRegSet LinearScan::allRegs(RegisterType rt) { assert((rt != TYP_UNDEF) && (rt != TYP_STRUCT)); return *availableRegs[rt]; } -regMaskTP LinearScan::allByteRegs() +SingleTypeRegSet LinearScan::allByteRegs() { #ifdef TARGET_X86 return availableIntRegs & RBM_BYTE_REGS; @@ -249,7 +249,7 @@ regMaskTP LinearScan::allByteRegs() #endif } -regMaskTP LinearScan::allSIMDRegs() +SingleTypeRegSet LinearScan::allSIMDRegs() { return availableFloatRegs; } @@ -262,7 +262,7 @@ regMaskTP LinearScan::allSIMDRegs() // Return Value: // Register mask of the SSE/VEX-only SIMD registers // -regMaskTP LinearScan::lowSIMDRegs() +SingleTypeRegSet LinearScan::lowSIMDRegs() { #if defined(TARGET_AMD64) return (availableFloatRegs & RBM_LOWFLOAT); @@ -299,15 +299,17 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo nextFixedRef[regRecord->regNum] = nextLocation; } -regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition) +SingleTypeRegSet LinearScan::getMatchingConstants(SingleTypeRegSet mask, + Interval* currentInterval, + RefPosition* refPosition) { assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); - regMaskTP candidates = (mask & m_RegistersWithConstants); - regMaskTP result = RBM_NONE; + SingleTypeRegSet candidates = (mask & m_RegistersWithConstants).GetRegSetForType(currentInterval->registerType); + SingleTypeRegSet result = RBM_NONE; while (candidates != RBM_NONE) { - regNumber regNum = genFirstRegNumFromMask(candidates); - regMaskTP candidateBit = genRegMask(regNum); + regNumber regNum = genFirstRegNumFromMask(candidates); + SingleTypeRegSet candidateBit = genRegMask(regNum); candidates ^= candidateBit; RegRecord* physRegRecord = getRegisterRecord(regNum); @@ -436,7 +438,7 @@ void LinearScan::updateRegsFreeBusyState(RefPosition& refPosition, // that it will select a callee-save register. But to be safe, we restrict // the set of candidates if compFloatingPointUsed is not already set. // -regMaskTP LinearScan::internalFloatRegCandidates() +SingleTypeRegSet LinearScan::internalFloatRegCandidates() { needNonIntegerRegisters = true; @@ -471,6 +473,7 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // // Arguments: // refPosition - RefPosition which we want to constrain. +// regType - Type of register for which we want constrained mask // regMaskActual - regMask that needs to be constrained // regMaskConstraint - regMask constraint that needs to be // applied to regMaskActual @@ -481,13 +484,14 @@ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) // New regMask that has minRegCount registers after intersection. // Otherwise returns regMaskActual. // -regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstraint, - unsigned minRegCount) +SingleTypeRegSet LinearScan::getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, + SingleTypeRegSet regMaskActual, + SingleTypeRegSet regMaskConstraint, + unsigned minRegCount) { - regMaskTP newMask = regMaskActual & regMaskConstraint; - if (genCountBits(newMask) < minRegCount) + SingleTypeRegSet newMask = regMaskActual & regMaskConstraint; + if (PopCount(newMask) < minRegCount) { // Constrained mask does not have minimum required registers needed. 
return regMaskActual; @@ -495,7 +499,7 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, if ((refPosition != nullptr) && !refPosition->RegOptional()) { - regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + SingleTypeRegSet busyRegs = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(regType); if ((newMask & ~busyRegs) == RBM_NONE) { // Constrained mask does not have at least one free register to allocate. @@ -517,32 +521,34 @@ regMaskTP LinearScan::getConstrainedRegMask(RefPosition* refPosition, #if defined(TARGET_AMD64) #ifdef UNIX_AMD64_ABI // On System V the RDI and RSI are not callee saved. Use R12 ans R13 as callee saved registers. -static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); +static const SingleTypeRegSet LsraLimitSmallIntSet = + (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_R12 | RBM_R13); #else // !UNIX_AMD64_ABI // On Windows Amd64 use the RDI and RSI as callee saved registers. -static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); +static const SingleTypeRegSet LsraLimitSmallIntSet = + (RBM_EAX | RBM_ECX | RBM_EBX | RBM_ETW_FRAMED_EBP | RBM_ESI | RBM_EDI); #endif // !UNIX_AMD64_ABI -static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); -static const regMaskTP LsraLimitUpperSimdSet = +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const SingleTypeRegSet LsraLimitUpperSimdSet = (RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 | RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31); #elif defined(TARGET_ARM) // On ARM, we may need two registers to set up the target register for a virtual call, so we need // to have at least the maximum number of arg registers, plus 2. 
-static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); -static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); #elif defined(TARGET_X86) -static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); -static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); #elif defined(TARGET_LOONGARCH64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #elif defined(TARGET_RISCV64) -static const regMaskTP LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); -static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); +static const SingleTypeRegSet LsraLimitSmallIntSet = (RBM_T1 | RBM_T3 | RBM_A0 | RBM_A1 | RBM_T0); +static const SingleTypeRegSet LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 | RBM_F9); #else #error Unsupported or unset target architecture #endif // target @@ -561,7 +567,7 @@ static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F8 // This is the method used to implement the stress options that limit // the set of registers considered for allocation. 
// -regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) +SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterType regType, SingleTypeRegSet mask) { #ifdef TARGET_ARM64 if ((refPosition != nullptr) && refPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) @@ -585,24 +591,24 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) case LSRA_LIMIT_CALLEE: if (!compiler->opts.compDbgEnC) { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_SAVED, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_SAVED, minRegCount); } break; case LSRA_LIMIT_CALLER: { - mask = getConstrainedRegMask(refPosition, mask, RBM_CALLEE_TRASH, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, RBM_CALLEE_TRASH, minRegCount); } break; case LSRA_LIMIT_SMALL_SET: if ((mask & LsraLimitSmallIntSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallIntSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallIntSet, minRegCount); } else if ((mask & LsraLimitSmallFPSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitSmallFPSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitSmallFPSet, minRegCount); } break; @@ -610,7 +616,7 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) case LSRA_LIMIT_UPPER_SIMD_SET: if ((mask & LsraLimitUpperSimdSet) != RBM_NONE) { - mask = getConstrainedRegMask(refPosition, mask, LsraLimitUpperSimdSet, minRegCount); + mask = getConstrainedRegMask(refPosition, regType, mask, LsraLimitUpperSimdSet, minRegCount); } break; #endif @@ -778,7 +784,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #if defined(TARGET_XARCH) rbmAllMask = compiler->rbmAllMask; rbmMskCalleeTrash = compiler->rbmMskCalleeTrash; - memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(regMaskTP) * TYP_COUNT); + memcpy(varTypeCalleeTrashRegs, compiler->varTypeCalleeTrashRegs, sizeof(SingleTypeRegSet) * TYP_COUNT); if (!compiler->canUseEvexEncoding()) { @@ -831,11 +837,11 @@ LinearScan::LinearScan(Compiler* theCompiler) // Note: one known reason why we exclude LR is because NativeAOT has dependency on not // using LR as a GPR. See: https://github.com/dotnet/runtime/issues/101932 // Once that is addressed, we may consider allowing LR in availableIntRegs. - availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = ((RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd)).getLow(); #elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~(RBM_FP | RBM_RA) & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd.getLow()); #endif #if ETW_EBP_FRAMED @@ -2786,7 +2792,7 @@ void LinearScan::setFrameType() // If we are using FPBASE as the frame register, we cannot also use it for // a local var. 
- regMaskTP removeMask = RBM_NONE; + SingleTypeRegSet removeMask = RBM_NONE; if (frameType == FT_EBP_FRAME) { removeMask |= RBM_FPBASE; @@ -2985,9 +2991,9 @@ regNumber LinearScan::allocateRegMinimal(Interval* currentInterva RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!enregisterLocalVars); - regNumber foundReg; - regMaskTP foundRegBit; - RegRecord* availablePhysRegRecord; + regNumber foundReg; + SingleTypeRegSet foundRegBit; + RegRecord* availablePhysRegRecord; foundRegBit = regSelector->selectMinimal(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { @@ -3047,7 +3053,7 @@ template regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit = + SingleTypeRegSet foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { @@ -3501,7 +3507,7 @@ void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) // Assign the given physical register interval to the given interval void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) { - regMaskTP assignedRegMask = genRegMask(regRec->regNum); + SingleTypeRegSet assignedRegMask = genRegMask(regRec->regNum); compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); interval->assignedReg = regRec; @@ -4016,7 +4022,7 @@ void LinearScan::processKills(RefPosition* killRefPosition) { RefPosition* nextKill = killRefPosition->nextRefPosition; - regMaskTP killedRegs = killRefPosition->registerAssignment; + SingleTypeRegSet killedRegs = killRefPosition->registerAssignment; while (killedRegs != RBM_NONE) { regNumber killedReg = genFirstRegNumFromMaskAndToggle(killedRegs); @@ -4058,7 +4064,7 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) { // For each physical register that can hold a GC type, // if it is occupied by an interval of a GC type, spill that interval. - regMaskTP candidateRegs = killRefPosition->registerAssignment; + SingleTypeRegSet candidateRegs = killRefPosition->registerAssignment; INDEBUG(bool killedRegs = false); while (candidateRegs != RBM_NONE) { @@ -4151,9 +4157,10 @@ regNumber LinearScan::rotateBlockStartLocation(Interval* interval, regNumber tar { // If we're rotating the register locations at block boundaries, try to use // the next higher register number of the appropriate register type. 
- regMaskTP candidateRegs = allRegs(interval->registerType) & availableRegs; - regNumber firstReg = REG_NA; - regNumber newReg = REG_NA; + SingleTypeRegSet candidateRegs = + allRegs(interval->registerType) & availableRegs.GetRegSetForType(interval->registerType); + regNumber firstReg = REG_NA; + regNumber newReg = REG_NA; while (candidateRegs != RBM_NONE) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); @@ -5254,8 +5261,8 @@ void LinearScan::allocateRegistersMinimal() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; - bool isInRegister = false; + SingleTypeRegSet assignedRegBit = RBM_NONE; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -6186,8 +6193,8 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); } - regMaskTP assignedRegBit = RBM_NONE; - bool isInRegister = false; + SingleTypeRegSet assignedRegBit = RBM_NONE; + bool isInRegister = false; if (assignedRegister != REG_NA) { isInRegister = true; @@ -8684,7 +8691,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs) + SingleTypeRegSet terminatorConsumedRegs) { // TODO-Throughput: This would be much more efficient if we add RegToVarMaps instead of VarToRegMaps // and they would be more space-efficient as well. @@ -8692,7 +8699,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, VarToRegMap toVarToRegMap = toBlock == nullptr ? nullptr : getInVarToRegMap(toBlock->bbNum); #ifdef TARGET_ARM - regMaskTP freeRegs; + SingleTypeRegSet freeRegs; if (type == TYP_DOUBLE) { // We have to consider all float registers for TYP_DOUBLE @@ -8703,7 +8710,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, freeRegs = allRegs(type); } #else // !TARGET_ARM - regMaskTP freeRegs = allRegs(type); + SingleTypeRegSet freeRegs = allRegs(type); #endif // !TARGET_ARM #ifdef DEBUG @@ -8712,7 +8719,7 @@ regNumber LinearScan::getTempRegForResolution(BasicBlock* fromBlock, return REG_NA; } #endif // DEBUG - INDEBUG(freeRegs = stressLimitRegs(nullptr, freeRegs)); + INDEBUG(freeRegs = stressLimitRegs(nullptr, type, freeRegs)); freeRegs &= ~terminatorConsumedRegs; @@ -8990,7 +8997,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) // // Note: Only switches and JCMP/JTEST (for Arm4) have input regs (and so can be fed by copies), so those // are the only block-ending branches that need special handling. 
- regMaskTP consumedRegs = RBM_NONE; + SingleTypeRegSet consumedRegs = RBM_NONE; if (block->KindIs(BBJ_SWITCH)) { // At this point, Lowering has transformed any non-switch-table blocks into @@ -8998,7 +9005,7 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) GenTree* switchTable = LIR::AsRange(block).LastNode(); assert(switchTable != nullptr && switchTable->OperGet() == GT_SWITCH_TABLE); - consumedRegs = compiler->codeGen->internalRegisters.GetAll(switchTable); + consumedRegs = compiler->codeGen->internalRegisters.GetAll(switchTable).GetRegSetForType(IntRegisterType); GenTree* op1 = switchTable->gtGetOp1(); GenTree* op2 = switchTable->gtGetOp2(); noway_assert(op1 != nullptr && op2 != nullptr); @@ -9518,7 +9525,7 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs) + SingleTypeRegSet terminatorConsumedRegs) { VarToRegMap fromVarToRegMap = getOutVarToRegMap(fromBlock->bbNum); VarToRegMap toVarToRegMap; @@ -12176,7 +12183,7 @@ void LinearScan::verifyFinalAllocation() // However, we will assert that, at resolution time, no registers contain GC refs. { DBEXEC(VERBOSE, printf(" ")); - regMaskTP candidateRegs = currentRefPosition.registerAssignment; + SingleTypeRegSet candidateRegs = currentRefPosition.registerAssignment; while (candidateRegs != RBM_NONE) { regNumber nextReg = genFirstRegNumFromMaskAndToggle(candidateRegs); @@ -12560,9 +12567,9 @@ void LinearScan::RegisterSelection::reset(Interval* interval, RefPosition* refPo // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. // -bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP selectionCandidates) +bool LinearScan::RegisterSelection::applySelection(int selectionScore, SingleTypeRegSet selectionCandidates) { - regMaskTP newCandidates = candidates & selectionCandidates; + SingleTypeRegSet newCandidates = candidates & selectionCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12581,10 +12588,10 @@ bool LinearScan::RegisterSelection::applySelection(int selectionScore, regMaskTP // Return Values: // 'true' if there was a single register candidate available after the heuristic is applied. 
// -bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate) +bool LinearScan::RegisterSelection::applySingleRegSelection(int selectionScore, SingleTypeRegSet selectionCandidate) { assert(LinearScan::isSingleRegister(selectionCandidate)); - regMaskTP newCandidates = candidates & selectionCandidate; + SingleTypeRegSet newCandidates = candidates & selectionCandidate; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12631,7 +12638,7 @@ void LinearScan::RegisterSelection::try_CONST_AVAILABLE() if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - regMaskTP newCandidates = candidates & matchingConstants; + SingleTypeRegSet newCandidates = candidates & matchingConstants; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12792,7 +12799,7 @@ void LinearScan::RegisterSelection::try_COVERS_FULL() calculateCoversSets(); #endif - regMaskTP newCandidates = candidates & coversFullSet & freeCandidates; + SingleTypeRegSet newCandidates = candidates & coversFullSet & freeCandidates; if (newCandidates != RBM_NONE) { candidates = newCandidates; @@ -12816,15 +12823,15 @@ void LinearScan::RegisterSelection::try_BEST_FIT() } #endif - regMaskTP bestFitSet = RBM_NONE; + SingleTypeRegSet bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. bool earliestIsBest = coversFullApplied; LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; - for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) + for (SingleTypeRegSet bestFitCandidates = candidates; bestFitCandidates != RBM_NONE;) { - regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); - regMaskTP bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); + regNumber bestFitCandidateRegNum = genFirstRegNumFromMask(bestFitCandidates); + SingleTypeRegSet bestFitCandidateBit = genRegMask(bestFitCandidateRegNum); bestFitCandidates ^= bestFitCandidateBit; // Find the next RefPosition of the register. @@ -12918,12 +12925,12 @@ void LinearScan::RegisterSelection::try_REG_ORDER() // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. - unsigned lowestRegOrder = UINT_MAX; - regMaskTP lowestRegOrderBit = RBM_NONE; - for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) + unsigned lowestRegOrder = UINT_MAX; + SingleTypeRegSet lowestRegOrderBit = RBM_NONE; + for (SingleTypeRegSet regOrderCandidates = candidates; regOrderCandidates != RBM_NONE;) { - regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); - regMaskTP regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); + regNumber regOrderCandidateRegNum = genFirstRegNumFromMask(regOrderCandidates); + SingleTypeRegSet regOrderCandidateBit = genRegMask(regOrderCandidateRegNum); regOrderCandidates ^= regOrderCandidateBit; unsigned thisRegOrder = linearScan->getRegisterRecord(regOrderCandidateRegNum)->regOrder; @@ -12945,7 +12952,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() assert(!found); // The set of registers with the lowest spill weight. - regMaskTP lowestCostSpillSet = RBM_NONE; + SingleTypeRegSet lowestCostSpillSet = RBM_NONE; // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. 
// The spill weight for 'refPosition' (the one we're allocating now). @@ -12956,10 +12963,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST() bool foundLowerSpillWeight = false; LsraLocation thisLocation = refPosition->nodeLocation; - for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;) + for (SingleTypeRegSet spillCandidates = candidates; spillCandidates != RBM_NONE;) { - regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); - regMaskTP spillCandidateBit = genRegMask(spillCandidateRegNum); + regNumber spillCandidateRegNum = genFirstRegNumFromMask(spillCandidates); + SingleTypeRegSet spillCandidateBit = genRegMask(spillCandidateRegNum); spillCandidates ^= spillCandidateBit; RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; @@ -13079,12 +13086,12 @@ void LinearScan::RegisterSelection::try_FAR_NEXT_REF() { assert(!found); - LsraLocation farthestLocation = MinLocation; - regMaskTP farthestSet = RBM_NONE; - for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE;) + LsraLocation farthestLocation = MinLocation; + SingleTypeRegSet farthestSet = RBM_NONE; + for (SingleTypeRegSet farthestCandidates = candidates; farthestCandidates != RBM_NONE;) { - regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); - regMaskTP farthestCandidateBit = genRegMask(farthestCandidateRegNum); + regNumber farthestCandidateRegNum = genFirstRegNumFromMask(farthestCandidates); + SingleTypeRegSet farthestCandidateBit = genRegMask(farthestCandidateRegNum); farthestCandidates ^= farthestCandidateBit; // Find the next RefPosition of the register. @@ -13113,11 +13120,11 @@ void LinearScan::RegisterSelection::try_PREV_REG_OPT() { assert(!found); - regMaskTP prevRegOptSet = RBM_NONE; - for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) + SingleTypeRegSet prevRegOptSet = RBM_NONE; + for (SingleTypeRegSet prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE;) { - regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); - regMaskTP prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); + regNumber prevRegOptCandidateRegNum = genFirstRegNumFromMask(prevRegOptCandidates); + SingleTypeRegSet prevRegOptCandidateBit = genRegMask(prevRegOptCandidateRegNum); prevRegOptCandidates ^= prevRegOptCandidateBit; Interval* assignedInterval = linearScan->physRegs[prevRegOptCandidateRegNum].assignedInterval; bool foundPrevRegOptReg = true; @@ -13216,11 +13223,11 @@ void LinearScan::RegisterSelection::calculateUnassignedSets() // TODO: Seperate return; } - regMaskTP coversCandidates = candidates; + SingleTypeRegSet coversCandidates = candidates; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // The register is considered unassigned if it has no assignedInterval, OR @@ -13243,12 +13250,12 @@ void LinearScan::RegisterSelection::calculateCoversSets() return; } - preferenceSet = (candidates & preferences); - regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; + preferenceSet = (candidates & preferences); + SingleTypeRegSet coversCandidates = (preferenceSet == RBM_NONE) ? 
candidates : preferenceSet; for (; coversCandidates != RBM_NONE;) { - regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); - regMaskTP coversCandidateBit = genRegMask(coversCandidateRegNum); + regNumber coversCandidateRegNum = genFirstRegNumFromMask(coversCandidates); + SingleTypeRegSet coversCandidateBit = genRegMask(coversCandidateRegNum); coversCandidates ^= coversCandidateBit; // If we have a single candidate we don't need to compute the preference-related sets, but we @@ -13319,8 +13326,8 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Register bit selected (a single register) and REG_NA if no register was selected. // template -regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +SingleTypeRegSet LinearScan::RegisterSelection::select(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { #ifdef DEBUG *registerScore = NONE; @@ -13372,7 +13379,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, regType, candidates); #endif assert(candidates != RBM_NONE); @@ -13407,9 +13414,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current nextRelatedInterval = nullptr; // First, get the preferences for this interval - regMaskTP thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); + SingleTypeRegSet thisRelatedPreferences = finalRelatedInterval->getCurrentPreferences(); // Now, determine if they are compatible and update the relatedPreferences that we'll consider. - regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences; + SingleTypeRegSet newRelatedPreferences = thisRelatedPreferences & relatedPreferences; if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS)) { // TODO-CQ: The following isFree() check doesn't account for the possibility that there's an @@ -13477,12 +13484,13 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current if (preferCalleeSave) { - regMaskTP calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType); + SingleTypeRegSet calleeSaveCandidates = linearScan->calleeSaveRegs(currentInterval->registerType); if (currentInterval->isWriteThru) { // We'll only prefer a callee-save register if it's already been used. - regMaskTP unusedCalleeSaves = - calleeSaveCandidates & ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()); + SingleTypeRegSet unusedCalleeSaves = + calleeSaveCandidates & + ~(linearScan->compiler->codeGen->regSet.rsGetModifiedRegsMask()).GetRegSetForType(regType); callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; preferences &= ~unusedCalleeSaves; } @@ -13506,7 +13514,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current found = false; // Is this a fixedReg? - regMaskTP fixedRegMask = RBM_NONE; + SingleTypeRegSet fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13522,7 +13530,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current } #ifdef DEBUG - regMaskTP inUseOrBusyRegsMask = RBM_NONE; + SingleTypeRegSet inUseOrBusyRegsMask = RBM_NONE; #endif // Eliminate candidates that are in-use or busy. 
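
[Reviewer note] The pattern that recurs in the two hunks above, (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(regType), is what keeps the narrowing sound: the union is still a full regMaskTP, which can span more than one register class, while GetRegSetForType picks out the single 64-bit slice that `candidates` lives in before any set algebra happens. A minimal sketch of that narrowing follows; RegMask and its two-field layout are illustrative stand-ins, not the actual target.h definitions.

#include <cassert>
#include <cstdint>

using SingleTypeRegSet = uint64_t; // bits for exactly one register class

// Illustrative stand-in for regMaskTP; the real layout lives in target.h.
struct RegMask
{
    uint64_t low;  // integer/float registers in this model
    uint64_t high; // e.g. mask/predicate registers in this model

    RegMask operator|(const RegMask& rhs) const
    {
        return RegMask{low | rhs.low, high | rhs.high};
    }

    // Narrow the multi-class mask to the 64-bit slice for one class.
    SingleTypeRegSet GetRegSetForType(bool maskClass) const
    {
        return maskClass ? high : low;
    }
};

int main()
{
    RegMask regsBusyUntilKill{0x5, 0x0};
    RegMask regsInUseThisLocation{0x2, 0x1};

    // Pattern from select()/selectMinimal(): union first, narrow once,
    // then do plain 64-bit set algebra against the candidates.
    // (false = the low int/float slice in this model.)
    SingleTypeRegSet busyRegs   = (regsBusyUntilKill | regsInUseThisLocation).GetRegSetForType(false);
    SingleTypeRegSet candidates = 0xF;
    candidates &= ~busyRegs; // registers 0..3 minus the busy ones 0, 1, 2
    assert(candidates == 0x8);
    return 0;
}

Once narrowed, everything downstream is plain 64-bit bit-twiddling, which is why select() and selectMinimal() can carry SingleTypeRegSet throughout.
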
@@ -13531,7 +13539,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. - regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + SingleTypeRegSet busyRegs = + (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13553,11 +13562,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13613,7 +13622,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current if (needsConsecutiveRegisters) { #ifdef TARGET_ARM64 - regMaskTP busyConsecutiveCandidates = RBM_NONE; + SingleTypeRegSet busyConsecutiveCandidates = RBM_NONE; if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); @@ -13630,9 +13639,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current { // We should have a single candidate that will be used for subsequent // refpositions. - assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); + assert((refPosition->refType == RefTypeUpperVectorRestore) || genMaxOneBit(candidates)); - freeCandidates = candidates & linearScan->m_AvailableRegs; + freeCandidates = candidates & linearScan->m_AvailableRegs.GetRegSetForType(regType); } if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE)) @@ -13647,16 +13656,17 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // Remove the `inUseOrBusyRegsMask` from the original candidates list and find one // such range that is consecutive. Next, append that range to the `candidates`. 
// - regMaskTP limitCandidatesForConsecutive = refPosition->registerAssignment & ~inUseOrBusyRegsMask; - regMaskTP overallLimitCandidates; - regMaskTP limitConsecutiveResult = + SingleTypeRegSet limitCandidatesForConsecutive = + ((refPosition->registerAssignment & ~inUseOrBusyRegsMask) & linearScan->availableFloatRegs); + SingleTypeRegSet overallLimitCandidates; + SingleTypeRegSet limitConsecutiveResult = linearScan->filterConsecutiveCandidates(limitCandidatesForConsecutive, refPosition->regCount, &overallLimitCandidates); assert(limitConsecutiveResult != RBM_NONE); unsigned startRegister = BitScanForward(limitConsecutiveResult); - regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1; + SingleTypeRegSet registersNeededMask = (1ULL << refPosition->regCount) - 1; candidates |= (registersNeededMask << startRegister); } @@ -13677,7 +13687,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); } // If no free candidates, then double check if refPosition is an actual ref. @@ -13778,8 +13788,8 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current // select the REG_ORDER heuristics (if there are any free candidates) or REG_NUM (if all registers // are busy). // -regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) +SingleTypeRegSet LinearScan::RegisterSelection::selectMinimal( + Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { assert(!linearScan->enregisterLocalVars); #ifdef DEBUG *registerScore = NONE; #endif @@ -13822,7 +13832,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* } #ifdef DEBUG - candidates = linearScan->stressLimitRegs(refPosition, candidates); + candidates = linearScan->stressLimitRegs(refPosition, regType, candidates); #endif assert(candidates != RBM_NONE); @@ -13835,7 +13845,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* #endif // Is this a fixedReg? - regMaskTP fixedRegMask = RBM_NONE; + SingleTypeRegSet fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); @@ -13852,7 +13862,8 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. // When we allocate for USE, we see that the register is busy at current location // and we end up with that candidate is no longer available. - regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; + SingleTypeRegSet busyRegs = + (linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation).GetRegSetForType(regType); candidates &= ~busyRegs; #ifdef TARGET_ARM @@ -13870,11 +13881,11 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* // Also eliminate as busy any register with a conflicting fixed reference at this or // the next location. // Note that this will eliminate the fixedReg, if any, but we'll add it back below. 
- regMaskTP checkConflictMask = candidates & linearScan->fixedRegs; + SingleTypeRegSet checkConflictMask = candidates & linearScan->fixedRegs.GetRegSetForType(regType); while (checkConflictMask != RBM_NONE) { - regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); - regMaskTP checkConflictBit = genRegMask(checkConflictReg); + regNumber checkConflictReg = genFirstRegNumFromMask(checkConflictMask); + SingleTypeRegSet checkConflictBit = genRegMask(checkConflictReg); checkConflictMask ^= checkConflictBit; LsraLocation checkConflictLocation = linearScan->nextFixedRef[checkConflictReg]; @@ -13905,7 +13916,7 @@ regMaskTP LinearScan::RegisterSelection::selectMinimal(Interval* return RBM_NONE; } - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates, regType); if (freeCandidates != RBM_NONE) { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5206b17884b4e7..e20a92a695426b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -725,7 +725,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, ResolveType resolveType, VARSET_VALARG_TP liveSet, - regMaskTP terminatorConsumedRegs); + SingleTypeRegSet terminatorConsumedRegs); void resolveEdges(); @@ -775,11 +775,12 @@ class LinearScan : public LinearScanInterface return (LsraStressLimitRegs)(lsraStressMask & LSRA_LIMIT_MASK); } - regMaskTP getConstrainedRegMask(RefPosition* refPosition, - regMaskTP regMaskActual, - regMaskTP regMaskConstrain, - unsigned minRegCount); - regMaskTP stressLimitRegs(RefPosition* refPosition, regMaskTP mask); + SingleTypeRegSet getConstrainedRegMask(RefPosition* refPosition, + RegisterType regType, + SingleTypeRegSet regMaskActual, + SingleTypeRegSet regMaskConstrain, + unsigned minRegCount); + SingleTypeRegSet stressLimitRegs(RefPosition* refPosition, RegisterType regType, SingleTypeRegSet mask); // This controls the heuristics used to select registers // These can be combined. @@ -1103,11 +1104,11 @@ class LinearScan : public LinearScanInterface // Given some tree node add refpositions for all the registers this node kills bool buildKillPositionsForNode(GenTree* tree, LsraLocation currentLoc, regMaskTP killMask); - regMaskTP allRegs(RegisterType rt); - regMaskTP allByteRegs(); - regMaskTP allSIMDRegs(); - regMaskTP lowSIMDRegs(); - regMaskTP internalFloatRegCandidates(); + SingleTypeRegSet allRegs(RegisterType rt); + SingleTypeRegSet allByteRegs(); + SingleTypeRegSet allSIMDRegs(); + SingleTypeRegSet lowSIMDRegs(); + SingleTypeRegSet internalFloatRegCandidates(); void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); @@ -1129,11 +1130,11 @@ class LinearScan : public LinearScanInterface } // Managing internal registers during the BuildNode process. 
- RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP candidates); - RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); - RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); + RefPosition* defineNewInternalTemp(GenTree* tree, RegisterType regType, SingleTypeRegSet candidates); + RefPosition* buildInternalIntRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); + RefPosition* buildInternalFloatRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); #if defined(FEATURE_SIMD) - RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands = RBM_NONE); + RefPosition* buildInternalMaskRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands = RBM_NONE); #endif void buildInternalRegisterUses(); @@ -1166,15 +1167,15 @@ class LinearScan : public LinearScanInterface RefPosition* newRefPositionRaw(LsraLocation nodeLocation, GenTree* treeNode, RefType refType); - RefPosition* newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx = 0); + RefPosition* newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + SingleTypeRegSet mask, + unsigned multiRegIdx = 0); RefPosition* newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask); + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, SingleTypeRegSet mask); void applyCalleeSaveHeuristics(RefPosition* rp); @@ -1230,16 +1231,19 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); - regMaskTP filterConsecutiveCandidates(regMaskTP candidates, - unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates); - regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded); + bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + SingleTypeRegSet getConsecutiveCandidates(SingleTypeRegSet candidates, + RefPosition* refPosition, + SingleTypeRegSet* busyCandidates); + SingleTypeRegSet filterConsecutiveCandidates(SingleTypeRegSet candidates, + unsigned int registersNeeded, + SingleTypeRegSet* allConsecutiveCandidates); + SingleTypeRegSet filterConsecutiveCandidatesForSpill(SingleTypeRegSet consecutiveCandidates, + unsigned int registersNeeded); #endif // TARGET_ARM64 - regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) + SingleTypeRegSet getFreeCandidates(regMaskTP candidates, var_types regType) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1250,7 +1254,7 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 1); } #endif // TARGET_ARM - return result; + return result.GetRegSetForType(regType); } #ifdef DEBUG @@ -1269,12 +1273,12 @@ class LinearScan : 
public LinearScanInterface // Perform register selection and update currentInterval or refPosition template - FORCEINLINE regMaskTP select(Interval* currentInterval, - RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); - - FORCEINLINE regMaskTP selectMinimal(Interval* currentInterval, + FORCEINLINE SingleTypeRegSet select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + FORCEINLINE SingleTypeRegSet selectMinimal(Interval* currentInterval, + RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); + // If the register is from unassigned set such that it was not already // assigned to the current interval FORCEINLINE bool foundUnassignedReg() @@ -1314,39 +1318,39 @@ class LinearScan : public LinearScanInterface RegisterType regType = RegisterType::TYP_UNKNOWN; - regMaskTP candidates; - regMaskTP preferences = RBM_NONE; - Interval* relatedInterval = nullptr; + SingleTypeRegSet candidates; + SingleTypeRegSet preferences = RBM_NONE; + Interval* relatedInterval = nullptr; - regMaskTP relatedPreferences = RBM_NONE; - LsraLocation rangeEndLocation; - LsraLocation relatedLastLocation; - bool preferCalleeSave = false; - RefPosition* rangeEndRefPosition; - RefPosition* lastRefPosition; - regMaskTP callerCalleePrefs = RBM_NONE; - LsraLocation lastLocation; + SingleTypeRegSet relatedPreferences = RBM_NONE; + LsraLocation rangeEndLocation; + LsraLocation relatedLastLocation; + bool preferCalleeSave = false; + RefPosition* rangeEndRefPosition; + RefPosition* lastRefPosition; + SingleTypeRegSet callerCalleePrefs = RBM_NONE; + LsraLocation lastLocation; - regMaskTP foundRegBit; + SingleTypeRegSet foundRegBit; - regMaskTP prevRegBit = RBM_NONE; + SingleTypeRegSet prevRegBit = RBM_NONE; // These are used in the post-selection updates, and must be set for any selection. - regMaskTP freeCandidates; - regMaskTP matchingConstants; - regMaskTP unassignedSet; + SingleTypeRegSet freeCandidates; + SingleTypeRegSet matchingConstants; + SingleTypeRegSet unassignedSet; // Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together, // as they all require similar computation. 
- regMaskTP coversSet; - regMaskTP preferenceSet; - regMaskTP coversRelatedSet; - regMaskTP coversFullSet; - bool coversSetsCalculated = false; - bool found = false; - bool skipAllocation = false; - bool coversFullApplied = false; - bool constAvailableApplied = false; + SingleTypeRegSet coversSet; + SingleTypeRegSet preferenceSet; + SingleTypeRegSet coversRelatedSet; + SingleTypeRegSet coversFullSet; + bool coversSetsCalculated = false; + bool found = false; + bool skipAllocation = false; + bool coversFullApplied = false; + bool constAvailableApplied = false; // If the selected register is already assigned to the current internal FORCEINLINE bool isAlreadyAssigned() @@ -1355,8 +1359,8 @@ class LinearScan : public LinearScanInterface return (prevRegBit & preferences) == foundRegBit; } - bool applySelection(int selectionScore, regMaskTP selectionCandidates); - bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate); + bool applySelection(int selectionScore, SingleTypeRegSet selectionCandidates); + bool applySingleRegSelection(int selectionScore, SingleTypeRegSet selectionCandidate); FORCEINLINE void calculateCoversSets(); FORCEINLINE void calculateUnassignedSets(); FORCEINLINE void reset(Interval* interval, RefPosition* refPosition); @@ -1417,7 +1421,7 @@ class LinearScan : public LinearScanInterface BasicBlock* toBlock, var_types type, VARSET_VALARG_TP sharedCriticalLiveSet, - regMaskTP terminatorConsumedRegs); + SingleTypeRegSet terminatorConsumedRegs); #ifdef TARGET_ARM64 typedef JitHashTable, RefPosition*> NextConsecutiveRefPositionsMap; @@ -1432,7 +1436,7 @@ class LinearScan : public LinearScanInterface return nextConsecutiveRefPositionMap; } FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); - void getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates); + void getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, SingleTypeRegSet* candidates); #endif #ifdef DEBUG @@ -1708,15 +1712,15 @@ class LinearScan : public LinearScanInterface VarToRegMap* outVarToRegMaps; // A temporary VarToRegMap used during the resolution of critical edges. 
- VarToRegMap sharedCriticalVarToRegMap; - PhasedVar<regMaskTP> actualRegistersMask; - PhasedVar<regMaskTP> availableIntRegs; - PhasedVar<regMaskTP> availableFloatRegs; - PhasedVar<regMaskTP> availableDoubleRegs; + VarToRegMap sharedCriticalVarToRegMap; + PhasedVar<regMaskTP> actualRegistersMask; + PhasedVar<SingleTypeRegSet> availableIntRegs; + PhasedVar<SingleTypeRegSet> availableFloatRegs; + PhasedVar<SingleTypeRegSet> availableDoubleRegs; #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - PhasedVar<regMaskTP> availableMaskRegs; + PhasedVar<SingleTypeRegSet> availableMaskRegs; #endif - PhasedVar<regMaskTP>* availableRegs[TYP_COUNT]; + PhasedVar<SingleTypeRegSet>* availableRegs[TYP_COUNT]; #if defined(TARGET_XARCH) || defined(TARGET_ARM64) #define allAvailableRegs (availableIntRegs | availableFloatRegs | availableMaskRegs) @@ -1862,7 +1866,7 @@ class LinearScan : public LinearScanInterface regMaskTP regMask = getRegMask(reg, regType); return (m_RegistersWithConstants & regMask) == regMask; } - regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); + SingleTypeRegSet getMatchingConstants(SingleTypeRegSet mask, Interval* currentInterval, RefPosition* refPosition); regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; @@ -1897,7 +1901,7 @@ class LinearScan : public LinearScanInterface regMaskTP regsInUseThisLocation; regMaskTP regsInUseNextLocation; #ifdef TARGET_ARM64 - regMaskTP consecutiveRegsInUseThisLocation; + SingleTypeRegSet consecutiveRegsInUseThisLocation; #endif bool isRegBusy(regNumber reg, var_types regType) { @@ -1982,13 +1986,13 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); + RefPosition* BuildUse(GenTree* operand, SingleTypeRegSet candidates = RBM_NONE, int multiRegIdx = 0); void setDelayFree(RefPosition* use); - int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); - int BuildCastUses(GenTreeCast* cast, regMaskTP candidates); + int BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates = RBM_NONE); + int BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates); #ifdef TARGET_XARCH - int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); - inline regMaskTP BuildEvexIncompatibleMask(GenTree* tree); + int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates = RBM_NONE); + inline SingleTypeRegSet BuildEvexIncompatibleMask(GenTree* tree); #endif // TARGET_XARCH int BuildSelect(GenTreeOp* select); // This is the main entry point for building the RefPositions for a node.
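As a reader's sketch of how these Build* helpers compose (a hypothetical BuildNode case, not taken from this diff): uses are built first, one RefPosition per non-contained operand, and the def last, with RBM_NONE meaning no candidate restriction beyond the register type.

    // Illustrative only: the shape of a simple binary-operator case inside BuildNode.
    case GT_ADD:
    case GT_SUB:
    {
        srcCount = BuildBinaryUses(tree->AsOp(), RBM_NONE); // RefTypeUse positions for op1/op2
        assert(dstCount == 1);
        BuildDef(tree, RBM_NONE);                           // the single RefTypeDef for the result
        break;
    }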
@@ -2000,24 +2004,24 @@ class LinearScan : public LinearScanInterface bool supportsSpecialPutArg(); int BuildSimple(GenTree* tree); - int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildOperandUses(GenTree* node, SingleTypeRegSet candidates = RBM_NONE); void AddDelayFreeUses(RefPosition* refPosition, GenTree* rmwNode); - int BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode = nullptr, - regMaskTP candidates = RBM_NONE, - RefPosition** useRefPosition = nullptr); - int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); - int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); + int BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode = nullptr, + SingleTypeRegSet candidates = RBM_NONE, + RefPosition** useRefPosition = nullptr); + int BuildIndirUses(GenTreeIndir* indirTree, SingleTypeRegSet candidates = RBM_NONE); + int BuildAddrUses(GenTree* addr, SingleTypeRegSet candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); - RefPosition* BuildDef(GenTree* tree, regMaskTP dstCandidates = RBM_NONE, int multiRegIdx = 0); - void BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates = RBM_NONE); + RefPosition* BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates = RBM_NONE, int multiRegIdx = 0); + void BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates = RBM_NONE); void BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates); void BuildKills(GenTree* tree, regMaskTP killMask); #if defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - void BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMaskTP killMask); + void BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask); #else - void BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); + void BuildDefWithKills(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates, regMaskTP killMask); #endif // TARGET_ARMARCH || TARGET_RISCV64 || TARGET_LOONGARCH64 void BuildCallDefsWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask); @@ -2093,28 +2097,28 @@ class LinearScan : public LinearScanInterface int BuildLclHeap(GenTree* tree); #if defined(TARGET_AMD64) - regMaskTP rbmAllFloat; - regMaskTP rbmFltCalleeTrash; + SingleTypeRegSet rbmAllFloat; + SingleTypeRegSet rbmFltCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLFLOAT() const { return this->rbmAllFloat; } - FORCEINLINE regMaskTP get_RBM_FLT_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_FLT_CALLEE_TRASH() const { return this->rbmFltCalleeTrash; } #endif // TARGET_AMD64 #if defined(TARGET_XARCH) - regMaskTP rbmAllMask; - regMaskTP rbmMskCalleeTrash; + SingleTypeRegSet rbmAllMask; + SingleTypeRegSet rbmMskCalleeTrash; - FORCEINLINE regMaskTP get_RBM_ALLMASK() const + FORCEINLINE SingleTypeRegSet get_RBM_ALLMASK() const { return this->rbmAllMask; } - FORCEINLINE regMaskTP get_RBM_MSK_CALLEE_TRASH() const + FORCEINLINE SingleTypeRegSet get_RBM_MSK_CALLEE_TRASH() const { return this->rbmMskCalleeTrash; } @@ -2133,9 +2137,9 @@ class LinearScan : public LinearScanInterface // NOTE: we currently don't need a LinearScan `this` pointer for this definition, and some callers // don't have one available, so make it static.
// - static FORCEINLINE regMaskTP calleeSaveRegs(RegisterType rt) + static FORCEINLINE SingleTypeRegSet calleeSaveRegs(RegisterType rt) { - static const regMaskTP varTypeCalleeSaveRegs[] = { + static const SingleTypeRegSet varTypeCalleeSaveRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) csr, #include "typelist.h" #undef DEF_TP @@ -2149,16 +2153,16 @@ class LinearScan : public LinearScanInterface // Not all of the callee trash values are constant, so don't declare this as a method local static // doing so results in significantly more complex codegen and we'd rather just initialize this once // as part of initializing LSRA instead - regMaskTP varTypeCalleeTrashRegs[TYP_COUNT]; + SingleTypeRegSet varTypeCalleeTrashRegs[TYP_COUNT]; #endif // TARGET_XARCH //------------------------------------------------------------------------ // callerSaveRegs: Get the set of caller-save registers of the given RegisterType // - FORCEINLINE regMaskTP callerSaveRegs(RegisterType rt) const + FORCEINLINE SingleTypeRegSet callerSaveRegs(RegisterType rt) const { #if !defined(TARGET_XARCH) - static const regMaskTP varTypeCalleeTrashRegs[] = { + static const SingleTypeRegSet varTypeCalleeTrashRegs[] = { #define DEF_TP(tn, nm, jitType, sz, sze, asze, st, al, regTyp, regFld, csr, ctr, tf) ctr, #include "typelist.h" #undef DEF_TP @@ -2229,10 +2233,10 @@ class Interval : public Referenceable void setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l); // Fixed registers for which this Interval has a preference - regMaskTP registerPreferences; + SingleTypeRegSet registerPreferences; // Registers that should be avoided for this interval - regMaskTP registerAversion; + SingleTypeRegSet registerAversion; // The relatedInterval is: // - for any other interval, it is the interval to which this interval @@ -2381,12 +2385,12 @@ class Interval : public Referenceable // definitions. This method will return the current assigned register if any, or // the 'registerPreferences' otherwise. // - regMaskTP getCurrentPreferences() + SingleTypeRegSet getCurrentPreferences() { return (assignedReg == nullptr) ? registerPreferences : genRegMask(assignedReg->regNum); } - void mergeRegisterPreferences(regMaskTP preferences) + void mergeRegisterPreferences(SingleTypeRegSet preferences) { // We require registerPreferences to have been initialized. assert(registerPreferences != RBM_NONE); @@ -2401,7 +2405,7 @@ class Interval : public Referenceable return; } - regMaskTP commonPreferences = (registerPreferences & preferences); + SingleTypeRegSet commonPreferences = (registerPreferences & preferences); if (commonPreferences != RBM_NONE) { registerPreferences = commonPreferences; @@ -2436,11 +2440,12 @@ class Interval : public Referenceable // Keep only the callee-save preferences, if not empty. // Otherwise, take the union of the preferences. 
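// (Worked example with hypothetical x64 masks: registerPreferences = {RBX, RCX} merged with preferences = {RDX, R12} gives newPreferences = {RBX, RCX, RDX, R12} below; when preferCalleeSave is set, intersecting with the callee-saved set keeps {RBX, R12}, and that smaller set becomes the preference.)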
- regMaskTP newPreferences = registerPreferences | preferences; + SingleTypeRegSet newPreferences = registerPreferences | preferences; if (preferCalleeSave) { - regMaskTP calleeSaveMask = (LinearScan::calleeSaveRegs(this->registerType) & newPreferences); + SingleTypeRegSet calleeSaveMask = LinearScan::calleeSaveRegs(this->registerType) & newPreferences; + if (calleeSaveMask != RBM_NONE) { newPreferences = calleeSaveMask; @@ -2455,7 +2460,7 @@ class Interval : public Referenceable // An exception is made in the case where either the existing or the new // preferences are all callee-save, in which case we "prefer" the callee-save set - void updateRegisterPreferences(regMaskTP preferences) + void updateRegisterPreferences(SingleTypeRegSet preferences) { // If this interval is preferenced, that interval may have already been assigned a // register, and we want to include that in the preferences. @@ -2493,7 +2498,7 @@ class RefPosition // Prior to the allocation pass, registerAssignment captures the valid registers // for this RefPosition. // After the allocation pass, this contains the actual assignment - regMaskTP registerAssignment; + SingleTypeRegSet registerAssignment; RefType refType; diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index e653514d3c14c0..f2c60cde13eb04 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -666,8 +666,8 @@ int LinearScan::BuildNode(GenTree* tree) case GT_BITCAST: { assert(dstCount == 1); - regNumber argReg = tree->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = tree->GetRegNum(); + SingleTypeRegSet argMask = RBM_NONE; if (argReg != REG_COUNT) { argMask = genRegMask(argReg); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4f29c7169e7ad4..5283d2fc00fff7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -176,27 +176,29 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // From `candidates`, the mask of series of consecutive registers of `registersNeeded` size with just the first-bit // set. // -regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, - unsigned int registersNeeded, - regMaskTP* allConsecutiveCandidates) +SingleTypeRegSet LinearScan::filterConsecutiveCandidates(SingleTypeRegSet floatCandidates, + unsigned int registersNeeded, + SingleTypeRegSet* allConsecutiveCandidates) { - if (PopCount(candidates) < registersNeeded) + assert((floatCandidates == RBM_NONE) || (floatCandidates & availableFloatRegs) != RBM_NONE); + + if (PopCount(floatCandidates) < registersNeeded) { // There is no way the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. return RBM_NONE; } - regMaskTP currAvailableRegs = candidates; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; + SingleTypeRegSet currAvailableRegs = floatCandidates; + SingleTypeRegSet overallResult = RBM_NONE; + SingleTypeRegSet consecutiveResult = RBM_NONE; // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that.
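// (Worked example, hypothetical values: with registersNeeded == 2 and a free run V4..V7, regAvailableStartIndex == 36 and regAvailableEndIndex == 40, since V0 sits at bit 32 of this mask. The macro below then forms selectionStartMask = bits 0..35 and selectionEndMask = bits 0..38, so availableRegistersMask & (selectionEndMask & ~selectionStartMask) keeps exactly the bits for V4, V5 and V6, the starting registers from which two consecutive free registers still fit.)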
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + SingleTypeRegSet selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + SingleTypeRegSet selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; @@ -205,11 +207,11 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitScanForward(currAvailableRegs); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + regAvailableStartIndex = BitScanForward(currAvailableRegs); + SingleTypeRegSet startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + SingleTypeRegSet maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). if (maskProcessed == RBM_NONE) @@ -225,7 +227,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { regAvailableEndIndex = BitScanForward(maskProcessed); } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + SingleTypeRegSet endMask = (1ULL << regAvailableEndIndex) - 1; // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. @@ -236,8 +238,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; - if ((candidates & v0_v31_mask) == v0_v31_mask) + SingleTypeRegSet v0_v31_mask = RBM_V0 | RBM_V31; + if ((floatCandidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register // round to first register is available. 
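// (For instance, with registersNeeded == 2, the first case below: once both V31 and V0 are candidates, V31 becomes a legal starting register because the pair wraps around as (V31, V0), so RBM_V31 is added to consecutiveResult.)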
@@ -251,7 +253,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { case 2: { - if ((candidates & v0_v31_mask) != RBM_NONE) + if ((floatCandidates & v0_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v31_mask; @@ -260,15 +262,15 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 3: { - regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; - if ((candidates & v0_v30_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + if ((floatCandidates & v0_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v30_v31_mask; } - regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; - if ((candidates & v0_v1_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + if ((floatCandidates & v0_v1_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v1_v31_mask; @@ -277,22 +279,22 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } case 4: { - regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & v0_v29_v30_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + if ((floatCandidates & v0_v29_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V29; overallResult |= v0_v29_v30_v31_mask; } - regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v1_v30_v31_mask = RBM_V0 | RBM_V1 | RBM_V30 | RBM_V31; + if ((floatCandidates & v0_v1_v30_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V30; overallResult |= v0_v1_v30_v31_mask; } - regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; - if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE) + SingleTypeRegSet v0_v1_v2_v31_mask = RBM_V0 | RBM_V1 | RBM_V2 | RBM_V31; + if ((floatCandidates & v0_v1_v2_v31_mask) != RBM_NONE) { consecutiveResult |= RBM_V31; overallResult |= v0_v1_v2_v31_mask; @@ -323,15 +325,16 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // Returns: // Filtered candidates that need less spilling. // -regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded) +SingleTypeRegSet LinearScan::filterConsecutiveCandidatesForSpill(SingleTypeRegSet consecutiveCandidates, + unsigned int registersNeeded) { assert(consecutiveCandidates != RBM_NONE); assert((registersNeeded >= 2) && (registersNeeded <= 4)); - regMaskTP consecutiveResultForBusy = RBM_NONE; - regMaskTP unprocessedRegs = consecutiveCandidates; - unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; - int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + SingleTypeRegSet consecutiveResultForBusy = RBM_NONE; + SingleTypeRegSet unprocessedRegs = consecutiveCandidates; + unsigned regAvailableStartIndex = 0, regAvailableEndIndex = 0; + int maxSpillRegs = registersNeeded; + SingleTypeRegSet registersNeededMask = (1ULL << registersNeeded) - 1; do { // From LSB, find the first available register (bit `1`) @@ -413,31 +416,33 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC // allCandidates = 0x1C080D0F00000000, the consecutive register mask returned // will be 0x400000300000000.
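// (Decoding that example, with V0 at bit 32: allCandidates contains the free runs V0-V3, V8, V10-V11, V19 and V26-V28; the stated result 0x400000300000000 sets exactly V0, V1 and V26, which are the legal starting registers when the series must supply three consecutive registers, i.e. the values are consistent with registersNeeded == 3.)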
// -regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, - RefPosition* refPosition, - regMaskTP* busyCandidates) +SingleTypeRegSet LinearScan::getConsecutiveCandidates(SingleTypeRegSet allCandidates, + RefPosition* refPosition, + SingleTypeRegSet* busyCandidates) { assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); regMaskTP freeCandidates = allCandidates & m_AvailableRegs; + assert((freeCandidates.IsEmpty()) || (freeCandidates.getLow() & availableFloatRegs)); + SingleTypeRegSet floatFreeCandidates = freeCandidates.getLow(); #ifdef DEBUG if (getStressLimitRegs() != LSRA_LIMIT_NONE) { // For stress, make only alternate registers available so we can stress the selection of free/busy registers. - freeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | - RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); + floatFreeCandidates &= (RBM_V0 | RBM_V2 | RBM_V4 | RBM_V6 | RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14 | RBM_V16 | + RBM_V18 | RBM_V20 | RBM_V22 | RBM_V24 | RBM_V26 | RBM_V28 | RBM_V30); } #endif *busyCandidates = RBM_NONE; - regMaskTP overallResult; - unsigned int registersNeeded = refPosition->regCount; + SingleTypeRegSet overallResult; + unsigned int registersNeeded = refPosition->regCount; - if (freeCandidates != RBM_NONE) + if (floatFreeCandidates != RBM_NONE) { - regMaskTP consecutiveResultForFree = - filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); + SingleTypeRegSet consecutiveResultForFree = + filterConsecutiveCandidates(floatFreeCandidates, registersNeeded, &overallResult); if (consecutiveResultForFree != RBM_NONE) { @@ -446,10 +451,9 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // register out of the `consecutiveResult` is available for the first RefPosition, then just use // that. This will avoid unnecessary copies. - regNumber firstRegNum = REG_NA; - regNumber prevRegNum = REG_NA; - int foundCount = 0; - regMaskTP foundRegMask = RBM_NONE; + regNumber firstRegNum = REG_NA; + regNumber prevRegNum = REG_NA; + int foundCount = 0; RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition); assert(consecutiveRefPosition != nullptr); @@ -461,8 +465,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if (!interval->isActive) { - foundRegMask = RBM_NONE; - foundCount = 0; + foundCount = 0; continue; } @@ -470,7 +473,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if ((prevRegNum == REG_NA) || (prevRegNum == REG_PREV(currRegNum)) || ((prevRegNum == REG_FP_LAST) && (currRegNum == REG_FP_FIRST))) { - foundRegMask |= genRegMask(currRegNum); if (prevRegNum == REG_NA) { firstRegNum = currRegNum; @@ -480,8 +482,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, continue; } - foundRegMask = RBM_NONE; - foundCount = 0; + foundCount = 0; break; } @@ -528,8 +529,8 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // try_FAR_NEXT_REF(), etc. here which would complicate things. Instead, we just go with option# 1 and select // registers based on fewer number of registers that has to be spilled. 
// - regMaskTP overallResultForBusy; - regMaskTP consecutiveResultForBusy = + SingleTypeRegSet overallResultForBusy; + SingleTypeRegSet consecutiveResultForBusy = filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy); *busyCandidates = consecutiveResultForBusy; @@ -541,7 +542,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // If there is an overlap of that with free registers, then try to find a series that will need least // registers spilling as mentioned in #1 above. - regMaskTP optimalConsecutiveResultForBusy = + SingleTypeRegSet optimalConsecutiveResultForBusy = filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded); if (optimalConsecutiveResultForBusy != RBM_NONE) @@ -554,7 +555,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // `allCandidates` that are mix of free and busy. Since `busyCandidates` just has bit set for first // register of such series, return the mask that starts with free register, if possible. The busy // registers will be spilled during assignment of subsequent RefPosition. - *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy); + *busyCandidates = (m_AvailableRegs.GetRegSetForType(TYP_FLOAT) & consecutiveResultForBusy); } } @@ -1561,7 +1562,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id)) { - regMaskTP predMask = RBM_ALLMASK; + SingleTypeRegSet predMask = RBM_ALLMASK; if (intrin.id == NI_Sve_ConditionalSelect) { // If this is conditional select, make sure to check the embedded @@ -1878,9 +1879,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op1 != nullptr); - bool forceOp2DelayFree = false; - regMaskTP lowVectorCandidates = RBM_NONE; - size_t lowVectorOperandNum = 0; + bool forceOp2DelayFree = false; + SingleTypeRegSet lowVectorCandidates = RBM_NONE; + size_t lowVectorOperandNum = 0; if ((intrin.id == NI_Vector64_GetElement) || (intrin.id == NI_Vector128_GetElement)) { if (!intrin.op2->IsCnsIntOrI() && (!intrin.op1->isContained() || intrin.op1->OperIsLocal())) @@ -1936,7 +1937,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - regMaskTP candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE; + SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE; if (forceOp2DelayFree) { srcCount += BuildDelayFreeUses(intrin.op2, nullptr, candidates); @@ -1950,7 +1951,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (intrin.op3 != nullptr) { - regMaskTP candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE; + SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE; srcCount += isRMW ? 
BuildDelayFreeUses(intrin.op3, intrin.op1, candidates) : BuildOperandUses(intrin.op3, candidates); @@ -2260,7 +2261,7 @@ bool RefPosition::isLiveAtConsecutiveRegistersLoc(LsraLocation consecutiveRegist // operandNum (out) - The operand number having the low vector register restriction // candidates (out) - The restricted low vector registers // -void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, regMaskTP* candidates) +void LinearScan::getLowVectorOperandAndCandidates(HWIntrinsic intrin, size_t* operandNum, SingleTypeRegSet* candidates) { assert(HWIntrinsicInfo::IsLowVectorOperation(intrin.id)); unsigned baseElementSize = genTypeSize(intrin.baseType); diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index e70de49a8e793c..85f1f664424046 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -129,7 +129,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -148,8 +148,8 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - GenTree* ctrlExpr = call->gtControlExpr; - regMaskTP ctrlExprCandidates = RBM_NONE; + GenTree* ctrlExpr = call->gtControlExpr; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -185,7 +185,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. - regMaskTP candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; + SingleTypeRegSet candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; assert(candidates != RBM_NONE); buildInternalIntRegisterDefForNode(call, candidates); } @@ -213,7 +213,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // the target. We do not handle these constraints on the same // refposition too well so we help ourselves a bit here by forcing the // null check with LR. - regMaskTP candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; + SingleTypeRegSet candidates = call->IsFastTailCall() ? RBM_LR : RBM_NONE; buildInternalIntRegisterDefForNode(call, candidates); } #endif // TARGET_ARM @@ -536,14 +536,16 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = argNode->GetRegNum(); + SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } + assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || + ((argMask & availableFloatRegs) != RBM_NONE)); if (src->OperGet() == GT_FIELD_LIST) { @@ -577,7 +579,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. 
for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - regMaskTP sourceMask = RBM_NONE; + SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); @@ -635,9 +637,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { @@ -694,7 +696,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = + SingleTypeRegSet internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 34b7eaa9d3a3dc..e416f5e40c98aa 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -250,15 +250,15 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de { assert(!interval->isLocalVar); - RefPosition* useRefPosition = defRefPosition->nextRefPosition; - regMaskTP defRegAssignment = defRefPosition->registerAssignment; - regMaskTP useRegAssignment = useRefPosition->registerAssignment; - RegRecord* defRegRecord = nullptr; - RegRecord* useRegRecord = nullptr; - regNumber defReg = REG_NA; - regNumber useReg = REG_NA; - bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); - bool useRegConflict = defRegConflict; + RefPosition* useRefPosition = defRefPosition->nextRefPosition; + SingleTypeRegSet defRegAssignment = defRefPosition->registerAssignment; + SingleTypeRegSet useRegAssignment = useRefPosition->registerAssignment; + RegRecord* defRegRecord = nullptr; + RegRecord* useRegRecord = nullptr; + regNumber defReg = REG_NA; + regNumber useReg = REG_NA; + bool defRegConflict = ((defRegAssignment & useRegAssignment) == RBM_NONE); + bool useRegConflict = defRegConflict; // If the useRefPosition is a "delayRegFree", we can't change the registerAssignment // on it, or we will fail to ensure that the fixedReg is busy at the time the target @@ -352,7 +352,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de RegisterType regType = interval->registerType; assert((getRegisterType(interval, defRefPosition) == regType) && (getRegisterType(interval, useRefPosition) == regType)); - regMaskTP candidates = allRegs(regType); + SingleTypeRegSet candidates = allRegs(regType); defRefPosition->registerAssignment = candidates; defRefPosition->isFixedRegRef = false; return; @@ -423,8 +423,8 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP) // All defs must have a valid treeNode, but we check it below to be conservative. 
assert(defRP->treeNode != nullptr); - regMaskTP prevAssignment = defRP->registerAssignment; - regMaskTP newAssignment = (prevAssignment & useRP->registerAssignment); + SingleTypeRegSet prevAssignment = defRP->registerAssignment; + SingleTypeRegSet newAssignment = (prevAssignment & useRP->registerAssignment); if (newAssignment != RBM_NONE) { if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) @@ -519,7 +519,7 @@ void LinearScan::associateRefPosWithInterval(RefPosition* rp) // a new RefPosition // RefPosition* LinearScan::newRefPosition( - regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, regMaskTP mask) + regNumber reg, LsraLocation theLocation, RefType theRefType, GenTree* theTreeNode, SingleTypeRegSet mask) { RefPosition* newRP = newRefPositionRaw(theLocation, theTreeNode, theRefType); @@ -554,12 +554,12 @@ RefPosition* LinearScan::newRefPosition( // Return Value: // a new RefPosition // -RefPosition* LinearScan::newRefPosition(Interval* theInterval, - LsraLocation theLocation, - RefType theRefType, - GenTree* theTreeNode, - regMaskTP mask, - unsigned multiRegIdx /* = 0 */) +RefPosition* LinearScan::newRefPosition(Interval* theInterval, + LsraLocation theLocation, + RefType theRefType, + GenTree* theTreeNode, + SingleTypeRegSet mask, + unsigned multiRegIdx /* = 0 */) { if (theInterval != nullptr) { @@ -708,7 +708,7 @@ void LinearScan::addKillForRegs(regMaskTP mask, LsraLocation currentLoc) // modified until codegen, which is too late. compiler->codeGen->regSet.rsSetRegsModified(mask DEBUGARG(true)); - RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask); + RefPosition* pos = newRefPosition((Interval*)nullptr, currentLoc, RefTypeKill, nullptr, mask.getLow()); *killTail = pos; killTail = &pos->nextRefPosition; @@ -1147,8 +1147,9 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { continue; } - Interval* interval = getIntervalForLocalVar(varIndex); - const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + Interval* interval = getIntervalForLocalVar(varIndex); + const bool isCallKill = ((killMask == RBM_INT_CALLEE_TRASH) || (killMask == RBM_CALLEE_TRASH)); + SingleTypeRegSet regsKillMask = killMask.GetRegSetForType(interval->registerType); if (isCallKill) { @@ -1161,7 +1162,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo // See the "heuristics for writeThru intervals" in 'buildIntervals()'. if (!interval->isWriteThru || !isCallKill) { - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + SingleTypeRegSet newPreferences = allRegs(interval->registerType) & (~regsKillMask); if (newPreferences != RBM_NONE) { @@ -1169,7 +1170,7 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { // Update the register aversion as long as this is not write-thru vars for // reason mentioned above. 
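// (Illustration: for a TYP_INT interval crossing a call that kills RBM_INT_CALLEE_TRASH, regsKillMask is the caller-saved integer set, so newPreferences = allRegs(TYP_INT) & ~regsKillMask leaves only the callee-saved registers; the local is thereby steered toward a register the call does not trash.)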
- interval->registerAversion |= killMask; + interval->registerAversion |= regsKillMask; } interval->updateRegisterPreferences(newPreferences); } @@ -1312,7 +1313,7 @@ bool LinearScan::checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode) // currentLoc - Location of the temp Def position // regMask - register mask of candidates for temp // -RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, regMaskTP regMask) +RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regType, SingleTypeRegSet regMask) { Interval* current = newInterval(regType); current->isInternal = true; @@ -1332,7 +1333,7 @@ RefPosition* LinearScan::defineNewInternalTemp(GenTree* tree, RegisterType regTy // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only integer registers. assert((internalCands & ~availableIntRegs) == RBM_NONE); @@ -1351,7 +1352,7 @@ RefPosition* LinearScan::buildInternalIntRegisterDefForNode(GenTree* tree, regMa // Returns: // The def RefPosition created for this internal temp. // -RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only float registers. assert((internalCands & ~availableFloatRegs) == RBM_NONE); @@ -1361,7 +1362,7 @@ RefPosition* LinearScan::buildInternalFloatRegisterDefForNode(GenTree* tree, reg } #if defined(FEATURE_SIMD) && defined(TARGET_XARCH) -RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, regMaskTP internalCands) +RefPosition* LinearScan::buildInternalMaskRegisterDefForNode(GenTree* tree, SingleTypeRegSet internalCands) { // The candidate set should contain only mask registers.
assert((internalCands & ~availableMaskRegs) == RBM_NONE); @@ -1391,9 +1392,9 @@ void LinearScan::buildInternalRegisterUses() assert(internalCount <= MaxInternalCount); for (int i = 0; i < internalCount; i++) { - RefPosition* def = internalDefs[i]; - regMaskTP mask = def->registerAssignment; - RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); + RefPosition* def = internalDefs[i]; + SingleTypeRegSet mask = def->registerAssignment; + RefPosition* use = newRefPosition(def->getInterval(), currentLoc, RefTypeUse, def->treeNode, mask, 0); if (setInternalRegsDelayFree) { use->delayRegFree = true; @@ -1859,9 +1860,9 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc newRefPosition->minRegCandidateCount = minRegCountForRef; if (newRefPosition->IsActualRef() && doReverseCallerCallee()) { - Interval* interval = newRefPosition->getInterval(); - regMaskTP oldAssignment = newRefPosition->registerAssignment; - regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); + Interval* interval = newRefPosition->getInterval(); + SingleTypeRegSet oldAssignment = newRefPosition->registerAssignment; + SingleTypeRegSet calleeSaveMask = calleeSaveRegs(interval->registerType); #ifdef TARGET_ARM64 if (newRefPosition->isLiveAtConsecutiveRegistersLoc(consecutiveRegistersLocation)) { @@ -1875,7 +1876,8 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc #endif // TARGET_ARM64 { newRefPosition->registerAssignment = - getConstrainedRegMask(newRefPosition, oldAssignment, calleeSaveMask, minRegCountForRef); + getConstrainedRegMask(newRefPosition, interval->registerType, oldAssignment, calleeSaveMask, + minRegCountForRef); } if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && @@ -2823,14 +2825,23 @@ void LinearScan::buildIntervals() availableRegCount = REG_INT_COUNT; } - if (availableRegCount < (sizeof(regMaskTP) * 8)) + static_assert(sizeof(regMaskTP) == 2 * sizeof(regMaskSmall)); + + if (availableRegCount < (sizeof(regMaskSmall) * 8)) + { + // Mask out the bits that are between (8 * regMaskSmall) ~ availableRegCount + actualRegistersMask = regMaskTP((1ULL << availableRegCount) - 1); + } + else if (availableRegCount < (sizeof(regMaskTP) * 8)) { - // Mask out the bits that are between 64 ~ availableRegCount - actualRegistersMask = (1ULL << availableRegCount) - 1; + // Mask out the bits that are between (8 * regMaskTP) ~ availableRegCount + // Subtract one extra for stack. + unsigned topRegCount = availableRegCount - sizeof(regMaskSmall) * 8 - 1; + actualRegistersMask = regMaskTP(~RBM_NONE, (1ULL << topRegCount) - 1); } else { - actualRegistersMask = ~RBM_NONE; + actualRegistersMask = regMaskTP(~RBM_NONE, ~RBM_NONE); } #ifdef DEBUG @@ -2861,9 +2872,9 @@ void LinearScan::buildInitialParamDef(const LclVarDsc* varDsc, regNumber paramRe { assert(isCandidateVar(varDsc)); - Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); - const var_types regType = varDsc->GetRegisterType(); - regMaskTP mask = allRegs(regType); + Interval* interval = getIntervalForLocalVar(varDsc->lvVarIndex); + const var_types regType = varDsc->GetRegisterType(); + SingleTypeRegSet mask = allRegs(regType); if ((paramReg != REG_NA) && !stressInitialParamReg()) { // Set this interval as currently assigned to that register @@ -3053,7 +3064,7 @@ void setTgtPref(Interval* interval, RefPosition* tgtPrefUse) // Notes: // Adds the RefInfo for the definition to the defList. 
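// (Mechanically, the RefTypeDef built here is wrapped in a RefInfoListNode and parked on the defList; when the consuming parent node is processed, BuildUse pops that entry and ties the parent's use back to this def.)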
// -RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int multiRegIdx) +RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates, int multiRegIdx) { assert(!tree->isContained()); @@ -3144,10 +3155,6 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, regMaskTP dstCandidates, int mu // void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) { - assert(dstCount > 0); - assert((int)genCountBits(dstCandidates) == dstCount); - assert(tree->IsMultiRegCall()); - const ReturnTypeDesc* retTypeDesc = tree->AsCall()->GetReturnTypeDesc(); assert(retTypeDesc != nullptr); if (retTypeDesc == nullptr) @@ -3155,17 +3162,20 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida return; } + assert(dstCount > 0); + assert((int)genCountBits(dstCandidates) == dstCount); + assert(tree->IsMultiRegCall()); + for (int i = 0; i < dstCount; i++) { // In case of multi-reg call node, we have to query the i'th position return register. // For all other cases of multi-reg definitions, the registers must be in sequential order. - regMaskTP thisDstCandidates = - genRegMask(retTypeDesc->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv())); + regNumber thisReg = + tree->AsCall()->GetReturnTypeDesc()->GetABIReturnReg(i, tree->AsCall()->GetUnmanagedCallConv()); - assert((dstCandidates & thisDstCandidates) != RBM_NONE); - dstCandidates &= ~thisDstCandidates; - - BuildDef(tree, thisDstCandidates, i); + assert(dstCandidates.IsRegNumInMask(thisReg)); + dstCandidates.RemoveRegNumFromMask(thisReg); + BuildDef(tree, genRegMask(thisReg), i); } } @@ -3182,11 +3192,11 @@ void LinearScan::BuildCallDefs(GenTree* tree, int dstCount, regMaskTP dstCandida // Also, the `dstCandidates` is assumed to be of "onlyOne" type. If there are // both gpr and float registers, use `BuildDefs` that takes `AllRegsMask` // -void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) +void LinearScan::BuildDefs(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates) { assert(dstCount > 0); - if ((dstCandidates == RBM_NONE) || ((int)genCountBits(dstCandidates) != dstCount)) + if ((dstCandidates == RBM_NONE) || ((int)PopCount(dstCandidates) != dstCount)) { // This is not fixedReg case, so just create definitions based on dstCandidates for (int i = 0; i < dstCount; i++) @@ -3198,7 +3208,7 @@ void LinearScan::BuildDefs(GenTree* tree, int dstCount, regMaskTP dstCandidates) for (int i = 0; i < dstCount; i++) { - regMaskTP thisDstCandidates = genFindLowestBit(dstCandidates); + SingleTypeRegSet thisDstCandidates = genFindLowestBit(dstCandidates); BuildDef(tree, thisDstCandidates, i); dstCandidates &= ~thisDstCandidates; } @@ -3256,10 +3266,10 @@ void LinearScan::BuildKills(GenTree* tree, regMaskTP killMask) // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. 
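// (Illustrative call shape for this single-def variant: for a call whose lone result lands in R0 and which trashes killMask, BuildDefWithKills(call, genRegMask(REG_R0), killMask) builds the kill RefPositions first and then the one def constrained to R0, matching the single-register assert below.)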
// -void LinearScan::BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildDefWithKills(GenTree* tree, SingleTypeRegSet dstCandidates, regMaskTP killMask) { assert(!tree->AsCall()->HasMultiRegRetVal()); - assert((int)genCountBits(dstCandidates) == 1); + assert((int)PopCount(dstCandidates) == 1); // Build the kill RefPositions BuildKills(tree, killMask); @@ -3281,7 +3291,7 @@ void LinearScan::BuildDefWithKills(GenTree* tree, regMaskTP dstCandidates, regMa // The def and kill functionality is folded into a single method so that the // save and restores of upper vector registers can be bracketed around the def. // -void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCandidates, regMaskTP killMask) +void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, SingleTypeRegSet dstCandidates, regMaskTP killMask) { // Build the kill RefPositions BuildKills(tree, killMask); @@ -3302,7 +3312,7 @@ void LinearScan::BuildDefWithKills(GenTree* tree, int dstCount, regMaskTP dstCan } #endif // TARGET_64BIT } -#endif +#endif // defined(TARGET_ARMARCH) || defined(TARGET_RISCV64) //------------------------------------------------------------------------ // BuildCallDefsWithKills: Build one or more RefTypeDef RefPositions for the given node, @@ -3375,11 +3385,11 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) { // This local's value is going to be available in this register so // keep it in the preferences. - unpref &= ~genRegMask(placedArgLocals[i].Reg); + unpref.RemoveRegNumFromMask(placedArgLocals[i].Reg); } } - if (unpref != RBM_NONE) + if (unpref.IsNonEmpty()) { #ifdef DEBUG if (VERBOSE) @@ -3391,8 +3401,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) } #endif - interval->registerAversion |= unpref; - regMaskTP newPreferences = allRegs(interval->registerType) & ~unpref; + SingleTypeRegSet unprefSet = unpref.GetRegSetForType(interval->registerType); + interval->registerAversion |= unprefSet; + SingleTypeRegSet newPreferences = allRegs(interval->registerType) & ~unprefSet; interval->updateRegisterPreferences(newPreferences); } } @@ -3412,7 +3423,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). // -RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) +RefPosition* LinearScan::BuildUse(GenTree* operand, SingleTypeRegSet candidates, int multiRegIdx) { assert(!operand->isContained()); Interval* interval; @@ -3482,12 +3493,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu // Notes: // This method may only be used if the candidates are the same for all sources. // -int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates) +int LinearScan::BuildIndirUses(GenTreeIndir* indirTree, SingleTypeRegSet candidates) { return BuildAddrUses(indirTree->Addr(), candidates); } -int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) +int LinearScan::BuildAddrUses(GenTree* addr, SingleTypeRegSet candidates) { if (!addr->isContained()) { @@ -3544,7 +3555,7 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates) // Return Value: // The number of source registers used by the *parent* of this node. 
// -int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildOperandUses(GenTree* node, SingleTypeRegSet candidates) { if (!node->isContained()) { @@ -3698,10 +3709,10 @@ void LinearScan::AddDelayFreeUses(RefPosition* useRefPosition, GenTree* rmwNode) // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildDelayFreeUses(GenTree* node, - GenTree* rmwNode, - regMaskTP candidates, - RefPosition** useRefPositionRef) +int LinearScan::BuildDelayFreeUses(GenTree* node, + GenTree* rmwNode, + SingleTypeRegSet candidates, + RefPosition** useRefPositionRef) { RefPosition* use = nullptr; GenTree* addr = nullptr; @@ -3797,7 +3808,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, // The operands must already have been processed by buildRefPositionsForNode, and their // RefInfoListNodes placed in the defList. // -int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) +int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates) { GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2IfPresent(); @@ -3831,7 +3842,7 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) // Return Value: // The number of actual register operands. // -int LinearScan::BuildCastUses(GenTreeCast* cast, regMaskTP candidates) +int LinearScan::BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates) { GenTree* src = cast->CastOp(); @@ -3896,8 +3907,8 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, } } - regMaskTP defCandidates = RBM_NONE; - var_types type = varDsc->GetRegisterType(); + SingleTypeRegSet defCandidates = RBM_NONE; + var_types type = varDsc->GetRegisterType(); #ifdef TARGET_X86 if (varTypeIsByte(type)) @@ -3982,7 +3993,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc) if (isMultiRegSrc) { - regMaskTP srcCandidates = RBM_NONE; + SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = fieldVarDsc->TypeGet(); if (varTypeIsByte(type)) @@ -4092,8 +4103,8 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc) } else { - srcCount = 1; - regMaskTP srcCandidates = RBM_NONE; + srcCount = 1; + SingleTypeRegSet srcCandidates = RBM_NONE; #ifdef TARGET_X86 var_types type = varDsc->GetRegisterType(storeLoc); if (varTypeIsByte(type)) @@ -4184,7 +4195,7 @@ int LinearScan::BuildReturn(GenTree* tree) #endif // !defined(TARGET_64BIT) if ((tree->TypeGet() != TYP_VOID) && !op1->isContained()) { - regMaskTP useCandidates = RBM_NONE; + SingleTypeRegSet useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET #ifdef TARGET_ARM64 @@ -4222,7 +4233,7 @@ int LinearScan::BuildReturn(GenTree* tree) if (srcType != dstType) { hasMismatchedRegTypes = true; - regMaskTP dstRegMask = + SingleTypeRegSet dstRegMask = genRegMask(retTypeDesc.GetABIReturnReg(i, compiler->info.compCallConv)); if (varTypeUsesIntReg(dstType)) @@ -4377,8 +4388,8 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) // To avoid redundant moves, have the argument operand computed in the // register in which the argument is passed to the call. - regMaskTP argMask = genRegMask(argReg); - RefPosition* use = BuildUse(op1, argMask); + SingleTypeRegSet argMask = genRegMask(argReg); + RefPosition* use = BuildUse(op1, argMask); // Record that this register is occupied by a register now. 
placedArgRegs |= argMask; @@ -4409,7 +4420,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) if (node->TypeGet() == TYP_LONG) { srcCount++; - regMaskTP argMaskHi = genRegMask(REG_NEXT(argReg)); + SingleTypeRegSet argMaskHi = genRegMask(REG_NEXT(argReg)); assert(genRegArgNext(argReg) == REG_NEXT(argReg)); use = BuildUse(op1, argMaskHi, 1); BuildDef(node, argMask, 0); @@ -4474,8 +4485,8 @@ int LinearScan::BuildGCWriteBarrier(GenTree* tree) // is an indir through an lea, we need to actually instantiate the // lea in a register assert(!addr->isContained() && !src->isContained()); - regMaskTP addrCandidates = RBM_WRITE_BARRIER_DST; - regMaskTP srcCandidates = RBM_WRITE_BARRIER_SRC; + SingleTypeRegSet addrCandidates = RBM_WRITE_BARRIER_DST; + SingleTypeRegSet srcCandidates = RBM_WRITE_BARRIER_SRC; #if defined(TARGET_X86) && NOGC_WRITE_BARRIERS @@ -4522,7 +4533,7 @@ int LinearScan::BuildCmp(GenTree* tree) if (!tree->TypeIs(TYP_VOID)) { - regMaskTP dstCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; #ifdef TARGET_X86 // If the compare is used by a jump, we just need to set the condition codes. If not, then we need @@ -4546,10 +4557,10 @@ int LinearScan::BuildCmp(GenTree* tree) // int LinearScan::BuildCmpOperands(GenTree* tree) { - regMaskTP op1Candidates = RBM_NONE; - regMaskTP op2Candidates = RBM_NONE; - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); + SingleTypeRegSet op1Candidates = RBM_NONE; + SingleTypeRegSet op2Candidates = RBM_NONE; + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); #ifdef TARGET_X86 bool needByteRegs = false; diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 42dccded84fb49..c77b8d5c557d31 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -1078,9 +1078,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index d495246f9d1ea4..ebea9cce714727 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -840,7 +840,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { bool hasMultiRegRetVal = false; const ReturnTypeDesc* retTypeDesc = nullptr; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; int srcCount = 0; int dstCount = 0; @@ -859,8 +859,8 @@ int LinearScan::BuildCall(GenTreeCall* call) } } - GenTree* ctrlExpr = call->gtControlExpr; - regMaskTP ctrlExprCandidates = RBM_NONE; + GenTree* ctrlExpr = call->gtControlExpr; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; if (call->gtCallType == CT_INDIRECT) { // either gtControlExpr != null or gtCallAddr != null. @@ -894,7 +894,7 @@ int LinearScan::BuildCall(GenTreeCall* call) { // For R2R and VSD we have stub address in REG_R2R_INDIRECT_PARAM // and will load call address into the temp register from this register. 
- regMaskTP candidates = RBM_NONE; + SingleTypeRegSet candidates = RBM_NONE; if (call->IsFastTailCall()) { candidates = allRegs(TYP_INT) & RBM_INT_CALLEE_TRASH; @@ -1144,14 +1144,16 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // Registers for split argument corresponds to source int dstCount = argNode->gtNumRegs; - regNumber argReg = argNode->GetRegNum(); - regMaskTP argMask = RBM_NONE; + regNumber argReg = argNode->GetRegNum(); + SingleTypeRegSet argMask = RBM_NONE; for (unsigned i = 0; i < argNode->gtNumRegs; i++) { regNumber thisArgReg = (regNumber)((unsigned)argReg + i); argMask |= genRegMask(thisArgReg); argNode->SetRegNumByIdx(thisArgReg, i); } + assert((argMask == RBM_NONE) || ((argMask & availableIntRegs) != RBM_NONE) || + ((argMask & availableFloatRegs) != RBM_NONE)); if (src->OperGet() == GT_FIELD_LIST) { @@ -1176,7 +1178,7 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - regMaskTP sourceMask = RBM_NONE; + SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { sourceMask = genRegMask((regNumber)((unsigned)argReg + sourceRegCount)); @@ -1234,9 +1236,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; if (blkNode->OperIsInitBlkOp()) { @@ -1294,7 +1296,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // We don't need to materialize the struct size but we still need // a temporary register to perform the sequence of loads and stores. // We can't use the special Write Barrier registers, so exclude them from the mask - regMaskTP internalIntCandidates = + SingleTypeRegSet internalIntCandidates = allRegs(TYP_INT) & ~(RBM_WRITE_BARRIER_DST_BYREF | RBM_WRITE_BARRIER_SRC_BYREF); buildInternalIntRegisterDefForNode(blkNode, internalIntCandidates); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index a3d7647ad36512..fe21be0ec8938d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -442,7 +442,7 @@ int LinearScan::BuildNode(GenTree* tree) // Comparand is preferenced to RAX. // The remaining two operands can be in any reg other than RAX. - const regMaskTP nonRaxCandidates = availableIntRegs & ~RBM_RAX; + const SingleTypeRegSet nonRaxCandidates = availableIntRegs & ~RBM_RAX; BuildUse(addr, nonRaxCandidates); BuildUse(data, varTypeIsByte(tree) ? (nonRaxCandidates & RBM_BYTE_REGS) : nonRaxCandidates); BuildUse(comparand, RBM_RAX); @@ -778,16 +778,16 @@ bool LinearScan::isRMWRegOper(GenTree* tree) } // Support for building RefPositions for RMW nodes. -int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates) +int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates) { - int srcCount = 0; - regMaskTP op1Candidates = candidates; - regMaskTP op2Candidates = candidates; + int srcCount = 0; + SingleTypeRegSet op1Candidates = candidates; + SingleTypeRegSet op2Candidates = candidates; #ifdef TARGET_X86 if (varTypeIsByte(node)) { - regMaskTP byteCandidates = (candidates == RBM_NONE) ? allByteRegs() : (candidates & allByteRegs()); + SingleTypeRegSet byteCandidates = (candidates == RBM_NONE) ? 
allByteRegs() : (candidates & allByteRegs()); if (!op1->isContained()) { assert(byteCandidates != RBM_NONE); @@ -1039,11 +1039,11 @@ int LinearScan::BuildShiftRotate(GenTree* tree) // For shift operations, we need that the number // of bits moved gets stored in CL in case // the number of bits to shift is not a constant. - int srcCount = 0; - GenTree* shiftBy = tree->gtGetOp2(); - GenTree* source = tree->gtGetOp1(); - regMaskTP srcCandidates = RBM_NONE; - regMaskTP dstCandidates = RBM_NONE; + int srcCount = 0; + GenTree* shiftBy = tree->gtGetOp2(); + GenTree* source = tree->gtGetOp1(); + SingleTypeRegSet srcCandidates = RBM_NONE; + SingleTypeRegSet dstCandidates = RBM_NONE; // x64 can encode 8 bits of shift and it will use 5 or 6. (the others are masked off) // We will allow whatever can be encoded - hope you know what you are doing. @@ -1151,7 +1151,7 @@ int LinearScan::BuildCall(GenTreeCall* call) const ReturnTypeDesc* retTypeDesc = nullptr; int srcCount = 0; int dstCount = 0; - regMaskTP singleDstCandidates = RBM_NONE; + SingleTypeRegSet singleDstCandidates = RBM_NONE; assert(!call->isContained()); if (call->TypeGet() != TYP_VOID) @@ -1321,7 +1321,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // set reg requirements on call target represented as control sequence. if (ctrlExpr != nullptr) { - regMaskTP ctrlExprCandidates = RBM_NONE; + SingleTypeRegSet ctrlExprCandidates = RBM_NONE; // In case of fast tail implemented as jmp, make sure that gtControlExpr is // computed into appropriate registers. @@ -1422,9 +1422,9 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) GenTree* srcAddrOrFill = nullptr; - regMaskTP dstAddrRegMask = RBM_NONE; - regMaskTP srcRegMask = RBM_NONE; - regMaskTP sizeRegMask = RBM_NONE; + SingleTypeRegSet dstAddrRegMask = RBM_NONE; + SingleTypeRegSet srcRegMask = RBM_NONE; + SingleTypeRegSet sizeRegMask = RBM_NONE; RefPosition* internalIntDef = nullptr; #ifdef TARGET_X86 @@ -1543,7 +1543,7 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) // or if are but the remainder is a power of 2 and less than the // size of a register - regMaskTP regMask = availableIntRegs; + SingleTypeRegSet regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) { @@ -1807,7 +1807,7 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk) // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg. if ((loadSize % XMM_REGSIZE_BYTES) != 0) { - regMaskTP regMask = availableIntRegs; + SingleTypeRegSet regMask = availableIntRegs; #ifdef TARGET_X86 // Storing at byte granularity requires a byteable register. 
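// (Background: only EAX, ECX, EDX and EBX have 8-bit sub-registers on x86, which is why allByteRegs() / RBM_BYTE_REGS recurs as the candidate set whenever a byte-sized load, store or compare has to be emitted.)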
@@ -1913,10 +1913,10 @@ int LinearScan::BuildLclHeap(GenTree* tree)
 //
 int LinearScan::BuildModDiv(GenTree* tree)
 {
-    GenTree*  op1           = tree->gtGetOp1();
-    GenTree*  op2           = tree->gtGetOp2();
-    regMaskTP dstCandidates = RBM_NONE;
-    int       srcCount      = 0;
+    GenTree*         op1           = tree->gtGetOp1();
+    GenTree*         op2           = tree->gtGetOp2();
+    SingleTypeRegSet dstCandidates = RBM_NONE;
+    int              srcCount      = 0;

     if (varTypeIsFloating(tree->TypeGet()))
     {
@@ -2131,7 +2131,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
         dstCount = 0;
     }

-    regMaskTP dstCandidates = RBM_NONE;
+    SingleTypeRegSet dstCandidates = RBM_NONE;

     if (intrinsicTree->GetOperandCount() == 0)
     {
@@ -2721,7 +2721,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
         if (buildUses)
         {
-            regMaskTP op1RegCandidates = RBM_NONE;
+            SingleTypeRegSet op1RegCandidates = RBM_NONE;

 #if defined(TARGET_AMD64)
             if (!isEvexCompatible)
@@ -2746,7 +2746,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
             if (op2 != nullptr)
             {
-                regMaskTP op2RegCandidates = RBM_NONE;
+                SingleTypeRegSet op2RegCandidates = RBM_NONE;

 #if defined(TARGET_AMD64)
                 if (!isEvexCompatible)
@@ -2792,7 +2792,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                 if (op3 != nullptr)
                 {
-                    regMaskTP op3RegCandidates = RBM_NONE;
+                    SingleTypeRegSet op3RegCandidates = RBM_NONE;

 #if defined(TARGET_AMD64)
                     if (!isEvexCompatible)
@@ -2806,7 +2806,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
                     if (op4 != nullptr)
                     {
-                        regMaskTP op4RegCandidates = RBM_NONE;
+                        SingleTypeRegSet op4RegCandidates = RBM_NONE;

 #if defined(TARGET_AMD64)
                         assert(isEvexCompatible);
@@ -2872,7 +2872,7 @@ int LinearScan::BuildCast(GenTreeCast* cast)
         buildInternalIntRegisterDefForNode(cast);
     }

-    regMaskTP candidates = RBM_NONE;
+    SingleTypeRegSet candidates = RBM_NONE;
 #ifdef TARGET_X86
     if (varTypeIsByte(castType))
     {
@@ -2922,8 +2922,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
     }
 #endif // FEATURE_SIMD

-    regMaskTP indirCandidates = RBM_NONE;
-    int       srcCount        = BuildIndirUses(indirTree, indirCandidates);
+    int srcCount = BuildIndirUses(indirTree);
     if (indirTree->gtOper == GT_STOREIND)
     {
         GenTree* source = indirTree->gtGetOp2();
@@ -2939,7 +2938,7 @@ int LinearScan::BuildIndir(GenTreeIndir* indirTree)
         }
         else
         {
-            regMaskTP srcCandidates = RBM_NONE;
+            SingleTypeRegSet srcCandidates = RBM_NONE;

 #ifdef TARGET_X86
             // Determine if we need byte regs for the non-mem source, if any.
@@ -3044,9 +3043,9 @@ int LinearScan::BuildMul(GenTree* tree)
         return BuildSimple(tree);
     }

-    int       srcCount      = BuildBinaryUses(tree->AsOp());
-    int       dstCount      = 1;
-    regMaskTP dstCandidates = RBM_NONE;
+    int              srcCount      = BuildBinaryUses(tree->AsOp());
+    int              dstCount      = 1;
+    SingleTypeRegSet dstCandidates = RBM_NONE;

     bool isUnsignedMultiply    = ((tree->gtFlags & GTF_UNSIGNED) != 0);
     bool requiresOverflowCheck = tree->gtOverflowEx();
@@ -3144,7 +3143,7 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/)
 //    RBM_NONE if compatible with EVEX (or not a floating/SIMD register),
 //    lowSIMDRegs() (XMM0-XMM16) otherwise.
 //
-inline regMaskTP LinearScan::BuildEvexIncompatibleMask(GenTree* tree)
+inline SingleTypeRegSet LinearScan::BuildEvexIncompatibleMask(GenTree* tree)
 {
 #if defined(TARGET_AMD64)
     assert(!varTypeIsMask(tree));
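Each operand position in BuildHWIntrinsic above repeats the same pattern: default to RBM_NONE (no constraint) and clamp to the low SIMD registers when the instruction cannot be EVEX-encoded. A sketch of that pattern in isolation, with an illustrative LOW_SIMD_REGS value standing in for lowSIMDRegs():

    #include <cstdint>

    typedef uint64_t SingleTypeRegSet;
    const SingleTypeRegSet RBM_NONE      = 0;
    const SingleTypeRegSet LOW_SIMD_REGS = 0xFFFF; // hypothetical XMM0-XMM15 bits

    // RBM_NONE lets LSRA pick any register of the right class; a non-EVEX
    // encoding cannot address the upper SIMD registers, so narrow the set.
    SingleTypeRegSet operandCandidates(bool isEvexCompatible)
    {
        SingleTypeRegSet cand = RBM_NONE;
        if (!isEvexCompatible)
        {
            cand = LOW_SIMD_REGS;
        }
        return cand;
    }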
diff --git a/src/coreclr/jit/regMaskTPOps.cpp b/src/coreclr/jit/regMaskTPOps.cpp
new file mode 100644
index 00000000000000..86de50a08cb95c
--- /dev/null
+++ b/src/coreclr/jit/regMaskTPOps.cpp
@@ -0,0 +1,28 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "target.h"
+
+struct regMaskTP;
+
+//------------------------------------------------------------------------
+// RemoveRegNumFromMask: Removes `reg` from the mask
+//
+// Parameters:
+//   reg - Register to remove
+//
+void regMaskTP::RemoveRegNumFromMask(regNumber reg)
+{
+    low &= ~genRegMask(reg);
+}
+
+//------------------------------------------------------------------------
+// IsRegNumInMask: Checks if `reg` is in the mask
+//
+// Parameters:
+//   reg - Register to check
+//
+bool regMaskTP::IsRegNumInMask(regNumber reg)
+{
+    return (low & genRegMask(reg)) != 0;
+}
diff --git a/src/coreclr/jit/regset.cpp b/src/coreclr/jit/regset.cpp
index 2ef2f9c1e17f16..a033e49fcad1fd 100644
--- a/src/coreclr/jit/regset.cpp
+++ b/src/coreclr/jit/regset.cpp
@@ -950,7 +950,7 @@ regNumber genRegArgNext(regNumber argReg)
  *  are encoded in GC information at call sites.
  */

-const regMaskTP raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER};
+const regMaskSmall raRbmCalleeSaveOrder[] = {RBM_CALL_GC_REGS_ORDER};

 regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
 {
diff --git a/src/coreclr/jit/regset.h b/src/coreclr/jit/regset.h
index 99c7f8be6bc556..49cd4dd2e6ecda 100644
--- a/src/coreclr/jit/regset.h
+++ b/src/coreclr/jit/regset.h
@@ -119,7 +119,7 @@ class RegSet
     bool rsRegsModified(regMaskTP mask) const
     {
         assert(rsModifiedRegsMaskInitialized);
-        return (rsModifiedRegsMask & mask) != 0;
+        return (rsModifiedRegsMask & mask).IsNonEmpty();
     }

     void verifyRegUsed(regNumber reg);
diff --git a/src/coreclr/jit/target.h b/src/coreclr/jit/target.h
index 117d598e8bcb62..3e3e4591281372 100644
--- a/src/coreclr/jit/target.h
+++ b/src/coreclr/jit/target.h
@@ -229,12 +229,25 @@ typedef uint64_t regMaskSmall;
 #define REG_MASK_ALL_FMT "%016llX"
 #endif

+typedef regMaskSmall SingleTypeRegSet;
+inline SingleTypeRegSet genRegMask(regNumber reg);
+inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE));
+
 struct regMaskTP
 {
 private:
     regMaskSmall low;
+    regMaskSmall high;
+
 public:
-    constexpr regMaskTP(regMaskSmall regMask)
+
+    regMaskTP(regMaskSmall lowMask, regMaskSmall highMask)
+        : low(lowMask)
+        , high(highMask)
+    {
+    }
+
+    regMaskTP(regMaskSmall regMask)
         : low(regMask)
     {
     }
@@ -275,88 +288,73 @@ struct regMaskTP
     {
         return low;
     }
-};

-static regMaskTP operator^(regMaskTP first, regMaskTP second)
-{
-    regMaskTP result(first.getLow() ^ second.getLow());
-    return result;
-}
+    bool IsEmpty() const
+    {
+        return low == RBM_NONE;
+    }

-static regMaskTP operator&(regMaskTP first, regMaskTP second)
-{
-    regMaskTP result(first.getLow() & second.getLow());
-    return result;
-}
+    bool IsNonEmpty() const
+    {
+        return !IsEmpty();
+    }

-static regMaskTP operator|(regMaskTP first, regMaskTP second)
-{
-    regMaskTP result(first.getLow() | second.getLow());
-    return result;
-}
+    SingleTypeRegSet GetRegSetForType(var_types type) const
+    {
+        return getLow();
+    }

-static regMaskTP operator<<(regMaskTP first, const int b)
-{
-    regMaskTP result(first.getLow() << b);
-    return result;
-}
+    void RemoveRegNumFromMask(regNumber reg);

-static regMaskTP operator>>(regMaskTP first, const int b)
-{
-    regMaskTP result(first.getLow() >> b);
-    return result;
-}
+    bool IsRegNumInMask(regNumber reg);

-static regMaskTP& operator>>=(regMaskTP& first, const int b)
-{
-    first = first >> b;
-    return first;
-}
+    void operator|=(const regMaskTP& second)
+    {
+        low |= second.getLow();
+    }

-static regMaskTP& operator|=(regMaskTP& first, regMaskTP second)
-{
-    first = first | second;
-    return first;
-}
+    void operator^=(const regMaskTP& second)
+    {
+        low ^= second.getLow();
+    }

-static regMaskTP& operator^=(regMaskTP& first, regMaskTP second)
-{
-    first = first ^ second;
-    return first;
-}
+    void operator^=(const regNumber reg)
+    {
+        low ^= genRegMask(reg);
+    }

-static regMaskSmall operator^=(regMaskSmall& first, regMaskTP second)
-{
-    first ^= second.getLow();
-    return first;
-}
+    void operator&=(const regMaskTP& second)
+    {
+        low &= second.getLow();
+    }
+};

-static regMaskSmall operator&=(regMaskSmall& first, regMaskTP second)
+static regMaskTP operator^(const regMaskTP& first, const regMaskTP& second)
 {
-    first &= second.getLow();
-    return first;
+    regMaskTP result(first.getLow() ^ second.getLow());
+    return result;
 }

-static regMaskSmall operator|=(regMaskSmall& first, regMaskTP second)
+static regMaskTP operator&(const regMaskTP& first, const regMaskTP& second)
 {
-    first |= second.getLow();
-    return first;
+    regMaskTP result(first.getLow() & second.getLow());
+    return result;
 }

-static regMaskTP& operator&=(regMaskTP& first, regMaskTP second)
+static regMaskTP operator|(const regMaskTP& first, const regMaskTP& second)
 {
-    first = first & second;
-    return first;
+    regMaskTP result(first.getLow() | second.getLow());
+    return result;
 }

-static bool operator==(regMaskTP first, regMaskTP second)
+static bool operator==(const regMaskTP& first, const regMaskTP& second)
 {
     return (first.getLow() == second.getLow());
 }

-static bool operator!=(regMaskTP first, regMaskTP second)
+static bool operator!=(const regMaskTP& first, const regMaskTP& second)
 {
-    return (first.getLow() != second.getLow());
+    return !(first == second);
 }

 #ifdef TARGET_ARM
@@ -371,6 +369,18 @@ static bool operator>(regMaskTP first, regMaskTP second)
     return first.getLow() > second.getLow();
 }

+static regMaskTP operator<<(const regMaskTP& first, const int b)
+{
+    regMaskTP result(first.getLow() << b);
+    return result;
+}
+
+static regMaskTP operator>>(const regMaskTP& first, const int b)
+{
+    regMaskTP result(first.getLow() >> b);
+    return result;
+}
+
 static regMaskTP& operator<<=(regMaskTP& first, const int b)
 {
     first = first << b;
@@ -378,18 +388,18 @@ static regMaskTP& operator<<=(regMaskTP& first, const int b)
 }
 #endif

-static regMaskTP operator~(regMaskTP first)
+static regMaskTP operator~(const regMaskTP& first)
 {
     regMaskTP result(~first.getLow());
     return result;
 }

-static uint32_t PopCount(regMaskTP value)
+static uint32_t PopCount(const regMaskTP& value)
 {
     return BitOperations::PopCount(value.getLow());
 }

-static uint32_t BitScanForward(regMaskTP mask)
+static uint32_t BitScanForward(const regMaskTP& mask)
 {
     return BitOperations::BitScanForward(mask.getLow());
 }
@@ -511,9 +521,6 @@ inline bool isByteReg(regNumber reg)
 }
 #endif

-inline regMaskTP genRegMask(regNumber reg);
-inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type = TYP_DOUBLE));
-
 /*****************************************************************************
  *  Return true if the register number is valid
  */
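The target.h hunks above turn regMaskTP into a small value type over two regMaskSmall fields while keeping its bitset-like surface; every operation is currently routed through `low`, with `high` carried along as plumbing for additional register files. A compilable miniature of those semantics, using plain unsigned register indices in place of regNumber/genRegMask (all names here are illustrative, not the JIT's):

    #include <cassert>
    #include <cstdint>

    typedef uint64_t regMaskSmall;

    struct MiniRegMask
    {
        regMaskSmall low;
        regMaskSmall high; // present but unused, as in the patch

        MiniRegMask(regMaskSmall l, regMaskSmall h = 0)
            : low(l)
            , high(h)
        {
        }

        bool IsEmpty() const    { return low == 0; }
        bool IsNonEmpty() const { return !IsEmpty(); }

        void RemoveRegNumFromMask(unsigned reg) { low &= ~(regMaskSmall(1) << reg); }
        bool IsRegNumInMask(unsigned reg) const { return (low & (regMaskSmall(1) << reg)) != 0; }

        void operator|=(const MiniRegMask& rhs) { low |= rhs.low; }
        void operator&=(const MiniRegMask& rhs) { low &= rhs.low; }
    };

    int main()
    {
        MiniRegMask m(0b1010); // registers 1 and 3
        assert(m.IsRegNumInMask(3));
        m.RemoveRegNumFromMask(3);
        assert(m.IsNonEmpty() && !m.IsRegNumInMask(3));
        m |= MiniRegMask(0b0100);
        assert(m.IsRegNumInMask(2));
        return 0;
    }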
@@ -610,7 +617,7 @@ inline regNumber theFixedRetBuffReg(CorInfoCallConvExtension callConv)
 // theFixedRetBuffMask:
 //     Returns the regNumber to use for the fixed return buffer
 //
-inline regMaskTP theFixedRetBuffMask(CorInfoCallConvExtension callConv)
+inline SingleTypeRegSet theFixedRetBuffMask(CorInfoCallConvExtension callConv)
 {
     assert(hasFixedRetBuffReg(callConv)); // This predicate should be checked before calling this method
 #if defined(TARGET_ARM64)
@@ -645,9 +652,9 @@ inline unsigned theFixedRetBuffArgNum(CorInfoCallConvExtension callConv)
 //     Returns the full mask of all possible integer registers
 //     Note this includes the fixed return buffer register on Arm64
 //
-inline regMaskTP fullIntArgRegMask(CorInfoCallConvExtension callConv)
+inline SingleTypeRegSet fullIntArgRegMask(CorInfoCallConvExtension callConv)
 {
-    regMaskTP result = RBM_ARG_REGS;
+    SingleTypeRegSet result = RBM_ARG_REGS;
     if (hasFixedRetBuffReg(callConv))
     {
         result |= theFixedRetBuffMask(callConv);
@@ -742,7 +749,7 @@ inline bool floatRegCanHoldType(regNumber reg, var_types type)

 extern const regMaskSmall regMasks[REG_COUNT];

-inline regMaskTP genRegMask(regNumber reg)
+inline SingleTypeRegSet genRegMask(regNumber reg)
 {
     assert((unsigned)reg < ArrLen(regMasks));
 #ifdef TARGET_AMD64
@@ -750,7 +757,7 @@ inline regMaskTP genRegMask(regNumber reg)
     // (L1 latency on sandy bridge is 4 cycles for [base] and 5 for [base + index*c] )
     // the reason this is AMD-only is because the x86 BE will try to get reg masks for REG_STK
     // and the result needs to be zero.
-    regMaskTP result = 1ULL << reg;
+    SingleTypeRegSet result = 1ULL << reg;
     assert(result == regMasks[reg]);
     return result;
 #else
@@ -763,7 +770,7 @@ inline regMaskTP genRegMask(regNumber reg)
  *  Map a register number to a floating-point register mask.
  */

-inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */))
+inline SingleTypeRegSet genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_DOUBLE */))
 {
 #if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || \
     defined(TARGET_RISCV64)
@@ -806,10 +813,10 @@ inline regMaskTP genRegMaskFloat(regNumber reg ARM_ARG(var_types type /* = TYP_D
 //    For registers that are used in pairs, the caller will be handling
 //    each member of the pair separately.
 //
-inline regMaskTP genRegMask(regNumber regNum, var_types type)
+inline SingleTypeRegSet genRegMask(regNumber regNum, var_types type)
 {
 #if defined(TARGET_ARM)
-    regMaskTP regMask = RBM_NONE;
+    SingleTypeRegSet regMask = RBM_NONE;

     if (varTypeUsesIntReg(type))
     {
@@ -832,7 +839,7 @@ inline regMaskTP genRegMask(regNumber regNum, var_types type)
  *  These arrays list the callee-saved register numbers (and bitmaps, respectively) for
  *  the current architecture.
  */
-extern const regMaskTP raRbmCalleeSaveOrder[CNT_CALL_GC_REGS];
+extern const regMaskSmall raRbmCalleeSaveOrder[CNT_CALL_GC_REGS];

 // This method takes a "compact" bitset of the callee-saved registers, and "expands" it to a full register mask.
 regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short);
diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp
index a51a52ab21d640..97f05939013906 100644
--- a/src/coreclr/jit/unwind.cpp
+++ b/src/coreclr/jit/unwind.cpp
@@ -224,12 +224,11 @@ void Compiler::unwindPushPopMaskCFI(regMaskTP regMask, bool isFloat)
         // because LLVM only know about D0-D31.
         // As such pairs Sx,Sx+1 are referenced as D0-D15 registers in DWARF
         // For that we process registers in pairs.
-        regBit >>= isFloat ? 2 : 1;
         regNum = isFloat ? REG_PREV(REG_PREV(regNum)) : REG_PREV(regNum);
 #else
-        regBit >>= 1;
         regNum = REG_PREV(regNum);
 #endif
+        regBit = genRegMask(regNum);
     }
 }
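The unwind.cpp hunk above replaces a running right-shift of regBit with a recomputation from regNum each iteration, which stays well-defined now that masks are no longer raw integers. A simplified model of the resulting loop shape, with register numbering and REG_PREV modeled as plain ints (stand-in names, not the JIT's types):

    #include <cstdint>
    #include <cstdio>

    typedef uint64_t SingleTypeRegSet;

    // Stand-in for genRegMask(regNumber): derive the bit from the register number.
    SingleTypeRegSet genRegMaskSim(int reg)
    {
        return SingleTypeRegSet(1) << reg;
    }

    void walkMaskHighToLow(SingleTypeRegSet regMask, int highestReg)
    {
        for (int regNum = highestReg; regNum >= 0; regNum--) // REG_PREV analogue
        {
            // Recompute the mask from regNum instead of shifting a carried bit,
            // mirroring the `regBit = genRegMask(regNum)` line in the patch.
            SingleTypeRegSet regBit = genRegMaskSim(regNum);
            if ((regBit & regMask) != 0)
            {
                printf("reg %d is in the mask\n", regNum);
            }
        }
    }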