diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 0bf6b32b214fa5..5a42b3ebfb2fc3 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -370,9 +370,15 @@ void CodeGen::genFnEpilog(BasicBlock* block) bool jmpEpilog = block->HasFlag(BBF_HAS_JMP); + // BBF_HAS_JMP on wasm comes only from fast tail calls. The return_call already + // left the function, but the body still needs an INS_end if this is the last block. if (jmpEpilog) { - NYI_WASM("genFnEpilog: jmpEpilog"); + if (block->IsLast() || m_compiler->bbIsFuncletBeg(block->Next())) + { + instGen(INS_end); + } + return; } // TODO-WASM: shadow stack maintenance @@ -2408,6 +2414,19 @@ void CodeGen::genCodeForPhysReg(GenTreePhysReg* tree) { assert(genIsValidReg(tree->gtSrcReg)); GetEmitter()->emitIns_I(INS_local_get, emitActualTypeSize(tree), WasmRegToIndex(tree->gtSrcReg)); + + if ((tree->gtLIRFlags & LIR::Flags::WasmFastTailCallSp) != 0) + { + // Fast tail call SP arg: undo the prolog SP adjustment (asserts funclet tail calls don't happen). + assert(m_compiler->funCurrentFuncIdx() == ROOT_FUNC_IDX); + assert(tree->gtSrcReg == GetStackPointerReg(m_compiler->funCurrentFuncIdx())); + if (m_compiler->compLclFrameSize != 0) + { + GetEmitter()->emitIns_I(INS_I_const, EA_PTRSIZE, m_compiler->compLclFrameSize); + GetEmitter()->emitIns(INS_I_add); + } + } + + WasmProduceReg(tree); } @@ -2571,7 +2590,33 @@ void CodeGen::genCallInstruction(GenTreeCall* call) ArrayStack<CorInfoWasmType> typeStack(m_compiler->getAllocator(CMK_Codegen)); - if (call->TypeIs(TYP_STRUCT)) + // For a fast tail call wasm requires the callee's result type to match the enclosing + // function's, so derive it from the caller's signature (call->gtType is TYP_VOID). + if (params.isJump) + { + if (m_compiler->info.compRetBuffArg != BAD_VAR_NUM) + { + // The enclosing method returns its struct via a retbuf arg, so the wasm-level + // return is empty. 
+ typeStack.Push(CORINFO_WASM_TYPE_VOID); + } + else if (m_compiler->info.compRetType == TYP_VOID) + { + typeStack.Push(CORINFO_WASM_TYPE_VOID); + } + else if (m_compiler->info.compRetType == TYP_STRUCT) + { + typeStack.Push( + m_compiler->info.compCompHnd->getWasmLowering(m_compiler->info.compMethodInfo->args.retTypeClass)); + } + else + { + // Normalize small ints (bool/byte/short/...). + typeStack.Push((CorInfoWasmType)emitter::GetWasmValueTypeCode( + ActualTypeToWasmValueType(m_compiler->info.compRetType))); + } + } + else if (call->TypeIs(TYP_STRUCT)) { typeStack.Push(m_compiler->info.compCompHnd->getWasmLowering(call->gtRetClsHnd)); } diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index b212ee69974fbd..a0a0c452213259 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -44,7 +44,10 @@ class LIR final #ifdef TARGET_WASM MultiplyUsed = 0x08, // Set by lowering on nodes that the RA should allocate into // a dedicated register (WASM local), for multiple uses. -#endif // TARGET_WASM + + WasmFastTailCallSp = 0x10, // SP arg of a fast tail call; codegen adds compLclFrameSize + // to undo the prolog's SP adjustment. +#endif // TARGET_WASM }; }; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index db913fba5a9a71..65e266c06dfb18 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -4264,11 +4264,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // the fast tail call cannot be performed. This is common to all platforms. // Note that the GC'ness of on stack args need not match since the arg setup area is marked // as non-interruptible for fast tail calls. + // + // Wasm passes args via fresh wasm locals, not the caller's stack, so this check doesn't apply. 
+#ifndef TARGET_WASM if (calleeArgStackSize > callerArgStackSize) { reportFastTailCallDecision("Not enough incoming arg space"); return false; } +#endif // !TARGET_WASM // For Windows some struct parameters are copied on the local frame // and then passed by reference. We cannot fast tail call in these situation diff --git a/src/coreclr/jit/regallocwasm.cpp b/src/coreclr/jit/regallocwasm.cpp index e6e4a0969f3ba0..af49da2e4b952e 100644 --- a/src/coreclr/jit/regallocwasm.cpp +++ b/src/coreclr/jit/regallocwasm.cpp @@ -571,6 +571,22 @@ void WasmRegAlloc::CollectReferencesForCall(GenTreeCall* callNode) { ConsumeTemporaryRegForOperand(thisArg->GetNode() DEBUGARG("call this argument")); } + + // Tag the SP arg of a fast tail call so codegen undoes the prolog SP adjustment. + // The arg has been rewritten to GT_PHYSREG above (args are visited before the call). + if (callNode->IsFastTailCall()) + { + CallArg* const spArg = callNode->gtArgs.FindWellKnownArg(WellKnownArg::WasmShadowStackPointer); + if (spArg != nullptr) + { + GenTree* const argNode = spArg->GetNode(); + assert(argNode != nullptr); + assert(argNode->OperIs(GT_PHYSREG)); + assert(argNode->AsPhysReg()->gtSrcReg == m_perFuncletData[m_currentFunclet]->m_spReg); + + argNode->gtLIRFlags |= LIR::Flags::WasmFastTailCallSp; + } + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/targetwasm.h b/src/coreclr/jit/targetwasm.h index 0f1ce3af46ddc3..f6521159c8d1c0 100644 --- a/src/coreclr/jit/targetwasm.h +++ b/src/coreclr/jit/targetwasm.h @@ -23,8 +23,8 @@ #define FEATURE_FIXED_OUT_ARGS 0 // Preallocate the outgoing arg area in the prolog #define FEATURE_STRUCTPROMOTE 1 // JIT Optimization to promote fields of structs into registers #define FEATURE_MULTIREG_STRUCT_PROMOTE 1 // True when we want to promote fields of a multireg struct into registers -#define FEATURE_FASTTAILCALL 0 // Tail calls made as epilog+jmp -#define FEATURE_TAILCALL_OPT 0 // opportunistic Tail 
calls (i.e. without ".tail" prefix) made as fast tail calls. +#define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp. On wasm the "jmp" is the native return_call / return_call_indirect opcode. +#define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_IMPLICIT_BYREFS 1 // Support for struct parameters passed via pointers to shadow copies #define FEATURE_MULTIREG_ARGS_OR_RET 0 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register