//===- SIMemoryLegalizer.cpp ----------------------------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// \file /// \brief Memory legalizer - implements memory model. More information can be /// found here: /// http://llvm.org/docs/AMDGPUUsage.html#memory-model // //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "AMDGPUMachineModuleInfo.h" #include "AMDGPUSubtarget.h" #include "SIDefines.h" #include "SIInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/Pass.h" #include "llvm/Support/AtomicOrdering.h" #include #include using namespace llvm; using namespace llvm::AMDGPU; #define DEBUG_TYPE "si-memory-legalizer" #define PASS_NAME "SI Memory Legalizer" namespace { class SIMemOpInfo final { private: SyncScope::ID SSID = SyncScope::System; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; bool IsNonTemporal = false; SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering) : SSID(SSID), Ordering(Ordering) {} SIMemOpInfo(SyncScope::ID SSID, AtomicOrdering Ordering, AtomicOrdering FailureOrdering, bool IsNonTemporal = false) : SSID(SSID), Ordering(Ordering), FailureOrdering(FailureOrdering), IsNonTemporal(IsNonTemporal) {} /// \returns Info constructed from \p MI, which has at least machine memory /// operand. static Optional constructFromMIWithMMO( const MachineBasicBlock::iterator &MI); public: /// \returns Synchronization scope ID of the machine instruction used to /// create this SIMemOpInfo. SyncScope::ID getSSID() const { return SSID; } /// \returns Ordering constraint of the machine instruction used to /// create this SIMemOpInfo. AtomicOrdering getOrdering() const { return Ordering; } /// \returns Failure ordering constraint of the machine instruction used to /// create this SIMemOpInfo. AtomicOrdering getFailureOrdering() const { return FailureOrdering; } /// \returns True if memory access of the machine instruction used to /// create this SIMemOpInfo is non-temporal, false otherwise. bool isNonTemporal() const { return IsNonTemporal; } /// \returns True if ordering constraint of the machine instruction used to /// create this SIMemOpInfo is unordered or higher, false otherwise. bool isAtomic() const { return Ordering != AtomicOrdering::NotAtomic; } /// \returns Load info if \p MI is a load operation, "None" otherwise. static Optional getLoadInfo( const MachineBasicBlock::iterator &MI); /// \returns Store info if \p MI is a store operation, "None" otherwise. static Optional getStoreInfo( const MachineBasicBlock::iterator &MI); /// \returns Atomic fence info if \p MI is an atomic fence operation, /// "None" otherwise. static Optional getAtomicFenceInfo( const MachineBasicBlock::iterator &MI); /// \returns Atomic cmpxchg info if \p MI is an atomic cmpxchg operation, /// "None" otherwise. static Optional getAtomicCmpxchgInfo( const MachineBasicBlock::iterator &MI); /// \returns Atomic rmw info if \p MI is an atomic rmw operation, /// "None" otherwise. static Optional getAtomicRmwInfo( const MachineBasicBlock::iterator &MI); /// \brief Reports unknown synchronization scope used in \p MI to LLVM /// context. static void reportUnknownSyncScope( const MachineBasicBlock::iterator &MI); }; class SIMemoryLegalizer final : public MachineFunctionPass { private: /// \brief Machine module info. const AMDGPUMachineModuleInfo *MMI = nullptr; /// \brief Instruction info. const SIInstrInfo *TII = nullptr; /// \brief Immediate for "vmcnt(0)". unsigned Vmcnt0Immediate = 0; /// \brief Opcode for cache invalidation instruction (L1). unsigned Wbinvl1Opcode = 0; /// \brief List of atomic pseudo instructions. std::list AtomicPseudoMIs; /// \brief Sets named bit (BitName) to "true" if present in \p MI. Returns /// true if \p MI is modified, false otherwise. template bool enableNamedBit(const MachineBasicBlock::iterator &MI) const { int BitIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), BitName); if (BitIdx == -1) return false; MachineOperand &Bit = MI->getOperand(BitIdx); if (Bit.getImm() != 0) return false; Bit.setImm(1); return true; } /// \brief Sets GLC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise. bool enableGLCBit(const MachineBasicBlock::iterator &MI) const { return enableNamedBit(MI); } /// \brief Sets SLC bit to "true" if present in \p MI. Returns true if \p MI /// is modified, false otherwise. bool enableSLCBit(const MachineBasicBlock::iterator &MI) const { return enableNamedBit(MI); } /// \brief Inserts "buffer_wbinvl1_vol" instruction \p Before or after \p MI. /// Always returns true. bool insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI, bool Before = true) const; /// \brief Inserts "s_waitcnt vmcnt(0)" instruction \p Before or after \p MI. /// Always returns true. bool insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI, bool Before = true) const; /// \brief Removes all processed atomic pseudo instructions from the current /// function. Returns true if current function is modified, false otherwise. bool removeAtomicPseudoMIs(); /// \brief Expands load operation \p MI. Returns true if instructions are /// added/deleted or \p MI is modified, false otherwise. bool expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); /// \brief Expands store operation \p MI. Returns true if instructions are /// added/deleted or \p MI is modified, false otherwise. bool expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); /// \brief Expands atomic fence operation \p MI. Returns true if /// instructions are added/deleted or \p MI is modified, false otherwise. bool expandAtomicFence(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); /// \brief Expands atomic cmpxchg operation \p MI. Returns true if /// instructions are added/deleted or \p MI is modified, false otherwise. bool expandAtomicCmpxchg(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); /// \brief Expands atomic rmw operation \p MI. Returns true if /// instructions are added/deleted or \p MI is modified, false otherwise. bool expandAtomicRmw(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); public: static char ID; SIMemoryLegalizer() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } StringRef getPassName() const override { return PASS_NAME; } bool runOnMachineFunction(MachineFunction &MF) override; }; } // end namespace anonymous /* static */ Optional SIMemOpInfo::constructFromMIWithMMO( const MachineBasicBlock::iterator &MI) { assert(MI->getNumMemOperands() > 0); const MachineFunction *MF = MI->getParent()->getParent(); const AMDGPUMachineModuleInfo *MMI = &MF->getMMI().getObjFileInfo(); SyncScope::ID SSID = SyncScope::SingleThread; AtomicOrdering Ordering = AtomicOrdering::NotAtomic; AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic; bool IsNonTemporal = true; // Validator should check whether or not MMOs cover the entire set of // locations accessed by the memory instruction. for (const auto &MMO : MI->memoperands()) { const auto &IsSyncScopeInclusion = MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID()); if (!IsSyncScopeInclusion) { reportUnknownSyncScope(MI); return None; } SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID(); Ordering = isStrongerThan(Ordering, MMO->getOrdering()) ? Ordering : MMO->getOrdering(); FailureOrdering = isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ? FailureOrdering : MMO->getFailureOrdering(); if (!(MMO->getFlags() & MachineMemOperand::MONonTemporal)) IsNonTemporal = false; } return SIMemOpInfo(SSID, Ordering, FailureOrdering, IsNonTemporal); } /* static */ Optional SIMemOpInfo::getLoadInfo( const MachineBasicBlock::iterator &MI) { assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(MI->mayLoad() && !MI->mayStore())) return None; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) return SIMemOpInfo(SyncScope::System, AtomicOrdering::SequentiallyConsistent); return SIMemOpInfo::constructFromMIWithMMO(MI); } /* static */ Optional SIMemOpInfo::getStoreInfo( const MachineBasicBlock::iterator &MI) { assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(!MI->mayLoad() && MI->mayStore())) return None; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) return SIMemOpInfo(SyncScope::System, AtomicOrdering::SequentiallyConsistent); return SIMemOpInfo::constructFromMIWithMMO(MI); } /* static */ Optional SIMemOpInfo::getAtomicFenceInfo( const MachineBasicBlock::iterator &MI) { assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE) return None; SyncScope::ID SSID = static_cast(MI->getOperand(1).getImm()); AtomicOrdering Ordering = static_cast(MI->getOperand(0).getImm()); return SIMemOpInfo(SSID, Ordering); } /* static */ Optional SIMemOpInfo::getAtomicCmpxchgInfo( const MachineBasicBlock::iterator &MI) { assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(MI->mayLoad() && MI->mayStore())) return None; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) return SIMemOpInfo(SyncScope::System, AtomicOrdering::SequentiallyConsistent, AtomicOrdering::SequentiallyConsistent); return SIMemOpInfo::constructFromMIWithMMO(MI); } /* static */ Optional SIMemOpInfo::getAtomicRmwInfo( const MachineBasicBlock::iterator &MI) { assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); if (!(MI->mayLoad() && MI->mayStore())) return None; // Be conservative if there are no memory operands. if (MI->getNumMemOperands() == 0) return SIMemOpInfo(SyncScope::System, AtomicOrdering::SequentiallyConsistent); return SIMemOpInfo::constructFromMIWithMMO(MI); } /* static */ void SIMemOpInfo::reportUnknownSyncScope( const MachineBasicBlock::iterator &MI) { DiagnosticInfoUnsupported Diag(MI->getParent()->getParent()->getFunction(), "Unsupported synchronization scope"); LLVMContext *CTX = &MI->getParent()->getParent()->getFunction().getContext(); CTX->diagnose(Diag); } bool SIMemoryLegalizer::insertBufferWbinvl1Vol(MachineBasicBlock::iterator &MI, bool Before) const { MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); if (!Before) ++MI; BuildMI(MBB, MI, DL, TII->get(Wbinvl1Opcode)); if (!Before) --MI; return true; } bool SIMemoryLegalizer::insertWaitcntVmcnt0(MachineBasicBlock::iterator &MI, bool Before) const { MachineBasicBlock &MBB = *MI->getParent(); DebugLoc DL = MI->getDebugLoc(); if (!Before) ++MI; BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT)).addImm(Vmcnt0Immediate); if (!Before) --MI; return true; } bool SIMemoryLegalizer::removeAtomicPseudoMIs() { if (AtomicPseudoMIs.empty()) return false; for (auto &MI : AtomicPseudoMIs) MI->eraseFromParent(); AtomicPseudoMIs.clear(); return true; } bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) { assert(MI->mayLoad() && !MI->mayStore()); bool Changed = false; if (MOI.isAtomic()) { if (MOI.getSSID() == SyncScope::System || MOI.getSSID() == MMI->getAgentSSID()) { if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= enableGLCBit(MI); if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertWaitcntVmcnt0(MI); if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) { Changed |= insertWaitcntVmcnt0(MI, false); Changed |= insertBufferWbinvl1Vol(MI, false); } return Changed; } if (MOI.getSSID() == SyncScope::SingleThread || MOI.getSSID() == MMI->getWorkgroupSSID() || MOI.getSSID() == MMI->getWavefrontSSID()) { return Changed; } llvm_unreachable("Unsupported synchronization scope"); } // Atomic instructions do not have the nontemporal attribute. if (MOI.isNonTemporal()) { Changed |= enableGLCBit(MI); Changed |= enableSLCBit(MI); return Changed; } return Changed; } bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) { assert(!MI->mayLoad() && MI->mayStore()); bool Changed = false; if (MOI.isAtomic()) { if (MOI.getSSID() == SyncScope::System || MOI.getSSID() == MMI->getAgentSSID()) { if (MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertWaitcntVmcnt0(MI); return Changed; } if (MOI.getSSID() == SyncScope::SingleThread || MOI.getSSID() == MMI->getWorkgroupSSID() || MOI.getSSID() == MMI->getWavefrontSSID()) { return Changed; } llvm_unreachable("Unsupported synchronization scope"); } // Atomic instructions do not have the nontemporal attribute. if (MOI.isNonTemporal()) { Changed |= enableGLCBit(MI); Changed |= enableSLCBit(MI); return Changed; } return Changed; } bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) { assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE); bool Changed = false; if (MOI.isAtomic()) { if (MOI.getSSID() == SyncScope::System || MOI.getSSID() == MMI->getAgentSSID()) { if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertWaitcntVmcnt0(MI); if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertBufferWbinvl1Vol(MI); AtomicPseudoMIs.push_back(MI); return Changed; } if (MOI.getSSID() == SyncScope::SingleThread || MOI.getSSID() == MMI->getWorkgroupSSID() || MOI.getSSID() == MMI->getWavefrontSSID()) { AtomicPseudoMIs.push_back(MI); return Changed; } SIMemOpInfo::reportUnknownSyncScope(MI); } return Changed; } bool SIMemoryLegalizer::expandAtomicCmpxchg(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) { assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; if (MOI.isAtomic()) { if (MOI.getSSID() == SyncScope::System || MOI.getSSID() == MMI->getAgentSSID()) { if (MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent || MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertWaitcntVmcnt0(MI); if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent || MOI.getFailureOrdering() == AtomicOrdering::Acquire || MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) { Changed |= insertWaitcntVmcnt0(MI, false); Changed |= insertBufferWbinvl1Vol(MI, false); } return Changed; } if (MOI.getSSID() == SyncScope::SingleThread || MOI.getSSID() == MMI->getWorkgroupSSID() || MOI.getSSID() == MMI->getWavefrontSSID()) { Changed |= enableGLCBit(MI); return Changed; } llvm_unreachable("Unsupported synchronization scope"); } return Changed; } bool SIMemoryLegalizer::expandAtomicRmw(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI) { assert(MI->mayLoad() && MI->mayStore()); bool Changed = false; if (MOI.isAtomic()) { if (MOI.getSSID() == SyncScope::System || MOI.getSSID() == MMI->getAgentSSID()) { if (MOI.getOrdering() == AtomicOrdering::Release || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) Changed |= insertWaitcntVmcnt0(MI); if (MOI.getOrdering() == AtomicOrdering::Acquire || MOI.getOrdering() == AtomicOrdering::AcquireRelease || MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) { Changed |= insertWaitcntVmcnt0(MI, false); Changed |= insertBufferWbinvl1Vol(MI, false); } return Changed; } if (MOI.getSSID() == SyncScope::SingleThread || MOI.getSSID() == MMI->getWorkgroupSSID() || MOI.getSSID() == MMI->getWavefrontSSID()) { Changed |= enableGLCBit(MI); return Changed; } llvm_unreachable("Unsupported synchronization scope"); } return Changed; } bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; const SISubtarget &ST = MF.getSubtarget(); const IsaInfo::IsaVersion IV = IsaInfo::getIsaVersion(ST.getFeatureBits()); MMI = &MF.getMMI().getObjFileInfo(); TII = ST.getInstrInfo(); Vmcnt0Immediate = AMDGPU::encodeWaitcnt(IV, 0, getExpcntBitMask(IV), getLgkmcntBitMask(IV)); Wbinvl1Opcode = ST.getGeneration() <= AMDGPUSubtarget::SOUTHERN_ISLANDS ? AMDGPU::BUFFER_WBINVL1 : AMDGPU::BUFFER_WBINVL1_VOL; for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) continue; if (const auto &MOI = SIMemOpInfo::getLoadInfo(MI)) Changed |= expandLoad(MOI.getValue(), MI); else if (const auto &MOI = SIMemOpInfo::getStoreInfo(MI)) Changed |= expandStore(MOI.getValue(), MI); else if (const auto &MOI = SIMemOpInfo::getAtomicFenceInfo(MI)) Changed |= expandAtomicFence(MOI.getValue(), MI); else if (const auto &MOI = SIMemOpInfo::getAtomicCmpxchgInfo(MI)) Changed |= expandAtomicCmpxchg(MOI.getValue(), MI); else if (const auto &MOI = SIMemOpInfo::getAtomicRmwInfo(MI)) Changed |= expandAtomicRmw(MOI.getValue(), MI); } } Changed |= removeAtomicPseudoMIs(); return Changed; } INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false) char SIMemoryLegalizer::ID = 0; char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID; FunctionPass *llvm::createSIMemoryLegalizerPass() { return new SIMemoryLegalizer(); }