diff --git a/src/include/mscclppfifo.h b/src/include/mscclppfifo.h index 04843a66..323ba9de 100644 --- a/src/include/mscclppfifo.h +++ b/src/include/mscclppfifo.h @@ -40,6 +40,7 @@ struct mscclppConcurrentFifo { __forceinline__ __device__ uint64_t push(uint64_t type, uint64_t dstDataOffset, uint64_t srcDataOffset, uint64_t dataSize){ uint64_t curFifoHead = atomicAdd((unsigned long long int*)this->triggerFifoHead,1); while (curFifoHead >= MSCCLPP_PROXY_FIFO_SIZE + *((volatile uint64_t*)this->triggerFifoTail)); + while (*(volatile uint64_t*)&this->triggerFifo[curFifoHead % MSCCLPP_PROXY_FIFO_SIZE] != 0); uint64_t* valptr = (uint64_t*)&(this->triggerFifo[curFifoHead % MSCCLPP_PROXY_FIFO_SIZE].value); asm volatile( "st.volatile.global.v2.u64 [%0], {%1,%2};" ::"l"(valptr),