290 lines
6.2 KiB
C++
290 lines
6.2 KiB
C++
|
// Copyright 2009-2021 Intel Corporation
|
||
|
// SPDX-License-Identifier: Apache-2.0
|
||
|
|
||
|
#include "barrier.h"
|
||
|
#include "condition.h"
|
||
|
#include "regression.h"
|
||
|
#include "thread.h"
|
||
|
|
||
|
#if defined (__WIN32__)
|
||
|
|
||
|
#define WIN32_LEAN_AND_MEAN
|
||
|
#include <windows.h>
|
||
|
|
||
|
namespace embree
|
||
|
{
|
||
|
struct BarrierSysImplementation
|
||
|
{
|
||
|
__forceinline BarrierSysImplementation (size_t N)
|
||
|
: i(0), enterCount(0), exitCount(0), barrierSize(0)
|
||
|
{
|
||
|
events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
|
||
|
events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);
|
||
|
init(N);
|
||
|
}
|
||
|
|
||
|
__forceinline ~BarrierSysImplementation ()
|
||
|
{
|
||
|
CloseHandle(events[0]);
|
||
|
CloseHandle(events[1]);
|
||
|
}
|
||
|
|
||
|
__forceinline void init(size_t N)
|
||
|
{
|
||
|
barrierSize = N;
|
||
|
enterCount.store(N);
|
||
|
exitCount.store(N);
|
||
|
}
|
||
|
|
||
|
__forceinline void wait()
|
||
|
{
|
||
|
/* every thread entering the barrier decrements this count */
|
||
|
size_t i0 = i;
|
||
|
size_t cnt0 = enterCount--;
|
||
|
|
||
|
/* all threads except the last one are wait in the barrier */
|
||
|
if (cnt0 > 1)
|
||
|
{
|
||
|
if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)
|
||
|
THROW_RUNTIME_ERROR("WaitForSingleObjects failed");
|
||
|
}
|
||
|
|
||
|
/* the last thread starts all threads waiting at the barrier */
|
||
|
else
|
||
|
{
|
||
|
i = 1-i;
|
||
|
enterCount.store(barrierSize);
|
||
|
if (SetEvent(events[i0]) == 0)
|
||
|
THROW_RUNTIME_ERROR("SetEvent failed");
|
||
|
}
|
||
|
|
||
|
/* every thread leaving the barrier decrements this count */
|
||
|
size_t cnt1 = exitCount--;
|
||
|
|
||
|
/* the last thread that left the barrier resets the event again */
|
||
|
if (cnt1 == 1)
|
||
|
{
|
||
|
exitCount.store(barrierSize);
|
||
|
if (ResetEvent(events[i0]) == 0)
|
||
|
THROW_RUNTIME_ERROR("ResetEvent failed");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
HANDLE events[2];
|
||
|
atomic<size_t> i;
|
||
|
atomic<size_t> enterCount;
|
||
|
atomic<size_t> exitCount;
|
||
|
size_t barrierSize;
|
||
|
};
|
||
|
}
|
||
|
|
||
|
#else
|
||
|
|
||
|
namespace embree
|
||
|
{
|
||
|
struct BarrierSysImplementation
|
||
|
{
|
||
|
__forceinline BarrierSysImplementation (size_t N)
|
||
|
: count(0), barrierSize(0)
|
||
|
{
|
||
|
init(N);
|
||
|
}
|
||
|
|
||
|
__forceinline void init(size_t N)
|
||
|
{
|
||
|
assert(count == 0);
|
||
|
count = 0;
|
||
|
barrierSize = N;
|
||
|
}
|
||
|
|
||
|
__forceinline void wait()
|
||
|
{
|
||
|
mutex.lock();
|
||
|
count++;
|
||
|
|
||
|
if (count == barrierSize) {
|
||
|
count = 0;
|
||
|
cond.notify_all();
|
||
|
mutex.unlock();
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
cond.wait(mutex);
|
||
|
mutex.unlock();
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
public:
|
||
|
MutexSys mutex;
|
||
|
ConditionSys cond;
|
||
|
volatile size_t count;
|
||
|
volatile size_t barrierSize;
|
||
|
};
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
|
||
|
namespace embree
|
||
|
{
|
||
|
/*! Allocates the platform specific implementation for a barrier of N
 *  threads and stores it behind the opaque pointer. */
BarrierSys::BarrierSys (size_t N)
{
  BarrierSysImplementation* const impl = new BarrierSysImplementation(N);
  opaque = impl;
}
|
||
|
|
||
|
/*! Destroys the implementation object referenced by the opaque pointer. */
BarrierSys::~BarrierSys ()
{
  BarrierSysImplementation* const impl = static_cast<BarrierSysImplementation*>(opaque);
  delete impl;
}
|
||
|
|
||
|
void BarrierSys::init(size_t count) {
|
||
|
((BarrierSysImplementation*) opaque)->init(count);
|
||
|
}
|
||
|
|
||
|
void BarrierSys::wait() {
|
||
|
((BarrierSysImplementation*) opaque)->wait();
|
||
|
}
|
||
|
|
||
|
/*! Creates a barrier for N threads. Passing N == 0 selects the value
 *  returned by getNumberOfLogicalThreads() instead. */
LinearBarrierActive::LinearBarrierActive (size_t N)
  : count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)
{
  const size_t numThreads = (N == 0) ? getNumberOfLogicalThreads() : N;
  init(numThreads);
}
|
||
|
|
||
|
/*! Frees both per-thread counter arrays. */
LinearBarrierActive::~LinearBarrierActive()
{
  delete[] count1;
  delete[] count0;
}
|
||
|
|
||
|
void LinearBarrierActive::init(size_t N)
|
||
|
{
|
||
|
if (threadCount != N) {
|
||
|
threadCount = N;
|
||
|
if (count0) delete[] count0; count0 = new unsigned char[N];
|
||
|
if (count1) delete[] count1; count1 = new unsigned char[N];
|
||
|
}
|
||
|
mode = 0;
|
||
|
flag0 = 0;
|
||
|
flag1 = 0;
|
||
|
for (size_t i=0; i<N; i++) count0[i] = 0;
|
||
|
for (size_t i=0; i<N; i++) count1[i] = 0;
|
||
|
}
|
||
|
|
||
|
void LinearBarrierActive::wait (const size_t threadIndex)
|
||
|
{
|
||
|
if (mode == 0)
|
||
|
{
|
||
|
if (threadIndex == 0)
|
||
|
{
|
||
|
for (size_t i=0; i<threadCount; i++)
|
||
|
count1[i] = 0;
|
||
|
|
||
|
for (size_t i=1; i<threadCount; i++)
|
||
|
{
|
||
|
while (likely(count0[i] == 0))
|
||
|
pause_cpu();
|
||
|
}
|
||
|
mode = 1;
|
||
|
flag1 = 0;
|
||
|
__memory_barrier();
|
||
|
flag0 = 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
count0[threadIndex] = 1;
|
||
|
{
|
||
|
while (likely(flag0 == 0))
|
||
|
pause_cpu();
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (threadIndex == 0)
|
||
|
{
|
||
|
for (size_t i=0; i<threadCount; i++)
|
||
|
count0[i] = 0;
|
||
|
|
||
|
for (size_t i=1; i<threadCount; i++)
|
||
|
{
|
||
|
while (likely(count1[i] == 0))
|
||
|
pause_cpu();
|
||
|
}
|
||
|
|
||
|
mode = 0;
|
||
|
flag0 = 0;
|
||
|
__memory_barrier();
|
||
|
flag1 = 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
count1[threadIndex] = 1;
|
||
|
{
|
||
|
while (likely(flag1 == 0))
|
||
|
pause_cpu();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct barrier_sys_regression_test : public RegressionTest
|
||
|
{
|
||
|
BarrierSys barrier;
|
||
|
std::atomic<size_t> threadID;
|
||
|
std::atomic<size_t> numFailed;
|
||
|
std::vector<size_t> threadResults;
|
||
|
|
||
|
barrier_sys_regression_test()
|
||
|
: RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)
|
||
|
{
|
||
|
registerRegressionTest(this);
|
||
|
}
|
||
|
|
||
|
static void thread_alloc(barrier_sys_regression_test* This)
|
||
|
{
|
||
|
size_t tid = This->threadID++;
|
||
|
for (size_t j=0; j<1000; j++)
|
||
|
{
|
||
|
This->barrier.wait();
|
||
|
This->threadResults[tid] = tid;
|
||
|
This->barrier.wait();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool run ()
|
||
|
{
|
||
|
threadID.store(0);
|
||
|
numFailed.store(0);
|
||
|
|
||
|
size_t numThreads = getNumberOfLogicalThreads();
|
||
|
threadResults.resize(numThreads);
|
||
|
barrier.init(numThreads+1);
|
||
|
|
||
|
/* create threads */
|
||
|
std::vector<thread_t> threads;
|
||
|
for (size_t i=0; i<numThreads; i++)
|
||
|
threads.push_back(createThread((thread_func)thread_alloc,this));
|
||
|
|
||
|
/* run test */
|
||
|
for (size_t i=0; i<1000; i++)
|
||
|
{
|
||
|
for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;
|
||
|
barrier.wait();
|
||
|
barrier.wait();
|
||
|
for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;
|
||
|
}
|
||
|
|
||
|
/* destroy threads */
|
||
|
for (size_t i=0; i<numThreads; i++)
|
||
|
join(threads[i]);
|
||
|
|
||
|
return numFailed == 0;
|
||
|
}
|
||
|
};
|
||
|
|
||
|
barrier_sys_regression_test barrier_sys_regression_test;
|
||
|
}
|
||
|
|
||
|
|