#include <algorithm>
#include <atomic>

#include "basis/seadRawPrint.h"
#include "framework/seadProcessMeter.h"
#include "mc/seadJobQueue.h"
#include "mc/seadWorker.h"
#include "prim/seadScopedLock.h"
namespace sead
{
// NON_MATCHING
JobQueue::JobQueue()
{
    mCoreEnabled.fill(0);
    mNumDoneJobs = 0;
    mGranularity.fill(8);
}

bool JobQueue::run(u32, u32* finished_jobs, Worker*)
{
    *finished_jobs = 0;
    return true;
}

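// Runs the whole queue on the calling thread: keeps calling run() and accumulating the per-batch
// counts until run() reports completion, then checks that every job was processed.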
void JobQueue::runAll(u32* finished_jobs)
{
    const u32 size = getNumJobs();
    *finished_jobs = 0;
    while (true)
    {
        u32 finished_jobs_batch = 0;
        const bool ok = run(size, &finished_jobs_batch, nullptr);
        *finished_jobs += finished_jobs_batch;
        if (ok)
            break;
    }
    SEAD_ASSERT(*finished_jobs == size);
}

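// True once every core has cleared its entry in mCoreEnabled (see FINISH()).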
bool JobQueue::isAllParticipantThrough() const
{
    for (auto value : mCoreEnabled.mBuffer)
        if (value)
            return false;
    return true;
}

void JobQueue::setGranularity(CoreId core, u32 x)
{
    mGranularity[core] = x ? x : 1;
}

void JobQueue::setGranularity(u32 x)
{
    for (s32 i = 0; i < mGranularity.size(); ++i)
        setGranularity(i, x);
}

// NON_MATCHING: CMP (AND x y), #0 gets optimized into a TST
void JobQueue::setCoreMaskAndWaitType(CoreIdMask mask, SyncType type)
{
    mStatus = Status::_6;
    mMask = mask;
    for (u32 i = 0; i < CoreInfo::getNumCores(); ++i)
    {
        mCoreEnabled[i] = mask.isOn(i);
        mNumDoneJobs = 0;
    }
    mSyncType = type;
}

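// Called by a worker when its core has finished dequeuing jobs: publish its writes with a fence,
// clear the core's participant flag, then wait according to the queue's sync type.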
void JobQueue::FINISH(CoreId core)
{
    std::atomic_thread_fence(std::memory_order_seq_cst);
    mCoreEnabled[core] = 0;
    wait_AT_WORKER();
}

void JobQueue::wait_AT_WORKER()
{
    std::atomic_thread_fence(std::memory_order_seq_cst);

    switch (mSyncType)
    {
    case SyncType::cCore:
        if (!isDone_())
            mFinishEvent.wait();
        break;
    case SyncType::cThread:
        SEAD_ASSERT_MSG(false, "*NOT YET\n");
        if (!isDone_())
            mFinishEvent.wait();
        break;
    default:
        break;
    }
}

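// Waits for the queue to finish from the submitting side: blocks on mFinishEvent unless the
// sync type opts out; thread-level sync (SyncType::cThread) is not implemented yet.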
void JobQueue::wait()
{
    if (u32(mSyncType) >= 2)
    {
        if (mSyncType != SyncType::cThread)
            return;
        SEAD_ASSERT_MSG(false, "NOT IMPLEMENTED.\n");
    }
    if (!isDone_())
        mFinishEvent.wait();
}

bool JobQueue::isDone_()
{
    return mNumDoneJobs == getNumJobs();
}

// NON_MATCHING: stack
void PerfJobQueue::initialize(const char* name, Heap* heap)
{
    mBars.allocBufferAssert(CoreInfo::getNumCores(), heap);
    mInts.allocBufferAssert(CoreInfo::getNumCores(), heap);

    for (s32 i = 0; i < mInts.size(); ++i)
        mInts[CoreId(i)] = 0;

    for (s32 i = 0; i < mBars.size(); ++i)
        mBars[i].setName(CoreId(i).text());

    mProcessMeterBar.setColor({1, 1, 0, 1});
    mProcessMeterBar.setName(name);
}

void PerfJobQueue::finalize()
{
    mInts.freeBuffer();
    mBars.freeBuffer();
}

void PerfJobQueue::reset()
{
    for (s32 i = 0; i < mInts.size(); ++i)
        mInts[CoreId(i)] = 0;
}

// NON_MATCHING: stack
void PerfJobQueue::measureBeginDeque()
{
    auto& bar = mBars[CoreInfo::getCurrentCoreId()];
    static_cast<void>(mInts[CoreInfo::getCurrentCoreId()]);
    bar.measureBegin(Color4f::cWhite);
}

void PerfJobQueue::measureEndDeque()
{
    mBars[CoreInfo::getCurrentCoreId()].measureEnd();
}

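// Starts a run measurement on the current core's bar, cycling through the 9-entry color palette
// so consecutive jobs on the same core get distinct colors.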
void PerfJobQueue::measureBeginRun()
{
    auto& bar = mBars[CoreInfo::getCurrentCoreId()];
    auto& idx = mInts[CoreInfo::getCurrentCoreId()];
    bar.measureBegin(getBarColor(idx));
    idx = (idx + 1) % 9;
}

void PerfJobQueue::measureEndRun()
{
    mBars[CoreInfo::getCurrentCoreId()].measureEnd();
}

// NON_MATCHING: loading sColors...
const Color4f& PerfJobQueue::getBarColor(u32 idx) const
{
    static const SafeArray<Color4f, 9> sColors = {{
        {0.2078431397676468, 0.8313725590705872, 0.6274510025978088, 1.0},
        {0.0, 0.6666666865348816, 0.4470588266849518, 1.0},
        {0.125490203499794, 0.49803921580314636, 0.3764705955982208, 1.0},
        {0.7490196228027344, 0.5254902243614197, 0.1882352977991104, 1.0},
        {1.0, 0.6000000238418579, 0.0, 1.0},
        {1.0, 0.6980392336845398, 0.250980406999588, 1.0},
        {0.6901960968971252, 0.1725490242242813, 0.29411765933036804, 1.0},
        {0.0, 0.9176470637321472, 0.21568627655506134, 1.0},
        {0.9607843160629272, 0.239215686917305, 0.40784314274787903, 1.0},
    }};
    return sColors.mBuffer[idx];
}

void PerfJobQueue::attachProcessMeter()
{
    if (!ProcessMeter::instance())
        return;

    for (s32 i = 0; i < mBars.size(); ++i)
        ProcessMeter::instance()->attachProcessMeterBar(&mBars[i]);

    ProcessMeter::instance()->attachProcessMeterBar(&mProcessMeterBar);
}

void PerfJobQueue::detachProcessMeter()
{
    if (!ProcessMeter::instance())
        return;

    for (s32 i = 0; i < mBars.size(); ++i)
        ProcessMeter::instance()->detachProcessMeterBar(&mBars[i]);

    ProcessMeter::instance()->detachProcessMeterBar(&mProcessMeterBar);
}

FixedSizeJQ::FixedSizeJQ()
{
    _230 = true;
    mStatus = Status::_0;
    mNumJobs = 0;
    mNumProcessedJobs = 0;
}

void FixedSizeJQ::begin() {}

// TODO: Splatoon 2 and BotW sead have a different implementation which checks _230 and the current
// core number...
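// Claims a batch of up to `size` unprocessed jobs under the queue lock, releases the lock, then
// invokes the claimed jobs. Returns true once every job has been claimed (or there is nothing to do).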
bool FixedSizeJQ::run(u32 size, u32* finished_jobs, Worker* worker)
{
    *finished_jobs = 0;

#ifdef SEAD_DEBUG
    mPerf.measureBeginDeque();
#endif
    u32 num_finished = 0;
    // NON_MATCHING: Clang refuses to materialize these variables here...
    bool ret = true;
    s32 begin = 0;
    s32 end = -1;
    if (size > 0 && mNumJobs > 0)
    {
        if (worker)
            worker->setState(Worker::State::cRunning_WaitLock);

        mLock.lock();

        if (worker)
            worker->setState(Worker::State::cRunning_GetLock);

        begin = mNumProcessedJobs;
        const auto num_jobs = mNumJobs;
        num_finished = std::min(num_jobs - begin, size);

        mNumProcessedJobs = num_finished + begin;
        mLock.unlock();
        end = num_finished + begin - 1;
        ret = num_finished + begin >= num_jobs;
    }
#ifdef SEAD_DEBUG
    mPerf.measureEndDeque();
#endif

#ifdef SEAD_DEBUG
    mPerf.measureBeginRun();
#endif
    if (worker)
        worker->setState(Worker::State::cRunning_Run);

    for (s32 i = begin; i <= end; ++i)
        mJobs[i]->invoke();

    if (worker)
        worker->setState(Worker::State::cRunning_AfterRun);
#ifdef SEAD_DEBUG
    mPerf.measureEndRun();
#endif

    if (ret)
    {
        if (worker)
            worker->setState(Worker::State::cRunning_AllJobDoneReturn);
    }
    else
    {
        if (worker)
            worker->setState(Worker::State::cRunning_BeforeReturn);
    }

    *finished_jobs = num_finished;
    return ret;
}

u32 FixedSizeJQ::getNumJobs() const
{
    return mNumJobs;
}

void FixedSizeJQ::initialize(u32 size, Heap* heap)
{
#ifdef SEAD_DEBUG
    mPerf.initialize(getName().cstr(), heap);
#endif

    ScopedLock<JobQueueLock> lock(&mLock);
    mJobs.allocBufferAssert(size, heap);
    mNumJobs = 0;
    mNumProcessedJobs = 0;
    mStatus = Status::_1;
}

void FixedSizeJQ::finalize()
{
#ifdef SEAD_DEBUG
    mPerf.finalize();
#endif
    mJobs.freeBuffer();
}

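// Adds a job without taking the queue lock; use enqueSafe() when enqueuing from multiple threads.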
bool FixedSizeJQ::enque(Job* job)
{
    mStatus = Status::_3;

    if (mNumJobs >= u32(mJobs.size()))
        return false;

    mJobs[mNumJobs++] = job;
    return true;
}

bool FixedSizeJQ::enqueSafe(Job* job)
{
    mStatus = Status::_3;

    ScopedLock<JobQueueLock> lock(&mLock);
    if (mNumJobs >= u32(mJobs.size()))
        return false;

    mJobs[mNumJobs++] = job;
    return true;
}

Job* FixedSizeJQ::deque()
{
    ScopedLock<JobQueueLock> lock(&mLock);

    if (mNumProcessedJobs >= mNumJobs)
        return nullptr;

    return mJobs[mNumProcessedJobs++];
}

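// Batched variant: pops up to `count` pending jobs into `jobs` under a single lock acquisition
// and returns how many were dequeued.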
u32 FixedSizeJQ::deque(Job** jobs, u32 count)
{
    ScopedLock<JobQueueLock> lock(&mLock);

    u32 ret = 0;
    while (mNumProcessedJobs < mNumJobs && ret < count)
    {
        jobs[ret] = mJobs[mNumProcessedJobs++];
        ++ret;
    }
    return ret;
}

bool FixedSizeJQ::rewind()
{
#ifdef SEAD_DEBUG
    mPerf.reset();
#endif
    mNumProcessedJobs = 0;
    return true;
}

void FixedSizeJQ::clear()
{
    mStatus = Status::_5;
#ifdef SEAD_DEBUG
    mPerf.reset();
#endif
    mNumJobs = 0;
    mNumProcessedJobs = 0;
    mSyncType = SyncType::cNoSync;
}

bool FixedSizeJQ::debug_IsAllJobDone()
{
    return mNumProcessedJobs >= mNumJobs;
}
}  // namespace sead