It looks like this in my app:
I'm using 8 threads and I have an i7-3770K with 4 cores and 8 hyper-threads.
The job-pushing code just increments a semaphore, and the threads wait on it with WaitForSingleObject().
Below is a small program that demonstrates the issue.
If I manually set the affinity so that the threads are spread evenly across the logical processors, the problem mostly goes away, at least in this test program. In the main app that doesn't work, because there is often one thread that lags drastically behind, slowing everything down even more than the operating system's scheduling does.
Is this normal behaviour or am I just missing something totally obvious?
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 | #define USE_AFFINITY false #include <windows.h> #include <cstdio> struct Timer { double frequency; LARGE_INTEGER timeStamp; double dt; void init() { LARGE_INTEGER frequency; QueryPerformanceFrequency(&frequency); this->frequency = (double)frequency.QuadPart; } void start() { QueryPerformanceCounter(&timeStamp); } double stop() { LARGE_INTEGER newTimeStamp; QueryPerformanceCounter(&newTimeStamp); dt = newTimeStamp.QuadPart - timeStamp.QuadPart; dt /= frequency; return dt; } }; struct ThreadSettings { bool active; HANDLE semaphore; void* data; void (*function)(void* data); }; DWORD WINAPI threadProcessX(LPVOID data) { ThreadSettings* settings = (ThreadSettings*)data; while(true) { WaitForSingleObjectEx(settings->semaphore, INFINITE, FALSE); settings->function(settings->data); settings->active = false; } return 0; } int main(int argc, char** argv) { if(USE_AFFINITY) { __int64 threadMask = SetThreadAffinityMask(GetCurrentThread(), 1); } ThreadSettings settings[8] = {}; int i = 0; for(auto& it : settings) { it.active = false; it.semaphore = CreateSemaphoreA(0, 0, 1, 0); HANDLE thread = CreateThread(0, 0, threadProcessX, &it, 0, 0); if(USE_AFFINITY) { SetThreadAffinityMask(thread, 2 << i); } CloseHandle(thread); i++; } Timer timer; timer.init(); struct ThreadData { Timer timer; int count; float temp; }; auto threadFunc = [](void* data) { ThreadData* d = (ThreadData*)data; d->timer.start(); d->temp = 0; int count = 5000000 / d->count; for(int i = 0 ; i < count; i++) { d->temp += 123 * 456; // Do some work. 
} d->timer.stop(); }; for(int i = 0; i < 8; i++) { int threadCount = i+1; timer.start(); ThreadData threadData[8]; for(int i = 0; i < threadCount; i++) { Timer tim = timer; threadData[i] = {tim, threadCount}; } for(int i = 0; i < threadCount; i++) { if(i < threadCount-1) { settings[i].active = true; settings[i].function = threadFunc; settings[i].data = threadData + i; ReleaseSemaphore(settings[i].semaphore, 1, 0); } else { threadFunc(threadData + i); } } while(true) { bool done = true; for(int i = 0; i < threadCount; i++) { ThreadSettings* s = settings + i; if(s->active) { done = false; break; } } if(done) break; } timer.stop(); printf("\n"); for(int i = 0; i < threadCount; i++) { ThreadData* it = threadData + i; float startTime = (it->timer.timeStamp.QuadPart - timer.timeStamp.QuadPart) / timer.frequency; printf("Time : %f, Start time: %f\n", it->timer.dt, startTime); } printf("Total: %f, \n", timer.dt); } return 0; } |
Automatic scheduling:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | Time : 0.003572, Start time: 0.000000 Total: 0.003572, Time : 0.001689, Start time: 0.000002 Time : 0.001667, Start time: 0.000001 Total: 0.001691, Time : 0.001112, Start time: 0.000001 Time : 0.001229, Start time: 0.000002 Time : 0.001111, Start time: 0.000001 Total: 0.001232, Time : 0.000837, Start time: 0.000162 Time : 0.000834, Start time: 0.000016 Time : 0.000872, Start time: 0.000013 Time : 0.000834, Start time: 0.000001 Total: 0.000999, Time : 0.000668, Start time: 0.000001 Time : 0.000668, Start time: 0.000002 Time : 0.000688, Start time: 0.000002 Time : 0.000667, Start time: 0.000671 Time : 0.000667, Start time: 0.000002 Total: 0.001339, Time : 0.000556, Start time: 0.000001 Time : 0.000556, Start time: 0.000002 Time : 0.000556, Start time: 0.000002 Time : 0.000556, Start time: 0.000558 Time : 0.000556, Start time: 0.000559 Time : 0.000555, Start time: 0.000002 Total: 0.001114, Time : 0.000476, Start time: 0.000001 Time : 0.000476, Start time: 0.000001 Time : 0.000476, Start time: 0.000002 Time : 0.000476, Start time: 0.000479 Time : 0.000477, Start time: 0.000479 Time : 0.000477, Start time: 0.000478 Time : 0.000476, Start time: 0.000002 Total: 0.000956, Time : 0.000417, Start time: 0.000001 Time : 0.000425, Start time: 0.000002 Time : 0.000419, Start time: 0.000419 Time : 0.000436, Start time: 0.000030 Time : 0.000443, Start time: 0.000466 Time : 0.000437, Start time: 0.000864 Time : 0.000420, Start time: 0.000427 Time : 0.000553, Start time: 0.000003 Total: 0.001301, |
Fixed scheduling:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | Time : 0.003348, Start time: 0.000000 Total: 0.003348, Time : 0.001667, Start time: 0.000002 Time : 0.001667, Start time: 0.000001 Total: 0.001669, Time : 0.001112, Start time: 0.000001 Time : 0.001117, Start time: 0.000003 Time : 0.001112, Start time: 0.000001 Total: 0.001120, Time : 0.000834, Start time: 0.000001 Time : 0.000833, Start time: 0.000002 Time : 0.000834, Start time: 0.000002 Time : 0.000833, Start time: 0.000001 Total: 0.000836, Time : 0.000667, Start time: 0.000001 Time : 0.000667, Start time: 0.000002 Time : 0.000666, Start time: 0.000002 Time : 0.000667, Start time: 0.000003 Time : 0.000731, Start time: 0.000002 Total: 0.000733, Time : 0.000556, Start time: 0.000001 Time : 0.000556, Start time: 0.000002 Time : 0.000556, Start time: 0.000002 Time : 0.000556, Start time: 0.000003 Time : 0.000556, Start time: 0.000004 Time : 0.000556, Start time: 0.000002 Total: 0.000559, Time : 0.000476, Start time: 0.000001 Time : 0.000476, Start time: 0.000002 Time : 0.000476, Start time: 0.000002 Time : 0.000476, Start time: 0.000003 Time : 0.000476, Start time: 0.000003 Time : 0.000476, Start time: 0.000004 Time : 0.000476, Start time: 0.000003 Total: 0.000480, Time : 0.000417, Start time: 0.000001 Time : 0.000417, Start time: 0.000002 Time : 0.000417, Start time: 0.000002 Time : 0.000417, Start time: 0.000003 Time : 0.000417, Start time: 0.000003 Time : 0.000417, Start time: 0.000004 Time : 0.000417, Start time: 0.000004 Time : 0.000417, Start time: 0.000003 Total: 0.000421, |