I have a problem where a certain number of threads in my thread queue don't wake up when they are supposed to. It seems that Windows schedules some threads onto the same processor, and those don't get woken up until the previous thread finishes its work and goes back to sleep. As a result, the actual processing time doubles and sometimes triples.
It looks like this in my app:
I'm using 8 threads and I have an i7-3770K with 4 cores and 8 hyper-threads.
The job-pushing code just increments a semaphore, and the threads wait on it with WaitForSingleObject().
Below is a small program that demonstrates the issue.
If I manually set the affinity so the threads are spread evenly across the processors, the problem mostly goes away, at least in this test program. In the main app that doesn't work, because there is often one thread that lags drastically behind, slowing everything down even more than the operating system's scheduling does.
Is this normal behaviour or am I just missing something totally obvious?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136 | #define USE_AFFINITY false
#include <windows.h>

#include <atomic>
#include <cstdio>
struct Timer {
double frequency;
LARGE_INTEGER timeStamp;
double dt;
void init() {
LARGE_INTEGER frequency;
QueryPerformanceFrequency(&frequency);
this->frequency = (double)frequency.QuadPart;
}
void start() { QueryPerformanceCounter(&timeStamp); }
double stop() {
LARGE_INTEGER newTimeStamp;
QueryPerformanceCounter(&newTimeStamp);
dt = newTimeStamp.QuadPart - timeStamp.QuadPart;
dt /= frequency;
return dt;
}
};
/// Job slot shared between the dispatching thread and one worker thread.
///
/// The dispatcher fills in `function` / `data`, sets `active`, and releases
/// `semaphore`; the worker runs the job and clears `active` when it is done.
struct ThreadSettings {
    /// True while a job is outstanding. It is written by the worker and polled
    /// by the dispatcher from a different thread, so it must be atomic: with a
    /// plain bool the polling loop is a data race and the compiler is allowed
    /// to read the flag only once.
    std::atomic<bool> active{false};
    /// Semaphore the worker blocks on until a job is posted.
    HANDLE semaphore = nullptr;
    /// Opaque argument handed to `function`.
    void* data = nullptr;
    /// Job entry point invoked by the worker with `data`.
    void (*function)(void* data) = nullptr;
};
/// Worker thread entry point.
///
/// @param data Pointer to the ThreadSettings slot owned by this worker.
/// @return Never returns while waits succeed; returns 1 if waiting on the
///         slot's semaphore fails.
///
/// Each loop iteration blocks on the slot's semaphore, runs the posted job
/// (`function(data)`), and then clears `active` to signal completion.
DWORD WINAPI threadProcessX(LPVOID data) {
    ThreadSettings* const settings = static_cast<ThreadSettings*>(data);

    DWORD waitResult = WaitForSingleObjectEx(settings->semaphore, INFINITE, FALSE);
    while (waitResult == WAIT_OBJECT_0) {
        settings->function(settings->data);
        settings->active = false;
        waitResult = WaitForSingleObjectEx(settings->semaphore, INFINITE, FALSE);
    }

    // The wait did not complete normally (e.g. invalid handle). Stop instead of
    // spinning and invoking a job that was never posted. `active` is left
    // as-is because no job was executed.
    fprintf(stderr, "threadProcessX: wait failed (result %lu)\n",
            static_cast<unsigned long>(waitResult));
    return 1;
}
int main(int argc, char** argv) {
if(USE_AFFINITY) {
__int64 threadMask = SetThreadAffinityMask(GetCurrentThread(), 1);
}
ThreadSettings settings[8] = {};
int i = 0;
for(auto& it : settings) {
it.active = false;
it.semaphore = CreateSemaphoreA(0, 0, 1, 0);
HANDLE thread = CreateThread(0, 0, threadProcessX, &it, 0, 0);
if(USE_AFFINITY) {
SetThreadAffinityMask(thread, 2 << i);
}
CloseHandle(thread);
i++;
}
Timer timer;
timer.init();
struct ThreadData {
Timer timer;
int count;
float temp;
};
auto threadFunc = [](void* data) {
ThreadData* d = (ThreadData*)data;
d->timer.start();
d->temp = 0;
int count = 5000000 / d->count;
for(int i = 0 ; i < count; i++) {
d->temp += 123 * 456; // Do some work.
}
d->timer.stop();
};
for(int i = 0; i < 8; i++) {
int threadCount = i+1;
timer.start();
ThreadData threadData[8];
for(int i = 0; i < threadCount; i++) {
Timer tim = timer;
threadData[i] = {tim, threadCount};
}
for(int i = 0; i < threadCount; i++) {
if(i < threadCount-1) {
settings[i].active = true;
settings[i].function = threadFunc;
settings[i].data = threadData + i;
ReleaseSemaphore(settings[i].semaphore, 1, 0);
} else {
threadFunc(threadData + i);
}
}
while(true) {
bool done = true;
for(int i = 0; i < threadCount; i++) {
ThreadSettings* s = settings + i;
if(s->active) {
done = false;
break;
}
}
if(done) break;
}
timer.stop();
printf("\n");
for(int i = 0; i < threadCount; i++) {
ThreadData* it = threadData + i;
float startTime = (it->timer.timeStamp.QuadPart - timer.timeStamp.QuadPart) / timer.frequency;
printf("Time : %f, Start time: %f\n", it->timer.dt, startTime);
}
printf("Total: %f, \n", timer.dt);
}
return 0;
}
|
Automatic scheduling:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 | Time : 0.003572, Start time: 0.000000
Total: 0.003572,
Time : 0.001689, Start time: 0.000002
Time : 0.001667, Start time: 0.000001
Total: 0.001691,
Time : 0.001112, Start time: 0.000001
Time : 0.001229, Start time: 0.000002
Time : 0.001111, Start time: 0.000001
Total: 0.001232,
Time : 0.000837, Start time: 0.000162
Time : 0.000834, Start time: 0.000016
Time : 0.000872, Start time: 0.000013
Time : 0.000834, Start time: 0.000001
Total: 0.000999,
Time : 0.000668, Start time: 0.000001
Time : 0.000668, Start time: 0.000002
Time : 0.000688, Start time: 0.000002
Time : 0.000667, Start time: 0.000671
Time : 0.000667, Start time: 0.000002
Total: 0.001339,
Time : 0.000556, Start time: 0.000001
Time : 0.000556, Start time: 0.000002
Time : 0.000556, Start time: 0.000002
Time : 0.000556, Start time: 0.000558
Time : 0.000556, Start time: 0.000559
Time : 0.000555, Start time: 0.000002
Total: 0.001114,
Time : 0.000476, Start time: 0.000001
Time : 0.000476, Start time: 0.000001
Time : 0.000476, Start time: 0.000002
Time : 0.000476, Start time: 0.000479
Time : 0.000477, Start time: 0.000479
Time : 0.000477, Start time: 0.000478
Time : 0.000476, Start time: 0.000002
Total: 0.000956,
Time : 0.000417, Start time: 0.000001
Time : 0.000425, Start time: 0.000002
Time : 0.000419, Start time: 0.000419
Time : 0.000436, Start time: 0.000030
Time : 0.000443, Start time: 0.000466
Time : 0.000437, Start time: 0.000864
Time : 0.000420, Start time: 0.000427
Time : 0.000553, Start time: 0.000003
Total: 0.001301,
|
Fixed scheduling:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 | Time : 0.003348, Start time: 0.000000
Total: 0.003348,
Time : 0.001667, Start time: 0.000002
Time : 0.001667, Start time: 0.000001
Total: 0.001669,
Time : 0.001112, Start time: 0.000001
Time : 0.001117, Start time: 0.000003
Time : 0.001112, Start time: 0.000001
Total: 0.001120,
Time : 0.000834, Start time: 0.000001
Time : 0.000833, Start time: 0.000002
Time : 0.000834, Start time: 0.000002
Time : 0.000833, Start time: 0.000001
Total: 0.000836,
Time : 0.000667, Start time: 0.000001
Time : 0.000667, Start time: 0.000002
Time : 0.000666, Start time: 0.000002
Time : 0.000667, Start time: 0.000003
Time : 0.000731, Start time: 0.000002
Total: 0.000733,
Time : 0.000556, Start time: 0.000001
Time : 0.000556, Start time: 0.000002
Time : 0.000556, Start time: 0.000002
Time : 0.000556, Start time: 0.000003
Time : 0.000556, Start time: 0.000004
Time : 0.000556, Start time: 0.000002
Total: 0.000559,
Time : 0.000476, Start time: 0.000001
Time : 0.000476, Start time: 0.000002
Time : 0.000476, Start time: 0.000002
Time : 0.000476, Start time: 0.000003
Time : 0.000476, Start time: 0.000003
Time : 0.000476, Start time: 0.000004
Time : 0.000476, Start time: 0.000003
Total: 0.000480,
Time : 0.000417, Start time: 0.000001
Time : 0.000417, Start time: 0.000002
Time : 0.000417, Start time: 0.000002
Time : 0.000417, Start time: 0.000003
Time : 0.000417, Start time: 0.000003
Time : 0.000417, Start time: 0.000004
Time : 0.000417, Start time: 0.000004
Time : 0.000417, Start time: 0.000003
Total: 0.000421,
|