ARGOBOTS
abti_thread.h
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * See COPYRIGHT in top-level directory.
 */

#ifndef ABTI_THREAD_H_INCLUDED
#define ABTI_THREAD_H_INCLUDED

/* Inlined functions for User-level Thread (ULT) */

static inline ABTI_thread *ABTI_thread_get_ptr(ABT_thread thread)
{
#ifndef ABT_CONFIG_DISABLE_ERROR_CHECK
    ABTI_thread *p_thread;
    if (thread == ABT_THREAD_NULL) {
        p_thread = NULL;
    } else {
        p_thread = (ABTI_thread *)thread;
    }
    return p_thread;
#else
    return (ABTI_thread *)thread;
#endif
}

static inline ABT_thread ABTI_thread_get_handle(ABTI_thread *p_thread)
{
#ifndef ABT_CONFIG_DISABLE_ERROR_CHECK
    ABT_thread h_thread;
    if (p_thread == NULL) {
        h_thread = ABT_THREAD_NULL;
    } else {
        h_thread = (ABT_thread)p_thread;
    }
    return h_thread;
#else
    return (ABT_thread)p_thread;
#endif
}
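
/* Conversion example (a minimal sketch; the handle value is left elided):
 * an ABT_thread handle and an ABTI_thread pointer carry the same bits, and
 * the two helpers above only add the NULL <-> ABT_THREAD_NULL mapping when
 * error checks are enabled.
 *
 *     ABT_thread h = ...;  // handle received from the public API
 *     ABTI_thread *p_thread = ABTI_thread_get_ptr(h);    // NULL iff h == ABT_THREAD_NULL
 *     ABT_thread h2 = ABTI_thread_get_handle(p_thread);  // round-trips: h2 == h
 */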

#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
static inline ABT_bool ABTI_thread_is_dynamic_promoted(ABTI_thread *p_thread)
{
    /*
     * Create a context and switch to it. The flow of the dynamic promotion
     * thread is as follows:
     *
     * - When a ULT does not yield:
     * ABTI_xstream_schedule_thread : call init_and_call_fcontext
     * init_and_call_fcontext       : jump to the stack top
     *                              : save the scheduler's context
     *                              : call the thread function
     * thread_f                     : start thread_f
     *                              : [ULT body]
     *                              : end thread_f
     * init_and_call_fcontext       : calculate the return address, which is
     *                          [1] => the original return address.
     *                              : `return`
     * ABTI_xstream_schedule_thread : resume the scheduler
     *
     * The ULT can return to the original scheduler by `return` if the
     * scheduler has never been resumed during the execution of the ULT,
     * because the context of the parent scheduler can be restored in a
     * normal return procedure. In this case, the context is saved only once.
     *
     * - When a ULT yields:
     * ABTI_xstream_schedule_thread : call init_and_call_fcontext
     * init_and_call_fcontext       : jump to the stack top
     *                              : save the scheduler's context
     *                              : call the thread function
     * thread_f                     : start thread_f
     *                              : [yield in ULT body]
     * ABTD_thread_context_dynamic_promote
     *                              : rewrite the return address to
     *                                ABTD_thread_terminate_thread_no_arg
     * jump_fcontext                : save the ULT's context
     *                              : restore the scheduler's context
     *                              : jump to the scheduler
     * ABTI_xstream_schedule_thread : resume the scheduler
     *
     * ... After a while, a scheduler resumes this ULT ...
     *
     * jump_fcontext                : save the scheduler's context
     *                              : restore the ULT's context
     *                              : jump to the ULT
     * thread_f                     : [ULT body (remaining)]
     *                              : end thread_f
     * init_and_call_fcontext       : calculate the return address, which is
     *                          [2] => ABTD_thread_terminate_thread_no_arg
     *                              : return
     * ABTD_thread_terminate_thread_no_arg
     *                              : call take_fcontext
     * take_fcontext                : restore the scheduler's context
     *                              : jump to the scheduler
     * ABTI_xstream_schedule_thread : resume the scheduler
     *
     * When a ULT yields, the address of ABTD_thread_terminate_thread_no_arg
     * is stored at [ptr - 0x10] and [ptr - 0x08] is rewritten to point to
     * it, so that the "return" in init_and_call_fcontext "jumps" to the
     * normal termination function. This termination function calls
     * take_fcontext, so the scheduler is resumed by a user-level context
     * switch.
     *
     * For example, the stack will be as follows at [1] and [2] in the x86-64
     * case. Note that ptr points to the stack top (= p_stack + stacksize).
     *
     * In the case of [1] (no suspension):
     * [0x12345600] : (the original instruction pointer)
     * ...
     * [ptr - 0x08] : the original stack pointer (i.e., 0x12345600)
     * [ptr - 0x10] : unused (for 16-byte alignment)
     * [ptr - xxxx] : used by thread_f
     *
     * In the case of [2] (after suspension):
     * [ptr - 0x08] : pointing to (ptr - 0x10)
     * [ptr - 0x10] : the address of ABTD_thread_terminate_thread_no_arg
     * [ptr - xxxx] : used by thread_f
     *
     * This technique was introduced as a "return-on-completion" thread in the
     * following paper:
     * Lessons Learned from Analyzing Dynamic Promotion for User-Level
     * Threading, S. Iwasaki, A. Amer, K. Taura, and P. Balaji (SC '18)
     */
    return ABTD_thread_context_is_dynamic_promoted(&p_thread->ctx);
}

static inline void ABTI_thread_dynamic_promote_thread(ABTI_thread *p_thread)
{
    LOG_EVENT("[U%" PRIu64 "] dynamic-promote ULT\n",
              ABTI_thread_get_id(p_thread));
    void *p_stack = p_thread->attr.p_stack;
    size_t stacksize = p_thread->attr.stacksize;
    void *p_stacktop = (void *)(((char *)p_stack) + stacksize);
    ABTD_thread_context_dynamic_promote_thread(p_stacktop);
}
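
/* Worked example of the stack-top arithmetic above (illustrative numbers):
 * with p_stack == 0x7f0000000000 and stacksize == 0x10000, p_stacktop is
 * 0x7f0000010000, so the [ptr - 0x08] and [ptr - 0x10] slots described in
 * ABTI_thread_is_dynamic_promoted() sit at 0x7f000000fff8 and
 * 0x7f000000fff0, respectively. */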
#endif

static inline void ABTI_thread_context_switch_thread_to_thread_internal(
    ABTI_local *p_local, ABTI_thread *p_old, ABTI_thread *p_new,
    ABT_bool is_finish)
{
#ifndef ABT_CONFIG_DISABLE_STACKABLE_SCHED
    ABTI_ASSERT(!p_old->is_sched && !p_new->is_sched);
#endif
    p_local->p_thread = p_new;
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Dynamic promotion is unnecessary if p_old is discarded. */
    if (!is_finish && !ABTI_thread_is_dynamic_promoted(p_old)) {
        ABTI_thread_dynamic_promote_thread(p_old);
    }
    if (!ABTI_thread_is_dynamic_promoted(p_new)) {
        /* p_new does not have a context, so we first need to make it. */
        ABTD_thread_context_arm_thread(p_new->attr.stacksize,
                                       p_new->attr.p_stack, &p_new->ctx);
    }
#endif
    if (is_finish) {
        ABTD_thread_finish_context(&p_old->ctx, &p_new->ctx);
    } else {
        ABTD_thread_context_switch(&p_old->ctx, &p_new->ctx);
    }
}

static inline void ABTI_thread_context_switch_thread_to_sched_internal(
    ABTI_thread *p_old, ABTI_sched *p_new, ABT_bool is_finish)
{
#ifndef ABT_CONFIG_DISABLE_STACKABLE_SCHED
    ABTI_ASSERT(!p_old->is_sched);
#endif
    ABTI_LOG_SET_SCHED(p_new);
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Dynamic promotion is unnecessary if p_old is discarded. */
    if (!is_finish && !ABTI_thread_is_dynamic_promoted(p_old))
        ABTI_thread_dynamic_promote_thread(p_old);
    /* Schedulers' contexts must be eagerly initialized. */
    ABTI_ASSERT(!p_new->p_thread ||
                ABTI_thread_is_dynamic_promoted(p_new->p_thread));
#endif
    if (is_finish) {
        ABTD_thread_finish_context(&p_old->ctx, p_new->p_ctx);
    } else {
        ABTD_thread_context_switch(&p_old->ctx, p_new->p_ctx);
    }
}

static inline void ABTI_thread_context_switch_sched_to_thread_internal(
    ABTI_local *p_local, ABTI_sched *p_old, ABTI_thread *p_new,
    ABT_bool is_finish)
{
#ifndef ABT_CONFIG_DISABLE_STACKABLE_SCHED
    ABTI_ASSERT(!p_new->is_sched);
#endif
    ABTI_LOG_SET_SCHED(NULL);
    p_local->p_thread = p_new;
    p_local->p_task = NULL; /* A tasklet scheduler can invoke a ULT. */
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Schedulers' contexts must be eagerly initialized. */
    ABTI_ASSERT(!p_old->p_thread ||
                ABTI_thread_is_dynamic_promoted(p_old->p_thread));
    if (!ABTI_thread_is_dynamic_promoted(p_new)) {
        void *p_stacktop =
            ((char *)p_new->attr.p_stack) + p_new->attr.stacksize;
        LOG_EVENT("[U%" PRIu64 "] run ULT (dynamic promotion)\n",
                  ABTI_thread_get_id(p_new));
        ABTD_thread_context_make_and_call(p_old->p_ctx, p_new->ctx.f_thread,
                                          p_new->ctx.p_arg, p_stacktop);
        /* The scheduler continues from here. If the previous thread has not
         * run dynamic promotion, ABTD_thread_context_make_and_call took the
         * fast path. In this case, the request handling has not been done,
         * so it must be done here. */
        ABTI_thread *p_prev = p_local->p_thread;
        if (!ABTI_thread_is_dynamic_promoted(p_prev)) {
            ABTI_ASSERT(p_prev == p_new);
            /* See ABTDI_thread_terminate for details.
             * TODO: avoid making a copy of the code. */
            ABTD_thread_context *p_ctx = &p_prev->ctx;
            ABTD_thread_context *p_link =
                ABTD_atomic_acquire_load_thread_context_ptr(&p_ctx->p_link);
            if (p_link) {
                /* If p_link is set, another ULT has already called join on
                 * this ULT. */
                ABTI_thread *p_joiner = (ABTI_thread *)p_link;
                /* The scheduler may not use a bypass mechanism, so just make
                 * p_joiner ready. */
                ABTI_thread_set_ready(p_local, p_joiner);

                /* We don't need to use the atomic OR operation here because
                 * the ULT will be terminated regardless of other requests. */
                ABTD_atomic_release_store_uint32(&p_prev->request,
                                                 ABTI_THREAD_REQ_TERMINATE);
            } else {
                uint32_t req =
                    ABTD_atomic_fetch_or_uint32(&p_prev->request,
                                                ABTI_THREAD_REQ_JOIN |
                                                    ABTI_THREAD_REQ_TERMINATE);
                if (req & ABTI_THREAD_REQ_JOIN) {
                    /* This case means there has been a join request and the
                     * joiner has blocked. We have to wake up the joiner ULT. */
                    do {
                        p_link = ABTD_atomic_acquire_load_thread_context_ptr(
                            &p_ctx->p_link);
                    } while (!p_link);
                    ABTI_thread_set_ready(p_local, (ABTI_thread *)p_link);
                }
            }
        }
        ABTI_LOG_SET_SCHED(p_old);
        return;
    }
#endif
    if (is_finish) {
        ABTD_thread_finish_context(p_old->p_ctx, &p_new->ctx);
    } else {
        ABTD_thread_context_switch(p_old->p_ctx, &p_new->ctx);
    }
}

static inline void ABTI_thread_context_switch_sched_to_sched_internal(
    ABTI_sched *p_old, ABTI_sched *p_new, ABT_bool is_finish)
{
    ABTI_LOG_SET_SCHED(p_new);
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Schedulers' contexts must be initialized eagerly. */
    ABTI_ASSERT(!p_old->p_thread ||
                ABTI_thread_is_dynamic_promoted(p_old->p_thread));
    ABTI_ASSERT(!p_new->p_thread ||
                ABTI_thread_is_dynamic_promoted(p_new->p_thread));
#endif
    if (is_finish) {
        ABTD_thread_finish_context(p_old->p_ctx, p_new->p_ctx);
    } else {
        ABTD_thread_context_switch(p_old->p_ctx, p_new->p_ctx);
    }
}

static inline void ABTI_thread_context_switch_thread_to_thread(
    ABTI_local **pp_local, ABTI_thread *p_old, ABTI_thread *p_new)
{
    ABTI_thread_context_switch_thread_to_thread_internal(*pp_local, p_old,
                                                          p_new, ABT_FALSE);
    *pp_local = ABTI_local_get_local_uninlined();
}

static inline void ABTI_thread_context_switch_thread_to_sched(
    ABTI_local **pp_local, ABTI_thread *p_old, ABTI_sched *p_new)
{
    ABTI_thread_context_switch_thread_to_sched_internal(p_old, p_new,
                                                         ABT_FALSE);
    *pp_local = ABTI_local_get_local_uninlined();
}

static inline void ABTI_thread_context_switch_sched_to_thread(
    ABTI_local **pp_local, ABTI_sched *p_old, ABTI_thread *p_new)
{
    ABTI_thread_context_switch_sched_to_thread_internal(*pp_local, p_old,
                                                        p_new, ABT_FALSE);
    *pp_local = ABTI_local_get_local_uninlined();
}

static inline void
ABTI_thread_context_switch_sched_to_sched(ABTI_local **pp_local,
                                          ABTI_sched *p_old, ABTI_sched *p_new)
{
    ABTI_thread_context_switch_sched_to_sched_internal(p_old, p_new, ABT_FALSE);
    *pp_local = ABTI_local_get_local_uninlined();
}
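
/* Note on the *pp_local reload in the wrappers above: while the caller is
 * suspended, its ULT may be picked up by a scheduler on a different execution
 * stream, so the cached ABTI_local pointer can be stale once control returns.
 * A minimal usage sketch (illustrative; p_self and p_sched stand for the
 * calling ULT and the target scheduler; see ABTI_thread_yield below for the
 * pattern actually used in this file):
 *
 *     ABTI_local *p_local = *pp_local;  // local data of the current xstream
 *     ABTI_thread_context_switch_thread_to_sched(&p_local, p_self, p_sched);
 *     // p_local now refers to the xstream on which this ULT resumed.
 */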

static inline void ABTI_thread_finish_context_thread_to_thread(
    ABTI_local *p_local, ABTI_thread *p_old, ABTI_thread *p_new)
{
    ABTI_thread_context_switch_thread_to_thread_internal(p_local, p_old, p_new,
                                                          ABT_TRUE);
}

static inline void
ABTI_thread_finish_context_thread_to_sched(ABTI_thread *p_old,
                                           ABTI_sched *p_new)
{
    ABTI_thread_context_switch_thread_to_sched_internal(p_old, p_new, ABT_TRUE);
}

static inline void ABTI_thread_finish_context_sched_to_thread(
    ABTI_local *p_local, ABTI_sched *p_old, ABTI_thread *p_new)
{
    ABTI_thread_context_switch_sched_to_thread_internal(p_local, p_old, p_new,
                                                        ABT_TRUE);
}

static inline void ABTI_thread_finish_context_sched_to_sched(ABTI_sched *p_old,
                                                              ABTI_sched *p_new)
{
    ABTI_thread_context_switch_sched_to_sched_internal(p_old, p_new, ABT_TRUE);
}

static inline void ABTI_thread_set_request(ABTI_thread *p_thread, uint32_t req)
{
    ABTD_atomic_fetch_or_uint32(&p_thread->request, req);
}

static inline void ABTI_thread_unset_request(ABTI_thread *p_thread,
                                             uint32_t req)
{
    ABTD_atomic_fetch_and_uint32(&p_thread->request, ~req);
}
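
/* Example of the request-flag protocol (a minimal sketch: the bit used below,
 * ABTI_THREAD_REQ_TERMINATE, appears earlier in this file, and the acquire
 * load is assumed to be provided by the atomics layer alongside the
 * release-store and fetch-or operations used above):
 *
 *     ABTI_thread_set_request(p_thread, ABTI_THREAD_REQ_TERMINATE);
 *     ...
 *     if (ABTD_atomic_acquire_load_uint32(&p_thread->request) &
 *         ABTI_THREAD_REQ_TERMINATE) {
 *         // the target acts on the request at its next scheduling point
 *     }
 */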

static inline void ABTI_thread_yield(ABTI_local **pp_local,
                                     ABTI_thread *p_thread)
{
    ABTI_sched *p_sched;

    LOG_EVENT("[U%" PRIu64 ":E%d] yield\n", ABTI_thread_get_id(p_thread),
              p_thread->p_last_xstream->rank);

    /* Change the state of the currently running thread. */
    ABTD_atomic_release_store_int(&p_thread->state, ABT_THREAD_STATE_READY);

    /* Switch to the top scheduler. */
    p_sched = ABTI_xstream_get_top_sched(p_thread->p_last_xstream);
    ABTI_thread_context_switch_thread_to_sched(pp_local, p_thread, p_sched);

    /* Back to the original thread. */
    LOG_EVENT("[U%" PRIu64 ":E%d] resume after yield\n",
              ABTI_thread_get_id(p_thread), p_thread->p_last_xstream->rank);
}
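
/* Usage sketch for ABTI_thread_yield (illustrative; ABT_self_yield names the
 * public entry point, and ABTI_local_get_local is assumed to return the
 * calling execution stream's local data, as elsewhere in the tree):
 *
 *     ABTI_local *p_local = ABTI_local_get_local();
 *     ABTI_thread *p_self = p_local->p_thread;
 *     ABTI_thread_yield(&p_local, p_self);
 *     // execution continues here once a scheduler runs this ULT again
 */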

#endif /* ABTI_THREAD_H_INCLUDED */