abti_ythread.h
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 * See COPYRIGHT in top-level directory.
 */

#ifndef ABTI_YTHREAD_H_INCLUDED
#define ABTI_YTHREAD_H_INCLUDED

/* Inlined functions for yieldable threads */

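/* Handle <-> pointer conversion helpers. With error checking enabled,
 * ABT_THREAD_NULL and NULL are translated explicitly; otherwise the
 * conversion is a plain cast between ABT_thread and ABTI_ythread *. */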
static inline ABTI_ythread *ABTI_ythread_get_ptr(ABT_thread thread)
{
#ifndef ABT_CONFIG_DISABLE_ERROR_CHECK
    ABTI_ythread *p_ythread;
    if (thread == ABT_THREAD_NULL) {
        p_ythread = NULL;
    } else {
        p_ythread = (ABTI_ythread *)thread;
    }
    return p_ythread;
#else
    return (ABTI_ythread *)thread;
#endif
}

static inline ABT_thread ABTI_ythread_get_handle(ABTI_ythread *p_ythread)
{
#ifndef ABT_CONFIG_DISABLE_ERROR_CHECK
    ABT_thread h_thread;
    if (p_ythread == NULL) {
        h_thread = ABT_THREAD_NULL;
    } else {
        h_thread = (ABT_thread)p_ythread;
    }
    return h_thread;
#else
    return (ABT_thread)p_ythread;
#endif
}

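/* Recover the ABTI_ythread that embeds a given ABTD_ythread_context by
 * subtracting the offset of the ctx member (a container-of style
 * calculation). This relies on ctx being embedded directly in
 * ABTI_ythread. */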
static inline ABTI_ythread *
ABTI_ythread_context_get_ythread(ABTD_ythread_context *p_ctx)
{
    return (ABTI_ythread *)(((char *)p_ctx) - offsetof(ABTI_ythread, ctx));
}

#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
static inline ABT_bool ABTI_ythread_is_dynamic_promoted(ABTI_ythread *p_ythread)
{
    /*
     * Create a context and switch to it. The flow of the dynamic promotion
     * thread is as follows:
     *
     * - When a ULT does not yield:
     * ABTI_xstream_schedule_ythread : call init_and_call_fcontext
     * init_and_call_fcontext        : jump to the stack top
     *                               : save the scheduler's context
     *                               : call the thread function
     * thread_f                      : start thread_f
     *                               : [ULT body]
     *                               : end thread_f
     * init_and_call_fcontext        : calculate the return address, which is
     *                           [1] => the original return address.
     *                               : `return`
     * ABTI_xstream_schedule_ythread : resume the scheduler
     *
     * The ULT can return to the original scheduler by `return` if the
     * scheduler has never been resumed during the execution of the ULT,
     * because the context of the parent scheduler can be restored by a normal
     * return procedure. In this case, the context is saved only once.
     *
     * - When a ULT yields:
     * ABTI_xstream_schedule_ythread : call init_and_call_fcontext
     * init_and_call_fcontext        : jump to the stack top
     *                               : save the scheduler's context
     *                               : call the thread function
     * thread_f                      : start thread_f
     *                               : [yield in ULT body]
     * ABTD_ythread_context_dynamic_promote
     *                               : rewrite the return address to
     *                                 ABTD_ythread_terminate_ythread_no_arg
     * jump_fcontext                 : save the ULT's context
     *                               : restore the scheduler's context
     *                               : jump to the scheduler
     * ABTI_xstream_schedule_ythread : resume the scheduler
     *
     * ... After a while, a scheduler resumes this ULT ...
     *
     * jump_fcontext                 : save the scheduler's context
     *                               : restore the ULT's context
     *                               : jump to the ULT
     * thread_f                      : [ULT body (remaining)]
     *                               : end thread_f
     * init_and_call_fcontext        : calculate the return address, which is
     *                           [2] => ABTD_ythread_terminate_ythread_no_arg
     *                               : return
     * ABTD_ythread_terminate_ythread_no_arg
     *                               : call take_fcontext
     * take_fcontext                 : restore the scheduler's context
     *                               : jump to the scheduler
     * ABTI_xstream_schedule_ythread : resume the scheduler
     *
     * When a ULT yields, ABTD_ythread_terminate_ythread_no_arg is set to
     * [ptr - 0x08] so that it can "jump" to the normal termination
     * function by "return" in init_and_call_fcontext. This termination
     * function calls take_fcontext, so the scheduler is resumed by a
     * user-level context switch.
     *
     * For example, the stack will be as follows at [1] and [2] in the x86-64
     * case. Note that ptr points to the stack top (= p_stack + stacksize).
     *
     * In the case of [1] (no suspension):
     * [0x12345600] : (the original instruction pointer)
     * ...
     * [ptr - 0x08] : the original stack pointer (i.e., 0x12345600)
     * [ptr - 0x10] : unused (for 16-byte alignment)
     * [ptr - xxxx] : used by thread_f
     *
     * In the case of [2] (after suspension):
     * [ptr - 0x08] : pointing to (ptr - 0x10)
     * [ptr - 0x10] : the address of ABTD_ythread_terminate_ythread_no_arg
     * [ptr - xxxx] : used by thread_f
     *
     * This technique was introduced as a "return-on-completion" thread in the
     * following paper:
     * Lessons Learned from Analyzing Dynamic Promotion for User-Level
     * Threading, S. Iwasaki, A. Amer, K. Taura, and P. Balaji (SC '18)
     */
    return ABTD_ythread_context_is_dynamic_promoted(&p_ythread->ctx);
}

static inline void ABTI_ythread_dynamic_promote_ythread(ABTI_ythread *p_ythread)
{
    LOG_DEBUG("[U%" PRIu64 "] dynamic-promote ULT\n",
              ABTI_thread_get_id(&p_ythread->thread));
    void *p_stack = p_ythread->p_stack;
    size_t stacksize = p_ythread->stacksize;
    void *p_stacktop = (void *)(((char *)p_stack) + stacksize);
    ABTD_ythread_context_dynamic_promote_ythread(p_stacktop);
}
#endif

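/* Switch from p_old to its sibling p_new; p_new inherits p_old's parent.
 * If is_finish is ABT_TRUE, p_old terminates and control never returns here;
 * otherwise the function returns the yieldable thread that was running when
 * control eventually comes back to p_old. */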
static inline ABTI_ythread *ABTI_ythread_context_switch_to_sibling_internal(
    ABTI_xstream **pp_local_xstream, ABTI_ythread *p_old, ABTI_ythread *p_new,
    ABT_bool is_finish)
{
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Dynamic promotion is unnecessary if p_old will be discarded. */
    if (!ABTI_ythread_is_dynamic_promoted(p_old)) {
        ABTI_ythread_dynamic_promote_ythread(p_old);
    }
    if (!ABTI_ythread_is_dynamic_promoted(p_new)) {
        /* p_new does not have a context, so we first need to make it. */
        ABTD_ythread_context_arm_ythread(p_new->stacksize, p_new->p_stack,
                                         &p_new->ctx);
    }
#endif
    p_new->thread.p_parent = p_old->thread.p_parent;
    if (is_finish) {
        ABTI_tool_event_thread_finish(*pp_local_xstream, &p_old->thread,
                                      p_old->thread.p_parent);
        ABTD_ythread_finish_context(&p_old->ctx, &p_new->ctx);
        ABTU_unreachable();
    } else {
        ABTD_ythread_context_switch(&p_old->ctx, &p_new->ctx);
        ABTI_local *p_local = ABTI_local_get_local_uninlined();
        ABTI_xstream *p_local_xstream = ABTI_local_get_xstream(p_local);
        *pp_local_xstream = p_local_xstream;
        ABTI_thread *p_prev = p_local_xstream->p_thread;
        p_local_xstream->p_thread = &p_old->thread;
        ABTI_ASSERT(p_prev->type & ABTI_THREAD_TYPE_YIELDABLE);
        return ABTI_thread_get_ythread(p_prev);
    }
}

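/* Switch from p_old back to its parent (the thread that ran it, typically a
 * scheduler). On the non-finishing path, a yield event is reported before the
 * switch and a run event after p_old resumes, and the previously running
 * yieldable thread is returned. */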
static inline ABTI_ythread *ABTI_ythread_context_switch_to_parent_internal(
    ABTI_xstream **pp_local_xstream, ABTI_ythread *p_old, ABT_bool is_finish,
    ABT_sync_event_type sync_event_type, void *p_sync)
{
    ABTI_ythread *p_new = ABTI_thread_get_ythread(p_old->thread.p_parent);
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    /* Dynamic promotion is unnecessary if p_old will be discarded. */
    if (!is_finish && !ABTI_ythread_is_dynamic_promoted(p_old))
        ABTI_ythread_dynamic_promote_ythread(p_old);
    /* The parent's context must have been eagerly initialized. */
    ABTI_ASSERT(ABTI_ythread_is_dynamic_promoted(p_new));
#endif
    if (is_finish) {
        ABTI_tool_event_thread_finish(*pp_local_xstream, &p_old->thread,
                                      p_old->thread.p_parent);
        ABTD_ythread_finish_context(&p_old->ctx, &p_new->ctx);
        ABTU_unreachable();
    } else {
        ABTI_tool_event_ythread_yield(*pp_local_xstream, p_old,
                                      p_old->thread.p_parent, sync_event_type,
                                      p_sync);
        ABTD_ythread_context_switch(&p_old->ctx, &p_new->ctx);
        ABTI_local *p_local = ABTI_local_get_local_uninlined();
        ABTI_xstream *p_local_xstream = ABTI_local_get_xstream(p_local);
        *pp_local_xstream = p_local_xstream;
        ABTI_thread *p_prev = p_local_xstream->p_thread;
        p_local_xstream->p_thread = &p_old->thread;
        ABTI_ASSERT(p_prev->type & ABTI_THREAD_TYPE_YIELDABLE);
        /* Invoke an event of thread run. */
        ABTI_tool_event_thread_run(p_local_xstream, &p_old->thread, p_prev,
                                   p_old->thread.p_parent);
        return ABTI_thread_get_ythread(p_prev);
    }
}

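/* Switch from p_old to a child ULT p_new; p_old becomes p_new's parent.
 * Under dynamic promotion, an unpromoted p_new is started directly on its
 * stack via ABTD_ythread_context_make_and_call, and if it finishes without
 * ever being promoted, its termination (including waking a joiner) must be
 * handled here. A hypothetical scheduler body would call the public wrapper
 * below roughly as
 *     ABTI_ythread *p_prev =
 *         ABTI_ythread_context_switch_to_child(&p_local_xstream, p_sched,
 *                                              p_target);
 * where p_sched and p_target are illustrative names only. */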
static inline ABTI_ythread *ABTI_ythread_context_switch_to_child_internal(
    ABTI_xstream **pp_local_xstream, ABTI_ythread *p_old, ABTI_ythread *p_new)
{
    ABTI_xstream *p_local_xstream;
    p_new->thread.p_parent = &p_old->thread;
#if ABT_CONFIG_THREAD_TYPE == ABT_THREAD_TYPE_DYNAMIC_PROMOTION
    if (!ABTI_ythread_is_dynamic_promoted(p_old)) {
        ABTI_ythread_dynamic_promote_ythread(p_old);
    }
    if (!ABTI_ythread_is_dynamic_promoted(p_new)) {
        void *p_stacktop = ((char *)p_new->p_stack) + p_new->stacksize;
        LOG_DEBUG("[U%" PRIu64 "] run ULT (dynamic promotion)\n",
                  ABTI_thread_get_id(&p_new->thread));
        p_local_xstream = *pp_local_xstream;
        p_local_xstream->p_thread = &p_new->thread;
        /* Invoke an event of thread run. */
        ABTI_tool_event_thread_run(p_local_xstream, &p_new->thread,
                                   &p_old->thread, &p_old->thread);
        ABTD_ythread_context_make_and_call(&p_old->ctx, p_new->thread.f_thread,
                                           p_new->thread.p_arg, p_stacktop);
        /* The scheduler continues from here. If the previous thread has not
         * run dynamic promotion, ABTD_ythread_context_make_and_call took the
         * fast path. In this case, the request handling has not been done,
         * so it must be done here. */
        ABTI_local *p_local = ABTI_local_get_local_uninlined();
        p_local_xstream = ABTI_local_get_xstream(p_local);
        *pp_local_xstream = p_local_xstream;
        ABTI_thread *p_prev_thread = p_local_xstream->p_thread;
        ABTI_ASSERT(p_prev_thread->type & ABTI_THREAD_TYPE_YIELDABLE);
        ABTI_ythread *p_prev = ABTI_thread_get_ythread(p_prev_thread);
        p_local_xstream->p_thread = &p_old->thread;
        if (!ABTI_ythread_is_dynamic_promoted(p_prev)) {
            ABTI_ASSERT(p_prev == p_new);
            /* Invoke a thread-finish event of the previous thread. */
            ABTI_tool_event_thread_finish(p_local_xstream, &p_prev->thread,
                                          &p_old->thread);
            /* See ABTDI_thread_terminate for details.
             * TODO: avoid making a copy of the code. */
            ABTD_ythread_context *p_ctx = &p_prev->ctx;
            ABTD_ythread_context *p_link =
                ABTD_atomic_acquire_load_ythread_context_ptr(&p_ctx->p_link);
            if (p_link) {
                /* If p_link is set, another ULT has already called join. */
                ABTI_ythread *p_joiner =
                    ABTI_ythread_context_get_ythread(p_link);
                /* The scheduler may not use a bypass mechanism, so just make
                 * p_joiner ready. */
                ABTI_ythread_set_ready(ABTI_xstream_get_local(p_local_xstream),
                                       p_joiner);

                /* We don't need to use the atomic OR operation here because
                 * the ULT will be terminated regardless of other requests. */
                ABTD_atomic_release_store_uint32(&p_prev->thread.request,
                                                 ABTI_THREAD_REQ_TERMINATE);
            } else {
                uint32_t req =
                    ABTD_atomic_fetch_or_uint32(&p_prev->thread.request,
                                                ABTI_THREAD_REQ_JOIN |
                                                    ABTI_THREAD_REQ_TERMINATE);
                if (req & ABTI_THREAD_REQ_JOIN) {
                    /* There has been a join request and the joiner has
                     * blocked, so we have to wake up the joiner ULT. */
                    do {
                        p_link = ABTD_atomic_acquire_load_ythread_context_ptr(
                            &p_ctx->p_link);
                    } while (!p_link);
                    ABTI_ythread_set_ready(ABTI_xstream_get_local(
                                               p_local_xstream),
                                           ABTI_ythread_context_get_ythread(
                                               p_link));
                }
            }
        }
        return p_prev;
    }
#endif
    {
        ABTD_ythread_context_switch(&p_old->ctx, &p_new->ctx);
        ABTI_local *p_local = ABTI_local_get_local_uninlined();
        p_local_xstream = ABTI_local_get_xstream(p_local);
        *pp_local_xstream = p_local_xstream;
        ABTI_thread *p_prev = p_local_xstream->p_thread;
        p_local_xstream->p_thread = &p_old->thread;
        ABTI_ASSERT(p_prev->type & ABTI_THREAD_TYPE_YIELDABLE);
        /* p_old keeps running as the parent, so no thread-run event is
         * triggered here. */
        return ABTI_thread_get_ythread(p_prev);
    }
}

/* Return the previous thread. */
static inline ABTI_ythread *
ABTI_ythread_context_switch_to_sibling(ABTI_xstream **pp_local_xstream,
                                       ABTI_ythread *p_old, ABTI_ythread *p_new)
{
    return ABTI_ythread_context_switch_to_sibling_internal(pp_local_xstream,
                                                           p_old, p_new,
                                                           ABT_FALSE);
}

static inline ABTI_ythread *ABTI_ythread_context_switch_to_parent(
    ABTI_xstream **pp_local_xstream, ABTI_ythread *p_old,
    ABT_sync_event_type sync_event_type, void *p_sync)
{
    return ABTI_ythread_context_switch_to_parent_internal(pp_local_xstream,
                                                          p_old, ABT_FALSE,
                                                          sync_event_type,
                                                          p_sync);
}

static inline ABTI_ythread *
ABTI_ythread_context_switch_to_child(ABTI_xstream **pp_local_xstream,
                                     ABTI_ythread *p_old, ABTI_ythread *p_new)
{
    return ABTI_ythread_context_switch_to_child_internal(pp_local_xstream,
                                                         p_old, p_new);
}

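/* Noreturn variants used when p_old finishes: they call the switch routines
 * above with is_finish set to ABT_TRUE, so p_old's context is abandoned and
 * execution continues in the target thread. */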
ABTU_noreturn static inline void
ABTI_ythread_finish_context_to_sibling(ABTI_xstream *p_local_xstream,
                                       ABTI_ythread *p_old, ABTI_ythread *p_new)
{
    ABTI_ythread_context_switch_to_sibling_internal(&p_local_xstream, p_old,
                                                    p_new, ABT_TRUE);
    ABTU_unreachable();
}

ABTU_noreturn static inline void
ABTI_ythread_finish_context_to_parent(ABTI_xstream *p_local_xstream,
                                      ABTI_ythread *p_old)
{
    /* sync_event_type is not used on the finish path. */
    ABTI_ythread_context_switch_to_parent_internal(&p_local_xstream, p_old,
                                                   ABT_TRUE,
                                                   ABT_SYNC_EVENT_TYPE_UNKNOWN,
                                                   NULL);
    ABTU_unreachable();
}

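/* Yield the calling ULT p_ythread to its parent scheduler. A minimal sketch
 * of the expected call site (names are illustrative, not part of this file):
 *     ABTI_ythread_yield(&p_local_xstream, p_self,
 *                        ABT_SYNC_EVENT_TYPE_USER, NULL);
 * The caller is expected to be running on p_ythread itself. */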
static inline void ABTI_ythread_yield(ABTI_xstream **pp_local_xstream,
                                      ABTI_ythread *p_ythread,
                                      ABT_sync_event_type sync_event_type,
                                      void *p_sync)
{
    LOG_DEBUG("[U%" PRIu64 ":E%d] yield\n",
              ABTI_thread_get_id(&p_ythread->thread),
              p_ythread->thread.p_last_xstream->rank);

    /* Change the state of the currently running thread. */
    ABTD_atomic_release_store_int(&p_ythread->thread.state,
                                  ABT_THREAD_STATE_READY);

    /* Switch to the top scheduler. */
    ABTI_ythread_context_switch_to_parent(pp_local_xstream, p_ythread,
                                          sync_event_type, p_sync);

    /* Back to the original thread. */
    LOG_DEBUG("[U%" PRIu64 ":E%d] resume after yield\n",
              ABTI_thread_get_id(&p_ythread->thread),
              p_ythread->thread.p_last_xstream->rank);
}

#endif /* ABTI_YTHREAD_H_INCLUDED */