Skip to content

Commit

Permalink
rt: batch pop from injection queue when idle (#5705)
Browse files Browse the repository at this point in the history
In the multi-threaded scheduler, when there are no tasks on the local
queue, a worker will attempt to pull tasks from the injection queue.
Previously, the worker would only attempt to poll one task from the
injection queue then continue trying to find work from other sources.
This can result in the injection queue backing up when there are many
tasks being scheduled from outside of the runtime.

This patch updates the worker to try to poll more than one task from the
injection queue when it has no more local work. Note that we also don't
want a single worker to poll **all** tasks on the injection queue as
that would result in work becoming unbalanced.
  • Loading branch information
carllerche committed May 23, 2023
1 parent 93bde08 commit 3a94eb0
Show file tree
Hide file tree
Showing 10 changed files with 381 additions and 53 deletions.
22 changes: 14 additions & 8 deletions .github/workflows/loom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,19 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
scope:
- --skip loom_pool
- loom_pool::group_a
- loom_pool::group_b
- loom_pool::group_c
- loom_pool::group_d
- time::driver
include:
- scope: --skip loom_pool
max_preemptions: 2
- scope: loom_pool::group_a
max_preemptions: 1
- scope: loom_pool::group_b
max_preemptions: 2
- scope: loom_pool::group_c
max_preemptions: 1
- scope: loom_pool::group_d
max_preemptions: 1
- scope: time::driver
max_preemptions: 2
steps:
- uses: actions/checkout@v3
- name: Install Rust ${{ env.rust_stable }}
Expand All @@ -43,6 +49,6 @@ jobs:
working-directory: tokio
env:
RUSTFLAGS: --cfg loom --cfg tokio_unstable -Dwarnings
LOOM_MAX_PREEMPTIONS: 2
LOOM_MAX_PREEMPTIONS: ${{ matrix.max_preemptions }}
LOOM_MAX_BRANCHES: 10000
SCOPE: ${{ matrix.scope }}
63 changes: 59 additions & 4 deletions benches/rt_multi_threaded.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::Relaxed;
use std::sync::{mpsc, Arc};

fn spawn_many(b: &mut Bencher) {
const NUM_SPAWN: usize = 10_000;
const NUM_WORKERS: usize = 4;
const NUM_SPAWN: usize = 10_000;

fn spawn_many_local(b: &mut Bencher) {
let rt = rt();

let (tx, rx) = mpsc::sync_channel(1000);
Expand All @@ -38,6 +39,52 @@ fn spawn_many(b: &mut Bencher) {
});
}

fn spawn_many_remote_idle(b: &mut Bencher) {
let rt = rt();

let mut handles = Vec::with_capacity(NUM_SPAWN);

b.iter(|| {
for _ in 0..NUM_SPAWN {
handles.push(rt.spawn(async {}));
}

rt.block_on(async {
for handle in handles.drain(..) {
handle.await.unwrap();
}
});
});
}

fn spawn_many_remote_busy(b: &mut Bencher) {
let rt = rt();
let rt_handle = rt.handle();
let mut handles = Vec::with_capacity(NUM_SPAWN);

// Spawn some tasks to keep the runtimes busy
for _ in 0..(2 * NUM_WORKERS) {
rt.spawn(async {
loop {
tokio::task::yield_now().await;
std::thread::sleep(std::time::Duration::from_micros(10));
}
});
}

b.iter(|| {
for _ in 0..NUM_SPAWN {
handles.push(rt_handle.spawn(async {}));
}

rt.block_on(async {
for handle in handles.drain(..) {
handle.await.unwrap();
}
});
});
}

fn yield_many(b: &mut Bencher) {
const NUM_YIELD: usize = 1_000;
const TASKS: usize = 200;
Expand Down Expand Up @@ -140,12 +187,20 @@ fn chained_spawn(b: &mut Bencher) {

fn rt() -> Runtime {
runtime::Builder::new_multi_thread()
.worker_threads(4)
.worker_threads(NUM_WORKERS)
.enable_all()
.build()
.unwrap()
}

benchmark_group!(scheduler, spawn_many, ping_pong, yield_many, chained_spawn,);
benchmark_group!(
scheduler,
spawn_many_local,
spawn_many_remote_idle,
spawn_many_remote_busy,
ping_pong,
yield_many,
chained_spawn,
);

benchmark_main!(scheduler);
79 changes: 77 additions & 2 deletions tokio/src/runtime/scheduler/multi_thread/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ impl<T> Local<T> {
!self.inner.is_empty()
}

/// How many tasks can be pushed into the queue
pub(crate) fn remaining_slots(&self) -> usize {
self.inner.remaining_slots()
}

pub(crate) fn max_capacity(&self) -> usize {
LOCAL_QUEUE_CAPACITY
}

/// Returns false if there are any entries in the queue
///
/// Separate to is_stealable so that refactors of is_stealable to "protect"
Expand All @@ -118,8 +127,62 @@ impl<T> Local<T> {
!self.inner.is_empty()
}

/// Pushes a task to the back of the local queue, skipping the LIFO slot.
pub(crate) fn push_back(
/// Pushes a batch of tasks to the back of the queue. All tasks must fit in
/// the local queue.
///
/// # Panics
///
/// The method panics if there is not enough capacity to fit in the queue.
pub(crate) fn push_back(&mut self, tasks: impl ExactSizeIterator<Item = task::Notified<T>>) {
let len = tasks.len();
assert!(len <= LOCAL_QUEUE_CAPACITY);

if len == 0 {
// Nothing to do
return;
}

let head = self.inner.head.load(Acquire);
let (steal, _) = unpack(head);

// safety: this is the **only** thread that updates this cell.
let mut tail = unsafe { self.inner.tail.unsync_load() };

if tail.wrapping_sub(steal) <= (LOCAL_QUEUE_CAPACITY - len) as UnsignedShort {
// Yes, this if condition is structured a bit weird (first block
// does nothing, second returns an error). It is this way to match
// `push_back_or_overflow`.
} else {
panic!()
}

for task in tasks {
let idx = tail as usize & MASK;

self.inner.buffer[idx].with_mut(|ptr| {
// Write the task to the slot
//
// Safety: There is only one producer and the above `if`
// condition ensures we don't touch a cell if there is a
// value, thus no consumer.
unsafe {
ptr::write((*ptr).as_mut_ptr(), task);
}
});

tail = tail.wrapping_add(1);
}

self.inner.tail.store(tail, Release);
}

/// Pushes a task to the back of the local queue, if there is not enough
/// capacity in the queue, this triggers the overflow operation.
///
/// When the queue overflows, half of the curent contents of the queue is
/// moved to the given Injection queue. This frees up capacity for more
/// tasks to be pushed into the local queue.
pub(crate) fn push_back_or_overflow(
&mut self,
mut task: task::Notified<T>,
inject: &Inject<T>,
Expand Down Expand Up @@ -153,6 +216,11 @@ impl<T> Local<T> {
}
};

self.push_back_finish(task, tail);
}

// Second half of `push_back`
fn push_back_finish(&self, task: task::Notified<T>, tail: UnsignedShort) {
// Map the position to a slot index.
let idx = tail as usize & MASK;

Expand Down Expand Up @@ -501,6 +569,13 @@ impl<T> Drop for Local<T> {
}

impl<T> Inner<T> {
fn remaining_slots(&self) -> usize {
let (steal, _) = unpack(self.head.load(Acquire));
let tail = self.tail.load(Acquire);

LOCAL_QUEUE_CAPACITY - (tail.wrapping_sub(steal) as usize)
}

fn len(&self) -> UnsignedShort {
let (_, head) = unpack(self.head.load(Acquire));
let tail = self.tail.load(Acquire);
Expand Down
44 changes: 39 additions & 5 deletions tokio/src/runtime/scheduler/multi_thread/worker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,8 +509,11 @@ impl Context {
} else {
// Not enough budget left to run the LIFO task, push it to
// the back of the queue and return.
core.run_queue
.push_back(task, self.worker.inject(), &mut core.metrics);
core.run_queue.push_back_or_overflow(
task,
self.worker.inject(),
&mut core.metrics,
);
return Ok(core);
}
}
Expand Down Expand Up @@ -612,7 +615,38 @@ impl Core {
if self.tick % worker.handle.shared.config.global_queue_interval == 0 {
worker.inject().pop().or_else(|| self.next_local_task())
} else {
self.next_local_task().or_else(|| worker.inject().pop())
let maybe_task = self.next_local_task();

if maybe_task.is_some() {
return maybe_task;
}

// Other threads can only **remove** tasks from the current worker's
// `run_queue`. So, we can be confident that by the time we call
// `run_queue.push_back` below, there will be *at least* `cap`
// available slots in the queue.
let cap = usize::min(
self.run_queue.remaining_slots(),
self.run_queue.max_capacity() / 2,
);

// The worker is currently idle, pull a batch of work from the
// injection queue. We don't want to pull *all* the work so other
// workers can also get some.
let n = usize::min(
worker.inject().len() / worker.handle.shared.remotes.len() + 1,
cap,
);

let mut tasks = worker.inject().pop_n(n);

// Pop the first task to return immedietly
let ret = tasks.next();

// Push the rest of the on the run queue
self.run_queue.push_back(tasks);

ret
}
}

Expand Down Expand Up @@ -808,7 +842,7 @@ impl Handle {
// flexibility and the task may go to the front of the queue.
let should_notify = if is_yield || self.shared.config.disable_lifo_slot {
core.run_queue
.push_back(task, &self.shared.inject, &mut core.metrics);
.push_back_or_overflow(task, &self.shared.inject, &mut core.metrics);
true
} else {
// Push to the LIFO slot
Expand All @@ -817,7 +851,7 @@ impl Handle {

if let Some(prev) = prev {
core.run_queue
.push_back(prev, &self.shared.inject, &mut core.metrics);
.push_back_or_overflow(prev, &self.shared.inject, &mut core.metrics);
}

core.lifo_slot = Some(task);
Expand Down

0 comments on commit 3a94eb0

Please sign in to comment.