reduce the lock contention in task spawn. #6001

Merged

merged 104 commits on Dec 7, 2023

Changes from 60 commits

Commits (104)
895958f
reduce the lock contention in task spawn.
wathenjiang Sep 11, 2023
ae64dfe
Merge branch 'master' into reduce-lock-contention
wathenjiang Sep 12, 2023
5ffbf01
rm extra criterion in dependencies
wathenjiang Sep 12, 2023
8e2c0b2
restart a ci
wathenjiang Sep 12, 2023
33ab489
fix for each
wathenjiang Sep 12, 2023
ab2452c
fix for_each
wathenjiang Sep 12, 2023
a500a79
reduce the size of header
wathenjiang Sep 12, 2023
632a8d3
code refactor in list.rs
wathenjiang Sep 13, 2023
6453017
change ordering of atomic
wathenjiang Sep 13, 2023
9bfb4f1
fix iterate in close_and_shutdown
wathenjiang Sep 13, 2023
41ee62a
Merge branch 'master' into reduce-lock-contention
wathenjiang Sep 13, 2023
b4ac885
fix iterate in close_and_shutdown
wathenjiang Sep 13, 2023
4b386c5
fix atomic method
wathenjiang Sep 13, 2023
9ded74b
rm type CountedOwnedTasksInner
wathenjiang Sep 20, 2023
191edf6
refactor for_each
wathenjiang Sep 20, 2023
06a675c
Merge branch 'master' into reduce-lock-contention
wathenjiang Sep 20, 2023
2be70c4
use atomic type in loom instead
wathenjiang Sep 20, 2023
17b0be9
fix: put read closed flag after got the lock for avoid concurrency pr…
wathenjiang Sep 20, 2023
3bb484e
introduce random make shutdown faster
wathenjiang Sep 20, 2023
e325825
use grain instead
wathenjiang Sep 20, 2023
a11f80c
fix: dead lock
wathenjiang Sep 20, 2023
259582e
Merge branch 'master' into reduce-lock-contention
wathenjiang Sep 20, 2023
cab1f58
fix: use list_inner to offer lock of list
wathenjiang Sep 21, 2023
389e6b9
fix: use segment_size instead of grain
wathenjiang Sep 21, 2023
833377c
clippy
wathenjiang Sep 21, 2023
5422895
feat: let spawn_concurrency_level configurable
wathenjiang Sep 21, 2023
b90101a
feat: update benchmark
wathenjiang Sep 21, 2023
68af71a
feat: add benchmarks for spawn_concurrency_level
wathenjiang Sep 21, 2023
8e4716a
change spawn_concurrency_level to be 4 times the number of worker thr…
wathenjiang Sep 22, 2023
4a3ff7a
Merge branch 'master' into reduce-lock-contention
wathenjiang Sep 22, 2023
7bbc2e4
change benchmark tests name from shutdown_parallel_multi_thread to sh…
wathenjiang Sep 22, 2023
c15c0bd
fix the comments on spawn_concurrency_level
wathenjiang Sep 22, 2023
6706a35
add comments for parameter in OwnedTasks.close_and_shutdown_all
wathenjiang Sep 22, 2023
d2c7668
fix comments
wathenjiang Sep 22, 2023
5105610
style nit: simplify code, apply suggestions from hawkw
wathenjiang Sep 24, 2023
a30df11
make spawn_concurrency_level is constant 4 in loom test
wathenjiang Sep 25, 2023
df4ab61
use Header::get_id to get task_id
wathenjiang Sep 28, 2023
63a7679
change owned_id to u64 back
wathenjiang Sep 28, 2023
b2010d7
refactor: use local_staic in loom
wathenjiang Sep 28, 2023
052e141
fix: OwnedTasks get all locks first
wathenjiang Sep 29, 2023
65670eb
fix: rm segment_size field of OwnedTasks, use method to return it ins…
wathenjiang Sep 29, 2023
08d7d0c
feat: make spawn_concurrency_level to be a unstable api
wathenjiang Oct 1, 2023
26621d4
feat: rm shutdown flag
wathenjiang Oct 1, 2023
5b010bc
rm benches/spawn_concurrent.rs because it is unstable now in tokio
wathenjiang Oct 1, 2023
66fa190
use get_unchecked to get segment lock
wathenjiang Oct 3, 2023
2ac0b96
feat: drop lock promptly and explicitly
wathenjiang Oct 15, 2023
47820b3
feat: move the atomic operations of count into the lock
wathenjiang Oct 15, 2023
d0acd70
Merge branch 'master' into reduce-lock-contention
wathenjiang Oct 17, 2023
7ef0265
first commit
wathenjiang Oct 23, 2023
01da1ed
add Safety
wathenjiang Oct 23, 2023
6f5eaa2
mutable ref to immutable ref
wathenjiang Oct 23, 2023
3257cb7
use std AtomicUsize
wathenjiang Oct 23, 2023
7b101ee
fix: count sub in pop_back
wathenjiang Oct 23, 2023
608d2c6
refactor doube check closed flag in bind_inner
wathenjiang Oct 23, 2023
1c214e0
cast task_id to usize
wathenjiang Oct 23, 2023
e24db6d
use ShardGuard
wathenjiang Oct 23, 2023
3c90918
update comments
wathenjiang Oct 23, 2023
cd5fb20
update comments
wathenjiang Oct 23, 2023
38c9eba
fix: remove needless reference
wathenjiang Oct 23, 2023
7968b51
fix: release lock as far as possible
wathenjiang Oct 23, 2023
5d3da9e
Update tokio/src/runtime/task/list.rs
wathenjiang Nov 6, 2023
1d1a7a3
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
13c4b93
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
ee53e23
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
01afd7b
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
36c2355
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
d6606b8
Update tokio/src/util/sharded_list.rs
wathenjiang Nov 6, 2023
c9f32d2
Update tokio/src/runtime/task/list.rs
wathenjiang Nov 6, 2023
bbefb70
fix: accept ownedship of closue in method for_each of OwnedTasks
wathenjiang Nov 6, 2023
f97748c
Merge branch 'master' into reduce-lock-contention
wathenjiang Nov 6, 2023
8baf79e
Apply suggestions from code review
wathenjiang Nov 7, 2023
87e70c3
Apply suggestions from code review
wathenjiang Nov 7, 2023
af92f20
Apply suggestions from code review
wathenjiang Nov 7, 2023
60104b8
Apply suggestions from code review
wathenjiang Nov 7, 2023
bb4458b
rm unused push method
wathenjiang Nov 7, 2023
1adad70
rename get_sharded_id to get_shard_id
wathenjiang Nov 7, 2023
ef8d2b7
Apply suggestions from code review
wathenjiang Nov 7, 2023
2d4fbf6
rename get_sharded_id to get_shard_id
wathenjiang Nov 7, 2023
777d97e
add sentence in comments
wathenjiang Nov 7, 2023
5406a7e
rm dead_code attr
wathenjiang Nov 7, 2023
c9b05ee
Merge branch 'master' into reduce-lock-contention
wathenjiang Nov 7, 2023
680848e
move spawn_concurrent_level size from shardedList to builder
wathenjiang Nov 7, 2023
6fb70b1
update comments
wathenjiang Nov 7, 2023
0ba87db
update comments
wathenjiang Nov 7, 2023
8bb106f
update comments
wathenjiang Nov 7, 2023
e0fb9e2
rm loop in ShardedList::new
wathenjiang Nov 7, 2023
57133a5
fix rustfmt
wathenjiang Nov 7, 2023
71e8983
fix
wathenjiang Nov 7, 2023
d1ce613
fix spawn_concurrency_level
wathenjiang Nov 7, 2023
e6b8db1
fix spawn_concurrency_level
wathenjiang Nov 7, 2023
592f432
rm get_spawn_concurrency_level to cfg_rt_multi_thread
wathenjiang Nov 7, 2023
f083757
add allow(dead_code))]
wathenjiang Nov 7, 2023
3105af7
rm dead_code attr
wathenjiang Nov 8, 2023
8e189cf
make spawn_concurrency_level unconfigurable
wathenjiang Nov 22, 2023
75d3081
Apply suggestions from code review
wathenjiang Nov 24, 2023
ed27f70
apply suggestions from core review
wathenjiang Nov 24, 2023
db58076
move get_spawn_concurrency_level from builder to task/list.rs
wathenjiang Nov 24, 2023
06656b9
feat: add comments and rm loom cfg
wathenjiang Nov 24, 2023
caa74c9
feat: update comments for gen_shared_list_size
wathenjiang Nov 24, 2023
865de08
feat: update comments for gen_shared_list_size
wathenjiang Nov 24, 2023
7a76ad5
Apply suggestions from code review
wathenjiang Nov 26, 2023
78d1dea
fix: fmt and typo fix
wathenjiang Nov 26, 2023
3844bd3
Update tokio/src/util/sharded_list.rs
Darksonn Dec 7, 2023
038650f
Merge branch 'master' into reduce-lock-contention
Darksonn Dec 7, 2023
1 change: 1 addition & 0 deletions benches/Cargo.toml
@@ -12,6 +12,7 @@ tokio = { version = "1.5.0", path = "../tokio", features = ["full"] }
criterion = "0.5.1"
rand = "0.8"
rand_chacha = "0.3"
num_cpus = "1.16.0"

[dev-dependencies]
tokio-util = { version = "0.7.0", path = "../tokio-util", features = ["full"] }
71 changes: 71 additions & 0 deletions tokio/src/runtime/builder.rs
@@ -57,6 +57,11 @@ pub struct Builder {
/// Only used when not using the current-thread executor.
worker_threads: Option<usize>,

/// Configures the global OwnedTasks's concurrency level
///
/// Only used when not using the current-thread executor.
pub(super) spawn_concurrency_level: Option<usize>,

/// Cap on thread usage.
max_blocking_threads: usize,

@@ -278,6 +283,9 @@ impl Builder {
// Default to lazy auto-detection (one thread per CPU core)
worker_threads: None,

// Default to lazy auto-detection (4 times the number of worker threads)
spawn_concurrency_level: None,

max_blocking_threads: 512,

// Default thread name
@@ -401,6 +409,53 @@ impl Builder {
self
}

/// Sets the spawn concurrency level the `Runtime` will use.
///
/// This can be any number greater than 0. If the value is larger than 65536,
/// the concurrency level is clamped to 65536 internally.
///
/// When this value is small compared to the number of concurrent threads, increasing it
/// helps improve the performance of spawning tasks concurrently. However, once the value
/// is already large enough, increasing it further will not improve performance; instead,
/// it may only lengthen the time needed to create the Runtime.
///
/// # Default
///
/// The default value for this is 4 times the number of worker threads.
///
/// When using the `current_thread` runtime this method has no effect.
///
/// # Examples
///
/// ## Multi threaded runtime with spawn_concurrency_level 8
///
/// ```
/// use tokio::runtime;
///
/// // Build a work-stealing runtime with the spawn concurrency level set to 8.
/// let rt = runtime::Builder::new_multi_thread()
/// .spawn_concurrency_level(8)
/// .build()
/// .unwrap();
///
/// rt.spawn(async move {});
/// ```
///
/// # Panics
///
/// This will panic if `val` is not larger than `0`.
#[track_caller]
#[cfg(tokio_unstable)]
#[cfg_attr(docsrs, doc(cfg(tokio_unstable)))]
pub fn spawn_concurrency_level(&mut self, mut val: usize) -> &mut Self {
Contributor:
It's not clear to me that we want a configuration option for this. At least, it should be unstable.

Contributor (author):
This is as I said above:

The newly added spawn_concurrency_level() method on Builder, which sets the number of segment locks in the OwnedTasks struct, allows us to customize this value. Its default value is 4 times the number of worker threads. An alternative approach would be to set it to a constant, such as 64, regardless of the number of CPU cores on different platforms. However, considering the usage of Tokio on micro-embedded devices, making the default value depend on the number of worker threads effectively minimizes the time required for runtime creation on such devices.

I think making it unstable is a good decision.

Contributor (author):
I have made it unstable. Since unstable features cannot be tested in the benches crate, I have removed benches/spawn_concurrent.rs temporarily.

The benchmark results for different values of the spawn_concurrency_level parameter are now in GitHub's hidden items: #6001 (comment)

assert!(val > 0, "spawn concurrency level cannot be set to 0");
if val > 1 << 16 {
val = 1 << 16;
}
self.spawn_concurrency_level = Some(val);
self
}

/// Specifies the limit for additional threads spawned by the Runtime.
///
/// These threads are used for blocking operations like tasks spawned
@@ -1231,6 +1286,12 @@ cfg_rt_multi_thread! {
use crate::runtime::scheduler::{self, MultiThread};

let core_threads = self.worker_threads.unwrap_or_else(num_cpus);
// Shrink the size of spawn_concurrency_level when using loom. This shouldn't impact
// logic, but allows loom to test more edge cases in a reasonable amount of time.
#[cfg(loom)]
let spawn_concurrency_level = 4;
#[cfg(not(loom))]
let spawn_concurrency_level = self.spawn_concurrency_level.unwrap_or(core_threads * 4);

let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?;

@@ -1249,6 +1310,7 @@
driver_handle,
blocking_spawner,
seed_generator_2,
spawn_concurrency_level,
Config {
before_park: self.before_park.clone(),
after_unpark: self.after_unpark.clone(),
@@ -1280,6 +1342,13 @@

let core_threads = self.worker_threads.unwrap_or_else(num_cpus);

// Shrink the size of spawn_concurrency_level when using loom. This shouldn't impact
// logic, but allows loom to test more edge cases in a reasonable amount of time.
#[cfg(loom)]
let spawn_concurrency_level = 4;
#[cfg(not(loom))]
let spawn_concurrency_level = self.spawn_concurrency_level.unwrap_or(core_threads * 4);

let (driver, driver_handle) = driver::Driver::new(self.get_cfg())?;

// Create the blocking pool
@@ -1297,6 +1366,7 @@
driver_handle,
blocking_spawner,
seed_generator_2,
spawn_concurrency_level,
Config {
before_park: self.before_park.clone(),
after_unpark: self.after_unpark.clone(),
@@ -1321,6 +1391,7 @@ impl fmt::Debug for Builder {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("Builder")
.field("worker_threads", &self.worker_threads)
.field("spawn_concurrency_level", &self.spawn_concurrency_level)
.field("max_blocking_threads", &self.max_blocking_threads)
.field(
"thread_name",
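The builder changes above reduce to a small resolution rule: an explicit `spawn_concurrency_level` is rejected if zero and clamped to at most `1 << 16`, an unset value defaults to four times the number of worker threads, and loom builds pin the level to 4. The sketch below restates that rule for readability; `resolve_concurrency_level` is a hypothetical helper, not a function in this PR.

```rust
// Minimal sketch of the resolution rule implied by the builder diff above.
// `resolve_concurrency_level` is a hypothetical helper, not code from the PR.
fn resolve_concurrency_level(configured: Option<usize>, worker_threads: usize) -> usize {
    match configured {
        Some(val) => {
            // `Builder::spawn_concurrency_level` panics on 0 and clamps
            // anything above 2^16 down to 2^16.
            assert!(val > 0, "spawn concurrency level cannot be set to 0");
            val.min(1 << 16)
        }
        // Default: 4 times the number of worker threads.
        None => worker_threads * 4,
    }
}

fn main() {
    assert_eq!(resolve_concurrency_level(None, 8), 32);
    assert_eq!(resolve_concurrency_level(Some(1 << 20), 8), 1 << 16);
}
```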
8 changes: 7 additions & 1 deletion tokio/src/runtime/id.rs
@@ -1,5 +1,5 @@
use std::fmt;
use std::num::NonZeroU64;
use std::num::{NonZeroU32, NonZeroU64};

/// An opaque ID that uniquely identifies a runtime relative to all other currently
/// running runtimes.
@@ -39,6 +39,12 @@ impl From<NonZeroU64> for Id {
}
}

impl From<NonZeroU32> for Id {
fn from(value: NonZeroU32) -> Self {
Id(value.into())
}
}

impl fmt::Display for Id {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
6 changes: 3 additions & 3 deletions tokio/src/runtime/scheduler/current_thread/mod.rs
@@ -132,7 +132,7 @@ impl CurrentThread {
let handle = Arc::new(Handle {
shared: Shared {
inject: Inject::new(),
owned: OwnedTasks::new(),
owned: OwnedTasks::new(1),
woken: AtomicBool::new(false),
config,
scheduler_metrics: SchedulerMetrics::new(),
@@ -248,7 +248,7 @@ fn shutdown2(mut core: Box<Core>, handle: &Handle) -> Box<Core> {
// Drain the OwnedTasks collection. This call also closes the
// collection, ensuring that no tasks are ever pushed after this
// call returns.
handle.shared.owned.close_and_shutdown_all();
handle.shared.owned.close_and_shutdown_all(0);

// Drain local queue
// We already shut down every task, so we just need to drop the task.
@@ -614,7 +614,7 @@ impl Schedule for Arc<Handle> {
// If `None`, the runtime is shutting down, so there is no need to signal shutdown
if let Some(core) = core.as_mut() {
core.unhandled_panic = true;
self.shared.owned.close_and_shutdown_all();
self.shared.owned.close_and_shutdown_all(0);
}
}
_ => unreachable!("runtime core not set in CURRENT thread-local"),
2 changes: 2 additions & 0 deletions tokio/src/runtime/scheduler/multi_thread/mod.rs
@@ -60,6 +60,7 @@ impl MultiThread {
driver_handle: driver::Handle,
blocking_spawner: blocking::Spawner,
seed_generator: RngSeedGenerator,
spawn_concurrency_level: usize,
config: Config,
) -> (MultiThread, Arc<Handle>, Launch) {
let parker = Parker::new(driver);
@@ -69,6 +70,7 @@
driver_handle,
blocking_spawner,
seed_generator,
spawn_concurrency_level,
config,
);

14 changes: 11 additions & 3 deletions tokio/src/runtime/scheduler/multi_thread/worker.rs
@@ -245,6 +245,7 @@ pub(super) fn create(
driver_handle: driver::Handle,
blocking_spawner: blocking::Spawner,
seed_generator: RngSeedGenerator,
spawn_concurrency_level: usize,
config: Config,
) -> (Arc<Handle>, Launch) {
let mut cores = Vec::with_capacity(size);
@@ -287,7 +288,7 @@
remotes: remotes.into_boxed_slice(),
inject,
idle,
owned: OwnedTasks::new(),
owned: OwnedTasks::new(spawn_concurrency_level as u32),
synced: Mutex::new(Synced {
idle: idle_synced,
inject: inject_synced,
@@ -547,7 +548,6 @@ impl Context {
}

core.pre_shutdown(&self.worker);

// Signal shutdown
self.worker.handle.shutdown_core(core);
Err(())
@@ -954,8 +954,16 @@ impl Core {
/// Signals all tasks to shut down, and waits for them to complete. Must run
/// before we enter the single-threaded phase of shutdown processing.
fn pre_shutdown(&mut self, worker: &Worker) {
// Start from a random inner list
let start = self
.rand
.fastrand_n(worker.handle.shared.owned.get_shard_size() as u32);
// Signal to all tasks to shut down.
worker.handle.shared.owned.close_and_shutdown_all();
worker
.handle
.shared
.owned
.close_and_shutdown_all(start as usize);

self.stats
.submit(&worker.handle.shared.worker_metrics[worker.index]);
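For context on the `close_and_shutdown_all(start)` call above: the sharded list itself (tokio/src/util/sharded_list.rs) is not part of this excerpt, but the point of passing a random `start` is that each shutting-down worker begins draining from a different shard instead of all contending on the same segment lock first. Below is a hedged sketch of that iteration order, under the assumption that every shard is visited exactly once; `shutdown_shard` is a hypothetical stand-in, not the PR's code.

```rust
// Hedged sketch only: the real implementation lives in sharded_list.rs,
// which is not shown in this diff excerpt.
fn close_and_shutdown_all(shard_count: usize, start: usize) {
    // Visit every shard exactly once, beginning at the randomly chosen
    // `start` index so concurrent shutdowns spread across the segment locks.
    for offset in 0..shard_count {
        let shard_id = (start + offset) % shard_count;
        shutdown_shard(shard_id);
    }
}

// Hypothetical stand-in: lock one shard, pop its tasks, and shut them down.
fn shutdown_shard(shard_id: usize) {
    println!("shutting down shard {shard_id}");
}

fn main() {
    // e.g. 8 shards and a random start of 5 visits 5, 6, 7, 0, 1, 2, 3, 4.
    close_and_shutdown_all(8, 5);
}
```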
2 changes: 2 additions & 0 deletions tokio/src/runtime/scheduler/multi_thread_alt/mod.rs
@@ -49,6 +49,7 @@ impl MultiThread {
driver_handle: driver::Handle,
blocking_spawner: blocking::Spawner,
seed_generator: RngSeedGenerator,
spawn_concurrency_level: usize,
config: Config,
) -> (MultiThread, runtime::Handle) {
let handle = worker::create(
@@ -57,6 +58,7 @@
driver_handle,
blocking_spawner,
seed_generator,
spawn_concurrency_level,
config,
);

7 changes: 5 additions & 2 deletions tokio/src/runtime/scheduler/multi_thread_alt/worker.rs
@@ -259,6 +259,7 @@ pub(super) fn create(
driver_handle: driver::Handle,
blocking_spawner: blocking::Spawner,
seed_generator: RngSeedGenerator,
spawn_concurrency_level: usize,
config: Config,
) -> runtime::Handle {
let mut num_workers = num_cores;
@@ -307,7 +308,7 @@
remotes: remotes.into_boxed_slice(),
inject,
idle,
owned: OwnedTasks::new(),
owned: OwnedTasks::new(spawn_concurrency_level as u32),
synced: Mutex::new(Synced {
assigned_cores: (0..num_workers).map(|_| None).collect(),
shutdown_cores: Vec::with_capacity(num_cores),
@@ -1460,7 +1461,9 @@ impl Shared {
}

pub(super) fn shutdown_core(&self, handle: &Handle, mut core: Box<Core>) {
self.owned.close_and_shutdown_all();
// Start from a random inner list
let start = core.rand.fastrand_n(self.owned.get_shard_size() as u32);
self.owned.close_and_shutdown_all(start as usize);

core.stats.submit(&self.worker_metrics[core.index]);

19 changes: 15 additions & 4 deletions tokio/src/runtime/task/id.rs
@@ -24,7 +24,7 @@ use std::fmt;
#[cfg_attr(docsrs, doc(cfg(all(feature = "rt", tokio_unstable))))]
#[cfg_attr(not(tokio_unstable), allow(unreachable_pub))]
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
pub struct Id(u64);
pub struct Id(pub(crate) u64);

/// Returns the [`Id`] of the currently running task.
///
@@ -74,11 +74,22 @@ impl fmt::Display for Id {

impl Id {
pub(crate) fn next() -> Self {
use crate::loom::sync::atomic::{Ordering::Relaxed, StaticAtomicU64};
use crate::loom::sync::atomic::Ordering::Relaxed;
use crate::loom::sync::atomic::StaticAtomicU64;

static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(1);
#[cfg(all(test, loom))]
{
crate::loom::lazy_static! {
static ref NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(1);
}
Self(NEXT_ID.fetch_add(1, Relaxed))
}

Self(NEXT_ID.fetch_add(1, Relaxed))
#[cfg(not(all(test, loom)))]
{
static NEXT_ID: StaticAtomicU64 = StaticAtomicU64::new(1);
Self(NEXT_ID.fetch_add(1, Relaxed))
}
}

pub(crate) fn as_u64(&self) -> u64 {
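The task `Id` becoming `pub(crate)` ties into the sharding scheme: commits in this PR ("use Header::get_id to get task_id", "cast task_id to usize", "rename get_sharded_id to get_shard_id") suggest the task id is what selects a shard in the sharded OwnedTasks list. The mapping itself lives in files not shown in this excerpt, so the following is only a plausible sketch under the assumption that the shard count is a power of two.

```rust
// Plausible sketch only; the actual shard selection lives in
// sharded_list.rs / task/list.rs, which are not part of this excerpt.
fn shard_for_task(task_id: u64, shard_count: usize) -> usize {
    // Assumes a power-of-two shard count so a bit mask picks the shard;
    // a plain modulo would work for arbitrary counts.
    debug_assert!(shard_count.is_power_of_two());
    (task_id as usize) & (shard_count - 1)
}

fn main() {
    // With 16 shards, consecutively spawned tasks land on different shards,
    // which is what spreads spawn-time lock contention across segment locks.
    for id in 1..5u64 {
        println!("task {id} -> shard {}", shard_for_task(id, 16));
    }
}
```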