|
| 1 | +//! Prefork worker pool — dispatches jobs to child Python processes via IPC. |
| 2 | +//! |
| 3 | +//! Each child is an independent Python interpreter with its own GIL, |
| 4 | +//! enabling true parallelism for CPU-bound tasks. The parent process |
| 5 | +//! runs the Rust scheduler and dispatches serialized jobs over stdin |
| 6 | +//! pipes; children send results back over stdout pipes. |
| 7 | +//! |
| 8 | +//! Architecture: |
| 9 | +//! - One dispatch thread: receives `Job` from scheduler, sends to children via stdin |
| 10 | +//! - N reader threads: one per child, reads results from stdout, sends to `result_tx` |
| 11 | +//! - Child processes: run `python -m taskito.prefork <app_path>` |
| 12 | +
|
| 13 | +mod child; |
| 14 | +mod dispatch; |
| 15 | +pub mod protocol; |
| 16 | + |
| 17 | +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; |
| 18 | +use std::sync::Arc; |
| 19 | +use std::thread; |
| 20 | + |
| 21 | +use async_trait::async_trait; |
| 22 | +use crossbeam_channel::Sender; |
| 23 | + |
| 24 | +use taskito_core::job::Job; |
| 25 | +use taskito_core::scheduler::JobResult; |
| 26 | +use taskito_core::worker::WorkerDispatcher; |
| 27 | + |
| 28 | +use child::{spawn_child, ChildWriter}; |
| 29 | +use protocol::ParentMessage; |
| 30 | + |
/// Multi-process worker pool that dispatches jobs to child Python processes.
///
/// The struct itself holds only configuration plus a stop flag; the child
/// processes, pipes, and reader threads are created inside
/// `WorkerDispatcher::run` and torn down before it returns.
pub struct PreforkPool {
    // Number of child Python processes to spawn.
    num_workers: usize,
    // Application path handed to each child (`python -m taskito.prefork <app_path>`,
    // per the module docs).
    app_path: String,
    // Python interpreter executable; resolved in `new()` from the
    // `TASKITO_PYTHON` env var, defaulting to "python".
    python: String,
    // Cooperative stop flag: set by `shutdown()`, polled by the dispatch loop.
    shutdown: AtomicBool,
}
| 38 | + |
| 39 | +impl PreforkPool { |
| 40 | + pub fn new(num_workers: usize, app_path: String) -> Self { |
| 41 | + let python = std::env::var("TASKITO_PYTHON").unwrap_or_else(|_| "python".to_string()); |
| 42 | + |
| 43 | + Self { |
| 44 | + num_workers, |
| 45 | + app_path, |
| 46 | + python, |
| 47 | + shutdown: AtomicBool::new(false), |
| 48 | + } |
| 49 | + } |
| 50 | +} |
| 51 | + |
| 52 | +#[async_trait] |
| 53 | +impl WorkerDispatcher for PreforkPool { |
| 54 | + async fn run( |
| 55 | + &self, |
| 56 | + mut job_rx: tokio::sync::mpsc::Receiver<Job>, |
| 57 | + result_tx: Sender<JobResult>, |
| 58 | + ) { |
| 59 | + let num_workers = self.num_workers; |
| 60 | + let app_path = self.app_path.clone(); |
| 61 | + let python = self.python.clone(); |
| 62 | + let shutdown = &self.shutdown; |
| 63 | + |
| 64 | + // Spawn all children and split into writers + readers |
| 65 | + let mut writers: Vec<ChildWriter> = Vec::with_capacity(num_workers); |
| 66 | + let in_flight: Arc<Vec<AtomicU32>> = |
| 67 | + Arc::new((0..num_workers).map(|_| AtomicU32::new(0)).collect()); |
| 68 | + let mut reader_handles: Vec<thread::JoinHandle<()>> = Vec::new(); |
| 69 | + let mut process_handles: Vec<child::ChildProcess> = Vec::new(); |
| 70 | + |
| 71 | + for i in 0..num_workers { |
| 72 | + match spawn_child(&python, &app_path) { |
| 73 | + Ok((writer, mut reader, process)) => { |
| 74 | + log::info!("[taskito] prefork child {i} ready"); |
| 75 | + writers.push(writer); |
| 76 | + process_handles.push(process); |
| 77 | + |
| 78 | + // Spawn a reader thread for this child |
| 79 | + let tx = result_tx.clone(); |
| 80 | + let in_flight_counter = in_flight.clone(); |
| 81 | + let child_idx = i; |
| 82 | + reader_handles.push(thread::spawn(move || { |
| 83 | + loop { |
| 84 | + match reader.read() { |
| 85 | + Ok(msg) => { |
| 86 | + if let Some(job_result) = msg.into_job_result() { |
| 87 | + in_flight_counter[child_idx] |
| 88 | + .fetch_sub(1, Ordering::Relaxed); |
| 89 | + if tx.send(job_result).is_err() { |
| 90 | + break; // result channel closed |
| 91 | + } |
| 92 | + } |
| 93 | + } |
| 94 | + Err(e) => { |
| 95 | + log::warn!( |
| 96 | + "[taskito] prefork child {child_idx} reader error: {e}" |
| 97 | + ); |
| 98 | + break; |
| 99 | + } |
| 100 | + } |
| 101 | + } |
| 102 | + })); |
| 103 | + } |
| 104 | + Err(e) => { |
| 105 | + log::error!("[taskito] failed to spawn prefork child {i}: {e}"); |
| 106 | + } |
| 107 | + } |
| 108 | + } |
| 109 | + |
| 110 | + if writers.is_empty() { |
| 111 | + log::error!("[taskito] no prefork children started, aborting"); |
| 112 | + return; |
| 113 | + } |
| 114 | + |
| 115 | + log::info!( |
| 116 | + "[taskito] prefork pool running with {} children", |
| 117 | + writers.len() |
| 118 | + ); |
| 119 | + |
| 120 | + // Dispatch loop: receive jobs from scheduler, send to least-loaded child |
| 121 | + while let Some(job) = job_rx.recv().await { |
| 122 | + if shutdown.load(Ordering::Relaxed) { |
| 123 | + break; |
| 124 | + } |
| 125 | + |
| 126 | + let counts: Vec<u32> = in_flight |
| 127 | + .iter() |
| 128 | + .map(|c| c.load(Ordering::Relaxed)) |
| 129 | + .collect(); |
| 130 | + let idx = dispatch::least_loaded(&counts); |
| 131 | + |
| 132 | + let msg = ParentMessage::from(&job); |
| 133 | + if let Err(e) = writers[idx].send(&msg) { |
| 134 | + log::error!( |
| 135 | + "[taskito] failed to send job {} to child {idx}: {e}", |
| 136 | + job.id |
| 137 | + ); |
| 138 | + // Job will be reaped by the scheduler's stale job reaper |
| 139 | + continue; |
| 140 | + } |
| 141 | + in_flight[idx].fetch_add(1, Ordering::Relaxed); |
| 142 | + } |
| 143 | + |
| 144 | + // Graceful shutdown: tell all children to stop |
| 145 | + for (i, writer) in writers.iter_mut().enumerate() { |
| 146 | + writer.send_shutdown(); |
| 147 | + log::info!("[taskito] sent shutdown to prefork child {i}"); |
| 148 | + } |
| 149 | + |
| 150 | + // Wait for children to exit |
| 151 | + let drain_timeout = std::time::Duration::from_secs(30); |
| 152 | + for (i, process) in process_handles.iter_mut().enumerate() { |
| 153 | + process.wait_or_kill(drain_timeout); |
| 154 | + log::info!("[taskito] prefork child {i} exited"); |
| 155 | + } |
| 156 | + |
| 157 | + // Wait for reader threads |
| 158 | + for handle in reader_handles { |
| 159 | + let _ = handle.join(); |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + fn shutdown(&self) { |
| 164 | + self.shutdown.store(true, Ordering::SeqCst); |
| 165 | + } |
| 166 | +} |
0 commit comments