vlib: add an arrays.parallel module, containing parallel.run/3 and parallel.amap/3 implementations (#22090)

2025-09-13 22:42:26 +03:00 · 2024-08-24 09:57:06 -05:00 · 2024-08-24 09:57:06 -05:00 · a31cd37b66
commit a31cd37b66
parent 481b842fb6
3 changed files with 179 additions and 0 deletions
--- a/vlib/arrays/parallel/parallel.v
+++ b/vlib/arrays/parallel/parallel.v
@ -0,0 +1,111 @@
+module parallel
+
+import sync
+import runtime
+
+// Params contains the optional parameters that can be passed to `run` and `amap`.
+@[params]
+pub struct Params {
+pub mut:
+	workers int // 0 by default, so that VJOBS will be used, through runtime.nr_jobs()
+}
+
+fn limited_workers(max_workers int, ilen int) int {
+	// create a limited amount of workers to handle the load
+	workers := if max_workers != 0 { max_workers } else { runtime.nr_jobs() }
+	if ilen < workers {
+		return ilen
+	}
+	return workers
+}
+
+// run lets the user run an array of input with a
+// user provided function in parallel. It limits the number of
+// worker threads to min(num_workers, num_cpu)
+// The function aborts if an error is encountered.
+// Example: parallel.run([1, 2, 3, 4, 5], 2, fn (i) { println(i) })
+pub fn run[T](input []T, worker fn (T), opt Params) {
+	if input.len == 0 {
+		return
+	}
+	workers := limited_workers(opt.workers, input.len)
+	ch := chan T{cap: workers * 2}
+	mut wg := sync.new_waitgroup()
+	wg.add(input.len)
+	for _ in 0 .. workers {
+		spawn fn [ch, worker, mut wg] [T]() {
+			for {
+				task := <-ch or { break }
+				worker(task)
+				wg.done()
+			}
+		}()
+	}
+
+	// put the input into the channel
+	for i in input {
+		ch <- i
+	}
+
+	// wait for all tasks to complete
+	wg.wait()
+	ch.close() // this will signal all the workers to exit, and we can return, without having to wait for them to finish
+}
+
+struct Task[T, R] {
+	idx    int
+	input  T
+	result R
+}
+
+// amap lets the user run an array of input with a
+// user provided function in parallel. It limits the number of
+// worker threads to max number of cpus.
+// The worker function can return a value. The returning array maintains the input order.
+// Any error handling should have happened within the worker function.
+// Example: squares := parallel.amap([1, 2, 3, 4, 5], 2, fn (i) { return i * i })
+pub fn amap[T, R](input []T, worker fn (T) R, opt Params) []R {
+	if input.len == 0 {
+		return []
+	}
+	mut tasks := []Task[T, R]{len: input.len}
+	// the tasks array will be passed to the closure of each worker by reference, so that it could
+	// then modify the same tasks:
+	mut tasks_ref := &tasks
+
+	workers := limited_workers(opt.workers, input.len)
+	// use a buffered channel for transfering the tasks, that has enough space to keep all the workers busy,
+	// without blocking the main thread needlessly
+	ch := chan Task[T, R]{cap: workers * 2}
+	mut wg := sync.new_waitgroup()
+	wg.add(input.len)
+	for _ in 0 .. workers {
+		spawn fn [ch, worker, mut wg, mut tasks_ref] [T, R]() {
+			for {
+				mut task := <-ch or { break }
+				unsafe {
+					tasks_ref[task.idx] = Task[T, R]{
+						idx:    task.idx
+						input:  task.input
+						result: worker(task.input)
+					}
+				}
+				wg.done()
+			}
+		}()
+	}
+
+	// put the input into the channel
+	for idx, inp in input {
+		ch <- Task[T, R]{
+			idx:   idx
+			input: inp
+		}
+	}
+
+	// wait for all tasks to complete
+	wg.wait()
+	ch.close()
+	tasks.sort(a.idx < b.idx)
+	return tasks.map(it.result)
+}