#![deny(clippy::pedantic)]
#![no_std]
#![feature(type_alias_impl_trait)]
#![feature(decl_macro)]
#![cfg_attr(target_os = "cuda", feature(abi_ptx))]
#![cfg_attr(target_os = "cuda", feature(asm_experimental_arch))]
#![cfg_attr(target_os = "cuda", feature(alloc_error_handler))]
#![allow(long_running_const_eval)]
#![recursion_limit = "1024"]
extern crate alloc;
#[cfg(target_os = "cuda")]
use core::ops::ControlFlow;
use core::sync::atomic::AtomicU64;
use necsim_core::{
cogs::{
CoalescenceSampler, DispersalSampler, EmigrationExit, Habitat, ImmigrationEntry,
LineageStore, MathsCore, PrimeableRng, SpeciationProbability, TurnoverRate,
},
lineage::Lineage,
reporter::boolean::Boolean,
simulation::Simulation,
};
use necsim_core_bond::{NonNegativeF64, PositiveF64};
use necsim_impls_cuda::{event_buffer::EventBuffer, value_buffer::ValueBuffer};
use necsim_impls_no_std::cogs::{
active_lineage_sampler::singular::SingularActiveLineageSampler,
event_sampler::tracking::{MinSpeciationTrackingEventSampler, SpeciationSample},
};
use rust_cuda::{
kernel::param::{DeepPerThreadBorrow, PerThreadShallowCopy, PtxJit, ShallowInteriorMutable},
lend::RustToCuda,
};
#[rust_cuda::kernel::kernel(pub use link! for impl)]
#[kernel(
allow(ptx::double_precision_use),
allow(ptx::local_memory_use), forbid(ptx::register_spills),
)]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
pub fn simulate<
M: MathsCore + Sync,
H: Habitat<M> + RustToCuda + Sync,
G: PrimeableRng<M> + RustToCuda + Sync,
S: LineageStore<M, H> + RustToCuda + Sync,
X: EmigrationExit<M, H, G, S> + RustToCuda + Sync,
D: DispersalSampler<M, H, G> + RustToCuda + Sync,
C: CoalescenceSampler<M, H, S> + RustToCuda + Sync,
T: TurnoverRate<M, H> + RustToCuda + Sync,
N: SpeciationProbability<M, H> + RustToCuda + Sync,
E: MinSpeciationTrackingEventSampler<M, H, G, S, X, D, C, T, N> + RustToCuda + Sync,
I: ImmigrationEntry<M> + RustToCuda + Sync,
A: SingularActiveLineageSampler<M, H, G, S, X, D, C, T, N, E, I> + RustToCuda + Sync,
ReportSpeciation: Boolean,
ReportDispersal: Boolean,
>(
simulation: &PtxJit<DeepPerThreadBorrow<Simulation<M, H, G, S, X, D, C, T, N, E, I, A>>>,
task_list: &mut PtxJit<DeepPerThreadBorrow<ValueBuffer<Lineage, true, true>>>,
event_buffer_reporter: &mut PtxJit<
DeepPerThreadBorrow<EventBuffer<ReportSpeciation, ReportDispersal>>,
>,
min_spec_sample_buffer: &mut PtxJit<
DeepPerThreadBorrow<ValueBuffer<SpeciationSample, false, true>>,
>,
next_event_time_buffer: &mut PtxJit<DeepPerThreadBorrow<ValueBuffer<PositiveF64, false, true>>>,
total_time_max: &ShallowInteriorMutable<AtomicU64>,
total_steps_sum: &ShallowInteriorMutable<AtomicU64>,
max_steps: PerThreadShallowCopy<u64>,
max_next_event_time: PerThreadShallowCopy<NonNegativeF64>,
) {
let mut simulation = unsafe { core::mem::ManuallyDrop::new(core::ptr::read(simulation)) };
task_list.with_value_for_core(|task| {
core::mem::drop(
simulation
.active_lineage_sampler_mut()
.replace_active_lineage(task),
);
simulation.event_sampler_mut().replace_min_speciation(None);
let mut final_next_event_time = None;
let (time, steps) = simulation.simulate_incremental_early_stop(
|_, steps, next_event_time, reporter| {
final_next_event_time = Some(next_event_time);
if steps < max_steps
&& next_event_time < max_next_event_time
&& reporter.can_buffer_next_event()
{
ControlFlow::Continue(())
} else {
ControlFlow::Break(())
}
},
event_buffer_reporter,
);
next_event_time_buffer.put_value_for_core(final_next_event_time);
if steps > 0 {
total_time_max.fetch_max(time.get().to_bits(), core::sync::atomic::Ordering::Relaxed);
total_steps_sum.fetch_add(steps, core::sync::atomic::Ordering::Relaxed);
}
min_spec_sample_buffer
.put_value_for_core(simulation.event_sampler_mut().replace_min_speciation(None));
simulation
.active_lineage_sampler_mut()
.replace_active_lineage(None)
});
}
#[cfg(target_os = "cuda")]
mod cuda_prelude {
use rust_cuda::device::alloc::PTXAllocator;
#[global_allocator]
static _GLOBAL_ALLOCATOR: PTXAllocator = PTXAllocator;
#[cfg(not(debug_assertions))]
#[panic_handler]
fn panic(_panic_info: &::core::panic::PanicInfo) -> ! {
rust_cuda::device::utils::abort()
}
#[cfg(debug_assertions)]
#[panic_handler]
fn panic(info: &::core::panic::PanicInfo) -> ! {
rust_cuda::device::utils::pretty_print_panic_info(info, true, true);
rust_cuda::device::utils::abort()
}
#[cfg(not(debug_assertions))]
#[alloc_error_handler]
fn alloc_error_handler(_: core::alloc::Layout) -> ! {
rust_cuda::device::utils::abort()
}
#[cfg(debug_assertions)]
#[alloc_error_handler]
fn alloc_error_handler(layout: core::alloc::Layout) -> ! {
rust_cuda::device::utils::pretty_print_alloc_error(layout);
rust_cuda::device::utils::abort()
}
}