numcodecs_wasm_host_reproducible/transform/
mod.rs

1use std::sync::OnceLock;
2
3use anyhow::{anyhow, Context, Error};
4use instcnt::PerfWitInterfaces;
5use numcodecs_wasm_host::NumcodecsWitInterfaces;
6
7use crate::{logging::WasiLoggingInterface, stdio::WasiSandboxedStdioInterface};
8
9pub mod instcnt;
10pub mod nan;
11
12#[expect(clippy::too_many_lines)] // FIXME
13pub fn transform_wasm_component(wasm_component: impl Into<Vec<u8>>) -> Result<Vec<u8>, Error> {
14    let NumcodecsWitInterfaces {
15        codec: codec_interface,
16        ..
17    } = NumcodecsWitInterfaces::get();
18
19    // create a new WAC composition graph with the WASI component packages
20    //  pre-registered and the numcodecs:wasm/perf interface pre-exported
21    let PreparedCompositionGraph {
22        graph: wac,
23        wasi: wasi_component_packages,
24    } = get_prepared_composition_graph()?;
25    let mut wac = wac.clone();
26
27    // parse and instantiate the root package, which exports numcodecs:abc/codec
28    let numcodecs_codec_package = wac_graph::types::Package::from_bytes(
29        &format!("{}", codec_interface.package().name()),
30        codec_interface.package().version(),
31        wasm_component,
32        wac.types_mut(),
33    )?;
34
35    let numcodecs_codec_world = &wac.types()[numcodecs_codec_package.ty()];
36    let numcodecs_codec_imports = extract_component_ports(&numcodecs_codec_world.imports)?;
37
38    let numcodecs_codec_package = wac.register_package(numcodecs_codec_package)?;
39    let numcodecs_codec_instance = wac.instantiate(numcodecs_codec_package);
40
41    // list the imports that the linker will provide
42    let linker_provided_imports = [
43        &WasiSandboxedStdioInterface::get().stdio,
44        &WasiLoggingInterface::get().logging,
45    ];
46
47    // initialise the unresolved imports to the imports of the root package
48    let mut unresolved_imports = vecmap::VecMap::new();
49    for import in &numcodecs_codec_imports {
50        unresolved_imports
51            .entry(import.clone())
52            .or_insert_with(Vec::new)
53            .push(numcodecs_codec_instance);
54    }
55
56    // track all non-root instances, which may fulfil imports
57    let mut package_instances = vecmap::VecMap::new();
58
59    // initialise the queue of required, still to instantiate packages
60    //  to the imports of the root package
61    let mut required_packages_queue = numcodecs_codec_imports
62        .iter()
63        .map(|import| import.package().clone())
64        .collect::<std::collections::VecDeque<_>>();
65
66    // iterate while not all required packages have been instantiated
67    while let Some(required_package) = required_packages_queue.pop_front() {
68        if package_instances.contains_key(&required_package) {
69            continue;
70        }
71
72        // some packages do not need to be instantiated since they will be
73        //  provided by the linker
74        if linker_provided_imports
75            .iter()
76            .any(|interface| interface.package() == &required_package)
77        {
78            continue;
79        }
80
81        // find the WASI component package that can fulfil the required package
82        let Some(component_package) = wasi_component_packages.iter().find(|component_package| {
83            component_package
84                .exports
85                .iter()
86                .any(|export| export.package() == &required_package)
87        }) else {
88            return Err(anyhow!(
89                "WASM component requires unresolved import(s) from package {required_package}"
90            ));
91        };
92
93        let PackageWithPorts {
94            package: component_package,
95            imports: component_imports,
96            exports: component_exports,
97        } = component_package;
98
99        // instantiate the component package
100        let component_instance = wac.instantiate(*component_package);
101
102        // try to resolve all imports of the component package ...
103        for import in component_imports {
104            if let Some(dependency_instance) = package_instances.get(import.package()).copied() {
105                // ... if the dependency has already been instantiated,
106                //     import its export directly
107                let import_str = &format!("{import}");
108                let dependency_export =
109                    wac.alias_instance_export(dependency_instance, import_str)?;
110                wac.set_instantiation_argument(component_instance, import_str, dependency_export)?;
111            } else {
112                // ... otherwise require the dependency package and store the
113                //     import so that it can be resolved later
114                required_packages_queue.push_back(import.package().clone());
115                unresolved_imports
116                    .entry(import.clone())
117                    .or_insert_with(Vec::new)
118                    .push(component_instance);
119            }
120        }
121
122        for export in component_exports {
123            // register this instance's package so that its exports can later
124            //  fulfil more imports
125            package_instances.insert(export.package().clone(), component_instance);
126
127            // try to resolve unresolved imports using the export of this package
128            if let Some(unresolved_imports) = unresolved_imports.remove(export) {
129                let export_str = &format!("{export}");
130                let component_export = wac.alias_instance_export(component_instance, export_str)?;
131                for import in unresolved_imports {
132                    wac.set_instantiation_argument(import, export_str, component_export)?;
133                }
134            }
135        }
136    }
137
138    // linker-provided imports will be resolved later
139    for provided in linker_provided_imports {
140        unresolved_imports.remove(provided);
141    }
142
143    if !unresolved_imports.is_empty() {
144        return Err(anyhow!(
145            "WASM component requires unresolved import(s): {:?}",
146            unresolved_imports.into_keys().collect::<Vec<_>>(),
147        ));
148    }
149
150    // export the numcodecs:abc/codec interface
151    let numcodecs_codecs_str = &format!("{codec_interface}");
152    let numcodecs_codecs_export =
153        wac.alias_instance_export(numcodecs_codec_instance, numcodecs_codecs_str)?;
154    wac.export(numcodecs_codecs_export, numcodecs_codecs_str)?;
155
156    // encode the WAC composition graph into a WASM component and validate it
157    let wasm = wac.encode(wac_graph::EncodeOptions {
158        define_components: true,
159        // we do our own validation right below
160        validate: false,
161        processor: None,
162    })?;
163
164    wasmparser::Validator::new_with_features(
165        wasmparser::WasmFeaturesInflated {
166            // MUST: float operations are required
167            //       (and our engine's transformations makes them deterministic)
168            floats: true,
169            // MUST: codecs and reproducible WASI are implemented as components
170            component_model: true,
171            // OK: using linear values in component init is deterministic, as
172            //     long as the values provided are deterministic
173            component_model_values: true,
174            // OK: nested component names do not interact with determinism
175            component_model_nested_names: true,
176            ..crate::engine::DETERMINISTIC_WASM_MODULE_FEATURES
177        }
178        .into(),
179    )
180    .validate_all(&wasm)?;
181
182    Ok(wasm)
183}
184
/// A composition graph that is set up once and then cloned for every
/// component transformation, together with the packages registered in it.
struct PreparedCompositionGraph {
    /// WAC composition graph in which the WASI component packages are
    /// registered and which exports the `numcodecs:wasm/perf` interface.
    graph: wac_graph::CompositionGraph,
    /// The WASI component packages registered in `graph`, each with the
    /// interfaces it imports and exports.
    wasi: Box<[PackageWithPorts]>,
}
189
190fn get_prepared_composition_graph() -> Result<&'static PreparedCompositionGraph, Error> {
191    static PREPARED_COMPOSITION_GRAPH: OnceLock<Result<PreparedCompositionGraph, Error>> =
192        OnceLock::new();
193
194    let prepared_composition_graph = PREPARED_COMPOSITION_GRAPH.get_or_init(|| {
195        let PerfWitInterfaces {
196            perf: perf_interface,
197            ..
198        } = PerfWitInterfaces::get();
199
200        // create a new WAC composition graph
201        let mut wac = wac_graph::CompositionGraph::new();
202
203        // parse and register the WASI component packages
204        let wasi_component_packages =
205            register_wasi_component_packages(&mut wac)?.into_boxed_slice();
206
207        // create, register, and instantiate the numcodecs:wasm package
208        let numcodecs_wasm_perf_instance = instantiate_numcodecs_wasm_perf_package(&mut wac)?;
209
210        // export the numcodecs:wasm/perf interface
211        let numcodecs_wasm_perf_str = &format!("{perf_interface}");
212        let numcodecs_wasm_perf_export =
213            wac.alias_instance_export(numcodecs_wasm_perf_instance, numcodecs_wasm_perf_str)?;
214        wac.export(numcodecs_wasm_perf_export, numcodecs_wasm_perf_str)?;
215
216        Ok(PreparedCompositionGraph {
217            graph: wac,
218            wasi: wasi_component_packages,
219        })
220    });
221
222    match prepared_composition_graph {
223        Ok(prepared_composition_graph) => Ok(prepared_composition_graph),
224        Err(err) => Err(anyhow!(err)),
225    }
226}
227
/// A package that has been registered in a composition graph, together with
/// the interfaces that its world imports and exports.
struct PackageWithPorts {
    /// Identifier under which the package is registered in the graph.
    package: wac_graph::PackageId,
    /// Interfaces imported by the package's world (instance imports only).
    imports: Box<[wasm_component_layer::InterfaceIdentifier]>,
    /// Interfaces exported by the package's world (instance exports only).
    exports: Box<[wasm_component_layer::InterfaceIdentifier]>,
}
233
234fn register_wasi_component_packages(
235    wac: &mut wac_graph::CompositionGraph,
236) -> Result<Vec<PackageWithPorts>, Error> {
237    let wasi_component_packages = wasi_sandboxed_component_provider::ALL_COMPONENTS
238        .iter()
239        .map(|(component_name, component_bytes)| -> Result<_, Error> {
240            let component_package = wac_graph::types::Package::from_bytes(
241                component_name,
242                None,
243                Vec::from(*component_bytes),
244                wac.types_mut(),
245            )?;
246
247            let component_world = &wac.types()[component_package.ty()];
248
249            let component_imports = extract_component_ports(&component_world.imports)?;
250            let component_exports = extract_component_ports(&component_world.exports)?;
251
252            let component_package = wac.register_package(component_package)?;
253
254            Ok(PackageWithPorts {
255                package: component_package,
256                imports: component_imports.into_boxed_slice(),
257                exports: component_exports.into_boxed_slice(),
258            })
259        })
260        .collect::<Result<Vec<_>, _>>()?;
261
262    Ok(wasi_component_packages)
263}
264
265fn extract_component_ports(
266    ports: &indexmap::IndexMap<String, wac_graph::types::ItemKind>,
267) -> Result<Vec<wasm_component_layer::InterfaceIdentifier>, anyhow::Error> {
268    ports
269        .iter()
270        .filter_map(|(import, kind)| match kind {
271            wac_graph::types::ItemKind::Instance(_) => Some(
272                wasm_component_layer::InterfaceIdentifier::try_from(import.as_str()),
273            ),
274            _ => None,
275        })
276        .collect::<Result<Vec<_>, _>>()
277}
278
279fn instantiate_numcodecs_wasm_perf_package(
280    wac: &mut wac_graph::CompositionGraph,
281) -> Result<wac_graph::NodeId, Error> {
282    let PerfWitInterfaces {
283        perf: perf_interface,
284        ..
285    } = PerfWitInterfaces::get();
286
287    // create, register, and instantiate the numcodecs:wasm/perf package
288    let numcodecs_wasm_perf_package = wac_graph::types::Package::from_bytes(
289        &format!("{}", perf_interface.package().name()),
290        perf_interface.package().version(),
291        create_numcodecs_wasm_perf_component()?,
292        wac.types_mut(),
293    )?;
294
295    let numcodecs_wasm_perf_package = wac.register_package(numcodecs_wasm_perf_package)?;
296    let numcodecs_wasm_perf_instance = wac.instantiate(numcodecs_wasm_perf_package);
297
298    Ok(numcodecs_wasm_perf_instance)
299}
300
301fn create_numcodecs_wasm_perf_component() -> Result<Vec<u8>, Error> {
302    const ROOT: &str = "root";
303
304    let PerfWitInterfaces {
305        perf: perf_interface,
306        instruction_counter,
307    } = PerfWitInterfaces::get();
308
309    let mut module = create_numcodecs_wasm_perf_module();
310
311    let mut resolve = wit_parser::Resolve::new();
312
313    let interface = resolve.interfaces.alloc(wit_parser::Interface {
314        name: Some(String::from(perf_interface.name())),
315        types: indexmap::IndexMap::new(),
316        #[expect(clippy::iter_on_single_items)]
317        functions: [(
318            String::from(instruction_counter),
319            wit_parser::Function {
320                name: String::from(instruction_counter),
321                kind: wit_parser::FunctionKind::Freestanding,
322                params: Vec::new(),
323                result: Some(wit_parser::Type::U64),
324                docs: wit_parser::Docs { contents: None },
325                stability: wit_parser::Stability::Unknown,
326            },
327        )]
328        .into_iter()
329        .collect(),
330        docs: wit_parser::Docs { contents: None },
331        package: None, // The package is linked up below
332        stability: wit_parser::Stability::Unknown,
333    });
334
335    let package_name = wit_parser::PackageName {
336        namespace: String::from(perf_interface.package().name().namespace()),
337        name: String::from(perf_interface.package().name().name()),
338        version: perf_interface.package().version().cloned(),
339    };
340    let package = resolve.packages.alloc(wit_parser::Package {
341        name: package_name.clone(),
342        docs: wit_parser::Docs { contents: None },
343        #[expect(clippy::iter_on_single_items)]
344        interfaces: [(String::from(perf_interface.name()), interface)]
345            .into_iter()
346            .collect(),
347        worlds: indexmap::IndexMap::new(),
348    });
349    resolve.package_names.insert(package_name, package);
350
351    if let Some(interface) = resolve.interfaces.get_mut(interface) {
352        interface.package = Some(package);
353    }
354
355    let world = resolve.worlds.alloc(wit_parser::World {
356        name: String::from(ROOT),
357        imports: indexmap::IndexMap::new(),
358        #[expect(clippy::iter_on_single_items)]
359        exports: [(
360            wit_parser::WorldKey::Interface(interface),
361            wit_parser::WorldItem::Interface {
362                id: interface,
363                stability: wit_parser::Stability::Unknown,
364            },
365        )]
366        .into_iter()
367        .collect(),
368        package: None, // The package is linked up below
369        docs: wit_parser::Docs { contents: None },
370        includes: Vec::new(),
371        include_names: Vec::new(),
372        stability: wit_parser::Stability::Unknown,
373    });
374
375    let root_name = wit_parser::PackageName {
376        namespace: String::from(ROOT),
377        name: String::from("component"),
378        version: perf_interface.package().version().cloned(),
379    };
380    let root = resolve.packages.alloc(wit_parser::Package {
381        name: root_name.clone(),
382        docs: wit_parser::Docs { contents: None },
383        interfaces: indexmap::IndexMap::new(),
384        #[expect(clippy::iter_on_single_items)]
385        worlds: [(String::from(ROOT), world)].into_iter().collect(),
386    });
387    resolve.package_names.insert(root_name, root);
388
389    if let Some(world) = resolve.worlds.get_mut(world) {
390        world.package = Some(root);
391    }
392
393    wit_component::embed_component_metadata(
394        &mut module,
395        &resolve,
396        world,
397        wit_component::StringEncoding::UTF8,
398    )?;
399
400    let mut encoder = wit_component::ComponentEncoder::default()
401        .module(&module)
402        .context("wit_component::ComponentEncoder::module failed")?;
403
404    let component = encoder
405        .encode()
406        .context("wit_component::ComponentEncoder::encode failed")?;
407
408    Ok(component)
409}
410
411fn create_numcodecs_wasm_perf_module() -> Vec<u8> {
412    let PerfWitInterfaces {
413        perf: perf_interface,
414        instruction_counter,
415    } = PerfWitInterfaces::get();
416
417    let mut module = wasm_encoder::Module::new();
418
419    // Encode the type section with
420    //  types[0] = () -> i64
421    let mut types = wasm_encoder::TypeSection::new();
422    let ty0 = types.len();
423    types.ty().function([], [wasm_encoder::ValType::I64]);
424    module.section(&types);
425
426    // Encode the function section with
427    //  functions[0] = fn() -> i64 [ types[0] ]
428    let mut functions = wasm_encoder::FunctionSection::new();
429    let fn0 = functions.len();
430    functions.function(ty0);
431    module.section(&functions);
432
433    // Encode the export section with
434    //  {perf_interface}#{instruction_counter} = functions[0]
435    let mut exports = wasm_encoder::ExportSection::new();
436    exports.export(
437        &format!("{perf_interface}#{instruction_counter}"),
438        wasm_encoder::ExportKind::Func,
439        fn0,
440    );
441    module.section(&exports);
442
443    // Encode the code section.
444    let mut codes = wasm_encoder::CodeSection::new();
445    let mut fn0 = wasm_encoder::Function::new([]);
446    fn0.instruction(&wasm_encoder::Instruction::Unreachable);
447    fn0.instruction(&wasm_encoder::Instruction::End);
448    codes.function(&fn0);
449    module.section(&codes);
450
451    // Extract the encoded WASM bytes for this module
452    module.finish()
453}