numcodecs_wasm_host_reproducible/transform/
mod.rs

1use std::sync::OnceLock;
2
3use anyhow::{Context, Error, anyhow};
4use instcnt::PerfWitInterfaces;
5use numcodecs_wasm_host::NumcodecsWitInterfaces;
6
7use crate::{logging::WasiLoggingInterface, stdio::WasiSandboxedStdioInterface};
8
9pub mod instcnt;
10pub mod nan;
11
12#[expect(clippy::too_many_lines)] // FIXME
13pub fn transform_wasm_component(wasm_component: impl Into<Vec<u8>>) -> Result<Vec<u8>, Error> {
14    let NumcodecsWitInterfaces {
15        codec: codec_interface,
16        ..
17    } = NumcodecsWitInterfaces::get();
18
19    // create a new WAC composition graph with the WASI component packages
20    //  pre-registered and the numcodecs:wasm/perf interface pre-exported
21    let PreparedCompositionGraph {
22        graph: wac,
23        wasi: wasi_component_packages,
24    } = get_prepared_composition_graph()?;
25    let mut wac = wac.clone();
26
27    // parse and instantiate the root package, which exports numcodecs:abc/codec
28    let numcodecs_codec_package = wac_graph::types::Package::from_bytes(
29        &format!("{}", codec_interface.package().name()),
30        codec_interface.package().version(),
31        wasm_component,
32        wac.types_mut(),
33    )?;
34
35    let numcodecs_codec_world = &wac.types()[numcodecs_codec_package.ty()];
36    let numcodecs_codec_imports = extract_component_ports(&numcodecs_codec_world.imports)?;
37
38    let numcodecs_codec_package = wac.register_package(numcodecs_codec_package)?;
39    let numcodecs_codec_instance = wac.instantiate(numcodecs_codec_package);
40
41    // list the imports that the linker will provide
42    let linker_provided_imports = [
43        &WasiSandboxedStdioInterface::get().stdio,
44        &WasiLoggingInterface::get().logging,
45    ];
46
47    // initialise the unresolved imports to the imports of the root package
48    let mut unresolved_imports = vecmap::VecMap::new();
49    for import in &numcodecs_codec_imports {
50        unresolved_imports
51            .entry(import.clone())
52            .or_insert_with(Vec::new)
53            .push(numcodecs_codec_instance);
54    }
55
56    // track all non-root instances, which may fulfil imports
57    let mut package_instances = vecmap::VecMap::new();
58
59    // initialise the queue of required, still to instantiate packages
60    //  to the imports of the root package
61    let mut required_packages_queue = numcodecs_codec_imports
62        .iter()
63        .map(|import| import.package().clone())
64        .collect::<std::collections::VecDeque<_>>();
65
66    // iterate while not all required packages have been instantiated
67    while let Some(required_package) = required_packages_queue.pop_front() {
68        if package_instances.contains_key(&required_package) {
69            continue;
70        }
71
72        // some packages do not need to be instantiated since they will be
73        //  provided by the linker
74        if linker_provided_imports
75            .iter()
76            .any(|interface| interface.package() == &required_package)
77        {
78            continue;
79        }
80
81        // find the WASI component package that can fulfil the required package
82        let Some(component_package) = wasi_component_packages.iter().find(|component_package| {
83            component_package
84                .exports
85                .iter()
86                .any(|export| export.package() == &required_package)
87        }) else {
88            return Err(anyhow!(
89                "WASM component requires unresolved import(s) from package {required_package}"
90            ));
91        };
92
93        let PackageWithPorts {
94            package: component_package,
95            imports: component_imports,
96            exports: component_exports,
97        } = component_package;
98
99        // instantiate the component package
100        let component_instance = wac.instantiate(*component_package);
101
102        // try to resolve all imports of the component package ...
103        for import in component_imports {
104            if let Some(dependency_instance) = package_instances.get(import.package()).copied() {
105                // ... if the dependency has already been instantiated,
106                //     import its export directly
107                let import_str = &format!("{import}");
108                let dependency_export =
109                    wac.alias_instance_export(dependency_instance, import_str)?;
110                wac.set_instantiation_argument(component_instance, import_str, dependency_export)?;
111            } else {
112                // ... otherwise require the dependency package and store the
113                //     import so that it can be resolved later
114                required_packages_queue.push_back(import.package().clone());
115                unresolved_imports
116                    .entry(import.clone())
117                    .or_insert_with(Vec::new)
118                    .push(component_instance);
119            }
120        }
121
122        for export in component_exports {
123            // register this instance's package so that its exports can later
124            //  fulfil more imports
125            package_instances.insert(export.package().clone(), component_instance);
126
127            // try to resolve unresolved imports using the export of this package
128            if let Some(unresolved_imports) = unresolved_imports.remove(export) {
129                let export_str = &format!("{export}");
130                let component_export = wac.alias_instance_export(component_instance, export_str)?;
131                for import in unresolved_imports {
132                    wac.set_instantiation_argument(import, export_str, component_export)?;
133                }
134            }
135        }
136    }
137
138    // linker-provided imports will be resolved later
139    for provided in linker_provided_imports {
140        unresolved_imports.remove(provided);
141    }
142
143    if !unresolved_imports.is_empty() {
144        return Err(anyhow!(
145            "WASM component requires unresolved import(s): {:?}",
146            unresolved_imports.into_keys().collect::<Vec<_>>(),
147        ));
148    }
149
150    // export the numcodecs:abc/codec interface
151    let numcodecs_codecs_str = &format!("{codec_interface}");
152    let numcodecs_codecs_export =
153        wac.alias_instance_export(numcodecs_codec_instance, numcodecs_codecs_str)?;
154    wac.export(numcodecs_codecs_export, numcodecs_codecs_str)?;
155
156    // encode the WAC composition graph into a WASM component and validate it
157    let wasm = wac.encode(wac_graph::EncodeOptions {
158        define_components: true,
159        // we do our own validation right below
160        validate: false,
161        processor: None,
162    })?;
163
164    wasmparser::Validator::new_with_features(
165        wasmparser::WasmFeaturesInflated {
166            // MUST: float operations are required
167            //       (and our engine's transformations makes them deterministic)
168            floats: true,
169            // MUST: codecs and reproducible WASI are implemented as components
170            component_model: true,
171            ..crate::engine::DETERMINISTIC_WASM_MODULE_FEATURES
172        }
173        .into(),
174    )
175    .validate_all(&wasm)?;
176
177    Ok(wasm)
178}
179
/// A WAC composition graph together with the WASI component packages that
/// have been registered in it.
struct PreparedCompositionGraph {
    /// The prepared composition graph, which `transform_wasm_component`
    /// clones before modifying it.
    graph: wac_graph::CompositionGraph,
    /// The registered WASI component packages with their imports and exports.
    wasi: Box<[PackageWithPorts]>,
}
184
185fn get_prepared_composition_graph() -> Result<&'static PreparedCompositionGraph, Error> {
186    static PREPARED_COMPOSITION_GRAPH: OnceLock<Result<PreparedCompositionGraph, Error>> =
187        OnceLock::new();
188
189    let prepared_composition_graph = PREPARED_COMPOSITION_GRAPH.get_or_init(|| {
190        let PerfWitInterfaces {
191            perf: perf_interface,
192            ..
193        } = PerfWitInterfaces::get();
194
195        // create a new WAC composition graph
196        let mut wac = wac_graph::CompositionGraph::new();
197
198        // parse and register the WASI component packages
199        let wasi_component_packages =
200            register_wasi_component_packages(&mut wac)?.into_boxed_slice();
201
202        // create, register, and instantiate the numcodecs:wasm package
203        let numcodecs_wasm_perf_instance = instantiate_numcodecs_wasm_perf_package(&mut wac)?;
204
205        // export the numcodecs:wasm/perf interface
206        let numcodecs_wasm_perf_str = &format!("{perf_interface}");
207        let numcodecs_wasm_perf_export =
208            wac.alias_instance_export(numcodecs_wasm_perf_instance, numcodecs_wasm_perf_str)?;
209        wac.export(numcodecs_wasm_perf_export, numcodecs_wasm_perf_str)?;
210
211        Ok(PreparedCompositionGraph {
212            graph: wac,
213            wasi: wasi_component_packages,
214        })
215    });
216
217    match prepared_composition_graph {
218        Ok(prepared_composition_graph) => Ok(prepared_composition_graph),
219        Err(err) => Err(anyhow!(err)),
220    }
221}
222
/// A package that has been registered in a WAC composition graph, together
/// with the interfaces that its world imports and exports.
struct PackageWithPorts {
    /// The id under which the package has been registered in the graph.
    package: wac_graph::PackageId,
    /// The identifiers of the instance imports of the package's world.
    imports: Box<[wasm_component_layer::InterfaceIdentifier]>,
    /// The identifiers of the instance exports of the package's world.
    exports: Box<[wasm_component_layer::InterfaceIdentifier]>,
}
228
229fn register_wasi_component_packages(
230    wac: &mut wac_graph::CompositionGraph,
231) -> Result<Vec<PackageWithPorts>, Error> {
232    let wasi_component_packages = wasi_sandboxed_component_provider::ALL_COMPONENTS
233        .iter()
234        .map(|(component_name, component_bytes)| -> Result<_, Error> {
235            let component_package = wac_graph::types::Package::from_bytes(
236                component_name,
237                None,
238                Vec::from(*component_bytes),
239                wac.types_mut(),
240            )?;
241
242            let component_world = &wac.types()[component_package.ty()];
243
244            let component_imports = extract_component_ports(&component_world.imports)?;
245            let component_exports = extract_component_ports(&component_world.exports)?;
246
247            let component_package = wac.register_package(component_package)?;
248
249            Ok(PackageWithPorts {
250                package: component_package,
251                imports: component_imports.into_boxed_slice(),
252                exports: component_exports.into_boxed_slice(),
253            })
254        })
255        .collect::<Result<Vec<_>, _>>()?;
256
257    Ok(wasi_component_packages)
258}
259
260fn extract_component_ports(
261    ports: &indexmap::IndexMap<String, wac_graph::types::ItemKind>,
262) -> Result<Vec<wasm_component_layer::InterfaceIdentifier>, anyhow::Error> {
263    ports
264        .iter()
265        .filter_map(|(import, kind)| match kind {
266            wac_graph::types::ItemKind::Instance(_) => Some(
267                wasm_component_layer::InterfaceIdentifier::try_from(import.as_str()),
268            ),
269            _ => None,
270        })
271        .collect::<Result<Vec<_>, _>>()
272}
273
274fn instantiate_numcodecs_wasm_perf_package(
275    wac: &mut wac_graph::CompositionGraph,
276) -> Result<wac_graph::NodeId, Error> {
277    let PerfWitInterfaces {
278        perf: perf_interface,
279        ..
280    } = PerfWitInterfaces::get();
281
282    // create, register, and instantiate the numcodecs:wasm/perf package
283    let numcodecs_wasm_perf_package = wac_graph::types::Package::from_bytes(
284        &format!("{}", perf_interface.package().name()),
285        perf_interface.package().version(),
286        create_numcodecs_wasm_perf_component()?,
287        wac.types_mut(),
288    )?;
289
290    let numcodecs_wasm_perf_package = wac.register_package(numcodecs_wasm_perf_package)?;
291    let numcodecs_wasm_perf_instance = wac.instantiate(numcodecs_wasm_perf_package);
292
293    Ok(numcodecs_wasm_perf_instance)
294}
295
296fn create_numcodecs_wasm_perf_component() -> Result<Vec<u8>, Error> {
297    const ROOT: &str = "root";
298
299    let PerfWitInterfaces {
300        perf: perf_interface,
301        instruction_counter,
302    } = PerfWitInterfaces::get();
303
304    let mut module = create_numcodecs_wasm_perf_module();
305
306    let mut resolve = wit_parser::Resolve::new();
307
308    let interface = resolve.interfaces.alloc(wit_parser::Interface {
309        name: Some(String::from(perf_interface.name())),
310        types: indexmap::IndexMap::new(),
311        #[expect(clippy::iter_on_single_items)]
312        functions: [(
313            String::from(instruction_counter),
314            wit_parser::Function {
315                name: String::from(instruction_counter),
316                kind: wit_parser::FunctionKind::Freestanding,
317                params: Vec::new(),
318                result: Some(wit_parser::Type::U64),
319                docs: wit_parser::Docs { contents: None },
320                stability: wit_parser::Stability::Unknown,
321            },
322        )]
323        .into_iter()
324        .collect(),
325        docs: wit_parser::Docs { contents: None },
326        package: None, // The package is linked up below
327        stability: wit_parser::Stability::Unknown,
328    });
329
330    let package_name = wit_parser::PackageName {
331        namespace: String::from(perf_interface.package().name().namespace()),
332        name: String::from(perf_interface.package().name().name()),
333        version: perf_interface.package().version().cloned(),
334    };
335    let package = resolve.packages.alloc(wit_parser::Package {
336        name: package_name.clone(),
337        docs: wit_parser::Docs { contents: None },
338        #[expect(clippy::iter_on_single_items)]
339        interfaces: [(String::from(perf_interface.name()), interface)]
340            .into_iter()
341            .collect(),
342        worlds: indexmap::IndexMap::new(),
343    });
344    resolve.package_names.insert(package_name, package);
345
346    if let Some(interface) = resolve.interfaces.get_mut(interface) {
347        interface.package = Some(package);
348    }
349
350    let world = resolve.worlds.alloc(wit_parser::World {
351        name: String::from(ROOT),
352        imports: indexmap::IndexMap::new(),
353        #[expect(clippy::iter_on_single_items)]
354        exports: [(
355            wit_parser::WorldKey::Interface(interface),
356            wit_parser::WorldItem::Interface {
357                id: interface,
358                stability: wit_parser::Stability::Unknown,
359            },
360        )]
361        .into_iter()
362        .collect(),
363        package: None, // The package is linked up below
364        docs: wit_parser::Docs { contents: None },
365        includes: Vec::new(),
366        include_names: Vec::new(),
367        stability: wit_parser::Stability::Unknown,
368    });
369
370    let root_name = wit_parser::PackageName {
371        namespace: String::from(ROOT),
372        name: String::from("component"),
373        version: perf_interface.package().version().cloned(),
374    };
375    let root = resolve.packages.alloc(wit_parser::Package {
376        name: root_name.clone(),
377        docs: wit_parser::Docs { contents: None },
378        interfaces: indexmap::IndexMap::new(),
379        #[expect(clippy::iter_on_single_items)]
380        worlds: [(String::from(ROOT), world)].into_iter().collect(),
381    });
382    resolve.package_names.insert(root_name, root);
383
384    if let Some(world) = resolve.worlds.get_mut(world) {
385        world.package = Some(root);
386    }
387
388    wit_component::embed_component_metadata(
389        &mut module,
390        &resolve,
391        world,
392        wit_component::StringEncoding::UTF8,
393    )?;
394
395    let mut encoder = wit_component::ComponentEncoder::default()
396        .module(&module)
397        .context("wit_component::ComponentEncoder::module failed")?;
398
399    let component = encoder
400        .encode()
401        .context("wit_component::ComponentEncoder::encode failed")?;
402
403    Ok(component)
404}
405
406fn create_numcodecs_wasm_perf_module() -> Vec<u8> {
407    let PerfWitInterfaces {
408        perf: perf_interface,
409        instruction_counter,
410    } = PerfWitInterfaces::get();
411
412    let mut module = wasm_encoder::Module::new();
413
414    // Encode the type section with
415    //  types[0] = () -> i64
416    let mut types = wasm_encoder::TypeSection::new();
417    let ty0 = types.len();
418    types.ty().function([], [wasm_encoder::ValType::I64]);
419    module.section(&types);
420
421    // Encode the function section with
422    //  functions[0] = fn() -> i64 [ types[0] ]
423    let mut functions = wasm_encoder::FunctionSection::new();
424    let fn0 = functions.len();
425    functions.function(ty0);
426    module.section(&functions);
427
428    // Encode the export section with
429    //  {perf_interface}#{instruction_counter} = functions[0]
430    let mut exports = wasm_encoder::ExportSection::new();
431    exports.export(
432        &format!("{perf_interface}#{instruction_counter}"),
433        wasm_encoder::ExportKind::Func,
434        fn0,
435    );
436    module.section(&exports);
437
438    // Encode the code section.
439    let mut codes = wasm_encoder::CodeSection::new();
440    let mut fn0 = wasm_encoder::Function::new([]);
441    fn0.instruction(&wasm_encoder::Instruction::Unreachable);
442    fn0.instruction(&wasm_encoder::Instruction::End);
443    codes.function(&fn0);
444    module.section(&codes);
445
446    // Extract the encoded WASM bytes for this module
447    module.finish()
448}