// numcodecs_wasm_host_reproducible/transform/mod.rs

1use std::sync::OnceLock;
2
3use anyhow::{Context, Error, anyhow};
4use instcnt::PerfWitInterfaces;
5use numcodecs_wasm_host::NumcodecsWitInterfaces;
6
7use crate::{logging::WasiLoggingInterface, stdio::WasiSandboxedStdioInterface};
8
9pub mod instcnt;
10pub mod nan;
11
12#[expect(clippy::too_many_lines)] // FIXME
13pub fn transform_wasm_component(wasm_component: impl Into<Vec<u8>>) -> Result<Vec<u8>, Error> {
14    let NumcodecsWitInterfaces {
15        codec: codec_interface,
16        ..
17    } = NumcodecsWitInterfaces::get();
18
19    // create a new WAC composition graph with the WASI component packages
20    //  pre-registered and the numcodecs:wasm/perf interface pre-exported
21    let PreparedCompositionGraph {
22        graph: wac,
23        wasi: wasi_component_packages,
24    } = get_prepared_composition_graph()?;
25    let mut wac = wac.clone();
26
27    // parse and instantiate the root package, which exports numcodecs:abc/codec
28    let numcodecs_codec_package = wac_graph::types::Package::from_bytes(
29        &format!("{}", codec_interface.package().name()),
30        codec_interface.package().version(),
31        wasm_component,
32        wac.types_mut(),
33    )?;
34
35    let numcodecs_codec_world = &wac.types()[numcodecs_codec_package.ty()];
36    let numcodecs_codec_imports = extract_component_ports(&numcodecs_codec_world.imports)?;
37
38    let numcodecs_codec_package = wac.register_package(numcodecs_codec_package)?;
39    let numcodecs_codec_instance = wac.instantiate(numcodecs_codec_package);
40
41    // list the imports that the linker will provide
42    let linker_provided_imports = [
43        &WasiSandboxedStdioInterface::get().stdio,
44        &WasiLoggingInterface::get().logging,
45    ];
46
47    // initialise the unresolved imports to the imports of the root package
48    let mut unresolved_imports = vecmap::VecMap::new();
49    for import in &numcodecs_codec_imports {
50        unresolved_imports
51            .entry(import.clone())
52            .or_insert_with(Vec::new)
53            .push(numcodecs_codec_instance);
54    }
55
56    // track all non-root instances, which may fulfil imports
57    let mut package_instances = vecmap::VecMap::new();
58
59    // initialise the queue of required, still to instantiate packages
60    //  to the imports of the root package
61    let mut required_packages_queue = numcodecs_codec_imports
62        .iter()
63        .map(|import| import.package().clone())
64        .collect::<std::collections::VecDeque<_>>();
65
66    // iterate while not all required packages have been instantiated
67    while let Some(required_package) = required_packages_queue.pop_front() {
68        if package_instances.contains_key(&required_package) {
69            continue;
70        }
71
72        // some packages do not need to be instantiated since they will be
73        //  provided by the linker
74        if linker_provided_imports
75            .iter()
76            .any(|interface| interface.package() == &required_package)
77        {
78            continue;
79        }
80
81        // find the WASI component package that can fulfil the required package
82        let Some(component_package) = wasi_component_packages.iter().find(|component_package| {
83            component_package
84                .exports
85                .iter()
86                .any(|export| export.package() == &required_package)
87        }) else {
88            return Err(anyhow!(
89                "WASM component requires unresolved import(s) from package {required_package}"
90            ));
91        };
92
93        let PackageWithPorts {
94            package: component_package,
95            imports: component_imports,
96            exports: component_exports,
97        } = component_package;
98
99        // instantiate the component package
100        let component_instance = wac.instantiate(*component_package);
101
102        // try to resolve all imports of the component package ...
103        for import in component_imports {
104            if let Some(dependency_instance) = package_instances.get(import.package()).copied() {
105                // ... if the dependency has already been instantiated,
106                //     import its export directly
107                let import_str = &format!("{import}");
108                let dependency_export =
109                    wac.alias_instance_export(dependency_instance, import_str)?;
110                wac.set_instantiation_argument(component_instance, import_str, dependency_export)?;
111            } else {
112                // ... otherwise require the dependency package and store the
113                //     import so that it can be resolved later
114                required_packages_queue.push_back(import.package().clone());
115                unresolved_imports
116                    .entry(import.clone())
117                    .or_insert_with(Vec::new)
118                    .push(component_instance);
119            }
120        }
121
122        for export in component_exports {
123            // register this instance's package so that its exports can later
124            //  fulfil more imports
125            package_instances.insert(export.package().clone(), component_instance);
126
127            // try to resolve unresolved imports using the export of this package
128            if let Some(unresolved_imports) = unresolved_imports.remove(export) {
129                let export_str = &format!("{export}");
130                let component_export = wac.alias_instance_export(component_instance, export_str)?;
131                for import in unresolved_imports {
132                    wac.set_instantiation_argument(import, export_str, component_export)?;
133                }
134            }
135        }
136    }
137
138    // linker-provided imports will be resolved later
139    for provided in linker_provided_imports {
140        unresolved_imports.remove(provided);
141    }
142
143    if !unresolved_imports.is_empty() {
144        return Err(anyhow!(
145            "WASM component requires unresolved import(s): {:?}",
146            unresolved_imports.into_keys().collect::<Vec<_>>(),
147        ));
148    }
149
150    // export the numcodecs:abc/codec interface
151    let numcodecs_codecs_str = &format!("{codec_interface}");
152    let numcodecs_codecs_export =
153        wac.alias_instance_export(numcodecs_codec_instance, numcodecs_codecs_str)?;
154    wac.export(numcodecs_codecs_export, numcodecs_codecs_str)?;
155
156    // encode the WAC composition graph into a WASM component and validate it
157    let wasm = wac.encode(wac_graph::EncodeOptions {
158        define_components: true,
159        // we do our own validation right below
160        validate: false,
161        processor: None,
162    })?;
163
164    wasmparser::Validator::new_with_features(
165        wasmparser::WasmFeaturesInflated {
166            // MUST: float operations are required
167            //       (and our engine's transformations makes them deterministic)
168            floats: true,
169            // MUST: codecs and reproducible WASI are implemented as components
170            component_model: true,
171            ..crate::engine::DETERMINISTIC_WASM_MODULE_FEATURES
172        }
173        .into(),
174    )
175    .validate_all(&wasm)?;
176
177    Ok(wasm)
178}
179
180struct PreparedCompositionGraph {
181    graph: wac_graph::CompositionGraph,
182    wasi: Box<[PackageWithPorts]>,
183}
184
185fn get_prepared_composition_graph() -> Result<&'static PreparedCompositionGraph, Error> {
186    static PREPARED_COMPOSITION_GRAPH: OnceLock<Result<PreparedCompositionGraph, Error>> =
187        OnceLock::new();
188
189    let prepared_composition_graph = PREPARED_COMPOSITION_GRAPH.get_or_init(|| {
190        let PerfWitInterfaces {
191            perf: perf_interface,
192            ..
193        } = PerfWitInterfaces::get();
194
195        // create a new WAC composition graph
196        let mut wac = wac_graph::CompositionGraph::new();
197
198        // parse and register the WASI component packages
199        let wasi_component_packages =
200            register_wasi_component_packages(&mut wac)?.into_boxed_slice();
201
202        // create, register, and instantiate the numcodecs:wasm package
203        let numcodecs_wasm_perf_instance = instantiate_numcodecs_wasm_perf_package(&mut wac)?;
204
205        // export the numcodecs:wasm/perf interface
206        let numcodecs_wasm_perf_str = &format!("{perf_interface}");
207        let numcodecs_wasm_perf_export =
208            wac.alias_instance_export(numcodecs_wasm_perf_instance, numcodecs_wasm_perf_str)?;
209        wac.export(numcodecs_wasm_perf_export, numcodecs_wasm_perf_str)?;
210
211        Ok(PreparedCompositionGraph {
212            graph: wac,
213            wasi: wasi_component_packages,
214        })
215    });
216
217    match prepared_composition_graph {
218        Ok(prepared_composition_graph) => Ok(prepared_composition_graph),
219        Err(err) => Err(anyhow!(err)),
220    }
221}
222
223struct PackageWithPorts {
224    package: wac_graph::PackageId,
225    imports: Box<[wasm_component_layer::InterfaceIdentifier]>,
226    exports: Box<[wasm_component_layer::InterfaceIdentifier]>,
227}
228
229fn register_wasi_component_packages(
230    wac: &mut wac_graph::CompositionGraph,
231) -> Result<Vec<PackageWithPorts>, Error> {
232    // TODO: switch to merged component with wasi_sandboxed_component_provider >= v0.2.4
233    // const WASI_COMPONENTS: &[(&str, &[u8])] = &[(
234    //     "wasi-sandboxed:merged",
235    //     wasi_sandboxed_component_provider::MERGED_COMPONENT,
236    // )];
237    const WASI_COMPONENTS: &[(&str, &[u8])] = wasi_sandboxed_component_provider::ALL_COMPONENTS;
238
239    let wasi_component_packages = WASI_COMPONENTS
240        .iter()
241        .map(|(component_name, component_bytes)| -> Result<_, Error> {
242            let component_package = wac_graph::types::Package::from_bytes(
243                component_name,
244                None,
245                Vec::from(*component_bytes),
246                wac.types_mut(),
247            )?;
248
249            let component_world = &wac.types()[component_package.ty()];
250
251            let component_imports = extract_component_ports(&component_world.imports)?;
252            let component_exports = extract_component_ports(&component_world.exports)?;
253
254            let component_package = wac.register_package(component_package)?;
255
256            Ok(PackageWithPorts {
257                package: component_package,
258                imports: component_imports.into_boxed_slice(),
259                exports: component_exports.into_boxed_slice(),
260            })
261        })
262        .collect::<Result<Vec<_>, _>>()?;
263
264    Ok(wasi_component_packages)
265}
266
267fn extract_component_ports(
268    ports: &indexmap::IndexMap<String, wac_graph::types::ItemKind>,
269) -> Result<Vec<wasm_component_layer::InterfaceIdentifier>, anyhow::Error> {
270    ports
271        .iter()
272        .filter_map(|(import, kind)| match kind {
273            wac_graph::types::ItemKind::Instance(_) => Some(
274                wasm_component_layer::InterfaceIdentifier::try_from(import.as_str()),
275            ),
276            _ => None,
277        })
278        .collect::<Result<Vec<_>, _>>()
279}
280
281fn instantiate_numcodecs_wasm_perf_package(
282    wac: &mut wac_graph::CompositionGraph,
283) -> Result<wac_graph::NodeId, Error> {
284    let PerfWitInterfaces {
285        perf: perf_interface,
286        ..
287    } = PerfWitInterfaces::get();
288
289    // create, register, and instantiate the numcodecs:wasm/perf package
290    let numcodecs_wasm_perf_package = wac_graph::types::Package::from_bytes(
291        &format!("{}", perf_interface.package().name()),
292        perf_interface.package().version(),
293        create_numcodecs_wasm_perf_component()?,
294        wac.types_mut(),
295    )?;
296
297    let numcodecs_wasm_perf_package = wac.register_package(numcodecs_wasm_perf_package)?;
298    let numcodecs_wasm_perf_instance = wac.instantiate(numcodecs_wasm_perf_package);
299
300    Ok(numcodecs_wasm_perf_instance)
301}
302
303fn create_numcodecs_wasm_perf_component() -> Result<Vec<u8>, Error> {
304    const ROOT: &str = "root";
305
306    let PerfWitInterfaces {
307        perf: perf_interface,
308        instruction_counter,
309    } = PerfWitInterfaces::get();
310
311    let mut module = create_numcodecs_wasm_perf_module();
312
313    let mut resolve = wit_parser::Resolve::new();
314
315    let interface = resolve.interfaces.alloc(wit_parser::Interface {
316        name: Some(String::from(perf_interface.name())),
317        types: indexmap::IndexMap::new(),
318        #[expect(clippy::iter_on_single_items)]
319        functions: [(
320            String::from(instruction_counter),
321            wit_parser::Function {
322                name: String::from(instruction_counter),
323                kind: wit_parser::FunctionKind::Freestanding,
324                params: Vec::new(),
325                result: Some(wit_parser::Type::U64),
326                docs: wit_parser::Docs { contents: None },
327                stability: wit_parser::Stability::Unknown,
328            },
329        )]
330        .into_iter()
331        .collect(),
332        docs: wit_parser::Docs { contents: None },
333        package: None, // The package is linked up below
334        stability: wit_parser::Stability::Unknown,
335    });
336
337    let package_name = wit_parser::PackageName {
338        namespace: String::from(perf_interface.package().name().namespace()),
339        name: String::from(perf_interface.package().name().name()),
340        version: perf_interface.package().version().cloned(),
341    };
342    let package = resolve.packages.alloc(wit_parser::Package {
343        name: package_name.clone(),
344        docs: wit_parser::Docs { contents: None },
345        #[expect(clippy::iter_on_single_items)]
346        interfaces: [(String::from(perf_interface.name()), interface)]
347            .into_iter()
348            .collect(),
349        worlds: indexmap::IndexMap::new(),
350    });
351    resolve.package_names.insert(package_name, package);
352
353    if let Some(interface) = resolve.interfaces.get_mut(interface) {
354        interface.package = Some(package);
355    }
356
357    let world = resolve.worlds.alloc(wit_parser::World {
358        name: String::from(ROOT),
359        imports: indexmap::IndexMap::new(),
360        #[expect(clippy::iter_on_single_items)]
361        exports: [(
362            wit_parser::WorldKey::Interface(interface),
363            wit_parser::WorldItem::Interface {
364                id: interface,
365                stability: wit_parser::Stability::Unknown,
366            },
367        )]
368        .into_iter()
369        .collect(),
370        package: None, // The package is linked up below
371        docs: wit_parser::Docs { contents: None },
372        includes: Vec::new(),
373        include_names: Vec::new(),
374        stability: wit_parser::Stability::Unknown,
375    });
376
377    let root_name = wit_parser::PackageName {
378        namespace: String::from(ROOT),
379        name: String::from("component"),
380        version: perf_interface.package().version().cloned(),
381    };
382    let root = resolve.packages.alloc(wit_parser::Package {
383        name: root_name.clone(),
384        docs: wit_parser::Docs { contents: None },
385        interfaces: indexmap::IndexMap::new(),
386        #[expect(clippy::iter_on_single_items)]
387        worlds: [(String::from(ROOT), world)].into_iter().collect(),
388    });
389    resolve.package_names.insert(root_name, root);
390
391    if let Some(world) = resolve.worlds.get_mut(world) {
392        world.package = Some(root);
393    }
394
395    wit_component::embed_component_metadata(
396        &mut module,
397        &resolve,
398        world,
399        wit_component::StringEncoding::UTF8,
400    )?;
401
402    let mut encoder = wit_component::ComponentEncoder::default()
403        .module(&module)
404        .context("wit_component::ComponentEncoder::module failed")?;
405
406    let component = encoder
407        .encode()
408        .context("wit_component::ComponentEncoder::encode failed")?;
409
410    Ok(component)
411}
412
413fn create_numcodecs_wasm_perf_module() -> Vec<u8> {
414    let PerfWitInterfaces {
415        perf: perf_interface,
416        instruction_counter,
417    } = PerfWitInterfaces::get();
418
419    let mut module = wasm_encoder::Module::new();
420
421    // Encode the type section with
422    //  types[0] = () -> i64
423    let mut types = wasm_encoder::TypeSection::new();
424    let ty0 = types.len();
425    types.ty().function([], [wasm_encoder::ValType::I64]);
426    module.section(&types);
427
428    // Encode the function section with
429    //  functions[0] = fn() -> i64 [ types[0] ]
430    let mut functions = wasm_encoder::FunctionSection::new();
431    let fn0 = functions.len();
432    functions.function(ty0);
433    module.section(&functions);
434
435    // Encode the export section with
436    //  {perf_interface}#{instruction_counter} = functions[0]
437    let mut exports = wasm_encoder::ExportSection::new();
438    exports.export(
439        &format!("{perf_interface}#{instruction_counter}"),
440        wasm_encoder::ExportKind::Func,
441        fn0,
442    );
443    module.section(&exports);
444
445    // Encode the code section.
446    let mut codes = wasm_encoder::CodeSection::new();
447    let mut fn0 = wasm_encoder::Function::new([]);
448    fn0.instruction(&wasm_encoder::Instruction::Unreachable);
449    fn0.instruction(&wasm_encoder::Instruction::End);
450    codes.function(&fn0);
451    module.section(&codes);
452
453    // Extract the encoded WASM bytes for this module
454    module.finish()
455}