diff --git a/Cargo.lock b/Cargo.lock index 5680d4072..8249da4c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -831,11 +831,12 @@ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "embed-resource" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f54cc3e827ee1c3812239a9a41dede7b4d7d5d5464faa32d71bd7cba28ce2cb2" +checksum = "f4e24052d7be71f0efb50c201557f6fe7d237cfd5a64fd5bcd7fd8fe32dbbffa" dependencies = [ "cc", + "memchr", "rustc_version", "toml 0.8.8", "vswhom", @@ -1109,6 +1110,7 @@ dependencies = [ "anyhow", "cfg-if", "ctor", + "embed-resource", "harp-macros", "itertools", "libc", @@ -1640,9 +1642,9 @@ checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5" [[package]] name = "memchr" -version = "2.6.4" +version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "memoffset" @@ -3731,9 +3733,9 @@ dependencies = [ [[package]] name = "winreg" -version = "0.51.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "937f3df7948156640f46aacef17a70db0de5917bda9c92b0f751f3a955b588fc" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" dependencies = [ "cfg-if", "windows-sys 0.48.0", diff --git a/crates/ark/Cargo.toml b/crates/ark/Cargo.toml index 86764a53c..cc5be8a69 100644 --- a/crates/ark/Cargo.toml +++ b/crates/ark/Cargo.toml @@ -68,7 +68,7 @@ insta = { version = "1.39.0" } [build-dependencies] chrono = "0.4.23" -embed-resource = "2.4.0" +embed-resource = "2.5.0" [package.metadata.generate-rpm] assets = [{ source = "target/release/ark", dest = "/usr/bin/ark", mode = "755" }] diff --git a/crates/ark/build.rs b/crates/ark/build.rs index cf0a1a9f3..b6a138da0 100644 --- 
a/crates/ark/build.rs +++ b/crates/ark/build.rs @@ -37,8 +37,23 @@ fn main() { // Embed an Application Manifest file on Windows. // Documented to do nothing on non-Windows. + // We also do this for harp to support its unit tests. + // + // We can't just use `compile()`, as that uses `cargo:rustc-link-arg-bins`, + // which targets the main `ark.exe` (good) but not the test binaries (bad). + // We need the application manifest to get embedded into the ark/harp test + // binaries too, so that the instance of R started by our tests also has + // UTF-8 support. + // + // We can't use `compile_for_tests()` because `cargo:rustc-link-arg-tests` + // only targets integration tests right now, not unit tests. + // https://github.com/rust-lang/cargo/issues/10937 + // + // Instead we use `compile_for_everything()` which uses the kitchen sink + // instruction of `cargo:rustc-link-arg`, and that seems to work. + // https://github.com/nabijaczleweli/rust-embed-resource/issues/69 let resource = Path::new("resources") .join("manifest") .join("ark-manifest.rc"); - embed_resource::compile(resource, embed_resource::NONE); + embed_resource::compile_for_everything(resource, embed_resource::NONE); } diff --git a/crates/ark/resources/manifest/ark-manifest.rc b/crates/ark/resources/manifest/ark-manifest.rc index 571caf3f7..26e00deb8 100644 --- a/crates/ark/resources/manifest/ark-manifest.rc +++ b/crates/ark/resources/manifest/ark-manifest.rc @@ -1,3 +1,5 @@ +// Autogenerated, do not modify! + // This is a C file that embed-resource compiles for us. // It helps embed our Windows Application Manifest file. 
// https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests @@ -7,5 +9,5 @@ // https://github.com/nabijaczleweli/rust-embed-resource/issues/11#issuecomment-779295722 #include <winuser.h> -// The `1` is a `resource_id` that specifies ark as an executable file +// The `1` is a `resource_id` that specifies us as an executable file 1 RT_MANIFEST "ark.exe.manifest" diff --git a/crates/ark/src/data_explorer/format.rs b/crates/ark/src/data_explorer/format.rs index bf9054e1d..af8067471 100644 --- a/crates/ark/src/data_explorer/format.rs +++ b/crates/ark/src/data_explorer/format.rs @@ -439,8 +439,6 @@ impl Into<ColumnValue> for FormattedValue { #[cfg(test)] mod tests { - use harp::utils::r_envir_set; - use super::*; use crate::test::r_test; @@ -752,40 +750,18 @@ mod tests { ColumnValue::FormattedValue("aa".to_string()), ]); - // options.max_value_length = 1000; - // let text = RObject::from(r#"x <- c("ボルテックス")"#); - // unsafe { r_envir_set("text", text.sexp, R_GlobalEnv) }; - // let data = harp::parse_eval_global(r#"Encoding(text)"#).unwrap(); - // let data = String::try_from(data).unwrap(); - // let _ = harp::parse_eval_global(r#"rm(text)"#).unwrap(); - // assert_eq!(data, "UTF-8".to_string()); - - // let data = harp::parse_eval_global("Sys.getlocale()").unwrap(); - // let data = String::try_from(data).unwrap(); - // assert_eq!(data, "wrong".to_string()); - - // let data = - // harp::parse_eval_global("paste0(capture.output(sessionInfo()), collapse = ' ')") - // .unwrap(); - // let data = String::try_from(data).unwrap(); - // assert_eq!(data, "wrong".to_string()); - - // let _ = harp::parse_eval_global(r#"x <- c("ボルテックス")"#).unwrap(); - // let data = harp::parse_eval_global(r#"Encoding(x)"#).unwrap(); - // let data = String::try_from(data).unwrap(); - // let _ = harp::parse_eval_global(r#"rm(x)"#).unwrap(); - // assert_eq!(data, "UTF-8".to_string()); - // let formatted = format_column(data.sexp, &options); - // assert_eq!(formatted, vec![ColumnValue::FormattedValue( - // 
"ボルテックス".to_string() - // ),]); - - // options.max_value_length = 4; - // let data = harp::parse_eval_global(r#"c("नमस्ते")"#).unwrap(); - // let formatted = format_column(data.sexp, &options); - // assert_eq!(formatted, vec![ColumnValue::FormattedValue( - // "नमस्".to_string() - // ),]); + let data = harp::parse_eval_global(r#"c("ボルテックス")"#).unwrap(); + let formatted = format_column(data.sexp, &options); + assert_eq!(formatted, vec![ColumnValue::FormattedValue( + "ボルテ".to_string() + ),]); + + options.max_value_length = 4; + let data = harp::parse_eval_global(r#"c("नमस्ते")"#).unwrap(); + let formatted = format_column(data.sexp, &options); + assert_eq!(formatted, vec![ColumnValue::FormattedValue( + "नमस्".to_string() + ),]); }) } } diff --git a/crates/ark/src/sys/windows.rs b/crates/ark/src/sys/windows.rs index 6651ef627..19571fe85 100644 --- a/crates/ark/src/sys/windows.rs +++ b/crates/ark/src/sys/windows.rs @@ -8,6 +8,7 @@ pub mod console; pub mod control; pub mod interface; +mod locale; pub mod path; pub mod signals; mod strings; diff --git a/crates/ark/src/sys/windows/locale.rs b/crates/ark/src/sys/windows/locale.rs new file mode 100644 index 000000000..df0ecf72d --- /dev/null +++ b/crates/ark/src/sys/windows/locale.rs @@ -0,0 +1,33 @@ +/* + * locale.rs + * + * Copyright (C) 2024 Posit Software, PBC. All rights reserved. 
+ * + */ + +#[cfg(test)] +mod tests { + use crate::test::r_test; + + #[test] + fn test_locale() { + // These tests assert that we've embedded our Application Manifest file correctly in `build.rs` + r_test(|| { + let latin1 = harp::parse_eval_base("l10n_info()$`Latin-1`").unwrap(); + let latin1 = bool::try_from(latin1).unwrap(); + assert!(!latin1); + + let utf8 = harp::parse_eval_base("l10n_info()$`UTF-8`").unwrap(); + let utf8 = bool::try_from(utf8).unwrap(); + assert!(utf8); + + let codepage = harp::parse_eval_base("l10n_info()$codepage").unwrap(); + let codepage = i32::try_from(codepage).unwrap(); + assert_eq!(codepage, 65001); + + let system_codepage = harp::parse_eval_base("l10n_info()$system.codepage").unwrap(); + let system_codepage = i32::try_from(system_codepage).unwrap(); + assert_eq!(system_codepage, 65001); + }) + } +} diff --git a/crates/ark/tests/data_explorer.rs b/crates/ark/tests/data_explorer.rs index 3a984d4e0..c5285aefa 100644 --- a/crates/ark/tests/data_explorer.rs +++ b/crates/ark/tests/data_explorer.rs @@ -1808,11 +1808,6 @@ fn test_histogram() { #[test] fn test_frequency_table() { r_test(|| { - let data = harp::parse_eval_global("paste0(capture.output(sessionInfo()), collapse = ' ')") - .unwrap(); - let data = String::try_from(data).unwrap(); - assert_eq!(data, "wrong".to_string()); - let socket = open_data_explorer_from_expression("data.frame(x = rep(letters[1:10], 10:1))", None) .unwrap(); diff --git a/crates/harp/Cargo.toml b/crates/harp/Cargo.toml index 56b7680db..0df09ee39 100644 --- a/crates/harp/Cargo.toml +++ b/crates/harp/Cargo.toml @@ -28,3 +28,6 @@ serde = { version = "1.0.183", features = ["derive"] } serde_json = { version = "1.0.94", features = ["preserve_order"]} rust-embed = "8.2.0" tracing-error = "0.2.0" + +[build-dependencies] +embed-resource = "2.5.0" diff --git a/crates/harp/build.rs b/crates/harp/build.rs new file mode 100644 index 000000000..b2b3f3d15 --- /dev/null +++ b/crates/harp/build.rs @@ -0,0 +1,22 @@ +// +// 
build.rs +// +// Copyright (C) 2024 Posit Software, PBC. All rights reserved. +// +// + +use std::path::Path; +extern crate embed_resource; + +fn main() { + // Embed an Application Manifest file on Windows. + // Documented to do nothing on non-Windows. + // We also do this for ark. + // We don't generate a main `harp.exe` binary, but `cargo test` does generate a `harp-*.exe` + // binary for unit testing, and those unit tests also start R and test UTF-8 related capabilities! + // So we need that test executable to include a manifest file too. + let resource = Path::new("resources") + .join("manifest") + .join("harp-manifest.rc"); + embed_resource::compile_for_everything(resource, embed_resource::NONE); +} diff --git a/crates/harp/resources/manifest/harp-manifest.rc b/crates/harp/resources/manifest/harp-manifest.rc new file mode 100644 index 000000000..dc8649797 --- /dev/null +++ b/crates/harp/resources/manifest/harp-manifest.rc @@ -0,0 +1,13 @@ +// Autogenerated, do not modify! + +// This is a C file that embed-resource compiles for us. +// It helps embed our Windows Application Manifest file. +// https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests +// https://learn.microsoft.com/en-us/cpp/build/reference/manifest-create-side-by-side-assembly-manifest?view=msvc-170 + +// This defines `RT_MANIFEST` to `24`. embed-resource should know how to ensure the header is available. 
+// https://github.com/nabijaczleweli/rust-embed-resource/issues/11#issuecomment-779295722 +#include <winuser.h> + +// The `1` is a `resource_id` that specifies us as an executable file +1 RT_MANIFEST "harp.exe.manifest" diff --git a/crates/harp/resources/manifest/harp.exe.manifest b/crates/harp/resources/manifest/harp.exe.manifest new file mode 100644 index 000000000..e3bab77fd --- /dev/null +++ b/crates/harp/resources/manifest/harp.exe.manifest @@ -0,0 +1,40 @@ + + + + + + harp + + + + + UTF-8 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crates/harp/src/sys/windows.rs b/crates/harp/src/sys/windows.rs index 95932ce46..dff45856b 100644 --- a/crates/harp/src/sys/windows.rs +++ b/crates/harp/src/sys/windows.rs @@ -7,4 +7,5 @@ pub mod library; pub mod line_ending; +mod locale; pub mod polled_events; diff --git a/crates/harp/src/sys/windows/locale.rs b/crates/harp/src/sys/windows/locale.rs new file mode 100644 index 000000000..df0ecf72d --- /dev/null +++ b/crates/harp/src/sys/windows/locale.rs @@ -0,0 +1,33 @@ +/* + * locale.rs + * + * Copyright (C) 2024 Posit Software, PBC. All rights reserved. + * + */ + +#[cfg(test)] +mod tests { + use crate::test::r_test; + + #[test] + fn test_locale() { + // These tests assert that we've embedded our Application Manifest file correctly in `build.rs` + r_test(|| { + let latin1 = harp::parse_eval_base("l10n_info()$`Latin-1`").unwrap(); + let latin1 = bool::try_from(latin1).unwrap(); + assert!(!latin1); + + let utf8 = harp::parse_eval_base("l10n_info()$`UTF-8`").unwrap(); + let utf8 = bool::try_from(utf8).unwrap(); + assert!(utf8); + + let codepage = harp::parse_eval_base("l10n_info()$codepage").unwrap(); + let codepage = i32::try_from(codepage).unwrap(); + assert_eq!(codepage, 65001); + + let system_codepage = harp::parse_eval_base("l10n_info()$system.codepage").unwrap(); + let system_codepage = i32::try_from(system_codepage).unwrap(); + assert_eq!(system_codepage, 65001); + }) + } +}