Skip to content

Commit

Permalink
Merge pull request #2013 from jqnatividad/2012-apply-base62-encodedecode
Browse files Browse the repository at this point in the history
`apply`: add base62 encode/decode operations
  • Loading branch information
jqnatividad committed Jul 29, 2024
2 parents 72f48fe + 790cf45 commit c8ad3d9
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 13 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ ahash = "0.8"
anyhow = { version = "1.0", optional = true }
arboard = "3.4.0"
atoi_simd = "0.16"
base62 = { version = "2.0", optional = true }
bincode = "1.3"
byteorder = "1.5"
bytes = "1"
Expand Down Expand Up @@ -279,6 +280,7 @@ distrib_features = [
]
all_features = ["distrib_features", "self_update"]
apply = [
"base62",
"censor",
"cpc",
"data-encoding",
Expand Down
34 changes: 25 additions & 9 deletions src/cmd/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ number of transformed columns with the --rename option is the same. e.g.:
$ qsv apply operations trim,upper col1,col2,col3 -r newcol1,newcol2,newcol3 file.csv
It has 36 supported operations:
It has 38 supported operations:
* len: Return string length
* lower: Transform to lowercase
Expand All @@ -44,6 +44,8 @@ It has 36 supported operations:
* strip_prefix: Removes specified prefix in --comparand
* strip_suffix: Remove specified suffix in --comparand
* escape - escape (Rust escape_default)
* encode62: base62 encode (the value must be an unsigned integer)
* decode62: base62 decode (returns the decoded unsigned integer)
* encode64: base64 encode
* decode64: base64 decode
* replace: Replace all matches of a pattern (using --comparand)
Expand Down Expand Up @@ -128,10 +130,9 @@ Convert the USD_Price to PHP_Price using the currency symbol "PHP" with a conver
$ qsv apply operations numtocurrency USD_Price -C PHP -R 60 -c PHP_Price file.csv
Base64 encode the plaintext_col column and save the encoded value into new column named encoded_col
and then decode it.
Base64 encode the text_col column & save the encoded value into new column named encoded & decode it.
$ qsv apply operations encode plaintext_col -c encoded_col file.csv | qsv apply operations decode encode_col
$ qsv apply operations encode64 text_col -c encoded file.csv | qsv apply operations decode64 encoded
Compute the Normalized Damerau-Levenshtein similarity of the neighborhood column to the string 'Roxbury'
and save it to a new column named dln_roxbury_score.
Expand Down Expand Up @@ -299,6 +300,7 @@ Common options:

use std::{str::FromStr, sync::OnceLock};

use base62;
use censor::{Censor, Sex, Zealous};
use cpc::{eval, units::Unit};
use data_encoding::BASE64;
Expand Down Expand Up @@ -344,8 +346,10 @@ enum Operations {
Censor_Count,
Copy,
Currencytonum,
Decode,
Encode,
Decode62,
Decode64,
Encode62,
Encode64,
Escape,
Eudex,
Gender_Guess,
Expand Down Expand Up @@ -998,16 +1002,28 @@ fn apply_operations(
Operations::Mrtrim => {
*cell = String::from(cell.trim_end_matches(comparand));
},
Operations::Encode => {
Operations::Encode64 => {
*cell = BASE64.encode(cell.as_bytes());
},
Operations::Decode => {
Operations::Decode64 => {
let mut output = vec![0; BASE64.decode_len(cell.len()).unwrap_or_default()];
*cell = match BASE64.decode_mut(cell.as_bytes(), &mut output) {
Ok(len) => simdutf8::basic::from_utf8(&output[0..len])
.unwrap_or_default()
.to_owned(),
Err(e) => format!("decoding error: {e:?}"),
Err(e) => format!("decoding64 error: {e:?}"),
};
},
Operations::Encode62 => {
*cell = match cell.parse::<u128>() {
Ok(num) => base62::encode(num),
Err(e) => format!("encode62 error: {e:?}"),
};
},
Operations::Decode62 => {
*cell = match base62::decode(cell.as_str()) {
Ok(decoded) => decoded.to_string(),
Err(e) => format!("decode62 error: {e:?}"),
};
},
Operations::Gender_Guess => {
Expand Down
85 changes: 81 additions & 4 deletions tests/test_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,84 @@ fn apply_ops_upper_index_params() {
}

#[test]
fn apply_ops_encode() {
/// Exercises the `decode62` operation of `qsv apply operations`.
///
/// Column 2 (`encoded_hash`) holds base62 strings; decoding each one into the
/// new `decoded_hash` column is expected to reproduce the number stored in the
/// `hash` column of the same row.
fn apply_ops_decode62() {
    let wrk = Workdir::new("apply");
    wrk.create(
        "data.csv",
        vec![
            svec!["hash", "encoded_hash"],
            svec!["3824660653605227303", "4YWz9bdwJXT"],
            svec!["1851770582521928574", "2Cn7zXlsIM2"],
            svec!["7916590694040213670", "9Qo4IeFuLyQ"],
            svec!["10903434754618017012", "CzRqVIpxoUG"],
            svec!["7671262618974725285", "98gSmJMD1XB"],
        ],
    );
    let mut cmd = wrk.command("apply");
    cmd.arg("operations")
        .arg("decode62")
        .arg("2")
        .arg("--new-column")
        .arg("decoded_hash")
        .arg("data.csv");

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let expected = vec![
        svec!["hash", "encoded_hash", "decoded_hash"],
        svec!["3824660653605227303", "4YWz9bdwJXT", "3824660653605227303"],
        svec!["1851770582521928574", "2Cn7zXlsIM2", "1851770582521928574"],
        svec!["7916590694040213670", "9Qo4IeFuLyQ", "7916590694040213670"],
        svec![
            "10903434754618017012",
            "CzRqVIpxoUG",
            "10903434754618017012"
        ],
        svec!["7671262618974725285", "98gSmJMD1XB", "7671262618974725285"],
    ];
    assert_eq!(got, expected);
}

/// Exercises the `encode62` operation of `qsv apply operations`.
///
/// Column 1 (`hash`) holds decimal numbers that are base62-encoded into the
/// new `encoded_hash` column. The last row is not a number, so the operation
/// is expected to write an `encode62 error: ...` message into the cell
/// instead of failing the whole command.
#[test]
fn apply_ops_encode62() {
    let wrk = Workdir::new("apply");
    wrk.create(
        "data.csv",
        vec![
            svec!["hash"],
            svec!["3824660653605227303"],
            svec!["1851770582521928574"],
            svec!["7916590694040213670"],
            svec!["10903434754618017012"],
            svec!["7671262618974725285"],
            svec!["this should cause an error"],
        ],
    );
    let mut cmd = wrk.command("apply");
    cmd.arg("operations")
        .arg("encode62")
        .arg("1")
        .arg("--new-column")
        .arg("encoded_hash")
        .arg("data.csv");

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);
    let expected = vec![
        svec!["hash", "encoded_hash"],
        svec!["3824660653605227303", "4YWz9bdwJXT"],
        svec!["1851770582521928574", "2Cn7zXlsIM2"],
        svec!["7916590694040213670", "9Qo4IeFuLyQ"],
        svec!["10903434754618017012", "CzRqVIpxoUG"],
        svec!["7671262618974725285", "98gSmJMD1XB"],
        svec![
            "this should cause an error",
            "encode62 error: ParseIntError { kind: InvalidDigit }"
        ],
    ];
    assert_eq!(got, expected);
}

#[test]
fn apply_ops_encode64() {
let wrk = Workdir::new("apply");
wrk.create(
"data.csv",
Expand All @@ -252,7 +329,7 @@ fn apply_ops_encode() {
);
let mut cmd = wrk.command("apply");
cmd.arg("operations")
.arg("encode")
.arg("encode64")
.arg("surname")
.arg("--new-column")
.arg("encoded_surname")
Expand All @@ -277,7 +354,7 @@ fn apply_ops_encode() {
}

#[test]
fn apply_ops_decode() {
fn apply_ops_decode64() {
let wrk = Workdir::new("apply");
wrk.create(
"data.csv",
Expand All @@ -297,7 +374,7 @@ fn apply_ops_decode() {
);
let mut cmd = wrk.command("apply");
cmd.arg("operations")
.arg("decode")
.arg("decode64")
.arg("encoded_surname")
.arg("--new-column")
.arg("decoded_surname")
Expand Down

0 comments on commit c8ad3d9

Please sign in to comment.