From 5848e7e7600e271987c9e9e35a4fc1545deb1d87 Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Mon, 9 Sep 2024 08:12:45 -0700 Subject: [PATCH 1/4] coerce Arrow BigInt to Number --- src/options.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/options.js b/src/options.js index ac9caca472..e69b7bf4f9 100644 --- a/src/options.js +++ b/src/options.js @@ -69,6 +69,8 @@ function maybeTypedArrowify(vector, type) { ? vector : (type === undefined || type === Array) && isArrowDateType(vector.type) ? coerceDates(vector.toArray()) + : (type === undefined || type === Array) && isArrowBigIntType(vector.type) + ? (type ?? Float64Array).from(vector.toArray(), Number) : maybeTypedArrayify(vector.toArray(), type); } @@ -625,6 +627,7 @@ function isArrowVector(value) { // Apache Arrow now represents dates as numbers. We currently only support // implicit coercion to JavaScript Date objects when the numbers represent // milliseconds since Unix epoch. +// https://github.com/apache/arrow/blob/cd50c324882ab1419d1728e9adad20d47b185508/js/src/enum.ts#L52-L72 function isArrowDateType(type) { return ( type && @@ -633,3 +636,12 @@ function isArrowDateType(type) { type.unit === 1 // millisecond ); } + +// https://github.com/apache/arrow/blob/cd50c324882ab1419d1728e9adad20d47b185508/js/src/enum.ts#L52-L72 +function isArrowBigIntType(type) { + return ( + type && + type.typeId === 2 && // int + type.bitWidth >= 64 + ); +} From 7aa6b4d6a014f89c8562560862efd626fbaf106c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 9 Sep 2024 19:35:21 +0200 Subject: [PATCH 2/4] test BigInt coercion --- test/data/height_frequency.arrow | Bin 0 -> 1946 bytes test/output/bigintNormalize.svg | 58 +++++++++++++++++++++++++++++++ test/plots/bigint.ts | 17 +++++++++ 3 files changed, 75 insertions(+) create mode 100644 test/data/height_frequency.arrow create mode 100644 test/output/bigintNormalize.svg diff --git a/test/data/height_frequency.arrow b/test/data/height_frequency.arrow new file mode 100644 index 0000000000000000000000000000000000000000..82b5d9f96fb9e6770efe0db47ed2666e732c2c05 GIT binary patch literal 1946 zcmd6oJxml)5XWcXI8H+J3IvP^LZSvsKPr>UJp2gSXtFsPdZ?H3)I|lk6N_@UGO$DEA4g1t4oI7RI5j&rb?^35 zxi3hj0(JWw;zmv`Md}kjFHwC;5o?G#HP^7ToHVt(w3(^9cV-@$r*kx>D+%lG^FB$Y ze(6(!UZ0gZb2KP@!(A94WP{S9G|D(BO`lf|-J;>SZK1u}mkKWy)V#lbdt8UM=hdK_%U5F^HMCG;KYg{ve%MxxZM`v{@&jWo`-Cw+_YaDBMY3%z$GrJXE#~T|4vhJj zPgut~>sV)<-@I0<^M+6?t=F;cI@aA_k5g)3{SEfGOAT-|z|jCl6C6!&G{MmXM-v=P za3tVJz>$C>0Y?Ik1RM!CQgEc;NWqbUBLznajutpt;AnxP1&$UtTHwgQk%1!vM+S}z z92q#sS)IvQUC7ycBWLv?XZ0s%bs}f)k(||wob3y8RyT6CKgiksh@90ga<)$*XZs{_ zw%;OW`y_IaZ4m#B>+Qe%5D(gkdBOMRhs5x?$QNlAR8JJOBPMhZIto1rJqo=5Jp;WA zeFD7--Gx4ezJu;SKS5t$p_}mg;lG9cMEoKE`2)_&cEAU@I=b>}Z@ILOtABWz8hWC9Q{s{C0bQn4S4d?A4 z_@RD5yAcolq2BxVN!TC22zYy{=ewrTGrCv)7kPiO7wv49llq**!+%F}e3hp5(O;?U Q-2cD-W8Y=HWB#A~2W)SH4gdfE literal 0 HcmV?d00001 diff --git a/test/output/bigintNormalize.svg b/test/output/bigintNormalize.svg new file mode 100644 index 0000000000..93053de65e --- /dev/null +++ b/test/output/bigintNormalize.svg @@ -0,0 +1,58 @@ + + + + + 1.3 + 1.4 + 1.5 + 1.6 + 1.7 + 1.8 + 1.9 + 2.0 + 2.1 + 2.2 + + + ↑ height + + + + + 0 + + + frequency (%) + + + + + \ No newline at end of file diff --git a/test/plots/bigint.ts b/test/plots/bigint.ts index 0d526f75ca..7ab876dac8 100644 --- a/test/plots/bigint.ts +++ b/test/plots/bigint.ts @@ -1,5 +1,6 @@ import * as Plot from "@observablehq/plot"; import * as d3 from "d3"; +import * as Arrow from "apache-arrow"; const integers = d3.range(40).map((int) => ({ big1: BigInt(int), @@ -25,3 +26,19 @@ export async function bigintOrdinal() { export async function bigintStack() { return Plot.barY(integers, {x: (d, i) => i % 5, y: "big1"}).plot(); } + +export async function bigintNormalize() { + const table = await Arrow.tableFromIPC(fetch("data/height_frequency.arrow")); + return Plot.plot({ + height: 500, + x: {percent: true, grid: true}, + y: {domain: [1.3, 2.21]}, + marks: [ + Plot.ruleX([0]), + Plot.ruleY( + table, + Plot.normalizeX("sum", {strokeWidth: 2, y: "height", x: "frequency", tip: {format: {y: ".2f"}}}) + ) + ] + }); +} From db209495e87739afd5bc6079b65248cbc3e2ce08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 9 Sep 2024 19:38:19 +0200 Subject: [PATCH 3/4] Delete test/output/bigintNormalize.svg this test output is broken --- test/output/bigintNormalize.svg | 58 --------------------------------- 1 file changed, 58 deletions(-) delete mode 100644 test/output/bigintNormalize.svg diff --git a/test/output/bigintNormalize.svg b/test/output/bigintNormalize.svg deleted file mode 100644 index 93053de65e..0000000000 --- a/test/output/bigintNormalize.svg +++ /dev/null @@ -1,58 +0,0 @@ - - - - - 1.3 - 1.4 - 1.5 - 1.6 - 1.7 - 1.8 - 1.9 - 2.0 - 2.1 - 2.2 - - - ↑ height - - - - - 0 - - - frequency (%) - - - - - \ No newline at end of file From 882ed4afd5bb4fec5036aa54e259d67359ff5944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Rivi=C3=A8re?= Date: Mon, 9 Sep 2024 21:40:41 +0200 Subject: [PATCH 4/4] generate arrow structure on the fly rather than using a file --- test/data/height_frequency.arrow | Bin 1946 -> 0 bytes test/output/bigintNormalize.svg | 173 +++++++++++++++++++++++++++++++ test/plots/bigint.ts | 6 +- 3 files changed, 177 insertions(+), 2 deletions(-) delete mode 100644 test/data/height_frequency.arrow create mode 100644 test/output/bigintNormalize.svg diff --git a/test/data/height_frequency.arrow b/test/data/height_frequency.arrow deleted file mode 100644 index 82b5d9f96fb9e6770efe0db47ed2666e732c2c05..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1946 zcmd6oJxml)5XWcXI8H+J3IvP^LZSvsKPr>UJp2gSXtFsPdZ?H3)I|lk6N_@UGO$DEA4g1t4oI7RI5j&rb?^35 zxi3hj0(JWw;zmv`Md}kjFHwC;5o?G#HP^7ToHVt(w3(^9cV-@$r*kx>D+%lG^FB$Y ze(6(!UZ0gZb2KP@!(A94WP{S9G|D(BO`lf|-J;>SZK1u}mkKWy)V#lbdt8UM=hdK_%U5F^HMCG;KYg{ve%MxxZM`v{@&jWo`-Cw+_YaDBMY3%z$GrJXE#~T|4vhJj zPgut~>sV)<-@I0<^M+6?t=F;cI@aA_k5g)3{SEfGOAT-|z|jCl6C6!&G{MmXM-v=P za3tVJz>$C>0Y?Ik1RM!CQgEc;NWqbUBLznajutpt;AnxP1&$UtTHwgQk%1!vM+S}z z92q#sS)IvQUC7ycBWLv?XZ0s%bs}f)k(||wob3y8RyT6CKgiksh@90ga<)$*XZs{_ zw%;OW`y_IaZ4m#B>+Qe%5D(gkdBOMRhs5x?$QNlAR8JJOBPMhZIto1rJqo=5Jp;WA zeFD7--Gx4ezJu;SKS5t$p_}mg;lG9cMEoKE`2)_&cEAU@I=b>}Z@ILOtABWz8hWC9Q{s{C0bQn4S4d?A4 z_@RD5yAcolq2BxVN!TC22zYy{=ewrTGrCv)7kPiO7wv49llq**!+%F}e3hp5(O;?U Q-2cD-W8Y=HWB#A~2W)SH4gdfE diff --git a/test/output/bigintNormalize.svg b/test/output/bigintNormalize.svg new file mode 100644 index 0000000000..2d7b5e15b5 --- /dev/null +++ b/test/output/bigintNormalize.svg @@ -0,0 +1,173 @@ + + + + + 1.3 + 1.4 + 1.5 + 1.6 + 1.7 + 1.8 + 1.9 + 2.0 + 2.1 + 2.2 + + + ↑ height + + + + + 0.0 + 0.5 + 1.0 + 1.5 + 2.0 + 2.5 + 3.0 + 3.5 + 4.0 + 4.5 + 5.0 + + + frequency (%) → + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/plots/bigint.ts b/test/plots/bigint.ts index 7ab876dac8..b50d02ed2f 100644 --- a/test/plots/bigint.ts +++ b/test/plots/bigint.ts @@ -28,11 +28,13 @@ export async function bigintStack() { } export async function bigintNormalize() { - const table = await Arrow.tableFromIPC(fetch("data/height_frequency.arrow")); + const heights = await d3.csv("data/athletes.csv").then((data) => data.map((d) => d.height)); + const table = Arrow.tableFromJSON( + d3.groups(heights, (d) => (d ? +d : NaN)).map(([height, {length}]) => ({height, frequency: BigInt(length)})) + ); return Plot.plot({ height: 500, x: {percent: true, grid: true}, - y: {domain: [1.3, 2.21]}, marks: [ Plot.ruleX([0]), Plot.ruleY(