From 2d8d9277a7b8dfef4f56f82bf6e93b19f83da310 Mon Sep 17 00:00:00 2001 From: Jeffrey Heer Date: Sun, 9 May 2021 23:15:03 +0200 Subject: [PATCH] Add column value extraction methods. (#163) * feat: Add table values() method. * docs: Fix anchor link for table values section. * feat: Add table array() method. --- docs/api/table.md | 81 ++++++++++++++++++++---------- src/table/column-table.js | 6 +-- src/table/table.js | 29 ++++++++++- test/table/column-table-test.js | 87 ++++++++++++++++++++++----------- 4 files changed, 144 insertions(+), 59 deletions(-) diff --git a/docs/api/table.md b/docs/api/table.md index 0279d124..67ae107f 100644 --- a/docs/api/table.md +++ b/docs/api/table.md @@ -14,7 +14,8 @@ title: Table \| Arquero API Reference * [column](#column), [columnAt](#columnAt), [columnArray](#columnArray) * [columnIndex](#columnIndex), [columnName](#columnName), [columnNames](#columnNames) * [assign](#assign) -* [Table Values](#values) +* [Table Values](#table-values) + * [array](#array), [values](#values) * [data](#data), [get](#get), [getter](#getter) * [indices](#indices), [partitions](#partitions), [scan](#scan) * [Table Output](#output) @@ -217,30 +218,14 @@ dt.columnAt(1).get(1) // 5 ```
# -table.columnArray(name[, arrayConstructor]) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/column-table.js) +table.columnArray(name[, constructor]) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/table.js) -Get an array of values contained in the column with the given *name*. Unlike direct access through the table [column](#column) method, the array returned by this method respects any table filter or orderby criteria. By default, a standard [Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array) is returned; use the *arrayConstructor* argument to specify a [typed array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray). +_This method is a deprecated alias for the table [array()](#array) method. Please use [array()](#array) instead._ -* *name*: The column name. -* *arrayConstructor*: An optional array constructor (default [`Array`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Array)) to use to instantiate the output array. Note that errors or truncated values may occur when assigning to a typed array with an incompatible type. - -*Examples* - -```js -aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) - .columnArray('b'); // [ 4, 5, 6 ] -``` +Get an array of values contained in the column with the given *name*. Unlike direct access through the table [column](#column) method, the array returned by this method respects any table filter or orderby criteria. By default, a standard [Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array) is returned; use the *constructor* argument to specify a [typed array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray). 
-```js -aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) - .filter(d => d.a > 1) - .columnArray('b'); // [ 5, 6 ] -``` - -```js -aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) - .columnArray('b', Int32Array); // Int32Array.of(4, 5, 6) -``` +* *name*: The column name. +* *constructor*: An optional array constructor (default [`Array`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Array)) to use to instantiate the output array. Note that errors or truncated values may occur when assigning to a typed array with an incompatible type.
# table.columnIndex(name) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/table.js) @@ -304,7 +289,53 @@ t1.assign(t2); // { a: [1, 2], b: [7, 8], c: [5, 6] }
-## Table Values +## Table Values + +
# +table.array(name[, constructor]) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/column-table.js) + +Get an array of values contained in the column with the given *name*. Unlike direct access through the table [column](#column) method, the array returned by this method respects any table filter or orderby criteria. By default, a standard [Array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array) is returned; use the *constructor* argument to specify a [typed array](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/TypedArray). + +* *name*: The column name. +* *constructor*: An optional array constructor (default [`Array`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/Array)) to use to instantiate the output array. Note that errors or truncated values may occur when assigning to a typed array with an incompatible type. + +*Examples* + +```js +aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) + .array('b'); // [ 4, 5, 6 ] +``` + +```js +aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) + .filter(d => d.a > 1) + .array('b'); // [ 5, 6 ] +``` + +```js +aq.table({ a: [1, 2, 3], b: [4, 5, 6] }) + .array('b', Int32Array); // Int32Array.of(4, 5, 6) +``` + +
# +table.values(name) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/table.js) + +Returns an iterator over values in the column with the given *name*. The iterator returned by this method respects any table filter or orderby criteria. + +* *name*: The column name. + +*Examples* + +```js +for (const value of table.values('colA')) { + // do something with ordered values from column A +} +``` + +```js +// slightly less efficient version of table.array('colA') +const colValues = Array.from(table.values('colA')); +```
# table.data() · [Source](https://github.com/uwdata/arquero/blob/master/src/table/table.js) @@ -339,6 +370,8 @@ dt.get('a', 2) // 1 Returns an accessor ("getter") function for a column. The returned function takes a row index as its single argument and returns the corresponding column value. Row indices are relative to any filtering and ordering criteria, not the internal data layout. +* *name*: The column name. + *Examples* ```js @@ -355,8 +388,6 @@ get(0) // 3 get(2) // 1 ``` -* *name*: The column name. -
# table.indices([order]) · [Source](https://github.com/uwdata/arquero/blob/master/src/table/table.js) diff --git a/src/table/column-table.js b/src/table/column-table.js index 873d27bb..311ae431 100644 --- a/src/table/column-table.js +++ b/src/table/column-table.js @@ -120,13 +120,13 @@ export default class ColumnTable extends Table { * Get an array of values contained in a column. The resulting array * respects any table filter or orderby criteria. * @param {string} name The column name. - * @param {ArrayConstructor|import('./table').TypedArrayConstructor} [arrayConstructor=Array] + * @param {ArrayConstructor|import('./table').TypedArrayConstructor} [constructor=Array] * The array constructor for instantiating the output array. * @return {import('./table').DataValue[]|import('./table).TypedArray} The array of column values. */ - columnArray(name, arrayConstructor = Array) { + array(name, constructor = Array) { const column = this.column(name); - const array = new arrayConstructor(this.numRows()); + const array = new constructor(this.numRows()); let idx = -1; this.scan(row => array[++idx] = column.get(row), true); return array; diff --git a/src/table/table.js b/src/table/table.js index 76449da4..33358fdd 100644 --- a/src/table/table.js +++ b/src/table/table.js @@ -184,18 +184,43 @@ export default class Table extends Transformable { return this._names.indexOf(name); } + /** + * Deprecated alias for the table array() method: use table.array() + * instead. Get an array of values contained in a column. The resulting + * array respects any table filter or orderby criteria. + * @param {string} name The column name. + * @param {ArrayConstructor|TypedArrayConstructor} [constructor=Array] + * The array constructor for instantiating the output array. + * @return {DataValue[]|TypedArray} The array of column values. + */ + columnArray(name, constructor) { + return this.array(name, constructor); + } + /** * Get an array of values contained in a column. 
The resulting array * respects any table filter or orderby criteria. * @param {string} name The column name. - * @param {ArrayConstructor|TypedArrayConstructor} [arrayConstructor=Array] + * @param {ArrayConstructor|TypedArrayConstructor} [constructor=Array] * The array constructor for instantiating the output array. * @return {DataValue[]|TypedArray} The array of column values. */ - columnArray(name, arrayConstructor) { // eslint-disable-line no-unused-vars + array(name, constructor) { // eslint-disable-line no-unused-vars error('Not implemented'); } + /** + * Returns an iterator over column values. + * @return {Iterator} An iterator over column values. + */ + *values(name) { + const get = this.getter(name); + const n = this.numRows(); + for (let i = 0; i < n; ++i) { + yield get(i); + } + } + /** * Get the value for the given column and row. * @param {string} name The column name. diff --git a/test/table/column-table-test.js b/test/table/column-table-test.js index cd327230..633c5f63 100644 --- a/test/table/column-table-test.js +++ b/test/table/column-table-test.js @@ -119,6 +119,64 @@ tape('ColumnTable memoizes indices', t => { t.end(); }); +tape('ColumnTable supports column values output', t => { + const dt = new ColumnTable({ + u: ['a', 'a', 'a', 'b', 'b'], + v: [2, 1, 4, 5, 3] + }) + .filter(d => d.v > 1) + .orderby('v'); + + t.deepEqual( + Array.from(dt.values('u')), + ['a', 'b', 'a', 'b'], + 'column values, strings' + ); + + t.deepEqual( + Array.from(dt.values('v')), + [2, 3, 4, 5], + 'column values, numbers' + ); + + t.deepEqual( + Int32Array.from(dt.values('v')), + Int32Array.of(2, 3, 4, 5), + 'column values, typed array' + ); + + t.end(); +}); + +tape('ColumnTable supports column array output', t => { + const dt = new ColumnTable({ + u: ['a', 'a', 'a', 'b', 'b'], + v: [2, 1, 4, 5, 3] + }) + .filter(d => d.v > 1) + .orderby('v'); + + t.deepEqual( + dt.array('u'), + ['a', 'b', 'a', 'b'], + 'column array, strings' + ); + + t.deepEqual( + dt.array('v'), + [2, 3, 
4, 5], + 'column array, numbers' + ); + + t.deepEqual( + dt.array('v', Int32Array), + Int32Array.of(2, 3, 4, 5), + 'column array, typed array' + ); + + t.end(); +}); + tape('ColumnTable supports object output', t => { const output = [ { u: 'a', v: 1 }, @@ -175,35 +233,6 @@ tape('ColumnTable supports object output', t => { t.end(); }); -tape('ColumnTable supports column array output', t => { - const dt = new ColumnTable({ - u: ['a', 'a', 'a', 'b', 'b'], - v: [2, 1, 4, 5, 3] - }) - .filter(d => d.v > 1) - .orderby('v'); - - t.deepEqual( - dt.columnArray('u'), - ['a', 'b', 'a', 'b'], - 'column array, strings' - ); - - t.deepEqual( - dt.columnArray('v'), - [2, 3, 4, 5], - 'column array, numbers' - ); - - t.deepEqual( - dt.columnArray('v', Int32Array), - Int32Array.of(2, 3, 4, 5), - 'column array, typed array' - ); - - t.end(); -}); - tape('ColumnTable supports grouped object output', t => { const dt = new ColumnTable({ u: ['a', 'a', 'a', 'b', 'b'],