Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore files in _scans.tsv that correspond to entries in .bidsignore (#1366) #1914

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
3 changes: 3 additions & 0 deletions bids-validator/tests/setupTests.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Mock sessionStorage
// Install the object returned by getSessionStorage() on `global`, so that any
// later getSessionStorage() call finds it through its
// `'sessionStorage' in global` check and every module shares this one object.
import getSessionStorage from '../utils/getSessionStorage'
global.sessionStorage = getSessionStorage()
30 changes: 30 additions & 0 deletions bids-validator/tests/tsv.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,36 @@ describe('TSV', function () {
)
})

it('should ignore files in scans.tsv that correspond to entries in .bidsignore', function () {
  // Seed the shared storage the same way readDir does: the raw .bidsignore
  // text, JSON-encoded, under the 'bidsignoreContent' key.
  sessionStorage.setItem('bidsignoreContent', JSON.stringify('sodium/'))
  try {
    const fileList = [niftiFile, eegFile, ieegFile]
    // The last row lives under sodium/, which the stored pattern should cause
    // the validator to skip rather than report as missing.
    const tsv =
      'filename\tacq_time\n' +
      'func/sub-08_ses-test_task-linebisection_run-01_bold.nii.gz\t2017-05-03T06:45:45\n' +
      'eeg/sub-08_ses-test_task-linebisection_run-01_eeg.fif\t2017-05-03T06:45:45\n' +
      'ieeg/sub-08_ses-test_task-linebisection_run-01_ieeg.edf\t2017-05-03T06:45:45\n' +
      'sodium/sub-08_acq-23Na_echo-01.nii.gz\t2018-04-26T21:30:00'
    validate.TSV.TSV(scansFile, tsv, fileList, function (issues) {
      assert.deepEqual(issues, [])
    })
  } finally {
    // Always remove the key, even when an assertion throws, so the ignore
    // pattern cannot leak into tests that run afterwards.
    sessionStorage.removeItem('bidsignoreContent')
  }
})

it('should not allow missing files listed in scans.tsv and not accounted for by .bidsignore', function () {
  // Seed the shared storage the same way readDir does: the raw .bidsignore
  // text, JSON-encoded, under the 'bidsignoreContent' key.
  sessionStorage.setItem('bidsignoreContent', JSON.stringify('sodium/'))
  try {
    // ieegFile is deliberately left out of the file list while its row stays
    // in the TSV; exactly one issue with code 129 is expected.
    const fileList = [niftiFile, eegFile]
    const tsv =
      'filename\tacq_time\n' +
      'func/sub-08_ses-test_task-linebisection_run-01_bold.nii.gz\t2017-05-03T06:45:45\n' +
      'eeg/sub-08_ses-test_task-linebisection_run-01_eeg.fif\t2017-05-03T06:45:45\n' +
      'ieeg/sub-08_ses-test_task-linebisection_run-01_ieeg.edf\t2017-05-03T06:45:45\n' +
      'sodium/sub-08_acq-23Na_echo-01.nii.gz\t2018-04-26T21:30:00'
    validate.TSV.TSV(scansFile, tsv, fileList, function (issues) {
      assert(issues.length === 1 && issues[0].code === 129)
    })
  } finally {
    // Always remove the key, even when an assertion throws, so the ignore
    // pattern cannot leak into tests that run afterwards.
    sessionStorage.removeItem('bidsignoreContent')
  }
})

// channels checks -----------------------------------------------------------------

var channelsFileMEG = {
Expand Down
5 changes: 5 additions & 0 deletions bids-validator/utils/files/readDir.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import path from 'path'
import fs from 'fs'
import * as child_proccess from 'child_process'
import isNode from '../isNode'
import getSessionStorage from '../getSessionStorage'

// Storage handle used by this module: under Node it comes from
// getSessionStorage(); otherwise the browser's own window.sessionStorage.
const sessionStorage = isNode ? getSessionStorage() : window.sessionStorage

/**
* Read Directory
Expand Down Expand Up @@ -330,6 +333,8 @@ async function getBIDSIgnore(dir) {
if (bidsIgnoreFileObj) {
const content = await readFile(bidsIgnoreFileObj)
ig.add(content)
// Store the .bidsignore content in session storage
sessionStorage.setItem('bidsignoreContent', JSON.stringify(content))
Copy link
Collaborator

@effigies effigies Mar 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is causing an error.

Unhandled rejection (
  reason: ReferenceError: sessionStorage is not defined
    at getBIDSIgnore (/home/runner/work/bids-validator/bids-validator/bids-validator/bids-validator/utils/files/readDir.js:334:5)
    at Object.readDir (/home/runner/work/bids-validator/bids-validator/bids-validator/bids-validator/utils/files/readDir.js:23:14)
    at /home/runner/work/bids-validator/bids-validator/bids-validator/bids-validator/validators/bids/start.js:40:21
).

}
return ig
}
Expand Down
31 changes: 31 additions & 0 deletions bids-validator/utils/getSessionStorage.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// return sessionStorage based on environment
// uses mock object for use in tests and GitHub workflows
import isNode from './isNode'

// Single in-memory fallback shared by every caller in this process. Without
// it each getSessionStorage() call would build its own private store, so a
// value written by one module could never be read back by another.
let fallbackSessionStorage = null

/**
 * Build a minimal in-memory stand-in for the Web Storage `Storage` interface.
 *
 * Keys and values are coerced to strings and missing keys read back as
 * `null`, mirroring how a browser's sessionStorage behaves. A Map is used as
 * the backing store so inherited object properties (e.g. "toString") are
 * never mistaken for stored entries.
 *
 * @returns {{getItem: Function, setItem: Function, removeItem: Function, clear: Function}}
 */
function createMemoryStorage() {
  const entries = new Map()

  return {
    getItem: (key) => {
      const name = String(key)
      return entries.has(name) ? entries.get(name) : null
    },
    setItem: (key, value) => {
      entries.set(String(key), String(value))
    },
    removeItem: (key) => {
      entries.delete(String(key))
    },
    // Empty the backing store directly; the store itself has no removeItem
    // method, only the wrapper object returned here does.
    clear: () => {
      entries.clear()
    },
  }
}

/**
 * Return the sessionStorage object appropriate for the current environment.
 *
 * Resolution order:
 *  1. `global.sessionStorage` when it exists (e.g. installed by setupTests.js),
 *     so all callers share that same object.
 *  2. `window.sessionStorage` when not running under Node.
 *  3. A process-wide in-memory fallback, created on first use and reused on
 *     every later call.
 *
 * @returns {Object} an object exposing getItem, setItem, removeItem and clear
 */
function getSessionStorage() {
  if ('sessionStorage' in global) {
    // created in setupTests.js; enables data sharing using same object
    return global.sessionStorage
  } else if (!isNode) {
    return window.sessionStorage
  }

  if (fallbackSessionStorage === null) {
    fallbackSessionStorage = createMemoryStorage()
  }
  return fallbackSessionStorage
}

export default getSessionStorage
13 changes: 13 additions & 0 deletions bids-validator/validators/tsv/tsv.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,12 @@ import checkStatusCol from './checkStatusCol'
import checkTypecol from './checkTypeCol'
import parseTSV from './tsvParser'
import checkMotionComponent from './checkMotionComponent'
import getSessionStorage from '../../utils/getSessionStorage'
import ignore from 'ignore'
var path = require('path')

// Storage handle from which the TSV validator reads the 'bidsignoreContent'
// entry when checking scans.tsv filenames.
const sessionStorage = getSessionStorage()

/**
* Format TSV headers for evidence string
* @param {Array[string]} headers
Expand Down Expand Up @@ -612,13 +616,22 @@ const TSV = (file, contents, fileList, callback) => {
}),
)
} else {
// Retrieve the .bidsignore content (if any) from session storage
const content = sessionStorage.getItem('bidsignoreContent')
const ig = content ? ignore().add(JSON.parse(content)) : null

// check scans filenames match pathList
const filenameColumn = headers.indexOf('filename')
for (let l = 1; l < rows.length; l++) {
const row = rows[l]
const scanRelativePath = row[filenameColumn]
const scanFullPath = scanDirPath + '/' + scanRelativePath

// check if file should be ignored based on .bidsignore content
if (ig && ig.ignores(path.relative('/', scanRelativePath))) {
continue
}

// check if scan matches full dataset path list
if (!pathList.includes(scanFullPath)) {
Comment on lines +631 to 636
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I get what you're doing here. Thanks for this!

The issue here is that we want to error if a listed file doesn't exist, but we're currently erroring if the file doesn't exist or exists but is ignored. If you're going to follow the strategy you're using here, instead of storing the ignore patterns, you should be storing the ignored files. We can then say

 if (!(pathList.includes(scanFullPath) || ignoreList.includes(scanFullPath))) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the feedback @effigies

That should also work, just that we would need to store much more data in the session storage (storing ignore patterns vs complete list of ignored files across all subjects and sessions). I'm also curious why the current approach of storing patterns would not suffice; maybe I didn't follow your comment entirely.

As per #1366 , the intention was to allow .bidsignore files to be listed in _scans.tsv.
The plan then was to:

doing a match of scans.tsv files against ignore entries

The PR, in its current form, was intending to just add that one extra check, i.e. for each file listed in _scans.tsv, check if the file is to be ignored (based on .bidsignore), if yes then ignore that file entry with no error. This is being done irrespective of whether the file actually exists in the directory or not.

I added two tests for checking the following cases:

  • file is listed in _scans.tsv but isn't part of output of readDir() -> ERROR 129
  • file is listed in _scans.tsv; isn't part of output of readDir(), but is to be ignored -> no error

and both these tests are passing.

Note that there is a difference between the file actually existing (on disk) vs being present in the output of readDir() as the latter function already filters out the files to be ignored, before returning the list of required files (i.e. existing and non-ignored) to other steps in the workflow (e.g. TSV() in tsv.js)

issues.push(
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
},
"jest": {
"testEnvironment": "node",
"setupFilesAfterEnv": ["<rootDir>/bids-validator/tests/setupTests.js"],
"moduleNameMapper": {
"^uuid$": "uuid"
},
Expand Down
Loading