Skip to content

Commit

Permalink
test: include all dompurify test cases
Browse files Browse the repository at this point in the history
  • Loading branch information
JounQin committed Dec 13, 2023
1 parent 21ee166 commit 912b39a
Show file tree
Hide file tree
Showing 18 changed files with 1,534 additions and 41 deletions.
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ coverage
dist
lib
CHANGELOG.md
/DOMPurify
/auto-imports.d.ts
/pnpm-lock.yaml
!/.github
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ jobs:
steps:
- name: Checkout Repo
uses: actions/checkout@v4
with:
submodules: true

- name: Setup pnpm
uses: pnpm/action-setup@v2
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "DOMPurify"]
path = DOMPurify
url = https://github.com/cure53/DOMPurify.git
2 changes: 2 additions & 0 deletions .prettierignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@
coverage
dist
lib
/DOMPurify
/auto-imports.d.ts
/pnpm-lock.yaml
2 changes: 2 additions & 0 deletions .stylelintignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ coverage
dist
lib
LICENSE
/DOMPurify
*.json
*.log
*.mts
*.patch
*.snap
*.svg
Expand Down
1 change: 1 addition & 0 deletions DOMPurify
Submodule DOMPurify added at d1e4f2
11 changes: 11 additions & 0 deletions dompurify.fixtures.d.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Ambient typings for `DOMPurify/test/fixtures/expect.mjs`, so the fixture
 * list from the DOMPurify submodule can be imported from TypeScript.
 */
declare module 'DOMPurify/test/fixtures/expect.mjs' {
/** A single fixture entry. */
export interface Fixture {
/** Optional label for the case. */
title?: string
/** Input string — presumably the markup handed to the sanitizer; confirm against the tests. */
payload: string
/** One expected string or a list of them — presumably any listed value is an accepted result; confirm against the tests. */
expected: string[] | string
}

/** The module's default export: an array of fixtures. */
const fixtures: Fixture[]

export default fixtures
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
"@types/react-dom": "^18.2.17",
"@types/web": "^0.0.127",
"@vitest/coverage-istanbul": "^1.0.4",
"domiso": "link:",
"domiso": "link:.",
"github-markdown-css": "^5.5.0",
"jsdom": "^23.0.1",
"react": "^18.2.0",
Expand Down
15 changes: 14 additions & 1 deletion pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

127 changes: 105 additions & 22 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,39 @@
// Shared DOMParser instance; starts out undefined and is created on first use inside `sanitize`.
let domParser: DOMParser | undefined

/**
 * Type guard: tells whether `el` is a `Document` by comparing its `nodeType`
 * with the `DOCUMENT_NODE` constant exposed on the node itself.
 */
const isDocument = (el: Document | Element): el is Document => {
  const { nodeType, DOCUMENT_NODE } = el
  return nodeType === DOCUMENT_NODE
}
/** Either a whole `Document` or a `DocumentFragment`. */
export type DocumentOrFragment = Document | DocumentFragment

/**
 * Type guard: narrows `el` to {@link DocumentOrFragment} when its `nodeType`
 * equals `Node.DOCUMENT_NODE` or `Node.DOCUMENT_FRAGMENT_NODE`.
 */
const isDocumentOrFragment = (
  el: DocumentOrFragment | Element,
): el is DocumentOrFragment => {
  const kind = el.nodeType
  return kind === Node.DOCUMENT_NODE || kind === Node.DOCUMENT_FRAGMENT_NODE
}

/**
 * Returns the lower-cased `tagName` of a node, or `undefined` when the node
 * has no `tagName` property (documents and fragments).
 *
 * @param el - The document, fragment or element to inspect.
 * @returns The lower-cased tag name for elements, otherwise `undefined`.
 */
export function getTagName(el: DocumentOrFragment): undefined
export function getTagName(el: Element): string
export function getTagName(el: DocumentOrFragment | Element): string | undefined
export function getTagName(el: DocumentOrFragment | Element) {
  // Only nodes that expose `tagName` can yield a name.
  if (!('tagName' in el)) {
    return undefined
  }
  return el.tagName.toLowerCase()
}

/**
 * Tag names on which the `form` attribute is treated as disallowed.
 *
 * @see https://www.w3schools.com/tags/att_form.asp
 */
export const DISALLOWED_FORM_ATTR_TAG_NAMES: string[] = [
  'button',
  'fieldset',
  'input',
  'label',
  'meter',
  'object',
  'output',
  'select',
  'textarea',
]

const sanitizeAttributes = (el: Element) => {
const tagName = getTagName(el)
const attrs = el.attributes
for (let i = 0, len = attrs.length; i < len; i++) {
const attr = attrs[i]
if (
/^on/i.test(attr.name) ||
/^(?:data|javascript|vbscript):/i.test(attr.value)
const { name, value } = attr
if (name === 'is') {
attr.value = ''
} else if (
name === 'autofocus' ||
(name === 'form' && DISALLOWED_FORM_ATTR_TAG_NAMES.includes(tagName)) ||
/^on/i.test(name) ||
/^(?:\w+script|data):/i.test(value.replaceAll(/\r?\n/g, ''))
) {
el.removeAttributeNode(attr)
// eslint-disable-next-line sonar/updated-loop-counter -- the attribute is removed, the index and length must be rechecked
Expand All @@ -20,28 +44,73 @@ const sanitizeAttributes = (el: Element) => {
return el
}

const sanitizeChildren = <T extends Document | Element>(el: T) => {
const sanitizeChildren = <T extends DocumentOrFragment | Element>(el: T) => {
for (let i = 0, len = el.children.length; i < len; i++) {
const sanitized = sanitizeNode(el.children[i])
if (sanitized == null) {
// eslint-disable-next-line sonar/updated-loop-counter -- the child is removed, the index and length must be rechecked
const item = el.children[i]
const sanitized = sanitizeNode(item, getTagName(el))
if (sanitized === item) {
continue
}
if (sanitized == null || typeof sanitized === 'string') {
item.replaceWith(...(sanitized == null ? [] : [sanitized]))
// eslint-disable-next-line sonar/updated-loop-counter -- the child is removed or replaced by text, the index and length must be rechecked
i--
len--
}
}
return el
}

/**
 * Set of known MathML tag names; every entry is the listed suffix prefixed
 * with `m` (for example `frac` becomes `mfrac`).
 *
 * @see https://developer.mozilla.org/en-US/docs/Web/MathML/Authoring#using_mathml
 */
export const MathML_TAG_NAMES = new Set(
  [
    'error',
    'frac',
    'i',
    'multiscripts',
    'n',
    'o',
    'over',
    'padded',
    'phantom',
    'root',
    'row',
    's',
    'space',
    'sqrt',
    'style',
    'sub',
    'subsup',
    'sup',
    'table',
    'td',
    'text',
    'tr',
    'under',
    'underover',
  ].map(suffix => 'm' + suffix),
)

function sanitizeNode(el: Document): Document
function sanitizeNode(el: Element): Element | null
function sanitizeNode(el: Document | Element) {
if (isDocument(el)) {
function sanitizeNode(el: DocumentFragment): DocumentFragment
function sanitizeNode(
el: Element,
parentTagName?: string,
): Element | string | null
function sanitizeNode(
el: DocumentOrFragment | Element,
parentTagName?: string,
) {
if (isDocumentOrFragment(el)) {
return sanitizeChildren(el)
}

if (['parsererror', 'script'].includes(el.tagName.toLowerCase())) {
el.remove()
return null
const tagName = getTagName(el)

if (
(parentTagName === 'math' && !MathML_TAG_NAMES.has(tagName)) ||
// unknown HTML element
el instanceof HTMLUnknownElement ||
// unknown SVG element
Object.getPrototypeOf(el) === SVGElement.prototype
) {
return el.textContent
}

switch (tagName) {
case 'iframe':
case 'link':
case 'meta':
case 'parsererror':
case 'script':
// eslint-disable-next-line no-fallthrough -- deprecated tags
case 'noembed':
case 'xmp': {
el.remove()
return
}
case 'template': {
sanitizeChildren((el as HTMLTemplateElement).content)
}
}

return sanitizeChildren(sanitizeAttributes(el))
Expand All @@ -50,23 +119,37 @@ function sanitizeNode(el: Document | Element) {
// MIME type for HTML; `sanitize` uses it as the default parser type.
export const TEXT_HTML = 'text/html'
// MIME type for SVG; `sanitize` compares the requested type against it.
export const IMAGE_SVG_XML = 'image/svg+xml'

/**
 * Options that `sanitize` derives from its second argument.
 */
export interface SanitizeOptions {
/** Parser MIME type; `sanitize` falls back to `TEXT_HTML` when it is omitted. */
type?: DOMParserSupportedType
/** When truthy and the type is `TEXT_HTML`, `sanitize` returns `body.innerHTML` instead of the root element's `outerHTML`. */
fragment?: boolean
}

export const sanitize = (
domString: string,
type: DOMParserSupportedType = TEXT_HTML,
typeOrFragment?: DOMParserSupportedType | boolean,
) => {
const trimmed = domString.trim()

if (!trimmed) {
return domString
}

if (!domParser) {
domParser = new DOMParser()
}

const { type = TEXT_HTML, fragment }: SanitizeOptions =
typeOrFragment == null || typeof typeOrFragment === 'string'
? { type: typeOrFragment }
: { fragment: typeOrFragment }

const doc = sanitizeNode(domParser.parseFromString(domString, type))

return (
((type !== IMAGE_SVG_XML ||
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- `document.body` is unavailable in XML, see also https://github.com/microsoft/TypeScript/issues/29052#issuecomment-447998135
!doc.body) &&
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- https://github.com/microsoft/TypeScript/issues/50078
doc.documentElement?.outerHTML) ||
''
(fragment && type === TEXT_HTML
? doc.body.innerHTML
: // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- https://github.com/microsoft/TypeScript/issues/50078
doc.documentElement?.outerHTML) || ''
)
}

Expand Down
Loading

0 comments on commit 912b39a

Please sign in to comment.