Skip to content

Commit

Permalink
add script to report broken links
Browse files Browse the repository at this point in the history
  • Loading branch information
alexfauquette committed Oct 10, 2022
1 parent 597c092 commit f1137e7
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 1 deletion.
3 changes: 2 additions & 1 deletion docs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
"start": "next start",
"typescript": "tsc -p tsconfig.json && tsc -p scripts/tsconfig.json",
"typescript:transpile": "echo 'Use `yarn docs:typescript:formatted'` instead && exit 1",
"typescript:transpile:dev": "echo 'Use `yarn docs:typescript'` instead && exit 1"
"typescript:transpile:dev": "echo 'Use `yarn docs:typescript'` instead && exit 1",
"link-check": "node ./scripts/reportBrokenLinks.js"
},
"dependencies": {
"@babel/core": "^7.19.3",
Expand Down
204 changes: 204 additions & 0 deletions docs/scripts/reportBrokenLinks.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
const path = require('path');
const fse = require('fs-extra');
const { createRender } = require('@mui/markdown');
const { marked } = require('marked');

// Path fragments excluded from the report: a used link containing any of
// these substrings is never reported as broken (see the final filter chain).
const UNSUPPORTED_PATHS = ['/api/', '/careers/', '/store/', '/x/'];

// Parent directory of the folder holding this script. Used as the base for the
// `pages` folder to scan, for the output file, and for relative paths in the report.
const docsSpaceRoot = path.join(__dirname, '../');

// Report lines accumulated by `write()` and flushed to disk by `save()`.
const buffer = [];

/**
 * Appends one entry to the in-memory report buffer. Nothing is written to
 * disk here; `save()` joins the buffered entries and writes the file.
 *
 * @param {string} text - Entry to append (entries are joined with '\n' by `save()`).
 */
function write(text) {
buffer.push(text);
}

/**
 * Flushes the buffered report to `broken-links.txt` inside `docsSpaceRoot`.
 *
 * Entries are joined with '\n'. An empty trailing entry is appended before
 * joining, so a non-empty report ends with a newline while an empty buffer
 * produces an empty file.
 */
function save() {
  const reportPath = path.join(docsSpaceRoot, 'broken-links.txt');
  const reportBody = buffer.concat('').join('\n');
  fse.writeFileSync(reportPath, reportBody);
}

/**
 * Extracts the site-internal links from a markdown document.
 *
 * The markdown is run through `marked` with a renderer whose `link` hook only
 * records hrefs; the rendered output itself is discarded.
 *
 * @param {string} markdown - Raw markdown source.
 * @returns {string[]} Every link href starting with '/', in the order the hook was called.
 */
const getPageLinks = (markdown) => {
  const internalHrefs = [];
  const linkCollector = new marked.Renderer();

  // The hook intentionally returns nothing: only the side effect matters.
  linkCollector.link = (href) => {
    const isInternal = href[0] === '/';
    if (isInternal) {
      internalHrefs.push(href);
    }
  };

  marked(markdown, { renderer: linkCollector });
  return internalHrefs;
};

/**
 * Recursively lists the page source files (`.js` and `.tsx`) under a folder.
 *
 * @param {string} folderPath - Directory to walk.
 * @returns {string[]} Paths (each built by joining `folderPath` with the entry
 *   names) of every matching file, in directory-listing order, with the
 *   content of a sub-folder placed where that sub-folder appears.
 */
const getJsFilesInFolder = (folderPath) => {
  const PAGE_EXTENSIONS = ['.js', '.tsx'];
  const collected = [];

  for (const entry of fse.readdirSync(folderPath, { withFileTypes: true })) {
    const entryPath = path.join(folderPath, entry.name);

    if (entry.isDirectory()) {
      for (const nestedFile of getJsFilesInFolder(entryPath)) {
        collected.push(nestedFile);
      }
    } else if (PAGE_EXTENSIONS.some((extension) => entry.name.endsWith(extension))) {
      collected.push(entryPath);
    }
  }

  return collected;
};

/**
 * Converts the path of a page source file into the site-relative URL it serves.
 *
 * Everything up to and including the first '/pages' segment of the path is
 * dropped, the file extension is removed, and an `index` file maps to its
 * folder. For example ".../docs/pages/x/foo.js" becomes "/x/foo" and
 * ".../docs/pages/x/index.js" becomes "/x".
 *
 * @param {string} jsFilePath - Path of a file located under a `pages` folder.
 * @returns {string} URL path without domain and without trailing slash.
 */
const jsFilePathToUrl = (jsFilePath) => {
  const PAGES_SEGMENT = '/pages';

  const directory = path.dirname(jsFilePath);
  const fileName = path.basename(jsFilePath);

  const urlFolder = directory.slice(jsFilePath.indexOf(PAGES_SEGMENT) + PAGES_SEGMENT.length);

  // Strip the last extension only: "a.b.js" keeps "a.b".
  const pageName = fileName.slice(0, fileName.lastIndexOf('.'));
  const pageSegment = pageName === 'index' ? '' : `/${pageName}`;

  return `${urlFolder}${pageSegment}`;
};

/**
 * Removes the query string from a link while keeping a hash that follows it.
 *
 * "/page?foo=bar#anchor" becomes "/page#anchor"; "/page?foo=bar" becomes
 * "/page"; a link without '?' is returned unchanged.
 *
 * @param {string} link - Link possibly containing a query string.
 * @returns {string} The link without its query string.
 */
function cleanLink(link) {
  const queryStart = link.indexOf('?');
  if (queryStart === -1) {
    return link;
  }

  const beforeQuery = link.slice(0, queryStart);
  // Only a '#' located after the '?' terminates the query string.
  const hashStart = link.indexOf('#', queryStart);

  return hashStart === -1 ? beforeQuery : `${beforeQuery}${link.slice(hashStart)}`;
}

/**
 * Reads a markdown file and returns the anchors it defines and the internal
 * links it uses.
 *
 * @param {string} fileName - Path of the markdown file.
 * @returns {{ hashes: string[], links: string[] }} `hashes` are the keys found
 *   in `headingHashes` after rendering; `links` are the internal links of the
 *   document with their query strings removed.
 */
function getLinksAndAnchors(fileName) {
  // NOTE(review): `render` presumably registers every heading hash in
  // `headingHashes` as a side effect - confirm against @mui/markdown.
  const headingHashes = {};
  const render = createRender({ headingHashes, toc: [], userLanguage: 'en' });

  const markdown = fse.readFileSync(fileName, { encoding: 'utf-8' });
  render(markdown);

  const links = getPageLinks(markdown).map((link) => cleanLink(link));
  const hashes = Object.keys(headingHashes);

  return { hashes, links };
}

/**
 * Returns the markdown files imported by a page source file.
 *
 * Content files can be referenced by either:
 * - 'docsx/data/advanced-components/overview.md?@mui/markdown'; (for mui-x)
 * - 'docs/data/advanced-components/overview.md?@mui/markdown';
 * - './index.md?@mui/markdown';
 *
 * Relative imports are resolved against the folder of `jsPageFile`. The
 * `docs/` and `docsx/` forms are resolved against everything that precedes
 * the first 'docs/' occurrence in `jsPageFile`, with `docsx` mapped to `docs`.
 * Any other form is logged with `console.error` and returned unresolved.
 *
 * @param {string} jsPageFile - Path of the page source file to inspect.
 * @returns {string[]} Paths of the imported markdown files (empty when none).
 */
const getMdFilesImported = (jsPageFile) => {
  // For each JS file extract the markdown rendered if it exists
  const fileContent = fse.readFileSync(jsPageFile, 'utf8');

  // `[^'\n]*` rather than `.*`: the match must start at the quote that opens
  // the markdown import itself. A greedy `.*` starts at the first quote of the
  // line and swallows any earlier quoted string.
  const importMatches = [...fileContent.matchAll(/'([^'\n]*)\?@mui\/markdown'/g)];

  return importMatches.map(([, importPath]) => {
    if (importPath.startsWith('.')) {
      return path.join(path.dirname(jsPageFile), importPath);
    }

    const repositoryRoot = jsPageFile.slice(0, jsPageFile.indexOf('docs/'));
    if (importPath.startsWith('docs/')) {
      return path.join(repositoryRoot, importPath);
    }
    if (importPath.startsWith('docsx/')) {
      return path.join(repositoryRoot, importPath.replace('docsx', 'docs'));
    }

    console.error(`unable to deal with import path: ${importPath}`);
    return importPath;
  });
};

/**
 * Scans a `pages` folder and records which URLs exist and which links are used.
 *
 * Both accumulator objects are mutated in place so several folders can be
 * merged into the same maps; nothing is returned.
 *
 * @param {string} folderPath - Folder containing the page source files.
 * @param {Object<string, boolean>} [availableLinks] - Receives one `true` entry
 *   per page URL and one per `${pageUrl}/#${hash}` anchor.
 * @param {Object<string, string[]>} [usedLinks] - Receives, for each link found
 *   in the markdown (trailing '/' removed), the markdown files using it.
 */
const parseDocFolder = (folderPath, availableLinks = {}, usedLinks = {}) => {
  const jsPageFiles = getJsFilesInFolder(folderPath);

  // Pair every imported markdown file with the URL of the page rendering it.
  const mdFiles = [];
  for (const jsPageFile of jsPageFiles) {
    const pageUrl = jsFilePathToUrl(jsPageFile);
    for (const fileName of getMdFilesImported(jsPageFile)) {
      mdFiles.push({ fileName, url: pageUrl });
    }
  }

  // Mark all the existing pages as available
  for (const jsPageFile of jsPageFiles) {
    availableLinks[jsFilePathToUrl(jsPageFile)] = true;
  }

  // For each markdown file, register the links it uses and the anchors it provides
  for (const { fileName, url } of mdFiles) {
    const { hashes, links } = getLinksAndAnchors(fileName);

    for (const rawLink of links) {
      const link = rawLink.endsWith('/') ? rawLink.slice(0, -1) : rawLink;

      if (usedLinks[link] === undefined) {
        usedLinks[link] = [];
      }
      usedLinks[link].push(fileName);
    }

    for (const hash of hashes) {
      availableLinks[`${url}/#${hash}`] = true;
    }
  }
};

// {[url, optionally followed by `/#hash`]: true}
// Every page URL and every page anchor found by parseDocFolder.
const availableLinks = {};

// {[link as written in the markdown, without query string nor trailing slash]: list of markdown files using this link}
const usedLinks = {};

// Fill both maps from the pages of this docs workspace.
parseDocFolder(path.join(docsSpaceRoot, './pages/'), availableLinks, usedLinks);
// TODO: Allow running on the documents of /mui/material-ui from /mui/mui-x
// parseDocFolder(path.join(ROOT, process.env.MUI_X_PATH, "docs/pages/"), availableLinks, usedLinks);

/**
 * Returns the page part of a link, i.e. everything before the first '/#'.
 *
 * @param {string} link - Link such as "/x/page/#anchor".
 * @returns {string} The link itself when it contains no '/#', otherwise the
 *   part preceding it ("/x/page" for the example above).
 */
function getPageUrlFromLink(link) {
  const anchorStart = link.indexOf('/#');
  return anchorStart === -1 ? link : link.slice(0, anchorStart);
}

// A used link is reported when it is internal, matches no known page or
// anchor, and is not explicitly excluded.
const isBrokenLink = (link) =>
  link.startsWith('/') &&
  !availableLinks[link] &&
  // unstyled sections are added by scripts (can not be found in markdown)
  !link.includes('#unstyled') &&
  !UNSUPPORTED_PATHS.some((unsupportedPath) => link.includes(unsupportedPath));

const brokenLinks = Object.keys(usedLinks)
  .filter((link) => isBrokenLink(link))
  .sort();

for (const linkKey of brokenLinks) {
  // Last path segment of every available link located on the targeted page,
  // listed to help spotting a renamed or mistyped anchor.
  const targetPageUrl = getPageUrlFromLink(linkKey);
  const anchorsOnSamePage = Object.keys(availableLinks)
    .filter((link) => getPageUrlFromLink(link) === targetPageUrl)
    .sort()
    .map((link) => link.split('/').at(-1))
    .join('\n');

  write(`not found: https://mui.com${linkKey}`);
  write(`used in`);
  for (const mdFile of usedLinks[linkKey]) {
    write(`- ${path.relative(docsSpaceRoot, mdFile)}`);
  }
  write('available anchors on the same page:');
  write(anchorsOnSamePage);
  write('\n\n');
}

save();
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"docs:size-why": "cross-env DOCS_STATS_ENABLED=true yarn docs:build",
"docs:start": "yarn workspace docs start",
"docs:i18n": "cross-env BABEL_ENV=development babel-node --extensions \".tsx,.ts,.js\" ./docs/scripts/i18n.js",
"docs:link-check": "yarn workspace docs link-check",
"docs:typescript": "yarn docs:typescript:formatted --watch",
"docs:typescript:check": "yarn workspace docs typescript",
"docs:typescript:formatted": "cross-env BABEL_ENV=development babel-node --extensions \".tsx,.ts,.js\" ./docs/scripts/formattedTSDemos",
Expand Down

0 comments on commit f1137e7

Please sign in to comment.