diff --git a/package-lock.json b/package-lock.json index 8954872f..610fbeb3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -50,6 +50,7 @@ "lunr-languages": "1.10.0", "marked": "9.0.2", "mathjs": "10.5.0", + "mathml-to-latex": "1.4.0", "minimist": "1.2.6", "mitt": "^3.0.0", "open": "^7.4.2", @@ -1752,6 +1753,14 @@ "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.2.45.tgz", "integrity": "sha512-Ewzq5Yhimg7pSztDV+RH1UDKBzmtqieXQlpTVm2AwraoRL/Rks96mvd8Vgi7Lj+h+TH8dv7mXD3FRZR3TUvbSg==" }, + "node_modules/@xmldom/xmldom": { + "version": "0.8.10", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz", + "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -4523,6 +4532,14 @@ "node": ">= 12" } }, + "node_modules/mathml-to-latex": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/mathml-to-latex/-/mathml-to-latex-1.4.0.tgz", + "integrity": "sha512-dRVr2hCh/dwM8Cn1ZlKtb1Rw48z4fsUuZIWoOdMZ3Tct0v+QMSgxrO2nV69UIgySF51VW8qPEskNzhLLBrl5QQ==", + "dependencies": { + "@xmldom/xmldom": "^0.8.10" + } + }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -7695,6 +7712,11 @@ "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.2.45.tgz", "integrity": "sha512-Ewzq5Yhimg7pSztDV+RH1UDKBzmtqieXQlpTVm2AwraoRL/Rks96mvd8Vgi7Lj+h+TH8dv7mXD3FRZR3TUvbSg==" }, + "@xmldom/xmldom": { + "version": "0.8.10", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz", + "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==" + }, "accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -9799,6 +9821,14 @@ "typed-function": "^2.1.0" } }, + "mathml-to-latex": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/mathml-to-latex/-/mathml-to-latex-1.4.0.tgz", + "integrity": "sha512-dRVr2hCh/dwM8Cn1ZlKtb1Rw48z4fsUuZIWoOdMZ3Tct0v+QMSgxrO2nV69UIgySF51VW8qPEskNzhLLBrl5QQ==", + "requires": { + "@xmldom/xmldom": "^0.8.10" + } + }, "media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", diff --git a/package.json b/package.json index 609a63c3..a6d9da2e 100644 --- a/package.json +++ b/package.json @@ -102,6 +102,7 @@ "lunr-languages": "1.10.0", "marked": "9.0.2", "mathjs": "10.5.0", + "mathml-to-latex": "1.4.0", "minimist": "1.2.6", "mitt": "^3.0.0", "open": "^7.4.2", diff --git a/src/containers/job/worker.ts b/src/containers/job/worker.ts index 13071079..a596acc7 100644 --- a/src/containers/job/worker.ts +++ b/src/containers/job/worker.ts @@ -1,5 +1,6 @@ import { parentPort } from 'worker_threads'; -import {executeOdtToMarkdown} from '../../odt/executeOdtToMarkdown'; + +import {executeOdtToMarkdown} from '../../odt/executeOdtToMarkdown.ts'; parentPort.on('message', async (msg) => { try { diff --git a/src/containers/transform/TaskLocalFileTransform.ts b/src/containers/transform/TaskLocalFileTransform.ts index 58634f1b..40f5258b 100644 --- a/src/containers/transform/TaskLocalFileTransform.ts +++ b/src/containers/transform/TaskLocalFileTransform.ts @@ -136,6 +136,8 @@ export class TaskLocalFileTransform extends QueueTask { const rewriteRules = this.userConfig.rewrite_rules || []; + const picturesDirAbsolute = destinationPath + '/' + this.realFileName.replace(/.md$/, '.assets/'); + if (SINGLE_THREADED_TRANSFORM) { const processor = new OdtProcessor(odtPath, true); await processor.load(); @@ -156,9 +158,9 @@ export class TaskLocalFileTransform extends QueueTask { const converter = new OdtToMarkdown(document, styles, fileNameMap); converter.setRewriteRules(rewriteRules); if (this.realFileName === '_index.md') { - converter.setPicturesDir('./' + this.realFileName.replace(/.md$/, '.assets/')); + converter.setPicturesDir('./' + this.realFileName.replace(/.md$/, '.assets/'), picturesDirAbsolute); } else { - converter.setPicturesDir('../' + this.realFileName.replace(/.md$/, '.assets/')); + converter.setPicturesDir('../' + this.realFileName.replace(/.md$/, '.assets/'), picturesDirAbsolute); } markdown = await converter.convert(); links = Array.from(converter.links); @@ -173,9 +175,10 @@ export class TaskLocalFileTransform extends QueueTask { errors: Array; } - const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', { + const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', { localFile, realFileName: this.realFileName, + picturesDirAbsolute, odtPath, destinationPath, rewriteRules, diff --git a/src/odt/MarkdownNodes.ts b/src/odt/MarkdownNodes.ts index 736195b5..fe8679b1 100644 --- a/src/odt/MarkdownNodes.ts +++ b/src/odt/MarkdownNodes.ts @@ -15,6 +15,7 @@ export type TAG = 'BODY' | 'HR/' | 'B' | 'I' | 'BI' | 'BLANK/' | // | '/B' | '/I 'TOC' | 'SVG/' | 'IMG/' | // | '/TOC' 'EMB_SVG' | 'EMB_SVG_G' | 'EMB_SVG_P/' | 'EMB_SVG_TEXT' | // | '/EMB_SVG' | '/EMB_SVG_G' | '/EMB_SVG_TEXT' 'EMB_SVG_TSPAN' | // | '/EMB_SVG_TSPAN' + 'MATHML' | 'CHANGE_START' | 'CHANGE_END' | 'RAW_MODE/' | 'HTML_MODE/' | 'MD_MODE/' | 'MACRO_MODE/' | 'COMMENT'; export interface TagPayload { diff --git a/src/odt/OdtProcessor.ts b/src/odt/OdtProcessor.ts index e25adcb0..143ffc25 100644 --- a/src/odt/OdtProcessor.ts +++ b/src/odt/OdtProcessor.ts @@ -71,6 +71,25 @@ export class OdtProcessor { fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer); } + for (const relativePath in this.files) { + if (!relativePath.endsWith('/content.xml')) { + continue; + } + + const fileName = relativePath.replace('/content.xml', '.xml').replace(/\s/g, '_'); + if (fileName.indexOf('/') === -1) { + const entry = this.files[relativePath]; + const buffer = await entry.async('nodebuffer'); + + this.fileNameMap[fileName] = fileName; + written.push(this.fileNameMap[fileName]); + if (!fs.existsSync(assetsDirectory)) { + fs.mkdirSync(assetsDirectory, { recursive: true }); + } + fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer); + } + } + if (fs.existsSync(assetsDirectory)) { const files = fs.readdirSync(assetsDirectory); for (const file of files) { diff --git a/src/odt/OdtToMarkdown.ts b/src/odt/OdtToMarkdown.ts index ba886876..aeeef741 100644 --- a/src/odt/OdtToMarkdown.ts +++ b/src/odt/OdtToMarkdown.ts @@ -1,3 +1,7 @@ +import path from 'path'; +import fs from 'fs'; +import { MathMLToLaTeX } from 'mathml-to-latex'; + import { DocumentContent, DocumentStyles, DrawCustomShape, DrawEnhancedGeometry, DrawFrame, DrawG, @@ -17,7 +21,7 @@ import { TextSpan } from './LibreOffice.ts'; import {urlToFolderId} from '../utils/idParsers.ts'; -import {MarkdownNodes, MarkdownTagNode} from './MarkdownNodes.ts'; +import {MarkdownNodes, MarkdownTagNode, MarkdownTextNode} from './MarkdownNodes.ts'; import {inchesToPixels, inchesToSpaces, spaces} from './utils.ts'; import {extractPath} from './extractPath.ts'; import {mergeDeep} from './mergeDeep.ts'; @@ -64,6 +68,7 @@ export class OdtToMarkdown { public readonly links: Set = new Set(); private readonly chunks: MarkdownNodes = new MarkdownNodes(); private picturesDir = ''; + private picturesDirAbsolute = ''; private rewriteRules: RewriteRule[] = []; constructor(private document: DocumentContent, private documentStyles: DocumentStyles, private fileNameMap: FileNameMap = {}) { @@ -378,7 +383,24 @@ export class OdtToMarkdown { } async drawFrameToText(currentTagNode: MarkdownTagNode, drawFrame: DrawFrame) { - if (drawFrame.object) { // TODO: MathML + if (drawFrame.object) { + if (!this.picturesDir) { + return; + } + if (drawFrame.object.href) { + const filePath = path.join(this.picturesDirAbsolute, drawFrame.object.href.replace(/\s/g, '_') + '.xml'); + try { + const mathMl = new TextDecoder().decode(fs.readFileSync(filePath)); + if (mathMl.indexOf(' -1) { + const node = this.chunks.createNode('MATHML'); + const latex = MathMLToLaTeX.convert(mathMl); + this.chunks.appendText(node, latex); + this.chunks.append(currentTagNode, node); + } + } catch (err) { + console.warn(err); + } + } return; } if (drawFrame.image) { @@ -666,8 +688,9 @@ export class OdtToMarkdown { } } - setPicturesDir(picturesDir: string) { + setPicturesDir(picturesDir: string, picturesDirAbsolute?: string) { this.picturesDir = picturesDir; + this.picturesDirAbsolute = picturesDirAbsolute || picturesDir; } setRewriteRules(rewriteRules: RewriteRule[]) { diff --git a/src/odt/executeOdtToMarkdown.ts b/src/odt/executeOdtToMarkdown.ts index d86f1cd9..079c92c3 100644 --- a/src/odt/executeOdtToMarkdown.ts +++ b/src/odt/executeOdtToMarkdown.ts @@ -1,11 +1,12 @@ -import {OdtToMarkdown} from './OdtToMarkdown'; -import {UnMarshaller} from './UnMarshaller'; -import {DocumentStyles, LIBREOFFICE_CLASSES} from './LibreOffice'; -import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/generateDocumentFrontMatter'; -import {OdtProcessor} from './OdtProcessor'; import fs from 'fs'; import path from 'path'; +import {OdtToMarkdown} from './OdtToMarkdown.ts'; +import {UnMarshaller} from './UnMarshaller.ts'; +import {DocumentStyles, LIBREOFFICE_CLASSES} from './LibreOffice.ts'; +import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/generateDocumentFrontMatter.ts'; +import {OdtProcessor} from './OdtProcessor.ts'; + export async function executeOdtToMarkdown(workerData) { const processor = new OdtProcessor(workerData.odtPath, true); await processor.load(); @@ -26,9 +27,9 @@ export async function executeOdtToMarkdown(workerData) { const converter = new OdtToMarkdown(document, styles, fileNameMap); converter.setRewriteRules(workerData.rewriteRules); if (workerData.realFileName === '_index.md') { - converter.setPicturesDir('./' + workerData.realFileName.replace(/.md$/, '.assets/')); + converter.setPicturesDir('./' + workerData.realFileName.replace(/.md$/, '.assets/'), workerData.picturesDirAbsolute); } else { - converter.setPicturesDir('../' + workerData.realFileName.replace(/.md$/, '.assets/')); + converter.setPicturesDir('../' + workerData.realFileName.replace(/.md$/, '.assets/'), workerData.picturesDirAbsolute); } const markdown = await converter.convert(); const links = Array.from(converter.links); diff --git a/src/odt/postprocess/convertMathMl.ts b/src/odt/postprocess/convertMathMl.ts new file mode 100644 index 00000000..0b91e759 --- /dev/null +++ b/src/odt/postprocess/convertMathMl.ts @@ -0,0 +1,27 @@ +import {MarkdownNodes} from '../MarkdownNodes.js'; +import {walkRecursiveSync} from '../markdownNodesUtils.js'; + +export function convertMathMl(markdownChunks: MarkdownNodes) { + walkRecursiveSync(markdownChunks.body, (chunk, ctx: { nodeIdx: number }) => { + if (!(chunk.isTag && chunk.tag === 'MATHML')) { + return; + } + + const prevChunk = chunk.parent.children[ctx.nodeIdx - 1]; + const nextChunk = chunk.parent.children[ctx.nodeIdx + 1]; + + if (prevChunk?.isTag === false || nextChunk?.isTag === false) { + const text = chunk.children.filter(c => c.isTag === false).map(c => c['text']).join('\n'); + chunk.parent.children.splice(ctx.nodeIdx, 1, { + isTag: false, + text: '$$' + text + '$$' + }); + return; + } + + chunk.tag = 'PRE'; + chunk.payload.lang = 'math'; + const brNode = markdownChunks.createNode('EMPTY_LINE/'); + chunk.parent.children.splice(ctx.nodeIdx + 1, 0, brNode); + }); +} diff --git a/src/odt/postprocess/mergeParagraphs.ts b/src/odt/postprocess/mergeParagraphs.ts index b2966779..341c6bd3 100644 --- a/src/odt/postprocess/mergeParagraphs.ts +++ b/src/odt/postprocess/mergeParagraphs.ts @@ -15,6 +15,10 @@ export function mergeParagraphs(markdownChunks: MarkdownNodes) { } if (chunk.isTag && ['P', 'PRE'].includes(chunk.tag)) { + if (chunk.tag === 'PRE' && chunk.payload?.lang === 'math') { + return; + } + const nextChunk = chunk.parent.children[ctx.nodeIdx + 1]; if (nextChunk?.isTag && nextChunk.tag === chunk.tag) { const children = nextChunk.children.splice(0, nextChunk.children.length); diff --git a/src/odt/postprocess/postProcess.ts b/src/odt/postprocess/postProcess.ts index 439756b9..b743b62b 100644 --- a/src/odt/postprocess/postProcess.ts +++ b/src/odt/postprocess/postProcess.ts @@ -23,6 +23,7 @@ import {removeEmptyTags} from './removeEmptyTags.ts'; import {removeExcessiveLines} from './removeExcessiveLines.ts'; import {applyRewriteRules} from './applyRewriteRules.ts'; import {RewriteRule} from '../applyRewriteRule.ts'; +import {convertMathMl} from './convertMathMl.js'; export async function postProcess(chunks: MarkdownNodes, rewriteRules: RewriteRule[]) { convertToc(chunks); @@ -31,6 +32,7 @@ export async function postProcess(chunks: MarkdownNodes, rewriteRules: RewriteRu fixSpacesInsideInlineFormatting(chunks); await fixBoldItalic(chunks); hideSuggestedChanges(chunks); + convertMathMl(chunks); trimParagraphs(chunks); addEmptyLinesAfterParas(chunks); diff --git a/test/odt_md/MarkDownTransform.test.ts b/test/odt_md/MarkDownTransform.test.ts index 4cf18ac1..128caf30 100644 --- a/test/odt_md/MarkDownTransform.test.ts +++ b/test/odt_md/MarkDownTransform.test.ts @@ -1,7 +1,7 @@ import {assert} from 'chai'; import fs from 'fs'; -import {compareTexts} from '../utils.ts'; +import {compareTexts, createTmpDir} from '../utils.ts'; import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts'; import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts'; import {UnMarshaller} from '../../src/odt/UnMarshaller.ts'; @@ -137,13 +137,20 @@ async function transformOdt(id: string) { const odtPath = folder.getRealPath() + '/' + id + '.odt'; const processor = new OdtProcessor(odtPath); await processor.load(); + const tmpDir: string = createTmpDir(); + await processor.unzipAssets(tmpDir, id + '.md'); if (!processor.getContentXml()) { throw Error('No odt processed'); } - return transform(processor.getContentXml(), processor.getStylesXml()); + try { + const markdown = await transform(processor.getContentXml(), processor.getStylesXml(), tmpDir + `/${id}.assets`); + return markdown.replaceAll(tmpDir + `/${id}.assets`, ''); + } finally { + fs.rmSync(tmpDir, { recursive: true }); + } } -async function transform(contentXml: string, stylesXml: string) { +async function transform(contentXml: string, stylesXml: string, assetsDir: string) { const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent'); const document: DocumentContent = parser.unmarshal(contentXml); if (!document) { @@ -155,5 +162,6 @@ async function transform(contentXml: string, stylesXml: string) { throw Error('No styles unmarshalled'); } const converter = new OdtToMarkdown(document, styles); + converter.setPicturesDir(assetsDir); return await converter.convert(); } diff --git a/test/odt_md/example-document.md b/test/odt_md/example-document.md index 07b1c4fd..8e7a0d81 100644 --- a/test/odt_md/example-document.md +++ b/test/odt_md/example-document.md @@ -64,7 +64,9 @@ After subtable ## Image -![](1000000000000200000001804F9AAE46CD6D0DF2.gif) +![](1000000000000640000001CF60FB0243CA95EC14.jpg) + +![](10000000000003F0000003F092F85671239C65F9.jpg) ## Preformatted Text @@ -80,7 +82,7 @@ Code blocks are part of the Markdown spec, but syntax highlighting isn't. Howeve ### Typescript / Javascript -{{% markdown %}} +{{markdown}} ```javascript class MyClass { @@ -98,7 +100,7 @@ module MyModule { declare magicNumber number; myArray.forEach(() => { }); // fat arrow syntax ``` -{{% /markdown %}} +{{/markdown}} ## Video @@ -136,10 +138,20 @@ Some **bold** **_boldanditalic_*** italic* text ### Using the actual equation object +```math +E = m c^{2} +``` + +```math +e^{i \pi} - 1 = 0 +``` + ### Text equivalent *E=mc**2* +Inline $$E = m c^{2}$$ math + ## Footnotes 1Footnotes should display as a footnote, and should always display at the very end of the document (page)**?** This is some sample text with a footnote. diff --git a/test/odt_md/example-document.odt b/test/odt_md/example-document.odt index 3ac88a37..88fd51d8 100644 Binary files a/test/odt_md/example-document.odt and b/test/odt_md/example-document.odt differ diff --git a/test/utils.ts b/test/utils.ts index 8e1145cc..336d9f37 100644 --- a/test/utils.ts +++ b/test/utils.ts @@ -4,8 +4,8 @@ import path from 'path'; import {createPatch} from 'diff'; import {ansi_colors} from '../src/utils/logger/colors.ts'; -export function createTmpDir() { - return fs.mkdtempSync(path.join(os.tmpdir(), 'wg-')); +export function createTmpDir(prefix = 'wg-') { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); } // eslint-disable-next-line @typescript-eslint/no-unused-vars