diff --git a/package-lock.json b/package-lock.json index 8954872f..610fbeb3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -50,6 +50,7 @@ "lunr-languages": "1.10.0", "marked": "9.0.2", "mathjs": "10.5.0", + "mathml-to-latex": "1.4.0", "minimist": "1.2.6", "mitt": "^3.0.0", "open": "^7.4.2", @@ -1752,6 +1753,14 @@ "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.2.45.tgz", "integrity": "sha512-Ewzq5Yhimg7pSztDV+RH1UDKBzmtqieXQlpTVm2AwraoRL/Rks96mvd8Vgi7Lj+h+TH8dv7mXD3FRZR3TUvbSg==" }, + "node_modules/@xmldom/xmldom": { + "version": "0.8.10", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz", + "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -4523,6 +4532,14 @@ "node": ">= 12" } }, + "node_modules/mathml-to-latex": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/mathml-to-latex/-/mathml-to-latex-1.4.0.tgz", + "integrity": "sha512-dRVr2hCh/dwM8Cn1ZlKtb1Rw48z4fsUuZIWoOdMZ3Tct0v+QMSgxrO2nV69UIgySF51VW8qPEskNzhLLBrl5QQ==", + "dependencies": { + "@xmldom/xmldom": "^0.8.10" + } + }, "node_modules/media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", @@ -7695,6 +7712,11 @@ "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.2.45.tgz", "integrity": "sha512-Ewzq5Yhimg7pSztDV+RH1UDKBzmtqieXQlpTVm2AwraoRL/Rks96mvd8Vgi7Lj+h+TH8dv7mXD3FRZR3TUvbSg==" }, + "@xmldom/xmldom": { + "version": "0.8.10", + "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz", + "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==" + }, "accepts": { "version": "1.3.8", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.8.tgz", @@ -9799,6 +9821,14 @@ 
"typed-function": "^2.1.0" } }, + "mathml-to-latex": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/mathml-to-latex/-/mathml-to-latex-1.4.0.tgz", + "integrity": "sha512-dRVr2hCh/dwM8Cn1ZlKtb1Rw48z4fsUuZIWoOdMZ3Tct0v+QMSgxrO2nV69UIgySF51VW8qPEskNzhLLBrl5QQ==", + "requires": { + "@xmldom/xmldom": "^0.8.10" + } + }, "media-typer": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-0.3.0.tgz", diff --git a/package.json b/package.json index 609a63c3..a6d9da2e 100644 --- a/package.json +++ b/package.json @@ -102,6 +102,7 @@ "lunr-languages": "1.10.0", "marked": "9.0.2", "mathjs": "10.5.0", + "mathml-to-latex": "1.4.0", "minimist": "1.2.6", "mitt": "^3.0.0", "open": "^7.4.2", diff --git a/src/containers/transform/TaskLocalFileTransform.ts b/src/containers/transform/TaskLocalFileTransform.ts index 58634f1b..da54738e 100644 --- a/src/containers/transform/TaskLocalFileTransform.ts +++ b/src/containers/transform/TaskLocalFileTransform.ts @@ -173,7 +173,7 @@ export class TaskLocalFileTransform extends QueueTask { errors: Array; } - const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', { + const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', { localFile, realFileName: this.realFileName, odtPath, diff --git a/src/odt/OdtProcessor.ts b/src/odt/OdtProcessor.ts index e25adcb0..143ffc25 100644 --- a/src/odt/OdtProcessor.ts +++ b/src/odt/OdtProcessor.ts @@ -71,6 +71,25 @@ export class OdtProcessor { fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer); } + for (const relativePath in this.files) { + if (!relativePath.endsWith('/content.xml')) { + continue; + } + + const fileName = relativePath.replace('/content.xml', '.xml').replace(/\s/g, '_'); + if (fileName.indexOf('/') === -1) { + const entry = this.files[relativePath]; + const buffer = await entry.async('nodebuffer'); + + this.fileNameMap[fileName] = fileName; 
+ written.push(this.fileNameMap[fileName]); + if (!fs.existsSync(assetsDirectory)) { + fs.mkdirSync(assetsDirectory, { recursive: true }); + } + fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer); + } + } + if (fs.existsSync(assetsDirectory)) { const files = fs.readdirSync(assetsDirectory); for (const file of files) { diff --git a/src/odt/OdtToMarkdown.ts b/src/odt/OdtToMarkdown.ts index ba886876..cd21b947 100644 --- a/src/odt/OdtToMarkdown.ts +++ b/src/odt/OdtToMarkdown.ts @@ -1,3 +1,7 @@ +import path from 'path'; +import fs from 'fs'; +import { MathMLToLaTeX } from 'mathml-to-latex'; + import { DocumentContent, DocumentStyles, DrawCustomShape, DrawEnhancedGeometry, DrawFrame, DrawG, @@ -17,7 +21,7 @@ import { TextSpan } from './LibreOffice.ts'; import {urlToFolderId} from '../utils/idParsers.ts'; -import {MarkdownNodes, MarkdownTagNode} from './MarkdownNodes.ts'; +import {MarkdownNodes, MarkdownTagNode, MarkdownTextNode} from './MarkdownNodes.ts'; import {inchesToPixels, inchesToSpaces, spaces} from './utils.ts'; import {extractPath} from './extractPath.ts'; import {mergeDeep} from './mergeDeep.ts'; @@ -378,7 +382,26 @@ export class OdtToMarkdown { } async drawFrameToText(currentTagNode: MarkdownTagNode, drawFrame: DrawFrame) { - if (drawFrame.object) { // TODO: MathML + if (drawFrame.object) { + if (!this.picturesDir) { + return; + } + if (drawFrame.object.href) { + const filePath = path.join(this.picturesDir, drawFrame.object.href.replace(/\s/g, '_') + '.xml'); + try { + const mathMl = new TextDecoder().decode(fs.readFileSync(filePath)); + if (mathMl.indexOf('<math ') > -1) { + const node = this.chunks.createNode('PRE', { lang: 'math' }); + const latex = MathMLToLaTeX.convert(mathMl); + this.chunks.appendText(node, latex); + this.chunks.append(currentTagNode, node); + const brNode = this.chunks.createNode('EMPTY_LINE/'); + this.chunks.append(currentTagNode, brNode); + } + } catch (err) { + console.warn(err); + } + } return; } if
(drawFrame.image) { diff --git a/src/odt/postprocess/mergeParagraphs.ts b/src/odt/postprocess/mergeParagraphs.ts index b2966779..341c6bd3 100644 --- a/src/odt/postprocess/mergeParagraphs.ts +++ b/src/odt/postprocess/mergeParagraphs.ts @@ -15,6 +15,10 @@ export function mergeParagraphs(markdownChunks: MarkdownNodes) { } if (chunk.isTag && ['P', 'PRE'].includes(chunk.tag)) { + if (chunk.tag === 'PRE' && chunk.payload?.lang === 'math') { + return; + } + const nextChunk = chunk.parent.children[ctx.nodeIdx + 1]; if (nextChunk?.isTag && nextChunk.tag === chunk.tag) { const children = nextChunk.children.splice(0, nextChunk.children.length); diff --git a/test/odt_md/MarkDownTransform.test.ts b/test/odt_md/MarkDownTransform.test.ts index 4cf18ac1..69022e02 100644 --- a/test/odt_md/MarkDownTransform.test.ts +++ b/test/odt_md/MarkDownTransform.test.ts @@ -1,7 +1,7 @@ import {assert} from 'chai'; import fs from 'fs'; -import {compareTexts} from '../utils.ts'; +import {compareTexts, createTmpDir} from '../utils.ts'; import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts'; import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts'; import {UnMarshaller} from '../../src/odt/UnMarshaller.ts'; @@ -109,6 +109,7 @@ describe('MarkDownTransformTest', () => { it('test ./example-document.md', async () => { const testMarkdown = fs.readFileSync(__dirname + '/example-document.md').toString(); const markdown = await transformOdt('example-document'); + console.log(markdown); assert.ok(compareTexts(testMarkdown, markdown, false)); }); @@ -137,13 +138,20 @@ async function transformOdt(id: string) { const odtPath = folder.getRealPath() + '/' + id + '.odt'; const processor = new OdtProcessor(odtPath); await processor.load(); + const tmpDir: string = createTmpDir(); + await processor.unzipAssets(tmpDir, id + '.md'); if (!processor.getContentXml()) { throw Error('No odt processed'); } - return transform(processor.getContentXml(), 
processor.getStylesXml()); + try { + const markdown = await transform(processor.getContentXml(), processor.getStylesXml(), tmpDir + `/${id}.assets`); + return markdown.replaceAll(tmpDir + `/${id}.assets`, ''); + } finally { + fs.rmSync(tmpDir, { recursive: true }); + } } -async function transform(contentXml: string, stylesXml: string) { +async function transform(contentXml: string, stylesXml: string, assetsDir: string) { const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent'); const document: DocumentContent = parser.unmarshal(contentXml); if (!document) { @@ -155,5 +163,6 @@ async function transform(contentXml: string, stylesXml: string) { throw Error('No styles unmarshalled'); } const converter = new OdtToMarkdown(document, styles); + converter.setPicturesDir(assetsDir); return await converter.convert(); } diff --git a/test/odt_md/example-document.md b/test/odt_md/example-document.md index 07b1c4fd..898e4806 100644 --- a/test/odt_md/example-document.md +++ b/test/odt_md/example-document.md @@ -136,6 +136,14 @@ Some **bold** **_boldanditalic_*** italic* text ### Using the actual equation object +```math +E = m c^{2} +``` + +```math +e^{i \pi} - 1 = 0 +``` + ### Text equivalent *E=mc**2* diff --git a/test/utils.ts b/test/utils.ts index 8e1145cc..336d9f37 100644 --- a/test/utils.ts +++ b/test/utils.ts @@ -4,8 +4,8 @@ import path from 'path'; import {createPatch} from 'diff'; import {ansi_colors} from '../src/utils/logger/colors.ts'; -export function createTmpDir() { - return fs.mkdtempSync(path.join(os.tmpdir(), 'wg-')); +export function createTmpDir(prefix = 'wg-') { + return fs.mkdtempSync(path.join(os.tmpdir(), prefix)); } // eslint-disable-next-line @typescript-eslint/no-unused-vars