Skip to content

Commit

Permalink
Add mathml support
Browse files Browse the repository at this point in the history
  • Loading branch information
ggodlewski committed Apr 26, 2024
1 parent 6390f34 commit 7469933
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 8 deletions.
30 changes: 30 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@
"lunr-languages": "1.10.0",
"marked": "9.0.2",
"mathjs": "10.5.0",
"mathml-to-latex": "1.4.0",
"minimist": "1.2.6",
"mitt": "^3.0.0",
"open": "^7.4.2",
Expand Down
2 changes: 1 addition & 1 deletion src/containers/transform/TaskLocalFileTransform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ export class TaskLocalFileTransform extends QueueTask {
errors: Array<string>;
}

const workerResult: WorkerResult = await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', {
const workerResult: WorkerResult = <WorkerResult>await this.jobManagerContainer.scheduleWorker('OdtToMarkdown', {
localFile,
realFileName: this.realFileName,
odtPath,
Expand Down
19 changes: 19 additions & 0 deletions src/odt/OdtProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ export class OdtProcessor {
fs.writeFileSync(path.join(assetsDirectory, this.fileNameMap[fileName]), buffer);
}

// Copy every archive entry named "<name>/content.xml" into the assets directory
// as "<name>.xml" (whitespace in the name becomes "_"). Entries whose resulting
// name still contains a "/" (i.e. nested deeper than one level) are ignored.
// Presumably these are embedded sub-documents such as formulas — confirm.
for (const zipPath in this.files) {
  if (zipPath.endsWith('/content.xml')) {
    const targetName = zipPath.replace('/content.xml', '.xml').replace(/\s/g, '_');
    if (targetName.includes('/')) {
      continue;
    }

    const data = await this.files[zipPath].async('nodebuffer');

    // The exported file keeps its own name in the map and is reported as written.
    this.fileNameMap[targetName] = targetName;
    const storedName = this.fileNameMap[targetName];
    written.push(storedName);

    // Create the assets directory lazily, only once something has to be written.
    if (!fs.existsSync(assetsDirectory)) {
      fs.mkdirSync(assetsDirectory, { recursive: true });
    }
    const destination = path.join(assetsDirectory, storedName);
    fs.writeFileSync(destination, data);
  }
}

if (fs.existsSync(assetsDirectory)) {
const files = fs.readdirSync(assetsDirectory);
for (const file of files) {
Expand Down
27 changes: 25 additions & 2 deletions src/odt/OdtToMarkdown.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import path from 'path';
import fs from 'fs';
import { MathMLToLaTeX } from 'mathml-to-latex';

import {
DocumentContent, DocumentStyles, DrawCustomShape, DrawEnhancedGeometry,
DrawFrame, DrawG,
Expand All @@ -17,7 +21,7 @@ import {
TextSpan
} from './LibreOffice.ts';
import {urlToFolderId} from '../utils/idParsers.ts';
import {MarkdownNodes, MarkdownTagNode} from './MarkdownNodes.ts';
import {MarkdownNodes, MarkdownTagNode, MarkdownTextNode} from './MarkdownNodes.ts';

Check notice

Code scanning / CodeQL

Unused variable, import, function or class Note

Unused import MarkdownTextNode.
import {inchesToPixels, inchesToSpaces, spaces} from './utils.ts';
import {extractPath} from './extractPath.ts';
import {mergeDeep} from './mergeDeep.ts';
Expand Down Expand Up @@ -378,7 +382,26 @@ export class OdtToMarkdown {
}

async drawFrameToText(currentTagNode: MarkdownTagNode, drawFrame: DrawFrame) {
if (drawFrame.object) { // TODO: MathML
// Embedded object frame: try to render it as a LaTeX math block.
if (drawFrame.object) {
// The object's XML is read from the pictures directory; without one the frame is skipped.
if (!this.picturesDir) {
return;
}
if (drawFrame.object.href) {
// File name = the object's href with whitespace replaced by "_", plus ".xml".
// NOTE(review): presumably this mirrors the name used when the ODT assets were unzipped — confirm.
const filePath = path.join(this.picturesDir, drawFrame.object.href.replace(/\s/g, '_') + '.xml');
try {
// Decode the file contents as text (TextDecoder defaults to UTF-8).
const mathMl = new TextDecoder().decode(fs.readFileSync(filePath));
// Only content containing the literal "<math " is treated as MathML; anything else is ignored.
if (mathMl.indexOf('<math ') > -1) {
// Emit the converted LaTeX inside a PRE node tagged with lang "math".
const node = this.chunks.createNode('PRE', { lang: 'math' });
const latex = MathMLToLaTeX.convert(mathMl);
this.chunks.appendText(node, latex);
this.chunks.append(currentTagNode, node);
// Follow the math block with an EMPTY_LINE/ node.
const brNode = this.chunks.createNode('EMPTY_LINE/');
this.chunks.append(currentTagNode, brNode);
}
} catch (err) {
// Best effort: a read or conversion failure is logged and the object is dropped.
console.warn(err);
}
}
// Object frames always stop here, whether or not anything was emitted.
return;
}
if (drawFrame.image) {
Expand Down
4 changes: 4 additions & 0 deletions src/odt/postprocess/mergeParagraphs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ export function mergeParagraphs(markdownChunks: MarkdownNodes) {
}

if (chunk.isTag && ['P', 'PRE'].includes(chunk.tag)) {
if (chunk.tag === 'PRE' && chunk.payload?.lang === 'math') {
return;
}

const nextChunk = chunk.parent.children[ctx.nodeIdx + 1];
if (nextChunk?.isTag && nextChunk.tag === chunk.tag) {
const children = nextChunk.children.splice(0, nextChunk.children.length);
Expand Down
15 changes: 12 additions & 3 deletions test/odt_md/MarkDownTransform.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {assert} from 'chai';
import fs from 'fs';

import {compareTexts} from '../utils.ts';
import {compareTexts, createTmpDir} from '../utils.ts';
import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts';
import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts';
import {UnMarshaller} from '../../src/odt/UnMarshaller.ts';
Expand Down Expand Up @@ -109,6 +109,7 @@ describe('MarkDownTransformTest', () => {
// Converts the example-document fixture and checks the result against the
// expected markdown stored next to this test file.
it('test ./example-document.md', async () => {
  const testMarkdown = fs.readFileSync(__dirname + '/example-document.md').toString();
  const markdown = await transformOdt('example-document');
  // No console.log of the converted document here: it was leftover debug output
  // that flooded the test run; the assertion below is the only check.
  assert.ok(compareTexts(testMarkdown, markdown, false));
});

Expand Down Expand Up @@ -137,13 +138,20 @@ async function transformOdt(id: string) {
const odtPath = folder.getRealPath() + '/' + id + '.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const tmpDir: string = createTmpDir();
await processor.unzipAssets(tmpDir, id + '.md');
if (!processor.getContentXml()) {
throw Error('No odt processed');
}
return transform(processor.getContentXml(), processor.getStylesXml());
try {
const markdown = await transform(processor.getContentXml(), processor.getStylesXml(), tmpDir + `/${id}.assets`);
return markdown.replaceAll(tmpDir + `/${id}.assets`, '');
} finally {
fs.rmSync(tmpDir, { recursive: true });
}
}

async function transform(contentXml: string, stylesXml: string) {
async function transform(contentXml: string, stylesXml: string, assetsDir: string) {
const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent');
const document: DocumentContent = parser.unmarshal(contentXml);
if (!document) {
Expand All @@ -155,5 +163,6 @@ async function transform(contentXml: string, stylesXml: string) {
throw Error('No styles unmarshalled');
}
const converter = new OdtToMarkdown(document, styles);
converter.setPicturesDir(assetsDir);
return await converter.convert();
}
8 changes: 8 additions & 0 deletions test/odt_md/example-document.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,14 @@ Some **bold** **_boldanditalic_*** italic* text

### Using the actual equation object

```math
E = m c^{2}
```

```math
e^{i \pi} - 1 = 0
```

### Text equivalent

*E=mc**2*
Expand Down
4 changes: 2 additions & 2 deletions test/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import path from 'path';
import {createPatch} from 'diff';
import {ansi_colors} from '../src/utils/logger/colors.ts';

export function createTmpDir() {
return fs.mkdtempSync(path.join(os.tmpdir(), 'wg-'));
/**
 * Creates a new, uniquely named directory inside the operating system's
 * temporary directory.
 *
 * @param prefix - Leading part of the directory name; `fs.mkdtempSync`
 *   appends random characters to it.
 * @returns The path of the directory that was created.
 */
export function createTmpDir(prefix = 'wg-') {
  const template = path.join(os.tmpdir(), prefix);
  return fs.mkdtempSync(template);
}

// eslint-disable-next-line @typescript-eslint/no-unused-vars
Expand Down

0 comments on commit 7469933

Please sign in to comment.