Skip to content

Commit

Permalink
Add odt2md
Browse files Browse the repository at this point in the history
  • Loading branch information
ggodlewski committed May 3, 2024
1 parent 817d8c1 commit 4201645
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 18 deletions.
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"bin": {
"wgd": "src/wikigdrive.sh",
"wikigdrive": "src/wikigdrive.sh",
"wikigdrivectl": "src/wikigdrivectl.sh"
"wikigdrivectl": "src/wikigdrivectl.sh",
"odt2md": "src/odt2md.sh"
},
"main": "src/cli/wikigdrive.ts",
"scripts": {
Expand Down
89 changes: 89 additions & 0 deletions src/cli/odt2md.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
'use strict';

import path from 'path';
import minimist from 'minimist';
import {fileURLToPath} from 'url';
import {Buffer} from 'buffer';
import fs from 'fs';

import {OdtProcessor} from '../odt/OdtProcessor.js';
import {UnMarshaller} from '../odt/UnMarshaller.js';
import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../odt/LibreOffice.js';
import {OdtToMarkdown} from '../odt/OdtToMarkdown.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

process.env.GIT_SHA = process.env.GIT_SHA || 'dev';

/**
 * Prints the package name, its version, the GIT_SHA environment value and a
 * short usage text for the odt2md command to standard output.
 *
 * The name and version are read from the package.json located two directories
 * above this module.
 */
async function usage() {
  const manifestPath = path.resolve(__dirname, '..', '..', 'package.json');
  const manifestBytes = fs.readFileSync(manifestPath);
  const manifest = JSON.parse(new TextDecoder().decode(manifestBytes));

  const examples = 'echo "test" | odt2md\n\nor\n\nodt2md filename.odt'.trim();
  const banner = `${manifest.name} version: ${manifest.version}, ${process.env.GIT_SHA}`;

  console.log(`${banner}\n\nUsage:\n${examples}\n`);
}

/**
 * Converts an ODT document to markdown and prints the result to stdout.
 *
 * Input is taken from standard input when data is piped in; otherwise the
 * first positional command line argument is read as a file path (resolved
 * against the current working directory).
 *
 * When `-h` / `--help` is given, or when there is neither piped input nor a
 * file argument, the usage text is printed and the process exits with
 * status 1.
 *
 * @throws Error when the archive yields no content.xml, or when content or
 *         styles cannot be unmarshalled.
 */
async function main() {
  const argv = minimist(process.argv.slice(2));

  // Honour the help flag up front, regardless of whether stdin carries data.
  if (argv.h || argv.help) {
    await usage();
    process.exit(1);
  }

  const inputArr: Buffer[] = [];

  await new Promise<void>((resolve, reject) => {
    // Grace period for piped input to show up. It is cancelled as soon as the
    // first chunk arrives, so a slow or large pipe is read to its end instead
    // of being cut off after 50 ms with a partial (corrupt) archive.
    const graceTimer = setTimeout(() => {
      process.stdin.destroy();
      resolve();
    }, 50);

    process.stdin.on('data', (chunk: Buffer) => {
      clearTimeout(graceTimer);
      inputArr.push(chunk);
    });
    process.stdin.on('end', () => {
      clearTimeout(graceTimer);
      resolve();
    });
    process.stdin.on('error', (err) => {
      clearTimeout(graceTimer);
      reject(err);
    });
  });

  if (inputArr.length === 0) {
    if (argv._.length < 1) {
      await usage();
      process.exit(1);
    }

    // String(): minimist may hand back a number for a numeric-looking
    // positional argument, which path.resolve would reject.
    inputArr.push(fs.readFileSync(path.resolve(process.cwd(), String(argv._[0]))));
  }

  const processor = new OdtProcessor();
  await processor.loadFromBuffer(Buffer.concat(inputArr));
  const contentXml = processor.getContentXml();
  if (!contentXml) {
    throw Error('No odt processed');
  }

  const parser = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentContent');
  const document: DocumentContent = parser.unmarshal(contentXml);
  if (!document) {
    throw Error('No document unmarshalled');
  }

  const parserStyles = new UnMarshaller(LIBREOFFICE_CLASSES, 'DocumentStyles');
  const styles: DocumentStyles = parserStyles.unmarshal(processor.getStylesXml());
  if (!styles) {
    throw Error('No styles unmarshalled');
  }

  const converter = new OdtToMarkdown(document, styles, processor.getFileNameMap(), processor.getXmlMap());
  const markdown = await converter.convert();
  console.log(markdown);
}

// Script entry point: run main() and translate its outcome into an exit code.
// An error flagged with a truthy `isUsageError` prints its message followed by
// the usage text; any other thrown value is printed as-is. Both exit with 1.
try {
  await main();
  process.exit(0);
} catch (err: unknown) {
  // A thrown value can be anything (including null or a primitive), so only
  // read properties from it after confirming it is an object.
  const details = (typeof err === 'object' && err !== null ? err : {}) as {
    isUsageError?: unknown;
    message?: unknown;
  };

  if (details.isUsageError) {
    console.error(details.message);
    await usage();
  } else {
    console.error(err);
  }
  process.exit(1);
}
4 changes: 2 additions & 2 deletions src/containers/transform/TaskLocalFileTransform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ export class TaskLocalFileTransform extends QueueTask {
const picturesDirAbsolute = destinationPath + '/' + this.realFileName.replace(/.md$/, '.assets/');

if (SINGLE_THREADED_TRANSFORM) {
const processor = new OdtProcessor(odtPath, true);
await processor.load();
const processor = new OdtProcessor(true);
await processor.load(odtPath);
await processor.unzipAssets(destinationPath, this.realFileName);
const content = processor.getContentXml();
const stylesXml = processor.getStylesXml();
Expand Down
24 changes: 20 additions & 4 deletions src/odt/OdtProcessor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,33 @@ export class OdtProcessor {
private fileNameMap: { [name: string]: string };
private xmlMap: { [name: string]: string };

constructor(private odtPath: string, private contentAddressable = false) {
constructor(private contentAddressable = false) {
this.fileNameMap = {};
this.xmlMap = {};
}

async load() {
if (!fs.existsSync(this.odtPath)) {
async load(odtPath: string) {
if (!fs.existsSync(odtPath)) {
return;
}
const jsZip = new JSZip();
const input: Buffer = fs.readFileSync(this.odtPath);
const input: Buffer = fs.readFileSync(odtPath);
const zip = await jsZip.loadAsync(input);

this.files = zip.folder('').files;

if (this.files['content.xml']) {
this.contentXml = await this.files['content.xml'].async('string');
}
if (this.files['styles.xml']) {
this.stylesXml = await this.files['styles.xml'].async('string');
}

await this.processMathMl();
}

async loadFromBuffer(input: Buffer): Promise<void> {
const jsZip = new JSZip();
const zip = await jsZip.loadAsync(input);

this.files = zip.folder('').files;
Expand Down
4 changes: 2 additions & 2 deletions src/odt/executeOdtToMarkdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ import {generateDocumentFrontMatter} from '../containers/transform/frontmatters/
import {OdtProcessor} from './OdtProcessor.ts';

export async function executeOdtToMarkdown(workerData) {
const processor = new OdtProcessor(workerData.odtPath, true);
await processor.load();
const processor = new OdtProcessor(true);
await processor.load(workerData.odtPath);
await processor.unzipAssets(workerData.destinationPath, workerData.realFileName);
const content = processor.getContentXml();
const stylesXml = processor.getStylesXml();
Expand Down
32 changes: 32 additions & 0 deletions src/odt2md.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash

# Launcher for the odt2md CLI: runs src/cli/odt2md.ts through node with the
# ts-node ESM loader.
#
# Usage:
#   odt2md [--inspect] [args...]
#
# --inspect is consumed here and handed to node itself; every other argument
# is forwarded to the CLI unchanged, one word per original argument, so paths
# containing spaces survive.

FULL_PATH="$(readlink -f "${BASH_SOURCE[0]}")"
MAIN_DIR="$(dirname "$FULL_PATH")/.."

POSITIONAL_ARGS=()
NODE_ARGS=(--no-warnings --enable-source-maps --experimental-specifier-resolution=node --loader ts-node/esm)

while [[ $# -gt 0 ]]; do
  case $1 in
    --inspect)
      # Node option, not a CLI argument: keep it out of POSITIONAL_ARGS so the
      # CLI's argument parser does not take the following word as its value.
      NODE_ARGS=(--inspect "${NODE_ARGS[@]}")
      shift # past argument
      ;;
    *)
      POSITIONAL_ARGS+=("$1") # save positional arg
      shift # past argument
      ;;
  esac
done

/usr/bin/env node "${NODE_ARGS[@]}" "$MAIN_DIR/src/cli/odt2md.ts" "${POSITIONAL_ARGS[@]}"
4 changes: 2 additions & 2 deletions test/odt/OdtLoad.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ describe('OdtLoad', () => {
it('test content.xml transform to object', async () => {
const fileSystem = new FileContentService(__dirname);
const odtPath = fileSystem.getRealPath() + '/' + 'example_document.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const processor = new OdtProcessor();
await processor.load(odtPath);

const content = processor.getContentXml();

Expand Down
4 changes: 2 additions & 2 deletions test/odt_md/Issues.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ describe('MarkDownTransformTest', () => {
async function transformOdt(id: string) {
const folder = new FileContentService(__dirname);
const odtPath = folder.getRealPath() + '/' + id + '.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const processor = new OdtProcessor();
await processor.load(odtPath);
if (!processor.getContentXml()) {
throw Error('No odt processed');
}
Expand Down
6 changes: 3 additions & 3 deletions test/odt_md/MarkDownTransform.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import {assert} from 'chai';
import fs from 'fs';

import {compareTexts, createTmpDir} from '../utils.ts';
import {compareTexts} from '../utils.ts';
import {OdtToMarkdown} from '../../src/odt/OdtToMarkdown.ts';
import {DocumentContent, DocumentStyles, LIBREOFFICE_CLASSES} from '../../src/odt/LibreOffice.ts';
import {UnMarshaller} from '../../src/odt/UnMarshaller.ts';
Expand Down Expand Up @@ -135,8 +135,8 @@ describe('MarkDownTransformTest', () => {
async function transformOdt(id: string) {
const folder = new FileContentService(__dirname);
const odtPath = folder.getRealPath() + '/' + id + '.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const processor = new OdtProcessor();
await processor.load(odtPath);
if (!processor.getContentXml()) {
throw Error('No odt processed');
}
Expand Down
4 changes: 2 additions & 2 deletions test/odt_md/RewriteRules.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ describe('RewriteRulesTest', () => {
async function transformOdt(id: string) {
const folder = new FileContentService(__dirname);
const odtPath = folder.getRealPath() + '/' + id + '.odt';
const processor = new OdtProcessor(odtPath);
await processor.load();
const processor = new OdtProcessor();
await processor.load(odtPath);
if (!processor.getContentXml()) {
throw Error('No odt processed');
}
Expand Down

0 comments on commit 4201645

Please sign in to comment.