From d824553a0fdd5a524fd76dc664067da6cbd1603b Mon Sep 17 00:00:00 2001 From: Alex Garnett Date: Wed, 15 Jul 2020 11:30:39 -0700 Subject: [PATCH] flow control workinggit status! release time --- index.html | 5 ++++- ochre.js | 36 +++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/index.html b/index.html index 7db3f9b..f641f3c 100644 --- a/index.html +++ b/index.html @@ -32,7 +32,10 @@

Folder containing PDFs



- + +
+

diff --git a/ochre.js b/ochre.js index e395d0d..cd71c9f 100644 --- a/ochre.js +++ b/ochre.js @@ -22,30 +22,36 @@ function setlocation() { } function ocr() { - buttonBlock = document.getElementById("ocr"); + buttonBlock = document.getElementById("button"); buttonBlock.style.display = "none"; + waitingBlock = document.getElementById("waiting"); + waitingBlock.style.display = "inline"; fs.readdir(document.getElementById("pdfdir").value, function (err, files) { if (err) { return console.log("Couldn't parse directory path."); } files.forEach(function (file) { - let input = path.join(document.getElementById("pdfdir").value, file); - let tempdirObject = tmp.dirSync(); - let tempdir = tempdirObject.name; - let outpath = path.join(tempdir, "out-%05d.png"); - execSync(commandJoin([path.join("win","gs","bin","gswin64c.exe"), "-o", path.join(tempdir, "%05d.png"), "-sDEVICE=png16m", - "-r300", "-dPDFFitPage=true", input])); - glob(path.join(tempdir, "*.png"), function (er, files) { - files.forEach(f => execSync(commandJoin([path.join("win","Tesseract-OCR","tesseract.exe"), f, path.join(tempdir, path.parse(f).name), "pdf"]))); + let input = path.join(document.getElementById("pdfdir").value, file); + let tempdirObject = tmp.dirSync(); + let tempdir = tempdirObject.name; + execSync([path.join("win","gs","bin","gswin64c.exe"), "-o", path.join(tempdir, "%05d.png"), "-sDEVICE=png16m", "-r300", "-dPDFFitPage=true", input].map(x => `"${x}"`).join(' ')) + var itemsProcessed = 0; + glob(path.join(tempdir, "*.png"), function (er, files) { + files.forEach(f => { + execSync(commandJoin([path.join("win","Tesseract-OCR","tesseract.exe"), f, path.join(tempdir, path.parse(f).name), "pdf"])); + itemsProcessed++; + if(itemsProcessed === files.length) { + let joined_file = path.join(tempdir, "joined.pdf"); + execSync(commandJoin([path.join("win","PDFtk","bin","pdftk.exe"), path.join(tempdir, "*.pdf"), "cat", "output", joined_file])); + let output = input.concat('.ocr.pdf') + execSync(commandJoin([path.join("win","gs","bin","gswin64c.exe"), "-sDEVICE=pdfwrite", "-sPAPERSIZE=letter", "-dFIXEDMEDIA", "-dPDFFitPage", "-o", output, joined_file])); + buttonBlock.style.display = "block"; + waitingBlock.style.display = "none"; + } }); - let pdfs = glob.sync(path.join(tempdir, "*.pdf")).sort(); - let joined_file = path.join(tempdir, "joined.pdf"); - execSync(commandJoin([path.join("win","PDFtk","bin","pdftk.exe"), pdfs, "cat", "output", joined_file])); - let output = input.concat('.ocr.pdf') - execSync(commandJoin([path.join("win","gs","bin","gswin64.exe"), "-sDEVICE=pdfwrite", "-sPAPERSIZE=letter", "-dFIXEDMEDIA", "-dPDFFitPage", "-o", output, joined_file])); + }); }); }); - buttonBlock.style.display = "block"; } document.getElementById("ocr").addEventListener("click", ocr);