From e3cf968fa5441474151717cbc2d661631012e614 Mon Sep 17 00:00:00 2001 From: Alain Borel Date: Sat, 7 Sep 2024 14:06:05 +0200 Subject: [PATCH 1/5] integrate PR #41 + Zotero6 fix --- src/chrome/content/preferences.xul | 12 ++++++ src/chrome/content/zoteroocr.js | 58 +++++++++++++++++++--------- src/defaults/preferences/defaults.js | 3 ++ src/prefs.js | 3 ++ src/prefs.xhtml | 11 +++++- src/zotero-ocr.js | 51 ++++++++++++++++-------- 6 files changed, 101 insertions(+), 37 deletions(-) diff --git a/src/chrome/content/preferences.xul b/src/chrome/content/preferences.xul index 783c975..76df0fe 100644 --- a/src/chrome/content/preferences.xul +++ b/src/chrome/content/preferences.xul @@ -31,6 +31,9 @@ + + + @@ -42,6 +45,14 @@ @@ -55,6 +66,7 @@ + diff --git a/src/chrome/content/zoteroocr.js b/src/chrome/content/zoteroocr.js index 534707a..bd0f807 100644 --- a/src/chrome/content/zoteroocr.js +++ b/src/chrome/content/zoteroocr.js @@ -105,7 +105,7 @@ Zotero.OCR = new function() { // if the PDF has no parent item, there is no reasonable place to attach the output files // => create an empty parent item to keep things tidy if (pdfItem.isTopLevelItem()) { - await Zotero.getActiveZoteroPane().createEmptyParent(pdfItem); + yield Zotero.getActiveZoteroPane().createEmptyParent(pdfItem); } item = Zotero.Items.get(item.parentItemID); } @@ -138,10 +138,13 @@ Zotero.OCR = new function() { let imageList = OS.Path.join(dir, 'image-list.txt'); if (!(yield OS.File.exists(imageList))) { try { - Zotero.debug("Running " + pdfinfo + ' ' + pdf + ' ' + infofile); - yield Zotero.Utilities.Internal.exec(pdfinfo, [pdf, infofile]); - Zotero.debug("Running " + pdftoppm + ' -png -r 300 ' + pdf + ' ' + dir + '/page'); - yield Zotero.Utilities.Internal.exec(pdftoppm, ['-png', '-r', 300, pdf, dir + '/page']); + let pdfinfoCmdArgs = [pdf, infofile]; + Zotero.debug("Running " + pdfinfo + ' ' + pdfinfoCmdArgs.join(' ')); + yield Zotero.Utilities.Internal.exec(pdfinfo, pdfinfoCmdArgs); + + let pdftoppmCmdArgs = ['-png', '-r', Zotero.Prefs.get("zoteroocr.outputDPI"), pdf, dir + '/page']; + Zotero.debug("Running " + pdftoppm + ' ' + pdftoppmCmdArgs.join(' ')); + yield Zotero.Utilities.Internal.exec(pdftoppm, pdftoppmCmdArgs); } catch (e) { Zotero.logError(e); @@ -159,6 +162,8 @@ Zotero.OCR = new function() { let parameters = [dir + '/image-list.txt']; parameters.push(ocrbase); + parameters.push('--psm'); + parameters.push(Zotero.Prefs.get("zoteroocr.PSMMode")); if (Zotero.Prefs.get("zoteroocr.language")) { parameters.push('-l'); parameters.push(Zotero.Prefs.get("zoteroocr.language")); @@ -207,26 +212,41 @@ Zotero.OCR = new function() { for (let i = 1; i < upperLimit; i++) { let pagename = 'page-' + i + '.html'; let htmlfile = Zotero.File.pathToFile(OS.Path.join(dir, pagename)); - let pagecontent = preamble + "
\n\n'; + let pagecontent = preamble + "
\n\n'; Zotero.File.putContents(htmlfile, pagecontent); - // Zotero.Attachments.importFromFile() works in group libraries, linkFromFile() did not - await Zotero.Attachments.importFromFile({ - file: OS.Path.join(dir, pagename), - contentType: "text/html", - parentItemID: item.id, - libraryID: item.libraryID - }); + // Zotero.Attachments.importFromFile() works in group libraries, linkFromFile() does not + if (Zotero.Prefs.get("zoteroocr.outputAsCopyAttachment")) { + yield Zotero.Attachments.importFromFile({ + file: OS.Path.join(dir, pagename), + contentType: "text/html", + libraryID: item.libraryID, + parentItemID: item.id, + }); + } else { + yield Zotero.Attachments.linkFromFile({ + file: OS.Path.join(dir, pagename), + contentType: "text/html", + parentItemID: item.id + }); + } } } // attach PDF if it is a new one if (Zotero.Prefs.get("zoteroocr.outputPDF") && !(Zotero.Prefs.get("zoteroocr.overwritePDF"))) { - // Zotero.Attachments.importFromFile() works in group libraries, linkFromFile() did not - await Zotero.Attachments.importFromFile({ - file: ocrbase + '.pdf', - parentItemID: item.id, - libraryID: item.libraryID - }); + // Zotero.Attachments.importFromFile() works in group libraries, linkFromFile() does not + if (Zotero.Prefs.get("zoteroocr.outputAsCopyAttachment")) { + yield Zotero.Attachments.importFromFile({ + file: ocrbase + '.pdf', + libraryID: item.libraryID, + parentItemID: item.id, + }); + } else { + yield Zotero.Attachments.linkFromFile({ + file: ocrbase + '.pdf', + parentItemID: item.id + }); + } } if (!Zotero.Prefs.get("zoteroocr.outputPNG") && imageListArray) { diff --git a/src/defaults/preferences/defaults.js b/src/defaults/preferences/defaults.js index b8d17a2..68c2dbb 100644 --- a/src/defaults/preferences/defaults.js +++ b/src/defaults/preferences/defaults.js @@ -5,3 +5,6 @@ pref("extensions.zotero.zoteroocr.overwritePDF", false); pref("extensions.zotero.zoteroocr.outputHocr", true); pref("extensions.zotero.zoteroocr.outputPNG", true); pref("extensions.zotero.zoteroocr.maximumPagesAsHtml", "5"); +pref("extensions.zotero.zoteroocr.outputDPI", "300"); +pref("extensions.zotero.zoteroocr.PSMMode", "3"); +pref("extensions.zotero.zoteroocr.outputAsCopyAttachment", true); diff --git a/src/prefs.js b/src/prefs.js index 7238aa6..7a5c8ad 100644 --- a/src/prefs.js +++ b/src/prefs.js @@ -4,3 +4,6 @@ pref("extensions.zotero.zoteroocr.overwritePDF", false); pref("extensions.zotero.zoteroocr.outputHocr", true); pref("extensions.zotero.zoteroocr.outputPNG", true); pref("extensions.zotero.zoteroocr.maximumPagesAsHtml", "5"); +pref("extensions.zotero.zoteroocr.outputDPI", "300"); +pref("extensions.zotero.zoteroocr.PSMMode", "3"); +pref("extensions.zotero.zoteroocr.outputAsCopyAttachment", true); diff --git a/src/prefs.xhtml b/src/prefs.xhtml index 152276d..2b3659c 100644 --- a/src/prefs.xhtml +++ b/src/prefs.xhtml @@ -9,18 +9,27 @@