This script add-in generates a custom column that shows the number of words in documents. It uses Pandoc and a bit of RegEx.
The regex used to count the words is quick-and-dirty, and its results will most likely differ from other counting methods - feel free to offer improvements!
In general, it should work with all file types Pandoc can read.
How to set up and use
Install Pandoc from www.pandoc.org
Save ColumnPandocWordCount.js.txt to ↓
%appdata%\GPSoftware\Directory Opus\Script AddIns
Adjust the path to pandoc.exe in the script file
Toggle the column with...
Set COLUMNSTOGGLE="scp:PandocWordCount/PWC(!,a,0)"
... or use it for renaming.
The column shows the number of words or an error code.
Error Codes | |
---|---|
-1 | Extension was excluded |
-2 | Conversion with Pandoc was not successful |
-3 | File is empty |
Things you might enjoy reading
Inspired by Column: Word Count for Microsoft Word documents
How to use buttons and scripts from this forum
The script's inner workings
// Shared helper objects and settings, created once when the script is loaded.

// Directory Opus command object. No active code in this script uses it.
var cmd = DOpus.Create().Command();
// Directory Opus file-system utilities: alias resolution, temp-file paths, existence checks.
var fsu = DOpus.FSUtil();
// Windows Scripting FileSystemObject, used by OnColumn to read Pandoc's output file.
var fso = new ActiveXObject('Scripting.FileSystemObject');
// Windows Script Host shell, used by OnColumn to launch Pandoc.
var wsh = new ActiveXObject('WScript.Shell');
// Location of the Pandoc executable, resolved from the /programfiles folder alias.
var exePandoc = fsu.Resolve('/programfiles\\Pandoc\\pandoc.exe'); // adjust to your system
// File extensions (with leading dot) that OnColumn hands to Pandoc;
// every other extension gets error code -1.
var extensions = DOpus.Create().StringSetI('.doc', '.docx', '.txt', '.md', '.log'); // append if needed
/**
 * Script initialisation handler: fills in the add-in's metadata.
 *
 * @param initData Object supplied by the host; this function only writes
 *                 the properties listed below onto it.
 */
function OnInit(initData) {
    // Identity and description of the add-in.
    initData.name = 'PandocWordCount';
    initData.desc = 'Count words with a bit of regex';
    initData.url = 'https://resource.dopus.com/t/column-pandoc-word-count/37559';

    // Release information and minimum host version.
    initData.version = '2024-02-16';
    initData.min_version = '12.24';

    // Enable the script as soon as it is installed.
    initData.default_enable = true;
}
/**
 * Column registration handler: adds the single "PWC" column and wires it
 * to OnColumn.
 *
 * @param addColData Object supplied by the host; its AddColumn() method is
 *                   called once and the returned column object is configured.
 */
function OnAddColumns(addColData) {
    var column = addColData.AddColumn();

    // The same short identifier is used for internal name, header and label.
    column.name = 'PWC';
    column.label = 'PWC';
    column.header = 'PWC';

    // Presentation settings.
    column.type = 'double';
    column.justify = 'right';
    column.autogroup = true;

    // Name of the function that computes the value for each item.
    column.method = 'OnColumn';
}
/**
 * Column value handler: converts the item to plain text with Pandoc and
 * stores the number of words in scriptColData.value.
 *
 * Values written to scriptColData.value:
 *   >= 0  number of words (runs of \w characters) in the converted text
 *   -1    the item's extension is not in the `extensions` set
 *   -2    Pandoc could not be started, exited with a non-zero code, or
 *         produced no output file
 *   -3    Pandoc's output file is empty
 * Directories are skipped and no value is written for them.
 *
 * The temporary output file is always removed before the function returns.
 *
 * @param scriptColData Object supplied by the host; `item` is read and
 *                      `value` is written.
 */
function OnColumn(scriptColData) {
    var item = scriptColData.item;
    if (item.is_dir) return;

    if (!extensions.exists(item.ext)) {
        scriptColData.value = -1;
        return; // Not a wanted extension
    }

    // Plain string so it can be handed to both the Opus and the FSO APIs.
    var tmpFile = String(fsu.GetTempFilePath('.txt', 'pwc-'));

    var cmdLine = '"' + exePandoc + '"' +
        ' --wrap=none' +
        ' --write=plain' +
        ' --output="' + tmpFile + '"' +
        ' "' + item + '"';
    // DOpus.Output(cmdLine);

    try {
        var exitCode;
        try {
            // Hidden window (0), wait for Pandoc to finish (true).
            exitCode = wsh.Run(cmdLine, 0, true);
        } catch (runError) {
            // Run throws when the executable cannot be launched at all,
            // e.g. because the configured pandoc.exe path is wrong.
            exitCode = -1;
        }

        if (exitCode !== 0 || !fsu.Exists(tmpFile)) {
            scriptColData.value = -2;
            return; // Conversion with Pandoc was not successful
        }

        var tf = fso.OpenTextFile(tmpFile, 1); // 1 = ForReading
        try {
            if (tf.AtEndOfStream) {
                scriptColData.value = -3; // File is empty
            } else {
                // Counting \w+ runs gives the same result as splitting on
                // /\b\W+\b/ whenever the text contains at least one word,
                // but yields 0 (instead of 1) for text without any words.
                var words = tf.ReadAll().match(/\w+/g);
                scriptColData.value = words ? words.length : 0;
            }
        } finally {
            tf.Close();
        }
    } finally {
        // Best-effort cleanup: a temp file that cannot be deleted must not
        // turn an otherwise valid column value into a script error.
        try {
            if (fso.FileExists(tmpFile)) fso.DeleteFile(tmpFile, true);
        } catch (cleanupError) {
        }
    }
}
// https://www.verypdf.com/wordpress/201301/how-to-convert-pdf-to-text-by-command-line-34013.html
// https://www.xpdfreader.com/download.html
// Q: How can I convert PDFs to other formats using pandoc?
// A: You can’t. You can try opening the PDF in Word or Google Docs and saving in a format from which pandoc can convert directly.