This script add-in generates a custom column that shows the number of words in documents. It uses Pandoc and a bit of RegEx.
So far it is a quick hack inspired by this thread
The regex used to count the words is similarly quick-and-dirty and its results will most likely differ from other counting methods - feel free to offer improvements!
In general, it should work with all file types Pandoc can read.
How to use:
- Install Pandoc from www.pandoc.org
- Download ColumnPandocWordCount.js.txt to the /scripts folder
- In the script file, adjust the path to pandoc.exe so it matches your installation
- Add Pandoc Word Count as a column (or use it for renaming)
// Directory Opus helper objects shared by the column callback.
var cmd = DOpus.Create().Command();
var fsu = DOpus.FSUtil();

// Windows scripting COM objects: one to read the converted text file,
// one to launch Pandoc.
var fso = new ActiveXObject('Scripting.FileSystemObject');
var wsh = new ActiveXObject('WScript.Shell');

// Full path of the Pandoc executable -- adjust to your system.
var exePandoc = fsu.Resolve('/programfiles\\Pandoc\\pandoc.exe');

// Extensions the column is evaluated for (case-insensitive set) -- append if needed.
var extensions = DOpus.Create().StringSetI(
    '.doc',
    '.docx',
    '.txt',
    '.md',
    '.log'
);
/**
 * Script add-in entry point: fills in the script's metadata and registers
 * the single "PWC" column, whose values are produced by OnColumn.
 *
 * @param initData Initialization object passed in by the host; receives the
 *                 metadata and provides AddColumn().
 */
function OnInit(initData) {
    // Script metadata.
    initData.name = 'Pandoc Word Count';
    initData.desc = 'Count words with a bit of regex';
    initData.default_enable = true;
    initData.min_version = '12.0';

    // Column definition: a right-aligned numeric column handled by OnColumn.
    var wordCountColumn = initData.AddColumn();
    wordCountColumn.name = 'PWC';
    wordCountColumn.header = 'PWC';
    wordCountColumn.method = 'OnColumn';
    wordCountColumn.label = 'Pandoc Word Count';
    wordCountColumn.justify = 'right';
    wordCountColumn.autogroup = true;
    wordCountColumn.type = 'number';
}
/**
 * Counts the words in a plain-text string.
 *
 * A "word" is a maximal run of regex word characters (\w: ASCII letters,
 * digits and underscore). For any text containing at least one word this
 * gives the same number as splitting on /\b\W+\b/, but it yields 0 instead
 * of 1 for text without any word characters (e.g. an empty document).
 *
 * @param text The text to analyse.
 * @returns The number of words found (0 if there are none).
 */
function countWords(text) {
    var words = String(text).match(/\w+/g);
    return words ? words.length : 0;
}

/**
 * Column callback: converts the item to plain text with Pandoc, counts the
 * words in the result and stores the count in scriptColData.value.
 *
 * The value is left unset for directories, for extensions not listed in
 * `extensions`, and when Pandoc fails or produces no output file.
 *
 * @param scriptColData Column data object passed in by the host; its `item`
 *                      is read and its `value` is written.
 */
function OnColumn(scriptColData) {
    var item = scriptColData.item;
    if (item.is_dir) return;
    if (!extensions.exists(item.ext)) return;

    // Temp file name is derived from the current time, down to milliseconds.
    var currDate = DOpus.Create().Date();
    var tmpFile = fsu.Resolve('/temp\\' + currDate.Format('D#yyyyMMdd-T#HHmmss') + '-' + currDate.ms + '.txt');
    var cmdLine = '"' + exePandoc + '" --wrap=none --write=plain --output="' + tmpFile + '" "' + item + '"';
    DOpus.Output(cmdLine);

    var tmpText = '';
    try {
        // Window style 0 = hidden; true = wait for Pandoc to finish and
        // return its exit code.
        var exitCode = wsh.Run(cmdLine, 0, true);
        if (exitCode !== 0 || !fsu.Exists(tmpFile)) return; // Conversion with Pandoc was not successful

        var tf = fso.OpenTextFile(tmpFile, 1); // 1 = open for reading
        try {
            // ReadAll() raises "Input past end of file" on an empty file,
            // so only read when there is something to read.
            if (!tf.AtEndOfStream) tmpText = tf.ReadAll();
        } finally {
            tf.Close();
        }
    } finally {
        // Always remove the temp file, even if reading it failed.
        if (fsu.Exists(tmpFile)) cmd.RunCommand('Delete FILE="' + tmpFile + '" QUIET');
    }

    scriptColData.value = countWords(tmpText);
}