Column: Pandoc Word Count

This script add-in generates a custom column that shows the number of words in documents. It uses Pandoc and a bit of RegEx.

So far, it is a quick hack inspired by this thread.

The regex used to count the words is similarly quick-and-dirty and its results will most likely differ from other counting methods - feel free to offer improvements!

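In general, it should work with all file types Pandoc can read. Only files whose extension is listed in the script's `extensions` set are processed, so append further extensions there if needed.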

How to use:

// --- Directory Opus helper objects ---
var fsu = DOpus.FSUtil();            // resolves folder aliases and checks file existence
var cmd = DOpus.Create().Command();  // runs the Delete command that removes the temp file

// --- Windows scripting objects ---
var wsh = new ActiveXObject('WScript.Shell');              // launches pandoc.exe
var fso = new ActiveXObject('Scripting.FileSystemObject'); // reads Pandoc's plain-text output

// --- User configuration ---
var exePandoc = fsu.Resolve('/programfiles\\Pandoc\\pandoc.exe'); // adjust to your system
// Case-insensitive set of extensions the column is calculated for.
// NOTE(review): Pandoc reads .docx but, as far as I know, has no reader for
// legacy binary .doc files - confirm before relying on counts for '.doc'.
var extensions = DOpus.Create().StringSetI('.doc', '.docx', '.txt', '.md', '.log'); // append if needed

/**
 * Initialisation callback: fills in the script's metadata on `initData`
 * and registers the single "PWC" column, whose values are produced by
 * the `OnColumn` function.
 *
 * @param initData Object receiving the script metadata; must provide AddColumn().
 */
function OnInit(initData) {
    // Script metadata.
    initData.min_version = '12.0';
    initData.default_enable = true;
    initData.name = 'Pandoc Word Count';
    initData.desc = 'Count words with a bit of regex';

    var wordCountColumn = initData.AddColumn();

    // Identity and callback of the column.
    wordCountColumn.name = 'PWC';
    wordCountColumn.method = 'OnColumn';

    // How the column is presented in the file display.
    wordCountColumn.header = 'PWC';
    wordCountColumn.label = 'Pandoc Word Count';
    wordCountColumn.type = 'number';
    wordCountColumn.justify = 'right';
    wordCountColumn.autogroup = true;
}

/**
 * Column callback: converts the item to plain text with Pandoc and stores
 * the number of words in `scriptColData.value`.
 *
 * Folders and files whose extension is not in `extensions` are skipped and
 * get no value. If Pandoc reports a failure (non-zero exit code) or writes
 * no output file, no value is set either. A document without any words
 * yields 0.
 *
 * @param scriptColData Column data object; `item` is read, `value` is written.
 */
function OnColumn(scriptColData) {
    var item = scriptColData.item;
    if (item.is_dir) return;
    if (!extensions.exists(item.ext)) return;

    // The timestamp alone is only unique to the millisecond; the random
    // suffix keeps two evaluations that land in the same millisecond from
    // sharing (and overwriting) one temp file.
    var currDate = DOpus.Create().Date();
    var tmpName = currDate.Format('D#yyyyMMdd-T#HHmmss') + '-' + currDate.ms +
        '-' + Math.floor(Math.random() * 1e9) + '.txt';
    var tmpFile = fsu.Resolve('/temp\\' + tmpName);

    var cmdLine = '"' + exePandoc + '" --wrap=none --write=plain --output="' + tmpFile + '" "' + item + '"';
    DOpus.Output(cmdLine);

    try {
        // Window style 0 = hidden; true = wait for Pandoc and get its exit code.
        var exitCode = wsh.Run(cmdLine, 0, true);
        if (exitCode !== 0 || !fsu.Exists(tmpFile)) return; // Conversion with Pandoc was not successful

        scriptColData.value = pwcCountWords(pwcReadAllText(tmpFile));
    } finally {
        // Always remove the temp file, even if reading it failed.
        // NORECYCLE keeps these throw-away files out of the Recycle Bin.
        if (fsu.Exists(tmpFile)) {
            cmd.RunCommand('Delete FILE="' + tmpFile + '" NORECYCLE QUIET');
        }
    }
}

/**
 * Reads a whole text file via the FileSystemObject and returns its content.
 * Returns '' for an empty file, because TextStream.ReadAll() throws
 * "Input past end of file" when there is nothing to read.
 * NOTE(review): the file is opened with the FSO default encoding, while
 * Pandoc is expected to write UTF-8 - harmless for the ASCII-only word
 * regex used here, but confirm before reusing this for anything else.
 */
function pwcReadAllText(path) {
    var stream = fso.OpenTextFile(path, 1); // 1 = ForReading
    try {
        return stream.AtEndOfStream ? '' : stream.ReadAll();
    } finally {
        stream.Close();
    }
}

/**
 * Counts runs of word characters (\w, i.e. ASCII letters, digits and '_').
 * For text containing at least one word this equals the former
 * split(/\b\W+\b/).length; unlike that expression it returns 0, not 1,
 * when the text contains no word at all.
 */
function pwcCountWords(text) {
    var words = text.match(/\w+/g);
    return words ? words.length : 0;
}

2 Likes