About:
I needed to search and eliminate duplicates within a folder with ~10k files.
Files with different names could have the same file size and be duplicates. Also, most of them were HUGE, so the chance of a false duplicate (two different files with exactly the same byte size) is practically nonexistent.
DOpus' own Duplicate Finder can only match files with mismatched filenames by doing an MD5 content compare, which would take forever, so I rolled a simplistic script function.
My plan was to enumerate any duplicates and add them to a coll://collection
Even disregarding the total lack of optimisation in the enumerating phase - everything went pretty fast until I hit a wall with the Adding to collection part.
Attempt #1:
cmd.RunCommand("Copy COPYTOCOLL=member FILE TO coll://collection")
inside the loop. Took 3 minutes to populate the collection with the 1300 found duplicates.
I might completely suck at reading docs but I couldn't find a way to manipulate a specific collection within a script as an object directly without using the DOpus.Command interface.
Attempt #2:
So next I thought of writing the found dupes to a text file and importing that one as a collection with a single command as that's where the bottleneck was.
Writing the list was simple enough but then I hit another wall - namely I cannot invoke
/col import /clear "tempfile"
from within dopus as it is not an internal but an external dopusrt command.
Finally:
After using another ActiveX scripting object I got the dopusrt path from registry and ran it as an external command. Now the entire thing is near-instant.
As a bonus it tells dopus to navigate to the new collection and sort it by size.
Finally as I'm mostly a C# guy - any ideas on optimizing the dumb duplicate finding logic are welcome.
Have fun with it.
Get the Script:
Raw Javascript
/**
 * Directory Opus button entry point: finds files in the source tab that share
 * their byte size with at least one other file and imports them into the
 * "DuplicateSizes" file collection, then navigates there and sorts by size.
 *
 * Files are compared by size only; no content comparison is performed.
 *
 * @param clickData  Click data object supplied by Directory Opus; the script
 *                   reads clickData.func.command and clickData.func.sourcetab.
 */
function OnClick(clickData)
{
    DOpus.ClearOutput();
    var cmd = clickData.func.command;
    var sourceTab = clickData.func.sourcetab;

    DOpus.Output("Enumerating files in: " + String(sourceTab.path));

    // Group every file path by its size. A plain object is used as the map:
    // an Array keyed by size strings would be treated as a sparse array.
    var filesBySize = {};
    var hasOwn = Object.prototype.hasOwnProperty;
    // 'var' keeps the enumerator local instead of leaking an implicit global.
    for (var enumFiles = new Enumerator(sourceTab.files); !enumFiles.atEnd(); enumFiles.moveNext()) {
        var file = enumFiles.item();
        var sizeKey = String(file.size);
        var filename = String(file.realpath);

        if (hasOwn.call(filesBySize, sizeKey)) {
            filesBySize[sizeKey].push(filename);
        } else {
            filesBySize[sizeKey] = [filename];
        }
    }

    // Dump the duplicate candidates to a temporary list file so they can be
    // imported with one command (adding them one by one is awfully slow).
    var fso = new ActiveXObject("Scripting.FileSystemObject");
    var tfolder = fso.GetSpecialFolder(2); //TemporaryFolder = 2
    var tname = fso.GetTempName();
    var tfile = tfolder.CreateTextFile(tname, true, true); //Overwrite flag & make it unicode

    var duplicateGroups = 0;
    try {
        //Add all files from groups with more than one member to the list
        for (var key in filesBySize) {
            if (!hasOwn.call(filesBySize, key)) {
                continue;
            }
            var paths = filesBySize[key];
            if (paths.length > 1) {
                duplicateGroups++;
                for (var i = 0; i < paths.length; i++) {
                    tfile.WriteLine(paths[i]);
                }
            }
        }
    } finally {
        // Always release the file handle, even if a write fails.
        tfile.Close();
    }

    var collectionDump = fso.BuildPath(tfolder.Path, tname);
    DOpus.Output("Dumped " +duplicateGroups+" duplicate groups to: " + collectionDump);

    //Shell object to get dopus path from registry and run the dopusrt command
    var shell = new ActiveXObject("WScript.shell");

    //Find dopus in registry
    var dopusPath = shell.RegRead("HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\DOpus.exe\\Path") + "\\dopusrt.exe";
    DOpus.Output("Dopusrt found at: " + dopusPath);

    //Run dopusrt command. The third argument (bWaitOnReturn = true) blocks
    //until dopusrt exits; without it Run returns immediately and the GO below
    //can race the import.
    //NOTE(review): the temp list file is left in place because it is unclear
    //whether the import is complete when dopusrt exits - confirm before deleting it.
    DOpus.Output("Importing to collection: DuplicateSizes");
    shell.Run("\"" + dopusPath + "\" /col import /clear /create /nocheck DuplicateSizes \"" + collectionDump + "\"", 1, true);

    //Navigate to the collection
    cmd.RunCommand("GO path=coll://DuplicateSizes/");
    //Sort by Size
    cmd.RunCommand("SET SORTBY=size");
}
Button Code
<?xml version="1.0"?>
<button backcol="none" display="both" label_pos="right" textcol="none">
	<label>File Size Duplicates</label>
	<tip>Dump all files in current view - having size duplicates to a Collection</tip>
	<icon1>#dupepane</icon1>
	<function type="script">
		<instruction>@script JScript</instruction>
		<instruction>// Finds files in the source tab that share their byte size with at least one</instruction>
		<instruction>// other file and imports them into the &quot;DuplicateSizes&quot; collection, then</instruction>
		<instruction>// navigates there and sorts by size. Files are compared by size only.</instruction>
		<instruction>function OnClick(clickData)</instruction>
		<instruction>{</instruction>
		<instruction>    DOpus.ClearOutput();</instruction>
		<instruction>    var cmd = clickData.func.command;</instruction>
		<instruction>    var sourceTab = clickData.func.sourcetab;</instruction>
		<instruction />
		<instruction>    DOpus.Output("Enumerating files in: " + String(sourceTab.path));</instruction>
		<instruction />
		<instruction>    // Group every file path by its size. A plain object is used as the map:</instruction>
		<instruction>    // an Array keyed by size strings would be treated as a sparse array.</instruction>
		<instruction>    var filesBySize = {};</instruction>
		<instruction>    var hasOwn = Object.prototype.hasOwnProperty;</instruction>
		<instruction>    // 'var' keeps the enumerator local instead of leaking an implicit global.</instruction>
		<instruction>    for (var enumFiles = new Enumerator(sourceTab.files); !enumFiles.atEnd(); enumFiles.moveNext()) {</instruction>
		<instruction>        var file = enumFiles.item();</instruction>
		<instruction>        var sizeKey = String(file.size);</instruction>
		<instruction>        var filename = String(file.realpath);</instruction>
		<instruction />
		<instruction>        if (hasOwn.call(filesBySize, sizeKey)) {</instruction>
		<instruction>            filesBySize[sizeKey].push(filename);</instruction>
		<instruction>        } else {</instruction>
		<instruction>            filesBySize[sizeKey] = [filename];</instruction>
		<instruction>        }</instruction>
		<instruction>    }</instruction>
		<instruction />
		<instruction>    // Dump the duplicate candidates to a temporary list file so they can be</instruction>
		<instruction>    // imported with one command (adding them one by one is awfully slow).</instruction>
		<instruction>    var fso = new ActiveXObject("Scripting.FileSystemObject");</instruction>
		<instruction>    var tfolder = fso.GetSpecialFolder(2); //TemporaryFolder = 2</instruction>
		<instruction>    var tname = fso.GetTempName();</instruction>
		<instruction>    var tfile = tfolder.CreateTextFile(tname, true, true); //Overwrite flag &amp; make it unicode</instruction>
		<instruction />
		<instruction>    var duplicateGroups = 0;</instruction>
		<instruction>    try {</instruction>
		<instruction>        //Add all files from groups with more than one member to the list</instruction>
		<instruction>        for (var key in filesBySize) {</instruction>
		<instruction>            if (!hasOwn.call(filesBySize, key)) {</instruction>
		<instruction>                continue;</instruction>
		<instruction>            }</instruction>
		<instruction>            var paths = filesBySize[key];</instruction>
		<instruction>            if (paths.length &gt; 1) {</instruction>
		<instruction>                duplicateGroups++;</instruction>
		<instruction>                for (var i = 0; i &lt; paths.length; i++) {</instruction>
		<instruction>                    tfile.WriteLine(paths[i]);</instruction>
		<instruction>                }</instruction>
		<instruction>            }</instruction>
		<instruction>        }</instruction>
		<instruction>    } finally {</instruction>
		<instruction>        // Always release the file handle, even if a write fails.</instruction>
		<instruction>        tfile.Close();</instruction>
		<instruction>    }</instruction>
		<instruction />
		<instruction>    var collectionDump = fso.BuildPath(tfolder.Path, tname);</instruction>
		<instruction>    DOpus.Output("Dumped " +duplicateGroups+" duplicate groups to: " + collectionDump);</instruction>
		<instruction />
		<instruction>    //Shell object to get dopus path from registry and run the dopusrt command</instruction>
		<instruction>    var shell = new ActiveXObject("WScript.shell");</instruction>
		<instruction />
		<instruction>    //Find dopus in registry</instruction>
		<instruction>    var dopusPath = shell.RegRead("HKLM\\SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\DOpus.exe\\Path") + "\\dopusrt.exe";</instruction>
		<instruction>    DOpus.Output("Dopusrt found at: " + dopusPath);</instruction>
		<instruction />
		<instruction>    //Run dopusrt command. The third argument (bWaitOnReturn = true) blocks</instruction>
		<instruction>    //until dopusrt exits; without it Run returns immediately and the GO below</instruction>
		<instruction>    //can race the import.</instruction>
		<instruction>    //NOTE(review): the temp list file is left in place because it is unclear</instruction>
		<instruction>    //whether the import is complete when dopusrt exits - confirm before deleting it.</instruction>
		<instruction>    DOpus.Output("Importing to collection: DuplicateSizes");</instruction>
		<instruction>    shell.Run("\"" + dopusPath + "\" /col import /clear /create /nocheck DuplicateSizes \"" + collectionDump + "\"", 1, true);</instruction>
		<instruction />
		<instruction>    //Navigate to the collection</instruction>
		<instruction>    cmd.RunCommand("GO path=coll://DuplicateSizes/");</instruction>
		<instruction />
		<instruction>    //Sort by Size</instruction>
		<instruction>    cmd.RunCommand("SET SORTBY=size");</instruction>
		<instruction>}</instruction>
	</function>
</button>