Find Duplicates but only for partial file names (command)

Hi, I have a request along similar lines, almost like what is requested in the thread "Find and Delete Partial-Named 'Duplicate' Files".

Let's say, for the following files: (name and version number)
First Set of files 2.4.07.ext
First Set of files 3.0.ext
Second Set 1.0.ext
Second Set 1.9.3.ext
Second Set 2.7.ext
Second Set 3.3.1.2.ext
Third Single 5.2.ext
and so on..

I would like the following selection possibilities:

  1. Select the older version files of a set (for deletion), i.e. leave alone the highest version file in each set and single files and select the rest.
  2. Select the oldest version files alone.
  3. Select only the Singles.

Thanks :slight_smile:

Set the MODE variable to your preferred mode; you could also create three buttons, one for each mode.
Same EasyFilter as before, but I added an Init() callback to register the custom functions.


//Selection modes understood by Evaluate()
var MODES = {};
MODES.SINGLE   = 0;    //items without further versions
MODES.OBSOLETE = 1;    //items having newer version
MODES.LATEST   = 2;    //items being latest version

//The mode the button works in, one of the MODES values
var MODE = MODES.LATEST;

////////////////////////////////////////////////////////////////////////////////
var Init = function(){
    //Init() callback: prepares the set storage and registers the custom
    //helper functions on the object it is called on.
    var owner = this;

    //files collected per set name, filled by MainFilter()
    owner.Sets = {};

    //Looks for a dotted version number (e.g. "1.9.3") in the base name.
    //On a hit the version is stored in file.version and returned,
    //otherwise nothing is returned.
    owner.HasVersion = function(file){
        var found = /((?:\d+\.)+\d+)/.exec(file.baseName);
        if (!found) return;
        var version = found[1];
        file.version = version;
        return version;
    };

    //Returns the base name with "leading text + first version number"
    //reduced to the leading text; a name without version is returned as is.
    owner.GetSetName = function(file){
        var versionedName = /(.+?)((?:\d+\.)+\d+)/;
        return file.baseName.replace(versionedName, "$1");
    };
};
////////////////////////////////////////////////////////////////////////////////
var PreFilter = function( file ){
    //Only files carrying a version number are of interest in the process.
    //Returns true for those and undefined for everything else.
    var version = this.HasVersion(file);
    return version ? true : undefined;
}
////////////////////////////////////////////////////////////////////////////////
var MainFilter = function( file ){
    //Puts the file into its set (the set is created on first use) and returns
    //true to run evaluation on this file. Returns false if no set name could
    //be derived. The set name is also stored in file.setName.
    var setName = (file.setName = this.GetSetName(file));
    if (!setName) return false;
    //Own-property check: a plain lookup would also find inherited members like
    //"constructor" or "tostring" and mistake them for an already existing set.
    if (!Object.prototype.hasOwnProperty.call(this.Sets, setName)) this.Sets[setName] = [];
    this.Sets[setName].push(file);
    return true;
}
////////////////////////////////////////////////////////////////////////////////
var Evaluate = function( file ){
    //Decides whether the foreach-operation (select, delete etc.) is executed
    //on this file, depending on the active MODE.
    var members = this.Sets[file.setName];

    //SINGLE: pass files which are the only member of their set
    if (MODE == MODES.SINGLE) return !!(members && members.length == 1);

    if (!members) return false;

    //sort every set only once; afterwards the first item == highest version
    if (!members.sorted) {
        members.sort(function(left, right){
            return VersionCompare(left.version, right.version, {lexicographical:true, zeroExtend:true} );
        });
    }
    members.sorted = true;

    //LATEST passes the head of the sorted set, OBSOLETE everything but the head
    if (MODE == MODES.LATEST)   return members[0].name == file.name;
    if (MODE == MODES.OBSOLETE) return members[0].name != file.name;
    return false;
}
////////////////////////////////////////////////////////////////////////////////
var ExecuteForEach = function( file ){
    //Called for each file that passed evaluation: selects the file by its exact name.
    var quotedName = '"' + file.name + '"';
    this.cmd.RunCommand('Select ' + quotedName + ' EXACT');
}
////////////////////////////////////////////////////////////////////////////////
var ExecuteForAll = function(){
    //Final step, runs once after all files were processed.
    //"SelectEx MAKEVISIBLE" is used to scroll the selected items into view.
    var finalCommand = 'SelectEx MAKEVISIBLE';
    this.cmd.RunCommand(finalCommand);
}
////////////////////////////////////////////////////////////////////////////////
function EasyFilter(filesIn,cmd,init,preFilter,mainFilter,evaluate,executeForEach,executeForAll){
    //EasyFilter v0.2 - the constructor itself drives the complete pipeline:
    //Init, PreFilter, MainFilter, Evaluate, ExecuteForEach and ExecuteForAll.
    //Every callback is invoked as a method of this object, so "this" inside a
    //callback is the filter itself (files, cmd, FileExists() etc.).
    var self = this, pos;

    self.version        = 0.2; //init added, and "nulled" files will not be evaluated
    self.files          = [];  //files that passed PreFilter
    self.filesFiltered  = [];  //files that passed MainFilter
    self.filesEvaluated = [];  //files that passed Evaluate
    self.cmd            = cmd;
    self.Init           = init;
    self.PreFilter      = preFilter;
    self.MainFilter     = mainFilter;
    self.Evaluate       = evaluate;
    self.ExecuteForEach = executeForEach;
    self.ExecuteForAll  = executeForAll;

    //Escapes the regular expression meta characters of a string.
    String.prototype.esc = function(str){
        return this.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
    };

    //Returns the first pre-filtered file whose name matches the given
    //regular expression source, or null if there is none.
    self.FileExists = function( fileNameRegex ){
        var pattern = new RegExp(fileNameRegex), k = 0;
        while (k < this.files.length){
            if (this.files[k].name.search(pattern) != -1) return this.files[k];
            k++;
        }
        return null;
    };

    //--- stage 0: custom initialisation ---------------------------------------
    DOpus.Output("Initialising....");
    self.Init();

    //--- stage 1: keep the files PreFilter answers with exactly true ----------
    DOpus.Output("PreFiltering ["+filesIn.length+"] files..");
    for (pos = 0; pos < filesIn.length; pos++){
        if (self.PreFilter(filesIn[pos]) !== true) continue;
        self.files.push(filesIn[pos]);
        DOpus.Output("    PreFilter passed ["+filesIn[pos].name+"]");
    }
    DOpus.Output("");

    //--- stage 2: keep the files MainFilter answers with exactly true ---------
    DOpus.Output("MainFiltering ["+self.files.length+"] files..");
    for (pos = 0; pos < self.files.length; pos++){
        if (self.MainFilter(self.files[pos]) !== true) continue;
        DOpus.Output("    MainFilter passed ["+self.files[pos].name+"]");
        self.filesFiltered.push(self.files[pos]);
    }
    DOpus.Output("");

    //--- stage 3: evaluation, entries which were "nulled" are skipped ---------
    DOpus.Output("Evaluating ["+self.filesFiltered.length+"] files..");
    for (pos = 0; pos < self.filesFiltered.length; pos++){
        if (!self.filesFiltered[pos]) continue;
        if (self.Evaluate(self.filesFiltered[pos]) !== true) continue;
        DOpus.Output("    Evaluation passed ["+self.filesFiltered[pos].name+"]");
        self.filesEvaluated.push(self.filesFiltered[pos]);
    }
    DOpus.Output("");

    //--- stage 4: per-file operation on everything that passed evaluation -----
    DOpus.Output("Processing ["+self.filesEvaluated.length+"] files..");
    for (pos = 0; pos < self.filesEvaluated.length; pos++){
        DOpus.Output("    Running ForEachOp ["+self.filesEvaluated[pos].name+"]");
        self.ExecuteForEach(self.filesEvaluated[pos]);
    }
    DOpus.Output("");

    //--- stage 5: final operation ---------------------------------------------
    DOpus.Output("Running ForAll-Operation..");
    self.ExecuteForAll();
    DOpus.Output("");
}
////////////////////////////////////////////////////////////////////////////////
function OnClick(data){
    //Button entry point: takes a snapshot of the source tab's files as plain,
    //lower-cased records and hands them to EasyFilter, whose constructor runs
    //the whole filter pipeline.
    var tabFiles = data.func.sourcetab.files;
    var cmd = data.func.command;
    cmd.ClearFiles();

    var records = [];
    for (var n = 0; n < tabFiles.count; n++){
        var entry = tabFiles(n);
        records.push({
            name     : String(entry.name).toLowerCase(),
            baseName : String(entry.name_stem).toLowerCase(),
            ext      : String(entry.ext).toLowerCase()
        });
    }

    new EasyFilter(records,cmd,Init,PreFilter,MainFilter,Evaluate,ExecuteForEach,ExecuteForAll);
    DOpus.Output("Done.");
}
//////////////////////////////////////////////////////////////////////////
function VersionCompare(v1, v2, options){
    //compares two software version numbers (e.g. "1.7.1" or "1.2b")
    //copyright by Jon Papaioannou (["john", "papaioannou"].join(".") + "@gmail.com")
    //This function is in the public domain. Do what you want with it, no strings attached.
    //
    //v1, v2  : version strings, parts separated by "."
    //options : optional object
    //          lexicographical - parts may carry a letter suffix ("2b"); the numeric
    //                            prefix is compared first, then the suffix as a string
    //          zeroExtend      - the shorter version is padded with "0" parts
    //returns : -1 if v1 is the higher version, 1 if v2 is the higher version,
    //          0 if both are equal, NaN if any part is not valid.
    //Note the sign: used as a sort comparator the highest version comes first.
    var lexicographical = options && options.lexicographical,
        zeroExtend = options && options.zeroExtend,
        v1parts = v1.split('.'),
        v2parts = v2.split('.'),
        partPattern = lexicographical ? /^(\d+)([A-Za-z]*)$/ : /^(\d+)$/;

    function hasInvalidPart(parts) {
        //iterate up to parts.length (comparing the index with the array itself
        //never entered the loop, so nothing was ever rejected)
        for (var p = 0; p < parts.length; p++) {
            if (!partPattern.test(parts[p])) { return true; }
        }
        return false;
    }

    if (hasInvalidPart(v1parts) || hasInvalidPart(v2parts)) { return NaN; }

    if (zeroExtend) {
        while (v1parts.length < v2parts.length) v1parts.push("0");
        while (v2parts.length < v1parts.length) v2parts.push("0");
    }

    for (var i = 0; i < v1parts.length; ++i) {
        //v1 has more parts than v2 and all parts so far were equal
        if (v2parts.length == i) { return -1; }

        var p1 = partPattern.exec(v1parts[i]),
            p2 = partPattern.exec(v2parts[i]),
            n1 = Number(p1[1]),
            n2 = Number(p2[1]);
        if (n1 != n2) { return (n1 > n2) ? -1 : 1; }

        //same number: let the letter suffix decide (only present in lexicographical mode)
        var s1 = p1[2] || "",
            s2 = p2[2] || "";
        if (s1 != s2) { return (s1 > s2) ? -1 : 1; }
    }

    //all parts of v1 matched, but v2 has additional parts
    if (v1parts.length != v2parts.length) { return 1; }
    return 0;
}

Awesome! Thank you soo much!
There were 2 places where it missed in the whole lot..
(Latest Mode)

I think it missed the 'Bluelight Filter' one, since there was no period in the version number.
The 'CalcTape' one is due to the parentheses in the version number.
Pls fix it. More importantly the first one.

Thanks again :slight_smile:

I like to quote Leo here, for further help and fixes, please link your account.
Also please post the problematic sets (file names), so I don't need to create them by hand.

"Bluelight Filter" does indeed fail because of the missing dot in the version, but is "60" really a version? o)
For "CalcTape" I'd need to look more closely.

Thank you tbone. Hours were saved, maybe days.
I had the same issue as original poster, downloaded your script and rejoiced.
I was looking forward to the challenge of scripting it myself, but you saved me some hairs.

Would it be difficult to extend the function to file names of this format?
T123[A]
T123[B]

Please provide more details, I can't make sense of the two filenames yet. o)

In the meantime, I fixed some of my posts, so that any code appears normal again (it was mal-formatted after the forum switch).

tbone,

See these file names for instance:

1060[2].pdf
1061[C].pdf
1061[1].pdf
1062[C].pdf
1063[9].pdf
1063[10].pdf
1064[C].pdf

In this case, the files shown bold should be selected

What determines which files should be selected? If the number is odd? If it's 1061 or 1063? A few examples is not really enough, you need to actually be explicit about the rules.

I guess I can see a pattern here. Select those files which have a sibling while looking at similar names and ignoring what's after the first "[" bracket. I'll be back..

Hello Palladios, try this..
Use one of the button/EasyFilter codes above and replace existing functions with these.
This groups files (in MainFilter()) which share the same name up to the "[" (specified in GetSetName()).
Finally all the files which have more than 1 entry in their set will be flagged to be processed (in Evaluate()). If you have trouble, let us know. o)

CODE REMOVED, OBSOLETE, LOOK BELOW.. o)

Hi tbone,

It seems to fail at line 15, MainFilter:

I can't work out why :thinking:

Same filter functionality but using EasyFilter v0.3.
I kind of disliked how some things were put together, this should integrate more easily and is easier to understand as well, I hope.

////////////////////////////////////////////////////////////////////////////////
var easyFilterConfig = {
	//Group key of an item: the part of its name in front of the first "[".
	//A name without "[" is returned unchanged.
	GetGroupName : function( item ){
		var upToBracket = new RegExp("(.*?)\\[(.*)","ig");
		return item.name.replace(upToBracket, "$1");
	},
	//Lets only items pass whose group holds more than one item;
	//returns undefined for all others.
	Evaluate : function( item ){
		var hasSiblings = item.group.length > 1;
		return hasSiblings ? true : undefined;
	}
};

////////////////////////////////////////////////////////////////////////////////
function OnClick(data){
	//Button entry point: runs EasyFilter (configured by easyFilterConfig) on the
	//source tab's items and selects every item that passed the filter.
	var cmd = data.func.command;
	cmd.ClearFiles();

	var filter = new EasyFilter( easyFilterConfig );
	filter.ImportDOItems( data.func.sourcetab.files );
	var hits = filter.Run();

	DOpus.Output("Processing ["+hits.length+"] filtered items.."); 
	for (var n = 0; n < hits.length; n++) {
		var hitName = hits[n].name;
		DOpus.Output("    " + hitName);
		cmd.RunCommand('Select "'+hitName+'" EXACT');
	}

	//SelectEx is a script add-in; used here to scroll the selection into view
	cmd.RunCommand('SelectEx MAKEVISIBLE');

	DOpus.Output("");
	DOpus.Output("Button done.");
}
////////////////////////////////////////////////////////////////////////////////
function EasyFilter(config){
	//EasyFilter v0.3 - generic item filter working in stages:
	//Init -> PreFilter -> MainFilter (+grouping) -> Evaluate, all driven by Run().
	//"config" may override Init, GetGroupName, PreFilter, MainFilter and Evaluate;
	//every hook is called as a method of this object.
	this.version		  = 0.3;
	//+group handling (GetGroupName()), renamed FileExists() to SelectItemByRegEx(),
	//+SelectItemsByRegEx(), added target property and target collection for
	//SelectItem(s)ByRegEx(), general cleanup done to allow easier integration
	
	this.Items 			  = [];	//all imported items
	this.ItemsPreFiltered = [];	//items that passed PreFilter()
	this.ItemsFiltered	  = [];	//items that passed MainFilter()
	this.ItemsEvaluated	  = [];	//items that passed Evaluate(), result of Run()
	this.ItemGroups		  = {};	//group name -> array of items
	
	////////////////////////////////////////////////////////////////////////////
	this.Init = config.Init || function(){
		//custom init code
	};
	////////////////////////////////////////////////////////////////////////////
	this.GetGroupName = config.GetGroupName || function( item ){
		//override GetGroupName() to enable custom grouping
		return "na";
	}
	////////////////////////////////////////////////////////////////////////////
	this.PreFilter = config.PreFilter || function(item){
		//return true if this item(type) is of interest in the process
		return true;
	};
	////////////////////////////////////////////////////////////////////////////
	this.MainFilter = config.MainFilter || function(item){
		//Return true to let final evaluation run on this specific item(type).
		//Other items can be excluded from final evaluation as well, by setting
		//them to NULL in "this.ItemsFiltered[]".

		//Use SelectItem(s)ByRegEx() to look for existence of other item(s) while deciding.
		//var item = this.SelectItemByRegEx( "^Aa.+\.pdf", "name", "ItemsPreFiltered");
		//You can set at what property and at what collection to look at.
		return true;
	};
	////////////////////////////////////////////////////////////////////////////
	this.Evaluate = config.Evaluate || function(item){
		//return true to let the item pass final evaluation
		return true;
	};
	////////////////////////////////////////////////////////////////////////////
	String.prototype.esc = function(str){
		//escapes the regular expression meta characters of a string
		return this.replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
	}
	////////////////////////////////////////////////////////////////////////////
	this.ImportDOItems = function( doItems) {
		//wraps every entry of the given collection (read via doItems.count and
		//doItems(i)) into an EasyFilterItem, appends it to this.Items and
		//returns this.Items
		for(var i=0;i<doItems.count;i++){
			var efItem = new EasyFilterItem();
			efItem.importDOItem(doItems(i));
			this.Items.push(efItem);
		}
		return this.Items;
	}
	////////////////////////////////////////////////////////////////////////////
	this.AddItemToGroup = function( item ) {
		//stores the item in the group named by GetGroupName() and links both
		//ways: item.groupName / item.group
		var groupName = item.groupName = this.GetGroupName(item);
		//own-property check: a plain lookup would also find inherited members
		//like "constructor" and treat them as an already existing group
		if (!Object.prototype.hasOwnProperty.call(this.ItemGroups, groupName))
			this.ItemGroups[groupName] = [];
		this.ItemGroups[groupName].push(item);
		item.group = this.ItemGroups[groupName];
	}
	////////////////////////////////////////////////////////////////////////////
	this.SelectItemByRegEx = function( regexStr, targetProperty, targetCollection){
		//returns the first matching item or null; the collection is passed on
		//and single mode is requested via the 4th argument
		return this.SelectItemsByRegEx( regexStr, targetProperty, targetCollection, true);
	}
	////////////////////////////////////////////////////////////////////////////
	this.SelectItemsByRegEx = function( regexStr, targetProperty, targetCollection, _singleMode ){
		//returns all items of this[targetCollection] (default "ItemsPreFiltered")
		//whose targetProperty (default "name") matches the regular expression;
		//with _singleMode only the first match (or null) is returned
		targetProperty = targetProperty || "name";
		targetCollection = targetCollection || "ItemsPreFiltered";
		var files = [], regex = new RegExp(regexStr);
		for(var i=0;i<this[targetCollection].length;i++){
			if (this[targetCollection][i][targetProperty].search(regex)!=-1) {
				if (_singleMode) return this[targetCollection][i];
				files.push(this[targetCollection][i]);
			}
		}
		return _singleMode ? null : files;
	}
	////////////////////////////////////////////////////////////////////////////
	this.Run = function() {
		//runs all stages on this.Items and returns this.ItemsEvaluated
		////////////////////////////////////////////////////////////////////////
		DOpus.Output("Initialising....");
		this.Init();
		////////////////////////////////////////////////////////////////////////
		DOpus.Output("PreFiltering ["+this.Items.length+"] files..");
		for(var i=0;i<this.Items.length;i++){
			if (this.PreFilter(this.Items[i])===true){
				this.ItemsPreFiltered.push(this.Items[i]);
				DOpus.Output("    PreFilter passed ["+this.Items[i].name+"]");
			} else {
				//DOpus.Output("    PreFilter ignore ["+this.Items[i].name+"]");
			}
		}
		DOpus.Output("");
		////////////////////////////////////////////////////////////////////////
		DOpus.Output("MainFiltering ["+this.ItemsPreFiltered.length+"] files..");
		for(var i=0;i<this.ItemsPreFiltered.length;i++){
			var item = this.ItemsPreFiltered[i];
			if (this.MainFilter(item)===true){
				DOpus.Output("    MainFilter passed ["+item.name+"]");
				//grouping happens here, so item.group is complete before Evaluate() runs
				this.AddItemToGroup(item);
				this.ItemsFiltered.push(item);
			} else {
				//DOpus.Output("    MainFilter ignore ["+item.name+"]");
			}
		}
		DOpus.Output("");
		////////////////////////////////////////////////////////////////////////
		DOpus.Output("Evaluating ["+this.ItemsFiltered.length+"] files..");
		for(var i=0;i<this.ItemsFiltered.length;i++){
			//entries set to null by MainFilter() are skipped
			if (this.ItemsFiltered[i] && this.Evaluate(this.ItemsFiltered[i])===true){
				DOpus.Output("    Evaluation passed ["+this.ItemsFiltered[i].name+"]");
				this.ItemsEvaluated.push(this.ItemsFiltered[i]);
			} else {
				//DOpus.Output("    Evaluation ignore ["+this.ItemsFiltered[i].name+"]");
			}
		}
		DOpus.Output("");
		DOpus.Output("Filtering done.");
		DOpus.Output("");
		return this.ItemsEvaluated;
	}
}
////////////////////////////////////////////////////////////////////////////////
function EasyFilterItem(){
	//Plain record for one item handled by EasyFilter. All fields start as null;
	//importDOItem() fills doItem/name/baseName/ext, while group and groupName
	//are left for the filter to set.
	this.doItem		= null;
	this.name		= null;
	this.baseName	= null;
	this.ext		= null;
	this.group		= null;
	this.groupName	= null;

	////////////////////////////////////////////////////////////////////////////
	//Keeps a reference to the given item and copies its name, name_stem and ext
	//as lower-cased strings.
	this.importDOItem = function ( doItem ) {
		var lowered = function( value ){ return String(value).toLowerCase(); };
		this.doItem		= doItem;
		this.name		= lowered(doItem.name);
		this.baseName	= lowered(doItem.name_stem);
		this.ext		= lowered(doItem.ext);
		//todo: could add the group name right here
	};
}

In your screenshot I can't see whether you put the code together correctly, but anyway, just use the new one I posted, it's enhanced in every way. Please also consider reducing the size of your screenshot so we keep the thread more tidy. It may all be my fault, sorry just in case! o))

Hi Jon,

the rule is to select all duplicate file names, excluding the [xx] suffix, where xx is of arbitrary length and character type.

So to give more real examples (bold=files to be selected):
A-T2-1326-P1[M].pdf
A-T2-1326-P1[N].pdf
A-T2-1326-P1[O].pdf
A-T2-1326-P2[N].pdf
A-T2-1327-P1[N].pdf
A-T2-1327-P2[N].pdf
A-T2-1328-P1[N].pdf
A-T2-1328-P1[1].pdf
A-T2-1328-P2[N].pdf
A-T2-1329-P1[B].pdf
A-T2-1329-P1[8].pdf
A-T2-1329-P1[9].pdf
A-T2-1329-P1[10].pdf
A-T2-1329-P2[1].pdf
A-T2-1330-P1[3].pdf
A-T3-1330-P1[3].pdf

tbone, with your new script I am now getting this error:
image

Please install the SelectEx script addin or remove the line which runs it at the end in the OnClick() function.
I always use "SelectEx MAKEVISIBLE" to scroll selected items into view, just in case they are out of view. o)

Of course. It works great now.

I also note that changing the character in the regex on line 4 allows for selecting file names with a different suffix delimiter,
i.e.
("(.*?)\\[(.*)","ig"), "$1") works with ...[xx].pdf file names.
("(.*?)_(.*)","ig"), "$1") works with ..._xx.pdf file names.

Thanks again tbone, this script is now a must have for me.

I'm glad to hear. o)
Yes, if you change the code in GetGroupName() you have control over what the filter uses to group items.
If we override the Evaluate() function, like we also did, you can influence what final condition must be met to allow items to pass through. Here we told EasyFilter to pass only items which are in a group of at least 2 items.

Script wise, things could be simpler, but what I aim for with this, is some generic solution to most filtering tasks. Code is a bit more than usual, but you can concentrate on the configuration part of the filter with this instead of writing the same loops and things all over again.

The filter works like a mini ETL process, if you are curious read this.. o):