Find item occurring most frequently in a list

I created this for error-checking an NSCalendar date. Occasionally NSCalendar would return garbage, so I had it run 10 times and then used the most prolific value.

-- Demo input: "a" occurs eight times, more often than any other value.
set sampleValues to {"a", "a", "a", "b", "c", "a", "b", "c", "a", "e", "f", "a", "a", "a"}

-- The winning value is stored in prolificItem, which is also the script's result.
set prolificItem to mostProlificItem(sampleValues)

-- Returns the value that occurs most often in aList.
-- Values are matched by the "=" comparison performed in indexOfItemIfExists.
-- When several values share the highest count, the one tallied first is returned.
-- Returns missing value when no value occurs more than once (this includes an empty list).
on mostProlificItem(aList)
	-- Pass 1: build a tally of {value, count} pairs, one pair per distinct value.
	set tally to {}
	set inputLength to count of aList
	repeat with n from 1 to inputLength
		set candidate to item n of aList
		set slot to indexOfItemIfExists(candidate, tally)
		if slot is not missing value then
			-- Replace the whole pair, so the stored value is the most recently seen instance.
			set previousCount to item 2 of (item slot of tally)
			set item slot of tally to {candidate, previousCount + 1}
		else
			set end of tally to {candidate, 1}
		end if
	end repeat
	
	-- Pass 2: find the pair with the largest count. The strict ">" keeps the earliest pair on ties.
	set bestValue to missing value
	set bestCount to 0
	repeat with k from 1 to (count of tally)
		set {pairValue, pairCount} to item k of tally
		if pairCount > bestCount then
			set bestValue to pairValue
			set bestCount to pairCount
		end if
	end repeat
	
	-- A value only qualifies as "prolific" if it was seen at least twice.
	if bestCount > 1 then return bestValue
	return missing value
end mostProlificItem

-- Looks for thisItem among the values stored in itemOccuranceList, a list whose
-- entries are lists with the value in first position (e.g. {value, count}).
-- Returns the 1-based index of the first entry whose value equals thisItem,
-- or missing value when there is no such entry.
on indexOfItemIfExists(thisItem, itemOccuranceList)
	set entryTotal to count of itemOccuranceList
	set entryIndex to 0
	repeat while entryIndex < entryTotal
		set entryIndex to entryIndex + 1
		-- Only the first element of each entry (the value) takes part in the comparison.
		if (item 1 of (item entryIndex of itemOccuranceList)) = thisItem then return entryIndex
	end repeat
	return missing value
end indexOfItemIfExists

Nice. :slight_smile:

Since it’s possible that two or more items might be co-most prolific, and since ‘missing value’ itself might be the most prolific value in a list, here’s a minor variation which returns a list of the most prolific value(s). It also checks for the subject list being empty.

-- Build a 100-item test list. Each pass draws one number, one letter and one
-- miscellaneous value, then keeps just one of those three at random.
set alist to {}
repeat 100 times
	set numberDraw to random number from 1 to 7
	set letterDraw to some character of "abcdefg"
	set miscDraw to some item of {"dog", "cat", {aardvark:17}, {1, 2, 3}, missing value, pi, current date}
	set end of alist to some item of {numberDraw, letterDraw, miscDraw}
end repeat

-- The handler's return value becomes the script's result.
mostProlificItem(alist)

-- Returns a list of the value(s) that occur most often in alist, in the order
-- in which they were first tallied. Values are matched by the "=" comparison
-- performed in indexOfItemIfExists.
-- Returns missing value when alist is empty or when no value occurs more than once.
on mostProlificItem(alist)
	set inputLength to count of alist
	if inputLength = 0 then return missing value
	
	-- Pass 1: build a tally of {value, count} pairs, one pair per distinct value.
	set tally to {}
	repeat with n from 1 to inputLength
		set candidate to item n of alist
		set slot to indexOfItemIfExists(candidate, tally)
		if slot is not missing value then
			-- Replace the whole pair, so the stored value is the most recently seen instance.
			set previousCount to item 2 of (item slot of tally)
			set item slot of tally to {candidate, previousCount + 1}
		else
			set end of tally to {candidate, 1}
		end if
	end repeat
	
	-- Pass 2: collect every value that shares the highest count.
	-- Counts are at least 1, so the first pair always replaces the empty starting state.
	set leaders to {}
	set topCount to 0
	repeat with k from 1 to (count tally)
		set {pairValue, pairCount} to item k of tally
		if pairCount > topCount then
			-- New outright leader: start a fresh list.
			set leaders to {pairValue}
			set topCount to pairCount
		else if pairCount = topCount then
			-- Tie with the current leader(s): add to the list.
			set end of leaders to pairValue
		end if
	end repeat
	
	-- Values only qualify as "prolific" if they were seen at least twice.
	if topCount > 1 then return leaders -- or: return {leaders, topCount}
	return missing value
end mostProlificItem

-- Searches itemOccuranceList (a list of entries whose first element is a value,
-- e.g. {value, count}) for an entry whose value equals thisItem.
-- Returns the 1-based index of the first match, or missing value if none is found.
on indexOfItemIfExists(thisItem, itemOccuranceList)
	repeat with entryIndex from 1 to (count itemOccuranceList)
		-- Compare against the value part of the entry only.
		set storedValue to item 1 of (item entryIndex of itemOccuranceList)
		if storedValue = thisItem then return entryIndex
	end repeat
	
	return missing value
end indexOfItemIfExists

Very nice. I wasn’t too concerned about a tie in occurrences since, for my use, the error only happens once in a great while, but I’m sure your modifications will come in handy.

ASObjC version (timings: ASObjC version, 180 microseconds; 1st plain AppleScript version, 19 microseconds; 2nd plain AppleScript version, 17 microseconds):

use AppleScript version "2.4"
use scripting additions
use framework "Foundation"
property |⌘| : a reference to current application

set sampleValues to {"a", "a", "a", "b", "c", "a", "b", "c", "a", "e", "f", "a", "a", "a"}

-- The handler returns its records sorted by descending count,
-- so the first record describes the most frequent value.
set rankedRecords to my countItemsAppearance(sampleValues)
return first item of rankedRecords

-- Counts how often each distinct value occurs in aList.
-- Returns a list of records of the form {aName:value, numberOfTimes:count},
-- sorted by numberOfTimes in descending order.
on countItemsAppearance(aList)
	-- NSCountedSet keeps one entry per distinct object together with its occurrence count.
	set tallySet to |⌘|'s NSCountedSet's alloc()'s initWithArray:aList
	set tallyRecords to |⌘|'s NSMutableArray's array()
	
	-- Walk the set's distinct values; the enumerator yields missing value when exhausted.
	set valueWalker to tallySet's objectEnumerator()
	set nextValue to valueWalker's nextObject()
	repeat until nextValue is missing value
		set occurrenceCount to (tallySet's countForObject:nextValue)
		set tallyEntry to (|⌘|'s NSDictionary's dictionaryWithObjects:{nextValue, occurrenceCount} forKeys:{"aName", "numberOfTimes"})
		(tallyRecords's addObject:tallyEntry)
		set nextValue to valueWalker's nextObject()
	end repeat
	
	-- Most frequent values first.
	set byCountDescending to |⌘|'s NSSortDescriptor's sortDescriptorWithKey:"numberOfTimes" ascending:false
	tallyRecords's sortUsingDescriptors:{byCountDescending}
	
	return tallyRecords as list -- a single list of records
end countItemsAppearance

NOTE: This was a surprise to me - I did not expect the ASObjC version to be 10 times slower than the simple versions. If anyone can find a better way to use ASObjC here, I will be grateful.

UPDATE: On further testing, the ASObjC version becomes faster once the list has more than about 100 items. With 1000 items, Nigel’s script takes about 99 msec and the ASObjC version about 73 msec. With 10000 items, Nigel’s script takes about 29.97 seconds and the ASObjC version about 20.44 seconds.

:cool:

Here’s a minor variant which returns all the co-most frequently occurring values, where relevant.

use AppleScript version "2.4"
use scripting additions
use framework "Foundation"
property |⌘| : a reference to current application

-- Demo input in which "a" and "b" tie: each occurs eight times.
set sampleValues to {"a", "a", "a", "b", "c", "a", "b", "c", "a", "e", "f", "a", "a", "a", "b", "b", "b", "b", "b", "b"}

-- The record returned by the handler is the script's result.
return mostProlificItems(sampleValues)

-- Finds the value(s) that occur most often in aList.
-- Returns a record {values:…, numberOfTimes:…} where 'values' is a list of every
-- value sharing the highest occurrence count and 'numberOfTimes' is that count.
-- For an empty list the result is {values:{}, numberOfTimes:0}.
on mostProlificItems(aList)
	-- An empty list has no most frequent value. Without this guard there is no
	-- highest frequency to coerce to an integer at the end, and the handler errors.
	if ((count aList) is 0) then return {values:{}, numberOfTimes:0}
	
	-- NSCountedSet keeps one entry per distinct object together with its occurrence count.
	set aSet to |⌘|'s NSCountedSet's alloc()'s initWithArray:aList
	
	-- Build one dictionary per distinct value: {aName:value, numberOfTimes:count}.
	set anArray to |⌘|'s NSMutableArray's array()
	repeat with aValue in aSet's allObjects()
		(anArray's addObject:(|⌘|'s NSDictionary's dictionaryWithObjects:{aValue, (aSet's countForObject:aValue)} forKeys:{"aName", "numberOfTimes"}))
	end repeat
	
	-- Filter the array on the highest "numberOfTimes" value.
	set highestFrequency to anArray's valueForKeyPath:("@max.numberOfTimes")
	set aPredicate to |⌘|'s class "NSPredicate"'s predicateWithFormat_("numberOfTimes == %@", highestFrequency)
	anArray's filterUsingPredicate:(aPredicate)
	
	-- Get the "aName" values from the remaining dictionaries.
	set mostFrequentValues to (anArray's valueForKey:("aName")) as list
	
	-- Return them along with their frequency.
	return {values:mostFrequentValues, numberOfTimes:highestFrequency as integer}
end mostProlificItems

Just to note that unlike the AS scripts above, switching to ASObjC and back returns copies of the original values, not the original objects. This may be relevant if the list contains lists, records, or dates. If the list contains script objects, enums, or handler labels, the ASObjC methods used here ignore them. The methods are case sensitive by default.

If you want the items returned in the order they first appear in the list, change …

repeat with aValue in aSet's allObjects()

… to:

repeat with aValue in (|⌘|'s class "NSOrderedSet"'s orderedSetWithArray:(aList))

So here’s a further development of KniazidisR’s script which acts case-insensitively unless the call to it is in a ‘considering case’ statement. I rather like the idea of returning co-most frequent values in the order and form of their first instances in the original list, so I’ve included the NSOrderedSet idea. The two classes of set seem capable themselves of recognising when integers and reals represent the same values.

use AppleScript version "2.4"
use scripting additions
use framework "Foundation"

property |⌘| : a reference to current application

-- Demo input mixing text in different cases, integers and reals with equal
-- values, and records whose text differs only in case.
set sampleValues to {"a", "A", "a", 7.0, "B", {x:"HellO"}, "c", 7, 7, {x:"Hello"}, 7, "a", "b", "c", 7, "a", {x:"HELLO"}, "e", "f", 7, {x:"hello"}, "a", "a", 7, 7, "a", "b", {x:"HellO"}, "b", "b", {x:"HeLLo"}, "b", {x:"Hello"}, "b", {x:"Hello"}, "b"}

-- To have differently-cased text counted separately, uncomment the
-- 'considering case' wrapper around the call.
-- considering case
mostProlificItems(sampleValues)
-- end considering

-- Finds the value(s) that occur most often in aList.
-- Returns a record {values:…, numberOfTimes:…} where 'values' lists every value
-- sharing the highest occurrence count, in the order and form of their first
-- instances in aList, and 'numberOfTimes' is that count.
-- Values which AppleScript regards as equal at the time of the call (e.g. text
-- differing only in case, unless the call is inside a 'considering case' statement)
-- are merged and their counts added together.
-- When there is nothing to count (e.g. an empty list) the result is {values:{}, numberOfTimes:0}.
on mostProlificItems(aList)
	set countedSet to |⌘|'s NSCountedSet's alloc()'s initWithArray:aList -- For the occurrence count of each value (distinguished case-sensitively).
	set orderedSet to |⌘|'s class "NSOrderedSet"'s orderedSetWithArray:(aList) -- For the first instance of each value (ditto) in the original order.
	
	-- The lists are held as script object properties and accessed through 'o'.
	script o
		property listOfValues : orderedSet's array() as list -- The ordered set's contents as a list of AS values
		property listOfRecords : {} -- One {aName, numberOfTimes} record per merged value.
		property checklist : {} -- The merged values themselves, parallel to listOfRecords.
	end script
	
	set checklistLength to 0
	repeat with i from 1 to (count o's listOfValues)
		set aValue to item i of o's listOfValues
		set aCount to (countedSet's countForObject:aValue) as integer
		-- Has this value already been handled in another case when case is being ignored?
		if (o's checklist contains {aValue}) then
			-- If so, get the index of the version in the checklist and update the count in the corresponding record.
			repeat with j from 1 to checklistLength
				if (item j of o's checklist is aValue) then exit repeat
			end repeat
			tell item j of o's listOfRecords to set its numberOfTimes to (its numberOfTimes) + aCount
		else
			-- Otherwise add a new record to the list of records and the value itself to the checklist.
			set end of o's listOfRecords to {aName:aValue, numberOfTimes:aCount}
			set end of o's checklist to aValue
			set checklistLength to checklistLength + 1
		end if
	end repeat
	
	-- With no records there is no highest frequency, and the integer coercion
	-- in the return statement below would error. Return an empty result instead.
	if ((count o's listOfRecords) is 0) then return {values:{}, numberOfTimes:0}
	
	-- Derive a mutable array of dictionaries from the list of records just built.
	set anArray to |⌘|'s class "NSMutableArray"'s arrayWithArray:(o's listOfRecords)
	-- Get the highest "numberOfTimes" value contained in any of the dictionaries.
	set highestFrequency to anArray's valueForKeyPath:("@max.numberOfTimes")
	-- Filter for the dictionaries having that "numberOfTimes" value.
	set aPredicate to |⌘|'s class "NSPredicate"'s predicateWithFormat_("numberOfTimes == %@", highestFrequency)
	tell anArray to filterUsingPredicate:(aPredicate)
	-- Get the "aName" values from these dictionaries.
	set mostFrequentValues to anArray's valueForKey:("aName")
	
	-- Return these and the "numberOfTimes" value.
	return {values:mostFrequentValues as list, numberOfTimes:highestFrequency as integer}
end mostProlificItems

Edit: Speeded up slightly and shortened by having the case-insensitive section iterate through an AppleScript list instead of through the ordered set and by using this section instead of a separate, purely ASObjC one for case-sensitive work as well!