If I understood correctly your definition of “sparse”, this function should be exactly what you want:
# python ≥ 2.5
import itertools, heapq
def make_sparse(sequence):
grouped= sorted(sequence)
item_counts= []
for item, item_seq in itertools.groupby(grouped):
count= max(enumerate(item_seq))[0] + 1
item_counts.append( (-count, item) ) # negative count for heapq purposes
heapq.heapify(item_counts)
count1, item1= heapq.heappop(item_counts)
yield item1; count1+= 1
while True:
try:
count2, item2= heapq.heappop(item_counts)
except IndexError: # no other item remains
break
yield item2; count2+= 1
if count1 < 0:
heapq.heappush(item_counts, (count1, item1))
item1, count1= item2, count2
# loop is done, produce remaining item1 items
while count1 < 0:
yield item1; count1+= 1
if __name__ == "__main__":
# initial example
print list(make_sparse(
"duck duck duck duck goose goose goose dog".split()))
# updated example
print list(make_sparse([
'duck', 'duck', 'duck', 'duck', 'duck', 'duck',
'goose', 'goose', 'goose', 'goose', 'dog', 'dog']))
# now a hard case: item 'a' appears more than:
# > total_len//2 times if total_len is even
# > total_len//2+1 times if total_len is odd
print list(make_sparse("aaaaaabbcc"))
These examples produce this output:
['duck', 'goose', 'duck', 'goose', 'duck', 'dog', 'duck', 'goose']
['duck', 'goose', 'duck', 'goose', 'duck', 'dog', 'duck', 'goose', 'duck', 'dog', 'duck', 'goose']
['a', 'b', 'a', 'c', 'a', 'b', 'a', 'c', 'a', 'a']
A subtle note: in the first and second examples, reversing the output order might look more optimal.