This is the kind of thing I have done when I wanted to diff directories:
#!/usr/bin/env python
import os, os.path
import stat
def traverse_path(start_dir='.'):
    """Yield the path of every regular file found beneath *start_dir*.

    Each yielded path is expressed relative to *start_dir* and prefixed
    with ``os.sep`` (for example ``/sub/name.txt``), so the results from
    two different roots can be compared directly as sets.  The result is
    the same whether or not *start_dir* is written with a trailing
    separator.

    Entries that cannot be stat'ed are reported on stdout as
    ``Skipping <path>`` and omitted.  Entries that are not regular files
    are silently omitted.
    """
    for root, dirs, files in os.walk(start_dir, topdown=False):
        for name in files:
            complete_path = os.path.join(root, name)
            try:
                mode = os.stat(complete_path).st_mode
            except (OSError, IOError):
                # Best effort: report the unreadable entry and carry on.
                # Single-argument print() emits the same text on
                # Python 2 and Python 3.
                print('Skipping %s' % complete_path)
                continue
            if stat.S_ISREG(mode):
                # relpath() instead of slicing off len(start_dir): the
                # slice gave different results for 'dir' and 'dir/'.
                yield os.sep + os.path.relpath(complete_path, start_dir)
if __name__ == '__main__':
    # Collect the relative paths of the regular files under each tree.
    s = set(traverse_path('/home/hughdbrown'))
    t = set(traverse_path('/home.backup/hughdbrown'))
    # Files present only in the first tree.  Sorted so that repeated
    # runs print the report in the same order.
    for e in sorted(s - t):
        print(e)
    print('-' * 25)
    # Files present only in the second (backup) tree.
    for e in sorted(t - s):
        print(e)
Notice that there is a check for regular files. I seem to recall encountering files that were used as semaphores, or that were written to by one process and read by another, or something along those lines. Restricting the comparison to regular files turned out to be important.
You can add code to delete files, according to whatever rules you like.