I am building a python script which will be removing duplicates from my library as an exercise in python. The idea is to build a dict containing a dict ( with the data and statistic on the file / folder ) for every file in folder in the library. It currently works with a set number of subfolder. This is an example of what it gives out.
>>> Files
{'/root/dupclean/working/test': {'FilenameEncoding': {'confidence': 1.0, 'encoding': 'ascii'}, 'File': False, 'T\xc3\xa9l\xc3\xa9phone': {'FilenameEncoding': {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}, 'File': False, 'Extension': 'Folder', 'LastModified': 1284064857, 'FullPath': '/root/dupclean/working/test/T\xc3\xa9l\xc3\xa9phone', 'CreationTime': 1284064857, 'LastAccessed': 1284064857, 'Best Of': {'FilenameEncoding': {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}, 'File': False, 'Extension': 'Folder', 'LastModified': 1284064965, 'FullPath': '/root/dupclean/working/test/T\xc3\xa9l\xc3\xa9phone/Best Of', '10 New York Avec Toi.mp3': {'FilenameEncoding': {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}, 'File': True, 'Extension': 'mp3', 'LastModified': 1284064858, 'FullPath': '/root/dupclean/working/test/T\xc3\xa9l\xc3\xa9phone/Best Of/10 New York Avec Toi.mp3', 'CreationTime': 1284064858, 'LastAccessed': 1284064858, 'Size': 2314368L}, 'CreationTime': 1284064965, 'LastAccessed': 1284064857}}}}
This is how I am producing it now:
ROOT = Settings['path']
Files = {ROOT: {'File': False, 'FilenameEncoding': chardet.detect(ROOT)},}
for fileName in os.listdir ( ROOT ):
fileStats = ROOT + '/' + fileName
if os.path.isdir ( fileStats ):
Files[ROOT][fileName] = Analyse(fileStats)
for fileName2 in os.listdir ( ROOT + '/' + fileName):
dbg(70, "Scanning " + ROOT + '/' + fileName + '/' + fileName2)
fileStats2 = ROOT + '/' + fileName + '/' + fileName2
#third level
if os.path.isdir ( fileStats2 ):
Files[ROOT][fileName][fileName2] = Analyse(fileStats2)
for fileName3 in os.listdir ( ROOT + '/' + fileName + '/' + fileName2):
dbg(70, "Scanning " + ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3)
fileStats3 = ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3
#Fourth level
if os.path.isdir ( fileStats3 ):
Files[ROOT][fileName][fileName2][fileName3] = Analyse(fileStats3)
for fileName4 in os.listdir ( ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3):
dbg(70, "Scanning " + ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4)
fileStats4 = ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4
#Fifth level
if os.path.isdir ( fileStats4 ):
Files[ROOT][fileName][fileName2][fileName3][fileName4] = Analyse(fileStats4)
for fileName5 in os.listdir ( ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4):
dbg(70, "Scanning " + ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4 + '/' + fileName5)
fileStats5 = ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4 + '/' + fileName5
#Sicth level
if os.path.isdir ( fileStats5 ):
Files[ROOT][fileName][fileName2][fileName3][fileName4][fileName5] = Analyse(fileStats5)
dbg(10, "There was still a folder left in "+ROOT + '/' + fileName + '/' + fileName2 + '/' + fileName3 + '/' + fileName4)
else:
Files[ROOT][fileName][fileName2][fileName3][fileName4][fileName5] = Analyse(fileStats5)
else:
Files[ROOT][fileName][fileName2][fileName3][fileName4] = Analyse(fileStats4)
else:
Files[ROOT][fileName][fileName2][fileName3] = Analyse(fileStats3)
else:
Files[ROOT][fileName][fileName2] = Analyse(fileStats2)
else:
Files[ROOT][fileName] = Analyse(fileStats)
This is obviously wrong, but for the life of me, I just can't figure out a way to do it recursively !
Any help or pointers would be greatly apreciated.