Hi to all, i have a module (a single .py file, actually), with a class called HashedDir.
when i import the file and instanciate 2 instances of that class, when i check the object's fields they're always the same, even if the two objects should be different.
Eg:
h1 = HashedDir('/path/to/dir')
print h1.getList()['files'] # /path/to/dir
h2 = HashedDir('some/other/path')
print h1.getList()['files'] # some/other/path
print h2.getList()['files'] # some/other/path
Any idea?
This is the class:
from os import walk
from os import path
from hashlib import md5
import re
class HashedDir:
"""
A list of files with associated md5 hashes generated retrieving thou
a recursive walk in the directory tree starting from a provided root
directory. Also stores the dirs in each dir
"""
# {'files': [
# ('/path/to/file1', '52bc309e11259af15e4623c7a0abc28c'),
# ('/path/to/file2', '52bc309e11259af15e4623c7a0abc28c'),
# ('/path/to/dir/file3', '52bc309e11259af15e4623c7a0abc28c')
# ],
# 'dirs': ['/path/to/dir1', '/path/to/dir2']
# }
fileList = {'files': [], 'dirs': []}
ignoreList = []
def __init__(self, rootDir, ignoreList=[]):
"""
ignoreList is a list of regular expressions. If a file or a dir matches
that regular expression, don't count it
"""
self.ignoreList = ignoreList
for dirpath, dirnames, filenames in walk(rootDir):
for fileName in filenames:
completeName = path.join(dirpath,fileName)
hash = md5(open(completeName).read()).hexdigest()
relativePath = self._relativePath(completeName, rootDir)
if not self._toBeIgnored(relativePath):
self.fileList['files'].append((relativePath, hash))
for dirName in dirnames:
completeName = path.join(dirpath, dirName)
relativePath = self._relativePath(completeName, rootDir)
if not self._toBeIgnored(relativePath):
self.fileList['dirs'].append(relativePath)
def _relativePath(self, path, base):
return path.replace(base, '')
def _toBeIgnored(self, path):
for regex in self.ignoreList:
if re.compile(regex).search(path) != None:
return True
return False
def getList(self):
return self.fileList
Thanks in advance