7 Mar, 2022
basenames:
{
'/some/dir/IMG_0001': ['/some/dir/IMG_0001.CR2', '/some/dir/IMG_0001.JPG']
...
}
paths:
{
'/some/dir/IMG_0001.CR2': 'abc123',
'/some/dir/IMG_0001.JPG': 'def456',
...
}
groups, mappings = jgphoto.group(basenames, paths)
groups:
{
'aaa111': ['aaa111', 'abc123', ...]
}
mappings:
{
'aaa111': 'aaa111',
'abc123': 'aaa111',
}
def group(candidates, paths, groups=None, mappings=None):
    """Group file shas that belong together and elect the lowest sha as leader.

    Every entry of ``candidates`` lists paths that should end up in the same
    group. If any of those paths' shas already belongs to a group, the old
    group is merged into the new one and a (possibly new) leader is chosen.

    Args:
        candidates: mapping of a key (e.g. a basename) to a list of paths.
        paths: mapping of path -> sha. Paths missing from here are reported
            via ``error`` and skipped.
        groups: optional existing mapping of leader sha -> sorted list of all
            related shas. Updated in place when given.
        mappings: optional existing mapping of sha -> leader sha. Updated in
            place when given.

    Returns:
        The ``(groups, mappings)`` tuple.
    """
    start = time.time()
    # Use "is None" rather than "or {}" so that an empty dict supplied by the
    # caller is updated in place instead of being silently replaced.
    if groups is None:
        groups = {}  # Lowest sha -> All related shas
    if mappings is None:
        mappings = {}  # One related sha -> Lowest sha
    total = len(candidates)
    debug('Grouping based on ', total, 'candidates ...')
    # NOTE: the previous "len(candidates) < 2" guard was loop-invariant and
    # turned a single-candidate call into a no-op; every candidate is now
    # processed regardless of how many there are.
    for counter, candidate in enumerate(candidates, 1):
        candidate_paths = candidates[candidate]
        if counter % 1000 == 0:
            info(counter, str(round(100.0*counter/total, 2))+'%', time.time() - start)
        shas = set()       # shas that were already part of an existing group
        new_group = set()  # every sha that must end up in the resulting group
        debug('Investigating', candidate_paths)
        debug('Groups start', groups)
        debug('Mappings start', mappings)
        for path in candidate_paths:
            if path not in paths:
                error('Path "{}" not in paths'.format(path))
                continue
            filesha = paths[path]
            new_group.add(filesha)
            debug('Added', filesha, 'to shas', shas, 'and new_group', new_group)
            if filesha in mappings:
                debug('Found', filesha, 'in mappings, so looking through the shas in all mapped groups too')
                shas.add(filesha)
                # Pull in every member of the group this sha already belongs to.
                for sha in groups[mappings[filesha]]:
                    new_group.add(sha)
                    shas.add(sha)
                    debug('Added the mapped sha', sha, 'to the new_group', new_group)
        members = sorted(new_group)
        if not members:
            # None of the candidate's paths had a known sha.
            continue
        leader = members[0]
        debug('So the leader is', leader, 'the new_group is', new_group, 'and the shas is', shas)
        # So at this point, let's see if anything has changed
        debug('Shas are same as new groups?', shas == new_group)
        if not shas:
            debug('Nothing has changed so we set the leader', leader, 'for the new group', members)
            # This is a new group
            groups[leader] = members
            for sha in new_group:
                debug('Updating the mappings to set', sha, '->', leader)
                mappings[sha] = leader
        else:
            # Otherwise there will be at least one sha that needs to be changed
            debug('We need to clean up the mappings for the old groups', shas)
            for sha in shas:
                if sha in groups:
                    # An old group led by this sha is being absorbed: repoint
                    # its members and drop the old entry.
                    for target in groups[sha]:
                        mappings[target] = leader
                    del groups[sha]
            # Set the group and the mappings (possibly re-setting some we've
            # just set, but that's OK).
            groups[leader] = members
            for sha in members:
                mappings[sha] = leader
        debug('Groups end', groups)
        debug('Mappings end', mappings)
    debug('done.')
    return groups, mappings
Be the first to comment.
Copyright James Gardner 1996-2020 All Rights Reserved. Admin.