Home Blog CV Projects Patterns Notes Book Colophon Search

Python Grouping

7 Mar, 2022

    basenames:
    {
        '/some/dir/IMG_0001': ['/some/dir/IMG_0001.CR2', '/some/dir/IMG_0001.JPG']
        ...
    }
    paths:
    {
        '/some/dir/IMG_0001.CR2': 'abc123',
        '/some/dir/IMG_0001.JPG': 'def456',
        ...
    }
    groups, mappings = jgphoto.group(basenames, paths)
    groups:
    {
        'aaa111': ['aaa111', 'abc123', ...]
    }
    mappings:
    {
        'aaa111': 'aaa111',
        'abc123': 'aaa111',
    }
def group(candidates, paths, groups=None, mappings=None):
    """Group file shas that belong together, merging with existing groups.

    For every candidate, the sha of each of its paths is looked up in
    ``paths`` and all of those shas are placed in one group. If any of the
    shas is already present in ``mappings``, the whole group it currently
    belongs to is merged into the new one. Each group is keyed by its
    "leader", the first sha of the group in sorted order.

    Args:
        candidates: Mapping of a candidate key (e.g. a basename) to the list
            of paths that share it.
        paths: Mapping of path -> sha. A candidate path missing from this
            mapping is reported through ``error()`` and skipped.
        groups: Optional existing mapping of leader sha -> sorted list of
            all shas in that group. Updated in place when supplied.
        mappings: Optional existing mapping of sha -> leader sha of the
            group it belongs to. Updated in place when supplied.

    Returns:
        A ``(groups, mappings)`` tuple reflecting all candidates processed.
    """
    start = time.time()
    # Compare against None (rather than truthiness) so that an empty dict
    # passed by the caller is updated in place just like a non-empty one.
    if groups is None:
        groups = {}      # Lowest sha -> All related shas
    if mappings is None:
        mappings = {}    # One related sha -> Lowest sha
    total = len(candidates)
    debug('Grouping based on ', total, 'candidates ...')
    # NOTE: the original loop began with `if len(candidates) < 2: continue`,
    # which tested the size of the whole mapping and therefore discarded all
    # work whenever exactly one candidate was supplied. It never triggered
    # for two or more candidates, so dropping it leaves those results
    # untouched while making the single-candidate case behave the same way.
    for counter, candidate in enumerate(candidates, start=1):
        # counter is the number of candidates reached so far, so the
        # reported percentage never exceeds 100.
        if counter % 1000 == 0:
            info(counter, str(round(100.0*counter/total, 2))+'%', time.time() - start)
        candidate_paths = candidates[candidate]
        shas = set()       # shas that were already known via mappings
        new_group = set()  # every sha that must end up in the same group
        debug('Investigating', candidate_paths)
        debug('Groups start', groups)
        debug('Mappings start', mappings)
        for path in candidate_paths:
            if path not in paths:
                error('Path "{}" not in paths'.format(path))
                continue
            filesha = paths[path]
            new_group.add(filesha)
            debug('Added', filesha, 'to shas', shas, 'and new_group', new_group)
            if filesha in mappings:
                debug('Found', filesha, 'in mappings, so looking through the shas in all mapped groups too')
                shas.add(filesha)
                # Pull in every member of the group this sha already belongs to.
                for sha in groups[mappings[filesha]]:
                    new_group.add(sha)
                    shas.add(sha)
                    debug('Added the mapped sha', sha, 'to the new_group', new_group)
        # `members` (not `group`) to avoid shadowing this function's name.
        members = sorted(new_group)
        if not members:
            # None of this candidate's paths could be resolved to a sha.
            continue
        leader = members[0]
        debug('So the leader is', leader, 'the new_group is', new_group, 'and the shas is', shas)
        # So at this point, let's see if anything has changed
        debug('Shas are same as new groups?', shas == new_group)
        if not shas:
            debug('Nothing has changed so we set the leader', leader, 'for the new group', members)
            # None of the shas were known before: this is a brand new group.
            groups[leader] = members
            for sha in new_group:
                debug('Updating the mappings to set', sha, '->', leader)
                mappings[sha] = leader
        else:
            # Otherwise there will be at least one sha that needs to be changed
            debug('We need to clean up the mappings for the old groups', shas)
            for sha in shas:
                if sha in groups:
                    # This sha led an old group: repoint its members at the
                    # new leader and retire the old group entry.
                    for target in groups[sha]:
                        mappings[target] = leader
                    del groups[sha]
            # Set the group and the mappings (possibly re-setting some that
            # were just set above, which is harmless).
            groups[leader] = members
            for sha in members:
                mappings[sha] = leader
        debug('Groups end', groups)
        debug('Mappings end', mappings)
    debug('done.')
    return groups, mappings

Comments

Be the first to comment.

Add Comment





Copyright James Gardner 1996-2020 All Rights Reserved. Admin.