Flattening and Reverse Merging Dictionaries in Python

Flattening and Reverse Merging Dictionaries in Python

We often need to work with or build fairly complex, multi-level nested dictionaries. This is not difficult, but it is tedious, for example when we need data shaped like this:

types:
  typeA:
    items:
      - item1
      - item2
  typeB:
    items:
      - item1
  typeN:
    # ...

The most basic approach:

# Module-level store: maps each type name to the list of items added for it.
data = {'types': {}}
def append(_type, item):
    """Add ``item`` to the list stored under ``data['types'][_type]``.

    The list for ``_type`` is created on first use.
    """
    if _type not in data['types']:
        data['types'][_type] = []
    # Append to the per-type list, not to the ``types`` dict itself
    # (a dict has no ``append`` method).
    data['types'][_type].append(item)

To simplify this approach, we can use the dict.setdefault method:

# Module-level store: maps each type name to the list of items added for it.
data = {'types': {}}
def append(_type, item):
    """Add ``item`` to the list kept for ``_type`` under ``data['types']``.

    ``dict.setdefault`` returns the existing list for ``_type`` or inserts
    and returns a fresh empty one, so no explicit membership test is needed.
    """
    bucket = data['types'].setdefault(_type, [])
    bucket.append(item)

In more complex scenarios, however, repeatedly nesting setdefault calls is still cumbersome. Moreover, in some dynamic situations we want to drive the program's behavior with generated strings. In such cases we can flatten the dictionary, work on the flat form, and then rebuild the nested structure (the code is given at the end of this post):

>>> example = {'a': 1, 'b': {}, 'c': {'d': [], 'e': {'f': 0, 'h': u'xxx', 'i': {'j'}, 'k': object()}}}
>>> flatten_dict(example) # doctest: +ELLIPSIS
{'a': 1, 'b': {}, 'c.d': [], 'c.e.k': <object object at ...>, 'c.e.i': set(['j']), 'c.e.h': u'xxx', 'c.e.f': 0}

Working on the flattened form makes it more convenient to control hierarchical data, for example to drop the fields a user is not allowed to set:

def set_data(obj, user, data):
    """Store ``data`` on ``obj.data`` with the user's read-only fields removed.

    ``data`` is flattened with ``flatten_dict``, every flattened key listed
    by ``get_readonly_fields(user)`` is dropped, and the remainder is
    rebuilt with ``nestify_dict`` and assigned to ``obj.data``.

    Filtering is done on flattened keys, so a read-only entry only matches
    a key exactly as ``flatten_dict`` produces it (e.g. ``'c.e.h'``).
    """
    # Look the read-only fields up once instead of once per key; a set
    # gives constant-time membership tests.
    readonly = set(get_readonly_fields(user))
    flatted_data = flatten_dict(data)
    writable = {
        key: value
        for key, value in flatted_data.items()
        if key not in readonly
    }
    obj.data = nestify_dict(writable)

def get_readonly_fields(user):
    """Return the flattened keys that ``user`` may not modify.

    Users with a truthy ``is_superuser`` get an empty list; everyone else
    gets ``['c.d', 'c.e.h']``.
    """
    return [] if user.is_superuser else ['c.d', 'c.e.h']

We can also conveniently construct the data mentioned in the example above using this method (though this example might not be complex enough):

# Module-level store; rebuilt (rebound) on every call to ``append``.
data = {}
def append(key_as_string, item):
    """Append ``item`` to the list at the dotted path under ``'types'``.

    ``key_as_string`` is a dotted path such as ``'a.b.c.d'``. The global
    ``data`` is flattened, the item is appended to the list stored under
    ``'types.' + key_as_string`` (created if missing), and ``data`` is
    rebound to the re-nested result.
    """
    global data  # required: ``data`` is reassigned below
    flat = flatten_dict(data)
    full_key = 'types.' + key_as_string
    items = flat.setdefault(full_key, [])
    items.append(item)
    data = nestify_dict(flat)

example:

In [4]: append('a.b.c.d', 'itemA')

In [5]: data
Out[5]: {'types': {'a': {'b': {'c': {'d': ['itemA']}}}}}

In [6]: data
Out[6]: {'types': {'a': {'b': {'c': {'d': ['itemA']}}}}}

In [7]: append('a.b.c.d', 'itemA')

In [8]: data
Out[8]: {'types': {'a': {'b': {'c': {'d': ['itemA', 'itemA']}}}}}

In [9]: append('a.b.c.d', 'itemA')

In [10]: data
Out[10]: {'types': {'a': {'b': {'c': {'d': ['itemA', 'itemA', 'itemA']}}}}}

In [11]: append('a.b.c.x', 'itemA')

In [12]: data
Out[12]:
{'types': {'a': {'b': {'c': {'d': ['itemA', 'itemA', 'itemA'],
     'x': ['itemA']}}}}}

Code

gist


""" Flatten dict and vice versa
    Authored by: Fang Jiaan (fduodev@gmail.com)
    Example:
    >>> example = {'a': 1, 'b': {}, 'c': {'d': [], 'e': {'f': 0, 'h': u'xxx', 'i': {'j'}, 'k': object()}}}
    >>> flatten_dict(example) # doctest: +ELLIPSIS
    {'a': 1, 'b': {}, 'c.d': [], 'c.e.k': <object object at ...>, 'c.e.i': set(['j']), 'c.e.h': u'xxx', 'c.e.f': 0}
    >>> assert nestify_dict(flatten_dict(example)) == example
    >>> assert 'c.e.h' in flatten_dict(example)
    >>> assert flatten_dict(example)['c.e.h'] == example['c']['e']['h']
    >>> example2 = {'x.y': 1}
    >>> flatten_dict(example2) # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ValueError: Separator "." already in key, this may lead unexpected behaviour, choose another.
"""
import collections
try:
    from collections.abc import MutableMapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import MutableMapping

def flatten_dict(d, parent_key='', sep='.', quiet=False):
    """Flatten a nested mapping into a single-level dict with joined keys.

    Every non-empty ``MutableMapping`` value is descended into and its keys
    are joined to the enclosing key with ``sep``. Empty mappings are kept
    as values under their own key so they are not lost.

    Args:
        d: The mapping to flatten. Keys are joined by string concatenation,
            so they are expected to be strings.
        parent_key: Prefix for every key of ``d``; empty at the top level.
        sep: Separator placed between the key of each nesting level.
        quiet: If true, do not raise when a key already contains ``sep``.
            This applies at every nesting level.

    Returns:
        A new flat ``dict`` mapping joined keys to leaf values.

    Raises:
        ValueError: If ``quiet`` is false and a key at any level contains
            ``sep``, since the flat key would then be ambiguous.
    """
    flat = {}
    for k, v in d.items():

        if not quiet and sep in k:
            raise ValueError('Separator "%(sep)s" already in key, '
                             'this may lead unexpected behaviour, '
                             'choose another.' % dict(sep=sep))

        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping) and v:
            # Pass ``quiet`` down so nested levels follow the caller's
            # choice instead of silently reverting to the default.
            flat.update(flatten_dict(v, new_key, sep=sep, quiet=quiet))
        else:
            # Leaf value, or an empty mapping kept as-is.
            flat[new_key] = v
    return flat

def nestify_dict(d, sep='.'):
    """Rebuild a nested dict from a flat dict whose keys are joined by ``sep``.

    A key containing ``sep`` is split on it; every part but the last names
    a nested dict (created on demand), and the last part is the key under
    which the value is stored. Keys without ``sep`` are copied unchanged.

    Args:
        d: Flat mapping, such as the result of ``flatten_dict``.
        sep: Separator that was used to join the nesting levels.

    Returns:
        A new nested ``dict``.
    """
    nested = {}
    for flat_key, value in d.items():
        if sep not in flat_key:
            nested[flat_key] = value
            continue

        parts = flat_key.split(sep)
        node = nested
        # Walk (and create as needed) every intermediate level.
        for part in parts[:-1]:
            node = node.setdefault(part, {})
        node[parts[-1]] = value
    return nested
comments powered by Disqus