Skip to content

Original order of keys lost after deserialization for std::multimap and std::multiset #865

Description

@lordofdestiny

Description

As defined per the C++11 standard, as referenced here for std::multimap and std::multiset,
the order of the elements/keys-value pairs that compare equivalent is the order of insertion and does not change. There is a bug in how the deserialization of these standard containers is implemented, which results in order of equivalent keys being flipped after each subsequent deserialization.

How to reproduce

Here is a minimal code to reproduce the bug:

#include <iostream>
#include <string>
#include <iterator>
#include <sstream>

#include "cereal/types/map.hpp"
#include "cereal/types/set.hpp"
#include "cereal/archives/binary.hpp"

struct MockValue
{
    char key, value;

    MockValue() = default;

    explicit MockValue(char key, char value)
        : key(key), value(value) {}

    bool operator<(const MockValue &other) const
    {
        return this->key < other.key;
    }
    bool operator==(const MockValue &other) const
    {
        return this->key == other.key && this->value == other.value;
    }

    friend std::ostream &operator<<(std::ostream &os, const MockValue &val)
    {
        return os << "[Key=" << val.key << ", Value=" << val.value << "]";
    }

    template <class Archive>
    void serialize(Archive &archive)
    {
        archive(key, value);
    }
};

// Print generic std::pair
template <template <typename...> class Pair, typename... Args,
          typename = std::pair<typename Pair<Args...>::first_type,
                               typename Pair<Args...>::second_type>>
std::ostream &operator<<(std::ostream &os, const Pair<Args...> &pair)
{
    return os << "Pair{" << pair.first << ", " << pair.second << "}";
}

// Print standard containers
template <
    template <typename...> class Container,
    typename... Args>
void printContainer(Container<Args...> &container)
{
    using VType = typename Container<Args...>::value_type;
    auto begin = container.begin();
    auto end = container.end();
    std::copy(begin, end, std::ostream_iterator<VType>(std::cout, "\n"));
}

void testMultimap() {
    std::cout << std::string(40, '=') << "std::multimap" << std::string(40, '=') << '\n';
    std::stringstream buffer;

    {
        std::multimap<char, MockValue> mmap;
        mmap.emplace('a', MockValue('a', 'a'));
        mmap.emplace('a', MockValue('a', 'b'));
        mmap.emplace('a', MockValue('a', 'c'));
        mmap.emplace('a', MockValue('a', 'd'));
        mmap.emplace('b', MockValue('b', 'a'));
        mmap.emplace('b', MockValue('b', 'b'));
        mmap.emplace('b', MockValue('b', 'c'));
        mmap.emplace('b', MockValue('b', 'd'));
    
        std::cout << "Before:\n";
        printContainer(mmap);
        std::cout << std::string(50,'-') << '\n';
        
        cereal::BinaryOutputArchive ar(buffer);
        ar(mmap);
    }
    
    {
        std::multimap<char, MockValue> mmap;
        cereal::BinaryInputArchive ar(buffer);
        ar(mmap);
        
        std::cout << "After: \n";
        printContainer(mmap);
    }

}

void testMultiset() {
    std::cout << std::string(40, '=') << "std::multiset" << std::string(40, '=') << '\n';
    std::stringstream buffer;

    {
        std::multiset<MockValue> mset;
        mset.emplace(MockValue('a', 'a'));
        mset.emplace(MockValue('a', 'b'));
        mset.emplace(MockValue('a', 'c'));
        mset.emplace(MockValue('a', 'd'));
        mset.emplace(MockValue('b', 'a'));
        mset.emplace(MockValue('b', 'b'));
        mset.emplace(MockValue('b', 'c'));
        mset.emplace(MockValue('b', 'd'));
    
        std::cout << "Before:\n";
        printContainer(mset);
        std::cout << std::string(50,'-') << '\n';
        
        cereal::BinaryOutputArchive ar(buffer);
        ar(mset);
    }
    
    {
        std::multiset<MockValue> mset;
        cereal::BinaryInputArchive ar(buffer);
        ar(mset);
        
        std::cout << "After: \n";
        printContainer(mset);
    }

}

int main()
{
    testMultimap();
    testMultiset();
}

And this is the output with the current release:

Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After: 
Pair{a, [Key=a, Value=d]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=a]}
Pair{b, [Key=b, Value=d]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=a]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After: 
[Key=a, Value=d]
[Key=a, Value=c]
[Key=a, Value=b]
[Key=a, Value=a]
[Key=b, Value=d]
[Key=b, Value=c]
[Key=b, Value=b]
[Key=b, Value=a]

This is the expected output ( note identical before and after outputs):

========================================std::multimap========================================
Before:
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
--------------------------------------------------
After: 
Pair{a, [Key=a, Value=a]}
Pair{a, [Key=a, Value=b]}
Pair{a, [Key=a, Value=c]}
Pair{a, [Key=a, Value=d]}
Pair{b, [Key=b, Value=a]}
Pair{b, [Key=b, Value=b]}
Pair{b, [Key=b, Value=c]}
Pair{b, [Key=b, Value=d]}
========================================std::multiset========================================
Before:
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]
--------------------------------------------------
After: 
[Key=a, Value=a]
[Key=a, Value=b]
[Key=a, Value=c]
[Key=a, Value=d]
[Key=b, Value=a]
[Key=b, Value=b]
[Key=b, Value=c]
[Key=b, Value=d]

Diagnosis

I've been able to diagnose the issue, namely here for std::multimap, and here for std::multiset. In both cases, emplace_hint() is used. This function inserts the new value before the value pointed to by the hint. This means that the order of inserted keys is reversed after the container is deserialized. I presume this function was used because of performance considerations, but it does not result in the expected behavior.

Note that this must be considered a bug because it violates the expected type invariants of std::multimap and std::multiset.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions