Skip to content

Some improvements to ColumnMap #257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clickhouse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ INSTALL(FILES base/input.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/open_telemetry.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/output.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/platform.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/projected_iterator.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/singleton.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/socket.h DESTINATION include/clickhouse/base/)
INSTALL(FILES base/string_utils.h DESTINATION include/clickhouse/base/)
Expand Down
55 changes: 55 additions & 0 deletions clickhouse/base/projected_iterator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#pragma once

#include <iterator>
#include <type_traits>
#include <utility>

namespace clickhouse {

/// Iterator adaptor that yields `functor(iterator)` when dereferenced.
///
/// The functor is invoked with the wrapped *iterator* itself (not with the
/// element it refers to), so it is free to use `*it`, `it->member`, etc.
/// Only pre-increment, pre-decrement, `==` and `!=` are provided; comparison
/// looks at the wrapped iterators only and ignores the functors.
///
/// @tparam UnaryFunction callable invoked as `functor(iterator)`.
/// @tparam Iterator      wrapped iterator type.
/// @tparam Reference     type returned by `operator*`; defaults to the
///                       functor's return type.
/// @tparam Value         `value_type` exposed to iterator traits; defaults to
///                       the decayed `Reference`.
template <typename UnaryFunction, typename Iterator, typename Reference = decltype(std::declval<UnaryFunction>()(std::declval<Iterator>())),
          typename Value = std::decay_t<Reference>>
class ProjectedIterator {
public:
    using value_type = Value;
    using reference = Reference;
    // NOTE(review): `pointer` aliases `Reference` and no operator-> is defined.
    using pointer = Reference;
    using difference_type = typename std::iterator_traits<Iterator>::difference_type;
    using iterator_category = typename std::iterator_traits<Iterator>::iterator_category;

    ProjectedIterator() = default;

    /// Wraps `iterator`; `functor` is taken by value and moved into place.
    ProjectedIterator(Iterator const& iterator, UnaryFunction functor)
        : iterator_(iterator)
        , functor_(std::move(functor)) {
    }

    /// Returns a copy of the stored functor.
    /// (Previously returned `functor`, i.e. this member function's own name,
    /// which does not compile once the accessor is instantiated.)
    UnaryFunction functor() const { return functor_; }

    /// Returns the wrapped iterator.
    Iterator const& base() const { return iterator_; }

    /// Applies the functor to the wrapped iterator and returns the result.
    reference operator*() const { return functor_(iterator_); }

    /// Advances the wrapped iterator.
    ProjectedIterator& operator++() {
        ++iterator_;
        return *this;
    }

    /// Steps the wrapped iterator back.
    ProjectedIterator& operator--() {
        --iterator_;
        return *this;
    }

    /// Two projected iterators are equal when their wrapped iterators are equal.
    bool operator==(const ProjectedIterator& other) const {
        return this->iterator_ == other.iterator_;
    }

    bool operator!=(const ProjectedIterator& other) const {
        return !(*this == other);
    }

private:
    Iterator iterator_;
    UnaryFunction functor_;
};

} // namespace clickhouse
4 changes: 3 additions & 1 deletion clickhouse/columns/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ class ColumnArrayT : public ColumnArray {
const size_t size_;
size_t index_;
public:
Iterator() = default;

Iterator(std::shared_ptr<NestedColumnType> typed_nested_data, size_t offset, size_t size, size_t index)
: typed_nested_data_(typed_nested_data)
, offset_(offset)
Expand Down Expand Up @@ -270,7 +272,7 @@ class ColumnArrayT : public ColumnArray {
size_t counter = 0;

while (begin != end) {
nested_data.Append(*begin);
nested_data.Append(std::move(*begin));
++begin;
++counter;
}
Expand Down
42 changes: 28 additions & 14 deletions clickhouse/columns/map.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
#pragma once

#include "../base/projected_iterator.h"
#include "array.h"
#include "column.h"
#include "tuple.h"

#include <functional>
#include <map>

namespace clickhouse {
Expand Down Expand Up @@ -122,6 +124,8 @@ class ColumnMapT : public ColumnMap {
typename ArrayColumnType::ArrayValueView::Iterator data_iterator_;

public:
Iterator() = default;

Iterator(typename ArrayColumnType::ArrayValueView::Iterator data_iterator)
: data_iterator_(data_iterator) {}

Expand Down Expand Up @@ -187,13 +191,19 @@ class ColumnMapT : public ColumnMap {
if (size() != other.size()) {
return false;
}
using Vector = std::vector<std::pair<Key, Value>>;
Vector l(begin(), end());
Vector r(other.begin(), other.end());
auto comp = [](const auto& l, const auto& r) { return l.frist < r.first; };
std::sort(l.begin(), l.end(), comp);
std::sort(r.begin(), r.end(), comp);
return std::equal(l.begin(), l.end(), r.begin(), r.end());
// Builds the list of positions [0, data.Size()) ordered so that
// data[index[0]] <= data[index[1]] <= ... (a sorted view without copying elements).
const auto make_index = [](const auto& data) {
    // Parentheses, not braces: `std::vector<size_t>{n}` selects the
    // initializer_list constructor and yields ONE element equal to n
    // instead of n elements, which would leave the index covering only data[0].
    std::vector<size_t> result(data.Size());
    // Fill with 0, 1, 2, ... using a size_t counter to match the element type.
    std::generate(result.begin(), result.end(), [i = size_t{0}] () mutable {return i++;});
    std::sort(result.begin(), result.end(), [&data](size_t l, size_t r) {return data[l] < data[r];});
    return result;
};
const auto index = make_index(data_);
for (const auto& val : other.data_) {
if (!std::binary_search(index.begin(), index.end(), val,
[&data = data_](const auto& l, size_t r) {return l < data[r];})) {
return false;
}
}
return true;
}

Expand All @@ -214,13 +224,17 @@ class ColumnMapT : public ColumnMap {

template <typename T>
inline void Append(const T& value) {
// TODO Refuse to copy.
std::vector<std::tuple<typename T::key_type, typename T::mapped_type>> container;
container.reserve(value.size());
for (const auto& i : value) {
container.emplace_back(i.first, i.second);
}
typed_data_->Append(container.begin(), container.end());
using BaseIter = decltype(value.begin());
using KeyOfT = decltype(std::declval<BaseIter>()->first);
using ValOfT = decltype(std::declval<BaseIter>()->second);
using Functor = std::function<std::tuple<KeyOfT, ValOfT>(const BaseIter&)>;
using Iterator = ProjectedIterator<Functor, BaseIter>;

Functor functor = [](const BaseIter& i) {
return std::make_tuple(std::cref(i->first), std::cref(i->second));
};

typed_data_->Append(Iterator{value.begin(), functor}, Iterator{value.end(), functor});
}

static auto Wrap(ColumnMap&& col) {
Expand Down