Skip to content

Commit cc027b1

Browse files
committed
add flatbuffers
1 parent d90979e commit cc027b1

File tree

9 files changed

+239
-12
lines changed

9 files changed

+239
-12
lines changed

CMakeLists.txt

+31-1
Original file line numberDiff line numberDiff line change
@@ -179,12 +179,28 @@ include_directories(${avro_PREFIX}/include)
179179
set(AVRO_LIBRARIES ${avro_PREFIX}/lib/libavrocpp_s.a)
180180
set(AVRO_GENERATOR ${avro_PREFIX}/bin/avrogencpp)
181181

182+
set(flatbuffers_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/external/flatbuffers)
183+
ExternalProject_Add(
184+
flatbuffers
185+
PREFIX ${flatbuffers_PREFIX}
186+
URL "https://github.com/google/flatbuffers/archive/v1.3.0.tar.gz"
187+
URL_MD5 "86d13e8c4c19c2bbadff86c5435b2fca"
188+
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${flatbuffers_PREFIX} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
189+
LOG_UPDATE ON
190+
LOG_CONFIGURE ON
191+
LOG_BUILD ON
192+
)
193+
include_directories(${flatbuffers_PREFIX}/include)
194+
set(FLATBUFFERS_LIBRARIES ${flatbuffers_PREFIX}/lib/libflatbuffers.a)
195+
set(FLATBUFFERS_GENERATOR ${flatbuffers_PREFIX}/bin/flatc)
196+
182197
set(LINKLIBS ${THRIFT_LIBRARIES}
183198
${MSGPACK_LIBRARIES}
184199
${PROTOBUF_LIBRARIES}
185200
${CAPNPROTO_LIBRARIES}
186201
${BOOST_LIBRARIES}
187202
${AVRO_LIBRARIES}
203+
${FLATBUFFERS_LIBRARIES}
188204
${CMAKE_THREAD_LIBS_INIT}
189205
${ZLIB_LIBRARIES}
190206
)
@@ -249,6 +265,19 @@ set_source_files_properties(
249265
)
250266
set(AVRO_SERIALIZATION_SOURCES ${cpp_serializers_SOURCE_DIR}/avro/record.hpp)
251267

268+
add_custom_command(
269+
DEPENDS ${cpp_serializers_SOURCE_DIR}/test.fbs
270+
COMMAND ${FLATBUFFERS_GENERATOR}
271+
ARGS --cpp -o ${cpp_serializers_SOURCE_DIR}/flatbuffers ${cpp_serializers_SOURCE_DIR}/test.fbs
272+
OUTPUT "${cpp_serializers_SOURCE_DIR}/flatbuffers/test_generated.h"
273+
COMMENT "Executing FlatBuffers compiler"
274+
)
275+
set_source_files_properties(
276+
${cpp_serializers_SOURCE_DIR}/flatbuffers/test_generated.h
277+
PROPERTIES GENERATED TRUE
278+
)
279+
set(FLATBUFFERS_SERIALIZATION_SOURCES ${cpp_serializers_SOURCE_DIR}/flatbuffers/test_generated.h)
280+
252281
set(BOOST_SERIALIZATION_SOURCES ${cpp_serializers_SOURCE_DIR}/boost/record.cpp)
253282
set(CEREAL_SERIALIZATION_SOURCES ${cpp_serializers_SOURCE_DIR}/cereal/record.cpp)
254283

@@ -259,7 +288,8 @@ add_executable(benchmark ${cpp_serializers_SOURCE_DIR}/test.cpp
259288
${BOOST_SERIALIZATION_SOURCES}
260289
${CEREAL_SERIALIZATION_SOURCES}
261290
${AVRO_SERIALIZATION_SOURCES}
291+
${FLATBUFFERS_SERIALIZATION_SOURCES}
262292
)
263-
add_dependencies(benchmark thrift msgpack protobuf capnproto boost cereal avro)
293+
add_dependencies(benchmark thrift msgpack protobuf capnproto boost cereal avro flatbuffers)
264294
target_link_libraries(benchmark ${LINKLIBS})
265295
set_target_properties(benchmark PROPERTIES COMPILE_FLAGS "-std=c++11")

README.md

+28-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
1-
#### [Thrift](http://thrift.apache.org/) vs. [Protobuf](https://code.google.com/p/protobuf/) vs. [Boost.Serialization](http://www.boost.org/libs/serialization) vs. [Msgpack](http://msgpack.org/) vs. [Cereal](http://uscilab.github.io/cereal/index.html) vs. [Avro](http://avro.apache.org/) serialization/deserialization time test for C++.
1+
#### About
2+
3+
Compare various data serialization libraries for C++.
4+
5+
* [Thrift](http://thrift.apache.org/)
6+
* [Protobuf](https://code.google.com/p/protobuf/)
7+
* [Boost.Serialization](http://www.boost.org/libs/serialization)
8+
* [Msgpack](http://msgpack.org/)
9+
* [Cereal](http://uscilab.github.io/cereal/index.html)
10+
* [Avro](http://avro.apache.org/)
11+
* [Capnproto](https://capnproto.org/)
12+
* [Flatbuffers](https://google.github.io/flatbuffers/)
213

314
#### Build
415
This project does not have any external library dependencies. All (boost, thrift etc.) needed libraries are downloaded
@@ -38,6 +49,8 @@ on a typical desktop computer with Intel Core i5 processor running Ubuntu 14.04.
3849
* msgpack 0.5.9
3950
* cereal 1.1.2
4051
* avro 1.7.7
52+
* capnproto 0.5.2
53+
* flatbuffers 1.3.0
4154

4255
| serializer | object's size | avg. total time |
4356
| -------------- | ------------- | --------------- |
@@ -49,14 +62,24 @@ on a typical desktop computer with Intel Core i5 processor running Ubuntu 14.04.
4962
| cereal | 17416 | 10688 |
5063
| avro | 12288 | 31750 |
5164

52-
Size mesuared in bytes, time mesuared in milliseconds.
53-
54-
##### Graphical representations
55-
5665
###### Size
5766

5867
![Size](images/size.png)
5968

6069
###### Time
6170

6271
![Time](images/time.png)
72+
73+
For capnproto and flatbuffers, since they already store data in a "serialized" form and serialization basically means getting a pointer
74+
to internal storage, we measure the full build/serialize/deserialize cycle. For all the other libraries we measured the serialize/deserialize
75+
cycle of an already built data structure.
76+
77+
| serializer | object's size | avg. total time |
78+
| -------------- | ------------- | --------------- |
79+
| capnproto | 17768 | 4396 |
80+
| flatbuffers | 17632 | 12494 |
81+
82+
![Time](images/time2.png)
83+
84+
Size measured in bytes, time measured in milliseconds.
85+

flatbuffers/test_generated.h

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// automatically generated by the FlatBuffers compiler, do not modify
2+
3+
#ifndef FLATBUFFERS_GENERATED_TEST_FLATBUFFERS_TEST_H_
4+
#define FLATBUFFERS_GENERATED_TEST_FLATBUFFERS_TEST_H_
5+
6+
#include "flatbuffers/flatbuffers.h"
7+
8+
9+
namespace flatbuffers_test {
10+
11+
struct Record;
12+
13+
// Read-only view of a serialized Record table.
// Exposes the two vector fields (`ids`, `strings`) through GetPointer() and
// provides Verify() to check both fields with a flatbuffers::Verifier.
struct Record FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
14+
// Field identifiers handed to GetPointer()/VerifyField() below and to
// AddOffset() in RecordBuilder (presumably vtable offsets - confirm against
// the FlatBuffers documentation).
enum {
15+
VT_IDS = 4,
16+
VT_STRINGS = 6
17+
};
18+
// Returns a pointer to the vector of 64-bit integer ids stored under VT_IDS.
const flatbuffers::Vector<int64_t> *ids() const { return GetPointer<const flatbuffers::Vector<int64_t> *>(VT_IDS); }
19+
// Returns a pointer to the vector of string offsets stored under VT_STRINGS.
const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *strings() const { return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_STRINGS); }
20+
// Returns true only if every check in the chain succeeds: table start, both
// fields, both vectors, the strings inside the `strings` vector, table end.
bool Verify(flatbuffers::Verifier &verifier) const {
21+
return VerifyTableStart(verifier) &&
22+
VerifyField<flatbuffers::uoffset_t>(verifier, VT_IDS) &&
23+
verifier.Verify(ids()) &&
24+
VerifyField<flatbuffers::uoffset_t>(verifier, VT_STRINGS) &&
25+
verifier.Verify(strings()) &&
26+
verifier.VerifyVectorOfStrings(strings()) &&
27+
verifier.EndTable();
28+
}
29+
};
30+
31+
// Helper that writes one Record table into a FlatBufferBuilder.
// The constructor calls StartTable(), add_ids()/add_strings() register the
// field offsets, and Finish() calls EndTable() and returns the table offset.
struct RecordBuilder {
32+
// Builder the table is written into (held by reference, not owned).
flatbuffers::FlatBufferBuilder &fbb_;
33+
// Value returned by StartTable(); passed back to EndTable() in Finish().
flatbuffers::uoffset_t start_;
34+
// Registers the offset of an already created ids vector under VT_IDS.
void add_ids(flatbuffers::Offset<flatbuffers::Vector<int64_t>> ids) { fbb_.AddOffset(Record::VT_IDS, ids); }
35+
// Registers the offset of an already created strings vector under VT_STRINGS.
void add_strings(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> strings) { fbb_.AddOffset(Record::VT_STRINGS, strings); }
36+
// Binds to `_fbb` and starts a new table in it.
RecordBuilder(flatbuffers::FlatBufferBuilder &_fbb) : fbb_(_fbb) { start_ = fbb_.StartTable(); }
37+
// Declared but not defined here, so copy assignment is not usable.
RecordBuilder &operator=(const RecordBuilder &);
38+
// Ends the table (declaring 2 fields) and returns its offset as Offset<Record>.
flatbuffers::Offset<Record> Finish() {
39+
auto o = flatbuffers::Offset<Record>(fbb_.EndTable(start_, 2));
40+
return o;
41+
}
42+
};
43+
44+
// Convenience wrapper around RecordBuilder: writes a Record table into `_fbb`
// from already created `ids` and `strings` vector offsets and returns the
// offset of the finished table. Both offsets default to 0.
inline flatbuffers::Offset<Record> CreateRecord(flatbuffers::FlatBufferBuilder &_fbb,
45+
flatbuffers::Offset<flatbuffers::Vector<int64_t>> ids = 0,
46+
flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> strings = 0) {
47+
RecordBuilder builder_(_fbb);
48+
// Fields are added in the order emitted by the generator (strings, then ids).
builder_.add_strings(strings);
49+
builder_.add_ids(ids);
50+
return builder_.Finish();
51+
}
52+
53+
// Returns the root Record of the buffer at `buf` via flatbuffers::GetRoot; this function itself performs no verification.
inline const flatbuffers_test::Record *GetRecord(const void *buf) { return flatbuffers::GetRoot<flatbuffers_test::Record>(buf); }
54+
55+
// Asks `verifier` to verify its buffer as one whose root table is a Record; returns the verifier's result.
inline bool VerifyRecordBuffer(flatbuffers::Verifier &verifier) { return verifier.VerifyBuffer<flatbuffers_test::Record>(); }
56+
57+
// Finishes the buffer held by `fbb` with `root` as its root Record table.
inline void FinishRecordBuffer(flatbuffers::FlatBufferBuilder &fbb, flatbuffers::Offset<flatbuffers_test::Record> root) { fbb.Finish(root); }
58+
59+
} // namespace flatbuffers_test
60+
61+
#endif // FLATBUFFERS_GENERATED_TEST_FLATBUFFERS_TEST_H_

images/graphs.R

+27-4
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,34 @@
11
library(ggplot2)
22

3-
name <- c("thrift-binary", "thrift-compact", "protobuf", "boost", "msgpack", "cereal", "avro")
3+
names.size <- c("thrift-binary", "thrift-compact", "protobuf", "boost", "msgpack", "cereal", "avro", "capnproto", "flatbuffers")
4+
names.time <- c("thrift-binary", "thrift-compact", "protobuf", "boost", "msgpack", "cereal", "avro")
5+
names.time2 <- c("capnproto", "flatbuffers")
46
# data from the 1000000 simulations
57
# for t in thrift-binary thrift-compact protobuf boost msgpack cereal avro; do echo -n "$t: "; ./benchmark 1 $t | grep size | awk '{print $4}'; done
6-
size <- c(17017, 11597, 12571, 17470, 11902, 17416, 12288)
8+
size <- c(17017, 11597, 12571, 17470, 11902, 17416, 12288, 17768, 17632)
79
# for t in thrift-binary thrift-compact protobuf boost msgpack cereal avro; do rm -f /tmp/$t.time; echo -n "$t: "; for i in `seq 1 50`; do ./benchmark 1000000 $t | grep time | awk '{print $4}' >>/tmp/$t.time; done; awk '{ sum += $1 } END { print sum/50}' /tmp/$t.time; done
810
time <- c(13763, 27017, 21034, 22945, 23560, 10688, 31750)
11+
time2 <- c(4396, 12494)
12+
13+
data.size <- as.data.frame(list(serializer = names.size, size = size))
14+
data.time <- as.data.frame(list(serializer = names.time, time = time))
15+
data.time2 <- as.data.frame(list(serializer = names.time2, time = time2))
16+
17+
# Bar chart of the serialized object size (bytes) for every serializer.
# The measure is mapped as a plain numeric value: wrapping it in as.factor()
# would make the y scale discrete, so bar heights would encode the rank of
# each value instead of its magnitude.
ggplot(data.size, aes(x = as.factor(serializer), y = size, fill = serializer)) +
    geom_bar(stat = "identity") +
    xlab("serializer") +
    ylab("size") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bar chart of the average total time (milliseconds) for the libraries measured
# on a serialize/deserialize cycle of an already built data structure.
ggplot(data.time, aes(x = as.factor(serializer), y = time, fill = serializer)) +
    geom_bar(stat = "identity") +
    xlab("serializer") +
    ylab("time") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Bar chart of the average total time (milliseconds) for capnproto and
# flatbuffers, which are measured on the full build/serialize/deserialize cycle.
ggplot(data.time2, aes(x = as.factor(serializer), y = time, fill = serializer)) +
    geom_bar(stat = "identity") +
    xlab("serializer") +
    ylab("time") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
934

10-
qplot(factor(name), y=size, geom="bar", fill=factor(name), stat="identity") + xlab("serializer")
11-
qplot(factor(name), y=time, geom="bar", fill=factor(name), stat="identity") + xlab("serializer")

images/size.png

9.82 KB
Loading

images/time.png

8.22 KB
Loading

images/time2.png

5.89 KB
Loading

test.cpp

+84-2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "msgpack/record.hpp"
2626
#include "cereal/record.hpp"
2727
#include "avro/record.hpp"
28+
#include "flatbuffers/test_generated.h"
2829

2930
#include "data.hpp"
3031

@@ -203,9 +204,25 @@ capnproto_serialization_test(size_t iterations)
203204

204205
auto start = std::chrono::high_resolution_clock::now();
205206
for (size_t i = 0; i < iterations; i++) {
207+
capnp::MallocMessageBuilder message;
208+
Record::Builder r1 = message.getRoot<Record>();
209+
210+
auto ids = r1.initIds(kIntegers.size());
211+
for (size_t i = 0; i < kIntegers.size(); i++) {
212+
ids.set(i, kIntegers[i]);
213+
}
214+
215+
auto strings = r1.initStrings(kStringsCount);
216+
for (size_t i = 0; i < kStringsCount; i++) {
217+
strings.set(i, kStringValue);
218+
}
219+
206220
serialized = message.getSegmentsForOutput();
207221
capnp::SegmentArrayMessageReader reader(serialized);
208-
reader.getRoot<Record>();
222+
auto r2 = reader.getRoot<Record>();
223+
224+
(void)r2.getIds().size();
225+
(void)r2.getStrings().size();
209226
}
210227
auto finish = std::chrono::high_resolution_clock::now();
211228
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();
@@ -394,13 +411,74 @@ avro_serialization_test(size_t iterations)
394411
std::cout << "avro: time = " << duration << " milliseconds" << std::endl << std::endl;
395412
}
396413

414+
void
415+
flatbuffers_serialization_test(size_t iterations)
416+
{
417+
using namespace flatbuffers_test;
418+
419+
std::vector<flatbuffers::Offset<flatbuffers::String>> strings;
420+
strings.reserve(kStringsCount);
421+
422+
flatbuffers::FlatBufferBuilder builder;
423+
for (size_t i = 0; i < kStringsCount; i++) {
424+
strings.push_back(builder.CreateString(kStringValue));
425+
}
426+
427+
auto ids_vec = builder.CreateVector(kIntegers);
428+
auto strings_vec = builder.CreateVector(strings);
429+
auto r1 = CreateRecord(builder, ids_vec, strings_vec);
430+
431+
builder.Finish(r1);
432+
433+
auto p = reinterpret_cast<char*>(builder.GetBufferPointer());
434+
auto sz = builder.GetSize();
435+
std::vector<char> buf(p, p + sz);
436+
437+
auto r2 = GetRecord(buf.data());
438+
if (r2->strings()->size() != kStringsCount || r2->ids()->size() != kIntegers.size()) {
439+
throw std::logic_error("flatbuffer's case: deserialization failed");
440+
}
441+
442+
std::cout << "flatbuffers: size = " << builder.GetSize() << " bytes" << std::endl;
443+
444+
builder.ReleaseBufferPointer();
445+
446+
auto start = std::chrono::high_resolution_clock::now();
447+
for (size_t i = 0; i < iterations; i++) {
448+
builder.Clear();
449+
strings.clear();
450+
// buf.clear();
451+
452+
for (size_t i = 0; i < kStringsCount; i++) {
453+
strings.push_back(builder.CreateString(kStringValue));
454+
}
455+
456+
auto ids_vec = builder.CreateVector(kIntegers);
457+
auto strings_vec = builder.CreateVector(strings);
458+
auto r1 = CreateRecord(builder, ids_vec, strings_vec);
459+
builder.Finish(r1);
460+
461+
auto p = reinterpret_cast<char*>(builder.GetBufferPointer());
462+
auto sz = builder.GetSize();
463+
std::vector<char> buf(p, p + sz);
464+
auto r2 = GetRecord(buf.data());
465+
(void)r2->ids()[0];
466+
467+
builder.ReleaseBufferPointer();
468+
}
469+
auto finish = std::chrono::high_resolution_clock::now();
470+
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();
471+
472+
std::cout << "flatbuffers: time = " << duration << " milliseconds" << std::endl << std::endl;
473+
}
474+
397475
int
398476
main(int argc, char **argv)
399477
{
400478
GOOGLE_PROTOBUF_VERIFY_VERSION;
401479

402480
if (argc < 2) {
403-
std::cout << "usage: " << argv[0] << " N [thrift-binary thrift-compact protobuf boost msgpack cereal avro]";
481+
std::cout << "usage: " << argv[0] << " N [thrift-binary thrift-compact protobuf boost msgpack cereal avro flatbuffers]";
404482
std::cout << std::endl << std::endl;
405483
std::cout << "arguments: " << std::endl;
406484
std::cout << " N -- number of iterations" << std::endl << std::endl;
@@ -461,6 +539,10 @@ main(int argc, char **argv)
461539
if (names.empty() || names.find("avro") != names.end()) {
462540
avro_serialization_test(iterations);
463541
}
542+
543+
if (names.empty() || names.find("flatbuffers") != names.end()) {
544+
flatbuffers_serialization_test(iterations);
545+
}
464546
} catch (std::exception &exc) {
465547
std::cerr << "Error: " << exc.what() << std::endl;
466548
return EXIT_FAILURE;

test.fbs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
namespace flatbuffers_test;
2+
3+
// Benchmark payload: one vector of 64-bit integers and one vector of strings.
table Record {
4+
// Exposed as flatbuffers::Vector<int64_t> in the generated C++ header.
ids:[long];
5+
// Exposed as a vector of string offsets in the generated C++ header.
strings:[string];
6+
}
7+
8+
root_type Record;

0 commit comments

Comments
 (0)