Skip to content

Commit a52aadc

Browse files
committed
add utf8 unit test
1 parent 0ed91e2 commit a52aadc

File tree

4 files changed

+280
-1
lines changed

4 files changed

+280
-1
lines changed

test/Jamfile

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ feature launcher : none valgrind : composite ;
8888
feature.compose <launcher>valgrind : <testing.launcher>"valgrind --tool=memcheck -v --num-callers=20 --read-var-info=yes --track-origins=yes --error-exitcode=222 --suppressions=valgrind_suppressions.txt" <valgrind>on ;
8989

9090
test-suite libtorrent :
91+
[ run test_utf8.cpp ]
9192
[ run test_gzip.cpp ]
9293
[ run test_bitfield.cpp ]
9394
[ run test_torrent_info.cpp ]

test/test_gzip.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ int test_main()
4242
{
4343
std::vector<char> zipped;
4444
error_code ec;
45-
int r = load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
45+
load_file(combine_path("..", "zeroes.gz"), zipped, ec, 1000000);
4646
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
4747
, ec.message().c_str());
4848
TEST_CHECK(!ec);

test/test_utf8.cpp

+128
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
/*
2+
3+
Copyright (c) 2014, Arvid Norberg
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions
8+
are met:
9+
10+
* Redistributions of source code must retain the above copyright
11+
notice, this list of conditions and the following disclaimer.
12+
* Redistributions in binary form must reproduce the above copyright
13+
notice, this list of conditions and the following disclaimer in
14+
the documentation and/or other materials provided with the distribution.
15+
* Neither the name of the author nor the names of its
16+
contributors may be used to endorse or promote products derived
17+
from this software without specific prior written permission.
18+
19+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29+
POSSIBILITY OF SUCH DAMAGE.
30+
31+
*/
32+
33+
#include "test.hpp"
34+
#include "libtorrent/utf8.hpp"
35+
#include "libtorrent/ConvertUTF.h"
36+
#include "setup_transfer.hpp" // for load_file
37+
#include "file.hpp" // for combine_path
38+
39+
#include <vector>
40+
41+
using namespace libtorrent;
42+
43+
int test_main()
44+
{
45+
std::vector<char> utf8_source;
46+
error_code ec;
47+
load_file(combine_path("..", "utf8_test.txt"), utf8_source, ec, 1000000);
48+
if (ec) fprintf(stderr, "failed to open file: (%d) %s\n", ec.value()
49+
, ec.message().c_str());
50+
TEST_CHECK(!ec);
51+
52+
// test lower level conversions
53+
54+
// utf8 -> utf16 -> utf32 -> utf8
55+
{
56+
std::vector<UTF16> utf16(utf8_source.size());
57+
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
58+
UTF16* out16 = &utf16[0];
59+
ConversionResult ret = ConvertUTF8toUTF16(&in8, in8 + utf8_source.size()
60+
, &out16, out16 + utf16.size(), strictConversion);
61+
62+
TEST_EQUAL(ret, conversionOK);
63+
64+
std::vector<UTF32> utf32(utf8_source.size());
65+
UTF16 const* in16 = &utf16[0];
66+
UTF32* out32 = &utf32[0];
67+
ret = ConvertUTF16toUTF32(&in16, out16
68+
, &out32, out32 + utf32.size(), strictConversion);
69+
70+
TEST_EQUAL(ret, conversionOK);
71+
72+
std::vector<UTF8> utf8(utf8_source.size());
73+
UTF32 const* in32 = &utf32[0];
74+
UTF8* out8 = &utf8[0];
75+
ret = ConvertUTF32toUTF8(&in32, out32
76+
, &out8, out8 + utf8.size(), strictConversion);
77+
78+
TEST_EQUAL(ret, conversionOK);
79+
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
80+
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
81+
}
82+
83+
// utf8 -> utf32 -> utf16 -> utf8
84+
{
85+
std::vector<UTF32> utf32(utf8_source.size());
86+
UTF8 const* in8 = (UTF8 const*)&utf8_source[0];
87+
UTF32* out32 = &utf32[0];
88+
ConversionResult ret = ConvertUTF8toUTF32(&in8, in8 + utf8_source.size()
89+
, &out32, out32 + utf32.size(), strictConversion);
90+
91+
TEST_EQUAL(ret, conversionOK);
92+
93+
std::vector<UTF16> utf16(utf8_source.size());
94+
UTF32 const* in32 = &utf32[0];
95+
UTF16* out16 = &utf16[0];
96+
ret = ConvertUTF32toUTF16(&in32, out32
97+
, &out16, out16 + utf16.size(), strictConversion);
98+
99+
TEST_EQUAL(ret, conversionOK);
100+
101+
std::vector<UTF8> utf8(utf8_source.size());
102+
UTF16 const* in16 = &utf16[0];
103+
UTF8* out8 = &utf8[0];
104+
ret = ConvertUTF16toUTF8(&in16, out16
105+
, &out8, out8 + utf8.size(), strictConversion);
106+
107+
TEST_EQUAL(ret, conversionOK);
108+
TEST_EQUAL(out8 - &utf8[0], utf8_source.size());
109+
TEST_CHECK(std::equal(&utf8[0], out8, (UTF8 const*)&utf8_source[0]));
110+
}
111+
112+
// test higher level conversions
113+
114+
std::string utf8;
115+
std::copy(utf8_source.begin(), utf8_source.end(), std::back_inserter(utf8));
116+
117+
std::wstring wide;
118+
utf8_conv_result_t ret = utf8_wchar(utf8, wide);
119+
TEST_EQUAL(ret, conversion_ok);
120+
121+
std::string identity;
122+
ret = wchar_utf8(wide, identity);
123+
TEST_EQUAL(ret, conversion_ok);
124+
125+
TEST_EQUAL(utf8, identity);
126+
return 0;
127+
}
128+

test/utf8_test.txt

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
Sentences that contain all letters commonly used in a language
2+
--------------------------------------------------------------
3+
4+
Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> -- 2012-04-11
5+
6+
This is an example of a plain-text file encoded in UTF-8.
7+
8+
9+
Danish (da)
10+
---------
11+
12+
Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen
13+
Wolther spillede på xylofon.
14+
(= Quiz contestants were eating strawberries with cream while Wolther
15+
the circus clown played on xylophone.)
16+
17+
German (de)
18+
-----------
19+
20+
Falsches Üben von Xylophonmusik quält jeden größeren Zwerg
21+
(= Wrongful practicing of xylophone music tortures every larger dwarf)
22+
23+
Zwölf Boxkämpfer jagten Eva quer über den Sylter Deich
24+
(= Twelve boxing fighters hunted Eva across the dike of Sylt)
25+
26+
Heizölrückstoßabdämpfung
27+
(= fuel oil recoil absorber)
28+
(jqvwxy missing, but all non-ASCII letters in one word)
29+
30+
Greek (el)
31+
----------
32+
33+
Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ στὸ χρυσαφὶ ξέφωτο
34+
(= No more shall I see acacias or myrtles in the golden clearing)
35+
36+
Ξεσκεπάζω τὴν ψυχοφθόρα βδελυγμία
37+
(= I uncover the soul-destroying abhorrence)
38+
39+
English (en)
40+
------------
41+
42+
The quick brown fox jumps over the lazy dog
43+
44+
Spanish (es)
45+
------------
46+
47+
El pingüino Wenceslao hizo kilómetros bajo exhaustiva lluvia y
48+
frío, añoraba a su querido cachorro.
49+
(Contains every letter and every accent, but not every combination
50+
of vowel + acute.)
51+
52+
French (fr)
53+
-----------
54+
55+
Portez ce vieux whisky au juge blond qui fume sur son île intérieure, à
56+
côté de l'alcôve ovoïde, où les bûches se consument dans l'âtre, ce
57+
qui lui permet de penser à la cænogenèse de l'être dont il est question
58+
dans la cause ambiguë entendue à Moÿ, dans un capharnaüm qui,
59+
pense-t-il, diminue çà et là la qualité de son œuvre.
60+
61+
l'île exiguë
62+
Où l'obèse jury mûr
63+
Fête l'haï volapük,
64+
Âne ex aéquo au whist,
65+
Ôtez ce vœu déçu.
66+
67+
Le cœur déçu mais l'âme plutôt naïve, Louÿs rêva de crapaüter en
68+
canoë au delà des îles, près du mälström où brûlent les novæ.
69+
70+
Irish Gaelic (ga)
71+
-----------------
72+
73+
D'fhuascail Íosa, Úrmhac na hÓighe Beannaithe, pór Éava agus Ádhaimh
74+
75+
Hungarian (hu)
76+
--------------
77+
78+
Árvíztűrő tükörfúrógép
79+
(= flood-proof mirror-drilling machine, only all non-ASCII letters)
80+
81+
Icelandic (is)
82+
--------------
83+
84+
Kæmi ný öxi hér ykist þjófum nú bæði víl og ádrepa
85+
86+
Sævör grét áðan því úlpan var ónýt
87+
(some ASCII letters missing)
88+
89+
Japanese (jp)
90+
-------------
91+
92+
Hiragana: (Iroha)
93+
94+
いろはにほへとちりぬるを
95+
わかよたれそつねならむ
96+
うゐのおくやまけふこえて
97+
あさきゆめみしゑひもせす
98+
99+
Katakana:
100+
101+
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
102+
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン
103+
104+
Hebrew (iw)
105+
-----------
106+
107+
? דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה
108+
109+
Polish (pl)
110+
-----------
111+
112+
Pchnąć w tę łódź jeża lub ośm skrzyń fig
113+
(= To push a hedgehog or eight bins of figs in this boat)
114+
115+
Russian (ru)
116+
------------
117+
118+
В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!
119+
(= Would a citrus live in the bushes of south? Yes, but only a fake one!)
120+
121+
Съешь же ещё этих мягких французских булок да выпей чаю
122+
(= Eat some more of these fresh French loaves and have some tea)
123+
124+
Thai (th)
125+
---------
126+
127+
[--------------------------|------------------------]
128+
๏ เป็นมนุษย์สุดประเสริฐเลิศคุณค่า กว่าบรรดาฝูงสัตว์เดรัจฉาน
129+
จงฝ่าฟันพัฒนาวิชาการ อย่าล้างผลาญฤๅเข่นฆ่าบีฑาใคร
130+
ไม่ถือโทษโกรธแช่งซัดฮึดฮัดด่า หัดอภัยเหมือนกีฬาอัชฌาสัย
131+
ปฏิบัติประพฤติกฎกำหนดใจ พูดจาให้จ๊ะๆ จ๋าๆ น่าฟังเอย ฯ
132+
133+
[The copyright for the Thai example is owned by The Computer
134+
Association of Thailand under the Royal Patronage of His Majesty the
135+
King.]
136+
137+
Turkish (tr)
138+
------------
139+
140+
Pijamalı hasta, yağız şoföre çabucak güvendi.
141+
(=Patient with pajamas, trusted swarthy driver quickly)
142+
143+
144+
Special thanks to the people from all over the world who contributed
145+
these sentences since 1999.
146+
147+
A much larger collection of such pangrams is now available at
148+
149+
http://en.wikipedia.org/wiki/List_of_pangrams
150+

0 commit comments

Comments
 (0)