#
# Copyright (c) 2016 D. Richard Hipp
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the Simplified BSD License (also
# known as the "2-Clause License" or "FreeBSD License".)
#
# This program is distributed in the hope that it will be useful,
# but without any warranty; without even the implied warranty of
# merchantability or fitness for a particular purpose.
#
# Author contact information:
# [email protected]
# http://www.hwaci.com/drh/
#
############################################################################
#
# The focus of this file is to test pre-commit warnings.
#

test_setup

# Binary data: the sixteen bytes 0x00 through 0x0f, so the file contains NUL.
write_file binary "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"

# Text in which every one of the three lines is terminated by CR LF.
write_file crlf.txt \
    "ordinary text\r\ncariage returns and line feeds\r\non several lines\r\n"

# Text terminated by CR alone, and text that mixes CR, LF and CR LF.
write_file cr-only.txt    "AAA\rBBB\rCCC\r"
write_file cr-lf-crlf.txt "AAA\rBBB\nCCC\r\n"

# Plain LF-terminated text; the zero-length file is treated as the
# degenerate case of plain text.
write_file plain.txt {
Lorem ipsum
dolor sic amet
}
write_file empty {}

# Long lines.  Variable a<N> holds 2**N characters ("abcdefgh" repeated),
# from a3 (8 characters) up to a16 (65536 characters).  Each line-* file
# contains one such string followed by a single LF.
set a3  "abcdefgh"
set a4  [string repeat $a3 2]
set a5  [string repeat $a4 2]
set a6  [string repeat $a5 2]
write_file line-0064 "${a6}\n"
set a7  [string repeat $a6 2]
set a8  [string repeat $a7 2]
set a9  [string repeat $a8 2]
set a10 [string repeat $a9 2]
write_file line-1024 "${a10}\n"
set a11 [string repeat $a10 2]
write_file line-2048 "${a11}\n"
set a12 [string repeat $a11 2]
write_file line-4096 "${a12}\n"
set a13 [string repeat $a12 2]
write_file line-8192 "${a13}\n"
set a14 [string repeat $a13 2]
write_file line-16K "${a14}\n"
set a15 [string repeat $a14 2]
write_file line-32K "${a15}\n"
set a16 [string repeat $a15 2]
write_file line-64K "${a16}\n"

# UTF-8 extends 7-bit ASCII using bytes 80 and above to encode
# larger character codes. Unicode uses U+0 through U+10FFFF only,
# with U+D800 through U+DFFF reserved for surrogate pairs.
# UTF-8 is valid if it is the shortest possible coding and encodes a
# valid Unicode code point. But it's complicated.
#
# Every file name below spells out, in hex, the byte sequence that is
# embedded in the file's content; keep the two in sync when editing.

# NUL encoded with more bytes than necessary (2 through 7 byte forms).
write_file utf-mod-c0-80 "11 bit NUL:\xC0\x80 is sometimes ok\n"
write_file utf-bad-e0-80-80 "16 bit NUL:\xE0\x80\x80 is bad\n"
write_file utf-bad-f0-80-80-80 "21 bit NUL:\xF0\x80\x80\x80 is bad\n"
write_file utf-bad-f8-80-80-80-80 "26 bit NUL:\xF8\x80\x80\x80\x80 is bad\n"
write_file utf-bad-fc-80-80-80-80-80 "31 bit NUL:\xFC\x80\x80\x80\x80\x80 is bad\n"
# Lead byte FE followed by six continuation bytes.  (This used to repeat the
# FC sequence of the previous file, so the FE lead byte was never exercised.)
write_file utf-bad-fe-80-80-80-80-80-80 "36 bit NUL:\xFE\x80\x80\x80\x80\x80\x80 is bad\n"

# Other overlong two-byte encodings of 7-bit characters.
write_file utf-bad-c0-81 "overlong SOH:\xC0\x81 is bad\n"
write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"

# Code points beyond U+10FFFF.  F4 90 80 80 is the complete four-byte
# encoding of U+110000; all four bytes are needed so that the file tests an
# out-of-range code point rather than a truncated sequence.
write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80\x80 not unicode\n"
write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
write_file utf-bad-ff "no byte FF:\xFF\n"

# Surrogate code points encoded directly as three-byte sequences.
write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n"
write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n"
write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"

# Well-formed UTF-8 using 2-, 3- and 4-byte sequences, written both without
# and with a leading UTF-8 byte order mark (EF BB BF).
set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
# NOTE(review): protOut is a test-harness helper; presumably it echoes the
# sample text to the test output -- confirm against the harness.
protOut $emoji
write_file utf-8-emoji $emoji
write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji"

# UTF-16 uses 16-bit values to cover all valid unicode code points
# from U+0 to U+10FFFF, using surrogate pairs to escape the BMP.
# Interchange requires knowing (and preserving) byte order.

# Byte order marks: FF FE for little-endian, FE FF for big-endian.
set bomLE "\xff\xfe"
set bomBE "\xfe\xff"

# "hello" plus LF as 16-bit code units in each byte order.
set hello16LE "h\x00e\x00l\x00l\x00o\x00\n\x00"
set hello16BE "\x00h\x00e\x00l\x00l\x00o\x00\n"

# The text without any BOM ...
write_file utf-16le-hello $hello16LE
write_file utf-16be-hello $hello16BE
# ... with the BOM that matches its byte order ...
write_file utf-16le-bomle-hello "${bomLE}${hello16LE}"
write_file utf-16be-bombe-hello "${bomBE}${hello16BE}"
# ... and with the BOM of the opposite byte order.
write_file utf-16le-bombe-hello "${bomBE}${hello16LE}"
write_file utf-16be-bomle-hello "${bomLE}${hello16BE}"

# Sample files shipped in the test directory: copied verbatim, and copied
# again with the first two bytes dropped (the utf-nobom-* variants).
set le16 [read_file [file join $testdir utf16le.txt]]
set be16 [read_file [file join $testdir utf16be.txt]]
write_file utf-16le.txt $le16
write_file utf-16be.txt $be16
write_file utf-nobom-16le.txt [string range $le16 2 end]
write_file utf-nobom-16be.txt [string range $be16 2 end]
#write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end]
#write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end]

# Register every fixture file with fossil, run the commit-warning check, and
# compare its normalized output with the expected report below.
#
# Each expected line is three tab-separated fields: a 0/1 flag, the file
# name, and the warning text (empty when the flag is 0).  Lines are listed in
# file-name order.  The \t sequences are turned into real tabs by subst.
# NOTE(review): the final lone "1" is the last line of the command output;
# its exact meaning is not visible here -- confirm against
# test-commit-warning.
fossil addremove
fossil test-commit-warning --no-settings -v
test pre-commit-warnings-1 {[string equal [normalize_result] \
      [subst -nocommands -novariables [string trim {
1\tbinary\tbinary data
1\tcr-lf-crlf.txt\tmixed line endings
1\tcr-only.txt\tCR line endings
1\tcrlf.txt\tCR/LF line endings
0\tempty\t
0\tline-0064\t
0\tline-1024\t
0\tline-16K\t
0\tline-2048\t
1\tline-32K\tlong lines
0\tline-4096\t
1\tline-64K\tlong lines
0\tline-8192\t
0\tplain.txt\t
1\tutf-16be-bombe-hello\tUnicode
1\tutf-16be-bomle-hello\tUnicode
1\tutf-16be-hello\tbinary data
1\tutf-16be.txt\tUnicode
1\tutf-16le-bombe-hello\tUnicode
1\tutf-16le-bomle-hello\tUnicode
1\tutf-16le-hello\tbinary data
1\tutf-16le.txt\tUnicode
0\tutf-8-bom-emoji\t
0\tutf-8-emoji\t
1\tutf-bad-c0-81\tinvalid UTF-8
1\tutf-bad-c0-bf\tinvalid UTF-8
1\tutf-bad-c1-bf\tinvalid UTF-8
1\tutf-bad-e0-80-80\tinvalid UTF-8
1\tutf-bad-f0-80-80-80\tinvalid UTF-8
1\tutf-bad-f4-90-80-80\tinvalid UTF-8
1\tutf-bad-f8-80-80-80-80\tinvalid UTF-8
1\tutf-bad-f9-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fc-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-fe-80-80-80-80-80-80\tinvalid UTF-8
1\tutf-bad-ff\tinvalid UTF-8
0\tutf-ill16-lead\t
0\tutf-ill16-pair\t
0\tutf-ill16-trail\t
0\tutf-mod-c0-80\t
1\tutf-nobom-16be.txt\tbinary data
1\tutf-nobom-16le.txt\tbinary data
1}]]]}


###############################################################################


test_cleanup
