Fossil SCM

Added some UTF-16 cases, and a BOM case for UTF-8 to commit-warning.test

rberteig 2017-03-21 23:45 trunk
Commit 8705a525358dd9be086960dbf83a6c6ab5ab8cce913e3a8477e1ef861cbe23eb
1 file changed +34 -2
--- test/commit-warning.test
+++ test/commit-warning.test
@@ -76,16 +76,37 @@
7676
write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
7777
write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"
7878
write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n"
7979
write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
8080
write_file utf-bad-ff "no byte FF:\xFF\n"
81
-write_file utf-ill16-lead "lead surrogate:\xED\xA0\x80 is ill formed\n"
82
-write_file utf-ill16-trail "trail surrogate:\xED\xB0\x80 is ill formed\n"
81
+write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n"
82
+write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n"
8383
write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"
8484
set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
8585
protOut $emoji
8686
write_file utf-8-emoji $emoji
87
+write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji"
88
+
89
+# UTF-16 uses 16-bit values to cover all valid unicode code points
90
+# from U+0 to U+10FFFF, using surrogate pairs to escape the BMP.
91
+# Interchange requires knowing (and preserving) byte order.
92
+write_file utf-16le-hello "h\x00e\x00l\x00l\x00o\x00\n\x00"
93
+write_file utf-16be-hello "\x00h\x00e\x00l\x00l\x00o\x00\n"
94
+set bomLE "\xff\xfeh"
95
+set bomBE "\xfe\xffh"
96
+write_file utf-16le-bomle-hello "$bomLE\x00e\x00l\x00l\x00o\x00\n\x00"
97
+write_file utf-16be-bombe-hello "$bomBE\x00h\x00e\x00l\x00l\x00o\x00\n"
98
+write_file utf-16le-bombe-hello "$bomBE\x00e\x00l\x00l\x00o\x00\n\x00"
99
+write_file utf-16be-bomle-hello "$bomLE\x00h\x00e\x00l\x00l\x00o\x00\n"
100
+set le16 [read_file [file join $testdir utf16le.txt]]
101
+set be16 [read_file [file join $testdir utf16be.txt]]
102
+write_file utf-16le.txt $le16
103
+write_file utf-16be.txt $be16
104
+write_file utf-nobom-16le.txt [string range $le16 2 end]
105
+write_file utf-nobom-16be.txt [string range $be16 2 end]
106
+#write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end]
107
+#write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end]
87108
88109
# make all the test files known to fossil, then test
89110
fossil addremove
90111
fossil test-commit-warning --no-settings -v
91112
test pre-commit-warnings-1 {[normalize_result] eq \
@@ -99,10 +120,19 @@
99120
0\tline-1024\t
100121
0\tline-4096\t
101122
1\tline-64K\tlong lines
102123
1\tline-8192\tlong lines
103124
0\tplain.txt\t
125
+1\tutf-16be-bombe-hello\tbinary data
126
+1\tutf-16be-bomle-hello\tbinary data
127
+1\tutf-16be-hello\tbinary data
128
+1\tutf-16be.txt\tUnicode
129
+1\tutf-16le-bombe-hello\tUnicode
130
+1\tutf-16le-bomle-hello\tUnicode
131
+1\tutf-16le-hello\tbinary data
132
+1\tutf-16le.txt\tUnicode
133
+0\tutf-8-bom-emoji\t
104134
0\tutf-8-emoji\t
105135
1\tutf-bad-c0-81\tinvalid UTF-8
106136
1\tutf-bad-c0-bf\tinvalid UTF-8
107137
1\tutf-bad-c1-bf\tinvalid UTF-8
108138
1\tutf-bad-e0-80-80\tinvalid UTF-8
@@ -115,10 +145,12 @@
115145
1\tutf-bad-ff\tinvalid UTF-8
116146
0\tutf-ill16-lead\t
117147
0\tutf-ill16-pair\t
118148
0\tutf-ill16-trail\t
119149
0\tutf-mod-c0-80\t
150
+1\tutf-nobom-16be.txt\tbinary data
151
+1\tutf-nobom-16le.txt\tbinary data
120152
1}]]}
121153
122154
123155
###############################################################################
124156
125157
--- test/commit-warning.test
+++ test/commit-warning.test
@@ -76,16 +76,37 @@
76 write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
77 write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"
78 write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n"
79 write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
80 write_file utf-bad-ff "no byte FF:\xFF\n"
81 write_file utf-ill16-lead "lead surrogate:\xED\xA0\x80 is ill formed\n"
82 write_file utf-ill16-trail "trail surrogate:\xED\xB0\x80 is ill formed\n"
83 write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"
84 set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
85 protOut $emoji
86 write_file utf-8-emoji $emoji
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
88 # make all the test files known to fossil, then test
89 fossil addremove
90 fossil test-commit-warning --no-settings -v
91 test pre-commit-warnings-1 {[normalize_result] eq \
@@ -99,10 +120,19 @@
99 0\tline-1024\t
100 0\tline-4096\t
101 1\tline-64K\tlong lines
102 1\tline-8192\tlong lines
103 0\tplain.txt\t
 
 
 
 
 
 
 
 
 
104 0\tutf-8-emoji\t
105 1\tutf-bad-c0-81\tinvalid UTF-8
106 1\tutf-bad-c0-bf\tinvalid UTF-8
107 1\tutf-bad-c1-bf\tinvalid UTF-8
108 1\tutf-bad-e0-80-80\tinvalid UTF-8
@@ -115,10 +145,12 @@
115 1\tutf-bad-ff\tinvalid UTF-8
116 0\tutf-ill16-lead\t
117 0\tutf-ill16-pair\t
118 0\tutf-ill16-trail\t
119 0\tutf-mod-c0-80\t
 
 
120 1}]]}
121
122
123 ###############################################################################
124
125
--- test/commit-warning.test
+++ test/commit-warning.test
@@ -76,16 +76,37 @@
76 write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n"
77 write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n"
78 write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n"
79 write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n"
80 write_file utf-bad-ff "no byte FF:\xFF\n"
81 write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n"
82 write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n"
83 write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n"
84 set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n"
85 protOut $emoji
86 write_file utf-8-emoji $emoji
87 write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji"
88
89 # UTF-16 uses 16-bit values to cover all valid unicode code points
90 # from U+0 to U+10FFFF, using surrogate pairs to escape the BMP.
91 # Interchange requires knowing (and preserving) byte order.
92 write_file utf-16le-hello "h\x00e\x00l\x00l\x00o\x00\n\x00"
93 write_file utf-16be-hello "\x00h\x00e\x00l\x00l\x00o\x00\n"
94 set bomLE "\xff\xfeh"
95 set bomBE "\xfe\xffh"
96 write_file utf-16le-bomle-hello "$bomLE\x00e\x00l\x00l\x00o\x00\n\x00"
97 write_file utf-16be-bombe-hello "$bomBE\x00h\x00e\x00l\x00l\x00o\x00\n"
98 write_file utf-16le-bombe-hello "$bomBE\x00e\x00l\x00l\x00o\x00\n\x00"
99 write_file utf-16be-bomle-hello "$bomLE\x00h\x00e\x00l\x00l\x00o\x00\n"
100 set le16 [read_file [file join $testdir utf16le.txt]]
101 set be16 [read_file [file join $testdir utf16be.txt]]
102 write_file utf-16le.txt $le16
103 write_file utf-16be.txt $be16
104 write_file utf-nobom-16le.txt [string range $le16 2 end]
105 write_file utf-nobom-16be.txt [string range $be16 2 end]
106 #write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end]
107 #write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end]
108
109 # make all the test files known to fossil, then test
110 fossil addremove
111 fossil test-commit-warning --no-settings -v
112 test pre-commit-warnings-1 {[normalize_result] eq \
@@ -99,10 +120,19 @@
120 0\tline-1024\t
121 0\tline-4096\t
122 1\tline-64K\tlong lines
123 1\tline-8192\tlong lines
124 0\tplain.txt\t
125 1\tutf-16be-bombe-hello\tbinary data
126 1\tutf-16be-bomle-hello\tbinary data
127 1\tutf-16be-hello\tbinary data
128 1\tutf-16be.txt\tUnicode
129 1\tutf-16le-bombe-hello\tUnicode
130 1\tutf-16le-bomle-hello\tUnicode
131 1\tutf-16le-hello\tbinary data
132 1\tutf-16le.txt\tUnicode
133 0\tutf-8-bom-emoji\t
134 0\tutf-8-emoji\t
135 1\tutf-bad-c0-81\tinvalid UTF-8
136 1\tutf-bad-c0-bf\tinvalid UTF-8
137 1\tutf-bad-c1-bf\tinvalid UTF-8
138 1\tutf-bad-e0-80-80\tinvalid UTF-8
@@ -115,10 +145,12 @@
145 1\tutf-bad-ff\tinvalid UTF-8
146 0\tutf-ill16-lead\t
147 0\tutf-ill16-pair\t
148 0\tutf-ill16-trail\t
149 0\tutf-mod-c0-80\t
150 1\tutf-nobom-16be.txt\tbinary data
151 1\tutf-nobom-16le.txt\tbinary data
152 1}]]}
153
154
155 ###############################################################################
156
157

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button