Fossil SCM
Added some UTF-16 cases, and a BOM case for UTF-8 to commit-warning.test
Commit
8705a525358dd9be086960dbf83a6c6ab5ab8cce913e3a8477e1ef861cbe23eb
Parent
e583068713164db…
1 file changed
+34
-2
+34
-2
| --- test/commit-warning.test | ||
| +++ test/commit-warning.test | ||
| @@ -76,16 +76,37 @@ | ||
| 76 | 76 | write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n" |
| 77 | 77 | write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n" |
| 78 | 78 | write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n" |
| 79 | 79 | write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n" |
| 80 | 80 | write_file utf-bad-ff "no byte FF:\xFF\n" |
| 81 | -write_file utf-ill16-lead "lead surrogate:\xED\xA0\x80 is ill formed\n" | |
| 82 | -write_file utf-ill16-trail "trail surrogate:\xED\xB0\x80 is ill formed\n" | |
| 81 | +write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n" | |
| 82 | +write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n" | |
| 83 | 83 | write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n" |
| 84 | 84 | set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n" |
| 85 | 85 | protOut $emoji |
| 86 | 86 | write_file utf-8-emoji $emoji |
| 87 | +write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji" | |
| 88 | + | |
| 89 | +# UTF-16 uses 16-bit values to cover all valid unicode code points | |
| 90 | +# from U+0 to U+10FFFF, using surrogate pairs to escape the BMP. | |
| 91 | +# Interchange requires knowing (and preserving) byte order. | |
| 92 | +write_file utf-16le-hello "h\x00e\x00l\x00l\x00o\x00\n\x00" | |
| 93 | +write_file utf-16be-hello "\x00h\x00e\x00l\x00l\x00o\x00\n" | |
| 94 | +set bomLE "\xff\xfeh" | |
| 95 | +set bomBE "\xfe\xffh" | |
| 96 | +write_file utf-16le-bomle-hello "$bomLE\x00e\x00l\x00l\x00o\x00\n\x00" | |
| 97 | +write_file utf-16be-bombe-hello "$bomBE\x00h\x00e\x00l\x00l\x00o\x00\n" | |
| 98 | +write_file utf-16le-bombe-hello "$bomBE\x00e\x00l\x00l\x00o\x00\n\x00" | |
| 99 | +write_file utf-16be-bomle-hello "$bomLE\x00h\x00e\x00l\x00l\x00o\x00\n" | |
| 100 | +set le16 [read_file [file join $testdir utf16le.txt]] | |
| 101 | +set be16 [read_file [file join $testdir utf16be.txt]] | |
| 102 | +write_file utf-16le.txt $le16 | |
| 103 | +write_file utf-16be.txt $be16 | |
| 104 | +write_file utf-nobom-16le.txt [string range $le16 2 end] | |
| 105 | +write_file utf-nobom-16be.txt [string range $be16 2 end] | |
| 106 | +#write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end] | |
| 107 | +#write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end] | |
| 87 | 108 | |
| 88 | 109 | # make all the test files known to fossil, then test |
| 89 | 110 | fossil addremove |
| 90 | 111 | fossil test-commit-warning --no-settings -v |
| 91 | 112 | test pre-commit-warnings-1 {[normalize_result] eq \ |
| @@ -99,10 +120,19 @@ | ||
| 99 | 120 | 0\tline-1024\t |
| 100 | 121 | 0\tline-4096\t |
| 101 | 122 | 1\tline-64K\tlong lines |
| 102 | 123 | 1\tline-8192\tlong lines |
| 103 | 124 | 0\tplain.txt\t |
| 125 | +1\tutf-16be-bombe-hello\tbinary data | |
| 126 | +1\tutf-16be-bomle-hello\tbinary data | |
| 127 | +1\tutf-16be-hello\tbinary data | |
| 128 | +1\tutf-16be.txt\tUnicode | |
| 129 | +1\tutf-16le-bombe-hello\tUnicode | |
| 130 | +1\tutf-16le-bomle-hello\tUnicode | |
| 131 | +1\tutf-16le-hello\tbinary data | |
| 132 | +1\tutf-16le.txt\tUnicode | |
| 133 | +0\tutf-8-bom-emoji\t | |
| 104 | 134 | 0\tutf-8-emoji\t |
| 105 | 135 | 1\tutf-bad-c0-81\tinvalid UTF-8 |
| 106 | 136 | 1\tutf-bad-c0-bf\tinvalid UTF-8 |
| 107 | 137 | 1\tutf-bad-c1-bf\tinvalid UTF-8 |
| 108 | 138 | 1\tutf-bad-e0-80-80\tinvalid UTF-8 |
| @@ -115,10 +145,12 @@ | ||
| 115 | 145 | 1\tutf-bad-ff\tinvalid UTF-8 |
| 116 | 146 | 0\tutf-ill16-lead\t |
| 117 | 147 | 0\tutf-ill16-pair\t |
| 118 | 148 | 0\tutf-ill16-trail\t |
| 119 | 149 | 0\tutf-mod-c0-80\t |
| 150 | +1\tutf-nobom-16be.txt\tbinary data | |
| 151 | +1\tutf-nobom-16le.txt\tbinary data | |
| 120 | 152 | 1}]]} |
| 121 | 153 | |
| 122 | 154 | |
| 123 | 155 | ############################################################################### |
| 124 | 156 | |
| 125 | 157 |
| --- test/commit-warning.test | |
| +++ test/commit-warning.test | |
| @@ -76,16 +76,37 @@ | |
| 76 | write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n" |
| 77 | write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n" |
| 78 | write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n" |
| 79 | write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n" |
| 80 | write_file utf-bad-ff "no byte FF:\xFF\n" |
| 81 | write_file utf-ill16-lead "lead surrogate:\xED\xA0\x80 is ill formed\n" |
| 82 | write_file utf-ill16-trail "trail surrogate:\xED\xB0\x80 is ill formed\n" |
| 83 | write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n" |
| 84 | set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n" |
| 85 | protOut $emoji |
| 86 | write_file utf-8-emoji $emoji |
| 87 | |
| 88 | # make all the test files known to fossil, then test |
| 89 | fossil addremove |
| 90 | fossil test-commit-warning --no-settings -v |
| 91 | test pre-commit-warnings-1 {[normalize_result] eq \ |
| @@ -99,10 +120,19 @@ | |
| 99 | 0\tline-1024\t |
| 100 | 0\tline-4096\t |
| 101 | 1\tline-64K\tlong lines |
| 102 | 1\tline-8192\tlong lines |
| 103 | 0\tplain.txt\t |
| 104 | 0\tutf-8-emoji\t |
| 105 | 1\tutf-bad-c0-81\tinvalid UTF-8 |
| 106 | 1\tutf-bad-c0-bf\tinvalid UTF-8 |
| 107 | 1\tutf-bad-c1-bf\tinvalid UTF-8 |
| 108 | 1\tutf-bad-e0-80-80\tinvalid UTF-8 |
| @@ -115,10 +145,12 @@ | |
| 115 | 1\tutf-bad-ff\tinvalid UTF-8 |
| 116 | 0\tutf-ill16-lead\t |
| 117 | 0\tutf-ill16-pair\t |
| 118 | 0\tutf-ill16-trail\t |
| 119 | 0\tutf-mod-c0-80\t |
| 120 | 1}]]} |
| 121 | |
| 122 | |
| 123 | ############################################################################### |
| 124 | |
| 125 |
| --- test/commit-warning.test | |
| +++ test/commit-warning.test | |
| @@ -76,16 +76,37 @@ | |
| 76 | write_file utf-bad-c0-bf "overlong '?':\xC0\xBF is bad\n" |
| 77 | write_file utf-bad-c1-bf "overlong DEL:\xC1\xBF is bad\n" |
| 78 | write_file utf-bad-f4-90-80-80 "U+110000 not allowed:\xF4\x90\x80 not unicode\n" |
| 79 | write_file utf-bad-f9-80-80-80-80 "U+2000000 not allowed:\xF9\x80\x80\x80\x80 not unicode\n" |
| 80 | write_file utf-bad-ff "no byte FF:\xFF\n" |
| 81 | write_file utf-ill16-lead "lead surrogate U+D800:\xED\xA0\x80 is ill formed\n" |
| 82 | write_file utf-ill16-trail "trail surrogate U+DC00:\xED\xB0\x80 is ill formed\n" |
| 83 | write_file utf-ill16-pair "surrogate pair U+10000:\xED\xA0\x80\xED\xB0\x80 is ill formed\n" |
| 84 | set emoji "micro-smile \xC2\xB5\xE2\x98\xBA\npale facepalm \xF0\x9F\xA4\xA6\xF0\x9F\x8F\xBB\n" |
| 85 | protOut $emoji |
| 86 | write_file utf-8-emoji $emoji |
| 87 | write_file utf-8-bom-emoji "\xef\xbb\xbf$emoji" |
| 88 | |
| 89 | # UTF-16 uses 16-bit values to cover all valid unicode code points |
| 90 | # from U+0 to U+10FFFF, using surrogate pairs to escape the BMP. |
| 91 | # Interchange requires knowing (and preserving) byte order. |
| 92 | write_file utf-16le-hello "h\x00e\x00l\x00l\x00o\x00\n\x00" |
| 93 | write_file utf-16be-hello "\x00h\x00e\x00l\x00l\x00o\x00\n" |
| 94 | set bomLE "\xff\xfeh" |
| 95 | set bomBE "\xfe\xffh" |
| 96 | write_file utf-16le-bomle-hello "$bomLE\x00e\x00l\x00l\x00o\x00\n\x00" |
| 97 | write_file utf-16be-bombe-hello "$bomBE\x00h\x00e\x00l\x00l\x00o\x00\n" |
| 98 | write_file utf-16le-bombe-hello "$bomBE\x00e\x00l\x00l\x00o\x00\n\x00" |
| 99 | write_file utf-16be-bomle-hello "$bomLE\x00h\x00e\x00l\x00l\x00o\x00\n" |
| 100 | set le16 [read_file [file join $testdir utf16le.txt]] |
| 101 | set be16 [read_file [file join $testdir utf16be.txt]] |
| 102 | write_file utf-16le.txt $le16 |
| 103 | write_file utf-16be.txt $be16 |
| 104 | write_file utf-nobom-16le.txt [string range $le16 2 end] |
| 105 | write_file utf-nobom-16be.txt [string range $be16 2 end] |
| 106 | #write_file [file join $::env(TEMP) utf-nobom-16le.txt] [string range $le16 2 end] |
| 107 | #write_file [file join $::env(TEMP) utf-nobom-16be.txt] [string range $be16 2 end] |
| 108 | |
| 109 | # make all the test files known to fossil, then test |
| 110 | fossil addremove |
| 111 | fossil test-commit-warning --no-settings -v |
| 112 | test pre-commit-warnings-1 {[normalize_result] eq \ |
| @@ -99,10 +120,19 @@ | |
| 120 | 0\tline-1024\t |
| 121 | 0\tline-4096\t |
| 122 | 1\tline-64K\tlong lines |
| 123 | 1\tline-8192\tlong lines |
| 124 | 0\tplain.txt\t |
| 125 | 1\tutf-16be-bombe-hello\tbinary data |
| 126 | 1\tutf-16be-bomle-hello\tbinary data |
| 127 | 1\tutf-16be-hello\tbinary data |
| 128 | 1\tutf-16be.txt\tUnicode |
| 129 | 1\tutf-16le-bombe-hello\tUnicode |
| 130 | 1\tutf-16le-bomle-hello\tUnicode |
| 131 | 1\tutf-16le-hello\tbinary data |
| 132 | 1\tutf-16le.txt\tUnicode |
| 133 | 0\tutf-8-bom-emoji\t |
| 134 | 0\tutf-8-emoji\t |
| 135 | 1\tutf-bad-c0-81\tinvalid UTF-8 |
| 136 | 1\tutf-bad-c0-bf\tinvalid UTF-8 |
| 137 | 1\tutf-bad-c1-bf\tinvalid UTF-8 |
| 138 | 1\tutf-bad-e0-80-80\tinvalid UTF-8 |
| @@ -115,10 +145,12 @@ | |
| 145 | 1\tutf-bad-ff\tinvalid UTF-8 |
| 146 | 0\tutf-ill16-lead\t |
| 147 | 0\tutf-ill16-pair\t |
| 148 | 0\tutf-ill16-trail\t |
| 149 | 0\tutf-mod-c0-80\t |
| 150 | 1\tutf-nobom-16be.txt\tbinary data |
| 151 | 1\tutf-nobom-16le.txt\tbinary data |
| 152 | 1}]]} |
| 153 | |
| 154 | |
| 155 | ############################################################################### |
| 156 | |
| 157 |