Fossil SCM

Updated pikchr.c for HTML entity escaping fix.

stephan 2022-06-14 14:34 trunk
Commit b3a0a4cdfbab7c6a8a82c2bf9b4ddfd7a74147d98ae91eaeb2a15b0ccb9e9b41
+33 -2
--- extsrc/pikchr.c
+++ extsrc/pikchr.c
@@ -4532,10 +4532,39 @@
45324532
}
45334533
memcpy(p->zOut+p->nOut, zText, n);
45344534
p->nOut += n;
45354535
p->zOut[p->nOut] = 0;
45364536
}
4537
+
4538
+/*
4539
+** Given a string and its length, returns true if the string begins
4540
+** with a construct which syntactically matches an HTML entity escape
4541
+** sequence (without checking for whether it's a known entity). Always
4542
+** returns false if zText[0] is not '&' or n<4. Entities match the
4543
+** equivalent of the regexes `&#[0-9]{2,};` and `&[a-zA-Z]{2,};`.
4544
+*/
4545
+static int pik_isentity(char const * zText, int n){
4546
+ int i = 0;
4547
+ if( n<4 || '&'!=zText[0] ) return 0;
4548
+ n--;
4549
+ zText++;
4550
+ if( '#'==zText[0] ){
4551
+ zText++;
4552
+ n--;
4553
+ for(i=0; i<n; i++){
4554
+ if( i>1 && ';'==zText[i] ) return 1;
4555
+ else if( zText[i]<'0' || zText[i]>'9' ) return 0;
4556
+ }
4557
+ }else{
4558
+ for( i=0; i<n; i++ ){
4559
+ if( i>1 && ';'==zText[i] ) return 1;
4560
+ else if( zText[i]<'A' || zText[i]>'z'
4561
+ || (zText[i]>'Z' && zText[i]<'a') ) return 0;
4562
+ }
4563
+ }
4564
+ return 0;
4565
+}
45374566
45384567
/*
45394568
** Append text to zOut with HTML characters escaped.
45404569
**
45414570
** * The space character is changed into non-breaking space (U+00a0)
@@ -4564,12 +4593,14 @@
45644593
if( i ) pik_append(p, zText, i);
45654594
if( i==n ) break;
45664595
switch( c ){
45674596
case '<': { pik_append(p, "&lt;", 4); break; }
45684597
case '>': { pik_append(p, "&gt;", 4); break; }
4569
- case '&': { pik_append(p, "&amp;", 5); break; }
45704598
case ' ': { pik_append(p, "\302\240;", 2); break; }
4599
+ case '&':
4600
+ if( pik_isentity(zText+i, n-i) ){ pik_append(p, "&", 1); }
4601
+ else { pik_append(p, "&amp;", 5); }
45714602
}
45724603
i++;
45734604
n -= i;
45744605
zText += i;
45754606
i = 0;
@@ -8096,6 +8127,6 @@
80968127
80978128
80988129
#endif /* PIKCHR_TCL */
80998130
81008131
8101
-#line 8126 "pikchr.c"
8132
+#line 8157 "pikchr.c"
81028133
--- extsrc/pikchr.c
+++ extsrc/pikchr.c
@@ -4532,10 +4532,39 @@
4532 }
4533 memcpy(p->zOut+p->nOut, zText, n);
4534 p->nOut += n;
4535 p->zOut[p->nOut] = 0;
4536 }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4537
4538 /*
4539 ** Append text to zOut with HTML characters escaped.
4540 **
4541 ** * The space character is changed into non-breaking space (U+00a0)
@@ -4564,12 +4593,14 @@
4564 if( i ) pik_append(p, zText, i);
4565 if( i==n ) break;
4566 switch( c ){
4567 case '<': { pik_append(p, "&lt;", 4); break; }
4568 case '>': { pik_append(p, "&gt;", 4); break; }
4569 case '&': { pik_append(p, "&amp;", 5); break; }
4570 case ' ': { pik_append(p, "\302\240;", 2); break; }
 
 
 
4571 }
4572 i++;
4573 n -= i;
4574 zText += i;
4575 i = 0;
@@ -8096,6 +8127,6 @@
8096
8097
8098 #endif /* PIKCHR_TCL */
8099
8100
8101 #line 8126 "pikchr.c"
8102
--- extsrc/pikchr.c
+++ extsrc/pikchr.c
@@ -4532,10 +4532,39 @@
4532 }
4533 memcpy(p->zOut+p->nOut, zText, n);
4534 p->nOut += n;
4535 p->zOut[p->nOut] = 0;
4536 }
4537
/*
** Given a string and its length, returns true if the string begins
** with a construct which syntactically matches an HTML entity escape
** sequence (without checking for whether it's a known entity).
**
** Always returns false if n<4 or zText[0] is not '&'. The accepted
** forms are the equivalent of the regexes:
**
**    &#[0-9]{2,};              decimal character reference
**    &[a-zA-Z][a-zA-Z0-9]+;    named reference (&amp; &sup2; &frac12;)
**
** Hexadecimal references (&#x...;) are not recognized.
**
** Only the first n bytes of zText are examined, so zText does not
** need to be NUL-terminated.
*/
static int pik_isentity(char const * zText, int n){
  int i = 0;
  if( n<4 || '&'!=zText[0] ) return 0;
  /* Step over the '&'. */
  n--;
  zText++;
  if( '#'==zText[0] ){
    /* Numeric form: step over the '#', then expect digits and a ';'. */
    zText++;
    n--;
    for(i=0; i<n; i++){
      /* i>1: at least two digits must precede the terminating ';'. */
      if( i>1 && ';'==zText[i] ) return 1;
      else if( zText[i]<'0' || zText[i]>'9' ) return 0;
    }
  }else{
    /* Named form: a letter, then letters or digits, then a ';'. */
    for(i=0; i<n; i++){
      /* i>1: at least two name characters must precede the ';'. */
      if( i>1 && ';'==zText[i] ) return 1;
      else if( i>0 && zText[i]>='0' && zText[i]<='9' ){
        /* Digits are legal anywhere except the first position. */
        continue;
      }else if( zText[i]<'A' || zText[i]>'z'
               || (zText[i]>'Z' && zText[i]<'a') ) return 0;
    }
  }
  /* Ran out of input before finding the terminating ';'. */
  return 0;
}
4566
4567 /*
4568 ** Append text to zOut with HTML characters escaped.
4569 **
4570 ** * The space character is changed into non-breaking space (U+00a0)
@@ -4564,12 +4593,14 @@
4593 if( i ) pik_append(p, zText, i);
4594 if( i==n ) break;
4595 switch( c ){
4596 case '<': { pik_append(p, "&lt;", 4); break; }
4597 case '>': { pik_append(p, "&gt;", 4); break; }
 
4598 case ' ': { pik_append(p, "\302\240;", 2); break; }
4599 case '&':
4600 if( pik_isentity(zText+i, n-i) ){ pik_append(p, "&", 1); }
4601 else { pik_append(p, "&amp;", 5); }
4602 }
4603 i++;
4604 n -= i;
4605 zText += i;
4606 i = 0;
@@ -8096,6 +8127,6 @@
8127
8128
8129 #endif /* PIKCHR_TCL */
8130
8131
8132 #line 8157 "pikchr.c"
8133
--- extsrc/pikchr.wasm
+++ extsrc/pikchr.wasm
cannot compute difference between binary files
11
--- extsrc/pikchr.wasm
+++ extsrc/pikchr.wasm
0 cannot compute difference between binary files
1
--- extsrc/pikchr.wasm
+++ extsrc/pikchr.wasm
0 cannot compute difference between binary files
1

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button