Fossil SCM

Improvements to the HTML beautifier.

drh 2012-08-11 02:33 UTC wysiwyg
Commit 0e57bafcfa4d9a9eb19ab63b2ce8ca8ddbd9db39
1 file changed +66 -30
+66 -30
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1792,54 +1792,90 @@
 */
 void htmlTidy(const char *zIn, Blob *pOut){
   int n;
   int nPre = 0;
   int iCur = 0;
+  int wantSpace = 0;
+  int omitSpace = 1;
   while( zIn[0] ){
     n = nextHtmlToken(zIn);
     if( zIn[0]=='<' && n>1 ){
-      if( isWord(zIn, "<pre", 4) ){
-        if( iCur && nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
-        nPre++;
-      }else if( isWord(zIn, "</pre", 5) ){
-        nPre--;
-        if( nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
-      }else if( isWord(zIn, "<blockquote", 11)
-             || isWord(zIn, "<center", 7)
-             || (isWord(zIn, "<h", 2) && fossil_isdigit(zIn[2]))
-             || isWord(zIn, "<p", 2)
-             || isWord(zIn, "<table", 6) ){
-        blob_append(pOut, "\n\n", 1 + (iCur>0));
-        iCur = 0;
-      }else if( isWord(zIn, "<dd", 3)
-             || isWord(zIn, "<div", 4)
-             || isWord(zIn, "<dl", 3)
-             || isWord(zIn, "<dt", 3)
-             || isWord(zIn, "<li", 3)
-             || isWord(zIn, "<ol", 3)
-             || isWord(zIn, "<td", 3)
-             || isWord(zIn, "<th", 3)
-             || isWord(zIn, "<tr", 3)
-             || isWord(zIn, "<ul", 3) ){
-        if( iCur>0 ) blob_append(pOut, "\n", 1);
-        iCur = 0;
+      int i, j;
+      int isCloseTag;
+      int eTag;
+      int eType;
+      char zTag[32];
+      isCloseTag = zIn[1]=='/';
+      for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
+        zTag[i] = fossil_tolower(zIn[j]);
+      }
+      zTag[i] = 0;
+      eTag = findTag(zTag);
+      eType = aMarkup[eTag].iType;
+      if( eTag==MARKUP_PRE ){
+        if( isCloseTag ){
+          nPre--;
+          blob_append(pOut, zIn, n);
+          zIn += n;
+          if( nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
+          continue;
+        }else{
+          if( iCur && nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
+          nPre++;
+        }
+      }else if( eType & (MUTYPE_BLOCK|MUTYPE_TABLE) ){
+        if( !isCloseTag && nPre==0 && blob_size(pOut)>0 ){
+          blob_append(pOut, "\n\n", 1 + (iCur>0));
+          iCur = 0;
+        }
+        wantSpace = 0;
+        omitSpace = 1;
+      }else if( (eType & (MUTYPE_LIST|MUTYPE_LI|MUTYPE_TR|MUTYPE_TD))!=0
+             || eTag==MARKUP_HR
+      ){
+        if( nPre==0 && (!isCloseTag || (eType&MUTYPE_LIST)!=0) && iCur>0 ){
+          blob_append(pOut, "\n", 1);
+          iCur = 0;
+        }
+        wantSpace = 0;
+        omitSpace = 1;
+      }
+      if( wantSpace && nPre==0 ){
+        if( iCur+n+1>=80 ){
+          blob_append(pOut, "\n", 1);
+          iCur = 0;
+        }else{
+          blob_append(pOut, " ", 1);
+          iCur++;
+        }
       }
       blob_append(pOut, zIn, n);
       iCur += n;
+      wantSpace = 0;
+      if( eTag==MARKUP_BR || eTag==MARKUP_HR ){
+        blob_append(pOut, "\n", 1);
+        iCur = 0;
+      }
     }else if( fossil_isspace(zIn[0]) ){
       if( nPre ){
         blob_append(pOut, zIn, n);
-      }else if( iCur>=70 ){
-        blob_append(pOut, "\n", 1);
-        iCur = 0;
       }else{
-        blob_append(pOut, " ", 1);
-        iCur++;
+        wantSpace = !omitSpace;
       }
     }else{
+      if( wantSpace && nPre==0 ){
+        if( iCur+n+1>=80 ){
+          blob_append(pOut, "\n", 1);
+          iCur = 0;
+        }else{
+          blob_append(pOut, " ", 1);
+          iCur++;
+        }
+      }
       blob_append(pOut, zIn, n);
       iCur += n;
+      wantSpace = omitSpace = 0;
     }
     zIn += n;
   }
   if( iCur ) blob_append(pOut, "\n", 1);
 }
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1792,54 +1792,90 @@
1792 */
1793 void htmlTidy(const char *zIn, Blob *pOut){
1794 int n;
1795 int nPre = 0;
1796 int iCur = 0;
 
 
1797 while( zIn[0] ){
1798 n = nextHtmlToken(zIn);
1799 if( zIn[0]=='<' && n>1 ){
1800 if( isWord(zIn, "<pre", 4) ){
1801 if( iCur && nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
1802 nPre++;
1803 }else if( isWord(zIn, "</pre", 5) ){
1804 nPre--;
1805 if( nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
1806 }else if( isWord(zIn, "<blockquote", 11)
1807 || isWord(zIn, "<center", 7)
1808 || (isWord(zIn, "<h", 2) && fossil_isdigit(zIn[2]))
1809 || isWord(zIn, "<p", 2)
1810 || isWord(zIn, "<table", 6) ){
1811 blob_append(pOut, "\n\n", 1 + (iCur>0));
1812 iCur = 0;
1813 }else if( isWord(zIn, "<dd", 3)
1814 || isWord(zIn, "<div", 4)
1815 || isWord(zIn, "<dl", 3)
1816 || isWord(zIn, "<dt", 3)
1817 || isWord(zIn, "<li", 3)
1818 || isWord(zIn, "<ol", 3)
1819 || isWord(zIn, "<td", 3)
1820 || isWord(zIn, "<th", 3)
1821 || isWord(zIn, "<tr", 3)
1822 || isWord(zIn, "<ul", 3) ){
1823 if( iCur>0 ) blob_append(pOut, "\n", 1);
1824 iCur = 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1825 }
1826 blob_append(pOut, zIn, n);
1827 iCur += n;
 
 
 
 
 
1828 }else if( fossil_isspace(zIn[0]) ){
1829 if( nPre ){
1830 blob_append(pOut, zIn, n);
1831 }else if( iCur>=70 ){
1832 blob_append(pOut, "\n", 1);
1833 iCur = 0;
1834 }else{
1835 blob_append(pOut, " ", 1);
1836 iCur++;
1837 }
1838 }else{
 
 
 
 
 
 
 
 
 
1839 blob_append(pOut, zIn, n);
1840 iCur += n;
 
1841 }
1842 zIn += n;
1843 }
1844 if( iCur ) blob_append(pOut, "\n", 1);
1845 }
1846
--- src/wikiformat.c
+++ src/wikiformat.c
@@ -1792,54 +1792,90 @@
1792 */
1793 void htmlTidy(const char *zIn, Blob *pOut){
1794 int n;
1795 int nPre = 0;
1796 int iCur = 0;
1797 int wantSpace = 0;
1798 int omitSpace = 1;
1799 while( zIn[0] ){
1800 n = nextHtmlToken(zIn);
1801 if( zIn[0]=='<' && n>1 ){
1802 int i, j;
1803 int isCloseTag;
1804 int eTag;
1805 int eType;
1806 char zTag[32];
1807 isCloseTag = zIn[1]=='/';
1808 for(i=0, j=1+isCloseTag; i<30 && fossil_isalnum(zIn[j]); i++, j++){
1809 zTag[i] = fossil_tolower(zIn[j]);
1810 }
1811 zTag[i] = 0;
1812 eTag = findTag(zTag);
1813 eType = aMarkup[eTag].iType;
1814 if( eTag==MARKUP_PRE ){
1815 if( isCloseTag ){
1816 nPre--;
1817 blob_append(pOut, zIn, n);
1818 zIn += n;
1819 if( nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
1820 continue;
1821 }else{
1822 if( iCur && nPre==0 ){ blob_append(pOut, "\n", 1); iCur = 0; }
1823 nPre++;
1824 }
1825 }else if( eType & (MUTYPE_BLOCK|MUTYPE_TABLE) ){
1826 if( !isCloseTag && nPre==0 && blob_size(pOut)>0 ){
1827 blob_append(pOut, "\n\n", 1 + (iCur>0));
1828 iCur = 0;
1829 }
1830 wantSpace = 0;
1831 omitSpace = 1;
1832 }else if( (eType & (MUTYPE_LIST|MUTYPE_LI|MUTYPE_TR|MUTYPE_TD))!=0
1833 || eTag==MARKUP_HR
1834 ){
1835 if( nPre==0 && (!isCloseTag || (eType&MUTYPE_LIST)!=0) && iCur>0 ){
1836 blob_append(pOut, "\n", 1);
1837 iCur = 0;
1838 }
1839 wantSpace = 0;
1840 omitSpace = 1;
1841 }
1842 if( wantSpace && nPre==0 ){
1843 if( iCur+n+1>=80 ){
1844 blob_append(pOut, "\n", 1);
1845 iCur = 0;
1846 }else{
1847 blob_append(pOut, " ", 1);
1848 iCur++;
1849 }
1850 }
1851 blob_append(pOut, zIn, n);
1852 iCur += n;
1853 wantSpace = 0;
1854 if( eTag==MARKUP_BR || eTag==MARKUP_HR ){
1855 blob_append(pOut, "\n", 1);
1856 iCur = 0;
1857 }
1858 }else if( fossil_isspace(zIn[0]) ){
1859 if( nPre ){
1860 blob_append(pOut, zIn, n);
 
 
 
1861 }else{
1862 wantSpace = !omitSpace;
 
1863 }
1864 }else{
1865 if( wantSpace && nPre==0 ){
1866 if( iCur+n+1>=80 ){
1867 blob_append(pOut, "\n", 1);
1868 iCur = 0;
1869 }else{
1870 blob_append(pOut, " ", 1);
1871 iCur++;
1872 }
1873 }
1874 blob_append(pOut, zIn, n);
1875 iCur += n;
1876 wantSpace = omitSpace = 0;
1877 }
1878 zIn += n;
1879 }
1880 if( iCur ) blob_append(pOut, "\n", 1);
1881 }
1882

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button