81 {
"iexcl",
"\241", 0 },
82 {
"cent",
"\242", 0 },
83 {
"pound",
"\243", 0 },
84 {
"curren",
"\244", 0 },
86 {
"brvbar",
"\246", 0 },
87 {
"sect",
"\247", 0 },
89 {
"copy",
"\251", 0 },
90 {
"ordf",
"\252", 0 },
91 {
"laquo",
"\253", 0 },
95 {
"macr",
"\257", 0 },
97 {
"plusmn",
"\261", 0 },
98 {
"sup2",
"\262", 0 },
99 {
"sup3",
"\263", 0 },
100 {
"acute",
"\264", 0 },
101 {
"micro",
"\265", 0 },
102 {
"para",
"\266", 0 },
103 {
"middot",
"\267", 0 },
104 {
"cedil",
"\270", 0 },
105 {
"sup1",
"\271", 0 },
106 {
"ordm",
"\272", 0 },
107 {
"raquo",
"\273", 0 },
108 {
"frac14",
"\274", 0 },
109 {
"frac12",
"\275", 0 },
110 {
"frac34",
"\276", 0 },
111 {
"iquest",
"\277", 0 },
112 {
"Agrave",
"\300", 0 },
113 {
"Aacute",
"\301", 0 },
114 {
"Acirc",
"\302", 0 },
115 {
"Atilde",
"\303", 0 },
116 {
"Auml",
"\304", 0 },
117 {
"Aring",
"\305", 0 },
118 {
"AElig",
"\306", 0 },
119 {
"Ccedil",
"\307", 0 },
120 {
"Egrave",
"\310", 0 },
121 {
"Eacute",
"\311", 0 },
122 {
"Ecirc",
"\312", 0 },
123 {
"Euml",
"\313", 0 },
124 {
"Igrave",
"\314", 0 },
125 {
"Iacute",
"\315", 0 },
126 {
"Icirc",
"\316", 0 },
127 {
"Iuml",
"\317", 0 },
128 {
"ETH",
"\320", 0 },
129 {
"Ntilde",
"\321", 0 },
130 {
"Ograve",
"\322", 0 },
131 {
"Oacute",
"\323", 0 },
132 {
"Ocirc",
"\324", 0 },
133 {
"Otilde",
"\325", 0 },
134 {
"Ouml",
"\326", 0 },
135 {
"times",
"\327", 0 },
136 {
"Oslash",
"\330", 0 },
137 {
"Ugrave",
"\331", 0 },
138 {
"Uacute",
"\332", 0 },
139 {
"Ucirc",
"\333", 0 },
140 {
"Uuml",
"\334", 0 },
141 {
"Yacute",
"\335", 0 },
142 {
"THORN",
"\336", 0 },
143 {
"szlig",
"\337", 0 },
144 {
"agrave",
"\340", 0 },
145 {
"aacute",
"\341", 0 },
146 {
"acirc",
"\342", 0 },
147 {
"atilde",
"\343", 0 },
148 {
"auml",
"\344", 0 },
149 {
"aring",
"\345", 0 },
150 {
"aelig",
"\346", 0 },
151 {
"ccedil",
"\347", 0 },
152 {
"egrave",
"\350", 0 },
153 {
"eacute",
"\351", 0 },
154 {
"ecirc",
"\352", 0 },
155 {
"euml",
"\353", 0 },
156 {
"igrave",
"\354", 0 },
157 {
"iacute",
"\355", 0 },
158 {
"icirc",
"\356", 0 },
159 {
"iuml",
"\357", 0 },
160 {
"eth",
"\360", 0 },
161 {
"ntilde",
"\361", 0 },
162 {
"ograve",
"\362", 0 },
163 {
"oacute",
"\363", 0 },
164 {
"ocirc",
"\364", 0 },
165 {
"otilde",
"\365", 0 },
166 {
"ouml",
"\366", 0 },
167 {
"divide",
"\367", 0 },
168 {
"oslash",
"\370", 0 },
169 {
"ugrave",
"\371", 0 },
170 {
"uacute",
"\372", 0 },
171 {
"ucirc",
"\373", 0 },
172 {
"uuml",
"\374", 0 },
173 {
"yacute",
"\375", 0 },
174 {
"thorn",
"\376", 0 },
175 {
"yuml",
"\377", 0 },
183#define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
202 while ((
c = *zName) != 0) {
215static void EscHashStats()
233 if (cnt > max) max =
cnt;
235 printf(
"Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
236 max, (
double)
sum/(
double)notempty, i, i-notempty,
237 100.0*(i-notempty)/(
double)i);
322 static int isInit = 0;
330 if (z[from] ==
'&') {
331 if (z[from+1] ==
'#') {
334 while (isdigit(z[i])) {
335 v =
v*10 + z[i] -
'0';
338 if (z[i] ==
';') { i++; }
343 if (
v >= 0x80 &&
v < 0xa0) {
355 while (z[i] && isalnum(z[i])) ++i;
360 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
364 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
366 if (
c ==
';') from++;
376 }
else if (((
unsigned char) z[from]) >= 0x80 &&
377 ((
unsigned char) z[from]) < 0xa0) {
415 while ((
c = *zName) != 0) {
434static void HtmlHashStats() {
451 if (cnt > max) max =
cnt;
454 printf(
"longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
455 max, (
double)
sum/(
double)notempty, i, i-notempty,
456 100.0*(i-notempty)/(
double)i);
532 case '\t':
return (iCol | 7) + 1;
533 default:
return iCol+1;
544 if (isupper(*z)) *z = tolower(*z);
581 int pIsInNoScript = 0;
582 int pIsInNoFrames = 0;
587 static char null[1] = {
"" };
592 if (inpCol < 0)
return n;
596 while ((
c = z[
n]) != 0) {
599 if (
c == -64 && z[
n+1] == -128) {
619 zEnd =
"</noscript>";
622 zEnd =
"</noframes>";
633 for (i =
n ; z[i]; i++) {
634 if (z[i] ==
'\'' || z[i] ==
'"') {
636 }
else if (z[i] ==
'\n') {
639 if (z[i] ==
'<' && z[i+1] ==
'/' &&
640 strncasecmp(&z[i], zEnd, nEnd) == 0) {
641 if (zEnd[3] ==
'c' && ((sqcnt % 2) == 1))
continue;
648 if (z[i] == 0)
goto incomplete;
658 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
679 z =
fZText = HtmlRealloc(z, ol+rl);
680 memmove(z +
n + rl, z+
n, ol -
n);
681 memmove(z +
n, result, rl);
692 else if (isspace((
unsigned char)
c)) {
696 (
c = z[
n+i]) != 0 && isspace((
unsigned char)
c) &&
c !=
'\n' &&
c !=
'\r';
698 if (
c ==
'\r' && z[
n+i+1] ==
'\n') ++i;
710 if (pElem == 0)
goto incomplete;
714 if (
c ==
'\n' ||
c ==
'\r') {
720 int iColStart = inpCol;
722 for (j = 0; j < i; j++) {
725 pElem->
fCount = inpCol - iColStart;
732 (!isalpha(z[
n+1]) && z[
n+1] !=
'/' && z[
n+1] !=
'!' && z[
n+1] !=
'?')) {
735 for (i = 1; (
c = z[
n+i]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'<'; i++) {}
736 if (z[
n+i-1] ==
'.' || z[
n+i-1] ==
'!' || z[
n+i-1] ==
'?') sawdot = 2;
737 if (
c == 0)
goto incomplete;
741 if (i >= 10 && strncasecmp(&z[
n],
"</listing>", 10) == 0) {
748 if (i >= 6 && strncasecmp(&z[
n],
"</xmp>", 6) == 0) {
755 if (i >= 11 && strncasecmp(&z[
n],
"</textarea>", 11) == 0) {
766 if (pElem == 0)
goto incomplete;
770 strncpy(tpElem->
fZText, &z[
n], i);
780 }
else if (strncmp(&z[
n],
"<!--", 4) == 0) {
783 for (i = 4; z[
n+i]; i++) {
784 if (z[
n+i] ==
'-' && strncmp(&z[
n+i],
"-->", 3) == 0)
break;
786 if (z[
n+i] == 0)
goto incomplete;
789 if (pElem == 0)
goto incomplete;
794 strncpy(tpElem->
fZText, &z[
n+4], i-4);
802 for (j = 0; j < i+3; j++) {
817 (
c = z[
n+i]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>' && (i < 2 ||
c !=
'/');
820 if (
c == 0)
goto incomplete;
824 while (isspace((
unsigned char)z[
n+i])) ++i;
825 while ((
c = z[
n+i]) != 0 &&
c !=
'>' && (
c !=
'/' || z[
n+i+1] !=
'>')) {
827 argv[argc] = &z[
n+i];
829 while ((
c = z[
n+i+j]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>' &&
830 c !=
'=' && (
c !=
'/' || z[
n+i+j+1] !=
'>')) ++j;
832 if (
c == 0)
goto incomplete;
834 while (isspace((
unsigned char)
c)) {
838 if (
c == 0)
goto incomplete;
848 while (isspace((
unsigned char)
c)) {
852 if (
c == 0)
goto incomplete;
853 if (
c ==
'\'' ||
c ==
'"') {
856 argv[argc] = &z[
n+i];
857 for (j = 0; (
c = z[
n+i+j]) != 0 &&
c != cQuote; j++) {}
858 if (
c == 0)
goto incomplete;
862 argv[argc] = &z[
n+i];
863 for (j = 0; (
c = z[
n+i+j]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>'; j++) {}
864 if (
c == 0)
goto incomplete;
869 while (isspace(z[
n+i])) ++i;
878 if (
c == 0)
goto incomplete;
879 for (j = 0; j < i+1; j++) {
890 c = argv[0][arglen[0]];
891 argv[0][arglen[0]] = 0;
894 if (strcasecmp(pMap->
fZName, argv[0]) == 0)
break;
896 argv[0][arglen[0]] =
c;
897 if (pMap == 0)
continue;
903 if (pElem == 0)
goto incomplete;
933 switch (pMap->
fType) {
997 if (selfClose && argv[0][0] !=
'/' &&
998 strcmp(&pMap[1].fZName[1], pMap->
fZName) == 0) {
1002 goto makeMarkupEntry;
1020 int arglen[],
char *argv[])
1086 int len = strlen(
text);
1093 char *tmp =
new char[
fNAlloc];
1129 char *zType,
char *zArgs,
int offs)
1142 if (strcmp(zType,
"Text") == 0) {
1144 if (pElem == 0)
return 0;
1150 }
else if (!strcmp(zType,
"Space")) {
1152 if (pElem == 0)
return 0;
1156 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1158 if (pMap == 0)
return 0;
1159 if (zArgs == 0 || *zArgs == 0) {
1163 if (pElem == 0)
return 0;
1172 if (!SplitList(zArgs, &argc, &argv))
return 0;
1177 if (pElem == 0)
return 1;
1179 while (--argc >= 0)
if (argv[argc])
delete[] argv[argc];
1207 char *cp = 0,
c, *cp2;
1209 if (
GetIndex(argv[3], &p, &i) != 0) {
1220 if (argv[2][0] ==
'b') {
1221 if (!istxt)
return 1;
1222 if (i == 0 || i ==
l)
return 1;
1231 if ((!istxt) || (!
l) || (!i)) {
1234 RemoveElements(p, p);
1238 RemoveElements(p, p);
1244 memcpy(cp+i-1, cp+i,
l-i+1);
1247 if (ins.i-- <= 0) ins.i = 0;
1251 if (
c ==
'\n' ||
c ==
'\r') {
1255 int t, j, alen = strlen(argv[4]);
1260 if (
text->fZText == (
char*) ((&
text->fZText)+1)) {
1262 strcpy(cp,
text->fZText);
1265 strcpy(cp,
text->fZText);
1267 cp2 =
new char[alen+1];
1268 memcpy(cp2, argv[4], alen+1);
1271 memmove(cp+alen+i, cp+i,
l-i+1);
1272 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1274 delete[]
text->fZText;
1288 AddStrOffset(p, argv[4], i);
1308 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1342 static char zBuf[200];
1357 snprintf(zBuf, 200,
"space: \"\\n\"");
1359 snprintf(zBuf, 200,
"space: \" \"");
1365 if (block->
fN > 0) {
1367 if (
n > 150)
n = 150;
1368 snprintf(zBuf, 200,
"<Block z=\"%.*s\">",
n, block->
fZ);
1383 for (j = 1 ; j < p->
fCount; j += 2) {
1384 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf),
" %s=\"%s\"",
1413 for (i = 0; i + 1 < pElem->
fCount; i += 2) {
1426 static char zBuf[200];
1433 strcpy(zBuf,
"NULL");
1451 strlcpy(zBuf, zName,
sizeof(zBuf));
1477 while (p && p != pEnd) {
1483 str->
Append(
"{ Text \"");
1489 snprintf(zLine, 100,
"Space %d %d ",
1499 str->
Append(
"{ Markup ");
1508 for (i = 0; i < p->
fCount; ++i) {
1531 const char *z = block->
fZ;
1533 if (
n == 0 || z == 0) {
1537 printf(
"Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1541 printf(
"Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
void ToLower(char *z)
Convert a string to all lower-case letters.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
static int EscHash(const char *zName)
static int HtmlHash(const char *zName)
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t HtmlMarkupMap[]
static struct SgEsc_t gEscSequences[]
static void HtmlHashInit(void)
#define HTML_MARKUP_HASH_SIZE
#define HTML_MARKUP_COUNT
#define O_HtmlImageMarkup
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
virtual char * ProcessScript(TGHtmlScript *)
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into a TGHtmlElement object and ins...
const char * TypeToName(int type)
Convert a type into a symbolic name.
int Tokenize()
Process as much of the input HTML as possible.
int NameToType(char *zType)
Convert a markup name into a type integer.
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizer's list of elements.
virtual int ProcessToken(TGHtmlElement *, const char *, int)
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
TString & Append(const char *cs)
SHtmlTokenMap_t * fPCollide
static uint64_t sum(uint64_t i)