80 {
"iexcl",
"\241", 0 },
81 {
"cent",
"\242", 0 },
82 {
"pound",
"\243", 0 },
83 {
"curren",
"\244", 0 },
85 {
"brvbar",
"\246", 0 },
86 {
"sect",
"\247", 0 },
88 {
"copy",
"\251", 0 },
89 {
"ordf",
"\252", 0 },
90 {
"laquo",
"\253", 0 },
94 {
"macr",
"\257", 0 },
96 {
"plusmn",
"\261", 0 },
97 {
"sup2",
"\262", 0 },
98 {
"sup3",
"\263", 0 },
99 {
"acute",
"\264", 0 },
100 {
"micro",
"\265", 0 },
101 {
"para",
"\266", 0 },
102 {
"middot",
"\267", 0 },
103 {
"cedil",
"\270", 0 },
104 {
"sup1",
"\271", 0 },
105 {
"ordm",
"\272", 0 },
106 {
"raquo",
"\273", 0 },
107 {
"frac14",
"\274", 0 },
108 {
"frac12",
"\275", 0 },
109 {
"frac34",
"\276", 0 },
110 {
"iquest",
"\277", 0 },
111 {
"Agrave",
"\300", 0 },
112 {
"Aacute",
"\301", 0 },
113 {
"Acirc",
"\302", 0 },
114 {
"Atilde",
"\303", 0 },
115 {
"Auml",
"\304", 0 },
116 {
"Aring",
"\305", 0 },
117 {
"AElig",
"\306", 0 },
118 {
"Ccedil",
"\307", 0 },
119 {
"Egrave",
"\310", 0 },
120 {
"Eacute",
"\311", 0 },
121 {
"Ecirc",
"\312", 0 },
122 {
"Euml",
"\313", 0 },
123 {
"Igrave",
"\314", 0 },
124 {
"Iacute",
"\315", 0 },
125 {
"Icirc",
"\316", 0 },
126 {
"Iuml",
"\317", 0 },
127 {
"ETH",
"\320", 0 },
128 {
"Ntilde",
"\321", 0 },
129 {
"Ograve",
"\322", 0 },
130 {
"Oacute",
"\323", 0 },
131 {
"Ocirc",
"\324", 0 },
132 {
"Otilde",
"\325", 0 },
133 {
"Ouml",
"\326", 0 },
134 {
"times",
"\327", 0 },
135 {
"Oslash",
"\330", 0 },
136 {
"Ugrave",
"\331", 0 },
137 {
"Uacute",
"\332", 0 },
138 {
"Ucirc",
"\333", 0 },
139 {
"Uuml",
"\334", 0 },
140 {
"Yacute",
"\335", 0 },
141 {
"THORN",
"\336", 0 },
142 {
"szlig",
"\337", 0 },
143 {
"agrave",
"\340", 0 },
144 {
"aacute",
"\341", 0 },
145 {
"acirc",
"\342", 0 },
146 {
"atilde",
"\343", 0 },
147 {
"auml",
"\344", 0 },
148 {
"aring",
"\345", 0 },
149 {
"aelig",
"\346", 0 },
150 {
"ccedil",
"\347", 0 },
151 {
"egrave",
"\350", 0 },
152 {
"eacute",
"\351", 0 },
153 {
"ecirc",
"\352", 0 },
154 {
"euml",
"\353", 0 },
155 {
"igrave",
"\354", 0 },
156 {
"iacute",
"\355", 0 },
157 {
"icirc",
"\356", 0 },
158 {
"iuml",
"\357", 0 },
159 {
"eth",
"\360", 0 },
160 {
"ntilde",
"\361", 0 },
161 {
"ograve",
"\362", 0 },
162 {
"oacute",
"\363", 0 },
163 {
"ocirc",
"\364", 0 },
164 {
"otilde",
"\365", 0 },
165 {
"ouml",
"\366", 0 },
166 {
"divide",
"\367", 0 },
167 {
"oslash",
"\370", 0 },
168 {
"ugrave",
"\371", 0 },
169 {
"uacute",
"\372", 0 },
170 {
"ucirc",
"\373", 0 },
171 {
"uuml",
"\374", 0 },
172 {
"yacute",
"\375", 0 },
173 {
"thorn",
"\376", 0 },
174 {
"yuml",
"\377", 0 },
182#define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
201 while ((
c = *zName) != 0) {
214static void EscHashStats()
234 printf(
"Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
235 max, (
double)
sum/(
double)notempty, i, i-notempty,
236 100.0*(i-notempty)/(
double)i);
321 static int isInit = 0;
329 if (z[from] ==
'&') {
330 if (z[from+1] ==
'#') {
333 while (isdigit(z[i])) {
334 v =
v*10 + z[i] -
'0';
337 if (z[i] ==
';') { i++; }
342 if (
v >= 0x80 &&
v < 0xa0) {
354 while (z[i] && isalnum(z[i])) ++i;
359 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
363 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
365 if (
c ==
';') from++;
375 }
else if (((
unsigned char) z[from]) >= 0x80 &&
376 ((
unsigned char) z[from]) < 0xa0) {
414 while ((
c = *zName) != 0) {
433static void HtmlHashStats() {
453 printf(
"longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
454 max, (
double)
sum/(
double)notempty, i, i-notempty,
455 100.0*(i-notempty)/(
double)i);
531 case '\t':
return (iCol | 7) + 1;
532 default:
return iCol+1;
543 if (isupper(*z)) *z = tolower(*z);
580 int pIsInNoScript = 0;
581 int pIsInNoFrames = 0;
586 static char null[1] = {
"" };
591 if (inpCol < 0)
return n;
595 while ((
c = z[
n]) != 0) {
598 if (
c == -64 && z[
n+1] == -128) {
618 zEnd =
"</noscript>";
621 zEnd =
"</noframes>";
632 for (i =
n ; z[i]; i++) {
633 if (z[i] ==
'\'' || z[i] ==
'"') {
635 }
else if (z[i] ==
'\n') {
638 if (z[i] ==
'<' && z[i+1] ==
'/' &&
639 strncasecmp(&z[i], zEnd, nEnd) == 0) {
640 if (zEnd[3] ==
'c' && ((sqcnt % 2) == 1))
continue;
647 if (z[i] == 0)
goto incomplete;
657 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
678 z =
fZText = HtmlRealloc(z, ol+rl);
679 memmove(z +
n + rl, z+
n, ol -
n);
680 memmove(z +
n, result, rl);
691 else if (isspace((
unsigned char)
c)) {
695 (
c = z[
n+i]) != 0 && isspace((
unsigned char)
c) &&
c !=
'\n' &&
c !=
'\r';
697 if (
c ==
'\r' && z[
n+i+1] ==
'\n') ++i;
709 if (pElem == 0)
goto incomplete;
713 if (
c ==
'\n' ||
c ==
'\r') {
719 int iColStart = inpCol;
721 for (j = 0; j < i; j++) {
724 pElem->
fCount = inpCol - iColStart;
731 (!isalpha(z[
n+1]) && z[
n+1] !=
'/' && z[
n+1] !=
'!' && z[
n+1] !=
'?')) {
734 for (i = 1; (
c = z[
n+i]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'<'; i++) {}
735 if (z[
n+i-1] ==
'.' || z[
n+i-1] ==
'!' || z[
n+i-1] ==
'?') sawdot = 2;
736 if (
c == 0)
goto incomplete;
740 if (i >= 10 && strncasecmp(&z[
n],
"</listing>", 10) == 0) {
747 if (i >= 6 && strncasecmp(&z[
n],
"</xmp>", 6) == 0) {
754 if (i >= 11 && strncasecmp(&z[
n],
"</textarea>", 11) == 0) {
765 if (pElem == 0)
goto incomplete;
769 strncpy(tpElem->
fZText, &z[
n], i);
779 }
else if (strncmp(&z[
n],
"<!--", 4) == 0) {
782 for (i = 4; z[
n+i]; i++) {
783 if (z[
n+i] ==
'-' && strncmp(&z[
n+i],
"-->", 3) == 0)
break;
785 if (z[
n+i] == 0)
goto incomplete;
788 if (pElem == 0)
goto incomplete;
793 strncpy(tpElem->
fZText, &z[
n+4], i-4);
801 for (j = 0; j < i+3; j++) {
816 (
c = z[
n+i]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>' && (i < 2 ||
c !=
'/');
819 if (
c == 0)
goto incomplete;
823 while (isspace((
unsigned char)z[
n+i])) ++i;
824 while ((
c = z[
n+i]) != 0 &&
c !=
'>' && (
c !=
'/' || z[
n+i+1] !=
'>')) {
826 argv[argc] = &z[
n+i];
828 while ((
c = z[
n+i+j]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>' &&
829 c !=
'=' && (
c !=
'/' || z[
n+i+j+1] !=
'>')) ++j;
831 if (
c == 0)
goto incomplete;
833 while (isspace((
unsigned char)
c)) {
837 if (
c == 0)
goto incomplete;
847 while (isspace((
unsigned char)
c)) {
851 if (
c == 0)
goto incomplete;
852 if (
c ==
'\'' ||
c ==
'"') {
855 argv[argc] = &z[
n+i];
856 for (j = 0; (
c = z[
n+i+j]) != 0 &&
c != cQuote; j++) {}
857 if (
c == 0)
goto incomplete;
861 argv[argc] = &z[
n+i];
862 for (j = 0; (
c = z[
n+i+j]) != 0 && !isspace((
unsigned char)
c) &&
c !=
'>'; j++) {}
863 if (
c == 0)
goto incomplete;
868 while (isspace(z[
n+i])) ++i;
877 if (
c == 0)
goto incomplete;
878 for (j = 0; j < i+1; j++) {
889 c = argv[0][arglen[0]];
890 argv[0][arglen[0]] = 0;
893 if (strcasecmp(pMap->
fZName, argv[0]) == 0)
break;
895 argv[0][arglen[0]] =
c;
896 if (pMap == 0)
continue;
902 if (pElem == 0)
goto incomplete;
932 switch (pMap->
fType) {
996 if (selfClose && argv[0][0] !=
'/' &&
997 strcmp(&pMap[1].fZName[1], pMap->
fZName) == 0) {
1001 goto makeMarkupEntry;
1019 int arglen[],
char *argv[])
1085 int len = strlen(
text);
1092 char *tmp =
new char[
fNAlloc];
1128 char *zType,
char *zArgs,
int offs)
1141 if (strcmp(zType,
"Text") == 0) {
1143 if (pElem == 0)
return 0;
1149 }
else if (!strcmp(zType,
"Space")) {
1151 if (pElem == 0)
return 0;
1155 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1157 if (pMap == 0)
return 0;
1158 if (zArgs == 0 || *zArgs == 0) {
1162 if (pElem == 0)
return 0;
1171 if (!SplitList(zArgs, &argc, &argv))
return 0;
1176 if (pElem == 0)
return 1;
1178 while (--argc >= 0)
if (argv[argc])
delete[] argv[argc];
1206 char *cp = 0,
c, *cp2;
1208 if (
GetIndex(argv[3], &p, &i) != 0) {
1219 if (argv[2][0] ==
'b') {
1220 if (!istxt)
return 1;
1221 if (i == 0 || i ==
l)
return 1;
1230 if ((!istxt) || (!
l) || (!i)) {
1233 RemoveElements(p, p);
1237 RemoveElements(p, p);
1243 memcpy(cp+i-1, cp+i,
l-i+1);
1246 if (ins.i-- <= 0) ins.i = 0;
1250 if (
c ==
'\n' ||
c ==
'\r') {
1254 int t, j, alen = strlen(argv[4]);
1259 if (
text->fZText == (
char*) ((&
text->fZText)+1)) {
1261 strcpy(cp,
text->fZText);
1264 strcpy(cp,
text->fZText);
1266 cp2 =
new char[alen+1];
1267 memcpy(cp2, argv[4], alen+1);
1270 memmove(cp+alen+i, cp+i,
l-i+1);
1271 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1273 delete[]
text->fZText;
1287 AddStrOffset(p, argv[4], i);
1307 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1341 static char zBuf[200];
1356 snprintf(zBuf, 200,
"space: \"\\n\"");
1358 snprintf(zBuf, 200,
"space: \" \"");
1364 if (
block->fN > 0) {
1366 if (
n > 150)
n = 150;
1382 for (j = 1 ; j < p->
fCount; j += 2) {
1383 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf),
" %s=\"%s\"",
1412 for (i = 0; i + 1 < pElem->
fCount; i += 2) {
1425 static char zBuf[200];
1432 strcpy(zBuf,
"NULL");
1450 strlcpy(zBuf, zName,
sizeof(zBuf));
1476 while (p && p != pEnd) {
1482 str->
Append(
"{ Text \"");
1488 snprintf(zLine, 100,
"Space %d %d ",
1498 str->
Append(
"{ Markup ");
1507 for (i = 0; i < p->
fCount; ++i) {
1530 const char *z =
block->fZ;
1532 if (
n == 0 || z == 0) {
1536 printf(
"Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1540 printf(
"Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
void ToLower(char *z)
Convert a string to all lower-case letters.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
static int EscHash(const char *zName)
static int HtmlHash(const char *zName)
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t HtmlMarkupMap[]
static struct SgEsc_t gEscSequences[]
static void HtmlHashInit(void)
#define HTML_MARKUP_HASH_SIZE
#define HTML_MARKUP_COUNT
#define O_HtmlImageMarkup
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
virtual char * ProcessScript(TGHtmlScript *)
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
const char * TypeToName(int type)
Convert a type into a symbolic name.
int Tokenize()
Process as much of the input HTML as possible.
int NameToType(char *zType)
Convert a markup name into a type integer.
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
virtual int ProcessToken(TGHtmlElement *, const char *, int)
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
TString & Append(const char *cs)
SHtmlTokenMap_t * fPCollide
static long int sum(long int i)