80 {
"iexcl",
"\241", 0 },
81 {
"cent",
"\242", 0 },
82 {
"pound",
"\243", 0 },
83 {
"curren",
"\244", 0 },
85 {
"brvbar",
"\246", 0 },
86 {
"sect",
"\247", 0 },
88 {
"copy",
"\251", 0 },
89 {
"ordf",
"\252", 0 },
90 {
"laquo",
"\253", 0 },
94 {
"macr",
"\257", 0 },
96 {
"plusmn",
"\261", 0 },
97 {
"sup2",
"\262", 0 },
98 {
"sup3",
"\263", 0 },
99 {
"acute",
"\264", 0 },
100 {
"micro",
"\265", 0 },
101 {
"para",
"\266", 0 },
102 {
"middot",
"\267", 0 },
103 {
"cedil",
"\270", 0 },
104 {
"sup1",
"\271", 0 },
105 {
"ordm",
"\272", 0 },
106 {
"raquo",
"\273", 0 },
107 {
"frac14",
"\274", 0 },
108 {
"frac12",
"\275", 0 },
109 {
"frac34",
"\276", 0 },
110 {
"iquest",
"\277", 0 },
111 {
"Agrave",
"\300", 0 },
112 {
"Aacute",
"\301", 0 },
113 {
"Acirc",
"\302", 0 },
114 {
"Atilde",
"\303", 0 },
115 {
"Auml",
"\304", 0 },
116 {
"Aring",
"\305", 0 },
117 {
"AElig",
"\306", 0 },
118 {
"Ccedil",
"\307", 0 },
119 {
"Egrave",
"\310", 0 },
120 {
"Eacute",
"\311", 0 },
121 {
"Ecirc",
"\312", 0 },
122 {
"Euml",
"\313", 0 },
123 {
"Igrave",
"\314", 0 },
124 {
"Iacute",
"\315", 0 },
125 {
"Icirc",
"\316", 0 },
126 {
"Iuml",
"\317", 0 },
127 {
"ETH",
"\320", 0 },
128 {
"Ntilde",
"\321", 0 },
129 {
"Ograve",
"\322", 0 },
130 {
"Oacute",
"\323", 0 },
131 {
"Ocirc",
"\324", 0 },
132 {
"Otilde",
"\325", 0 },
133 {
"Ouml",
"\326", 0 },
134 {
"times",
"\327", 0 },
135 {
"Oslash",
"\330", 0 },
136 {
"Ugrave",
"\331", 0 },
137 {
"Uacute",
"\332", 0 },
138 {
"Ucirc",
"\333", 0 },
139 {
"Uuml",
"\334", 0 },
140 {
"Yacute",
"\335", 0 },
141 {
"THORN",
"\336", 0 },
142 {
"szlig",
"\337", 0 },
143 {
"agrave",
"\340", 0 },
144 {
"aacute",
"\341", 0 },
145 {
"acirc",
"\342", 0 },
146 {
"atilde",
"\343", 0 },
147 {
"auml",
"\344", 0 },
148 {
"aring",
"\345", 0 },
149 {
"aelig",
"\346", 0 },
150 {
"ccedil",
"\347", 0 },
151 {
"egrave",
"\350", 0 },
152 {
"eacute",
"\351", 0 },
153 {
"ecirc",
"\352", 0 },
154 {
"euml",
"\353", 0 },
155 {
"igrave",
"\354", 0 },
156 {
"iacute",
"\355", 0 },
157 {
"icirc",
"\356", 0 },
158 {
"iuml",
"\357", 0 },
159 {
"eth",
"\360", 0 },
160 {
"ntilde",
"\361", 0 },
161 {
"ograve",
"\362", 0 },
162 {
"oacute",
"\363", 0 },
163 {
"ocirc",
"\364", 0 },
164 {
"otilde",
"\365", 0 },
165 {
"ouml",
"\366", 0 },
166 {
"divide",
"\367", 0 },
167 {
"oslash",
"\370", 0 },
168 {
"ugrave",
"\371", 0 },
169 {
"uacute",
"\372", 0 },
170 {
"ucirc",
"\373", 0 },
171 {
"uuml",
"\374", 0 },
172 {
"yacute",
"\375", 0 },
173 {
"thorn",
"\376", 0 },
174 {
"yuml",
"\377", 0 },
182 #define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7) 201 while ((c = *zName) != 0) {
214 static void EscHashStats()
223 for (i = 0; i <
sizeof(
gEscSequences) /
sizeof(gEscSequences[0]); i++) {
232 if (cnt > max) max =
cnt;
234 printf(
"Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
235 max, (
double)sum/(
double)notempty, i, i-notempty,
236 100.0*(i-notempty)/(
double)i);
246 for (i = 0; i < int(
sizeof(gEscSequences) /
sizeof(gEscSequences[i])); i++) {
249 int c = gEscSequences[i].value[0];
250 xclass::UniCharToUtf(c, gEscSequences[i].value);
253 h =
EscHash(gEscSequences[i].fZName);
254 gEscSequences[i].fPNext = gApEscHash[
h];
255 gApEscHash[
h] = &gEscSequences[i];
321 static int isInit = 0;
329 if (z[from] ==
'&') {
330 if (z[from+1] ==
'#') {
333 while (isdigit(z[i])) {
334 v = v*10 + z[i] -
'0';
337 if (z[i] ==
';') { i++; }
342 if (v >= 0x80 && v < 0xa0) {
354 while (z[i] && isalnum(z[i])) ++i;
359 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
363 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
365 if (c ==
';') from++;
375 }
else if (((
unsigned char) z[from]) >= 0x80 &&
376 ((
unsigned char) z[from]) < 0xa0) {
414 while ((c = *zName) != 0) {
433 static void HtmlHashStats() {
450 if (cnt > max) max =
cnt;
453 printf(
"longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
454 max, (
double)sum/(
double)notempty, i, i-notempty,
455 100.0*(i-notempty)/(
double)i);
467 h =
HtmlHash(HtmlMarkupMap[i].fZName);
469 gApMap[
h] = &HtmlMarkupMap[i];
515 if (fPFirst == p) fPFirst = pNew;
518 pNew->
fElId = ++fIdind;
531 case '\t':
return (iCol | 7) + 1;
532 default:
return iCol+1;
543 if (isupper(*z)) *z = tolower(*z);
574 # define mxARG 200 // Maximum number of parameters in a single markup 578 #ifdef pIsInMeachnism 580 int pIsInNoScript = 0;
581 int pIsInNoFrames = 0;
586 static char null[1] = {
"" };
591 if (inpCol < 0)
return n;
595 while ((c = z[n]) != 0) {
598 if (c == -64 && z[n+1] == -128) {
618 zEnd =
"</noscript>";
621 zEnd =
"</noframes>";
632 for (i = n ; z[i]; i++) {
633 if (z[i] ==
'\'' || z[i] ==
'"') {
635 }
else if (z[i] ==
'\n') {
638 if (z[i] ==
'<' && z[i+1] ==
'/' &&
639 strncasecmp(&z[i], zEnd, nEnd) == 0) {
640 if (zEnd[3] ==
'c' && ((sqcnt % 2) == 1))
continue;
647 if (z[i] == 0)
goto incomplete;
653 #ifdef pIsInMeachnism 657 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
678 z = fZText = HtmlRealloc(z, ol+rl);
679 memmove(z + n + rl, z+n, ol - n);
680 memmove(z + n, result, rl);
691 else if (isspace((
unsigned char)c)) {
695 (c = z[n+i]) != 0 && isspace((
unsigned char)c) && c !=
'\n' && c !=
'\r';
697 if (c ==
'\r' && z[n+i+1] ==
'\n') ++i;
698 #if 0 // this is certainly NOT OK, since it alters pre-formatted text 702 pElem->
fElId = ++fIdind;
705 AppendElement(pElem);
709 if (pElem == 0)
goto incomplete;
712 pElem->
fElId = ++fIdind;
713 if (c ==
'\n' || c ==
'\r') {
719 int iColStart = inpCol;
721 for (j = 0; j < i; j++) {
724 pElem->
fCount = inpCol - iColStart;
726 AppendElement(pElem);
730 else if (c !=
'<' || fIPlaintext != 0 ||
731 (!isalpha(z[n+1]) && z[n+1] !=
'/' && z[n+1] !=
'!' && z[n+1] !=
'?')) {
734 for (i = 1; (c = z[n+i]) != 0 && !isspace((
unsigned char)c) && c !=
'<'; i++) {}
735 if (z[n+i-1] ==
'.' || z[n+i-1] ==
'!' || z[n+i-1] ==
'?') sawdot = 2;
736 if (c == 0)
goto incomplete;
737 if (fIPlaintext != 0 && z[n] ==
'<') {
738 switch (fIPlaintext) {
740 if (i >= 10 && strncasecmp(&z[n],
"</listing>", 10) == 0) {
747 if (i >= 6 && strncasecmp(&z[n],
"</xmp>", 6) == 0) {
754 if (i >= 11 && strncasecmp(&z[n],
"</textarea>", 11) == 0) {
765 if (pElem == 0)
goto incomplete;
767 tpElem->
fElId = ++fIdind;
769 strncpy(tpElem->
fZText, &z[n], i);
771 AppendElement(pElem);
779 }
else if (strncmp(&z[n],
"<!--", 4) == 0) {
782 for (i = 4; z[n+i]; i++) {
783 if (z[n+i] ==
'-' && strncmp(&z[n+i],
"-->", 3) == 0)
break;
785 if (z[n+i] == 0)
goto incomplete;
788 if (pElem == 0)
goto incomplete;
791 tpElem->
fElId = ++fIdind;
793 strncpy(tpElem->
fZText, &z[n+4], i-4);
796 AppendElement(pElem);
799 AppToken(pElem, 0, n+4);
801 for (j = 0; j < i+3; j++) {
816 (c = z[n+i]) != 0 && !isspace((
unsigned char)c) && c !=
'>' && (i < 2 || c !=
'/');
819 if (c == 0)
goto incomplete;
823 while (isspace((
unsigned char)z[n+i])) ++i;
824 while ((c = z[n+i]) != 0 && c !=
'>' && (c !=
'/' || z[n+i+1] !=
'>')) {
826 argv[argc] = &z[n+i];
828 while ((c = z[n+i+j]) != 0 && !isspace((
unsigned char)c) && c !=
'>' &&
829 c !=
'=' && (c !=
'/' || z[n+i+j+1] !=
'>')) ++j;
831 if (c == 0)
goto incomplete;
833 while (isspace((
unsigned char)c)) {
837 if (c == 0)
goto incomplete;
847 while (isspace((
unsigned char)c)) {
851 if (c == 0)
goto incomplete;
852 if (c ==
'\'' || c ==
'"') {
855 argv[argc] = &z[n+i];
856 for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
857 if (c == 0)
goto incomplete;
861 argv[argc] = &z[n+i];
862 for (j = 0; (c = z[n+i+j]) != 0 && !isspace((
unsigned char)c) && c !=
'>'; j++) {}
863 if (c == 0)
goto incomplete;
868 while (isspace(z[n+i])) ++i;
877 if (c == 0)
goto incomplete;
878 for (j = 0; j < i+1; j++) {
889 c = argv[0][arglen[0]];
890 argv[0][arglen[0]] = 0;
892 for (pMap = gApMap[h]; pMap; pMap = pMap->
fPCollide) {
893 if (strcasecmp(pMap->
fZName, argv[0]) == 0)
break;
895 argv[0][arglen[0]] = c;
896 if (pMap == 0)
continue;
901 pElem = MakeMarkupEntry(pMap->
fObjType, pMap->
fType, argc, arglen, argv);
902 if (pElem == 0)
goto incomplete;
904 pElem->
fElId = ++fIdind;
913 if (ProcessToken(pElem, pMap->
fZName, pMap->
fType)) {
931 AppendElement(pElem);
932 switch (pMap->
fType) {
940 fIPlaintext = pMap->
fType;
944 if (!fHasFrames)
break;
945 #ifdef pIsInMeachnism 951 if (!fHasScript)
break;
952 #ifdef pIsInMeachnism 956 #ifdef pIsInMeachnism 965 if (!fAddEndTags)
break;
968 AppToken(e, pElem, n);
980 if (!fAddEndTags)
break;
983 AppToken(e, pElem, n);
996 if (selfClose && argv[0][0] !=
'/' &&
997 strcmp(&pMap[1].fZName[1], pMap->
fZName) == 0) {
1001 goto makeMarkupEntry;
1019 int arglen[],
char *argv[])
1025 e =
new TGHtmlCell(type, argc, arglen, argv);
1033 e =
new TGHtmlRef(type, argc, arglen, argv);
1037 e =
new TGHtmlLi(type, argc, arglen, argv);
1053 e =
new TGHtmlForm(type, argc, arglen, argv);
1057 e =
new TGHtmlHr(type, argc, arglen, argv);
1085 int len = strlen(text);
1088 fNAlloc = len + 100;
1089 fZText =
new char [fNAlloc];
1090 }
else if (fNText + len >= fNAlloc) {
1091 fNAlloc += len + 100;
1092 char *tmp =
new char[fNAlloc];
1094 strcpy(tmp, fZText);
1106 strcpy(&fZText[fNText], text);
1108 fNComplete = Tokenize();
1128 char *zType,
char *zArgs,
int offs)
1141 if (strcmp(zType,
"Text") == 0) {
1143 if (pElem == 0)
return 0;
1147 pElem->
fCount = strlen(zArgs);
1149 }
else if (!strcmp(zType,
"Space")) {
1151 if (pElem == 0)
return 0;
1154 for (pMap = gApMap[h]; pMap; pMap = pMap->
fPCollide) {
1155 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1157 if (pMap == 0)
return 0;
1158 if (zArgs == 0 || *zArgs == 0) {
1161 pElem = MakeMarkupEntry(pMap->
fObjType, pMap->
fType, 1, 0, 0);
1162 if (pElem == 0)
return 0;
1171 if (!SplitList(zArgs, &argc, &argv))
return 0;
1175 pElem = MakeMarkupEntry(pMap->
fObjType, pMap->
fType, argc, 0, argv);
1176 if (pElem == 0)
return 1;
1178 while (--argc >= 0)
if (argv[argc])
delete[] argv[argc];
1186 pElem->
fElId = ++fIdind;
1188 AppToken(pElem, pToken, offs);
1206 char *cp = 0, c, *cp2;
1208 if (GetIndex(argv[3], &p, &i) != 0) {
1219 if (argv[2][0] ==
'b') {
1220 if (!istxt)
return 1;
1221 if (i == 0 || i == l)
return 1;
1222 pElem = InsertToken(p->
fPNext,
"Text", cp + i, -1);
1230 if ((!istxt) || (!l) || (!i)) {
1233 RemoveElements(p, p);
1237 RemoveElements(p, p);
1243 memcpy(cp+i-1, cp+i, l-i+1);
1246 if (ins.i-- <= 0) ins.i = 0;
1250 if (c ==
'\n' || c ==
'\r') {
1254 int t, j, alen = strlen(argv[4]);
1261 strcpy(cp, text->
fZText);
1264 strcpy(cp, text->
fZText);
1266 cp2 =
new char[alen+1];
1267 memcpy(cp2, argv[4], alen+1);
1270 memmove(cp+alen+i, cp+i, l-i+1);
1271 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1279 p = InsertToken(p ? p->
fPNext : 0,
"Text", argv[4], -1);
1287 AddStrOffset(p, argv[4], i);
1306 for (pMap = gApMap[h]; pMap; pMap = pMap->
fPCollide) {
1307 if (strcasecmp(pMap->
fZName, zType) == 0)
break;
1341 static char zBuf[200];
1356 snprintf(zBuf, 200,
"space: \"\\n\"");
1358 snprintf(zBuf, 200,
"space: \" \"");
1364 if (block->
fN > 0) {
1366 if (n > 150) n = 150;
1367 snprintf(zBuf, 200,
"<Block z=\"%.*s\">", n, block->
fZ);
1382 for (j = 1 ; j < p->
fCount; j += 2) {
1383 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf),
" %s=\"%s\"",
1412 for (i = 0; i + 1 < pElem->
fCount; i += 2) {
1425 static char zBuf[200];
1432 strcpy(zBuf,
"NULL");
1450 strlcpy(zBuf, zName,
sizeof(zBuf));
1462 return HtmlMarkupMap+
n;
1476 while (p && p != pEnd) {
1482 str->
Append(
"{ Text \"");
1488 snprintf(zLine, 100,
"Space %d %d ",
1498 str->
Append(
"{ Markup ");
1507 for (i = 0; i < p->
fCount; ++i) {
1527 for (p = first; p != last; p = p->
fPNext) {
1530 const char *
z = block->
fZ;
1532 if (n == 0 || z == 0) {
1536 printf(
"Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1540 printf(
"Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
static long int sum(long int i)
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
int NameToType(char *zType)
Convert a markup name into a type integer.
SHtmlTokenMap_t * fPCollide
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizer's list of elements.
static struct SgEsc_t gEscSequences[]
#define HTML_MARKUP_HASH_SIZE
TString & Append(const char *cs)
static void HtmlHashInit(void)
int Tokenize()
Process as much of the input HTML as possible.
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
void ToLower(char *z)
Convert a string to all lower-case letters.
static int EscHash(const char *zName)
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into a TGHtmlElement object and inserts it before pToken.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
static int HtmlHash(const char *zName)
#define O_HtmlImageMarkup
const char * TypeToName(int type)
Convert a type into a symbolic name.
SHtmlTokenMap_t HtmlMarkupMap[]
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
#define HTML_MARKUP_COUNT
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
you should not use this method at all Int_t Int_t z
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]