Logo ROOT   6.18/05
Reference Guide
TGHtmlParse.cxx
Go to the documentation of this file.
1// $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
2// Author: Valeriy Onuchin 03/05/2007
3
4/*************************************************************************
5 * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
6 * All rights reserved. *
7 * *
8 * For the licensing terms see $ROOTSYS/LICENSE. *
9 * For the list of contributors see $ROOTSYS/README/CREDITS. *
10 *************************************************************************/
11
12/**************************************************************************
13
14 HTML widget for xclass. Based on tkhtml 1.28
15 Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
16 Copyright (C) 2002-2003 Hector Peraza.
17
18 This library is free software; you can redistribute it and/or
19 modify it under the terms of the GNU Library General Public
20 License as published by the Free Software Foundation; either
21 version 2 of the License, or (at your option) any later version.
22
23 This library is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 Library General Public License for more details.
27
28 You should have received a copy of the GNU Library General Public
29 License along with this library; if not, write to the Free
30 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31
32**************************************************************************/
33
34// A tokenizer that converts raw HTML into a linked list of HTML elements.
35
36#include <string.h>
37#include <stdlib.h>
38#include <stdio.h>
39#include <ctype.h>
40
41#include "TGHtml.h"
42#include "TGHtmlTokens.h"
43
44
45//----------------------------------------------------------------------
46
48
49
50/****************** Begin Escape Sequence Translator *************/
51
52// The next section of code implements routines used to translate
53// the '&' escape sequences of SGML to individual characters.
54// Examples:
55//
56// &amp; &
57// &lt; <
58// &gt; >
59// &nbsp; nonbreakable space
60//
61
62// Each escape sequence is recorded as an instance of the following
63// structure
64
// One entry of the escape sequence table. Entries are also linked into
// per-bucket collision chains of the hash table through fPNext.
65struct SgEsc_t {
66 const char *fZName; // Name of the sequence without the '&' and ';'. ex: "amp"
67 char fValue[8]; // NUL-terminated replacement text for the sequence. ex: "&"
68 SgEsc_t *fPNext; // Next entry whose fZName hashes to the same bucket
69};
70
71// The following is a table of all escape sequences. Add new sequences
72// by adding entries to this table.
73
// Each row is { name, replacement text, chain link }.
// Replacements above \177 are single bytes written as octal escapes (they
// look like ISO-8859-1 code points -- TODO confirm the intended encoding).
// The chain link starts as 0; EscInit() assigns it at run time.
74static struct SgEsc_t gEscSequences[] = {
75 { "quot", "\"", 0 },
76 { "amp", "&", 0 },
77 { "lt", "<", 0 },
78 { "gt", ">", 0 },
79 { "nbsp", " ", 0 },
80 { "iexcl", "\241", 0 },
81 { "cent", "\242", 0 },
82 { "pound", "\243", 0 },
83 { "curren", "\244", 0 },
84 { "yen", "\245", 0 },
85 { "brvbar", "\246", 0 },
86 { "sect", "\247", 0 },
87 { "uml", "\250", 0 },
88 { "copy", "\251", 0 },
89 { "ordf", "\252", 0 },
90 { "laquo", "\253", 0 },
91 { "not", "\254", 0 },
92 { "shy", "\255", 0 },
93 { "reg", "\256", 0 },
94 { "macr", "\257", 0 },
95 { "deg", "\260", 0 },
96 { "plusmn", "\261", 0 },
97 { "sup2", "\262", 0 },
98 { "sup3", "\263", 0 },
99 { "acute", "\264", 0 },
100 { "micro", "\265", 0 },
101 { "para", "\266", 0 },
102 { "middot", "\267", 0 },
103 { "cedil", "\270", 0 },
104 { "sup1", "\271", 0 },
105 { "ordm", "\272", 0 },
106 { "raquo", "\273", 0 },
107 { "frac14", "\274", 0 },
108 { "frac12", "\275", 0 },
109 { "frac34", "\276", 0 },
110 { "iquest", "\277", 0 },
111 { "Agrave", "\300", 0 },
112 { "Aacute", "\301", 0 },
113 { "Acirc", "\302", 0 },
114 { "Atilde", "\303", 0 },
115 { "Auml", "\304", 0 },
116 { "Aring", "\305", 0 },
117 { "AElig", "\306", 0 },
118 { "Ccedil", "\307", 0 },
119 { "Egrave", "\310", 0 },
120 { "Eacute", "\311", 0 },
121 { "Ecirc", "\312", 0 },
122 { "Euml", "\313", 0 },
123 { "Igrave", "\314", 0 },
124 { "Iacute", "\315", 0 },
125 { "Icirc", "\316", 0 },
126 { "Iuml", "\317", 0 },
127 { "ETH", "\320", 0 },
128 { "Ntilde", "\321", 0 },
129 { "Ograve", "\322", 0 },
130 { "Oacute", "\323", 0 },
131 { "Ocirc", "\324", 0 },
132 { "Otilde", "\325", 0 },
133 { "Ouml", "\326", 0 },
134 { "times", "\327", 0 },
135 { "Oslash", "\330", 0 },
136 { "Ugrave", "\331", 0 },
137 { "Uacute", "\332", 0 },
138 { "Ucirc", "\333", 0 },
139 { "Uuml", "\334", 0 },
140 { "Yacute", "\335", 0 },
141 { "THORN", "\336", 0 },
142 { "szlig", "\337", 0 },
143 { "agrave", "\340", 0 },
144 { "aacute", "\341", 0 },
145 { "acirc", "\342", 0 },
146 { "atilde", "\343", 0 },
147 { "auml", "\344", 0 },
148 { "aring", "\345", 0 },
149 { "aelig", "\346", 0 },
150 { "ccedil", "\347", 0 },
151 { "egrave", "\350", 0 },
152 { "eacute", "\351", 0 },
153 { "ecirc", "\352", 0 },
154 { "euml", "\353", 0 },
155 { "igrave", "\354", 0 },
156 { "iacute", "\355", 0 },
157 { "icirc", "\356", 0 },
158 { "iuml", "\357", 0 },
159 { "eth", "\360", 0 },
160 { "ntilde", "\361", 0 },
161 { "ograve", "\362", 0 },
162 { "oacute", "\363", 0 },
163 { "ocirc", "\364", 0 },
164 { "otilde", "\365", 0 },
165 { "ouml", "\366", 0 },
166 { "divide", "\367", 0 },
167 { "oslash", "\370", 0 },
168 { "ugrave", "\371", 0 },
169 { "uacute", "\372", 0 },
170 { "ucirc", "\373", 0 },
171 { "uuml", "\374", 0 },
172 { "yacute", "\375", 0 },
173 { "thorn", "\376", 0 },
174 { "yuml", "\377", 0 },
175};
176
177
178// The size of the handler hash table. For best results this should
179// be a prime number which is about the same size as the number of
180// escape sequences known to the system.
181
// Bucket count of the hash table: the number of entries in gEscSequences
// plus 7. Built from sizeof, so the expression has an unsigned type.
182#define ESC_HASH_SIZE (sizeof(gEscSequences)/sizeof(gEscSequences[0])+7)
183
184
185// The hash table
186//
187// If the name of an escape sequence hashes to the value H, then
188// gApEscHash[H] points to a linked list of SgEsc_t structures (chained
189// through fPNext), one of which is the structure for that escape sequence.
190
191static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
192
193
194// Hash an escape sequence name. The value returned is an integer
195// between 0 and ESC_HASH_SIZE-1, inclusive.
196
197static int EscHash(const char *zName) {
198 int h = 0; // The hash value to be returned
199 char c; // The next character in the name being hashed
200
201 while ((c = *zName) != 0) {
202 h = h<<5 ^ h ^ c;
203 zName++;
204 }
205 if (h < 0) h = -h;
206
207 return h % ESC_HASH_SIZE;
208}
209
210#ifdef TEST
211// Compute the longest and average collision chain length for the
212// escape sequence hash table
213
214static void EscHashStats()
215{
216 int i;
217 int sum = 0;
218 int max = 0;
219 int cnt;
220 int notempty = 0;
221 struct SgEsc_t *p;
222
223 for (i = 0; i < sizeof(gEscSequences) / sizeof(gEscSequences[0]); i++) {
224 cnt = 0;
225 p = gApEscHash[i];
226 if (p) notempty++;
227 while (p) {
228 ++cnt;
229 p = p->fPNext;
230 }
231 sum += cnt;
232 if (cnt > max) max = cnt;
233 }
234 printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
235 max, (double)sum/(double)notempty, i, i-notempty,
236 100.0*(i-notempty)/(double)i);
237}
238#endif
239
240// Initialize the escape sequence hash table
241
242static void EscInit() {
243 int i; /* For looping thru the list of escape sequences */
244 int h; /* The hash on a sequence */
245
246 for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
247/* #ifdef XCLASS_UTF_MAX */
248#if 0
249 int c = gEscSequences[i].value[0];
250 xclass::UniCharToUtf(c, gEscSequences[i].value);
251 }
252#endif
253 h = EscHash(gEscSequences[i].fZName);
254 gEscSequences[i].fPNext = gApEscHash[h];
256 }
257#ifdef TEST
258 EscHashStats();
259#endif
260}
261
262
263// This table translates the non-standard microsoft characters between 0x80
264// and 0x9f into plain ASCII so that the characters will be visible on Unix
265// systems. Care is taken to translate the characters into values less than
266// 0x80, to avoid UTF-8 problems.
267
// Replacement characters for the non-standard Microsoft codes 0x80..0x9f.
// Index the table with (code & 0x1f). Every replacement is below 0x80.
// The table is only read, so it is declared const.
static const char gAcMsChar[] = {
   /* 0x80 */ 'C',
   /* 0x81 */ ' ',
   /* 0x82 */ ',',
   /* 0x83 */ 'f',
   /* 0x84 */ '"',
   /* 0x85 */ '.',
   /* 0x86 */ '*',
   /* 0x87 */ '*',
   /* 0x88 */ '^',
   /* 0x89 */ '%',
   /* 0x8a */ 'S',
   /* 0x8b */ '<',
   /* 0x8c */ 'O',
   /* 0x8d */ ' ',
   /* 0x8e */ 'Z',
   /* 0x8f */ ' ',
   /* 0x90 */ ' ',
   /* 0x91 */ '\'',
   /* 0x92 */ '\'',
   /* 0x93 */ '"',
   /* 0x94 */ '"',
   /* 0x95 */ '*',
   /* 0x96 */ '-',
   /* 0x97 */ '-',
   /* 0x98 */ '~',
   /* 0x99 */ '@',
   /* 0x9a */ 's',
   /* 0x9b */ '>',
   /* 0x9c */ 'o',
   /* 0x9d */ ' ',
   /* 0x9e */ 'z',
   /* 0x9f */ 'Y',
};
302
303
304////////////////////////////////////////////////////////////////////////////////
305/// Translate escape sequences in the string "z". "z" is overwritten
306/// with the translated sequence.
307///
308/// Unrecognized escape sequences are unaltered.
309///
310/// Example:
311///
312/// input = "AT&amp;T &gt MCI"
313/// output = "AT&T > MCI"
314
316{
317 int from; // Read characters from this position in z[]
318 int to; // Write characters into this position in z[]
319 int h; // A hash on the escape sequence
320 struct SgEsc_t *p; // For looping down the escape sequence collision chain
321 static int isInit = 0; // True after initialization
322
323 from = to = 0;
324 if (!isInit) {
325 EscInit();
326 isInit = 1;
327 }
328 while (z[from]) {
329 if (z[from] == '&') {
330 if (z[from+1] == '#') {
331 int i = from + 2;
332 int v = 0;
333 while (isdigit(z[i])) {
334 v = v*10 + z[i] - '0';
335 i++;
336 }
337 if (z[i] == ';') { i++; }
338
339 // Translate the non-standard microsoft characters in the range of
340 // 0x80 to 0x9f into something we can see.
341
342 if (v >= 0x80 && v < 0xa0) {
343 v = gAcMsChar[v & 0x1f];
344 }
345
346 // Put the character in the output stream in place of the "&#000;".
347 // How we do this depends on whether or not we are using UTF-8.
348
349 z[to++] = v;
350 from = i;
351 } else {
352 int i = from+1;
353 int c;
354 while (z[i] && isalnum(z[i])) ++i;
355 c = z[i];
356 z[i] = 0;
357 h = EscHash(&z[from+1]);
358 p = gApEscHash[h];
359 while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
360 z[i] = c;
361 if (p) {
362 int j;
363 for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
364 from = i;
365 if (c == ';') from++;
366 } else {
367 z[to++] = z[from++];
368 }
369 }
370
371 // Look for the non-standard microsoft characters between 0x80 and 0x9f
372 // and translate them into printable ASCII codes. Separate algorithms
373 // are required to do this for plain ascii and for utf-8.
374
375 } else if (((unsigned char) z[from]) >= 0x80 &&
376 ((unsigned char) z[from]) < 0xa0) {
377 z[to++] = gAcMsChar[z[from++] & 0x1f];
378 } else {
379 z[to++] = z[from++];
380 }
381 }
382 z[to] = 0;
383}
384
385/******************* End Escape Sequence Translator ***************/
386
387/******************* Begin HTML tokenizer code *******************/
388
389// The following flag becomes non-zero once HtmlHashInit() has filled the
390// markup hash table gApMap[] from the entries of HtmlMarkupMap[].
391
392static int gIsInit = 0;
393
394// The hash table for HTML markup names.
395//
396// If an HTML markup name hashes to H, then gApMap[H] will point to
397// a linked list of SHtmlTokenMap_t structures, one of which will
398// describe the particular markup (if it exists).
399
401
402// Hash a markup name
403//
404// HTML markup is case insensitive, so this function will give the
405// same hash regardless of the case of the markup name.
406//
407// The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
408// inclusive.
409
410static int HtmlHash(const char *zName) {
411 int h = 0;
412 char c;
413
414 while ((c = *zName) != 0) {
415 if (isupper(c)) { // do we have to check for this??????
416 c = tolower(c);
417 }
418 h = h<<5 ^ h ^ c;
419 zName++;
420 }
421 if (h < 0) {
422 h = -h;
423 }
424
425 return h % HTML_MARKUP_HASH_SIZE;
426}
427
428
429#ifdef TEST
430// Compute the longest and average collision chain length for the
431// markup hash table
432
433static void HtmlHashStats() {
434 int i;
435 int sum = 0;
436 int max = 0;
437 int cnt;
438 int notempty = 0;
439 struct sgMap *p;
440
441 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
442 cnt = 0;
443 p = gApMap[i];
444 if (p) notempty++;
445 while (p) {
446 cnt++;
447 p = p->fPCollide;
448 }
449 sum += cnt;
450 if (cnt > max) max = cnt;
451 }
452
453 printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
454 max, (double)sum/(double)notempty, i, i-notempty,
455 100.0*(i-notempty)/(double)i);
456}
457#endif
458
459
460// Initialize the markup name hash table
461
462static void HtmlHashInit(void){
463 int i;
464 int h; // The hash on a markup name
465
466 for (i = 0; i < HTML_MARKUP_COUNT; i++) {
467 h = HtmlHash(HtmlMarkupMap[i].fZName);
469 gApMap[h] = &HtmlMarkupMap[i];
470 }
471#ifdef TEST
472 HtmlHashStats();
473#endif
474}
475
476////////////////////////////////////////////////////////////////////////////////
477/// Append the given TGHtmlElement to the tokenizer's list of elements
478
480{
481 pElem->fPNext = 0;
482 pElem->fPPrev = fPLast;
483 if (fPFirst == 0) {
484 fPFirst = pElem;
485 } else {
486 fPLast->fPNext = pElem;
487 }
488 fPLast = pElem;
489 fNToken++;
490}
491
492////////////////////////////////////////////////////////////////////////////////
493/// Insert token pNew before token p
494
496{
497 if (offs < 0) {
498 if (p) {
499 offs = p->fOffs;
500 } else {
501 offs = fNText;
502 }
503 }
504
505////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
506
507// pNew->fCount = 0;
508 pNew->fOffs = offs;
509 pNew->fPNext = p;
510 if (p) {
511 pNew->fElId = p->fElId;
512 p->fElId = ++fIdind;
513 pNew->fPPrev = p->fPPrev;
514 if (p->fPPrev) p->fPPrev->fPNext = pNew;
515 if (fPFirst == p) fPFirst = pNew;
516 p->fPPrev = pNew;
517 } else {
518 pNew->fElId = ++fIdind;
519 AppendElement(pNew);
520 }
521 fNToken++;
522}
523
524////////////////////////////////////////////////////////////////////////////////
525/// Compute the new column index following the given character.
526
// Return the column reached after character c is placed at column iCol:
// a newline goes back to column 0, a tab moves to the next multiple of 8,
// and any other character moves one column to the right.
static int NextColumn(int iCol, char c)
{
   if (c == '\n') return 0;
   if (c == '\t') return (iCol | 7) + 1;
   return iCol + 1;
}
536
537////////////////////////////////////////////////////////////////////////////////
538/// Convert a string to all lower-case letters.
539
// Convert the NUL-terminated string z to lower case in place.
//
// z - string to convert; must not be null.
//
// Each character is converted to unsigned char before it is handed to
// isupper()/tolower(): passing a negative plain char to the <ctype.h>
// functions is undefined behaviour.
void ToLower(char *z)
{
   for (; *z; ++z) {
      const unsigned char uc = (unsigned char)*z;
      if (isupper(uc)) *z = (char)tolower(uc);
   }
}
547
548////////////////////////////////////////////////////////////////////////////////
549/// Process as much of the input HTML as possible. Construct new
550/// TGHtmlElement objects and append them to the list. Return
551/// the number of characters actually processed.
552///
553/// This routine may invoke a callback procedure which could delete
554/// the HTML widget.
555///
556/// This routine is not reentrant for the same HTML widget. To
557/// prevent reentrancy (during a callback), the fICol field is
558/// set to a negative number. This is a flag telling future invocations
559/// not to reenter this routine. The fICol field is restored
560/// before exiting, of course.
561
563{
564 char *z; // The input HTML text
565 int c; // The next character of input
566 int n; // Number of characters processed so far
567 int inpCol; // Column of input
568 int i, j; // Loop counters
569 int h; // Result from HtmlHash()
570 TGHtmlElement *pElem;// A new HTML element
571 int selfClose; // True for content free elements. Ex: <br/>
572 int argc; // The number of arguments on a markup
573 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
574# define mxARG 200 // Maximum number of parameters in a single markup
575 char *argv[mxARG]; // Pointers to each markup argument.
576 int arglen[mxARG]; // Length of each markup argument
577 //int rl, ol;
578#ifdef pIsInMeachnism
579 int pIsInScript = 0;
580 int pIsInNoScript = 0;
581 int pIsInNoFrames = 0;
582#endif
583 int sawdot = 0;
584 int inLi = 0;
585
586 static char null[1] = { "" };
587
588 inpCol = fICol;
589 n = fNComplete;
590 z = fZText;
591 if (inpCol < 0) return n; // Prevents recursion
592 fICol = -1;
593 pElem = 0;
594
595 while ((c = z[n]) != 0) {
596
597 sawdot--;
598 if (c == -64 && z[n+1] == -128) {
599 n += 2;
600 continue;
601 }
602
603 if (fPScript) {
604
605 // We are in the middle of <SCRIPT>...</SCRIPT>. Just look for
606 // the </SCRIPT> markup. (later:) Treat <STYLE>...</STYLE> the
607 // same way.
608
609 TGHtmlScript *pScr = fPScript;
610 const char *zEnd;
611 int nEnd;
612 //int curline, curch, curlast = n;
613 int sqcnt;
614 if (pScr->fType == Html_SCRIPT) {
615 zEnd = "</script>";
616 nEnd = 9;
617 } else if (pScr->fType == Html_NOSCRIPT) {
618 zEnd = "</noscript>";
619 nEnd = 11;
620 } else if (pScr->fType == Html_NOFRAMES) {
621 zEnd = "</noframes>";
622 nEnd = 11;
623 } else {
624 zEnd = "</style>";
625 nEnd = 8;
626 }
627 if (pScr->fNStart < 0) {
628 pScr->fNStart = n;
629 pScr->fNScript = 0;
630 }
631 sqcnt = 0;
632 for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
633 if (z[i] == '\'' || z[i] == '"') {
634 sqcnt++; // Skip if odd # quotes
635 } else if (z[i] == '\n') {
636 sqcnt = 0;
637 }
638 if (z[i] == '<' && z[i+1] == '/' &&
639 strncasecmp(&z[i], zEnd, nEnd) == 0) {
640 if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
641 pScr->fNScript = i - n;
642 fPScript = 0;
643 n = i + nEnd;
644 break;
645 }
646 }
647 if (z[i] == 0) goto incomplete;
648 if (fPScript) {
649 pScr->fNScript = i - n;
650 n = i;
651 }
652 else {
653#ifdef pIsInMeachnism
654 // If there is a script, execute it now and insert any output
655 // to the html stream for parsing as html. (ie. client side scripting)
656
657 if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
658
659 //for (curch = 0, curline = 1; curch <= curlast; curch++)
660 // if (z[curch] == '\n') curline++;
661
662 // arglist in pElem and text pointers in pScr?
663 // Inline scripts can contain unmatched brackets :-)
664 //char varind[50];
665 //sprintf(varind, "HtmlScrVar%d", p->varind++);
666 //char savech = fZText[pScr->fNStart + pScr->fNScript];
667 //fZText[pScr->fNStart + pScr->fNScript] = 0;
668 //char *scriptBody = StrDup(fZText[pScr->fNStart]);
669 //fZText[pScr->fNStart + pScr->fNScript] = savech;
670 AdvanceLayout(p);
671 inParse++;
672 char *result = ProcessScript((TGHtmlScript *) pElem); // pElem or pScr??
673 inParse--;
674 if (result) {
675 ol = fNAlloc;
676 rl = strlen(result);
677 fNAlloc += rl;
678 z = fZText = HtmlRealloc(z, ol+rl);
679 memmove(z + n + rl, z+n, ol - n);
680 memmove(z + n, result, rl);
681 }
682 }
683 pIsInScript = 0;
684 pIsInNoScript = 0;
685 pIsInNoFrames = 0;
686#endif
687 }
688 //continue;
689
690 }
691 else if (isspace((unsigned char)c)) {
692
693 // White space
694 for (i = 0;
695 (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
696 i++) { }
697 if (c == '\r' && z[n+i+1] == '\n') ++i;
698#if 0 // this is certainly NOT OK, since it alters pre-formatted text
699 if (sawdot == 1) {
700 pElem = new TGHtmlTextElement(2);
701 strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
702 pElem->fElId = ++fIdind;
703 pElem->fOffs = n;
704 pElem->fCount = 1;
705 AppendElement(pElem);
706 }
707#endif
708 pElem = new TGHtmlSpaceElement;
709 if (pElem == 0) goto incomplete;
710 ((TGHtmlSpaceElement *)pElem)->fW = 0;
711 pElem->fOffs = n;
712 pElem->fElId = ++fIdind;
713 if (c == '\n' || c == '\r') {
714 pElem->fFlags = HTML_NewLine;
715 pElem->fCount = 1;
716 i++;
717 inpCol = 0;
718 } else {
719 int iColStart = inpCol;
720 pElem->fFlags = 0;
721 for (j = 0; j < i; j++) {
722 inpCol = NextColumn(inpCol, z[n+j]);
723 }
724 pElem->fCount = inpCol - iColStart;
725 }
726 AppendElement(pElem);
727 n += i;
728
729 }
730 else if (c != '<' || fIPlaintext != 0 ||
731 (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
732
733 // Ordinary text
734 for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
735 if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
736 if (c == 0) goto incomplete;
737 if (fIPlaintext != 0 && z[n] == '<') {
738 switch (fIPlaintext) {
739 case Html_LISTING:
740 if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
741 fIPlaintext = 0;
742 goto doMarkup;
743 }
744 break;
745
746 case Html_XMP:
747 if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
748 fIPlaintext = 0;
749 goto doMarkup;
750 }
751 break;
752
753 case Html_TEXTAREA:
754 if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
755 fIPlaintext = 0;
756 goto doMarkup;
757 }
758 break;
759
760 default:
761 break;
762 }
763 }
764 pElem = new TGHtmlTextElement(i);
765 if (pElem == 0) goto incomplete;
766 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
767 tpElem->fElId = ++fIdind;
768 tpElem->fOffs = n;
769 strncpy(tpElem->fZText, &z[n], i);
770 tpElem->fZText[i] = 0;
771 AppendElement(pElem);
772 if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
774 }
775 pElem->fCount = (Html_16_t) strlen(tpElem->fZText);
776 n += i;
777 inpCol += i;
778
779 } else if (strncmp(&z[n], "<!--", 4) == 0) {
780
781 // An HTML comment. Just skip it.
782 for (i = 4; z[n+i]; i++) {
783 if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
784 }
785 if (z[n+i] == 0) goto incomplete;
786
787 pElem = new TGHtmlTextElement(i);
788 if (pElem == 0) goto incomplete;
789 TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
790 tpElem->fType = Html_COMMENT;
791 tpElem->fElId = ++fIdind;
792 tpElem->fOffs = n;
793 strncpy(tpElem->fZText, &z[n+4], i-4);
794 tpElem->fZText[i-4] = 0;
795 tpElem->fCount = 0;
796 AppendElement(pElem);
797
798 pElem = new TGHtmlElement(Html_EndCOMMENT);
799 AppToken(pElem, 0, n+4);
800
801 for (j = 0; j < i+3; j++) {
802 inpCol = NextColumn(inpCol, z[n+j]);
803 }
804 n += i + 3;
805
806 }
807 else {
808
809 // Markup.
810 //
811 // First get the name of the markup
812doMarkup:
813 argc = 1;
814 argv[0] = &z[n+1];
815 for (i = 1;
816 (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
817 i++) {}
818 arglen[0] = i - 1;
819 if (c == 0) goto incomplete;
820
821 // Now parse up the arguments
822
823 while (isspace((unsigned char)z[n+i])) ++i;
824 while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
825 if (argc > mxARG - 3) argc = mxARG - 3;
826 argv[argc] = &z[n+i];
827 j = 0;
828 while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
829 c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
830 arglen[argc] = j;
831 if (c == 0) goto incomplete;
832 i += j;
833 while (isspace((unsigned char)c)) {
834 i++;
835 c = z[n+i];
836 }
837 if (c == 0) goto incomplete;
838 argc++;
839 if (c != '=') {
840 argv[argc] = null;
841 arglen[argc] = 0;
842 argc++;
843 continue;
844 }
845 i++;
846 c = z[n+i];
847 while (isspace((unsigned char)c)) {
848 i++;
849 c = z[n+i];
850 }
851 if (c == 0) goto incomplete;
852 if (c == '\'' || c == '"') {
853 int cQuote = c;
854 i++;
855 argv[argc] = &z[n+i];
856 for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
857 if (c == 0) goto incomplete;
858 arglen[argc] = j;
859 i += j+1;
860 } else {
861 argv[argc] = &z[n+i];
862 for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
863 if (c == 0) goto incomplete;
864 arglen[argc] = j;
865 i += j;
866 }
867 argc++;
868 while (isspace(z[n+i])) ++i;
869 }
870 if (c == '/') {
871 i++;
872 c = z[n+i];
873 selfClose = 1;
874 } else {
875 selfClose = 0;
876 }
877 if (c == 0) goto incomplete;
878 for (j = 0; j < i+1; j++) {
879 inpCol = NextColumn(inpCol, z[n+j]);
880 }
881 n += i + 1;
882
883 // Lookup the markup name in the hash table
884
885 if (!gIsInit) {
886 HtmlHashInit();
887 gIsInit = 1;
888 }
889 c = argv[0][arglen[0]];
890 argv[0][arglen[0]] = 0;
891 h = HtmlHash(argv[0]);
892 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
893 if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
894 }
895 argv[0][arglen[0]] = c;
896 if (pMap == 0) continue; // Ignore unknown markup
897
898makeMarkupEntry:
899 // Construct a TGHtmlMarkupElement object for this markup.
900
901 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
902 if (pElem == 0) goto incomplete;
903
904 pElem->fElId = ++fIdind;
905 pElem->fOffs = n;
906
907 AddFormInfo(pElem);
908
909 // The new markup has now been constructed in pElem. But before
910 // appending it to the list, check to see if there is a special
911 // handler for this markup type.
912
913 if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
914 // delete pElem;
915
916 // Tricky, tricky. The user function might have caused the p->fZText
917 // pointer to change, so renew our copy of that pointer.
918
919 z = fZText;
920 if (z == 0) {
921 n = 0;
922 inpCol = 0;
923 goto incomplete;
924 }
925 continue;
926 }
927
928 // No special handler for this markup. Just append it to the
929 // list of all tokens.
930
931 AppendElement(pElem);
932 switch (pMap->fType) {
933 case Html_TABLE:
934 break;
935
936 case Html_PLAINTEXT:
937 case Html_LISTING:
938 case Html_XMP:
939 case Html_TEXTAREA:
940 fIPlaintext = pMap->fType;
941 break;
942
943 case Html_NOFRAMES:
944 if (!fHasFrames) break;
945#ifdef pIsInMeachnism
946 pIsInNoFrames = 1;
947#endif
948 case Html_NOSCRIPT:
949 break;
950 // coverity[unreachable]
951 if (!fHasScript) break;
952#ifdef pIsInMeachnism
953 pIsInNoScript = 1;
954#endif
955 case Html_SCRIPT:
956#ifdef pIsInMeachnism
957 pIsInScript = 1;
958#endif
959 // fallthrough
960 case Html_STYLE:
961 fPScript = (TGHtmlScript *) pElem;
962 break;
963
964 case Html_LI:
965 if (!fAddEndTags) break;
966 if (inLi) {
968 AppToken(e, pElem, n);
969 } else {
970 inLi = 1;
971 }
972 break;
973
974 case Html_EndLI:
975 inLi=0;
976 break;
977
978 case Html_EndOL:
979 case Html_EndUL:
980 if (!fAddEndTags) break;
981 if (inLi) {
983 AppToken(e, pElem, n);
984 } else {
985 inLi = 0;
986 }
987 break;
988
989 default:
990 break;
991 }
992
993 // If this is self-closing markup (ex: <br/> or <img/>) then
994 // synthesize a closing token.
995
996 if (selfClose && argv[0][0] != '/' &&
997 strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
998 selfClose = 0;
999 pMap++;
1000 argc = 1;
1001 goto makeMarkupEntry;
1002 }
1003 }
1004 }
1005
1006incomplete:
1007 fICol = inpCol;
1008 ////fPScript = 0;
1009
1010 return n;
1011}
1012
1013/************************** End HTML Tokenizer Code ***************************/
1014
1015////////////////////////////////////////////////////////////////////////////////
1016/// Make one markup entry.
1017
1019 int arglen[], char *argv[])
1020{
1022
1023 switch (objType) {
1024 case O_HtmlCell:
1025 e = new TGHtmlCell(type, argc, arglen, argv);
1026 break;
1027
1028 case O_HtmlTable:
1029 e = new TGHtmlTable(type, argc, arglen, argv);
1030 break;
1031
1032 case O_HtmlRef:
1033 e = new TGHtmlRef(type, argc, arglen, argv);
1034 break;
1035
1036 case O_HtmlLi:
1037 e = new TGHtmlLi(type, argc, arglen, argv);
1038 break;
1039
1040 case O_HtmlListStart:
1041 e = new TGHtmlListStart(type, argc, arglen, argv);
1042 break;
1043
1044 case O_HtmlImageMarkup:
1045 e = new TGHtmlImageMarkup(type, argc, arglen, argv);
1046 break;
1047
1048 case O_HtmlInput:
1049 e = new TGHtmlInput(type, argc, arglen, argv);
1050 break;
1051
1052 case O_HtmlForm:
1053 e = new TGHtmlForm(type, argc, arglen, argv);
1054 break;
1055
1056 case O_HtmlHr:
1057 e = new TGHtmlHr(type, argc, arglen, argv);
1058 break;
1059
1060 case O_HtmlAnchor:
1061 e = new TGHtmlAnchor(type, argc, arglen, argv);
1062 break;
1063
1064 case O_HtmlScript:
1065 e = new TGHtmlScript(type, argc, arglen, argv);
1066 break;
1067
1068 case O_HtmlMapArea:
1069 e = new TGHtmlMapArea(type, argc, arglen, argv);
1070 break;
1071
1072 default:
1073 e = new TGHtmlMarkupElement(type, argc, arglen, argv);
1074 break;
1075 }
1076
1077 return e;
1078}
1079
1080////////////////////////////////////////////////////////////////////////////////
1081/// Append text to the tokenizer engine.
1082
1084{
1085 int len = strlen(text);
1086
1087 if (fNText == 0) {
1088 fNAlloc = len + 100;
1089 fZText = new char [fNAlloc];
1090 } else if (fNText + len >= fNAlloc) {
1091 fNAlloc += len + 100;
1092 char *tmp = new char[fNAlloc];
1093 // coverity[secure_coding]
1094 strcpy(tmp, fZText);
1095 delete[] fZText;
1096 fZText = tmp;
1097 }
1098
1099 if (fZText == 0) {
1100 fNText = 0;
1101 UNTESTED;
1102 return;
1103 }
1104
1105 // coverity[secure_coding]
1106 strcpy(&fZText[fNText], text);
1107 fNText += len;
1108 fNComplete = Tokenize();
1109}
1110
1111////////////////////////////////////////////////////////////////////////////////
1112/// This routine takes a text representation of a token, converts it into a
1113/// TGHtmlElement object and inserts it immediately prior to pToken. If pToken
1114/// is 0, then the newly created TGHtmlElement is appended.
1115///
1116/// This routine does nothing to resize, restyle, relayout or redisplay
1117/// the HTML. That is the calling routine's responsibility.
1118///
1119/// Return the new TGHtmlElement object if successful. Return zero if
1120/// zType is not a known markup name.
1121///
1122/// pToken - Insert before this. Append if pToken == 0
1123/// zType - Type of markup. Ex: "/a" or "table"
1124/// zArgs - List of arguments
1125/// offs - Calculate offset, and insert changed text into fZText!
1126
1128 char *zType, char *zArgs, int offs)
1129{
1130 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1131 int h; // The hash on zType
1132 TGHtmlElement *pElem; // The new element
1133 //int nByte; // How many bytes to allocate
1134 //int i; // Loop counter
1135
1136 if (!gIsInit) {
1137 HtmlHashInit();
1138 gIsInit = 1;
1139 }
1140
1141 if (strcmp(zType, "Text") == 0) {
1142 pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
1143 if (pElem == 0) return 0;
1144 if (zArgs) {
1145 // coverity[secure_coding]
1146 strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
1147 pElem->fCount = (Html_16_t) strlen(zArgs);
1148 }
1149 } else if (!strcmp(zType, "Space")) {
1150 pElem = new TGHtmlSpaceElement();
1151 if (pElem == 0) return 0;
1152 } else {
1153 h = HtmlHash(zType);
1154 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1155 if (strcasecmp(pMap->fZName, zType) == 0) break;
1156 }
1157 if (pMap == 0) return 0;
1158 if (zArgs == 0 || *zArgs == 0) {
1159 // Special case of no arguments. This is a lot easier...
1160 // well... now its the same thing!
1161 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
1162 if (pElem == 0) return 0;
1163 } else {
1164 // The general case. There are arguments that need to be parsed
1165 // up. This is slower, but we gotta do it.
1166 //int argc;
1167 //char **argv;
1168 //char *zBuf;
1169
1170#if 0
1171 if (!SplitList(zArgs, &argc, &argv)) return 0;
1172
1173 // shall we insert a dummy argv[0]?
1174
1175 pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
1176 if (pElem == 0) return 1;
1177
1178 while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
1179 delete[] argv;
1180#else
1181 return 0;
1182#endif
1183 }
1184 }
1185
1186 pElem->fElId = ++fIdind;
1187
1188 AppToken(pElem, pToken, offs);
1189
1190 return pElem;
1191}
1192
1193////////////////////////////////////////////////////////////////////////////////
1194/// Insert text into text token, or break token into two text tokens.
1195/// Also, handle backspace char by deleting text.
1196/// Should also handle newline char by splitting text.
1197
1198int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
1199{
 // NOTE: everything between "#if 0" and "#endif" below is compiled out, so
 // this function currently ignores both arguments and always returns 1.
1200#if 0
1201 TGHtmlElement *p, *pElem;
1202 int i, l, n = 0;
1203 int idx = 0;
1204 int ptyp = Html_Unknown;
1205 int istxt = 0;
1206 char *cp = 0, c, *cp2;
1207
1208 if (GetIndex(argv[3], &p, &i) != 0) {
1209 // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
1210 return 0;
1211 }
1212 if (p) {
1213 ptyp = p->fType;
1214 if ((istxt = (ptyp == Html_Text))) {
1215 l = p->fCount;
1216 cp = ((TGHtmlTextElement *)p)->fZText;
1217 }
1218 }
1219 if (argv[2][0] == 'b') { // Break text token into two.
1220 if (!istxt) return 1;
1221 if (i == 0 || i == l) return 1;
1222 pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
1223 cp[i] = 0;
1224 p->fCount = i;
1225 return 1;
1226 }
1227 c = argv[4][0];
1228 if (!c) return 1;
1229 if (c == '\b') {
1230 if ((!istxt) || (!l) || (!i)) {
1231 if (!p) return 1;
1232 if (p->fType == Html_BR)
1233 RemoveElements(p, p);
1234 return 1;
1235 }
1236 if (p && l == 1) {
1237 RemoveElements(p, p);
1238 return 1;
1239 }
1240 if (i == l)
1241 cp[p->fCount] = 0;
1242 else
1243 memcpy(cp+i-1, cp+i, l-i+1);
1244
1245 cp[--p->fCount] = 0;
1246 if (ins.i-- <= 0) ins.i = 0;
1247 ins.p = p;
1248 return 1;
1249 }
1250 if (c == '\n' || c == '\r') {
1251 }
1252 if (istxt) {
1253 char *cp;
1254 int t, j, alen = strlen(argv[4]);
1255 n = alen + l;
1256
1258
1259 if (text->fZText == (char*) ((&text->fZText)+1)) {
1260 cp = new char[n+1];
1261 strcpy(cp, text->fZText);
1262 } else {
1263 cp = new char[n+1];
1264 strcpy(cp, text->fZText);
1265 }
1266 cp2 = new char[alen+1];
1267 memcpy(cp2, argv[4], alen+1);
1269 alen = strlen(cp2);
1270 memmove(cp+alen+i, cp+i, l-i+1);
1271 for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1272 delete[] cp2;
1273 delete[] text->fZText;
1274 text->fZText = cp;
1275 p->fCount = strlen(cp);
1276 ins.p = p;
1277 ins.i = i+alen;
1278 } else {
1279 p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
1280 AddStyle(p);
1281 i = 0;
1282 ins.p = p;
1283 ins.i = 1;
1284 }
1285 if (p) {
1286 idx = p->base.id;
1287 AddStrOffset(p, argv[4], i);
1288 }
1289#endif
1290 return 1;
1291}
1292
1293////////////////////////////////////////////////////////////////////////////////
1294/// Returns token map matching zType name.
1295
1297{
1298 SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1299 int h; // The hash on zType
1300
1301 if (!gIsInit) {
1302 HtmlHashInit();
1303 gIsInit = 1;
1304 }
1305 h = HtmlHash(zType);
1306 for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1307 if (strcasecmp(pMap->fZName, zType) == 0) break;
1308 }
1309
1310 return pMap;
1311}
1312
1313////////////////////////////////////////////////////////////////////////////////
1314/// Convert a markup name into a type integer
1315
1316int TGHtml::NameToType(char *zType)
1317{
1318 SHtmlTokenMap_t *pMap = NameToPmap(zType);
1319 return pMap ? pMap->fType : (int)Html_Unknown;
1320}
1321
1322////////////////////////////////////////////////////////////////////////////////
1323/// Convert a type into a symbolic name
1324
1325const char *TGHtml::TypeToName(int type)
1326{
1327 if (type >= Html_A && type <= Html_EndXMP) {
1328 SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
1329 return pMap->fZName;
1330 } else {
1331 return "???";
1332 }
1333}
1334
1335////////////////////////////////////////////////////////////////////////////////
1336/// For debugging purposes, print information about a token
1337
1339{
1340//#ifdef DEBUG
1341 static char zBuf[200];
1342 int j;
1343 const char *zName;
1344
1345 if (p == 0) {
1346 snprintf(zBuf, 200, "NULL");
1347 return zBuf;
1348 }
1349 switch (p->fType) {
1350 case Html_Text:
1351 snprintf(zBuf, 200, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
1352 break;
1353
1354 case Html_Space:
1355 if (p->fFlags & HTML_NewLine) {
1356 snprintf(zBuf, 200, "space: \"\\n\"");
1357 } else {
1358 snprintf(zBuf, 200, "space: \" \"");
1359 }
1360 break;
1361
1362 case Html_Block: {
1363 TGHtmlBlock *block = (TGHtmlBlock *) p;
1364 if (block->fN > 0) {
1365 int n = block->fN;
1366 if (n > 150) n = 150;
1367 snprintf(zBuf, 200, "<Block z=\"%.*s\">", n, block->fZ);
1368 } else {
1369 snprintf(zBuf, 200, "<Block>");
1370 }
1371 break;
1372 }
1373
1374 default:
1375 if (p->fType >= HtmlMarkupMap[0].fType
1377 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1378 } else {
1379 zName = "Unknown";
1380 }
1381 snprintf(zBuf, 200, "markup (%d) <%s", p->fType, zName);
1382 for (j = 1 ; j < p->fCount; j += 2) {
1383 snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf), " %s=\"%s\"",
1384 ((TGHtmlMarkupElement *)p)->fArgv[j-1],
1385 ((TGHtmlMarkupElement *)p)->fArgv[j]);
1386 }
1387 // coverity[secure_coding]
1388 strcat(zBuf, ">");
1389 break;
1390 }
1391 return zBuf;
1392//#else
1393// return 0;
1394//#endif
1395}
1396
1397////////////////////////////////////////////////////////////////////////////////
1398/// Append all the arguments of the given markup to the given TGString.
1399///
1400/// Example: If the markup is <IMG SRC=image.gif ALT="hello!">
1401/// then the following text is appended to the TGString:
1402///
1403/// "src image.gif alt hello!"
1404///
1405/// Notice how all attribute names are converted to lower case.
1406/// This conversion happens in the parser.
1407
1409{
1410 int i;
1411
1412 for (i = 0; i + 1 < pElem->fCount; i += 2) {
1413 str->Append(pElem->fArgv[i]);
1414 str->Append("=");
1415 str->Append(pElem->fArgv[i+1]);
1416 str->Append(" ");
1417 }
1418}
1419
1420////////////////////////////////////////////////////////////////////////////////
1421/// Returns token name of html element p.
1422
1424{
1425 static char zBuf[200];
1426 //int j;
1427 const char *zName;
1428
1429 zBuf[0] = 0;
1430 if (p == 0) {
1431 // coverity[secure_coding]: zBuf is large enough
1432 strcpy(zBuf, "NULL");
1433 return zBuf;
1434 }
1435 switch (p->fType) {
1436 case Html_Text:
1437 case Html_Space:
1438 break;
1439
1440 case Html_Block:
1441 break;
1442
1443 default:
1444 if (p->fType >= HtmlMarkupMap[0].fType &&
1446 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1447 } else {
1448 zName = "Unknown";
1449 }
1450 strlcpy(zBuf, zName, sizeof(zBuf));
1451 break;
1452 }
1453
1454 return zBuf;
1455}
1456
1457////////////////////////////////////////////////////////////////////////////////
1458/// Returns token map at location n.
1459
1461{
1462 return HtmlMarkupMap+n;
1463}
1464
1465////////////////////////////////////////////////////////////////////////////////
1466/// Return all tokens between the two elements as a string list.
1467
1469{
1470 TGString *str;
1471 int i;
1472 const char *zName;
1473 char zLine[100];
1474
1475 str = new TGString("");
1476 while (p && p != pEnd) {
1477 switch (p->fType) {
1478 case Html_Block:
1479 break;
1480
1481 case Html_Text:
1482 str->Append("{ Text \"");
1483 str->Append(((TGHtmlTextElement *)p)->fZText);
1484 str->Append("\" } ");
1485 break;
1486
1487 case Html_Space:
1488 snprintf(zLine, 100, "Space %d %d ",
1489 p->fCount, (p->fFlags & HTML_NewLine) != 0);
1490 str->Append(zLine);
1491 break;
1492
1493 case Html_Unknown:
1494 str->Append("Unknown ");
1495 break;
1496
1497 default:
1498 str->Append("{ Markup ");
1499 if (p->fType >= HtmlMarkupMap[0].fType &&
1501 zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1502 } else {
1503 zName = "Unknown";
1504 }
1505 str->Append(zName);
1506 str->Append(" ");
1507 for (i = 0; i < p->fCount; ++i) {
1508 str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
1509 str->Append(" ");
1510 }
1511 str->Append("} ");
1512 break;
1513 }
1514 p = p->fPNext;
1515 }
1516
1517 return str;
1518}
1519
1520////////////////////////////////////////////////////////////////////////////////
1521/// Print a list of tokens
1522
1524{
1525 TGHtmlElement *p;
1526
1527 for (p = first; p != last; p = p->fPNext) {
1528 if (p->fType == Html_Block) {
1529 TGHtmlBlock *block = (TGHtmlBlock *) p;
1530 const char *z = block->fZ;
1531 int n = block->fN;
1532 if (n == 0 || z == 0) {
1533 n = 1;
1534 z = "";
1535 }
1536 printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1537 p->fFlags, p->fCount, block->fLeft, block->fRight,
1538 block->fTop, block->fBottom, n, z);
1539 } else {
1540 printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
1541 p->fStyle.fFont, p->fStyle.fColor,
1542 p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
1543 }
1544 }
1545}
SVector< double, 2 > v
Definition: Dict.h:5
PyObject * fValue
#define c(i)
Definition: RSha256.hxx:101
#define h(i)
Definition: RSha256.hxx:106
#define e(i)
Definition: RSha256.hxx:103
void ToLower(char *z)
Convert a string to all lower-case letters.
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
static int EscHash(const char *zName)
static int HtmlHash(const char *zName)
static char gAcMsChar[]
static int gIsInit
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
SHtmlTokenMap_t HtmlMarkupMap[]
static struct SgEsc_t gEscSequences[]
Definition: TGHtmlParse.cxx:74
#define mxARG
static void HtmlHashInit(void)
#define ESC_HASH_SIZE
static void EscInit()
@ Html_COMMENT
Definition: TGHtmlTokens.h:74
@ Html_TEXTAREA
Definition: TGHtmlTokens.h:176
@ Html_XMP
Definition: TGHtmlTokens.h:193
@ Html_STYLE
Definition: TGHtmlTokens.h:167
@ Html_SCRIPT
Definition: TGHtmlTokens.h:158
@ Html_LI
Definition: TGHtmlTokens.h:122
@ Html_LISTING
Definition: TGHtmlTokens.h:125
@ Html_TABLE
Definition: TGHtmlTokens.h:172
@ Html_EndUL
Definition: TGHtmlTokens.h:189
@ Html_EndOL
Definition: TGHtmlTokens.h:144
@ Html_NOFRAMES
Definition: TGHtmlTokens.h:139
@ Html_PLAINTEXT
Definition: TGHtmlTokens.h:151
@ Html_Block
Definition: TGHtmlTokens.h:45
@ Html_Space
Definition: TGHtmlTokens.h:43
@ Html_Text
Definition: TGHtmlTokens.h:42
@ Html_A
Definition: TGHtmlTokens.h:46
@ Html_NOSCRIPT
Definition: TGHtmlTokens.h:141
@ Html_EndLI
Definition: TGHtmlTokens.h:123
@ Html_EndXMP
Definition: TGHtmlTokens.h:194
@ Html_BR
Definition: TGHtmlTokens.h:65
@ Html_Unknown
Definition: TGHtmlTokens.h:44
@ Html_EndCOMMENT
Definition: TGHtmlTokens.h:75
#define HTML_MARKUP_HASH_SIZE
Definition: TGHtmlTokens.h:200
#define HTML_MARKUP_COUNT
Definition: TGHtmlTokens.h:199
#define O_HtmlInput
Definition: TGHtml.h:857
#define O_HtmlHr
Definition: TGHtml.h:859
#define O_HtmlTable
Definition: TGHtml.h:852
#define UNTESTED
Definition: TGHtml.h:64
#define HTML_NewLine
Definition: TGHtml.h:275
#define O_HtmlImageMarkup
Definition: TGHtml.h:856
short Html_16_t
Definition: TGHtml.h:136
#define O_HtmlAnchor
Definition: TGHtml.h:860
#define O_HtmlLi
Definition: TGHtml.h:854
#define O_HtmlMapArea
Definition: TGHtml.h:862
#define O_HtmlRef
Definition: TGHtml.h:853
#define O_HtmlCell
Definition: TGHtml.h:851
#define O_HtmlScript
Definition: TGHtml.h:861
#define O_HtmlListStart
Definition: TGHtml.h:855
#define O_HtmlForm
Definition: TGHtml.h:858
int type
Definition: TGX11.cxx:120
#define snprintf
Definition: civetweb.c:1540
Html_u16_t fN
Definition: TGHtml.h:719
Html_u16_t fRight
Definition: TGHtml.h:718
char * fZ
Definition: TGHtml.h:716
Html_u16_t fLeft
Definition: TGHtml.h:718
int fTop
Definition: TGHtml.h:717
int fBottom
Definition: TGHtml.h:717
Html_u8_t fFlags
Definition: TGHtml.h:265
Html_u8_t fType
Definition: TGHtml.h:264
SHtmlStyle_t fStyle
Definition: TGHtml.h:263
TGHtmlElement * fPPrev
Definition: TGHtml.h:262
Html_16_t fCount
Definition: TGHtml.h:266
TGHtmlElement * fPNext
Definition: TGHtml.h:261
int fNStart
Definition: TGHtml.h:683
int fNScript
Definition: TGHtml.h:684
Html_16_t fW
Definition: TGHtml.h:308
char * fZText
Definition: TGHtml.h:300
int fAddEndTags
Definition: TGHtml.h:1245
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
int fICol
Definition: TGHtml.h:1173
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
int fIdind
Definition: TGHtml.h:1270
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
TGHtmlElement * fPFirst
Definition: TGHtml.h:1128
void AddStyle(TGHtmlElement *p)
This routine adds information to the input texts that doesn't change when the display is resized or w...
virtual char * ProcessScript(TGHtmlScript *)
Definition: TGHtml.h:957
int fNToken
Definition: TGHtml.h:1130
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
const char * TypeToName(int type)
Convert a type into a symbolic name.
int Tokenize()
Process as much of the input HTML as possible.
int NameToType(char *zType)
Convert a markup name into a type integer.
int fIPlaintext
Definition: TGHtml.h:1175
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
char * fZText
Definition: TGHtml.h:1168
int fNAlloc
Definition: TGHtml.h:1170
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
virtual int ProcessToken(TGHtmlElement *, const char *, int)
Definition: TGHtml.h:927
TGHtmlScript * fPScript
Definition: TGHtml.h:1179
int fHasFrames
Definition: TGHtml.h:1244
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
TGHtmlElement * fPLast
Definition: TGHtml.h:1129
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
int fNComplete
Definition: TGHtml.h:1171
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
int GetIndex(const char *zIndex, TGHtmlElement **ppToken, int *pIndex)
This routine decodes a complete index specification.
int fHasScript
Definition: TGHtml.h:1243
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
void AddFormInfo(TGHtmlElement *p)
Add the DOM control information for form elements.
Definition: TGHtmlForm.cxx:565
int fNText
Definition: TGHtml.h:1169
TString & Append(const char *cs)
Definition: TString.h:559
TText * text
const Int_t n
Definition: legend1.C:16
null_t< F > null()
Definition: first.py:1
const char * cnt
Definition: TXMLSetup.cxx:74
unsigned int fColor
Definition: TGHtml.h:145
unsigned int fAlign
Definition: TGHtml.h:147
unsigned int fFont
Definition: TGHtml.h:144
unsigned int fFlags
Definition: TGHtml.h:150
Html_16_t fObjType
Definition: TGHtml.h:842
Html_16_t fType
Definition: TGHtml.h:841
SHtmlTokenMap_t * fPCollide
Definition: TGHtml.h:843
const char * fZName
Definition: TGHtml.h:840
auto * l
Definition: textangle.C:4
static long int sum(long int i)
Definition: Factory.cxx:2258