Logo ROOT   6.12/07
Reference Guide
TGHtmlParse.cxx
Go to the documentation of this file.
1 // $Id: TGHtmlParse.cxx,v 1.1 2007/05/04 17:07:01 brun Exp $
2 // Author: Valeriy Onuchin 03/05/2007
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2001, Rene Brun, Fons Rademakers and Reiner Rohlfs *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /**************************************************************************
13 
14  HTML widget for xclass. Based on tkhtml 1.28
15  Copyright (C) 1997-2000 D. Richard Hipp <drh@acm.org>
16  Copyright (C) 2002-2003 Hector Peraza.
17 
18  This library is free software; you can redistribute it and/or
19  modify it under the terms of the GNU Library General Public
20  License as published by the Free Software Foundation; either
21  version 2 of the License, or (at your option) any later version.
22 
23  This library is distributed in the hope that it will be useful,
24  but WITHOUT ANY WARRANTY; without even the implied warranty of
25  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26  Library General Public License for more details.
27 
28  You should have received a copy of the GNU Library General Public
29  License along with this library; if not, write to the Free
30  Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
31 
32 **************************************************************************/
33 
34 // A tokenizer that converts raw HTML into a linked list of HTML elements.
35 
36 #include <string.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <ctype.h>
40 
41 #include "TGHtml.h"
42 #include "TGHtmlTokens.h"
43 
44 
45 //----------------------------------------------------------------------
46 
48 
49 
50 /****************** Begin Escape Sequence Translator *************/
51 
52 // The next section of code implements routines used to translate
53 // the '&' escape sequences of SGML to individual characters.
54 // Examples:
55 //
56 // &amp; &
57 // &lt; <
58 // &gt; >
59 // &nbsp; nonbreakable space
60 //
61 
// One entry of the SGML escape ("&name;") table. Entries whose names hash
// to the same bucket are chained together through fPNext.

struct SgEsc_t {
   const char *fZName;     // Entity name without the '&' and ';'.  ex: "amp"
   char        fValue[8];  // NUL-terminated replacement text.  ex: "&"
   SgEsc_t    *fPNext;     // Next entry whose fZName hashes to the same bucket
};
70 
71 // The following is a table of all escape sequences. Add new sequences
72 // by adding entries to this table.
73 
74 static struct SgEsc_t gEscSequences[] = {
75  { "quot", "\"", 0 },
76  { "amp", "&", 0 },
77  { "lt", "<", 0 },
78  { "gt", ">", 0 },
79  { "nbsp", " ", 0 },
80  { "iexcl", "\241", 0 },
81  { "cent", "\242", 0 },
82  { "pound", "\243", 0 },
83  { "curren", "\244", 0 },
84  { "yen", "\245", 0 },
85  { "brvbar", "\246", 0 },
86  { "sect", "\247", 0 },
87  { "uml", "\250", 0 },
88  { "copy", "\251", 0 },
89  { "ordf", "\252", 0 },
90  { "laquo", "\253", 0 },
91  { "not", "\254", 0 },
92  { "shy", "\255", 0 },
93  { "reg", "\256", 0 },
94  { "macr", "\257", 0 },
95  { "deg", "\260", 0 },
96  { "plusmn", "\261", 0 },
97  { "sup2", "\262", 0 },
98  { "sup3", "\263", 0 },
99  { "acute", "\264", 0 },
100  { "micro", "\265", 0 },
101  { "para", "\266", 0 },
102  { "middot", "\267", 0 },
103  { "cedil", "\270", 0 },
104  { "sup1", "\271", 0 },
105  { "ordm", "\272", 0 },
106  { "raquo", "\273", 0 },
107  { "frac14", "\274", 0 },
108  { "frac12", "\275", 0 },
109  { "frac34", "\276", 0 },
110  { "iquest", "\277", 0 },
111  { "Agrave", "\300", 0 },
112  { "Aacute", "\301", 0 },
113  { "Acirc", "\302", 0 },
114  { "Atilde", "\303", 0 },
115  { "Auml", "\304", 0 },
116  { "Aring", "\305", 0 },
117  { "AElig", "\306", 0 },
118  { "Ccedil", "\307", 0 },
119  { "Egrave", "\310", 0 },
120  { "Eacute", "\311", 0 },
121  { "Ecirc", "\312", 0 },
122  { "Euml", "\313", 0 },
123  { "Igrave", "\314", 0 },
124  { "Iacute", "\315", 0 },
125  { "Icirc", "\316", 0 },
126  { "Iuml", "\317", 0 },
127  { "ETH", "\320", 0 },
128  { "Ntilde", "\321", 0 },
129  { "Ograve", "\322", 0 },
130  { "Oacute", "\323", 0 },
131  { "Ocirc", "\324", 0 },
132  { "Otilde", "\325", 0 },
133  { "Ouml", "\326", 0 },
134  { "times", "\327", 0 },
135  { "Oslash", "\330", 0 },
136  { "Ugrave", "\331", 0 },
137  { "Uacute", "\332", 0 },
138  { "Ucirc", "\333", 0 },
139  { "Uuml", "\334", 0 },
140  { "Yacute", "\335", 0 },
141  { "THORN", "\336", 0 },
142  { "szlig", "\337", 0 },
143  { "agrave", "\340", 0 },
144  { "aacute", "\341", 0 },
145  { "acirc", "\342", 0 },
146  { "atilde", "\343", 0 },
147  { "auml", "\344", 0 },
148  { "aring", "\345", 0 },
149  { "aelig", "\346", 0 },
150  { "ccedil", "\347", 0 },
151  { "egrave", "\350", 0 },
152  { "eacute", "\351", 0 },
153  { "ecirc", "\352", 0 },
154  { "euml", "\353", 0 },
155  { "igrave", "\354", 0 },
156  { "iacute", "\355", 0 },
157  { "icirc", "\356", 0 },
158  { "iuml", "\357", 0 },
159  { "eth", "\360", 0 },
160  { "ntilde", "\361", 0 },
161  { "ograve", "\362", 0 },
162  { "oacute", "\363", 0 },
163  { "ocirc", "\364", 0 },
164  { "otilde", "\365", 0 },
165  { "ouml", "\366", 0 },
166  { "divide", "\367", 0 },
167  { "oslash", "\370", 0 },
168  { "ugrave", "\371", 0 },
169  { "uacute", "\372", 0 },
170  { "ucirc", "\373", 0 },
171  { "uuml", "\374", 0 },
172  { "yacute", "\375", 0 },
173  { "thorn", "\376", 0 },
174  { "yuml", "\377", 0 },
175 };
176 
177 
// Number of buckets in the escape sequence hash table: the number of
// entries in gEscSequences[] plus 7. For best results this should be a
// prime number close to the number of escape sequences known to the system.

#define ESC_HASH_SIZE (7 + sizeof(gEscSequences) / sizeof(gEscSequences[0]))
183 
184 
185 // The hash table
186 //
187 // If the name of an escape sequence hashes to the value H, then
188 // gApEscHash[H] will point to a linked list of Esc structures, one of
189 // which will be the Esc structure for that escape sequence.
190 
191 static struct SgEsc_t *gApEscHash[ESC_HASH_SIZE];
192 
193 
194 // Hash a escape sequence name. The value returned is an integer
195 // between 0 and ESC_HASH_SIZE-1, inclusive.
196 
197 static int EscHash(const char *zName) {
198  int h = 0; // The hash value to be returned
199  char c; // The next character in the name being hashed
200 
201  while ((c = *zName) != 0) {
202  h = h<<5 ^ h ^ c;
203  zName++;
204  }
205  if (h < 0) h = -h;
206 
207  return h % ESC_HASH_SIZE;
208 }
209 
210 #ifdef TEST
211 // Compute the longest and average collision chain length for the
212 // escape sequence hash table
213 
214 static void EscHashStats()
215 {
216  int i;
217  int sum = 0;
218  int max = 0;
219  int cnt;
220  int notempty = 0;
221  struct SgEsc_t *p;
222 
223  for (i = 0; i < sizeof(gEscSequences) / sizeof(gEscSequences[0]); i++) {
224  cnt = 0;
225  p = gApEscHash[i];
226  if (p) notempty++;
227  while (p) {
228  ++cnt;
229  p = p->fPNext;
230  }
231  sum += cnt;
232  if (cnt > max) max = cnt;
233  }
234  printf("Longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
235  max, (double)sum/(double)notempty, i, i-notempty,
236  100.0*(i-notempty)/(double)i);
237 }
238 #endif
239 
240 // Initialize the escape sequence hash table
241 
242 static void EscInit() {
243  int i; /* For looping thru the list of escape sequences */
244  int h; /* The hash on a sequence */
245 
246  for (i = 0; i < int(sizeof(gEscSequences) / sizeof(gEscSequences[i])); i++) {
247 /* #ifdef XCLASS_UTF_MAX */
248 #if 0
249  int c = gEscSequences[i].value[0];
250  xclass::UniCharToUtf(c, gEscSequences[i].value);
251  }
252 #endif
253  h = EscHash(gEscSequences[i].fZName);
254  gEscSequences[i].fPNext = gApEscHash[h];
255  gApEscHash[h] = &gEscSequences[i];
256  }
257 #ifdef TEST
258  EscHashStats();
259 #endif
260 }
261 
262 
// Replacement table for the non-standard microsoft characters 0x80..0x9f.
// Entry k is the plain ASCII stand-in for character 0x80+k, so that the
// character is visible on Unix systems. Every replacement is below 0x80,
// to avoid UTF-8 problems.

static char gAcMsChar[] = {
   /* 0x80 - 0x83 */ 'C',  ' ',  ',',  'f',
   /* 0x84 - 0x87 */ '"',  '.',  '*',  '*',
   /* 0x88 - 0x8b */ '^',  '%',  'S',  '<',
   /* 0x8c - 0x8f */ 'O',  ' ',  'Z',  ' ',
   /* 0x90 - 0x93 */ ' ',  '\'', '\'', '"',
   /* 0x94 - 0x97 */ '"',  '*',  '-',  '-',
   /* 0x98 - 0x9b */ '~',  '@',  's',  '>',
   /* 0x9c - 0x9f */ 'o',  ' ',  'z',  'Y',
};
302 
303 
304 ////////////////////////////////////////////////////////////////////////////////
305 /// Translate escape sequences in the string "z". "z" is overwritten
306 /// with the translated sequence.
307 ///
308 /// Unrecognized escape sequences are unaltered.
309 ///
310 /// Example:
311 ///
312 /// input = "AT&amp;T &gt MCI"
313 /// output = "AT&T > MCI"
314 
316 {
317  int from; // Read characters from this position in z[]
318  int to; // Write characters into this position in z[]
319  int h; // A hash on the escape sequence
320  struct SgEsc_t *p; // For looping down the escape sequence collision chain
321  static int isInit = 0; // True after initialization
322 
323  from = to = 0;
324  if (!isInit) {
325  EscInit();
326  isInit = 1;
327  }
328  while (z[from]) {
329  if (z[from] == '&') {
330  if (z[from+1] == '#') {
331  int i = from + 2;
332  int v = 0;
333  while (isdigit(z[i])) {
334  v = v*10 + z[i] - '0';
335  i++;
336  }
337  if (z[i] == ';') { i++; }
338 
339  // Translate the non-standard microsoft characters in the range of
340  // 0x80 to 0x9f into something we can see.
341 
342  if (v >= 0x80 && v < 0xa0) {
343  v = gAcMsChar[v & 0x1f];
344  }
345 
346  // Put the character in the output stream in place of the "&#000;".
347  // How we do this depends on whether or not we are using UTF-8.
348 
349  z[to++] = v;
350  from = i;
351  } else {
352  int i = from+1;
353  int c;
354  while (z[i] && isalnum(z[i])) ++i;
355  c = z[i];
356  z[i] = 0;
357  h = EscHash(&z[from+1]);
358  p = gApEscHash[h];
359  while (p && strcmp(p->fZName, &z[from+1]) != 0) p = p->fPNext;
360  z[i] = c;
361  if (p) {
362  int j;
363  for (j = 0; p->fValue[j]; ++j) z[to++] = p->fValue[j];
364  from = i;
365  if (c == ';') from++;
366  } else {
367  z[to++] = z[from++];
368  }
369  }
370 
371  // Look for the non-standard microsoft characters between 0x80 and 0x9f
372  // and translate them into printable ASCII codes. Separate algorithms
373  // are required to do this for plain ascii and for utf-8.
374 
375  } else if (((unsigned char) z[from]) >= 0x80 &&
376  ((unsigned char) z[from]) < 0xa0) {
377  z[to++] = gAcMsChar[z[from++] & 0x1f];
378  } else {
379  z[to++] = z[from++];
380  }
381  }
382  z[to] = 0;
383 }
384 
385 /******************* End Escape Sequence Translator ***************/
386 
387 /******************* Begin HTML tokenizer code *******************/
388 
// Zero until the markup hash table has been built from HtmlMarkupMap[];
// set to 1 by the code that calls HtmlHashInit() so the table is only
// built once.

static int gIsInit = 0;
393 
394 // The hash table for HTML markup names.
395 //
396 // If an HTML markup name hashes to H, then gApMap[H] will point to
397 // a linked list of SHtmlTokenMap_t structures, one of which will describe
398 // the particular markup (if it exists.)
399 
401 
402 // Hash a markup name
403 //
404 // HTML markup is case insensitive, so this function will give the
405 // same hash regardless of the case of the markup name.
406 //
407 // The value returned is an integer between 0 and HTML_MARKUP_HASH_SIZE-1,
408 // inclusive.
409 
410 static int HtmlHash(const char *zName) {
411  int h = 0;
412  char c;
413 
414  while ((c = *zName) != 0) {
415  if (isupper(c)) { // do we have to check for this??????
416  c = tolower(c);
417  }
418  h = h<<5 ^ h ^ c;
419  zName++;
420  }
421  if (h < 0) {
422  h = -h;
423  }
424 
425  return h % HTML_MARKUP_HASH_SIZE;
426 }
427 
428 
429 #ifdef TEST
430 // Compute the longest and average collision chain length for the
431 // markup hash table
432 
433 static void HtmlHashStats() {
434  int i;
435  int sum = 0;
436  int max = 0;
437  int cnt;
438  int notempty = 0;
439  struct sgMap *p;
440 
441  for (i = 0; i < HTML_MARKUP_COUNT; i++) {
442  cnt = 0;
443  p = gApMap[i];
444  if (p) notempty++;
445  while (p) {
446  cnt++;
447  p = p->fPCollide;
448  }
449  sum += cnt;
450  if (cnt > max) max = cnt;
451  }
452 
453  printf("longest chain=%d avg=%g slots=%d empty=%d (%g%%)\n",
454  max, (double)sum/(double)notempty, i, i-notempty,
455  100.0*(i-notempty)/(double)i);
456 }
457 #endif
458 
459 
460 // Initialize the escape sequence hash table
461 
462 static void HtmlHashInit(void){
463  int i;
464  int h; // The hash on a markup name
465 
466  for (i = 0; i < HTML_MARKUP_COUNT; i++) {
467  h = HtmlHash(HtmlMarkupMap[i].fZName);
468  HtmlMarkupMap[i].fPCollide = gApMap[h];
469  gApMap[h] = &HtmlMarkupMap[i];
470  }
471 #ifdef TEST
472  HtmlHashStats();
473 #endif
474 }
475 
476 ////////////////////////////////////////////////////////////////////////////////
477 /// Append the given TGHtmlElement to the tokenizer's list of elements
478 
480 {
481  pElem->fPNext = 0;
482  pElem->fPPrev = fPLast;
483  if (fPFirst == 0) {
484  fPFirst = pElem;
485  } else {
486  fPLast->fPNext = pElem;
487  }
488  fPLast = pElem;
489  fNToken++;
490 }
491 
492 ////////////////////////////////////////////////////////////////////////////////
493 /// Insert token pNew before token p
494 
496 {
497  if (offs < 0) {
498  if (p) {
499  offs = p->fOffs;
500  } else {
501  offs = fNText;
502  }
503  }
504 
505 ////if (p) { pNew->fStyle = p->fStyle; pNew->fFlags = p->fFlags; }
506 
507 // pNew->fCount = 0;
508  pNew->fOffs = offs;
509  pNew->fPNext = p;
510  if (p) {
511  pNew->fElId = p->fElId;
512  p->fElId = ++fIdind;
513  pNew->fPPrev = p->fPPrev;
514  if (p->fPPrev) p->fPPrev->fPNext = pNew;
515  if (fPFirst == p) fPFirst = pNew;
516  p->fPPrev = pNew;
517  } else {
518  pNew->fElId = ++fIdind;
519  AppendElement(pNew);
520  }
521  fNToken++;
522 }
523 
////////////////////////////////////////////////////////////////////////////////
/// Return the column index that follows character c when c sits at column
/// iCol: a newline goes back to column 0, a tab advances to the next
/// multiple of 8, and any other character advances by one.

static int NextColumn(int iCol, char c)
{
   if (c == '\n') return 0;
   if (c == '\t') return (iCol | 7) + 1;
   return iCol + 1;
}
536 
////////////////////////////////////////////////////////////////////////////////
/// Convert a string to all lower-case letters, in place.
///
/// z must point to a NUL-terminated, writable string. Each character is
/// handed to the <ctype.h> functions as an unsigned char, because passing a
/// negative plain char (any byte >= 0x80 where char is signed) to
/// isupper()/tolower() is undefined.

void ToLower(char *z)
{
   for (; *z; z++) {
      const unsigned char ch = (unsigned char)*z;
      if (isupper(ch)) *z = (char)tolower(ch);
   }
}
547 
548 ////////////////////////////////////////////////////////////////////////////////
549 /// Process as much of the input HTML as possible. Construct new
550 /// TGHtmlElement objects and append them to the list. Return
551 /// the number of characters actually processed.
552 ///
553 /// This routine may invoke a callback procedure which could delete
554 /// the HTML widget.
555 ///
556 /// This routine is not reentrant for the same HTML widget. To
557 /// prevent reentrancy (during a callback), the p->fICol field is
558 /// set to a negative number. This is a flag to future invocations
559 /// not to reenter this routine. The p->fICol field is restored
560 /// before exiting, of course.
561 
563 {
564  char *z; // The input HTML text
565  int c; // The next character of input
566  int n; // Number of characters processed so far
567  int inpCol; // Column of input
568  int i, j; // Loop counters
569  int h; // Result from HtmlHash()
570  TGHtmlElement *pElem;// A new HTML element
571  int selfClose; // True for content free elements. Ex: <br/>
572  int argc; // The number of arguments on a markup
573  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
574 # define mxARG 200 // Maximum number of parameters in a single markup
575  char *argv[mxARG]; // Pointers to each markup argument.
576  int arglen[mxARG]; // Length of each markup argument
577  //int rl, ol;
578 #ifdef pIsInMeachnism
579  int pIsInScript = 0;
580  int pIsInNoScript = 0;
581  int pIsInNoFrames = 0;
582 #endif
583  int sawdot = 0;
584  int inLi = 0;
585 
586  static char null[1] = { "" };
587 
588  inpCol = fICol;
589  n = fNComplete;
590  z = fZText;
591  if (inpCol < 0) return n; // Prevents recursion
592  fICol = -1;
593  pElem = 0;
594 
595  while ((c = z[n]) != 0) {
596 
597  sawdot--;
598  if (c == -64 && z[n+1] == -128) {
599  n += 2;
600  continue;
601  }
602 
603  if (fPScript) {
604 
605  // We are in the middle of <SCRIPT>...</SCRIPT>. Just look for
606  // the </SCRIPT> markup. (later:) Treat <STYLE>...</STYLE> the
607  // same way.
608 
609  TGHtmlScript *pScr = fPScript;
610  const char *zEnd;
611  int nEnd;
612  //int curline, curch, curlast = n;
613  int sqcnt;
614  if (pScr->fType == Html_SCRIPT) {
615  zEnd = "</script>";
616  nEnd = 9;
617  } else if (pScr->fType == Html_NOSCRIPT) {
618  zEnd = "</noscript>";
619  nEnd = 11;
620  } else if (pScr->fType == Html_NOFRAMES) {
621  zEnd = "</noframes>";
622  nEnd = 11;
623  } else {
624  zEnd = "</style>";
625  nEnd = 8;
626  }
627  if (pScr->fNStart < 0) {
628  pScr->fNStart = n;
629  pScr->fNScript = 0;
630  }
631  sqcnt = 0;
632  for (i = n /*pScr->fNStart + pScr->fNScript*/; z[i]; i++) {
633  if (z[i] == '\'' || z[i] == '"') {
634  sqcnt++; // Skip if odd # quotes
635  } else if (z[i] == '\n') {
636  sqcnt = 0;
637  }
638  if (z[i] == '<' && z[i+1] == '/' &&
639  strncasecmp(&z[i], zEnd, nEnd) == 0) {
640  if (zEnd[3] == 'c' && ((sqcnt % 2) == 1)) continue;
641  pScr->fNScript = i - n;
642  fPScript = 0;
643  n = i + nEnd;
644  break;
645  }
646  }
647  if (z[i] == 0) goto incomplete;
648  if (fPScript) {
649  pScr->fNScript = i - n;
650  n = i;
651  }
652  else {
653 #ifdef pIsInMeachnism
654  // If there is a script, execute it now and insert any output
655  // to the html stream for parsing as html. (ie. client side scripting)
656 
657  if (pIsInScript && !pIsInNoScript && !pIsInNoFrames) {
658 
659  //for (curch = 0, curline = 1; curch <= curlast; curch++)
660  // if (z[curch] == '\n') curline++;
661 
662  // arglist in pElem and text pointers in pScr?
663  // Inline scripts can contain unmatched brackets :-)
664  //char varind[50];
665  //sprintf(varind, "HtmlScrVar%d", p->varind++);
666  //char savech = fZText[pScr->fNStart + pScr->fNScript];
667  //fZText[pScr->fNStart + pScr->fNScript] = 0;
668  //char *scriptBody = StrDup(fZText[pScr->fNStart]);
669  //fZText[pScr->fNStart + pScr->fNScript] = savech;
670  AdvanceLayout(p);
671  inParse++;
672  char *result = ProcessScript((TGHtmlScript *) pElem); // pElem or pScr??
673  inParse--;
674  if (result) {
675  ol = fNAlloc;
676  rl = strlen(result);
677  fNAlloc += rl;
678  z = fZText = HtmlRealloc(z, ol+rl);
679  memmove(z + n + rl, z+n, ol - n);
680  memmove(z + n, result, rl);
681  }
682  }
683  pIsInScript = 0;
684  pIsInNoScript = 0;
685  pIsInNoFrames = 0;
686 #endif
687  }
688  //continue;
689 
690  }
691  else if (isspace((unsigned char)c)) {
692 
693  // White space
694  for (i = 0;
695  (c = z[n+i]) != 0 && isspace((unsigned char)c) && c != '\n' && c != '\r';
696  i++) { }
697  if (c == '\r' && z[n+i+1] == '\n') ++i;
698 #if 0 // this is certainly NOT OK, since it alters pre-formatted text
699  if (sawdot == 1) {
700  pElem = new TGHtmlTextElement(2);
701  strcpy(((TGHtmlTextElement *)pElem)->fZText, " ");
702  pElem->fElId = ++fIdind;
703  pElem->fOffs = n;
704  pElem->fCount = 1;
705  AppendElement(pElem);
706  }
707 #endif
708  pElem = new TGHtmlSpaceElement;
709  if (pElem == 0) goto incomplete;
710  ((TGHtmlSpaceElement *)pElem)->fW = 0;
711  pElem->fOffs = n;
712  pElem->fElId = ++fIdind;
713  if (c == '\n' || c == '\r') {
714  pElem->fFlags = HTML_NewLine;
715  pElem->fCount = 1;
716  i++;
717  inpCol = 0;
718  } else {
719  int iColStart = inpCol;
720  pElem->fFlags = 0;
721  for (j = 0; j < i; j++) {
722  inpCol = NextColumn(inpCol, z[n+j]);
723  }
724  pElem->fCount = inpCol - iColStart;
725  }
726  AppendElement(pElem);
727  n += i;
728 
729  }
730  else if (c != '<' || fIPlaintext != 0 ||
731  (!isalpha(z[n+1]) && z[n+1] != '/' && z[n+1] != '!' && z[n+1] != '?')) {
732 
733  // Ordinary text
734  for (i = 1; (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '<'; i++) {}
735  if (z[n+i-1] == '.' || z[n+i-1] == '!' || z[n+i-1] == '?') sawdot = 2;
736  if (c == 0) goto incomplete;
737  if (fIPlaintext != 0 && z[n] == '<') {
738  switch (fIPlaintext) {
739  case Html_LISTING:
740  if (i >= 10 && strncasecmp(&z[n], "</listing>", 10) == 0) {
741  fIPlaintext = 0;
742  goto doMarkup;
743  }
744  break;
745 
746  case Html_XMP:
747  if (i >= 6 && strncasecmp(&z[n], "</xmp>", 6) == 0) {
748  fIPlaintext = 0;
749  goto doMarkup;
750  }
751  break;
752 
753  case Html_TEXTAREA:
754  if (i >= 11 && strncasecmp(&z[n], "</textarea>", 11) == 0) {
755  fIPlaintext = 0;
756  goto doMarkup;
757  }
758  break;
759 
760  default:
761  break;
762  }
763  }
764  pElem = new TGHtmlTextElement(i);
765  if (pElem == 0) goto incomplete;
766  TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
767  tpElem->fElId = ++fIdind;
768  tpElem->fOffs = n;
769  strncpy(tpElem->fZText, &z[n], i);
770  tpElem->fZText[i] = 0;
771  AppendElement(pElem);
772  if (fIPlaintext == 0 || fIPlaintext == Html_TEXTAREA) {
773  HtmlTranslateEscapes(tpElem->fZText);
774  }
775  pElem->fCount = strlen(tpElem->fZText);
776  n += i;
777  inpCol += i;
778 
779  } else if (strncmp(&z[n], "<!--", 4) == 0) {
780 
781  // An HTML comment. Just skip it.
782  for (i = 4; z[n+i]; i++) {
783  if (z[n+i] == '-' && strncmp(&z[n+i], "-->", 3) == 0) break;
784  }
785  if (z[n+i] == 0) goto incomplete;
786 
787  pElem = new TGHtmlTextElement(i);
788  if (pElem == 0) goto incomplete;
789  TGHtmlTextElement *tpElem = (TGHtmlTextElement *) pElem;
790  tpElem->fType = Html_COMMENT;
791  tpElem->fElId = ++fIdind;
792  tpElem->fOffs = n;
793  strncpy(tpElem->fZText, &z[n+4], i-4);
794  tpElem->fZText[i-4] = 0;
795  tpElem->fCount = 0;
796  AppendElement(pElem);
797 
798  pElem = new TGHtmlElement(Html_EndCOMMENT);
799  AppToken(pElem, 0, n+4);
800 
801  for (j = 0; j < i+3; j++) {
802  inpCol = NextColumn(inpCol, z[n+j]);
803  }
804  n += i + 3;
805 
806  }
807  else {
808 
809  // Markup.
810  //
811  // First get the name of the markup
812 doMarkup:
813  argc = 1;
814  argv[0] = &z[n+1];
815  for (i = 1;
816  (c = z[n+i]) != 0 && !isspace((unsigned char)c) && c != '>' && (i < 2 || c != '/');
817  i++) {}
818  arglen[0] = i - 1;
819  if (c == 0) goto incomplete;
820 
821  // Now parse up the arguments
822 
823  while (isspace((unsigned char)z[n+i])) ++i;
824  while ((c = z[n+i]) != 0 && c != '>' && (c != '/' || z[n+i+1] != '>')) {
825  if (argc > mxARG - 3) argc = mxARG - 3;
826  argv[argc] = &z[n+i];
827  j = 0;
828  while ((c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>' &&
829  c != '=' && (c != '/' || z[n+i+j+1] != '>')) ++j;
830  arglen[argc] = j;
831  if (c == 0) goto incomplete;
832  i += j;
833  while (isspace((unsigned char)c)) {
834  i++;
835  c = z[n+i];
836  }
837  if (c == 0) goto incomplete;
838  argc++;
839  if (c != '=') {
840  argv[argc] = null;
841  arglen[argc] = 0;
842  argc++;
843  continue;
844  }
845  i++;
846  c = z[n+i];
847  while (isspace((unsigned char)c)) {
848  i++;
849  c = z[n+i];
850  }
851  if (c == 0) goto incomplete;
852  if (c == '\'' || c == '"') {
853  int cQuote = c;
854  i++;
855  argv[argc] = &z[n+i];
856  for (j = 0; (c = z[n+i+j]) != 0 && c != cQuote; j++) {}
857  if (c == 0) goto incomplete;
858  arglen[argc] = j;
859  i += j+1;
860  } else {
861  argv[argc] = &z[n+i];
862  for (j = 0; (c = z[n+i+j]) != 0 && !isspace((unsigned char)c) && c != '>'; j++) {}
863  if (c == 0) goto incomplete;
864  arglen[argc] = j;
865  i += j;
866  }
867  argc++;
868  while (isspace(z[n+i])) ++i;
869  }
870  if (c == '/') {
871  i++;
872  c = z[n+i];
873  selfClose = 1;
874  } else {
875  selfClose = 0;
876  }
877  if (c == 0) goto incomplete;
878  for (j = 0; j < i+1; j++) {
879  inpCol = NextColumn(inpCol, z[n+j]);
880  }
881  n += i + 1;
882 
883  // Lookup the markup name in the hash table
884 
885  if (!gIsInit) {
886  HtmlHashInit();
887  gIsInit = 1;
888  }
889  c = argv[0][arglen[0]];
890  argv[0][arglen[0]] = 0;
891  h = HtmlHash(argv[0]);
892  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
893  if (strcasecmp(pMap->fZName, argv[0]) == 0) break;
894  }
895  argv[0][arglen[0]] = c;
896  if (pMap == 0) continue; // Ignore unknown markup
897 
898 makeMarkupEntry:
899  // Construct a TGHtmlMarkupElement object for this markup.
900 
901  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc, arglen, argv);
902  if (pElem == 0) goto incomplete;
903 
904  pElem->fElId = ++fIdind;
905  pElem->fOffs = n;
906 
907  AddFormInfo(pElem);
908 
909  // The new markup has now been constructed in pElem. But before
910  // appending it to the list, check to see if there is a special
911  // handler for this markup type.
912 
913  if (ProcessToken(pElem, pMap->fZName, pMap->fType)) {
914  // delete pElem;
915 
916  // Tricky, tricky. The user function might have caused the p->fZText
917  // pointer to change, so renew our copy of that pointer.
918 
919  z = fZText;
920  if (z == 0) {
921  n = 0;
922  inpCol = 0;
923  goto incomplete;
924  }
925  continue;
926  }
927 
928  // No special handler for this markup. Just append it to the
929  // list of all tokens.
930 
931  AppendElement(pElem);
932  switch (pMap->fType) {
933  case Html_TABLE:
934  break;
935 
936  case Html_PLAINTEXT:
937  case Html_LISTING:
938  case Html_XMP:
939  case Html_TEXTAREA:
940  fIPlaintext = pMap->fType;
941  break;
942 
943  case Html_NOFRAMES:
944  if (!fHasFrames) break;
945 #ifdef pIsInMeachnism
946  pIsInNoFrames = 1;
947 #endif
948  case Html_NOSCRIPT:
949  break;
950  // coverity[unreachable]
951  if (!fHasScript) break;
952 #ifdef pIsInMeachnism
953  pIsInNoScript = 1;
954 #endif
955  case Html_SCRIPT:
956 #ifdef pIsInMeachnism
957  pIsInScript = 1;
958 #endif
959  // fallthrough
960  case Html_STYLE:
961  fPScript = (TGHtmlScript *) pElem;
962  break;
963 
964  case Html_LI:
965  if (!fAddEndTags) break;
966  if (inLi) {
968  AppToken(e, pElem, n);
969  } else {
970  inLi = 1;
971  }
972  break;
973 
974  case Html_EndLI:
975  inLi=0;
976  break;
977 
978  case Html_EndOL:
979  case Html_EndUL:
980  if (!fAddEndTags) break;
981  if (inLi) {
983  AppToken(e, pElem, n);
984  } else {
985  inLi = 0;
986  }
987  break;
988 
989  default:
990  break;
991  }
992 
993  // If this is self-closing markup (ex: <br/> or <img/>) then
994  // synthesize a closing token.
995 
996  if (selfClose && argv[0][0] != '/' &&
997  strcmp(&pMap[1].fZName[1], pMap->fZName) == 0) {
998  selfClose = 0;
999  pMap++;
1000  argc = 1;
1001  goto makeMarkupEntry;
1002  }
1003  }
1004  }
1005 
1006 incomplete:
1007  fICol = inpCol;
1008  ////fPScript = 0;
1009 
1010  return n;
1011 }
1012 
1013 /************************** End HTML Tokenizer Code ***************************/
1014 
1015 ////////////////////////////////////////////////////////////////////////////////
1016 /// Make one markup entry.
1017 
1019  int arglen[], char *argv[])
1020 {
1022 
1023  switch (objType) {
1024  case O_HtmlCell:
1025  e = new TGHtmlCell(type, argc, arglen, argv);
1026  break;
1027 
1028  case O_HtmlTable:
1029  e = new TGHtmlTable(type, argc, arglen, argv);
1030  break;
1031 
1032  case O_HtmlRef:
1033  e = new TGHtmlRef(type, argc, arglen, argv);
1034  break;
1035 
1036  case O_HtmlLi:
1037  e = new TGHtmlLi(type, argc, arglen, argv);
1038  break;
1039 
1040  case O_HtmlListStart:
1041  e = new TGHtmlListStart(type, argc, arglen, argv);
1042  break;
1043 
1044  case O_HtmlImageMarkup:
1045  e = new TGHtmlImageMarkup(type, argc, arglen, argv);
1046  break;
1047 
1048  case O_HtmlInput:
1049  e = new TGHtmlInput(type, argc, arglen, argv);
1050  break;
1051 
1052  case O_HtmlForm:
1053  e = new TGHtmlForm(type, argc, arglen, argv);
1054  break;
1055 
1056  case O_HtmlHr:
1057  e = new TGHtmlHr(type, argc, arglen, argv);
1058  break;
1059 
1060  case O_HtmlAnchor:
1061  e = new TGHtmlAnchor(type, argc, arglen, argv);
1062  break;
1063 
1064  case O_HtmlScript:
1065  e = new TGHtmlScript(type, argc, arglen, argv);
1066  break;
1067 
1068  case O_HtmlMapArea:
1069  e = new TGHtmlMapArea(type, argc, arglen, argv);
1070  break;
1071 
1072  default:
1073  e = new TGHtmlMarkupElement(type, argc, arglen, argv);
1074  break;
1075  }
1076 
1077  return e;
1078 }
1079 
1080 ////////////////////////////////////////////////////////////////////////////////
1081 /// Append text to the tokenizer engine.
1082 
1084 {
1085  int len = strlen(text);
1086 
1087  if (fNText == 0) {
1088  fNAlloc = len + 100;
1089  fZText = new char [fNAlloc];
1090  } else if (fNText + len >= fNAlloc) {
1091  fNAlloc += len + 100;
1092  char *tmp = new char[fNAlloc];
1093  // coverity[secure_coding]
1094  strcpy(tmp, fZText);
1095  delete[] fZText;
1096  fZText = tmp;
1097  }
1098 
1099  if (fZText == 0) {
1100  fNText = 0;
1101  UNTESTED;
1102  return;
1103  }
1104 
1105  // coverity[secure_coding]
1106  strcpy(&fZText[fNText], text);
1107  fNText += len;
1108  fNComplete = Tokenize();
1109 }
1110 
1111 ////////////////////////////////////////////////////////////////////////////////
1112 /// This routine takes a text representation of a token, converts it into a
1113 /// TGHtmlElement object and inserts it immediately prior to pToken. If pToken
1114 /// is 0, then the newly created TGHtmlElement is appended.
1115 ///
1116 /// This routine does nothing to resize, restyle, relayout or redisplay
1117 /// the HTML. That is the calling routine's responsibility.
1118 ///
1119 /// Return the new TGHtmlElement object if successful. Return zero if
1120 /// zType is not a known markup name.
1121 ///
1122 /// pToken - Insert before this. Append if pToken == 0
1123 /// zType - Type of markup. Ex: "/a" or "table"
1124 /// zArgs - List of arguments
1125 /// offs - Calculate offset, and insert changed text into fZText!
1126 
1128  char *zType, char *zArgs, int offs)
1129 {
1130  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1131  int h; // The hash on zType
1132  TGHtmlElement *pElem; // The new element
1133  //int nByte; // How many bytes to allocate
1134  //int i; // Loop counter
1135 
1136  if (!gIsInit) {
1137  HtmlHashInit();
1138  gIsInit = 1;
1139  }
1140 
1141  if (strcmp(zType, "Text") == 0) {
1142  pElem = new TGHtmlTextElement(zArgs ? strlen(zArgs) : 0);
1143  if (pElem == 0) return 0;
1144  if (zArgs) {
1145  // coverity[secure_coding]
1146  strcpy (((TGHtmlTextElement *)pElem)->fZText, zArgs);
1147  pElem->fCount = strlen(zArgs);
1148  }
1149  } else if (!strcmp(zType, "Space")) {
1150  pElem = new TGHtmlSpaceElement();
1151  if (pElem == 0) return 0;
1152  } else {
1153  h = HtmlHash(zType);
1154  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1155  if (strcasecmp(pMap->fZName, zType) == 0) break;
1156  }
1157  if (pMap == 0) return 0;
1158  if (zArgs == 0 || *zArgs == 0) {
1159  // Special case of no arguments. This is a lot easier...
1160  // well... now its the same thing!
1161  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, 1, 0, 0);
1162  if (pElem == 0) return 0;
1163  } else {
1164  // The general case. There are arguments that need to be parsed
1165  // up. This is slower, but we gotta do it.
1166  //int argc;
1167  //char **argv;
1168  //char *zBuf;
1169 
1170 #if 0
1171  if (!SplitList(zArgs, &argc, &argv)) return 0;
1172 
1173  // shall we insert a dummy argv[0]?
1174 
1175  pElem = MakeMarkupEntry(pMap->fObjType, pMap->fType, argc/*+1??*/, 0, argv);
1176  if (pElem == 0) return 1;
1177 
1178  while (--argc >= 0) if (argv[argc]) delete[] argv[argc];
1179  delete[] argv;
1180 #else
1181  return 0;
1182 #endif
1183  }
1184  }
1185 
1186  pElem->fElId = ++fIdind;
1187 
1188  AppToken(pElem, pToken, offs);
1189 
1190  return pElem;
1191 }
1192 
1193 ////////////////////////////////////////////////////////////////////////////////
1194 /// Insert text into text token, or break token into two text tokens.
1195 /// Also, handle backspace char by deleting text.
1196 /// Should also handle newline char by splitting text.
1197 
1198 int TGHtml::TextInsertCmd(int /*argc*/, char ** /*argv*/)
1199 {
1200 #if 0
1201  TGHtmlElement *p, *pElem;
1202  int i, l, n = 0;
1203  int idx = 0;
1204  int ptyp = Html_Unknown;
1205  int istxt = 0;
1206  char *cp = 0, c, *cp2;
1207 
1208  if (GetIndex(argv[3], &p, &i) != 0) {
1209  // sprintf(tmp, "malformed index: \"%s\"", argv[3]);
1210  return 0;
1211  }
1212  if (p) {
1213  ptyp = p->fType;
1214  if ((istxt = (ptyp == Html_Text))) {
1215  l = p->fCount;
1216  cp = ((TGHtmlTextElement *)p)->fZText;
1217  }
1218  }
1219  if (argv[2][0] == 'b') { // Break text token into two.
1220  if (!istxt) return 1;
1221  if (i == 0 || i == l) return 1;
1222  pElem = InsertToken(p->fPNext, "Text", cp + i, -1);
1223  cp[i] = 0;
1224  p->fCount = i;
1225  return 1;
1226  }
1227  c = argv[4][0];
1228  if (!c) return 1;
1229  if (c == '\b') {
1230  if ((!istxt) || (!l) || (!i)) {
1231  if (!p) return 1;
1232  if (p->fType == Html_BR)
1233  RemoveElements(p, p);
1234  return 1;
1235  }
1236  if (p && l == 1) {
1237  RemoveElements(p, p);
1238  return 1;
1239  }
1240  if (i == l)
1241  cp[p->fCount] = 0;
1242  else
1243  memcpy(cp+i-1, cp+i, l-i+1);
1244 
1245  cp[--p->fCount] = 0;
1246  if (ins.i-- <= 0) ins.i = 0;
1247  ins.p = p;
1248  return 1;
1249  }
1250  if (c == '\n' || c == '\r') {
1251  }
1252  if (istxt) {
1253  char *cp;
1254  int t, j, alen = strlen(argv[4]);
1255  n = alen + l;
1256 
1258 
1259  if (text->fZText == (char*) ((&text->fZText)+1)) {
1260  cp = new char[n+1];
1261  strcpy(cp, text->fZText);
1262  } else {
1263  cp = new char[n+1];
1264  strcpy(cp, text->fZText);
1265  }
1266  cp2 = new char[alen+1];
1267  memcpy(cp2, argv[4], alen+1);
1268  HtmlTranslateEscapes(cp2);
1269  alen = strlen(cp2);
1270  memmove(cp+alen+i, cp+i, l-i+1);
1271  for (j = 0; j < alen; j++) cp[i+j] = cp2[j];
1272  delete[] cp2;
1273  delete[] text->fZText;
1274  text->fZText = cp;
1275  p->fCount = strlen(cp);
1276  ins.p = p;
1277  ins.i = i+alen;
1278  } else {
1279  p = InsertToken(p ? p->fPNext : 0, "Text", argv[4], -1);
1280  AddStyle(p);
1281  i = 0;
1282  ins.p = p;
1283  ins.i = 1;
1284  }
1285  if (p) {
1286  idx = p->base.id;
1287  AddStrOffset(p, argv[4], i);
1288  }
1289 #endif
1290  return 1;
1291 }
1292 
1293 ////////////////////////////////////////////////////////////////////////////////
1294 /// Returns token map matching zType name.
1295 
1297 {
1298  SHtmlTokenMap_t *pMap; // For searching the markup name hash table
1299  int h; // The hash on zType
1300 
1301  if (!gIsInit) {
1302  HtmlHashInit();
1303  gIsInit = 1;
1304  }
1305  h = HtmlHash(zType);
1306  for (pMap = gApMap[h]; pMap; pMap = pMap->fPCollide) {
1307  if (strcasecmp(pMap->fZName, zType) == 0) break;
1308  }
1309 
1310  return pMap;
1311 }
1312 
1313 ////////////////////////////////////////////////////////////////////////////////
1314 /// Convert a markup name into a type integer
1315 
1316 int TGHtml::NameToType(char *zType)
1317 {
1318  SHtmlTokenMap_t *pMap = NameToPmap(zType);
1319  return pMap ? pMap->fType : (int)Html_Unknown;
1320 }
1321 
1322 ////////////////////////////////////////////////////////////////////////////////
1323 /// Convert a type into a symbolic name
1324 
1325 const char *TGHtml::TypeToName(int type)
1326 {
1327  if (type >= Html_A && type <= Html_EndXMP) {
1328  SHtmlTokenMap_t *pMap = gApMap[type - Html_A];
1329  return pMap->fZName;
1330  } else {
1331  return "???";
1332  }
1333 }
1334 
1335 ////////////////////////////////////////////////////////////////////////////////
1336 /// For debugging purposes, print information about a token
1337 
1339 {
1340 //#ifdef DEBUG
1341  static char zBuf[200];
1342  int j;
1343  const char *zName;
1344 
1345  if (p == 0) {
1346  snprintf(zBuf, 200, "NULL");
1347  return zBuf;
1348  }
1349  switch (p->fType) {
1350  case Html_Text:
1351  snprintf(zBuf, 200, "text: \"%.*s\"", p->fCount, ((TGHtmlTextElement *)p)->fZText);
1352  break;
1353 
1354  case Html_Space:
1355  if (p->fFlags & HTML_NewLine) {
1356  snprintf(zBuf, 200, "space: \"\\n\"");
1357  } else {
1358  snprintf(zBuf, 200, "space: \" \"");
1359  }
1360  break;
1361 
1362  case Html_Block: {
1363  TGHtmlBlock *block = (TGHtmlBlock *) p;
1364  if (block->fN > 0) {
1365  int n = block->fN;
1366  if (n > 150) n = 150;
1367  snprintf(zBuf, 200, "<Block z=\"%.*s\">", n, block->fZ);
1368  } else {
1369  snprintf(zBuf, 200, "<Block>");
1370  }
1371  break;
1372  }
1373 
1374  default:
1375  if (p->fType >= HtmlMarkupMap[0].fType
1376  && p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
1377  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1378  } else {
1379  zName = "Unknown";
1380  }
1381  snprintf(zBuf, 200, "markup (%d) <%s", p->fType, zName);
1382  for (j = 1 ; j < p->fCount; j += 2) {
1383  snprintf(&zBuf[strlen(zBuf)], 200-strlen(zBuf), " %s=\"%s\"",
1384  ((TGHtmlMarkupElement *)p)->fArgv[j-1],
1385  ((TGHtmlMarkupElement *)p)->fArgv[j]);
1386  }
1387  // coverity[secure_coding]
1388  strcat(zBuf, ">");
1389  break;
1390  }
1391  return zBuf;
1392 //#else
1393 // return 0;
1394 //#endif
1395 }
1396 
1397 ////////////////////////////////////////////////////////////////////////////////
1398 /// Append all the arguments of the given markup to the given TGString.
1399 ///
1400 /// Example: If the markup is <IMG SRC=image.gif ALT="hello!">
1401 /// then the following text is appended to the TGString:
1402 ///
1403 /// "src image.gif alt hello!"
1404 ///
1405 /// Notice how all attribute names are converted to lower case.
1406 /// This conversion happens in the parser.
1407 
1409 {
1410  int i;
1411 
1412  for (i = 0; i + 1 < pElem->fCount; i += 2) {
1413  str->Append(pElem->fArgv[i]);
1414  str->Append("=");
1415  str->Append(pElem->fArgv[i+1]);
1416  str->Append(" ");
1417  }
1418 }
1419 
1420 ////////////////////////////////////////////////////////////////////////////////
1421 /// Returns token name of html element p.
1422 
1424 {
1425  static char zBuf[200];
1426  //int j;
1427  const char *zName;
1428 
1429  zBuf[0] = 0;
1430  if (p == 0) {
1431  // coverity[secure_coding]: zBuf is large enough
1432  strcpy(zBuf, "NULL");
1433  return zBuf;
1434  }
1435  switch (p->fType) {
1436  case Html_Text:
1437  case Html_Space:
1438  break;
1439 
1440  case Html_Block:
1441  break;
1442 
1443  default:
1444  if (p->fType >= HtmlMarkupMap[0].fType &&
1445  p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
1446  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1447  } else {
1448  zName = "Unknown";
1449  }
1450  strlcpy(zBuf, zName, sizeof(zBuf));
1451  break;
1452  }
1453 
1454  return zBuf;
1455 }
1456 
1457 ////////////////////////////////////////////////////////////////////////////////
1458 /// Returns token map at location n.
1459 
1461 {
1462  return HtmlMarkupMap+n;
1463 }
1464 
1465 ////////////////////////////////////////////////////////////////////////////////
1466 /// Return all tokens between the two elements as a string list.
1467 
1469 {
1470  TGString *str;
1471  int i;
1472  const char *zName;
1473  char zLine[100];
1474 
1475  str = new TGString("");
1476  while (p && p != pEnd) {
1477  switch (p->fType) {
1478  case Html_Block:
1479  break;
1480 
1481  case Html_Text:
1482  str->Append("{ Text \"");
1483  str->Append(((TGHtmlTextElement *)p)->fZText);
1484  str->Append("\" } ");
1485  break;
1486 
1487  case Html_Space:
1488  snprintf(zLine, 100, "Space %d %d ",
1489  p->fCount, (p->fFlags & HTML_NewLine) != 0);
1490  str->Append(zLine);
1491  break;
1492 
1493  case Html_Unknown:
1494  str->Append("Unknown ");
1495  break;
1496 
1497  default:
1498  str->Append("{ Markup ");
1499  if (p->fType >= HtmlMarkupMap[0].fType &&
1500  p->fType <= HtmlMarkupMap[HTML_MARKUP_COUNT-1].fType) {
1501  zName = HtmlMarkupMap[p->fType - HtmlMarkupMap[0].fType].fZName;
1502  } else {
1503  zName = "Unknown";
1504  }
1505  str->Append(zName);
1506  str->Append(" ");
1507  for (i = 0; i < p->fCount; ++i) {
1508  str->Append(((TGHtmlMarkupElement *)p)->fArgv[i]);
1509  str->Append(" ");
1510  }
1511  str->Append("} ");
1512  break;
1513  }
1514  p = p->fPNext;
1515  }
1516 
1517  return str;
1518 }
1519 
1520 ////////////////////////////////////////////////////////////////////////////////
1521 /// Print a list of tokens
1522 
1524 {
1525  TGHtmlElement *p;
1526 
1527  for (p = first; p != last; p = p->fPNext) {
1528  if (p->fType == Html_Block) {
1529  TGHtmlBlock *block = (TGHtmlBlock *) p;
1530  const char *z = block->fZ;
1531  int n = block->fN;
1532  if (n == 0 || z == 0) {
1533  n = 1;
1534  z = "";
1535  }
1536  printf("Block flags=%02x cnt=%d x=%d..%d y=%d..%d z=\"%.*s\"\n",
1537  p->fFlags, p->fCount, block->fLeft, block->fRight,
1538  block->fTop, block->fBottom, n, z);
1539  } else {
1540  printf("Token font=%2d color=%2d align=%d flags=0x%04x name=%s\n",
1541  p->fStyle.fFont, p->fStyle.fColor,
1542  p->fStyle.fAlign, p->fStyle.fFlags, DumpToken(p));
1543  }
1544  }
1545 }
static long int sum(long int i)
Definition: Factory.cxx:2173
void TokenizerAppend(const char *text)
Append text to the tokenizer engine.
char * fZ
Definition: TGHtml.h:716
#define O_HtmlLi
Definition: TGHtml.h:854
#define O_HtmlAnchor
Definition: TGHtml.h:860
unsigned int fColor
Definition: TGHtml.h:145
void PrintList(TGHtmlElement *first, TGHtmlElement *last)
Print a list of tokens.
#define O_HtmlCell
Definition: TGHtml.h:851
void HtmlTranslateEscapes(char *z)
Translate escape sequences in the string "z".
int fTop
Definition: TGHtml.h:717
Html_16_t fObjType
Definition: TGHtml.h:842
#define O_HtmlForm
Definition: TGHtml.h:858
SHtmlTokenMap_t * NameToPmap(char *zType)
Returns token map matching zType name.
Html_u16_t fRight
Definition: TGHtml.h:718
#define O_HtmlMapArea
Definition: TGHtml.h:862
TH1 * h
Definition: legend2.C:5
int NameToType(char *zType)
Convert a markup name into a type integer.
SHtmlTokenMap_t * fPCollide
Definition: TGHtml.h:843
TGHtmlMarkupElement * MakeMarkupEntry(int objType, int type, int argc, int arglen[], char *argv[])
Make one markup entry.
int TextInsertCmd(int argc, char **argv)
Insert text into text token, or break token into two text tokens.
static int gIsInit
void AppendElement(TGHtmlElement *pElem)
Append the given TGHtmlElement to the tokenizers list of elements.
TGHtmlElement * fPNext
Definition: TGHtml.h:261
null_t< F > null()
#define O_HtmlScript
Definition: TGHtml.h:861
static struct SgEsc_t gEscSequences[]
Definition: TGHtmlParse.cxx:74
#define HTML_MARKUP_HASH_SIZE
Definition: TGHtmlTokens.h:200
unsigned int fFont
Definition: TGHtml.h:144
static void EscInit()
TString & Append(const char *cs)
Definition: TString.h:495
int fNStart
Definition: TGHtml.h:683
static void HtmlHashInit(void)
TGHtmlElement * fPPrev
Definition: TGHtml.h:262
int Tokenize()
Process as much of the input HTML as possible.
#define O_HtmlTable
Definition: TGHtml.h:852
Html_16_t fType
Definition: TGHtml.h:841
SHtmlTokenMap_t * GetMarkupMap(int n)
Returns token map at location n.
#define ESC_HASH_SIZE
int fNScript
Definition: TGHtml.h:684
void ToLower(char *z)
Convert a string to all lower-case letters.
static int EscHash(const char *zName)
SVector< double, 2 > v
Definition: Dict.h:5
SHtmlStyle_t fStyle
Definition: TGHtml.h:263
PyObject * fValue
char * GetTokenName(TGHtmlElement *p)
Returns token name of html element p.
#define O_HtmlHr
Definition: TGHtml.h:859
void AppendArglist(TGString *str, TGHtmlMarkupElement *pElem)
Append all the arguments of the given markup to the given TGString.
TGHtmlElement * InsertToken(TGHtmlElement *pToken, char *zType, char *zArgs, int offs)
This routine takes a text representation of a token, converts it into an TGHtmlElement object and ins...
Html_u16_t fN
Definition: TGHtml.h:719
static SHtmlTokenMap_t * gApMap[HTML_MARKUP_HASH_SIZE]
char * fZText
Definition: TGHtml.h:300
Html_u8_t fType
Definition: TGHtml.h:264
static int HtmlHash(const char *zName)
#define HTML_NewLine
Definition: TGHtml.h:275
unsigned int fFlags
Definition: TGHtml.h:150
const char * fZName
Definition: TGHtml.h:840
#define O_HtmlImageMarkup
Definition: TGHtml.h:856
const char * TypeToName(int type)
Convert a type into a symbolic name.
TText * text
SHtmlTokenMap_t HtmlMarkupMap[]
int type
Definition: TGX11.cxx:120
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
static int NextColumn(int iCol, char c)
Compute the new column index following the given character.
#define HTML_MARKUP_COUNT
Definition: TGHtmlTokens.h:199
#define UNTESTED
Definition: TGHtml.h:64
TGString * ListTokens(TGHtmlElement *p, TGHtmlElement *pEnd)
Return all tokens between the two elements as a string list.
you should not use this method at all Int_t Int_t z
Definition: TRolke.cxx:630
#define O_HtmlRef
Definition: TGHtml.h:853
Html_u16_t fLeft
Definition: TGHtml.h:718
auto * l
Definition: textangle.C:4
Html_16_t fCount
Definition: TGHtml.h:266
void AppToken(TGHtmlElement *pNew, TGHtmlElement *p, int offs)
Insert token pNew before token p.
char * DumpToken(TGHtmlElement *p)
For debugging purposes, print information about a token.
#define snprintf
Definition: civetweb.c:822
int fBottom
Definition: TGHtml.h:717
#define O_HtmlListStart
Definition: TGHtml.h:855
unsigned int fAlign
Definition: TGHtml.h:147
Definition: first.py:1
static char gAcMsChar[]
static struct SgEsc_t * gApEscHash[ESC_HASH_SIZE]
Html_16_t fW
Definition: TGHtml.h:308
#define mxARG
const Int_t n
Definition: legend1.C:16
const char * cnt
Definition: TXMLSetup.cxx:74
#define O_HtmlInput
Definition: TGHtml.h:857
Html_u8_t fFlags
Definition: TGHtml.h:265