[ Index ]

PHP Cross Reference of Joomla 1.5.25

title

Body

[close]

/libraries/geshi/ -> geshi.php (source)

   1  <?php
   2  // no direct access
   3  defined('_JEXEC') or die;
   4  
   5  /**
   6   * GeSHi - Generic Syntax Highlighter
   7   *
   8   * The GeSHi class for Generic Syntax Highlighting. Please refer to the
   9   * documentation at http://qbnz.com/highlighter/documentation.php for more
  10   * information about how to use this class.
  11   *
  12   * For changes, release notes, TODOs etc, see the relevant files in the docs/
  13   * directory.
  14   *
  15   *   This file is part of GeSHi.
  16   *
  17   *  GeSHi is free software; you can redistribute it and/or modify
  18   *  it under the terms of the GNU General Public License as published by
  19   *  the Free Software Foundation; either version 2 of the License, or
  20   *  (at your option) any later version.
  21   *
  22   *  GeSHi is distributed in the hope that it will be useful,
  23   *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  24   *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  25   *  GNU General Public License for more details.
  26   *
  27   *  You should have received a copy of the GNU General Public License
  28   *  along with GeSHi; if not, write to the Free Software
  29   *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  30   *
  31   * @package    geshi
  32   * @subpackage core
  33   * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
  34   * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
  35   * @license    http://gnu.org/copyleft/gpl.html GNU GPL
  36   *
  37   */
  38  
  39  //
  40  // GeSHi Constants
  41  // You should use these constant names in your programs instead of
  42  // their values - you never know when a value may change in a future
  43  // version
  44  //
  45  
  46  /** The version of this GeSHi file */
  47  define('GESHI_VERSION', '1.0.8.10');
  48  
  49  // Define the root directory for the GeSHi code tree
  50  if (!defined('GESHI_ROOT')) {
  51      /** The root directory for GeSHi */
  52      define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
  53  }
  54  /** The language file directory for GeSHi
  55      @access private */
  56  define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
  57  
  58  // Define if GeSHi should be paranoid about security
  59  if (!defined('GESHI_SECURITY_PARANOID')) {
  60      /** Tells GeSHi to be paranoid about security settings */
  61      define('GESHI_SECURITY_PARANOID', false);
  62  }
  63  
// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);
 154  
 155  /** Used to work around missing PHP features **/
 156  define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
 157  
 158  /** make sure we can call stripos **/
 159  if (!function_exists('stripos')) {
 160      // the offset param of preg_match is not supported below PHP 4.3.3
 161      if (GESHI_PHP_PRE_433) {
 162          /**
 163           * @ignore
 164           */
 165          function stripos($haystack, $needle, $offset = null) {
 166              if (!is_null($offset)) {
 167                  $haystack = substr($haystack, $offset);
 168              }
 169              if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
 170                  return $match[0][1];
 171              }
 172              return false;
 173          }
 174      }
 175      else {
 176          /**
 177           * @ignore
 178           */
 179          function stripos($haystack, $needle, $offset = null) {
 180              if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
 181                  return $match[0][1];
 182              }
 183              return false;
 184          }
 185      }
 186  }
 187  
/** Some old PHP / PCRE versions only support up to a certain number of
    subpatterns in regular expressions. Set this to false if your PCRE lib
    is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** It's also important not to generate regular expressions that are too long.
    Be generous here... but keep in mind that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f" and without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
 249  
 250  
 251  /**
 252   * The GeSHi Class.
 253   *
 254   * Please refer to the documentation for GeSHi 1.0.X that is available
 255   * at http://qbnz.com/highlighter/documentation.php for more information
 256   * about how to use this class.
 257   *
 258   * @package   geshi
 259   * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 260   * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 261   */
 262  class GeSHi {
 263      /**#@+
 264       * @access private
 265       */
 266      /**
 267       * The source code to highlight
 268       * @var string
 269       */
 270      var $source = '';
 271  
 272      /**
 273       * The language to use when highlighting
 274       * @var string
 275       */
 276      var $language = '';
 277  
 278      /**
 279       * The data for the language used
 280       * @var array
 281       */
 282      var $language_data = array();
 283  
 284      /**
 285       * The path to the language files
 286       * @var string
 287       */
 288      var $language_path = GESHI_LANG_ROOT;
 289  
 290      /**
 291       * The error message associated with an error
 292       * @var string
 293       * @todo check err reporting works
 294       */
 295      var $error = false;
 296  
 297      /**
 298       * Possible error messages
 299       * @var array
 300       */
 301      var $error_messages = array(
 302          GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
 303          GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
 304          GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
 305          GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
 306      );
 307  
 308      /**
 309       * Whether highlighting is strict or not
 310       * @var boolean
 311       */
 312      var $strict_mode = false;
 313  
 314      /**
 315       * Whether to use CSS classes in output
 316       * @var boolean
 317       */
 318      var $use_classes = false;
 319  
 320      /**
 321       * The type of header to use. Can be one of the following
 322       * values:
 323       *
 324       * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
 325       * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
 326       * - GESHI_HEADER_NONE: No header is outputted.
 327       *
 328       * @var int
 329       */
 330      var $header_type = GESHI_HEADER_PRE;
 331  
 332      /**
 333       * Array of permissions for which lexics should be highlighted
 334       * @var array
 335       */
 336      var $lexic_permissions = array(
 337          'KEYWORDS' =>    array(),
 338          'COMMENTS' =>    array('MULTI' => true),
 339          'REGEXPS' =>     array(),
 340          'ESCAPE_CHAR' => true,
 341          'BRACKETS' =>    true,
 342          'SYMBOLS' =>     false,
 343          'STRINGS' =>     true,
 344          'NUMBERS' =>     true,
 345          'METHODS' =>     true,
 346          'SCRIPT' =>      true
 347      );
 348  
 349      /**
 350       * The time it took to parse the code
 351       * @var double
 352       */
 353      var $time = 0;
 354  
 355      /**
 356       * The content of the header block
 357       * @var string
 358       */
 359      var $header_content = '';
 360  
 361      /**
 362       * The content of the footer block
 363       * @var string
 364       */
 365      var $footer_content = '';
 366  
 367      /**
 368       * The style of the header block
 369       * @var string
 370       */
 371      var $header_content_style = '';
 372  
 373      /**
 374       * The style of the footer block
 375       * @var string
 376       */
 377      var $footer_content_style = '';
 378  
 379      /**
 380       * Tells if a block around the highlighted source should be forced
 381       * if not using line numbering
 382       * @var boolean
 383       */
 384      var $force_code_block = false;
 385  
 386      /**
 387       * The styles for hyperlinks in the code
 388       * @var array
 389       */
 390      var $link_styles = array();
 391  
 392      /**
 393       * Whether important blocks should be recognised or not
 394       * @var boolean
 395       * @deprecated
 396       * @todo REMOVE THIS FUNCTIONALITY!
 397       */
 398      var $enable_important_blocks = false;
 399  
 400      /**
 401       * Styles for important parts of the code
 402       * @var string
 403       * @deprecated
 404       * @todo As above - rethink the whole idea of important blocks as it is buggy and
 405       * will be hard to implement in 1.2
 406       */
 407      var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
 408  
 409      /**
 410       * Whether CSS IDs should be added to the code
 411       * @var boolean
 412       */
 413      var $add_ids = false;
 414  
 415      /**
 416       * Lines that should be highlighted extra
 417       * @var array
 418       */
 419      var $highlight_extra_lines = array();
 420  
 421      /**
 422       * Styles of lines that should be highlighted extra
 423       * @var array
 424       */
 425      var $highlight_extra_lines_styles = array();
 426  
 427      /**
 428       * Styles of extra-highlighted lines
 429       * @var string
 430       */
 431      var $highlight_extra_lines_style = 'background-color: #ffc;';
 432  
 433      /**
 434       * The line ending
 435       * If null, nl2br() will be used on the result string.
 436       * Otherwise, all instances of \n will be replaced with $line_ending
 437       * @var string
 438       */
 439      var $line_ending = null;
 440  
 441      /**
 442       * Number at which line numbers should start at
 443       * @var int
 444       */
 445      var $line_numbers_start = 1;
 446  
 447      /**
 448       * The overall style for this code block
 449       * @var string
 450       */
 451      var $overall_style = 'font-family:monospace;';
 452  
 453      /**
 454       *  The style for the actual code
 455       * @var string
 456       */
 457      var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
 458  
 459      /**
 460       * The overall class for this code block
 461       * @var string
 462       */
 463      var $overall_class = '';
 464  
 465      /**
 466       * The overall ID for this code block
 467       * @var string
 468       */
 469      var $overall_id = '';
 470  
 471      /**
 472       * Line number styles
 473       * @var string
 474       */
 475      var $line_style1 = 'font-weight: normal; vertical-align:top;';
 476  
 477      /**
 478       * Line number styles for fancy lines
 479       * @var string
 480       */
 481      var $line_style2 = 'font-weight: bold; vertical-align:top;';
 482  
 483      /**
 484       * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
 485       * @var string
 486       */
 487      var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
 488  
 489      /**
 490       * Flag for how line numbers are displayed
  491       * @var int
 492       */
 493      var $line_numbers = GESHI_NO_LINE_NUMBERS;
 494  
 495      /**
 496       * Flag to decide if multi line spans are allowed. Set it to false to make sure
 497       * each tag is closed before and reopened after each linefeed.
 498       * @var boolean
 499       */
 500      var $allow_multiline_span = true;
 501  
 502      /**
 503       * The "nth" value for fancy line highlighting
 504       * @var int
 505       */
 506      var $line_nth_row = 0;
 507  
 508      /**
 509       * The size of tab stops
 510       * @var int
 511       */
 512      var $tab_width = 8;
 513  
 514      /**
 515       * Should we use language-defined tab stop widths?
  516       * @var boolean
 517       */
 518      var $use_language_tab_width = false;
 519  
 520      /**
 521       * Default target for keyword links
 522       * @var string
 523       */
 524      var $link_target = '';
 525  
 526      /**
 527       * The encoding to use for entity encoding
 528       * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
 529       * @var string
 530       */
 531      var $encoding = 'utf-8';
 532  
 533      /**
 534       * Should keywords be linked?
 535       * @var boolean
 536       */
 537      var $keyword_links = true;
 538  
 539      /**
 540       * Currently loaded language file
 541       * @var string
 542       * @since 1.0.7.22
 543       */
 544      var $loaded_language = '';
 545  
 546      /**
  547       * Whether the caches needed for parsing are built or not
 548       *
 549       * @var bool
 550       * @since 1.0.8
 551       */
 552      var $parse_cache_built = false;
 553  
 554      /**
 555       * Work around for Suhosin Patch with disabled /e modifier
 556       *
 557       * Note from suhosins author in config file:
 558       * <blockquote>
 559       *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 560       *   Often it is the cause for remote code execution exploits. It is wise to
 561       *   deactivate this feature and test where in the application it is used.
 562       *   The developer using the /e modifier should be made aware that he should
 563       *   use <code>preg_replace_callback()</code> instead
 564       * </blockquote>
 565       *
  566       * @var int
 567       * @since 1.0.8
 568       */
 569      var $_kw_replace_group = 0;
 570      var $_rx_key = 0;
 571  
 572      /**
 573       * some "callback parameters" for handle_multiline_regexps
 574       *
 575       * @since 1.0.8
 576       * @access private
 577       * @var string
 578       */
 579      var $_hmr_before = '';
 580      var $_hmr_replace = '';
 581      var $_hmr_after = '';
 582      var $_hmr_key = 0;
 583  
 584      /**#@-*/
 585  
 586      /**
 587       * Creates a new GeSHi object, with source and language
 588       *
 589       * @param string The source code to highlight
 590       * @param string The language to highlight the source with
 591       * @param string The path to the language file directory. <b>This
 592       *               is deprecated!</b> I've backported the auto path
 593       *               detection from the 1.1.X dev branch, so now it
 594       *               should be automatically set correctly. If you have
 595       *               renamed the language directory however, you will
 596       *               still need to set the path using this parameter or
 597       *               {@link GeSHi->set_language_path()}
 598       * @since 1.0.0
 599       */
 600      function GeSHi($source = '', $language = '', $path = '') {
 601          if (!empty($source)) {
 602              $this->set_source($source);
 603          }
 604          if (!empty($language)) {
 605              $this->set_language($language);
 606          }
 607          $this->set_language_path($path);
 608      }
 609  
 610      /**
 611       * Returns an error message associated with the last GeSHi operation,
 612       * or false if no error has occured
 613       *
 614       * @return string|false An error message if there has been an error, else false
 615       * @since  1.0.0
 616       */
 617      function error() {
 618          if ($this->error) {
 619              //Put some template variables for debugging here ...
 620              $debug_tpl_vars = array(
 621                  '{LANGUAGE}' => $this->language,
 622                  '{PATH}' => $this->language_path
 623              );
 624              $msg = str_replace(
 625                  array_keys($debug_tpl_vars),
 626                  array_values($debug_tpl_vars),
 627                  $this->error_messages[$this->error]);
 628  
 629              return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
 630          }
 631          return false;
 632      }
 633  
 634      /**
 635       * Gets a human-readable language name (thanks to Simon Patterson
 636       * for the idea :))
 637       *
 638       * @return string The name for the current language
 639       * @since  1.0.2
 640       */
 641      function get_language_name() {
 642          if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
 643              return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
 644          }
 645          return $this->language_data['LANG_NAME'];
 646      }
 647  
 648      /**
 649       * Sets the source code for this object
 650       *
 651       * @param string The source code to highlight
 652       * @since 1.0.0
 653       */
 654      function set_source($source) {
 655          $this->source = $source;
 656          $this->highlight_extra_lines = array();
 657      }
 658  
 659      /**
 660       * Sets the language for this object
 661       *
 662       * @note since 1.0.8 this function won't reset language-settings by default anymore!
 663       *       if you need this set $force_reset = true
 664       *
 665       * @param string The name of the language to use
 666       * @since 1.0.0
 667       */
 668      function set_language($language, $force_reset = false) {
 669          if ($force_reset) {
 670              $this->loaded_language = false;
 671          }
 672  
 673          //Clean up the language name to prevent malicious code injection
 674          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 675  
 676          $language = strtolower($language);
 677  
 678          //Retreive the full filename
 679          $file_name = $this->language_path . $language . '.php';
 680          if ($file_name == $this->loaded_language) {
 681              // this language is already loaded!
 682              return;
 683          }
 684  
 685          $this->language = $language;
 686  
 687          $this->error = false;
 688          $this->strict_mode = GESHI_NEVER;
 689  
 690          //Check if we can read the desired file
 691          if (!is_readable($file_name)) {
 692              $this->error = GESHI_ERROR_NO_SUCH_LANG;
 693              return;
 694          }
 695  
 696          // Load the language for parsing
 697          $this->load_language($file_name);
 698      }
 699  
 700      /**
 701       * Sets the path to the directory containing the language files. Note
 702       * that this path is relative to the directory of the script that included
 703       * geshi.php, NOT geshi.php itself.
 704       *
 705       * @param string The path to the language directory
 706       * @since 1.0.0
 707       * @deprecated The path to the language files should now be automatically
 708       *             detected, so this method should no longer be needed. The
 709       *             1.1.X branch handles manual setting of the path differently
 710       *             so this method will disappear in 1.2.0.
 711       */
 712      function set_language_path($path) {
 713          if(strpos($path,':')) {
 714              //Security Fix to prevent external directories using fopen wrappers.
 715              if(DIRECTORY_SEPARATOR == "\\") {
 716                  if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
 717                      return;
 718                  }
 719              } else {
 720                  return;
 721              }
 722          }
 723          if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
 724              //Security Fix to prevent external directories using fopen wrappers.
 725              return;
 726          }
 727          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
 728              //Security Fix to prevent external directories using fopen wrappers.
 729              return;
 730          }
 731          if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
 732              //Security Fix to prevent external directories using fopen wrappers.
 733              return;
 734          }
 735          if ($path) {
 736              $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
 737              $this->set_language($this->language); // otherwise set_language_path has no effect
 738          }
 739      }
 740  
 741      /**
 742       * Get supported langs or an associative array lang=>full_name.
 743       * @param boolean $longnames
 744       * @return array
 745       */
 746      function get_supported_languages($full_names=false)
 747      {
 748          // return array
 749          $back = array();
 750  
 751          // we walk the lang root
 752          $dir = dir($this->language_path);
 753  
 754          // foreach entry
 755          while (false !== ($entry = $dir->read()))
 756          {
 757              $full_path = $this->language_path.$entry;
 758  
 759              // Skip all dirs
 760              if (is_dir($full_path)) {
 761                  continue;
 762              }
 763  
 764              // we only want lang.php files
 765              if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
 766                  continue;
 767              }
 768  
 769              // Raw lang name is here
 770              $langname = $matches[1];
 771  
 772              // We want the fullname too?
 773              if ($full_names === true)
 774              {
 775                  if (false !== ($fullname = $this->get_language_fullname($langname)))
 776                  {
 777                      $back[$langname] = $fullname; // we go associative
 778                  }
 779              }
 780              else
 781              {
 782                  // just store raw langname
 783                  $back[] = $langname;
 784              }
 785          }
 786  
 787          $dir->close();
 788  
 789          return $back;
 790      }
 791  
 792      /**
 793       * Get full_name for a lang or false.
 794       * @param string $language short langname (html4strict for example)
 795       * @return mixed
 796       */
 797      function get_language_fullname($language)
 798      {
 799          //Clean up the language name to prevent malicious code injection
 800          $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 801  
 802          $language = strtolower($language);
 803  
 804          // get fullpath-filename for a langname
 805          $fullpath = $this->language_path.$language.'.php';
 806  
 807          // we need to get contents :S
 808          if (false === ($data = file_get_contents($fullpath))) {
 809              $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
 810              return false;
 811          }
 812  
 813          // match the langname
 814          if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+)\'/', $data, $matches)) {
 815              $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
 816              return false;
 817          }
 818  
 819          // return fullname for langname
 820          return stripcslashes($matches[1]);
 821      }
 822  
 823      /**
 824       * Sets the type of header to be used.
 825       *
 826       * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 827       * means more source code but more control over tab width and line-wrapping.
 828       * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 829       * control. Default is GESHI_HEADER_PRE.
 830       *
 831       * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 832       * should be outputted.
 833       *
 834       * @param int The type of header to be used
 835       * @since 1.0.0
 836       */
 837      function set_header_type($type) {
 838          //Check if we got a valid header type
 839          if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 840              GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
 841              $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
 842              return;
 843          }
 844  
 845          //Set that new header type
 846          $this->header_type = $type;
 847      }
 848  
 849      /**
 850       * Sets the styles for the code that will be outputted
 851       * when this object is parsed. The style should be a
 852       * string of valid stylesheet declarations
 853       *
 854       * @param string  The overall style for the outputted code block
 855       * @param boolean Whether to merge the styles with the current styles or not
 856       * @since 1.0.0
 857       */
 858      function set_overall_style($style, $preserve_defaults = false) {
 859          if (!$preserve_defaults) {
 860              $this->overall_style = $style;
 861          } else {
 862              $this->overall_style .= $style;
 863          }
 864      }
 865  
 866      /**
 867       * Sets the overall classname for this block of code. This
 868       * class can then be used in a stylesheet to style this object's
 869       * output
 870       *
 871       * @param string The class name to use for this block of code
 872       * @since 1.0.0
 873       */
 874      function set_overall_class($class) {
 875          $this->overall_class = $class;
 876      }
 877  
 878      /**
 879       * Sets the overall id for this block of code. This id can then
 880       * be used in a stylesheet to style this object's output
 881       *
 882       * @param string The ID to use for this block of code
 883       * @since 1.0.0
 884       */
 885      function set_overall_id($id) {
 886          $this->overall_id = $id;
 887      }
 888  
 889      /**
 890       * Sets whether CSS classes should be used to highlight the source. Default
 891       * is off, calling this method with no arguments will turn it on
 892       *
 893       * @param boolean Whether to turn classes on or not
 894       * @since 1.0.0
 895       */
 896      function enable_classes($flag = true) {
 897          $this->use_classes = ($flag) ? true : false;
 898      }
 899  
 900      /**
 901       * Sets the style for the actual code. This should be a string
 902       * containing valid stylesheet declarations. If $preserve_defaults is
 903       * true, then styles are merged with the default styles, with the
 904       * user defined styles having priority
 905       *
 906       * Note: Use this method to override any style changes you made to
 907       * the line numbers if you are using line numbers, else the line of
 908       * code will have the same style as the line number! Consult the
 909       * GeSHi documentation for more information about this.
 910       *
 911       * @param string  The style to use for actual code
 912       * @param boolean Whether to merge the current styles with the new styles
 913       * @since 1.0.2
 914       */
 915      function set_code_style($style, $preserve_defaults = false) {
 916          if (!$preserve_defaults) {
 917              $this->code_style = $style;
 918          } else {
 919              $this->code_style .= $style;
 920          }
 921      }
 922  
 923      /**
 924       * Sets the styles for the line numbers.
 925       *
 926       * @param string The style for the line numbers that are "normal"
 927       * @param string|boolean If a string, this is the style of the line
 928       *        numbers that are "fancy", otherwise if boolean then this
 929       *        defines whether the normal styles should be merged with the
 930       *        new normal styles or not
 931       * @param boolean If set, is the flag for whether to merge the "fancy"
 932       *        styles with the current styles or not
 933       * @since 1.0.2
 934       */
 935      function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
 936          //Check if we got 2 or three parameters
 937          if (is_bool($style2)) {
 938              $preserve_defaults = $style2;
 939              $style2 = '';
 940          }
 941  
 942          //Actually set the new styles
 943          if (!$preserve_defaults) {
 944              $this->line_style1 = $style1;
 945              $this->line_style2 = $style2;
 946          } else {
 947              $this->line_style1 .= $style1;
 948              $this->line_style2 .= $style2;
 949          }
 950      }
 951  
 952      /**
 953       * Sets whether line numbers should be displayed.
 954       *
 955       * Valid values for the first parameter are:
 956       *
 957       *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 958       *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 959       *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 960       *
 961       * For fancy line numbers, the second parameter is used to signal which lines
 962       * are to be fancy. For example, if the value of this parameter is 5 then every
 963       * 5th line will be fancy.
 964       *
 965       * @param int How line numbers should be displayed
 966       * @param int Defines which lines are fancy
 967       * @since 1.0.0
 968       */
 969      function enable_line_numbers($flag, $nth_row = 5) {
 970          if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
 971              && GESHI_FANCY_LINE_NUMBERS != $flag) {
 972              $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 973          }
 974          $this->line_numbers = $flag;
 975          $this->line_nth_row = $nth_row;
 976      }
 977  
 978      /**
 979       * Sets wether spans and other HTML markup generated by GeSHi can
 980       * span over multiple lines or not. Defaults to true to reduce overhead.
 981       * Set it to false if you want to manipulate the output or manually display
 982       * the code in an ordered list.
 983       *
 984       * @param boolean Wether multiline spans are allowed or not
 985       * @since 1.0.7.22
 986       */
 987      function enable_multiline_span($flag) {
 988          $this->allow_multiline_span = (bool) $flag;
 989      }
 990  
 991      /**
 992       * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
 993       *
 994       * @see enable_multiline_span
 995       * @return bool
 996       */
 997      function get_multiline_span() {
 998          return $this->allow_multiline_span;
 999      }
1000  
1001      /**
1002       * Sets the style for a keyword group. If $preserve_defaults is
1003       * true, then styles are merged with the default styles, with the
1004       * user defined styles having priority
1005       *
1006       * @param int     The key of the keyword group to change the styles of
1007       * @param string  The style to make the keywords
1008       * @param boolean Whether to merge the new styles with the old or just
1009       *                to overwrite them
1010       * @since 1.0.0
1011       */
1012      function set_keyword_group_style($key, $style, $preserve_defaults = false) {
1013          //Set the style for this keyword group
1014          if (!$preserve_defaults) {
1015              $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
1016          } else {
1017              $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
1018          }
1019  
1020          //Update the lexic permissions
1021          if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
1022              $this->lexic_permissions['KEYWORDS'][$key] = true;
1023          }
1024      }
1025  
1026      /**
1027       * Turns highlighting on/off for a keyword group
1028       *
1029       * @param int     The key of the keyword group to turn on or off
1030       * @param boolean Whether to turn highlighting for that group on or off
1031       * @since 1.0.0
1032       */
1033      function set_keyword_group_highlighting($key, $flag = true) {
1034          $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
1035      }
1036  
1037      /**
1038       * Sets the styles for comment groups.  If $preserve_defaults is
1039       * true, then styles are merged with the default styles, with the
1040       * user defined styles having priority
1041       *
1042       * @param int     The key of the comment group to change the styles of
1043       * @param string  The style to make the comments
1044       * @param boolean Whether to merge the new styles with the old or just
1045       *                to overwrite them
1046       * @since 1.0.0
1047       */
1048      function set_comments_style($key, $style, $preserve_defaults = false) {
1049          if (!$preserve_defaults) {
1050              $this->language_data['STYLES']['COMMENTS'][$key] = $style;
1051          } else {
1052              $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
1053          }
1054      }
1055  
1056      /**
1057       * Turns highlighting on/off for comment groups
1058       *
1059       * @param int     The key of the comment group to turn on or off
1060       * @param boolean Whether to turn highlighting for that group on or off
1061       * @since 1.0.0
1062       */
1063      function set_comments_highlighting($key, $flag = true) {
1064          $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
1065      }
1066  
1067      /**
1068       * Sets the styles for escaped characters. If $preserve_defaults is
1069       * true, then styles are merged with the default styles, with the
1070       * user defined styles having priority
1071       *
1072       * @param string  The style to make the escape characters
1073       * @param boolean Whether to merge the new styles with the old or just
1074       *                to overwrite them
1075       * @since 1.0.0
1076       */
1077      function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
1078          if (!$preserve_defaults) {
1079              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
1080          } else {
1081              $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
1082          }
1083      }
1084  
1085      /**
1086       * Turns highlighting on/off for escaped characters
1087       *
1088       * @param boolean Whether to turn highlighting for escape characters on or off
1089       * @since 1.0.0
1090       */
1091      function set_escape_characters_highlighting($flag = true) {
1092          $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1093      }
1094  
1095      /**
1096       * Sets the styles for brackets. If $preserve_defaults is
1097       * true, then styles are merged with the default styles, with the
1098       * user defined styles having priority
1099       *
1100       * This method is DEPRECATED: use set_symbols_style instead.
1101       * This method will be removed in 1.2.X
1102       *
1103       * @param string  The style to make the brackets
1104       * @param boolean Whether to merge the new styles with the old or just
1105       *                to overwrite them
1106       * @since 1.0.0
1107       * @deprecated In favour of set_symbols_style
1108       */
1109      function set_brackets_style($style, $preserve_defaults = false) {
1110          if (!$preserve_defaults) {
1111              $this->language_data['STYLES']['BRACKETS'][0] = $style;
1112          } else {
1113              $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1114          }
1115      }
1116  
1117      /**
1118       * Turns highlighting on/off for brackets
1119       *
1120       * This method is DEPRECATED: use set_symbols_highlighting instead.
1121       * This method will be remove in 1.2.X
1122       *
1123       * @param boolean Whether to turn highlighting for brackets on or off
1124       * @since 1.0.0
1125       * @deprecated In favour of set_symbols_highlighting
1126       */
1127      function set_brackets_highlighting($flag) {
1128          $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1129      }
1130  
1131      /**
1132       * Sets the styles for symbols. If $preserve_defaults is
1133       * true, then styles are merged with the default styles, with the
1134       * user defined styles having priority
1135       *
1136       * @param string  The style to make the symbols
1137       * @param boolean Whether to merge the new styles with the old or just
1138       *                to overwrite them
1139       * @param int     Tells the group of symbols for which style should be set.
1140       * @since 1.0.1
1141       */
1142      function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1143          // Update the style of symbols
1144          if (!$preserve_defaults) {
1145              $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1146          } else {
1147              $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1148          }
1149  
1150          // For backward compatibility
1151          if (0 == $group) {
1152              $this->set_brackets_style ($style, $preserve_defaults);
1153          }
1154      }
1155  
1156      /**
1157       * Turns highlighting on/off for symbols
1158       *
1159       * @param boolean Whether to turn highlighting for symbols on or off
1160       * @since 1.0.0
1161       */
1162      function set_symbols_highlighting($flag) {
1163          // Update lexic permissions for this symbol group
1164          $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1165  
1166          // For backward compatibility
1167          $this->set_brackets_highlighting ($flag);
1168      }
1169  
1170      /**
1171       * Sets the styles for strings. If $preserve_defaults is
1172       * true, then styles are merged with the default styles, with the
1173       * user defined styles having priority
1174       *
1175       * @param string  The style to make the escape characters
1176       * @param boolean Whether to merge the new styles with the old or just
1177       *                to overwrite them
1178       * @param int     Tells the group of strings for which style should be set.
1179       * @since 1.0.0
1180       */
1181      function set_strings_style($style, $preserve_defaults = false, $group = 0) {
1182          if (!$preserve_defaults) {
1183              $this->language_data['STYLES']['STRINGS'][$group] = $style;
1184          } else {
1185              $this->language_data['STYLES']['STRINGS'][$group] .= $style;
1186          }
1187      }
1188  
1189      /**
1190       * Turns highlighting on/off for strings
1191       *
1192       * @param boolean Whether to turn highlighting for strings on or off
1193       * @since 1.0.0
1194       */
1195      function set_strings_highlighting($flag) {
1196          $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1197      }
1198  
1199      /**
1200       * Sets the styles for strict code blocks. If $preserve_defaults is
1201       * true, then styles are merged with the default styles, with the
1202       * user defined styles having priority
1203       *
1204       * @param string  The style to make the script blocks
1205       * @param boolean Whether to merge the new styles with the old or just
1206       *                to overwrite them
1207       * @param int     Tells the group of script blocks for which style should be set.
1208       * @since 1.0.8.4
1209       */
1210      function set_script_style($style, $preserve_defaults = false, $group = 0) {
1211          // Update the style of symbols
1212          if (!$preserve_defaults) {
1213              $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1214          } else {
1215              $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1216          }
1217      }
1218  
1219      /**
1220       * Sets the styles for numbers. If $preserve_defaults is
1221       * true, then styles are merged with the default styles, with the
1222       * user defined styles having priority
1223       *
1224       * @param string  The style to make the numbers
1225       * @param boolean Whether to merge the new styles with the old or just
1226       *                to overwrite them
1227       * @param int     Tells the group of numbers for which style should be set.
1228       * @since 1.0.0
1229       */
1230      function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
1231          if (!$preserve_defaults) {
1232              $this->language_data['STYLES']['NUMBERS'][$group] = $style;
1233          } else {
1234              $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
1235          }
1236      }
1237  
1238      /**
1239       * Turns highlighting on/off for numbers
1240       *
1241       * @param boolean Whether to turn highlighting for numbers on or off
1242       * @since 1.0.0
1243       */
1244      function set_numbers_highlighting($flag) {
1245          $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1246      }
1247  
1248      /**
1249       * Sets the styles for methods. $key is a number that references the
1250       * appropriate "object splitter" - see the language file for the language
1251       * you are highlighting to get this number. If $preserve_defaults is
1252       * true, then styles are merged with the default styles, with the
1253       * user defined styles having priority
1254       *
1255       * @param int     The key of the object splitter to change the styles of
1256       * @param string  The style to make the methods
1257       * @param boolean Whether to merge the new styles with the old or just
1258       *                to overwrite them
1259       * @since 1.0.0
1260       */
1261      function set_methods_style($key, $style, $preserve_defaults = false) {
1262          if (!$preserve_defaults) {
1263              $this->language_data['STYLES']['METHODS'][$key] = $style;
1264          } else {
1265              $this->language_data['STYLES']['METHODS'][$key] .= $style;
1266          }
1267      }
1268  
1269      /**
1270       * Turns highlighting on/off for methods
1271       *
1272       * @param boolean Whether to turn highlighting for methods on or off
1273       * @since 1.0.0
1274       */
1275      function set_methods_highlighting($flag) {
1276          $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1277      }
1278  
1279      /**
1280       * Sets the styles for regexps. If $preserve_defaults is
1281       * true, then styles are merged with the default styles, with the
1282       * user defined styles having priority
1283       *
1284       * @param string  The style to make the regular expression matches
1285       * @param boolean Whether to merge the new styles with the old or just
1286       *                to overwrite them
1287       * @since 1.0.0
1288       */
1289      function set_regexps_style($key, $style, $preserve_defaults = false) {
1290          if (!$preserve_defaults) {
1291              $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1292          } else {
1293              $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1294          }
1295      }
1296  
1297      /**
1298       * Turns highlighting on/off for regexps
1299       *
1300       * @param int     The key of the regular expression group to turn on or off
1301       * @param boolean Whether to turn highlighting for the regular expression group on or off
1302       * @since 1.0.0
1303       */
1304      function set_regexps_highlighting($key, $flag) {
1305          $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1306      }
1307  
1308      /**
1309       * Sets whether a set of keywords are checked for in a case sensitive manner
1310       *
1311       * @param int The key of the keyword group to change the case sensitivity of
1312       * @param boolean Whether to check in a case sensitive manner or not
1313       * @since 1.0.0
1314       */
1315      function set_case_sensitivity($key, $case) {
1316          $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1317      }
1318  
1319      /**
1320       * Sets the case that keywords should use when found. Use the constants:
1321       *
1322       *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1323       *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1324       *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1325       *
1326       * @param int A constant specifying what to do with matched keywords
1327       * @since 1.0.1
1328       */
1329      function set_case_keywords($case) {
1330          if (in_array($case, array(
1331              GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1332              $this->language_data['CASE_KEYWORDS'] = $case;
1333          }
1334      }
1335  
1336      /**
1337       * Sets how many spaces a tab is substituted for
1338       *
1339       * Widths below zero are ignored
1340       *
1341       * @param int The tab width
1342       * @since 1.0.0
1343       */
1344      function set_tab_width($width) {
1345          $this->tab_width = intval($width);
1346  
1347          //Check if it fit's the constraints:
1348          if ($this->tab_width < 1) {
1349              //Return it to the default
1350              $this->tab_width = 8;
1351          }
1352      }
1353  
1354      /**
1355       * Sets whether or not to use tab-stop width specifed by language
1356       *
1357       * @param boolean Whether to use language-specific tab-stop widths
1358       * @since 1.0.7.20
1359       */
1360      function set_use_language_tab_width($use) {
1361          $this->use_language_tab_width = (bool) $use;
1362      }
1363  
1364      /**
1365       * Returns the tab width to use, based on the current language and user
1366       * preference
1367       *
1368       * @return int Tab width
1369       * @since 1.0.7.20
1370       */
1371      function get_real_tab_width() {
1372          if (!$this->use_language_tab_width ||
1373              !isset($this->language_data['TAB_WIDTH'])) {
1374              return $this->tab_width;
1375          } else {
1376              return $this->language_data['TAB_WIDTH'];
1377          }
1378      }
1379  
1380      /**
1381       * Enables/disables strict highlighting. Default is off, calling this
1382       * method without parameters will turn it on. See documentation
1383       * for more details on strict mode and where to use it.
1384       *
1385       * @param boolean Whether to enable strict mode or not
1386       * @since 1.0.0
1387       */
1388      function enable_strict_mode($mode = true) {
1389          if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1390              $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1391          }
1392      }
1393  
1394      /**
1395       * Disables all highlighting
1396       *
1397       * @since 1.0.0
1398       * @todo  Rewrite with array traversal
1399       * @deprecated In favour of enable_highlighting
1400       */
1401      function disable_highlighting() {
1402          $this->enable_highlighting(false);
1403      }
1404  
1405      /**
1406       * Enables all highlighting
1407       *
1408       * The optional flag parameter was added in version 1.0.7.21 and can be used
1409       * to enable (true) or disable (false) all highlighting.
1410       *
1411       * @since 1.0.0
1412       * @param boolean A flag specifying whether to enable or disable all highlighting
1413       * @todo  Rewrite with array traversal
1414       */
1415      function enable_highlighting($flag = true) {
1416          $flag = $flag ? true : false;
1417          foreach ($this->lexic_permissions as $key => $value) {
1418              if (is_array($value)) {
1419                  foreach ($value as $k => $v) {
1420                      $this->lexic_permissions[$key][$k] = $flag;
1421                  }
1422              } else {
1423                  $this->lexic_permissions[$key] = $flag;
1424              }
1425          }
1426  
1427          // Context blocks
1428          $this->enable_important_blocks = $flag;
1429      }
1430  
1431      /**
1432       * Given a file extension, this method returns either a valid geshi language
1433       * name, or the empty string if it couldn't be found
1434       *
1435       * @param string The extension to get a language name for
1436       * @param array  A lookup array to use instead of the default one
1437       * @since 1.0.5
1438       * @todo Re-think about how this method works (maybe make it private and/or make it
1439       *       a extension->lang lookup?)
1440       * @todo static?
1441       */
1442      function get_language_name_from_extension( $extension, $lookup = array() ) {
1443          if ( !is_array($lookup) || empty($lookup)) {
1444              $lookup = array(
1445                  '6502acme' => array( 'a', 's', 'asm', 'inc' ),
1446                  '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
1447                  '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
1448                  '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
1449                  'abap' => array('abap'),
1450                  'actionscript' => array('as'),
1451                  'ada' => array('a', 'ada', 'adb', 'ads'),
1452                  'apache' => array('conf'),
1453                  'asm' => array('ash', 'asm', 'inc'),
1454                  'asp' => array('asp'),
1455                  'bash' => array('sh'),
1456                  'bf' => array('bf'),
1457                  'c' => array('c', 'h'),
1458                  'c_mac' => array('c', 'h'),
1459                  'caddcl' => array(),
1460                  'cadlisp' => array(),
1461                  'cdfg' => array('cdfg'),
1462                  'cobol' => array('cbl'),
1463                  'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1464                  'csharp' => array('cs'),
1465                  'css' => array('css'),
1466                  'd' => array('d'),
1467                  'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1468                  'diff' => array('diff', 'patch'),
1469                  'dos' => array('bat', 'cmd'),
1470                  'gdb' => array('kcrash', 'crash', 'bt'),
1471                  'gettext' => array('po', 'pot'),
1472                  'gml' => array('gml'),
1473                  'gnuplot' => array('plt'),
1474                  'groovy' => array('groovy'),
1475                  'haskell' => array('hs'),
1476                  'html4strict' => array('html', 'htm'),
1477                  'ini' => array('ini', 'desktop'),
1478                  'java' => array('java'),
1479                  'javascript' => array('js'),
1480                  'klonec' => array('kl1'),
1481                  'klonecpp' => array('klx'),
1482                  'latex' => array('tex'),
1483                  'lisp' => array('lisp'),
1484                  'lua' => array('lua'),
1485                  'matlab' => array('m'),
1486                  'mpasm' => array(),
1487                  'mysql' => array('sql'),
1488                  'nsis' => array(),
1489                  'objc' => array(),
1490                  'oobas' => array(),
1491                  'oracle8' => array(),
1492                  'oracle10' => array(),
1493                  'pascal' => array('pas'),
1494                  'perl' => array('pl', 'pm'),
1495                  'php' => array('php', 'php5', 'phtml', 'phps'),
1496                  'povray' => array('pov'),
1497                  'providex' => array('pvc', 'pvx'),
1498                  'prolog' => array('pl'),
1499                  'python' => array('py'),
1500                  'qbasic' => array('bi'),
1501                  'reg' => array('reg'),
1502                  'ruby' => array('rb'),
1503                  'sas' => array('sas'),
1504                  'scala' => array('scala'),
1505                  'scheme' => array('scm'),
1506                  'scilab' => array('sci'),
1507                  'smalltalk' => array('st'),
1508                  'smarty' => array(),
1509                  'tcl' => array('tcl'),
1510                  'vb' => array('bas'),
1511                  'vbnet' => array(),
1512                  'visualfoxpro' => array(),
1513                  'whitespace' => array('ws'),
1514                  'xml' => array('xml', 'svg', 'xrc'),
1515                  'z80' => array('z80', 'asm', 'inc')
1516              );
1517          }
1518  
1519          foreach ($lookup as $lang => $extensions) {
1520              if (in_array($extension, $extensions)) {
1521                  return $lang;
1522              }
1523          }
1524          return '';
1525      }
1526  
1527      /**
1528       * Given a file name, this method loads its contents in, and attempts
1529       * to set the language automatically. An optional lookup table can be
1530       * passed for looking up the language name. If not specified a default
1531       * table is used
1532       *
1533       * The language table is in the form
1534       * <pre>array(
1535       *   'lang_name' => array('extension', 'extension', ...),
1536       *   'lang_name' ...
1537       * );</pre>
1538       *
1539       * @param string The filename to load the source from
1540       * @param array  A lookup array to use instead of the default one
1541       * @todo Complete rethink of this and above method
1542       * @since 1.0.5
1543       */
1544      function load_from_file($file_name, $lookup = array()) {
1545          if (is_readable($file_name)) {
1546              $this->set_source(file_get_contents($file_name));
1547              $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1548          } else {
1549              $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1550          }
1551      }
1552  
1553      /**
1554       * Adds a keyword to a keyword group for highlighting
1555       *
1556       * @param int    The key of the keyword group to add the keyword to
1557       * @param string The word to add to the keyword group
1558       * @since 1.0.0
1559       */
1560      function add_keyword($key, $word) {
1561          if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1562              $this->language_data['KEYWORDS'][$key][] = $word;
1563  
1564              //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1565              if ($this->parse_cache_built) {
1566                  $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1567                  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1568              }
1569          }
1570      }
1571  
1572      /**
1573       * Removes a keyword from a keyword group
1574       *
1575       * @param int    The key of the keyword group to remove the keyword from
1576       * @param string The word to remove from the keyword group
1577       * @param bool   Wether to automatically recompile the optimized regexp list or not.
1578       *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1579       *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1580       *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1581       *               it might be too expensive to recompile the regexp list for every removal if you want to
1582       *               remove a lot of keywords.
1583       * @since 1.0.0
1584       */
1585      function remove_keyword($key, $word, $recompile = true) {
1586          $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1587          if ($key_to_remove !== false) {
1588              unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1589  
1590              //NEW in 1.0.8, optionally recompile keyword group
1591              if ($recompile && $this->parse_cache_built) {
1592                  $this->optimize_keyword_group($key);
1593              }
1594          }
1595      }
1596  
1597      /**
1598       * Creates a new keyword group
1599       *
1600       * @param int    The key of the keyword group to create
1601       * @param string The styles for the keyword group
1602       * @param boolean Whether the keyword group is case sensitive ornot
1603       * @param array  The words to use for the keyword group
1604       * @since 1.0.0
1605       */
1606      function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1607          $words = (array) $words;
1608          if  (empty($words)) {
1609              // empty word lists mess up highlighting
1610              return false;
1611          }
1612  
1613          //Add the new keyword group internally
1614          $this->language_data['KEYWORDS'][$key] = $words;
1615          $this->lexic_permissions['KEYWORDS'][$key] = true;
1616          $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1617          $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1618  
1619          //NEW in 1.0.8, cache keyword regexp
1620          if ($this->parse_cache_built) {
1621              $this->optimize_keyword_group($key);
1622          }
1623      }
1624  
1625      /**
1626       * Removes a keyword group
1627       *
1628       * @param int    The key of the keyword group to remove
1629       * @since 1.0.0
1630       */
1631      function remove_keyword_group ($key) {
1632          //Remove the keyword group internally
1633          unset($this->language_data['KEYWORDS'][$key]);
1634          unset($this->lexic_permissions['KEYWORDS'][$key]);
1635          unset($this->language_data['CASE_SENSITIVE'][$key]);
1636          unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1637  
1638          //NEW in 1.0.8
1639          unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1640      }
1641  
1642      /**
1643       * compile optimized regexp list for keyword group
1644       *
1645       * @param int   The key of the keyword group to compile & optimize
1646       * @since 1.0.8
1647       */
1648      function optimize_keyword_group($key) {
1649          $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1650              $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1651          $space_as_whitespace = false;
1652          if(isset($this->language_data['PARSER_CONTROL'])) {
1653              if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1654                  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1655                      $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1656                  }
1657                  if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1658                      if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1659                          $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1660                      }
1661                  }
1662              }
1663          }
1664          if($space_as_whitespace) {
1665              foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1666                  $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1667                      str_replace(" ", "\\s+", $rxv);
1668              }
1669          }
1670      }
1671  
1672      /**
1673       * Sets the content of the header block
1674       *
1675       * @param string The content of the header block
1676       * @since 1.0.2
1677       */
1678      function set_header_content($content) {
1679          $this->header_content = $content;
1680      }
1681  
1682      /**
1683       * Sets the content of the footer block
1684       *
1685       * @param string The content of the footer block
1686       * @since 1.0.2
1687       */
1688      function set_footer_content($content) {
1689          $this->footer_content = $content;
1690      }
1691  
1692      /**
1693       * Sets the style for the header content
1694       *
1695       * @param string The style for the header content
1696       * @since 1.0.2
1697       */
1698      function set_header_content_style($style) {
1699          $this->header_content_style = $style;
1700      }
1701  
1702      /**
1703       * Sets the style for the footer content
1704       *
1705       * @param string The style for the footer content
1706       * @since 1.0.2
1707       */
1708      function set_footer_content_style($style) {
1709          $this->footer_content_style = $style;
1710      }
1711  
1712      /**
1713       * Sets whether to force a surrounding block around
1714       * the highlighted code or not
1715       *
1716       * @param boolean Tells whether to enable or disable this feature
1717       * @since 1.0.7.20
1718       */
1719      function enable_inner_code_block($flag) {
1720          $this->force_code_block = (bool)$flag;
1721      }
1722  
1723      /**
1724       * Sets the base URL to be used for keywords
1725       *
1726       * @param int The key of the keyword group to set the URL for
1727       * @param string The URL to set for the group. If {FNAME} is in
1728       *               the url somewhere, it is replaced by the keyword
1729       *               that the URL is being made for
1730       * @since 1.0.2
1731       */
1732      function set_url_for_keyword_group($group, $url) {
1733          $this->language_data['URLS'][$group] = $url;
1734      }
1735  
1736      /**
1737       * Sets styles for links in code
1738       *
1739       * @param int A constant that specifies what state the style is being
1740       *            set for - e.g. :hover or :visited
1741       * @param string The styles to use for that state
1742       * @since 1.0.2
1743       */
1744      function set_link_styles($type, $styles) {
1745          $this->link_styles[$type] = $styles;
1746      }
1747  
1748      /**
1749       * Sets the target for links in code
1750       *
1751       * @param string The target for links in the code, e.g. _blank
1752       * @since 1.0.3
1753       */
1754      function set_link_target($target) {
1755          if (!$target) {
1756              $this->link_target = '';
1757          } else {
1758              $this->link_target = ' target="' . $target . '"';
1759          }
1760      }
1761  
1762      /**
1763       * Sets styles for important parts of the code
1764       *
1765       * @param string The styles to use on important parts of the code
1766       * @since 1.0.2
1767       */
1768      function set_important_styles($styles) {
1769          $this->important_styles = $styles;
1770      }
1771  
1772      /**
1773       * Sets whether context-important blocks are highlighted
1774       *
1775       * @param boolean Tells whether to enable or disable highlighting of important blocks
1776       * @todo REMOVE THIS SHIZ FROM GESHI!
1777       * @deprecated
1778       * @since 1.0.2
1779       */
1780      function enable_important_blocks($flag) {
1781          $this->enable_important_blocks = ( $flag ) ? true : false;
1782      }
1783  
1784      /**
1785       * Whether CSS IDs should be added to each line
1786       *
1787       * @param boolean If true, IDs will be added to each line.
1788       * @since 1.0.2
1789       */
1790      function enable_ids($flag = true) {
1791          $this->add_ids = ($flag) ? true : false;
1792      }
1793  
1794      /**
1795       * Specifies which lines to highlight extra
1796       *
1797       * The extra style parameter was added in 1.0.7.21.
1798       *
1799       * @param mixed An array of line numbers to highlight, or just a line
1800       *              number on its own.
1801       * @param string A string specifying the style to use for this line.
1802       *              If null is specified, the default style is used.
1803       *              If false is specified, the line will be removed from
1804       *              special highlighting
1805       * @since 1.0.2
1806       * @todo  Some data replication here that could be cut down on
1807       */
1808      function highlight_lines_extra($lines, $style = null) {
1809          if (is_array($lines)) {
1810              //Split up the job using single lines at a time
1811              foreach ($lines as $line) {
1812                  $this->highlight_lines_extra($line, $style);
1813              }
1814          } else {
1815              //Mark the line as being highlighted specially
1816              $lines = intval($lines);
1817              $this->highlight_extra_lines[$lines] = $lines;
1818  
1819              //Decide on which style to use
1820              if ($style === null) { //Check if we should use default style
1821                  unset($this->highlight_extra_lines_styles[$lines]);
1822              } else if ($style === false) { //Check if to remove this line
1823                  unset($this->highlight_extra_lines[$lines]);
1824                  unset($this->highlight_extra_lines_styles[$lines]);
1825              } else {
1826                  $this->highlight_extra_lines_styles[$lines] = $style;
1827              }
1828          }
1829      }
1830  
1831      /**
1832       * Sets the style for extra-highlighted lines
1833       *
1834       * @param string The style for extra-highlighted lines
1835       * @since 1.0.2
1836       */
1837      function set_highlight_lines_extra_style($styles) {
1838          $this->highlight_extra_lines_style = $styles;
1839      }
1840  
1841      /**
1842       * Sets the line-ending
1843       *
1844       * @param string The new line-ending
1845       * @since 1.0.2
1846       */
1847      function set_line_ending($line_ending) {
1848          $this->line_ending = (string)$line_ending;
1849      }
1850  
1851      /**
1852       * Sets what number line numbers should start at. Should
1853       * be a positive integer, and will be converted to one.
1854       *
1855       * <b>Warning:</b> Using this method will add the "start"
1856       * attribute to the &lt;ol&gt; that is used for line numbering.
1857       * This is <b>not</b> valid XHTML strict, so if that's what you
1858       * care about then don't use this method. Firefox is getting
1859       * support for the CSS method of doing this in 1.1 and Opera
1860       * has support for the CSS method, but (of course) IE doesn't
1861       * so it's not worth doing it the CSS way yet.
1862       *
1863       * @param int The number to start line numbers at
1864       * @since 1.0.2
1865       */
1866      function start_line_numbers_at($number) {
1867          $this->line_numbers_start = abs(intval($number));
1868      }
1869  
1870      /**
1871       * Sets the encoding used for htmlspecialchars(), for international
1872       * support.
1873       *
1874       * NOTE: This is not needed for now because htmlspecialchars() is not
1875       * being used (it has a security hole in PHP4 that has not been patched).
1876       * Maybe in a future version it may make a return for speed reasons, but
1877       * I doubt it.
1878       *
1879       * @param string The encoding to use for the source
1880       * @since 1.0.3
1881       */
1882      function set_encoding($encoding) {
1883          if ($encoding) {
1884            $this->encoding = strtolower($encoding);
1885          }
1886      }
1887  
1888      /**
1889       * Turns linking of keywords on or off.
1890       *
1891       * @param boolean If true, links will be added to keywords
1892       * @since 1.0.2
1893       */
1894      function enable_keyword_links($enable = true) {
1895          $this->keyword_links = (bool) $enable;
1896      }
1897  
1898      /**
1899       * Setup caches needed for styling. This is automatically called in
1900       * parse_code() and get_stylesheet() when appropriate. This function helps
1901       * stylesheet generators as they rely on some style information being
1902       * preprocessed
1903       *
1904       * @since 1.0.8
1905       * @access private
1906       */
1907      function build_style_cache() {
1908          //Build the style cache needed to highlight numbers appropriate
1909          if($this->lexic_permissions['NUMBERS']) {
1910              //First check what way highlighting information for numbers are given
1911              if(!isset($this->language_data['NUMBERS'])) {
1912                  $this->language_data['NUMBERS'] = 0;
1913              }
1914  
1915              if(is_array($this->language_data['NUMBERS'])) {
1916                  $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
1917              } else {
1918                  $this->language_data['NUMBERS_CACHE'] = array();
1919                  if(!$this->language_data['NUMBERS']) {
1920                      $this->language_data['NUMBERS'] =
1921                          GESHI_NUMBER_INT_BASIC |
1922                          GESHI_NUMBER_FLT_NONSCI;
1923                  }
1924  
1925                  for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) {
1926                      //Rearrange style indices if required ...
1927                      if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) {
1928                          $this->language_data['STYLES']['NUMBERS'][$i] =
1929                              $this->language_data['STYLES']['NUMBERS'][1<<$i];
1930                          unset($this->language_data['STYLES']['NUMBERS'][1<<$i]);
1931                      }
1932  
1933                      //Check if this bit is set for highlighting
1934                      if($j&1) {
1935                          //So this bit is set ...
1936                          //Check if it belongs to group 0 or the actual stylegroup
1937                          if(isset($this->language_data['STYLES']['NUMBERS'][$i])) {
1938                              $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i;
1939                          } else {
1940                              if(!isset($this->language_data['NUMBERS_CACHE'][0])) {
1941                                  $this->language_data['NUMBERS_CACHE'][0] = 0;
1942                              }
1943                              $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i;
1944                          }
1945                      }
1946                  }
1947              }
1948          }
1949      }
1950  
1951      /**
1952       * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1953       * This function makes stylesheet generators much faster as they do not need these caches.
1954       *
1955       * @since 1.0.8
1956       * @access private
1957       */
1958      function build_parse_cache() {
1959          // cache symbol regexp
1960          //As this is a costy operation, we avoid doing it for multiple groups ...
1961          //Instead we perform it for all symbols at once.
1962          //
1963          //For this to work, we need to reorganize the data arrays.
1964          if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1965              $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
1966  
1967              $this->language_data['SYMBOL_DATA'] = array();
1968              $symbol_preg_multi = array(); // multi char symbols
1969              $symbol_preg_single = array(); // single char symbols
1970              foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1971                  if (is_array($symbols)) {
1972                      foreach ($symbols as $sym) {
1973                          $sym = $this->hsc($sym);
1974                          if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1975                              $this->language_data['SYMBOL_DATA'][$sym] = $key;
1976                              if (isset($sym[1])) { // multiple chars
1977                                  $symbol_preg_multi[] = preg_quote($sym, '/');
1978                              } else { // single char
1979                                  if ($sym == '-') {
1980                                      // don't trigger range out of order error
1981                                      $symbol_preg_single[] = '\-';
1982                                  } else {
1983                                      $symbol_preg_single[] = preg_quote($sym, '/');
1984                                  }
1985                              }
1986                          }
1987                      }
1988                  } else {
1989                      $symbols = $this->hsc($symbols);
1990                      if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1991                          $this->language_data['SYMBOL_DATA'][$symbols] = 0;
1992                          if (isset($symbols[1])) { // multiple chars
1993                              $symbol_preg_multi[] = preg_quote($symbols, '/');
1994                          } else if ($symbols == '-') {
1995                              // don't trigger range out of order error
1996                              $symbol_preg_single[] = '\-';
1997                          } else { // single char
1998                              $symbol_preg_single[] = preg_quote($symbols, '/');
1999                          }
2000                      }
2001                  }
2002              }
2003  
2004              //Now we have an array with each possible symbol as the key and the style as the actual data.
2005              //This way we can set the correct style just the moment we highlight ...
2006              //
2007              //Now we need to rewrite our array to get a search string that
2008              $symbol_preg = array();
2009              if (!empty($symbol_preg_multi)) {
2010                  rsort($symbol_preg_multi);
2011                  $symbol_preg[] = implode('|', $symbol_preg_multi);
2012              }
2013              if (!empty($symbol_preg_single)) {
2014                  rsort($symbol_preg_single);
2015                  $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2016              }
2017              $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2018          }
2019  
2020          // cache optimized regexp for keyword matching
2021          // remove old cache
2022          $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2023          foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2024              if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2025                      $this->lexic_permissions['KEYWORDS'][$key]) {
2026                  $this->optimize_keyword_group($key);
2027              }
2028          }
2029  
2030          // brackets
2031          if ($this->lexic_permissions['BRACKETS']) {
2032              $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2033              if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2034                  $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2035                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#91;|>',
2036                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#93;|>',
2037                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#40;|>',
2038                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#41;|>',
2039                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#123;|>',
2040                      '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">&#125;|>',
2041                  );
2042              }
2043              else {
2044                  $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2045                      '<| class="br0">&#91;|>',
2046                      '<| class="br0">&#93;|>',
2047                      '<| class="br0">&#40;|>',
2048                      '<| class="br0">&#41;|>',
2049                      '<| class="br0">&#123;|>',
2050                      '<| class="br0">&#125;|>',
2051                  );
2052              }
2053          }
2054  
2055          //Build the parse cache needed to highlight numbers appropriate
2056          if($this->lexic_permissions['NUMBERS']) {
2057              //Check if the style rearrangements have been processed ...
2058              //This also does some preprocessing to check which style groups are useable ...
2059              if(!isset($this->language_data['NUMBERS_CACHE'])) {
2060                  $this->build_style_cache();
2061              }
2062  
2063              //Number format specification
2064              //All this formats are matched case-insensitively!
2065              static $numbers_format = array(
2066                  GESHI_NUMBER_INT_BASIC =>
2067                      '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2068                  GESHI_NUMBER_INT_CSTYLE =>
2069                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2070                  GESHI_NUMBER_BIN_SUFFIX =>
2071                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2072                  GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2073                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2074                  GESHI_NUMBER_BIN_PREFIX_0B =>
2075                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2076                  GESHI_NUMBER_OCT_PREFIX =>
2077                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2078                  GESHI_NUMBER_OCT_PREFIX_0O =>
2079                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2080                  GESHI_NUMBER_OCT_PREFIX_AT =>
2081                      '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2082                  GESHI_NUMBER_OCT_SUFFIX =>
2083                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2084                  GESHI_NUMBER_HEX_PREFIX =>
2085                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2086                  GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2087                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2088                  GESHI_NUMBER_HEX_SUFFIX =>
2089                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2090                  GESHI_NUMBER_FLT_NONSCI =>
2091                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2092                  GESHI_NUMBER_FLT_NONSCI_F =>
2093                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2094                  GESHI_NUMBER_FLT_SCI_SHORT =>
2095                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2096                  GESHI_NUMBER_FLT_SCI_ZERO =>
2097                      '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2098                  );
2099  
2100              //At this step we have an associative array with flag groups for a
2101              //specific style or an string denoting a regexp given its index.
2102              $this->language_data['NUMBERS_RXCACHE'] = array();
2103              foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2104                  if(is_string($rxdata)) {
2105                      $regexp = $rxdata;
2106                  } else {
2107                      //This is a bitfield of number flags to highlight:
2108                      //Build an array, implode them together and make this the actual RX
2109                      $rxuse = array();
2110                      for($i = 1; $i <= $rxdata; $i<<=1) {
2111                          if($rxdata & $i) {
2112                              $rxuse[] = $numbers_format[$i];
2113                          }
2114                      }
2115                      $regexp = implode("|", $rxuse);
2116                  }
2117  
2118                  $this->language_data['NUMBERS_RXCACHE'][$key] =
2119                      "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
2120              }
2121  
2122              if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2123                  $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2124              }
2125          }
2126  
2127          $this->parse_cache_built = true;
2128      }
2129  
2130      /**
2131       * Returns the code in $this->source, highlighted and surrounded by the
2132       * nessecary HTML.
2133       *
2134       * This should only be called ONCE, cos it's SLOW! If you want to highlight
2135       * the same source multiple times, you're better off doing a whole lot of
2136       * str_replaces to replace the &lt;span&gt;s
2137       *
2138       * @since 1.0.0
2139       */
2140      function parse_code () {
2141          // Start the timer
2142          $start_time = microtime();
2143  
2144          // Replace all newlines to a common form.
2145          $code = str_replace("\r\n", "\n", $this->source);
2146          $code = str_replace("\r", "\n", $code);
2147  
2148          // Firstly, if there is an error, we won't highlight
2149          if ($this->error) {
2150              //Escape the source for output
2151              $result = $this->hsc($this->source);
2152  
2153              //This fix is related to SF#1923020, but has to be applied regardless of
2154              //actually highlighting symbols.
2155              $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2156  
2157              // Timing is irrelevant
2158              $this->set_time($start_time, $start_time);
2159              $this->finalise($result);
2160              return $result;
2161          }
2162  
2163          // make sure the parse cache is up2date
2164          if (!$this->parse_cache_built) {
2165              $this->build_parse_cache();
2166          }
2167  
2168          // Initialise various stuff
2169          $length           = strlen($code);
2170          $COMMENT_MATCHED  = false;
2171          $stuff_to_parse   = '';
2172          $endresult        = '';
2173  
2174          // "Important" selections are handled like multiline comments
2175          // @todo GET RID OF THIS SHIZ
2176          if ($this->enable_important_blocks) {
2177              $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2178          }
2179  
2180          if ($this->strict_mode) {
2181              // Break the source into bits. Each bit will be a portion of the code
2182              // within script delimiters - for example, HTML between < and >
2183              $k = 0;
2184              $parts = array();
2185              $matches = array();
2186              $next_match_pointer = null;
2187              // we use a copy to unset delimiters on demand (when they are not found)
2188              $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2189              $i = 0;
2190              while ($i < $length) {
2191                  $next_match_pos = $length + 1; // never true
2192                  foreach ($delim_copy as $dk => $delimiters) {
2193                      if(is_array($delimiters)) {
2194                          foreach ($delimiters as $open => $close) {
2195                              // make sure the cache is setup properly
2196                              if (!isset($matches[$dk][$open])) {
2197                                  $matches[$dk][$open] = array(
2198                                      'next_match' => -1,
2199                                      'dk' => $dk,
2200  
2201                                      'open' => $open, // needed for grouping of adjacent code blocks (see below)
2202                                      'open_strlen' => strlen($open),
2203  
2204                                      'close' => $close,
2205                                      'close_strlen' => strlen($close),
2206                                  );
2207                              }
2208                              // Get the next little bit for this opening string
2209                              if ($matches[$dk][$open]['next_match'] < $i) {
2210                                  // only find the next pos if it was not already cached
2211                                  $open_pos = strpos($code, $open, $i);
2212                                  if ($open_pos === false) {
2213                                      // no match for this delimiter ever
2214                                      unset($delim_copy[$dk][$open]);
2215                                      continue;
2216                                  }
2217                                  $matches[$dk][$open]['next_match'] = $open_pos;
2218                              }
2219                              if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2220                                  //So we got a new match, update the close_pos
2221                                  $matches[$dk][$open]['close_pos'] =
2222                                      strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2223  
2224                                  $next_match_pointer =& $matches[$dk][$open];
2225                                  $next_match_pos = $matches[$dk][$open]['next_match'];
2226                              }
2227                          }
2228                      } else {
2229                          //So we should match an RegExp as Strict Block ...
2230                          /**
2231                           * The value in $delimiters is expected to be an RegExp
2232                           * containing exactly 2 matching groups:
2233                           *  - Group 1 is the opener
2234                           *  - Group 2 is the closer
2235                           */
2236                          if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2237                              preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2238                              //We got a match ...
2239                              if(isset($matches_rx['start']) && isset($matches_rx['end']))
2240                              {
2241                                  $matches[$dk] = array(
2242                                      'next_match' => $matches_rx['start'][1],
2243                                      'dk' => $dk,
2244  
2245                                      'close_strlen' => strlen($matches_rx['end'][0]),
2246                                      'close_pos' => $matches_rx['end'][1],
2247                                      );
2248                              } else {
2249                                  $matches[$dk] = array(
2250                                      'next_match' => $matches_rx[1][1],
2251                                      'dk' => $dk,
2252  
2253                                      'close_strlen' => strlen($matches_rx[2][0]),
2254                                      'close_pos' => $matches_rx[2][1],
2255                                      );
2256                              }
2257                          } else {
2258                              // no match for this delimiter ever
2259                              unset($delim_copy[$dk]);
2260                              continue;
2261                          }
2262  
2263                          if ($matches[$dk]['next_match'] <= $next_match_pos) {
2264                              $next_match_pointer =& $matches[$dk];
2265                              $next_match_pos = $matches[$dk]['next_match'];
2266                          }
2267                      }
2268                  }
2269  
2270                  // non-highlightable text
2271                  $parts[$k] = array(
2272                      1 => substr($code, $i, $next_match_pos - $i)
2273                  );
2274                  ++$k;
2275  
2276                  if ($next_match_pos > $length) {
2277                      // out of bounds means no next match was found
2278                      break;
2279                  }
2280  
2281                  // highlightable code
2282                  $parts[$k][0] = $next_match_pointer['dk'];
2283  
2284                  //Only combine for non-rx script blocks
2285                  if(is_array($delim_copy[$next_match_pointer['dk']])) {
2286                      // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2287                      $i = $next_match_pos + $next_match_pointer['open_strlen'];
2288                      while (true) {
2289                          $close_pos = strpos($code, $next_match_pointer['close'], $i);
2290                          if ($close_pos == false) {
2291                              break;
2292                          }
2293                          $i = $close_pos + $next_match_pointer['close_strlen'];
2294                          if ($i == $length) {
2295                              break;
2296                          }
2297                          if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2298                              substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2299                              // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2300                              foreach ($matches as $submatches) {
2301                                  foreach ($submatches as $match) {
2302                                      if ($match['next_match'] == $i) {
2303                                          // a different block already matches here!
2304                                          break 3;
2305                                      }
2306                                  }
2307                              }
2308                          } else {
2309                              break;
2310                          }
2311                      }
2312                  } else {
2313                      $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2314                      $i = $close_pos;
2315                  }
2316  
2317                  if ($close_pos === false) {
2318                      // no closing delimiter found!
2319                      $parts[$k][1] = substr($code, $next_match_pos);
2320                      ++$k;
2321                      break;
2322                  } else {
2323                      $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2324                      ++$k;
2325                  }
2326              }
2327              unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2328              $num_parts = $k;
2329  
2330              if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2331                  // when we have only one part, we don't have anything to highlight at all.
2332                  // if we have a "maybe" strict language, this should be handled as highlightable code
2333                  $parts = array(
2334                      0 => array(
2335                          0 => '',
2336                          1 => ''
2337                      ),
2338                      1 => array(
2339                          0 => null,
2340                          1 => $parts[0][1]
2341                      )
2342                  );
2343                  $num_parts = 2;
2344              }
2345  
2346          } else {
2347              // Not strict mode - simply dump the source into
2348              // the array at index 1 (the first highlightable block)
2349              $parts = array(
2350                  0 => array(
2351                      0 => '',
2352                      1 => ''
2353                  ),
2354                  1 => array(
2355                      0 => null,
2356                      1 => $code
2357                  )
2358              );
2359              $num_parts = 2;
2360          }
2361  
2362          //Unset variables we won't need any longer
2363          unset($code);
2364  
2365          //Preload some repeatedly used values regarding hardquotes ...
2366          $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2367          $hq_strlen = strlen($hq);
2368  
2369          //Preload if line numbers are to be generated afterwards
2370          //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2371          $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2372              !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2373  
2374          //preload the escape char for faster checking ...
2375          $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2376  
2377          // this is used for single-line comments
2378          $sc_disallowed_before = "";
2379          $sc_disallowed_after = "";
2380  
2381          if (isset($this->language_data['PARSER_CONTROL'])) {
2382              if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2383                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2384                      $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2385                  }
2386                  if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2387                      $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2388                  }
2389              }
2390          }
2391  
2392          //Fix for SF#1932083: Multichar Quotemarks unsupported
2393          $is_string_starter = array();
2394          if ($this->lexic_permissions['STRINGS']) {
2395              foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2396                  if (!isset($is_string_starter[$quotemark[0]])) {
2397                      $is_string_starter[$quotemark[0]] = (string)$quotemark;
2398                  } else if (is_string($is_string_starter[$quotemark[0]])) {
2399                      $is_string_starter[$quotemark[0]] = array(
2400                          $is_string_starter[$quotemark[0]],
2401                          $quotemark);
2402                  } else {
2403                      $is_string_starter[$quotemark[0]][] = $quotemark;
2404                  }
2405              }
2406          }
2407  
2408          // Now we go through each part. We know that even-indexed parts are
2409          // code that shouldn't be highlighted, and odd-indexed parts should
2410          // be highlighted
2411          for ($key = 0; $key < $num_parts; ++$key) {
2412              $STRICTATTRS = '';
2413  
2414              // Even-indexed parts are not highlighted; only odd-indexed parts go on below
2415              if (!($key & 1)) {
2416                  // Not a block to highlight: append it via hsc() and move on to the next part
2417                  $endresult .= $this->hsc($parts[$key][1]);
2418                  unset($parts[$key]);
2419                  continue;
2420              }
2421  
2422              $result = '';
2423              $part = $parts[$key][1];
2424  
2425              $highlight_part = true;
2426              if ($this->strict_mode && !is_null($parts[$key][0])) {
2427                  // get the class key for this block of code
2428                  $script_key = $parts[$key][0];
2429                  $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2430                  if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2431                      $this->lexic_permissions['SCRIPT']) {
2432                      // Add a span element around the source to
2433                      // highlight the overall source block
2434                      if (!$this->use_classes &&
2435                          $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2436                          $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2437                      } else {
2438                          $attributes = ' class="sc' . $script_key . '"';
2439                      }
2440                      $result .= "<span$attributes>";
2441                      $STRICTATTRS = $attributes;
2442                  }
2443              }
2444  
2445              if ($highlight_part) {
2446                  // Now, highlight the code in this block. This code
2447                  // is really the engine of GeSHi (along with the method
2448                  // parse_non_string_part).
2449  
2450                  // cache comment regexps incrementally
2451                  $next_comment_regexp_key = '';
2452                  $next_comment_regexp_pos = -1;
2453                  $next_comment_multi_pos = -1;
2454                  $next_comment_single_pos = -1;
2455                  $comment_regexp_cache_per_key = array();
2456                  $comment_multi_cache_per_key = array();
2457                  $comment_single_cache_per_key = array();
2458                  $next_open_comment_multi = '';
2459                  $next_comment_single_key = '';
2460                  $escape_regexp_cache_per_key = array();
2461                  $next_escape_regexp_key = '';
2462                  $next_escape_regexp_pos = -1;
2463  
2464                  $length = strlen($part);
2465                  for ($i = 0; $i < $length; ++$i) {
2466                      // Get the next char
2467                      $char = $part[$i];
2468                      $char_len = 1;
2469  
2470                      // update regexp comment cache if needed
2471                      if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2472                          $next_comment_regexp_pos = $length;
2473                          foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2474                              $match_i = false;
2475                              if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2476                                  ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2477                                   $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2478                                  // we have already matched something
2479                                  if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2480                                      // this comment is never matched
2481                                      continue;
2482                                  }
2483                                  $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2484                              } else if (
2485                                  //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2486                                  (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2487                                  (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2488                                  ) {
2489                                  $match_i = $match[0][1];
2490                                  if (GESHI_PHP_PRE_433) {
2491                                      $match_i += $i;
2492                                  }
2493  
2494                                  $comment_regexp_cache_per_key[$comment_key] = array(
2495                                      'key' => $comment_key,
2496                                      'length' => strlen($match[0][0]),
2497                                      'pos' => $match_i
2498                                  );
2499                              } else {
2500                                  $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2501                                  continue;
2502                              }
2503  
2504                              if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2505                                  $next_comment_regexp_pos = $match_i;
2506                                  $next_comment_regexp_key = $comment_key;
2507                                  if ($match_i === $i) {
2508                                      break;
2509                                  }
2510                              }
2511                          }
2512                      }
2513  
2514                      $string_started = false;
2515  
2516                      if (isset($is_string_starter[$char])) {
2517                          // Possibly the start of a new string ...
2518