| [ Index ] |
PHP Cross Reference of Joomla 1.5.25 |
[Summary view] [Print] [Text view]
<?php
// no direct access
defined('_JEXEC') or die;

/**
 * GeSHi - Generic Syntax Highlighter
 *
 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
 * documentation at http://qbnz.com/highlighter/documentation.php for more
 * information about how to use this class.
 *
 * For changes, release notes, TODOs etc, see the relevant files in the docs/
 * directory.
 *
 *   This file is part of GeSHi.
 *
 *  GeSHi is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  GeSHi is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GeSHi; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * @package    geshi
 * @subpackage core
 * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 * @license    http://gnu.org/copyleft/gpl.html GNU GPL
 *
 */

//
// GeSHi Constants
// You should use these constant names in your programs instead of
// their values - you never know when a value may change in a future
// version
//

/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.8.10');

// Define the root directory for the GeSHi code tree.
// A caller may pre-define GESHI_ROOT before including this file; in that
// case the caller's value is kept.
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security.
// Like GESHI_ROOT, this may be pre-defined by the including script.
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}

// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/** Used to work around missing PHP features.
    True unless PHP_VERSION compares strictly greater than 4.3.3. **/
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));

/** make sure we can call stripos **/
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * Fallback for stripos(): case-insensitive search for $needle in
         * $haystack, emulating the offset by truncating the haystack.
         *
         * @param string   $haystack The string to search in
         * @param string   $needle   The string to search for
         * @param int|null $offset   Where to start searching (null = start)
         * @return int|false Position of the first match relative to the start
         *                   of the original $haystack, or false if not found
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            $start = 0;
            if (!is_null($offset)) {
                $start = (int) $offset;
                if ($start < 0) {
                    // a negative offset counts from the end, as substr() does
                    $start = max(0, strlen($haystack) + $start);
                }
                $haystack = substr($haystack, $start);
            }
            // "i" modifier: the search has to be case-insensitive
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                // the match offset is relative to the truncated haystack
                return $match[0][1] + $start;
            }
            return false;
        }
    }
    else {
        /**
         * Fallback for stripos(): case-insensitive search for $needle in
         * $haystack, using the offset parameter of preg_match().
         *
         * @param string   $haystack The string to search in
         * @param string   $needle   The string to search for
         * @param int|null $offset   Where to start searching (null = start)
         * @return int|false Position of the first match, or false if not found
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // "i" modifier: the search has to be case-insensitive
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}

/** some old PHP / PCRE subpatterns only support up to xxx subpatterns in
    regular expressions. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** it's also important not to generate too long regular expressions
    be generous here... but keep in mind, that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    **/
define('GESHI_MAX_PCRE_LENGTH', 12288);

//Number format specification
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
/** Number format to highlight octal numbers with a prefix 0o (logtalk) */
define('GESHI_NUMBER_OCT_PREFIX_0O', 512);           //0o[0-7]+
/** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
define('GESHI_NUMBER_OCT_PREFIX_AT', 1024);           //@[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 2048);           //[0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a prefix $ */
define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192);           //$[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 16384);           //[0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
/** Number format to highlight floating-point numbers with a trailing "f" (the fractional part is optional), without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
//Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/


/**
 * The GeSHi Class.
 *
 * Please refer to the documentation for GeSHi 1.0.X that is available
 * at http://qbnz.com/highlighter/documentation.php for more information
 * about how to use this class.
 *
 * @package   geshi
 * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 */
class GeSHi {
    /**#@+
     * @access private
     */
    /**
     * The source code to highlight
     * @var string
     */
    var $source = '';

    /**
     * The language to use when highlighting
     * @var string
     */
    var $language = '';

    /**
     * The data for the language used
     * @var array
     */
    var $language_data = array();

    /**
     * The path to the language files
     * @var string
     */
    var $language_path = GESHI_LANG_ROOT;

    /**
     * The last error: false when there is none, otherwise normally one of
     * the GESHI_ERROR_* codes used as keys of $error_messages
     * @var int|string|false
     * @todo check err reporting works
     */
    var $error = false;

    /**
     * Possible error messages, keyed by GESHI_ERROR_* code.
     * {LANGUAGE} and {PATH} are placeholders.
     * @var array
     */
    var $error_messages = array(
        GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
        GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
        GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
        GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
    );

    /**
     * Whether highlighting is strict or not
     * @var boolean
     */
    var $strict_mode = false;

    /**
     * Whether to use CSS classes in output
     * @var boolean
     */
    var $use_classes = false;

    /**
     * The type of header to use. Can be one of the following
     * values:
     *
     * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
     * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
     * - GESHI_HEADER_NONE: No header is outputted.
     * - GESHI_HEADER_PRE_VALID: A "pre" wraps the lines when line numbers
     *   are enabled, or the whole code otherwise.
     * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.
     *
     * @var int
     */
    var $header_type = GESHI_HEADER_PRE;

    /**
     * Array of permissions for which lexics should be highlighted
     * @var array
     */
    var $lexic_permissions = array(
        'KEYWORDS' =>    array(),
        'COMMENTS' =>    array('MULTI' => true),
        'REGEXPS' =>     array(),
        'ESCAPE_CHAR' => true,
        'BRACKETS' =>    true,
        'SYMBOLS' =>     false,
        'STRINGS' =>     true,
        'NUMBERS' =>     true,
        'METHODS' =>     true,
        'SCRIPT' =>      true
    );

    /**
     * The time it took to parse the code
     * @var double
     */
    var $time = 0;

    /**
     * The content of the header block
     * @var string
     */
    var $header_content = '';

    /**
     * The content of the footer block
     * @var string
     */
    var $footer_content = '';

    /**
     * The style of the header block
     * @var string
     */
    var $header_content_style = '';

    /**
     * The style of the footer block
     * @var string
     */
    var $footer_content_style = '';

    /**
     * Tells if a block around the highlighted source should be forced
     * if not using line numbering
     * @var boolean
     */
    var $force_code_block = false;

    /**
     * The styles for hyperlinks in the code
     * @var array
     */
    var $link_styles = array();

    /**
     * Whether important blocks should be recognised or not
     * @var boolean
     * @deprecated
     * @todo REMOVE THIS FUNCTIONALITY!
     */
    var $enable_important_blocks = false;

    /**
     * Styles for important parts of the code
     * @var string
     * @deprecated
     * @todo As above - rethink the whole idea of important blocks as it is buggy and
     * will be hard to implement in 1.2
     */
    var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code

    /**
     * Whether CSS IDs should be added to the code
     * @var boolean
     */
    var $add_ids = false;

    /**
     * Lines that should be highlighted extra
     * @var array
     */
    var $highlight_extra_lines = array();

    /**
     * Styles of lines that should be highlighted extra
     * @var array
     */
    var $highlight_extra_lines_styles = array();

    /**
     * Styles of extra-highlighted lines
     * @var string
     */
    var $highlight_extra_lines_style = 'background-color: #ffc;';

    /**
     * The line ending
     * If null, nl2br() will be used on the result string.
     * Otherwise, all instances of \n will be replaced with $line_ending
     * @var string
     */
    var $line_ending = null;

    /**
     * Number at which line numbers should start at
     * @var int
     */
    var $line_numbers_start = 1;

    /**
     * The overall style for this code block
     * @var string
     */
    var $overall_style = 'font-family:monospace;';

    /**
     * The style for the actual code
     * @var string
     */
    var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';

    /**
     * The overall class for this code block
     * @var string
     */
    var $overall_class = '';

    /**
     * The overall ID for this code block
     * @var string
     */
    var $overall_id = '';

    /**
     * Line number styles
     * @var string
     */
    var $line_style1 = 'font-weight: normal; vertical-align:top;';

    /**
     * Line number styles for fancy lines
     * @var string
     */
    var $line_style2 = 'font-weight: bold; vertical-align:top;';

    /**
     * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
     * @var string
     */
    var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';

    /**
     * Flag for how line numbers are displayed: one of
     * GESHI_NO_LINE_NUMBERS, GESHI_NORMAL_LINE_NUMBERS or
     * GESHI_FANCY_LINE_NUMBERS
     * @var int
     */
    var $line_numbers = GESHI_NO_LINE_NUMBERS;

    /**
     * Flag to decide if multi line spans are allowed. Set it to false to make sure
     * each tag is closed before and reopened after each linefeed.
     * @var boolean
     */
    var $allow_multiline_span = true;

    /**
     * The "nth" value for fancy line highlighting
     * @var int
     */
    var $line_nth_row = 0;

    /**
     * The size of tab stops
     * @var int
     */
    var $tab_width = 8;

    /**
     * Should we use language-defined tab stop widths?
     * @var boolean
     */
    var $use_language_tab_width = false;

    /**
     * Default target for keyword links
     * @var string
     */
    var $link_target = '';

    /**
     * The encoding to use for entity encoding
     * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
     * @var string
     */
    var $encoding = 'utf-8';

    /**
     * Should keywords be linked?
     * @var boolean
     */
    var $keyword_links = true;

    /**
     * Currently loaded language file
     * @var string
     * @since 1.0.7.22
     */
    var $loaded_language = '';

    /**
     * Whether the caches needed for parsing are built or not
     *
     * @var bool
     * @since 1.0.8
     */
    var $parse_cache_built = false;
/**
 * Work around for Suhosin Patch with disabled /e modifier
 *
 * Note from suhosins author in config file:
 * <blockquote>
 *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 *   Often it is the cause for remote code execution exploits. It is wise to
 *   deactivate this feature and test where in the application it is used.
 *   The developer using the /e modifier should be made aware that he should
 *   use <code>preg_replace_callback()</code> instead
 * </blockquote>
 *
 * @var int
 * @since 1.0.8
 */
var $_kw_replace_group = 0;
var $_rx_key = 0;

/**
 * some "callback parameters" for handle_multiline_regexps
 *
 * @since 1.0.8
 * @access private
 * @var string
 */
var $_hmr_before = '';
var $_hmr_replace = '';
var $_hmr_after = '';
var $_hmr_key = 0;

/**#@-*/

/**
 * Creates a new GeSHi object, with source and language
 *
 * Declared as __construct() so that PHP versions which no longer treat a
 * method named after the class as the constructor still initialise the
 * object; the PHP 4 style constructor below delegates to this method.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> I've backported the auto path
 *               detection from the 1.1.X dev branch, so now it
 *               should be automatically set correctly. If you have
 *               renamed the language directory however, you will
 *               still need to set the path using this parameter or
 *               {@link GeSHi->set_language_path()}
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    if (!empty($source)) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    // an empty path is a no-op inside set_language_path()
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor, kept for backwards compatibility.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}

/**
 * Returns an error message associated with the last GeSHi operation,
 * or false if no error has occured
 *
 * @return string|false An error message if there has been an error, else false
 * @since  1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    // $this->error is not guaranteed to be a key of $error_messages
    // (e.g. GESHI_ERROR_NO_INPUT has no entry); fall back to the raw
    // value instead of reading an undefined index.
    if (isset($this->error_messages[$this->error])) {
        $template = $this->error_messages[$this->error];
    } else {
        $template = (string) $this->error;
    }

    //Put some template variables for debugging here ...
    $debug_tpl_vars = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    $msg = str_replace(
        array_keys($debug_tpl_vars),
        array_values($debug_tpl_vars),
        $template);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}

/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * If the current language data carries no LANG_NAME entry, the raw
 * language identifier is used instead.
 *
 * @return string The name for the current language
 * @since  1.0.2
 */
function get_language_name() {
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : $this->language;

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}

/**
 * Sets the source code for this object
 *
 * Also clears the list of extra-highlighted lines.
 *
 * @param string The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    $this->source = $source;
    $this->highlight_extra_lines = array();
}

/**
 * Sets the language for this object
 *
 * @note since 1.0.8 this function won't reset language-settings by default anymore!
 *       if you need this set $force_reset = true
 *
 * @param string  The name of the language to use
 * @param boolean Whether to force a reload even if this language file
 *                is the one currently loaded
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        $this->loaded_language = false;
    }

    //Clean up the language name to prevent malicious code injection
    $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);

    $language = strtolower($language);

    //Retrieve the full filename
    $file_name = $this->language_path . $language . '.php';
    if ($file_name == $this->loaded_language) {
        // this language is already loaded!
        return;
    }

    $this->language = $language;

    $this->error = false;
    $this->strict_mode = GESHI_NEVER;

    //Check if we can read the desired file
    if (!is_readable($file_name)) {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
        return;
    }

    // Load the language for parsing
    $this->load_language($file_name);
}

/**
 * Sets the path to the directory containing the language files. Note
 * that this path is relative to the directory of the script that included
 * geshi.php, NOT geshi.php itself.
 *
 * Paths that fail one of the security checks below are silently ignored.
 *
 * @param string The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path) {
    // compare against false: strpos() returns 0 for a leading colon,
    // which must not bypass this check
    if (false !== strpos($path, ':')) {
        //Security Fix to prevent external directories using fopen wrappers.
        if (DIRECTORY_SEPARATOR == "\\") {
            // only a single colon, right after a drive letter, is accepted
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        } else {
            return;
        }
    }
    // NOTE(review): after PHP string unescaping the character class ends in
    // "\\s:", i.e. a literal backslash followed by "s"; whitespace is
    // therefore not whitelisted. Left unchanged - confirm the intent.
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if ($path) {
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
/**
 * Get supported langs or an associative array lang=>full_name.
 *
 * Scans the language directory for "*.php" files. Returns an empty
 * array if the directory cannot be opened.
 *
 * @param boolean $full_names If true, return lang=>full_name pairs (languages
 *                            whose full name cannot be determined are left
 *                            out); otherwise return a plain list of names
 * @return array
 */
function get_supported_languages($full_names=false)
{
    // return array
    $back = array();

    // we walk the lang root; bail out instead of calling read() on a
    // failed handle
    if (!is_dir($this->language_path)) {
        return $back;
    }
    $dir = dir($this->language_path);
    if (!$dir) {
        return $back;
    }

    // foreach entry
    while (false !== ($entry = $dir->read()))
    {
        $full_path = $this->language_path.$entry;

        // Skip all dirs
        if (is_dir($full_path)) {
            continue;
        }

        // we only want lang.php files
        if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
            continue;
        }

        // Raw lang name is here
        $langname = $matches[1];

        // We want the fullname too?
        if ($full_names === true)
        {
            if (false !== ($fullname = $this->get_language_fullname($langname)))
            {
                $back[$langname] = $fullname; // we go associative
            }
        }
        else
        {
            // just store raw langname
            $back[] = $langname;
        }
    }

    $dir->close();

    return $back;
}
/**
 * Get full_name for a lang or false.
 *
 * Reads the language file as text and extracts the single-quoted value
 * of its 'LANG_NAME' entry. On failure a message is stored in
 * $this->error and false is returned.
 *
 * @param string $language short langname (html4strict for example)
 * @return mixed The full name as a string, or false on failure
 */
function get_language_fullname($language)
{
    //Clean up the language name to prevent malicious code injection
    $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);

    $language = strtolower($language);

    // get fullpath-filename for a langname
    $fullpath = $this->language_path.$language.'.php';

    // we need to get contents :S
    // (is_readable() first, so a missing file does not raise a warning)
    if (!is_readable($fullpath) || false === ($data = file_get_contents($fullpath))) {
        $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
        return false;
    }

    // match the langname: either a character that is neither a quote nor a
    // backslash, or a backslash escape. This stops at the first unescaped
    // closing quote instead of running on to the last quote in the file.
    if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\'\\\\]|\\\\.)+)\'/', $data, $matches)) {
        $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
        return false;
    }

    // return fullname for langname
    return stripcslashes($matches[1]);
}

/**
 * Sets the type of header to be used.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted.
 *
 * An invalid type leaves the current header type untouched and records
 * GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * @param int The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    //Check if we got a valid header type
    if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
        GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
        return;
    }

    //Set that new header type
    $this->header_type = $type;
}
/**
 * Sets the styles for the code that will be outputted
 * when this object is parsed. The style should be a
 * string of valid stylesheet declarations
 *
 * @param string  The overall style for the outputted code block
 * @param boolean Whether to merge the styles with the current styles or not
 *                (when true, the new declarations are appended)
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    if (!$preserve_defaults) {
        $this->overall_style = $style;
    } else {
        $this->overall_style .= $style;
    }
}

/**
 * Sets the overall classname for this block of code. This
 * class can then be used in a stylesheet to style this object's
 * output
 *
 * @param string The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}

/**
 * Sets the overall id for this block of code. This id can then
 * be used in a stylesheet to style this object's output
 *
 * @param string The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}

/**
 * Sets whether CSS classes should be used to highlight the source. Default
 * is off, calling this method with no arguments will turn it on
 *
 * @param boolean Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    $this->use_classes = ($flag) ? true : false;
}
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string  The style to use for actual code
 * @param boolean Whether to merge the current styles with the new styles
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    if (!$preserve_defaults) {
        $this->code_style = $style;
    } else {
        $this->code_style .= $style;
    }
}

/**
 * Sets the styles for the line numbers.
 *
 * @param string The style for the line numbers that are "normal"
 * @param string|boolean If a string, this is the style of the line
 *        numbers that are "fancy", otherwise if boolean then this
 *        defines whether the normal styles should be merged with the
 *        new normal styles or not
 * @param boolean If set, is the flag for whether to merge the "fancy"
 *        styles with the current styles or not
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    //Check if we got 2 or three parameters:
    //set_line_style($style1, $preserve_defaults) is also accepted
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    //Actually set the new styles
    if (!$preserve_defaults) {
        $this->line_style1 = $style1;
        $this->line_style2 = $style2;
    } else {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
    }
}
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * An invalid flag records GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves
 * the current line number settings untouched.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // do not store a value that was just reported as invalid
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}

/**
 * Sets whether spans and other HTML markup generated by GeSHi can
 * span over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    $this->allow_multiline_span = (bool) $flag;
}

/**
 * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
 *
 * @see enable_multiline_span
 * @return bool
 */
function get_multiline_span() {
    return $this->allow_multiline_span;
}
/**
 * Sets the style for a keyword group. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority
 *
 * @param int     The key of the keyword group to change the styles of
 * @param string  The style to make the keywords
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    //Set the style for this keyword group; only append when there is an
    //existing style to append to
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}

/**
 * Turns highlighting on/off for a keyword group
 *
 * @param int     The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
}
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param int The key of the comment group to change the styles of
     * @param string The style to make the comments
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @since 1.0.0
     */
    function set_comments_style($key, $style, $preserve_defaults = false) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
            $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['COMMENTS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for comment groups
     *
     * @param int The key of the comment group to turn on or off
     * @param boolean Whether to turn highlighting for that group on or off
     * @since 1.0.0
     */
    function set_comments_highlighting($key, $flag = true) {
        $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
    }

    /**
     * Sets the styles for escaped characters.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param string The style to make the escape characters
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int Tells the group of escape characters for which style should be set.
     * @since 1.0.0
     */
    function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for escaped characters
     *
     * @param boolean Whether to turn highlighting for escape characters on or off
     * @since 1.0.0
     */
    function set_escape_characters_highlighting($flag = true) {
        $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
    }

    /**
     * Sets the styles for brackets. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * This method is DEPRECATED: use set_symbols_style instead.
     * This method will be removed in 1.2.X
     *
     * @param string The style to make the brackets
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @since 1.0.0
     * @deprecated In favour of set_symbols_style
     */
    function set_brackets_style($style, $preserve_defaults = false) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        } else {
            $this->language_data['STYLES']['BRACKETS'][0] = $style;
        }
    }

    /**
     * Turns highlighting on/off for brackets
     *
     * This method is DEPRECATED: use set_symbols_highlighting instead.
* This method will be removed in 1.2.X
     *
     * @param boolean Whether to turn highlighting for brackets on or off
     * @since 1.0.0
     * @deprecated In favour of set_symbols_highlighting
     */
    function set_brackets_highlighting($flag) {
        $this->lexic_permissions['BRACKETS'] = (bool) $flag;
    }

    /**
     * Sets the styles for symbols. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     * For group 0 the same style is also passed on to set_brackets_style().
     *
     * @param string The style to make the symbols
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int Tells the group of symbols for which style should be set.
     * @since 1.0.1
     */
    function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['SYMBOLS'][$group])) {
            $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
        }

        // For backward compatibility: group 0 also drives the bracket style
        if (0 == $group) {
            $this->set_brackets_style($style, $preserve_defaults);
        }
    }

    /**
     * Turns highlighting on/off for symbols
     *
     * The same setting is also applied to brackets.
     *
     * @param boolean Whether to turn highlighting for symbols on or off
     * @since 1.0.0
     */
    function set_symbols_highlighting($flag) {
        // Update lexic permissions for the symbols
        $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

        // For backward compatibility
        $this->set_brackets_highlighting($flag);
    }

    /**
     * Sets the styles for strings.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param string The style to make the strings
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int Tells the group of strings for which style should be set.
     * @since 1.0.0
     */
    function set_strings_style($style, $preserve_defaults = false, $group = 0) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
            $this->language_data['STYLES']['STRINGS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['STRINGS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for strings
     *
     * @param boolean Whether to turn highlighting for strings on or off
     * @since 1.0.0
     */
    function set_strings_highlighting($flag) {
        $this->lexic_permissions['STRINGS'] = (bool) $flag;
    }

    /**
     * Sets the styles for strict code blocks. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param string The style to make the script blocks
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int Tells the group of script blocks for which style should be set.
     * @since 1.0.8.4
     */
    function set_script_style($style, $preserve_defaults = false, $group = 0) {
        // Update the style of the script blocks; appending to a missing
        // entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
            $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['SCRIPT'][$group] = $style;
        }
    }

    /**
     * Sets the styles for numbers.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param string The style to make the numbers
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @param int Tells the group of numbers for which style should be set.
     * @since 1.0.0
     */
    function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
            $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
        } else {
            $this->language_data['STYLES']['NUMBERS'][$group] = $style;
        }
    }

    /**
     * Turns highlighting on/off for numbers
     *
     * @param boolean Whether to turn highlighting for numbers on or off
     * @since 1.0.0
     */
    function set_numbers_highlighting($flag) {
        $this->lexic_permissions['NUMBERS'] = (bool) $flag;
    }

    /**
     * Sets the styles for methods. $key is a number that references the
     * appropriate "object splitter" - see the language file for the language
     * you are highlighting to get this number.
If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * splitter has no stored style yet, the new style is simply stored.
     *
     * @param int The key of the object splitter to change the styles of
     * @param string The style to make the methods
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @since 1.0.0
     */
    function set_methods_style($key, $style, $preserve_defaults = false) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
            $this->language_data['STYLES']['METHODS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['METHODS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for methods
     *
     * @param boolean Whether to turn highlighting for methods on or off
     * @since 1.0.0
     */
    function set_methods_highlighting($flag) {
        $this->lexic_permissions['METHODS'] = (bool) $flag;
    }

    /**
     * Sets the styles for regexps. If $preserve_defaults is
     * true, then styles are merged with the default styles, with the
     * user defined styles having priority
     *
     * Merging means the new style is appended to the stored one. If the
     * group has no stored style yet, the new style is simply stored.
     *
     * @param int The key of the regular expression group to change the styles of
     * @param string The style to make the regular expression matches
     * @param boolean Whether to merge the new styles with the old or just
     *                to overwrite them
     * @since 1.0.0
     */
    function set_regexps_style($key, $style, $preserve_defaults = false) {
        // Appending to a missing entry would raise an undefined index notice
        if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
            $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        } else {
            $this->language_data['STYLES']['REGEXPS'][$key] = $style;
        }
    }

    /**
     * Turns highlighting on/off for regexps
     *
     * @param int The key of the regular expression group to turn on or off
     * @param boolean Whether to turn highlighting for the regular expression group on or off
     * @since 1.0.0
     */
    function set_regexps_highlighting($key, $flag) {
        $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
    }

    /**
     * Sets whether a set of keywords are checked for in a case sensitive manner
     *
     * @param int The key of the keyword group to change the case sensitivity of
     * @param boolean Whether to check in a case sensitive manner or not
     * @since 1.0.0
     */
    function set_case_sensitivity($key, $case) {
        $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
    }

    /**
     * Sets the case that keywords should use when found. Use the constants:
     *
     *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
     *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
     *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
     *
     * Any other value is ignored and the current setting is kept.
     *
     * @param int A constant specifying what to do with matched keywords
     * @since 1.0.1
     */
    function set_case_keywords($case) {
        $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
        if (in_array($case, $allowed)) {
            $this->language_data['CASE_KEYWORDS'] = $case;
        }
    }

    /**
     * Sets how many spaces a tab is substituted for
     *
     * The value is converted to an integer. Widths below one are not
     * accepted; the width is set to 8 in that case.
     *
     * @param int The tab width
     * @since 1.0.0
     */
    function set_tab_width($width) {
        $width = intval($width);

        // Anything smaller than one column is replaced by the default
        $this->tab_width = ($width < 1) ? 8 : $width;
    }

    /**
     * Sets whether or not to use tab-stop width specified by language
     *
     * @param boolean Whether to use language-specific tab-stop widths
     * @since 1.0.7.20
     */
    function set_use_language_tab_width($use) {
        $this->use_language_tab_width = (bool) $use;
    }

    /**
     * Returns the tab width to use, based on the current language and user
     * preference
     *
     * The language's TAB_WIDTH is returned only when language specific
     * widths are enabled and the language data defines one; otherwise the
     * width set through set_tab_width() is returned.
     *
     * @return int Tab width
     * @since 1.0.7.20
     */
    function get_real_tab_width() {
        if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
            return $this->language_data['TAB_WIDTH'];
        }
        return $this->tab_width;
    }

    /**
     * Enables/disables strict highlighting. Default is off, calling this
     * method without parameters will turn it on. See documentation
     * for more details on strict mode and where to use it.
     *
     * The call has no effect unless the language data declares
     * STRICT_MODE_APPLIES as GESHI_MAYBE.
     *
     * @param boolean Whether to enable strict mode or not
     * @since 1.0.0
     */
    function enable_strict_mode($mode = true) {
        if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
            $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
        }
    }

    /**
     * Disables all highlighting
     *
     * Shorthand for enable_highlighting(false).
     *
     * @since 1.0.0
     * @deprecated In favour of enable_highlighting
     */
    function disable_highlighting() {
        $this->enable_highlighting(false);
    }

    /**
     * Enables all highlighting
     *
     * The optional flag parameter was added in version 1.0.7.21 and can be used
     * to enable (true) or disable (false) all highlighting.
     *
     * Every entry of the lexic permissions is set to the flag, one level
     * deep for entries that are arrays, and so is the important-blocks
     * switch.
     *
     * @since 1.0.0
     * @param boolean A flag specifying whether to enable or disable all highlighting
     */
    function enable_highlighting($flag = true) {
        $flag = (bool) $flag;

        foreach ($this->lexic_permissions as $type => $permission) {
            if (!is_array($permission)) {
                $this->lexic_permissions[$type] = $flag;
                continue;
            }
            // Grouped permissions (one flag per group key)
            foreach (array_keys($permission) as $group) {
                $this->lexic_permissions[$type][$group] = $flag;
            }
        }

        // Context blocks
        $this->enable_important_blocks = $flag;
    }

    /**
     * Given a file extension, this method returns either a valid geshi language
     * name, or the empty string if it couldn't be found
     *
     * The lookup is walked in order and the first language listing the
     * extension wins. The comparison is case sensitive for strings.
     *
     * @param string The extension to get a language name for
     * @param array A lookup array to use instead of the default one; an
     *              empty array or a non-array selects the default table
     * @return string The language name, or '' if no language matches
     * @since 1.0.5
     * @todo Re-think about how this method works (maybe make it private and/or make it
     *       a extension->lang lookup?)
     * @todo static?
     */
    function get_language_name_from_extension( $extension, $lookup = array() ) {
        if ( !is_array($lookup) || empty($lookup)) {
            $lookup = array(
                '6502acme' => array( 'a', 's', 'asm', 'inc' ),
                '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
                '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
                '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
                'abap' => array('abap'),
                'actionscript' => array('as'),
                'ada' => array('a', 'ada', 'adb', 'ads'),
                'apache' => array('conf'),
                'asm' => array('ash', 'asm', 'inc'),
                'asp' => array('asp'),
                'bash' => array('sh'),
                'bf' => array('bf'),
                'c' => array('c', 'h'),
                'c_mac' => array('c', 'h'),
                'caddcl' => array(),
                'cadlisp' => array(),
                'cdfg' => array('cdfg'),
                'cobol' => array('cbl'),
                'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
                'csharp' => array('cs'),
                'css' => array('css'),
                'd' => array('d'),
                'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
                'diff' => array('diff', 'patch'),
                'dos' => array('bat', 'cmd'),
                'gdb' => array('kcrash', 'crash', 'bt'),
                'gettext' => array('po', 'pot'),
                'gml' => array('gml'),
                'gnuplot' => array('plt'),
                'groovy' => array('groovy'),
                'haskell' => array('hs'),
                'html4strict' => array('html', 'htm'),
                'ini' => array('ini', 'desktop'),
                'java' => array('java'),
                'javascript' => array('js'),
                'klonec' => array('kl1'),
                'klonecpp' => array('klx'),
                'latex' => array('tex'),
                'lisp' => array('lisp'),
                'lua' => array('lua'),
                'matlab' => array('m'),
                'mpasm' => array(),
                'mysql' => array('sql'),
                'nsis' => array(),
                'objc' => array(),
                'oobas' => array(),
                'oracle8' => array(),
                'oracle10' => array(),
                'pascal' => array('pas'),
                'perl' => array('pl', 'pm'),
                'php' => array('php', 'php5', 'phtml', 'phps'),
                'povray' => array('pov'),
                'providex' => array('pvc', 'pvx'),
                'prolog' => array('pl'),
                'python' => array('py'),
                'qbasic' => array('bi'),
                'reg' => array('reg'),
                'ruby' => array('rb'),
                'sas' => array('sas'),
                'scala' => array('scala'),
                'scheme' => array('scm'),
                'scilab' => array('sci'),
                'smalltalk' => array('st'),
                'smarty' => array(),
                'tcl' => array('tcl'),
                'vb' => array('bas'),
                'vbnet' => array(),
                'visualfoxpro' => array(),
                'whitespace' => array('ws'),
                'xml' => array('xml', 'svg', 'xrc'),
                'z80' => array('z80', 'asm', 'inc')
                );
        }

        foreach ($lookup as $lang => $extensions) {
            // The cast lets a caller supplied table map a language to a
            // single extension string without tripping in_array().
            if (in_array($extension, (array) $extensions)) {
                return $lang;
            }
        }
        return '';
    }

    /**
     * Given a file name, this method loads its contents in, and attempts
     * to set the language automatically. An optional lookup table can be
     * passed for looking up the language name. If not specified a default
     * table is used
     *
     * The language table is in the form
     * <pre>array(
     *   'lang_name' => array('extension', 'extension', ...),
     *   'lang_name' ...
* );</pre>
     *
     * If the file cannot be read (including when the name refers to a
     * directory) the error state is set to GESHI_ERROR_FILE_NOT_READABLE
     * and neither source nor language are changed.
     *
     * @param string The filename to load the source from
     * @param array  A lookup array to use instead of the default one
     * @todo Complete rethink of this and above method
     * @since 1.0.5
     */
    function load_from_file($file_name, $lookup = array()) {
        // is_readable() is also true for directories, which cannot be loaded
        if (!is_readable($file_name) || is_dir($file_name)) {
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }

        $contents = file_get_contents($file_name);
        if ($contents === false) {
            // The read itself failed although the file looked readable
            $this->error = GESHI_ERROR_FILE_NOT_READABLE;
            return;
        }
        $this->set_source($contents);

        // pathinfo() only looks at the last path component, so a dot inside
        // a directory name is not mistaken for the start of the extension,
        // and a name without any dot yields the empty string.
        $extension = pathinfo($file_name, PATHINFO_EXTENSION);
        $this->set_language($this->get_language_name_from_extension($extension, $lookup));
    }

    /**
     * Adds a keyword to a keyword group for highlighting
     *
     * A word that is already part of the group is not added again. If the
     * group does not exist yet, it is started with this word.
     *
     * @param int    The key of the keyword group to add the keyword to
     * @param string The word to add to the keyword group
     * @since 1.0.0
     */
    function add_keyword($key, $word) {
        // Make sure there is a word list to search and append to
        if (!isset($this->language_data['KEYWORDS'][$key]) ||
            !is_array($this->language_data['KEYWORDS'][$key])) {
            $this->language_data['KEYWORDS'][$key] = array();
        }

        if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
            return;
        }
        $this->language_data['KEYWORDS'][$key][] = $word;

        if (!$this->parse_cache_built) {
            return;
        }

        if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
            // No cached regexp list for this group yet, so there is nothing
            // to append to: compile the group, which now includes the word.
            $this->optimize_keyword_group($key);
        } else {
            //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
            $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
        }
    }

    /**
     * Removes a keyword from a keyword group
     *
     * @param int    The key of the keyword group to remove the keyword from
     * @param string The word to remove from the keyword group
     * @param bool   Whether to automatically recompile the optimized regexp list or not.
     *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
     *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
     *               or the removed keyword will stay in cache and still be highlighted! On the other hand
     *               it might be too expensive to recompile the regexp list for every removal if you want to
     *               remove a lot of keywords.
* @since 1.0.0
     */
    function remove_keyword($key, $word, $recompile = true) {
        // Without this guard array_search() would be handed a non-array,
        // its result would not be false, and a group that does not exist
        // would be "recompiled".
        if (!isset($this->language_data['KEYWORDS'][$key]) ||
            !is_array($this->language_data['KEYWORDS'][$key])) {
            return;
        }

        $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
        if ($key_to_remove === false) {
            // The word is not part of the group
            return;
        }
        unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);

        //NEW in 1.0.8, optionally recompile keyword group
        if ($recompile && $this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Creates a new keyword group
     *
     * The group is stored with highlighting enabled. An existing group
     * with the same key is overwritten.
     *
     * @param int     The key of the keyword group to create
     * @param string  The styles for the keyword group
     * @param boolean Whether the keyword group is case sensitive or not
     * @param array   The words to use for the keyword group
     * @return mixed  false if the word list is empty (no group is created);
     *                nothing is returned otherwise
     * @since 1.0.0
     */
    function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
        $words = (array) $words;
        if (empty($words)) {
            // empty word lists mess up highlighting
            return false;
        }

        //Add the new keyword group internally
        $this->language_data['KEYWORDS'][$key] = $words;
        $this->lexic_permissions['KEYWORDS'][$key] = true;
        $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
        $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

        //NEW in 1.0.8, cache keyword regexp
        if ($this->parse_cache_built) {
            $this->optimize_keyword_group($key);
        }
    }

    /**
     * Removes a keyword group
     *
     * Drops the words, the lexic permission, the case sensitivity setting,
     * the style and the cached regexp list of the group.
     *
     * @param int    The key of the keyword group to remove
     * @since 1.0.0
     */
    function remove_keyword_group ($key) {
        //Remove the keyword group internally
        unset(
            $this->language_data['KEYWORDS'][$key],
            $this->lexic_permissions['KEYWORDS'][$key],
            $this->language_data['CASE_SENSITIVE'][$key],
            $this->language_data['STYLES']['KEYWORDS'][$key]
        );

        //NEW in 1.0.8
        unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
    }

    /**
* Compiles the optimized regexp list for a keyword group and stores it
     * in the keyword list cache.
     *
     * If SPACE_AS_WHITESPACE is set in the keyword parser control, either
     * for all groups or for this group (the group setting wins), every
     * space in the compiled expressions is replaced by "\s+".
     *
     * @param int   The key of the keyword group to compile & optimize
     * @since 1.0.8
     */
    function optimize_keyword_group($key) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
            $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

        // Global setting first, then the per-group override
        $space_as_whitespace = false;
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace =
                $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
        }
        if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
            $space_as_whitespace =
                $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
        }

        if (!$space_as_whitespace) {
            return;
        }

        foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $regexp) {
            $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
                str_replace(" ", "\\s+", $regexp);
        }
    }

    /**
     * Sets the content of the header block
     *
     * @param string The content of the header block
     * @since 1.0.2
     */
    function set_header_content($content) {
        $this->header_content = $content;
    }

    /**
     * Sets the content of the footer block
     *
     * @param string The content of the footer block
     * @since 1.0.2
     */
    function set_footer_content($content) {
        $this->footer_content = $content;
    }

    /**
     * Sets the style for the header content
     *
     * @param string The style for the header content
     * @since 1.0.2
     */
    function set_header_content_style($style) {
        $this->header_content_style = $style;
    }

    /**
* Sets the style for the footer content
     *
     * @param string The style for the footer content
     * @since 1.0.2
     */
    function set_footer_content_style($style) {
        $this->footer_content_style = $style;
    }

    /**
     * Sets whether to force a surrounding block around
     * the highlighted code or not
     *
     * @param boolean Tells whether to enable or disable this feature
     * @since 1.0.7.20
     */
    function enable_inner_code_block($flag) {
        $this->force_code_block = $flag ? true : false;
    }

    /**
     * Sets the base URL to be used for keywords
     *
     * @param int The key of the keyword group to set the URL for
     * @param string The URL to set for the group. If {FNAME} is in
     *               the url somewhere, it is replaced by the keyword
     *               that the URL is being made for
     * @since 1.0.2
     */
    function set_url_for_keyword_group($group, $url) {
        $this->language_data['URLS'][$group] = $url;
    }

    /**
     * Sets styles for links in code
     *
     * @param int A constant that specifies what state the style is being
     *            set for - e.g. :hover or :visited
     * @param string The styles to use for that state
     * @since 1.0.2
     */
    function set_link_styles($type, $styles) {
        $this->link_styles[$type] = $styles;
    }

    /**
     * Sets the target for links in code
     *
     * An empty target clears the setting; otherwise the complete
     * target="..." attribute (with a leading space) is stored.
     *
     * @param string The target for links in the code, e.g. _blank
     * @since 1.0.3
     */
    function set_link_target($target) {
        $this->link_target = $target ? ' target="' . $target . '"' : '';
    }

    /**
     * Sets styles for important parts of the code
     *
     * @param string The styles to use on important parts of the code
     * @since 1.0.2
     */
    function set_important_styles($styles) {
        $this->important_styles = $styles;
    }

    /**
     * Sets whether context-important blocks are highlighted
     *
     * @param boolean Tells whether to enable or disable highlighting of important blocks
     * @todo REMOVE THIS SHIZ FROM GESHI!
     * @deprecated
     * @since 1.0.2
     */
    function enable_important_blocks($flag) {
        $this->enable_important_blocks = (bool) $flag;
    }

    /**
     * Whether CSS IDs should be added to each line
     *
     * @param boolean If true, IDs will be added to each line.
     * @since 1.0.2
     */
    function enable_ids($flag = true) {
        $this->add_ids = (bool) $flag;
    }

    /**
     * Specifies which lines to highlight extra
     *
     * The extra style parameter was added in 1.0.7.21.
     *
     * @param mixed An array of line numbers to highlight, or just a line
     *              number on its own.
     * @param string A string specifying the style to use for this line.
     *              If null is specified, the default style is used.
* If false is specified, the line will be removed from
     *              special highlighting
     * @since 1.0.2
     * @todo  Some data replication here that could be cut down on
     */
    function highlight_lines_extra($lines, $style = null) {
        if (is_array($lines)) {
            // Handle every entry on its own, with the same style
            foreach ($lines as $single_line) {
                $this->highlight_lines_extra($single_line, $style);
            }
            return;
        }

        $line_number = intval($lines);

        if ($style === false) {
            // Take the line out of special highlighting altogether
            unset($this->highlight_extra_lines[$line_number]);
            unset($this->highlight_extra_lines_styles[$line_number]);
            return;
        }

        // Mark the line as being highlighted specially
        $this->highlight_extra_lines[$line_number] = $line_number;

        if ($style === null) {
            // No own style: fall back to the default one
            unset($this->highlight_extra_lines_styles[$line_number]);
        } else {
            $this->highlight_extra_lines_styles[$line_number] = $style;
        }
    }

    /**
     * Sets the style for extra-highlighted lines
     *
     * @param string The style for extra-highlighted lines
     * @since 1.0.2
     */
    function set_highlight_lines_extra_style($styles) {
        $this->highlight_extra_lines_style = $styles;
    }

    /**
     * Sets the line-ending
     *
     * The value is stored as a string.
     *
     * @param string The new line-ending
     * @since 1.0.2
     */
    function set_line_ending($line_ending) {
        $this->line_ending = strval($line_ending);
    }

    /**
     * Sets what number line numbers should start at. Should
     * be a positive integer, and will be converted to one.
     *
     * <b>Warning:</b> Using this method will add the "start"
     * attribute to the &lt;ol&gt; that is used for line numbering.
     * This is <b>not</b> valid XHTML strict, so if that's what you
     * care about then don't use this method.
Firefox is getting
     * support for the CSS method of doing this in 1.1 and Opera
     * has support for the CSS method, but (of course) IE doesn't
     * so it's not worth doing it the CSS way yet.
     *
     * @param int The number to start line numbers at
     * @since 1.0.2
     */
    function start_line_numbers_at($number) {
        // Integer conversion first, then drop the sign
        $start = (int) $number;
        $this->line_numbers_start = abs($start);
    }

    /**
     * Sets the encoding used for htmlspecialchars(), for international
     * support.
     *
     * NOTE: This is not needed for now because htmlspecialchars() is not
     * being used (it has a security hole in PHP4 that has not been patched).
     * Maybe in a future version it may make a return for speed reasons, but
     * I doubt it.
     *
     * The encoding is stored in lower case. An empty value is ignored and
     * the current encoding is kept.
     *
     * @param string The encoding to use for the source
     * @since 1.0.3
     */
    function set_encoding($encoding) {
        if (!$encoding) {
            return;
        }
        $this->encoding = strtolower($encoding);
    }

    /**
     * Turns linking of keywords on or off.
     *
     * @param boolean If true, links will be added to keywords
     * @since 1.0.2
     */
    function enable_keyword_links($enable = true) {
        $this->keyword_links = $enable ? true : false;
    }

    /**
     * Setup caches needed for styling. This is automatically called in
     * parse_code() and get_stylesheet() when appropriate.
This function helps 1901 * stylesheet generators as they rely on some style information being 1902 * preprocessed 1903 * 1904 * @since 1.0.8 1905 * @access private 1906 */ 1907 function build_style_cache() { 1908 //Build the style cache needed to highlight numbers appropriate 1909 if($this->lexic_permissions['NUMBERS']) { 1910 //First check what way highlighting information for numbers are given 1911 if(!isset($this->language_data['NUMBERS'])) { 1912 $this->language_data['NUMBERS'] = 0; 1913 } 1914 1915 if(is_array($this->language_data['NUMBERS'])) { 1916 $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS']; 1917 } else { 1918 $this->language_data['NUMBERS_CACHE'] = array(); 1919 if(!$this->language_data['NUMBERS']) { 1920 $this->language_data['NUMBERS'] = 1921 GESHI_NUMBER_INT_BASIC | 1922 GESHI_NUMBER_FLT_NONSCI; 1923 } 1924 1925 for($i = 0, $j = $this->language_data['NUMBERS']; $j > 0; ++$i, $j>>=1) { 1926 //Rearrange style indices if required ... 1927 if(isset($this->language_data['STYLES']['NUMBERS'][1<<$i])) { 1928 $this->language_data['STYLES']['NUMBERS'][$i] = 1929 $this->language_data['STYLES']['NUMBERS'][1<<$i]; 1930 unset($this->language_data['STYLES']['NUMBERS'][1<<$i]); 1931 } 1932 1933 //Check if this bit is set for highlighting 1934 if($j&1) { 1935 //So this bit is set ... 1936 //Check if it belongs to group 0 or the actual stylegroup 1937 if(isset($this->language_data['STYLES']['NUMBERS'][$i])) { 1938 $this->language_data['NUMBERS_CACHE'][$i] = 1 << $i; 1939 } else { 1940 if(!isset($this->language_data['NUMBERS_CACHE'][0])) { 1941 $this->language_data['NUMBERS_CACHE'][0] = 0; 1942 } 1943 $this->language_data['NUMBERS_CACHE'][0] |= 1 << $i; 1944 } 1945 } 1946 } 1947 } 1948 } 1949 } 1950 1951 /** 1952 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate. 1953 * This function makes stylesheet generators much faster as they do not need these caches. 
*
     * Prepared here: the combined symbol regexp (SYMBOL_DATA / SYMBOL_SEARCH),
     * the cached keyword lists (via optimize_keyword_group()), the bracket
     * search/replace tables and one regexp per number style group
     * (NUMBERS_RXCACHE). Sets $this->parse_cache_built when done.
     *
     * @since 1.0.8
     * @access private
     */
    function build_parse_cache() {
        // ---- Symbols ----
        // Building the symbol regexp is costly, so all symbol groups are folded
        // into a single expression. SYMBOL_DATA maps each escaped symbol to the
        // group it was declared in, so the style can be looked up when highlighting.
        if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
            $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
            $this->language_data['SYMBOL_DATA'] = array();

            $multi_char_rx = array();  // quoted symbols longer than one char
            $single_char_rx = array(); // quoted one-char symbols (character class members)

            foreach ($this->language_data['SYMBOLS'] as $group => $entry) {
                // A non-array entry is one symbol that is filed under group 0.
                if (is_array($entry)) {
                    $style_group = $group;
                    $symbol_list = $entry;
                } else {
                    $style_group = 0;
                    $symbol_list = array($entry);
                }

                foreach ($symbol_list as $raw_symbol) {
                    $symbol = $this->hsc($raw_symbol);
                    if (isset($this->language_data['SYMBOL_DATA'][$symbol])) {
                        // The first declaration of a symbol wins.
                        continue;
                    }
                    $this->language_data['SYMBOL_DATA'][$symbol] = $style_group;

                    if (isset($symbol[1])) {
                        // More than one char: becomes an alternative of its own.
                        $multi_char_rx[] = preg_quote($symbol, '/');
                    } else {
                        // '-' is escaped by hand so the character class cannot
                        // raise a "range out of order" error.
                        $single_char_rx[] = ($symbol == '-') ? '\-' : preg_quote($symbol, '/');
                    }
                }
            }

            // Assemble the search expression: the multi-char alternatives first,
            // then one character class holding every single-char symbol. Both
            // lists are sorted in descending order (rsort) beforehand.
            $alternatives = array();
            if (!empty($multi_char_rx)) {
                rsort($multi_char_rx);
                $alternatives[] = implode('|', $multi_char_rx);
            }
            if (!empty($single_char_rx)) {
                rsort($single_char_rx);
                $alternatives[] = '[' . implode('', $single_char_rx) . ']';
            }
            $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
        }

        // ---- Keywords ----
        // Throw away the old cache, then rebuild it for every keyword group that
        // is not explicitly disabled (a group without a permission entry counts
        // as enabled).
        $this->language_data['CACHED_KEYWORD_LISTS'] = array();
        foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
            $explicitly_disabled = isset($this->lexic_permissions['KEYWORDS'][$group])
                && !$this->lexic_permissions['KEYWORDS'][$group];
            if (!$explicitly_disabled) {
                $this->optimize_keyword_group($group);
            }
        }

        // ---- Brackets ----
        if ($this->lexic_permissions['BRACKETS']) {
            $brackets = array('[', ']', '(', ')', '{', '}');
            $this->language_data['CACHE_BRACKET_MATCH'] = $brackets;

            // Inline style when classes are off and a bracket style exists,
            // otherwise the fixed "br0" class.
            if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
                $opening = '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">';
            } else {
                $opening = '<| class="br0">';
            }

            // One replacement per bracket, in the same order as the match list.
            $replacements = array();
            foreach ($brackets as $bracket) {
                $replacements[] = $opening . $bracket . '|>';
            }
            $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
        }

        // ---- Numbers ----
        if ($this->lexic_permissions['NUMBERS']) {
            // NUMBERS_CACHE is produced by build_style_cache(); build it on
            // demand if that has not happened yet.
            if (!isset($this->language_data['NUMBERS_CACHE'])) {
                $this->build_style_cache();
            }

            // Regexp fragment per number flag. The final expression carries the
            // /i modifier, so all of these match case-insensitively.
            static $numbers_format = array(
                GESHI_NUMBER_INT_BASIC =>
                    '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_INT_CSTYLE =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_BIN_PREFIX_0B =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_0O =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_PREFIX_AT =>
                    '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_OCT_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_HEX_SUFFIX =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_NONSCI_F =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_SHORT =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
                GESHI_NUMBER_FLT_SCI_ZERO =>
                    '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
            );

            // Each NUMBERS_CACHE entry is either a ready-made regexp (string) or
            // a bitfield of number flags whose fragments get OR-ed together.
            $this->language_data['NUMBERS_RXCACHE'] = array();
            foreach ($this->language_data['NUMBERS_CACHE'] as $style_key => $spec) {
                if (is_string($spec)) {
                    $regexp = $spec;
                } else {
                    $selected = array();
                    $flag = 1;
                    while ($flag <= $spec) {
                        if ($spec & $flag) {
                            $selected[] = $numbers_format[$flag];
                        }
                        $flag <<= 1;
                    }
                    $regexp = implode("|", $selected);
                }

                $this->language_data['NUMBERS_RXCACHE'][$style_key] =
                    "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i";
            }

            // Default pre-check expression unless the language file supplies one.
            if (!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
                $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
            }
        }

        $this->parse_cache_built = true;
    }

    /**
     * Returns the code in $this->source, highlighted and surrounded by the
     * necessary HTML.
2133 * 2134 * This should only be called ONCE, cos it's SLOW! If you want to highlight 2135 * the same source multiple times, you're better off doing a whole lot of 2136 * str_replaces to replace the <span>s 2137 * 2138 * @since 1.0.0 2139 */ 2140 function parse_code () { 2141 // Start the timer 2142 $start_time = microtime(); 2143 2144 // Replace all newlines to a common form. 2145 $code = str_replace("\r\n", "\n", $this->source); 2146 $code = str_replace("\r", "\n", $code); 2147 2148 // Firstly, if there is an error, we won't highlight 2149 if ($this->error) { 2150 //Escape the source for output 2151 $result = $this->hsc($this->source); 2152 2153 //This fix is related to SF#1923020, but has to be applied regardless of 2154 //actually highlighting symbols. 2155 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result); 2156 2157 // Timing is irrelevant 2158 $this->set_time($start_time, $start_time); 2159 $this->finalise($result); 2160 return $result; 2161 } 2162 2163 // make sure the parse cache is up2date 2164 if (!$this->parse_cache_built) { 2165 $this->build_parse_cache(); 2166 } 2167 2168 // Initialise various stuff 2169 $length = strlen($code); 2170 $COMMENT_MATCHED = false; 2171 $stuff_to_parse = ''; 2172 $endresult = ''; 2173 2174 // "Important" selections are handled like multiline comments 2175 // @todo GET RID OF THIS SHIZ 2176 if ($this->enable_important_blocks) { 2177 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT; 2178 } 2179 2180 if ($this->strict_mode) { 2181 // Break the source into bits. 
Each bit will be a portion of the code 2182 // within script delimiters - for example, HTML between < and > 2183 $k = 0; 2184 $parts = array(); 2185 $matches = array(); 2186 $next_match_pointer = null; 2187 // we use a copy to unset delimiters on demand (when they are not found) 2188 $delim_copy = $this->language_data['SCRIPT_DELIMITERS']; 2189 $i = 0; 2190 while ($i < $length) { 2191 $next_match_pos = $length + 1; // never true 2192 foreach ($delim_copy as $dk => $delimiters) { 2193 if(is_array($delimiters)) { 2194 foreach ($delimiters as $open => $close) { 2195 // make sure the cache is setup properly 2196 if (!isset($matches[$dk][$open])) { 2197 $matches[$dk][$open] = array( 2198 'next_match' => -1, 2199 'dk' => $dk, 2200 2201 'open' => $open, // needed for grouping of adjacent code blocks (see below) 2202 'open_strlen' => strlen($open), 2203 2204 'close' => $close, 2205 'close_strlen' => strlen($close), 2206 ); 2207 } 2208 // Get the next little bit for this opening string 2209 if ($matches[$dk][$open]['next_match'] < $i) { 2210 // only find the next pos if it was not already cached 2211 $open_pos = strpos($code, $open, $i); 2212 if ($open_pos === false) { 2213 // no match for this delimiter ever 2214 unset($delim_copy[$dk][$open]); 2215 continue; 2216 } 2217 $matches[$dk][$open]['next_match'] = $open_pos; 2218 } 2219 if ($matches[$dk][$open]['next_match'] < $next_match_pos) { 2220 //So we got a new match, update the close_pos 2221 $matches[$dk][$open]['close_pos'] = 2222 strpos($code, $close, $matches[$dk][$open]['next_match']+1); 2223 2224 $next_match_pointer =& $matches[$dk][$open]; 2225 $next_match_pos = $matches[$dk][$open]['next_match']; 2226 } 2227 } 2228 } else { 2229 //So we should match an RegExp as Strict Block ... 
2230 /** 2231 * The value in $delimiters is expected to be an RegExp 2232 * containing exactly 2 matching groups: 2233 * - Group 1 is the opener 2234 * - Group 2 is the closer 2235 */ 2236 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work. 2237 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) { 2238 //We got a match ... 2239 if(isset($matches_rx['start']) && isset($matches_rx['end'])) 2240 { 2241 $matches[$dk] = array( 2242 'next_match' => $matches_rx['start'][1], 2243 'dk' => $dk, 2244 2245 'close_strlen' => strlen($matches_rx['end'][0]), 2246 'close_pos' => $matches_rx['end'][1], 2247 ); 2248 } else { 2249 $matches[$dk] = array( 2250 'next_match' => $matches_rx[1][1], 2251 'dk' => $dk, 2252 2253 'close_strlen' => strlen($matches_rx[2][0]), 2254 'close_pos' => $matches_rx[2][1], 2255 ); 2256 } 2257 } else { 2258 // no match for this delimiter ever 2259 unset($delim_copy[$dk]); 2260 continue; 2261 } 2262 2263 if ($matches[$dk]['next_match'] <= $next_match_pos) { 2264 $next_match_pointer =& $matches[$dk]; 2265 $next_match_pos = $matches[$dk]['next_match']; 2266 } 2267 } 2268 } 2269 2270 // non-highlightable text 2271 $parts[$k] = array( 2272 1 => substr($code, $i, $next_match_pos - $i) 2273 ); 2274 ++$k; 2275 2276 if ($next_match_pos > $length) { 2277 // out of bounds means no next match was found 2278 break; 2279 } 2280 2281 // highlightable code 2282 $parts[$k][0] = $next_match_pointer['dk']; 2283 2284 //Only combine for non-rx script blocks 2285 if(is_array($delim_copy[$next_match_pointer['dk']])) { 2286 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three! 
2287 $i = $next_match_pos + $next_match_pointer['open_strlen']; 2288 while (true) { 2289 $close_pos = strpos($code, $next_match_pointer['close'], $i); 2290 if ($close_pos == false) { 2291 break; 2292 } 2293 $i = $close_pos + $next_match_pointer['close_strlen']; 2294 if ($i == $length) { 2295 break; 2296 } 2297 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 || 2298 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) { 2299 // merge adjacent but make sure we don't merge things like <tag><!-- comment --> 2300 foreach ($matches as $submatches) { 2301 foreach ($submatches as $match) { 2302 if ($match['next_match'] == $i) { 2303 // a different block already matches here! 2304 break 3; 2305 } 2306 } 2307 } 2308 } else { 2309 break; 2310 } 2311 } 2312 } else { 2313 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen']; 2314 $i = $close_pos; 2315 } 2316 2317 if ($close_pos === false) { 2318 // no closing delimiter found! 2319 $parts[$k][1] = substr($code, $next_match_pos); 2320 ++$k; 2321 break; 2322 } else { 2323 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos); 2324 ++$k; 2325 } 2326 } 2327 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches); 2328 $num_parts = $k; 2329 2330 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) { 2331 // when we have only one part, we don't have anything to highlight at all. 
2332 // if we have a "maybe" strict language, this should be handled as highlightable code 2333 $parts = array( 2334 0 => array( 2335 0 => '', 2336 1 => '' 2337 ), 2338 1 => array( 2339 0 => null, 2340 1 => $parts[0][1] 2341 ) 2342 ); 2343 $num_parts = 2; 2344 } 2345 2346 } else { 2347 // Not strict mode - simply dump the source into 2348 // the array at index 1 (the first highlightable block) 2349 $parts = array( 2350 0 => array( 2351 0 => '', 2352 1 => '' 2353 ), 2354 1 => array( 2355 0 => null, 2356 1 => $code 2357 ) 2358 ); 2359 $num_parts = 2; 2360 } 2361 2362 //Unset variables we won't need any longer 2363 unset($code); 2364 2365 //Preload some repeatedly used values regarding hardquotes ... 2366 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false; 2367 $hq_strlen = strlen($hq); 2368 2369 //Preload if line numbers are to be generated afterwards 2370 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398 2371 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS || 2372 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span; 2373 2374 //preload the escape char for faster checking ... 
2375 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']); 2376 2377 // this is used for single-line comments 2378 $sc_disallowed_before = ""; 2379 $sc_disallowed_after = ""; 2380 2381 if (isset($this->language_data['PARSER_CONTROL'])) { 2382 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) { 2383 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) { 2384 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE']; 2385 } 2386 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) { 2387 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER']; 2388 } 2389 } 2390 } 2391 2392 //Fix for SF#1932083: Multichar Quotemarks unsupported 2393 $is_string_starter = array(); 2394 if ($this->lexic_permissions['STRINGS']) { 2395 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) { 2396 if (!isset($is_string_starter[$quotemark[0]])) { 2397 $is_string_starter[$quotemark[0]] = (string)$quotemark; 2398 } else if (is_string($is_string_starter[$quotemark[0]])) { 2399 $is_string_starter[$quotemark[0]] = array( 2400 $is_string_starter[$quotemark[0]], 2401 $quotemark); 2402 } else { 2403 $is_string_starter[$quotemark[0]][] = $quotemark; 2404 } 2405 } 2406 } 2407 2408 // Now we go through each part. We know that even-indexed parts are 2409 // code that shouldn't be highlighted, and odd-indexed parts should 2410 // be highlighted 2411 for ($key = 0; $key < $num_parts; ++$key) { 2412 $STRICTATTRS = ''; 2413 2414 // If this block should be highlighted... 
2415 if (!($key & 1)) { 2416 // Else not a block to highlight 2417 $endresult .= $this->hsc($parts[$key][1]); 2418 unset($parts[$key]); 2419 continue; 2420 } 2421 2422 $result = ''; 2423 $part = $parts[$key][1]; 2424 2425 $highlight_part = true; 2426 if ($this->strict_mode && !is_null($parts[$key][0])) { 2427 // get the class key for this block of code 2428 $script_key = $parts[$key][0]; 2429 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key]; 2430 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' && 2431 $this->lexic_permissions['SCRIPT']) { 2432 // Add a span element around the source to 2433 // highlight the overall source block 2434 if (!$this->use_classes && 2435 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') { 2436 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"'; 2437 } else { 2438 $attributes = ' class="sc' . $script_key . '"'; 2439 } 2440 $result .= "<span$attributes>"; 2441 $STRICTATTRS = $attributes; 2442 } 2443 } 2444 2445 if ($highlight_part) { 2446 // Now, highlight the code in this block. This code 2447 // is really the engine of GeSHi (along with the method 2448 // parse_non_string_part). 
2449 2450 // cache comment regexps incrementally 2451 $next_comment_regexp_key = ''; 2452 $next_comment_regexp_pos = -1; 2453 $next_comment_multi_pos = -1; 2454 $next_comment_single_pos = -1; 2455 $comment_regexp_cache_per_key = array(); 2456 $comment_multi_cache_per_key = array(); 2457 $comment_single_cache_per_key = array(); 2458 $next_open_comment_multi = ''; 2459 $next_comment_single_key = ''; 2460 $escape_regexp_cache_per_key = array(); 2461 $next_escape_regexp_key = ''; 2462 $next_escape_regexp_pos = -1; 2463 2464 $length = strlen($part); 2465 for ($i = 0; $i < $length; ++$i) { 2466 // Get the next char 2467 $char = $part[$i]; 2468 $char_len = 1; 2469 2470 // update regexp comment cache if needed 2471 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) { 2472 $next_comment_regexp_pos = $length; 2473 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) { 2474 $match_i = false; 2475 if (isset($comment_regexp_cache_per_key[$comment_key]) && 2476 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i || 2477 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) { 2478 // we have already matched something 2479 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) { 2480 // this comment is never matched 2481 continue; 2482 } 2483 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos']; 2484 } else if ( 2485 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible 2486 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) || 2487 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i)) 2488 ) { 2489 $match_i = $match[0][1]; 2490 if (GESHI_PHP_PRE_433) { 2491 $match_i += $i; 2492 } 2493 2494 $comment_regexp_cache_per_key[$comment_key] = array( 2495 'key' => $comment_key, 2496 'length' => strlen($match[0][0]), 2497 'pos' => $match_i 2498 ); 2499 } else { 2500 
$comment_regexp_cache_per_key[$comment_key]['pos'] = false; 2501 continue; 2502 } 2503 2504 if ($match_i !== false && $match_i < $next_comment_regexp_pos) { 2505 $next_comment_regexp_pos = $match_i; 2506 $next_comment_regexp_key = $comment_key; 2507 if ($match_i === $i) { 2508 break; 2509 } 2510 } 2511 } 2512 } 2513 2514 $string_started = false; 2515 2516 if (isset($is_string_starter[$char])) { 2517 // Possibly the start of a new string ... 2518