| [ Index ] |
PHP Cross Reference of Joomla 1.5.26 DE |
[Summary view] [Print] [Text view]
<?php

/**
 * This module implements a VERY limited parser that finds <link> tags
 * in the head of HTML or XHTML documents and parses out their
 * attributes according to the OpenID spec. It is a liberal parser,
 * but it requires these things from the data in order to work:
 *
 * - There must be an open <html> tag
 *
 * - There must be an open <head> tag inside of the <html> tag
 *
 * - Only <link>s that are found inside of the <head> tag are parsed
 *   (this is by design)
 *
 * - The parser follows the OpenID specification in resolving the
 *   attributes of the link tags. This means that the attributes DO
 *   NOT get resolved as they would by an XML or HTML parser. In
 *   particular, only certain entities get replaced, and href
 *   attributes do not get resolved relative to a base URL.
 *
 * From http://openid.net/specs.bml:
 *
 * - The openid.server URL MUST be an absolute URL. OpenID consumers
 *   MUST NOT attempt to resolve relative URLs.
 *
 * - The openid.server URL MUST NOT include entities other than &amp;,
 *   &lt;, &gt;, and &quot;.
 *
 * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
 * of quoting are allowed for attributes.
 *
 * The parser deals with invalid markup in these ways:
 *
 * - Tag names are not case-sensitive
 *
 * - The <html> tag is accepted even when it is not at the top level
 *
 * - The <head> tag is accepted even when it is not a direct child of
 *   the <html> tag, but a <html> tag must be an ancestor of the
 *   <head> tag
 *
 * - <link> tags are accepted even when they are not direct children
 *   of the <head> tag, but a <head> tag must be an ancestor of the
 *   <link> tag
 *
 * - If there is no closing tag for an open <html> or <head> tag, the
 *   remainder of the document is viewed as being inside of the
 *   tag. If there is no closing tag for a <link> tag, the link tag is
 *   treated as a short tag. Exceptions to this rule are that <html>
 *   closes <html> and <body> or <head> closes <head>
 *
 * - Attributes of the <link> tag are not required to be quoted.
 *
 * - In the case of duplicated attribute names, the attribute coming
 *   last in the tag will be the value returned.
 *
 * - Any text that does not parse as an attribute within a link tag
 *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
 *   ignore pumpkin)
 *
 * - If there are more than one <html> or <head> tag, the parser only
 *   looks inside of the first one.
 *
 * - The contents of <script> tags are ignored entirely, except
 *   unclosed <script> tags. Unclosed <script> tags are ignored.
 *
 * - Any other invalid markup is ignored, including unclosed SGML
 *   comments and unclosed <![CDATA[blocks.
 *
 * PHP versions 4 and 5
 *
 * LICENSE: See the COPYING file included in this distribution.
 *
 * @access private
 * @package OpenID
 * @author JanRain, Inc. <openid@janrain.com>
 * @copyright 2005-2008 Janrain, Inc.
 * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
 */

// Do not allow direct access
defined( '_JEXEC' ) or die( 'Restricted access' );

/**
 * Require Auth_OpenID::arrayGet().
 */
require_once "Auth/OpenID.php";

/**
 * Regular-expression based extractor for <link> tags found in the
 * <head> of an HTML document, plus helpers for filtering the extracted
 * attribute sets by their "rel" value.
 */
class Auth_OpenID_Parse {

    /**
     * Specify some flags for use with regex matching.
     * ("s": dot matches newlines, "i": case-insensitive.)
     */
    var $_re_flags = "si";

    /**
     * Stuff to remove before we start looking for tags: SGML comments,
     * CDATA sections and closed <script> elements. The constructor
     * wraps this fragment in delimiters and appends the regex flags.
     */
    var $_removed_re =
        "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";

    /**
     * Starts with the tag name at a word boundary, where the tag name
     * is not a namespace. The two %s placeholders are the opening tag
     * name and the alternation of tag names that terminate it.
     */
    var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";

    /**
     * Matches one name=value attribute; the value may be double-quoted,
     * single-quoted or unquoted. The constructor wraps this fragment in
     * delimiters and appends the regex flags.
     */
    var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';

    /**
     * Templates for matching just the start of a tag and a closing (or
     * self-closing) tag; %s is the tag name.
     */
    var $_open_tag_expr = "<%s\b";
    var $_close_tag_expr = "<((\/%s\b)|(%s[^>\/]*\/))>";

    /**
     * Complete regular expression matching a <link ...> tag. Set by
     * the constructor.
     */
    var $_link_find;

    /**
     * Map of entity name (without "&" and ";") to the character it
     * stands for. Set by the constructor.
     */
    var $_entity_replacements;

    /**
     * Regex fragment matching any one of the supported entities. Set
     * by the constructor.
     */
    var $_ent_replace;

    /**
     * Build the derived regular expressions and the entity table.
     *
     * Declared as __construct() because PHP 8 no longer treats a
     * method named after its class as the constructor; without this
     * the patterns below would never be initialised there.
     */
    function __construct()
    {
        $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
                                    $this->_re_flags);

        $this->_entity_replacements = array(
            'amp' => '&',
            'lt' => '<',
            'gt' => '>',
            'quot' => '"'
        );

        $this->_attr_find = sprintf("/%s/%s",
                                    $this->_attr_find,
                                    $this->_re_flags);

        $this->_removed_re = sprintf("/%s/%s",
                                     $this->_removed_re,
                                     $this->_re_flags);

        // The alternation must list the entity NAMES (the array keys),
        // not the characters they decode to.
        $this->_ent_replace =
            sprintf("&(%s);", implode("|",
                                      array_keys($this->_entity_replacements)));
    }

    /**
     * PHP 4 style constructor, kept so that PHP 4 and any code calling
     * it explicitly keep working. It only forwards to __construct().
     */
    function Auth_OpenID_Parse()
    {
        $this->__construct();
    }

    /**
     * Returns a regular expression that will match a given tag in an
     * SGML string.
     *
     * @param string $tag_name The tag to match
     * @param array $close_tags Optional names of other tags that also
     * terminate $tag_name
     * @return string A delimited regular expression including flags
     */
    function tagMatcher($tag_name, $close_tags = null)
    {
        $expr = $this->_tag_expr;

        if ($close_tags) {
            $options = implode("|", array_merge(array($tag_name), $close_tags));
            $closer = sprintf("(?:%s)", $options);
        } else {
            $closer = $tag_name;
        }

        $expr = sprintf($expr, $tag_name, $closer);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a regular expression matching the start of an opening
     * tag with the given name.
     *
     * @param string $tag_name The tag to match
     * @return string A delimited regular expression including flags
     */
    function openTag($tag_name)
    {
        $expr = sprintf($this->_open_tag_expr, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Returns a regular expression matching a closing tag, or a
     * self-closing tag, with the given name.
     *
     * @param string $tag_name The tag to match
     * @return string A delimited regular expression including flags
     */
    function closeTag($tag_name)
    {
        $expr = sprintf($this->_close_tag_expr, $tag_name, $tag_name);
        return sprintf("/%s/%s", $expr, $this->_re_flags);
    }

    /**
     * Locate the first opening <html tag.
     *
     * @param string $s The text to search
     * @return mixed The byte offset of the match, or false when there
     * is none
     */
    function htmlBegin($s)
    {
        $matches = array();
        $result = preg_match($this->openTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // Return the offset of the first match.
        return $matches[0][1];
    }

    /**
     * Locate the first closing (or self-closing) html tag.
     *
     * @param string $s The text to search
     * @return mixed A byte offset inside the matched tag, or false
     * when there is none
     */
    function htmlEnd($s)
    {
        $matches = array();
        $result = preg_match($this->closeTag('html'), $s,
                             $matches, PREG_OFFSET_CAPTURE);
        if ($result === false || !$matches) {
            return false;
        }
        // Return the offset of the last captured group of the match,
        // i.e. the tag name part that follows the leading "<".
        return $matches[count($matches) - 1][1];
    }

    /**
     * Returns the regular expression used to find the <head> element;
     * a <body> or <html> tag also terminates it.
     *
     * @return string A delimited regular expression including flags
     */
    function headFind()
    {
        return $this->tagMatcher('head', array('body', 'html'));
    }

    /**
     * Decode the entities listed in $this->_entity_replacements.
     *
     * All entities are decoded in a single pass so that already
     * decoded text is never decoded again ("&amp;lt;" becomes "&lt;",
     * not "<").
     *
     * @param string $str The text to decode
     * @return string The decoded text
     */
    function replaceEntities($str)
    {
        $pairs = array();
        foreach ($this->_entity_replacements as $old => $new) {
            $pairs['&' . $old . ';'] = $new;
        }

        if (!$pairs) {
            return $str;
        }

        // strtr() with an array never rescans text it has replaced.
        return strtr($str, $pairs);
    }

    /**
     * Strip one pair of matching double or single quotes surrounding
     * the whole string, if present.
     *
     * @param string $str The possibly quoted value
     * @return string The value without the surrounding quotes
     */
    function removeQuotes($str)
    {
        $matches = array();
        $double = '/^"(.*)"$/';
        $single = "/^\'(.*)\'$/";

        if (preg_match($double, $str, $matches)) {
            return $matches[1];
        } else if (preg_match($single, $str, $matches)) {
            return $matches[1];
        } else {
            return $str;
        }
    }

    /**
     * Find all link tags in a string representing a HTML document and
     * return a list of their attributes.
     *
     * @param string $html The text to parse
     * @return array $list An array of arrays of attributes, one for each
     * link tag
     */
    function parseLinkAttrs($html)
    {
        $stripped = preg_replace($this->_removed_re,
                                 "",
                                 $html);

        if (!is_string($stripped)) {
            // preg_replace() yields null when PCRE fails; there is
            // nothing usable to parse in that case.
            return array();
        }

        $html_begin = $this->htmlBegin($stripped);
        $html_end = $this->htmlEnd($stripped);

        if ($html_begin === false) {
            return array();
        }

        if ($html_end === false) {
            // No closing tag: the rest of the document is inside <html>.
            $html_end = strlen($stripped);
        }

        $stripped = substr($stripped, $html_begin,
                           $html_end - $html_begin);

        // Try to find the <HEAD> tag.
        $head_re = $this->headFind();
        $head_matches = array();
        if (!preg_match($head_re, $stripped, $head_matches)) {
            return array();
        }

        $link_data = array();
        $link_matches = array();

        if (!preg_match_all($this->_link_find, $head_matches[0],
                            $link_matches)) {
            return array();
        }

        foreach ($link_matches[0] as $link) {
            $attr_matches = array();
            preg_match_all($this->_attr_find, $link, $attr_matches);
            $link_attrs = array();
            foreach ($attr_matches[0] as $index => $full_match) {
                $name = $attr_matches[1][$index];
                $value = $this->replaceEntities(
                              $this->removeQuotes($attr_matches[2][$index]));

                // Later duplicates overwrite earlier ones.
                $link_attrs[strtolower($name)] = $value;
            }
            $link_data[] = $link_attrs;
        }

        return $link_data;
    }

    /**
     * Does $target_rel appear among the whitespace-separated values of
     * a rel attribute? The attribute values are lower-cased before the
     * comparison; $target_rel is compared as given.
     *
     * @param string $rel_attr The raw rel attribute value
     * @param string $target_rel The relationship to look for
     * @return int 1 when found, 0 otherwise
     */
    function relMatches($rel_attr, $target_rel)
    {
        // XXX: TESTME
        $wanted = (string) $target_rel;
        $rels = preg_split("/\s+/", trim($rel_attr));
        foreach ($rels as $rel) {
            // Strict comparison so that numeric-looking strings are
            // not compared as numbers.
            if (strtolower($rel) === $wanted) {
                return 1;
            }
        }

        return 0;
    }

    /**
     * Does this link have target_rel as a relationship?
     *
     * @param array $link_attrs Attributes of one link tag
     * @param string $target_rel The relationship to look for
     * @return bool
     */
    function linkHasRel($link_attrs, $target_rel)
    {
        // XXX: TESTME
        $rel_attr = Auth_OpenID::arrayGet($link_attrs, 'rel', null);
        return ($rel_attr && $this->relMatches($rel_attr,
                                               $target_rel));
    }

    /**
     * Filter the list of link attributes on whether it has target_rel
     * as a relationship.
     *
     * @param array $link_attrs_list List of attribute arrays
     * @param string $target_rel The relationship to look for
     * @return array The matching attribute arrays, in input order
     */
    function findLinksRel($link_attrs_list, $target_rel)
    {
        // XXX: TESTME
        $result = array();
        foreach ($link_attrs_list as $attr) {
            if ($this->linkHasRel($attr, $target_rel)) {
                $result[] = $attr;
            }
        }

        return $result;
    }

    /**
     * Return the value of the href attribute for the first link tag in
     * the list that has target_rel as a relationship.
     *
     * @param array $link_attrs_list List of attribute arrays
     * @param string $target_rel The relationship to look for
     * @return mixed null when no link matches; otherwise whatever
     * Auth_OpenID::arrayGet() returns for 'href' with a null default
     */
    function findFirstHref($link_attrs_list, $target_rel)
    {
        // XXX: TESTME
        $matches = $this->findLinksRel($link_attrs_list,
                                       $target_rel);
        if (!$matches) {
            return null;
        }
        $first = $matches[0];
        return Auth_OpenID::arrayGet($first, 'href', null);
    }
}

/**
 * Extract the server and delegate URLs from the <link> tags of an
 * HTML document.
 *
 * @param string $html_text The HTML document to inspect
 * @param string $server_rel The rel value identifying the server link
 * @param string $delegate_rel The rel value identifying the delegate
 * link
 * @return mixed false when no server link is found, otherwise
 * array($delegate_url, $server_url)
 */
function Auth_OpenID_legacy_discover($html_text, $server_rel,
                                     $delegate_rel)
{
    $p = new Auth_OpenID_Parse();

    $link_attrs = $p->parseLinkAttrs($html_text);

    $server_url = $p->findFirstHref($link_attrs,
                                    $server_rel);

    if ($server_url === null) {
        return false;
    } else {
        $delegate_url = $p->findFirstHref($link_attrs,
                                          $delegate_rel);
        return array($delegate_url, $server_url);
    }
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
| Generated: Wed Mar 28 15:54:07 2012 | Cross-referenced by PHPXref 0.7.1 |