[ Index ]

PHP Cross Reference of Joomla 1.5.26 DE

title

Body

[close]

/libraries/openid/Auth/OpenID/ -> Parse.php (source)

   1  <?php
   2  
   3  /**
   4   * This module implements a VERY limited parser that finds <link> tags
   5   * in the head of HTML or XHTML documents and parses out their
   6   * attributes according to the OpenID spec. It is a liberal parser,
   7   * but it requires these things from the data in order to work:
   8   *
   9   * - There must be an open <html> tag
  10   *
  11   * - There must be an open <head> tag inside of the <html> tag
  12   *
  13   * - Only <link>s that are found inside of the <head> tag are parsed
  14   *   (this is by design)
  15   *
  16   * - The parser follows the OpenID specification in resolving the
  17   *   attributes of the link tags. This means that the attributes DO
  18   *   NOT get resolved as they would by an XML or HTML parser. In
  19   *   particular, only certain entities get replaced, and href
  20   *   attributes do not get resolved relative to a base URL.
  21   *
  22   * From http://openid.net/specs.bml:
  23   *
  24   * - The openid.server URL MUST be an absolute URL. OpenID consumers
  25   *   MUST NOT attempt to resolve relative URLs.
  26   *
  27   * - The openid.server URL MUST NOT include entities other than &amp;,
  28   *   &lt;, &gt;, and &quot;.
  29   *
  30   * The parser ignores SGML comments and <![CDATA[blocks]]>. Both kinds
  31   * of quoting are allowed for attributes.
  32   *
  33   * The parser deals with invalid markup in these ways:
  34   *
  35   * - Tag names are not case-sensitive
  36   *
  37   * - The <html> tag is accepted even when it is not at the top level
  38   *
  39   * - The <head> tag is accepted even when it is not a direct child of
  40   *   the <html> tag, but a <html> tag must be an ancestor of the
  41   *   <head> tag
  42   *
  43   * - <link> tags are accepted even when they are not direct children
  44   *   of the <head> tag, but a <head> tag must be an ancestor of the
  45   *   <link> tag
  46   *
  47   * - If there is no closing tag for an open <html> or <head> tag, the
  48   *   remainder of the document is viewed as being inside of the
  49   *   tag. If there is no closing tag for a <link> tag, the link tag is
  50   *   treated as a short tag. Exceptions to this rule are that <html>
  51   *   closes <html> and <body> or <head> closes <head>
  52   *
  53   * - Attributes of the <link> tag are not required to be quoted.
  54   *
  55   * - In the case of duplicated attribute names, the attribute coming
  56   *   last in the tag will be the value returned.
  57   *
  58   * - Any text that does not parse as an attribute within a link tag
  59   *   will be ignored. (e.g. <link pumpkin rel='openid.server' /> will
  60   *   ignore pumpkin)
  61   *
   62   * - If there is more than one <html> or <head> tag, the parser only
  63   *   looks inside of the first one.
  64   *
  65   * - The contents of <script> tags are ignored entirely, except
  66   *   unclosed <script> tags. Unclosed <script> tags are ignored.
  67   *
  68   * - Any other invalid markup is ignored, including unclosed SGML
  69   *   comments and unclosed <![CDATA[blocks.
  70   *
  71   * PHP versions 4 and 5
  72   *
  73   * LICENSE: See the COPYING file included in this distribution.
  74   *
  75   * @access private
  76   * @package OpenID
  77   * @author JanRain, Inc. <openid@janrain.com>
  78   * @copyright 2005-2008 Janrain, Inc.
  79   * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
  80   */
  81  
  82  // Do not allow direct access
  83  defined( '_JEXEC' ) or die( 'Restricted access' );
  84  
  85  /**
  86   * Require Auth_OpenID::arrayGet().
  87   */
  88  require_once "Auth/OpenID.php";
  89  
  90  class Auth_OpenID_Parse {
  91  
  92      /**
  93       * Specify some flags for use with regex matching.
  94       */
  95      var $_re_flags = "si";
  96  
  97      /**
  98       * Stuff to remove before we start looking for tags
  99       */
 100      var $_removed_re =
 101             "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";
 102  
 103      /**
 104       * Starts with the tag name at a word boundary, where the tag name
 105       * is not a namespace
 106       */
 107      var $_tag_expr = "<%s\b(?!:)([^>]*?)(?:\/>|>(.*?)(?:<\/?%s\s*>|\Z))";
 108  
 109      var $_attr_find = '\b(\w+)=("[^"]*"|\'[^\']*\'|[^\'"\s\/<>]+)';
 110  
 111      var $_open_tag_expr = "<%s\b";
 112      var $_close_tag_expr = "<((\/%s\b)|(%s[^>\/]*\/))>";
 113  
 114      function Auth_OpenID_Parse()
 115      {
 116          $this->_link_find = sprintf("/<link\b(?!:)([^>]*)(?!<)>/%s",
 117                                      $this->_re_flags);
 118  
 119          $this->_entity_replacements = array(
 120                                              'amp' => '&',
 121                                              'lt' => '<',
 122                                              'gt' => '>',
 123                                              'quot' => '"'
 124                                              );
 125  
 126          $this->_attr_find = sprintf("/%s/%s",
 127                                      $this->_attr_find,
 128                                      $this->_re_flags);
 129  
 130          $this->_removed_re = sprintf("/%s/%s",
 131                                       $this->_removed_re,
 132                                       $this->_re_flags);
 133  
 134          $this->_ent_replace =
 135              sprintf("&(%s);", implode("|",
 136                                        $this->_entity_replacements));
 137      }
 138  
 139      /**
 140       * Returns a regular expression that will match a given tag in an
 141       * SGML string.
 142       */
 143      function tagMatcher($tag_name, $close_tags = null)
 144      {
 145          $expr = $this->_tag_expr;
 146  
 147          if ($close_tags) {
 148              $options = implode("|", array_merge(array($tag_name), $close_tags));
 149              $closer = sprintf("(?:%s)", $options);
 150          } else {
 151              $closer = $tag_name;
 152          }
 153  
 154          $expr = sprintf($expr, $tag_name, $closer);
 155          return sprintf("/%s/%s", $expr, $this->_re_flags);
 156      }
 157  
 158      function openTag($tag_name)
 159      {
 160          $expr = sprintf($this->_open_tag_expr, $tag_name);
 161          return sprintf("/%s/%s", $expr, $this->_re_flags);
 162      }
 163  
 164      function closeTag($tag_name)
 165      {
 166          $expr = sprintf($this->_close_tag_expr, $tag_name, $tag_name);
 167          return sprintf("/%s/%s", $expr, $this->_re_flags);
 168      }
 169  
 170      function htmlBegin($s)
 171      {
 172          $matches = array();
 173          $result = preg_match($this->openTag('html'), $s,
 174                               $matches, PREG_OFFSET_CAPTURE);
 175          if ($result === false || !$matches) {
 176              return false;
 177          }
 178          // Return the offset of the first match.
 179          return $matches[0][1];
 180      }
 181  
 182      function htmlEnd($s)
 183      {
 184          $matches = array();
 185          $result = preg_match($this->closeTag('html'), $s,
 186                               $matches, PREG_OFFSET_CAPTURE);
 187          if ($result === false || !$matches) {
 188              return false;
 189          }
 190          // Return the offset of the first match.
 191          return $matches[count($matches) - 1][1];
 192      }
 193  
 194      function headFind()
 195      {
 196          return $this->tagMatcher('head', array('body', 'html'));
 197      }
 198  
 199      function replaceEntities($str)
 200      {
 201          foreach ($this->_entity_replacements as $old => $new) {
 202              $str = preg_replace(sprintf("/&%s;/", $old), $new, $str);
 203          }
 204          return $str;
 205      }
 206  
 207      function removeQuotes($str)
 208      {
 209          $matches = array();
 210          $double = '/^"(.*)"$/';
 211          $single = "/^\'(.*)\'$/";
 212  
 213          if (preg_match($double, $str, $matches)) {
 214              return $matches[1];
 215          } else if (preg_match($single, $str, $matches)) {
 216              return $matches[1];
 217          } else {
 218              return $str;
 219          }
 220      }
 221  
 222      /**
 223       * Find all link tags in a string representing a HTML document and
 224       * return a list of their attributes.
 225       *
 226       * @param string $html The text to parse
 227       * @return array $list An array of arrays of attributes, one for each
 228       * link tag
 229       */
 230      function parseLinkAttrs($html)
 231      {
 232          $stripped = preg_replace($this->_removed_re,
 233                                   "",
 234                                   $html);
 235  
 236          $html_begin = $this->htmlBegin($stripped);
 237          $html_end = $this->htmlEnd($stripped);
 238  
 239          if ($html_begin === false) {
 240              return array();
 241          }
 242  
 243          if ($html_end === false) {
 244              $html_end = strlen($stripped);
 245          }
 246  
 247          $stripped = substr($stripped, $html_begin,
 248                             $html_end - $html_begin);
 249  
 250          // Try to find the <HEAD> tag.
 251          $head_re = $this->headFind();
 252          $head_matches = array();
 253          if (!preg_match($head_re, $stripped, $head_matches)) {
 254              return array();
 255          }
 256  
 257          $link_data = array();
 258          $link_matches = array();
 259  
 260          if (!preg_match_all($this->_link_find, $head_matches[0],
 261                              $link_matches)) {
 262              return array();
 263          }
 264  
 265          foreach ($link_matches[0] as $link) {
 266              $attr_matches = array();
 267              preg_match_all($this->_attr_find, $link, $attr_matches);
 268              $link_attrs = array();
 269              foreach ($attr_matches[0] as $index => $full_match) {
 270                  $name = $attr_matches[1][$index];
 271                  $value = $this->replaceEntities(
 272                                $this->removeQuotes($attr_matches[2][$index]));
 273  
 274                  $link_attrs[strtolower($name)] = $value;
 275              }
 276              $link_data[] = $link_attrs;
 277          }
 278  
 279          return $link_data;
 280      }
 281  
 282      function relMatches($rel_attr, $target_rel)
 283      {
 284          // Does this target_rel appear in the rel_str?
 285          // XXX: TESTME
 286          $rels = preg_split("/\s+/", trim($rel_attr));
 287          foreach ($rels as $rel) {
 288              $rel = strtolower($rel);
 289              if ($rel == $target_rel) {
 290                  return 1;
 291              }
 292          }
 293  
 294          return 0;
 295      }
 296  
 297      function linkHasRel($link_attrs, $target_rel)
 298      {
 299          // Does this link have target_rel as a relationship?
 300          // XXX: TESTME
 301          $rel_attr = Auth_OpeniD::arrayGet($link_attrs, 'rel', null);
 302          return ($rel_attr && $this->relMatches($rel_attr,
 303                                                 $target_rel));
 304      }
 305  
 306      function findLinksRel($link_attrs_list, $target_rel)
 307      {
 308          // Filter the list of link attributes on whether it has
 309          // target_rel as a relationship.
 310          // XXX: TESTME
 311          $result = array();
 312          foreach ($link_attrs_list as $attr) {
 313              if ($this->linkHasRel($attr, $target_rel)) {
 314                  $result[] = $attr;
 315              }
 316          }
 317  
 318          return $result;
 319      }
 320  
 321      function findFirstHref($link_attrs_list, $target_rel)
 322      {
 323          // Return the value of the href attribute for the first link
 324          // tag in the list that has target_rel as a relationship.
 325          // XXX: TESTME
 326          $matches = $this->findLinksRel($link_attrs_list,
 327                                         $target_rel);
 328          if (!$matches) {
 329              return null;
 330          }
 331          $first = $matches[0];
 332          return Auth_OpenID::arrayGet($first, 'href', null);
 333      }
 334  }
 335  
 336  function Auth_OpenID_legacy_discover($html_text, $server_rel,
 337                                       $delegate_rel)
 338  {
 339      $p = new Auth_OpenID_Parse();
 340  
 341      $link_attrs = $p->parseLinkAttrs($html_text);
 342  
 343      $server_url = $p->findFirstHref($link_attrs,
 344                                      $server_rel);
 345  
 346      if ($server_url === null) {
 347          return false;
 348      } else {
 349          $delegate_url = $p->findFirstHref($link_attrs,
 350                                            $delegate_rel);
 351          return array($delegate_url, $server_url);
 352      }
 353  }
 354  
 355  ?>


Generated: Wed Mar 28 15:54:07 2012 Cross-referenced by PHPXref 0.7.1