[ Index ]

PHP Cross Reference of Joomla 1.5.26 DE

title

Body

[close]

/libraries/openid/Auth/Yadis/ -> ParseHTML.php (source)

   1  <?php
   2  
   3  /**
   4   * This is the HTML pseudo-parser for the Yadis library.
   5   *
   6   * PHP versions 4 and 5
   7   *
   8   * LICENSE: See the COPYING file included in this distribution.
   9   *
  10   * @package OpenID
  11   * @author JanRain, Inc. <openid@janrain.com>
  12   * @copyright 2005-2008 Janrain, Inc.
  13   * @license http://www.apache.org/licenses/LICENSE-2.0 Apache
  14   */
  15  
  16  // Do not allow direct access
  17  defined( '_JEXEC' ) or die( 'Restricted access' );
  18  
  19  /**
  20   * This class is responsible for scanning an HTML string to find META
  21   * tags and their attributes.  This is used by the Yadis discovery
  22   * process.  This class must be instantiated to be used.
  23   *
  24   * @package OpenID
  25   */
  26  class Auth_Yadis_ParseHTML {
  27  
  28      /**
  29       * @access private
  30       */
  31      var $_re_flags = "si";
  32  
  33      /**
  34       * @access private
  35       */
  36      var $_removed_re =
  37             "<!--.*?-->|<!\[CDATA\[.*?\]\]>|<script\b(?!:)[^>]*>.*?<\/script>";
  38  
  39      /**
  40       * @access private
  41       */
  42      var $_tag_expr = "<%s%s(?:\s.*?)?%s>";
  43  
  44      /**
  45       * @access private
  46       */
  47      var $_attr_find = '\b([-\w]+)=(".*?"|\'.*?\'|.+?)[\/\s>]';
  48  
  49      function Auth_Yadis_ParseHTML()
  50      {
  51          $this->_attr_find = sprintf("/%s/%s",
  52                                      $this->_attr_find,
  53                                      $this->_re_flags);
  54  
  55          $this->_removed_re = sprintf("/%s/%s",
  56                                       $this->_removed_re,
  57                                       $this->_re_flags);
  58  
  59          $this->_entity_replacements = array(
  60                                              'amp' => '&',
  61                                              'lt' => '<',
  62                                              'gt' => '>',
  63                                              'quot' => '"'
  64                                              );
  65  
  66          $this->_ent_replace =
  67              sprintf("&(%s);", implode("|",
  68                                        $this->_entity_replacements));
  69      }
  70  
  71      /**
  72       * Replace HTML entities (amp, lt, gt, and quot) as well as
  73       * numeric entities (e.g. #x9f;) with their actual values and
  74       * return the new string.
  75       *
  76       * @access private
  77       * @param string $str The string in which to look for entities
  78       * @return string $new_str The new string entities decoded
  79       */
  80      function replaceEntities($str)
  81      {
  82          foreach ($this->_entity_replacements as $old => $new) {
  83              $str = preg_replace(sprintf("/&%s;/", $old), $new, $str);
  84          }
  85  
  86          // Replace numeric entities because html_entity_decode doesn't
  87          // do it for us.
  88          $str = preg_replace('~&#x([0-9a-f]+);~ei', 'chr(hexdec("\\1"))', $str);
  89          $str = preg_replace('~&#([0-9]+);~e', 'chr(\\1)', $str);
  90  
  91          return $str;
  92      }
  93  
  94      /**
  95       * Strip single and double quotes off of a string, if they are
  96       * present.
  97       *
  98       * @access private
  99       * @param string $str The original string
 100       * @return string $new_str The new string with leading and
 101       * trailing quotes removed
 102       */
 103      function removeQuotes($str)
 104      {
 105          $matches = array();
 106          $double = '/^"(.*)"$/';
 107          $single = "/^\'(.*)\'$/";
 108  
 109          if (preg_match($double, $str, $matches)) {
 110              return $matches[1];
 111          } else if (preg_match($single, $str, $matches)) {
 112              return $matches[1];
 113          } else {
 114              return $str;
 115          }
 116      }
 117  
 118      /**
 119       * Create a regular expression that will match an opening 
 120       * or closing tag from a set of names.
 121       *
 122       * @access private
 123       * @param mixed $tag_names Tag names to match
 124       * @param mixed $close false/0 = no, true/1 = yes, other = maybe
 125       * @param mixed $self_close false/0 = no, true/1 = yes, other = maybe
 126       * @return string $regex A regular expression string to be used
 127       * in, say, preg_match.
 128       */
 129      function tagPattern($tag_names, $close, $self_close)
 130      {
 131          if (is_array($tag_names)) {
 132              $tag_names = '(?:'.implode('|',$tag_names).')';
 133          }
 134          if ($close) {
 135              $close = '\/' . (($close == 1)? '' : '?');
 136          } else {
 137              $close = '';
 138          }
 139          if ($self_close) {
 140              $self_close = '(?:\/\s*)' . (($self_close == 1)? '' : '?');
 141          } else {
 142              $self_close = '';
 143          }
 144          $expr = sprintf($this->_tag_expr, $close, $tag_names, $self_close);
 145  
 146          return sprintf("/%s/%s", $expr, $this->_re_flags);
 147      }
 148  
 149      /**
 150       * Given an HTML document string, this finds all the META tags in
 151       * the document, provided they are found in the
 152       * <HTML><HEAD>...</HEAD> section of the document.  The <HTML> tag
 153       * may be missing.
 154       *
 155       * @access private
 156       * @param string $html_string An HTMl document string
 157       * @return array $tag_list Array of tags; each tag is an array of
 158       * attribute -> value.
 159       */
 160      function getMetaTags($html_string)
 161      {
 162          $html_string = preg_replace($this->_removed_re,
 163                                      "",
 164                                      $html_string);
 165  
 166          $key_tags = array($this->tagPattern('html', false, false),
 167                            $this->tagPattern('head', false, false),
 168                            $this->tagPattern('head', true, false),
 169                            $this->tagPattern('html', true, false),
 170                            $this->tagPattern(array(
 171                            'body', 'frameset', 'frame', 'p', 'div',
 172                            'table','span','a'), 'maybe', 'maybe'));
 173          $key_tags_pos = array();
 174          foreach ($key_tags as $pat) {
 175              $matches = array();
 176              preg_match($pat, $html_string, $matches, PREG_OFFSET_CAPTURE);
 177              if($matches) {
 178                  $key_tags_pos[] = $matches[0][1];
 179              } else {
 180                  $key_tags_pos[] = null;
 181              }
 182          }
 183          // no opening head tag
 184          if (is_null($key_tags_pos[1])) {
 185              return array();
 186          }
 187          // the effective </head> is the min of the following
 188          if (is_null($key_tags_pos[2])) {
 189              $key_tags_pos[2] = strlen($html_string);
 190          }
 191          foreach (array($key_tags_pos[3], $key_tags_pos[4]) as $pos) {
 192              if (!is_null($pos) && $pos < $key_tags_pos[2]) {
 193                  $key_tags_pos[2] = $pos;
 194              }
 195          }
 196          // closing head tag comes before opening head tag
 197          if ($key_tags_pos[1] > $key_tags_pos[2]) {
 198              return array();
 199          }
 200          // if there is an opening html tag, make sure the opening head tag
 201          // comes after it
 202          if (!is_null($key_tags_pos[0]) && $key_tags_pos[1] < $key_tags_pos[0]) {
 203              return array();
 204          }
 205          $html_string = substr($html_string, $key_tags_pos[1],
 206                                ($key_tags_pos[2]-$key_tags_pos[1]));
 207  
 208          $link_data = array();
 209          $link_matches = array();
 210          
 211          if (!preg_match_all($this->tagPattern('meta', false, 'maybe'),
 212                              $html_string, $link_matches)) {
 213              return array();
 214          }
 215  
 216          foreach ($link_matches[0] as $link) {
 217              $attr_matches = array();
 218              preg_match_all($this->_attr_find, $link, $attr_matches);
 219              $link_attrs = array();
 220              foreach ($attr_matches[0] as $index => $full_match) {
 221                  $name = $attr_matches[1][$index];
 222                  $value = $this->replaceEntities(
 223                                $this->removeQuotes($attr_matches[2][$index]));
 224  
 225                  $link_attrs[strtolower($name)] = $value;
 226              }
 227              $link_data[] = $link_attrs;
 228          }
 229  
 230          return $link_data;
 231      }
 232  
 233      /**
 234       * Looks for a META tag with an "http-equiv" attribute whose value
 235       * is one of ("x-xrds-location", "x-yadis-location"), ignoring
 236       * case.  If such a META tag is found, its "content" attribute
 237       * value is returned.
 238       *
 239       * @param string $html_string An HTML document in string format
 240       * @return mixed $content The "content" attribute value of the
 241       * META tag, if found, or null if no such tag was found.
 242       */
 243      function getHTTPEquiv($html_string)
 244      {
 245          $meta_tags = $this->getMetaTags($html_string);
 246  
 247          if ($meta_tags) {
 248              foreach ($meta_tags as $tag) {
 249                  if (array_key_exists('http-equiv', $tag) &&
 250                      (in_array(strtolower($tag['http-equiv']),
 251                                array('x-xrds-location', 'x-yadis-location'))) &&
 252                      array_key_exists('content', $tag)) {
 253                      return $tag['content'];
 254                  }
 255              }
 256          }
 257  
 258          return null;
 259      }
 260  }
 261  
 262  ?>


Generated: Wed Mar 28 15:54:07 2012 Cross-referenced by PHPXref 0.7.1