Jeremy Newman : update urlify and emailify

Fri Sep 13 16:02:15 CDT 2019

Module: website
Branch: master
Commit: 27c0867ad582e2c12601a8eba0c0bf50433311fb
URL:    https://source.winehq.org/git/website.git/?a=commit;h=27c0867ad582e2c12601a8eba0c0bf50433311fb

Author: Jeremy Newman <jnewman at codeweavers.com>
Date:   Fri Sep 13 16:01:51 2019 -0500

update urlify and emailify

---

 css/styles.css   | 11 ++++++--
 include/html.php | 81 ++++++++++++++++++++++++++++++++++++--------------------
 2 files changed, 62 insertions(+), 30 deletions(-)

diff --git a/css/styles.css b/css/styles.css
index b06db889..825604ab 100644
--- a/css/styles.css
+++ b/css/styles.css
@@ -18,7 +18,7 @@ PRE {
 }
 
 /* remove background from carousel nav */
-.carousel-control.left, 
+.carousel-control.left,
 .carousel-control.right {
     background-image: none
 }
@@ -217,6 +217,13 @@ DIV.clear           { clear: both; margin: 0px; padding: 0px; }
 /* disable wrapping */
 .nowrap          { white-space:nowrap; }
 
+/* force wrap */
+.force-wrap {
+    overflow-wrap: break-word !important;
+    word-wrap: break-word !important;
+    word-break: break-word !important;
+}
+
 /* text styles */
 .bold                   { font-weight: bold; }
 .bolder                 { font-weight: bolder; }
@@ -419,7 +426,7 @@ DIV.newsblock       { margin: 0 5px 25px 10px; padding: 0; }
 /* winehq badge (uses font-awesome) */
 .winehq_menu_item .winehq_badge {
     display:inline-block;
-    float:left; 
+    float:left;
     clear:left;
 }
 .winehq_menu_item .winehq_badge .winehq_badge_inner {
diff --git a/include/html.php b/include/html.php
index 0e33ac4a..eb65ed4f 100644
--- a/include/html.php
+++ b/include/html.php
@@ -629,41 +629,51 @@ class html
         return $str;
     }
 
-    // URLIFY (search text and make urls linkable, also wrap URL at 100 chars)
+    // URLIFY (make urls hyperlinks)
     public function urlify ($text)
     {
+        // the regular expression used to match URLs
+        $url_regex = '/((http|https|ftp|rss):\/\/'.                                         // protocol
+                     '(([a-z0-9$_\.\+!\*\'\(\),;\?&\=\-]|%[0-9a-f]{2})+'.                   // username
+                     '(:([a-z0-9$_\.\+!\*\'\(\),;\?&\=\-]|%[0-9a-f]{2})+)'.                 // password
+                     '@)?(?#'.                                                              // auth requires @
+                     ')((([a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*'.                       // domain segments AND
+                     '[a-z][a-z0-9-]*[a-z0-9]'.                                             // top level domain  OR
+                     '|((\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])\.){3}'.
+                     '(\d|[1-9]\d|1\d{2}|2[0-4][0-9]|25[0-5])'.                             // IP address
+                     ')(:\d+)?'.                                                            // port
+                     ')(((\/+([a-z0-9$_\.\+!\*\'\,\;\:@&\=\-\~]|%[0-9a-f]{2})*)*'.          // path
+                     '(\?([a-z0-9$_\.\+!\*\',;:@&\=\-\/]|%[0-9a-f]{2})*)'.                  // query string
+                     '?)?)?'.                                                               // path and query string optional
+                     '(#([a-z0-9$_\.\+!\*\',;:@&\=\-]|%[0-9a-f]{2})*)?'.                    // fragment
+                     ')/i';
+
         // only use if text has links in it
-        if (preg_match('%(http|https|ftp|rss)(://)([-\w\.]+)%', $text))
+        if (preg_match($url_regex, $text))
         {
             // extract existing HTML so it is left unprocessed (for example, bbcode is pre-converted)
             $html_matches = array();
-            preg_match_all("/(<a href=.*>.*<\/a>)/Us", $text, $matches, PREG_PATTERN_ORDER);
+            preg_match_all("/((<a href=.*>.*<\/a>)|(<img .*src=.*>)|<iframe .*src=.*>.*<\/iframe>)/Us", $text, $matches, PREG_PATTERN_ORDER);
             foreach ($matches[0] as $c => $match)
             {
                 $html_matches[$c] = $matches[1][$c];
                 $text = str_replace("$match", "__HTMLMATCH_{$c}__", $text);
             }
 
-            // fix URL converted vars temporarily
-            $text = preg_replace('/>/', '<-RPLME->', $text);
-            $text = preg_replace('/"/', '"-RPLME-"', $text);
-
-            // add a space to the beginning text so regex will work
-            $text = " $text";
-
-            // perform regular expression
-            $text = preg_replace(
-                                 '%((http|https|ftp|rss)(://)([-\w\.]+)(:\d+)?(/)?([\w/_\-\.\*\~]+)?(\?)?([\w_\-\.\;\&\=\%]+)?(\#)?([-\w\.]+)?)%e',
-                                 "'<a href=\"\\1\">'.wordwrap('\\1', 100, '\n', 1).'</a>'",
-                                 $text
-                                );
-
-            // remove our temp conversion
-            $text = preg_replace('/<-RPLME->/', '>', $text);
-            $text = preg_replace('/"-RPLME-"/', '"', $text);
-
-            // remove our temp space
-            $text = preg_replace('%^ %', '', $text);
+            // perform regular expression and wrap (with special handling for <URL>)
+            $text = preg_replace_callback(
+                $url_regex,
+                function ($m) {
+                    $newend = '';
+                    $end = preg_replace('/^(.*)(\>|&gt\;)([\.\?\!\;\:\,])?$/', '\\2\\3', $m[1]);
+                    if ($end and $end != $m[1]) {
+                        $m[1] = preg_replace('/(.*)'.preg_quote($end).'$/', '\\1', $m[1]);
+                        $newend = $end;
+                    }
+                    return "<a href=\"{$m[1]}\" class=\"force-wrap\">{$m[1]}</a>{$newend}";
+                },
+                $text
+            );
 
             // re-insert HTML
             preg_match_all("/(__HTMLMATCH_(\d+)__)/", $text, $matches, PREG_PATTERN_ORDER);
@@ -677,7 +687,7 @@ class html
     }
 
     // EMAILIFY (convert email addresses to links)
-    public function emailify ($text, $convert = 0)
+    public function emailify ($text, $convert = 0, $shorten = 0)
     {
         // fix quoted printable
         if (preg_match('/^\=/', $text))
@@ -686,11 +696,17 @@ class html
             $text = mb_convert_encoding($text, 'UTF-8');
             $text = preg_replace('/^\=\?[a-z0-9\-]+\?Q\?(.*)\?\=/', "\\1", $text);
         }
-        $emreg = '/(.*) (\<|\&lt\;)([a-zA-Z0-9_\.\+\/-]+)@([a-zA-Z0-9_\.-]+\.[a-zA-Z0-9]+)(\>|\&gt\;)(.*)/';
-        if ($convert and preg_match($emreg, $text))
+        $emailreg = '/(.*) (\<|\&lt\;)([a-zA-Z0-9_\.\+\/-]+)@([a-zA-Z0-9_\.-]+\.[a-zA-Z0-9]+)(\>|\&gt\;)(.*)/';
+        if ($convert and preg_match($emailreg, $text))
         {
             // long format puts the name in and hides the email in the href
-            $text = preg_replace($emreg."e", "'<a href=\"mailto:\\3@\\4\">'.wordwrap('\\1', 25, ' ', 1).'</a>'", $text);
+            $text = preg_replace_callback(
+                "{$emailreg}",
+                function ($m) {
+                    return "<a href=\"mailto:{$m[3]}@{$m[4]}\">{$m[1]}</a>";
+                },
+                $text
+            );
         }
         else
         {
@@ -698,7 +714,16 @@ class html
             $emailreg = '/([a-zA-Z0-9_\.\+\/-]+)@([a-zA-Z0-9_\.-]+\.[a-zA-Z0-9]+)/';
             if (preg_match($emailreg, $text))
             {
-                    $text = preg_replace($emailreg, "<a href=\"mailto:\\1@\\2\">\\1@\\2</a>", $text);
+                $text = preg_replace_callback(
+                    "{$emailreg}",
+                    function ($m) use ($shorten) {
+                        if ($shorten)
+                            return "<a href=\"mailto:{$m[1]}@{$m[2]}\" class=\"force-wrap\">{$m[1]}</a>";
+                        else
+                            return "<a href=\"mailto:{$m[1]}@{$m[2]}\" class=\"force-wrap\">{$m[1]}@{$m[2]}</a>";
+                    },
+                    $text
+                );
             }
         }
         return $text;