summary | refs | log | tree | commit | diff | about
author Wojciech Polak <polak@gnu.org> 2019-09-10 20:01:50 (GMT)
committer Wojciech Polak <polak@gnu.org> 2019-09-10 20:01:50 (GMT)
commit c5f91b184ebbb54e717f73b7ec25b37b05f56bf0 (patch) (side-by-side diff)
tree 5597f598a65e235bf29e87cdfd82357c30fa4154
parent 6ee2cdd2573866484089da12a892ed1c888b5dc6 (diff)
download glifestream-c5f91b184ebbb54e717f73b7ec25b37b05f56bf0.tar.gz
glifestream-c5f91b184ebbb54e717f73b7ec25b37b05f56bf0.tar.bz2
Fix urlizetrunc regression
Diffstat (more/less context) (ignore whitespace changes)
-rw-r--r-- glifestream/filters/twyntax.py 5
-rw-r--r-- glifestream/stream/templatetags/gls_filters.py 14
-rw-r--r-- glifestream/utils/html.py 112
3 files changed, 127 insertions, 4 deletions
diff --git a/glifestream/filters/twyntax.py b/glifestream/filters/twyntax.py
index e2bf0b7..34134c7 100644
--- a/glifestream/filters/twyntax.py
+++ b/glifestream/filters/twyntax.py
@@ -14,15 +14,14 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.
import re
-from django.template.defaultfilters import urlizetrunc
-
def parse(s, type='twitter'):
+ from glifestream.stream.templatetags.gls_filters import gls_urlizetrunc
if type == 'twitter':
s = s.split(': ', 1)[1]
s = hash_tag(s, type)
s = at_reply(s, type)
- s = urlizetrunc(s, 45)
+ s = gls_urlizetrunc(s, 45)
return s
diff --git a/glifestream/stream/templatetags/gls_filters.py b/glifestream/stream/templatetags/gls_filters.py
index 4be8f7b..6d69672 100644
--- a/glifestream/stream/templatetags/gls_filters.py
+++ b/glifestream/stream/templatetags/gls_filters.py
@@ -28,6 +28,7 @@ from django.utils.translation import ugettext as _
from django.utils.translation import ungettext
from django.template.defaultfilters import date as ddate
from django.template.defaultfilters import urlencode, stringfilter
+from glifestream.utils.html import urlize as _urlize
from glifestream.utils.slugify import slugify
from glifestream.stream import media
from glifestream.apis import *
@@ -178,3 +179,16 @@ def fix_ampersands(value):
def fix_ampersands_filter(value):
"""Replaces ampersands with ``&amp;`` entities."""
return fix_ampersands(value)
+
+
+@register.filter('gls_urlizetrunc', is_safe=True, needs_autoescape=True)
+@stringfilter
+def gls_urlizetrunc(value, limit, autoescape=None):
+ """
+ Converts URLs into clickable links, truncating URLs to the given character
+ limit, and adding 'rel=nofollow' attribute to discourage spamming.
+
+ Argument: Length to truncate URLs to.
+ """
+ return mark_safe(_urlize(value, trim_url_limit=int(limit), nofollow=True,
+ autoescape=autoescape))
diff --git a/glifestream/utils/html.py b/glifestream/utils/html.py
index ed75356..9ff2658 100644
--- a/glifestream/utils/html.py
+++ b/glifestream/utils/html.py
@@ -15,8 +15,10 @@
import re
from django.utils import six
-from django.utils.encoding import force_text
+from django.utils.six.moves.urllib.parse import quote, unquote, urlsplit, urlunsplit
+from django.utils.encoding import force_str, force_text
from django.utils.functional import allow_lazy
+from django.utils.safestring import mark_safe, SafeData
try:
from bs4 import BeautifulSoup
@@ -57,3 +59,111 @@ def strip_entities(value):
strip_entities = allow_lazy(strip_entities, six.text_type)
+
+##
+## Code taken from Django 1.7
+##
+
+TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)', '"', '\'']
+WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;'), ('"', '"'), ('\'', '\'')]
+word_split_re = re.compile(r'(\s+)')
+simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE)
+simple_url_2_re = re.compile(r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)$', re.IGNORECASE)
+simple_email_re = re.compile(r'^\S+@\S+\.\S+$')
+
+def smart_urlquote(url):
+ "Quotes a URL if it isn't already quoted."
+ # Handle IDN before quoting.
+ try:
+ scheme, netloc, path, query, fragment = urlsplit(url)
+ try:
+ netloc = netloc.encode('idna').decode('ascii') # IDN -> ACE
+ except UnicodeError: # invalid domain part
+ pass
+ else:
+ url = urlunsplit((scheme, netloc, path, query, fragment))
+ except ValueError:
+ # invalid IPv6 URL (normally square brackets in hostname part).
+ pass
+
+ url = unquote(force_str(url))
+ url = quote(url, safe=b'!*\'();:@&=+$,/?#[]~')
+
+ return force_text(url)
+
+def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
+ """
+ Converts any URLs in text into clickable links.
+
+ Works on http://, https://, www. links, and also on links ending in one of
+ the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
+ Links can have trailing punctuation (periods, commas, close-parens) and
+ leading punctuation (opening parens) and it'll still do the right thing.
+
+ If trim_url_limit is not None, the URLs in the link text longer than this
+ limit will be truncated to trim_url_limit-3 characters and appended with
+ an ellipsis.
+
+ If nofollow is True, the links will get a rel="nofollow" attribute.
+
+ If autoescape is True, the link text and URLs will be autoescaped.
+ """
+ def trim_url(x, limit=trim_url_limit):
+ if limit is None or len(x) <= limit:
+ return x
+ return '%s...' % x[:max(0, limit - 3)]
+ safe_input = isinstance(text, SafeData)
+ words = word_split_re.split(force_text(text))
+ for i, word in enumerate(words):
+ if '.' in word or '@' in word or ':' in word:
+ # Deal with punctuation.
+ lead, middle, trail = '', word, ''
+ for punctuation in TRAILING_PUNCTUATION:
+ if middle.endswith(punctuation):
+ middle = middle[:-len(punctuation)]
+ trail = punctuation + trail
+ for opening, closing in WRAPPING_PUNCTUATION:
+ if middle.startswith(opening):
+ middle = middle[len(opening):]
+ lead = lead + opening
+ # Keep parentheses at the end only if they're balanced.
+ if (middle.endswith(closing)
+ and middle.count(closing) == middle.count(opening) + 1):
+ middle = middle[:-len(closing)]
+ trail = closing + trail
+
+ # Make URL we want to point to.
+ url = None
+ nofollow_attr = ' rel="nofollow"' if nofollow else ''
+ if simple_url_re.match(middle):
+ url = smart_urlquote(middle)
+ elif simple_url_2_re.match(middle):
+ url = smart_urlquote('http://%s' % middle)
+ elif ':' not in middle and simple_email_re.match(middle):
+ local, domain = middle.rsplit('@', 1)
+ try:
+ domain = domain.encode('idna').decode('ascii')
+ except UnicodeError:
+ continue
+ url = 'mailto:%s@%s' % (local, domain)
+ nofollow_attr = ''
+
+ # Make link.
+ if url:
+ trimmed = trim_url(middle)
+ if autoescape and not safe_input:
+ lead, trail = escape(lead), escape(trail)
+ url, trimmed = escape(url), escape(trimmed)
+ middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
+ words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
+ else:
+ if safe_input:
+ words[i] = mark_safe(word)
+ elif autoescape:
+ words[i] = escape(word)
+ elif safe_input:
+ words[i] = mark_safe(word)
+ elif autoescape:
+ words[i] = escape(word)
+ return ''.join(words)
+urlize = allow_lazy(urlize, six.text_type)

Return to:

Send suggestions and report system problems to the System administrator.