mirror of
https://github.com/pixelfed/pixelfed.git
synced 2025-01-25 22:10:47 +00:00
Add new entity lexers
This commit is contained in:
parent
33ff1f7829
commit
7bb1f10d19
7 changed files with 2698 additions and 0 deletions
771
app/Util/Lexer/Autolink.php
Executable file
771
app/Util/Lexer/Autolink.php
Executable file
|
@ -0,0 +1,771 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Mike Cochrane, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
use App\Util\Lexer\Regex;
|
||||
use App\Util\Lexer\Extractor;
|
||||
use App\Util\Lexer\StringUtils;
|
||||
|
||||
/**
|
||||
* Twitter Autolink Class
|
||||
*
|
||||
* Parses tweets and generates HTML anchor tags around URLs, usernames,
|
||||
* username/list pairs and hashtags.
|
||||
*
|
||||
* Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
|
||||
* is based on code by {@link http://github.com/mzsanford Matt Sanford} and
|
||||
* heavily modified by {@link http://github.com/ngnpope Nick Pope}.
|
||||
*
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Mike Cochrane, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
class Autolink extends Regex
|
||||
{
|
||||
|
||||
/**
|
||||
* CSS class for auto-linked URLs.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $class_url = '';
|
||||
|
||||
/**
|
||||
* CSS class for auto-linked username URLs.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $class_user = 'u-url mention';
|
||||
|
||||
/**
|
||||
* CSS class for auto-linked list URLs.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $class_list = 'u-url list-slug';
|
||||
|
||||
/**
|
||||
* CSS class for auto-linked hashtag URLs.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $class_hash = 'u-url hashtag';
|
||||
|
||||
/**
|
||||
* CSS class for auto-linked cashtag URLs.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $class_cash = 'u-url cashtag';
|
||||
|
||||
/**
|
||||
* URL base for username links (the username without the @ will be appended).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $url_base_user = null;
|
||||
|
||||
/**
|
||||
* URL base for list links (the username/list without the @ will be appended).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $url_base_list = null;
|
||||
|
||||
/**
|
||||
* URL base for hashtag links (the hashtag without the # will be appended).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $url_base_hash = null;
|
||||
|
||||
/**
|
||||
* URL base for cashtag links (the cashtag without the $ will be appended).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $url_base_cash = null;
|
||||
|
||||
/**
|
||||
* Whether to include the value 'nofollow' in the 'rel' attribute.
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
protected $nofollow = true;
|
||||
|
||||
/**
|
||||
* Whether to include the value 'noopener' in the 'rel' attribute.
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
protected $noopener = true;
|
||||
|
||||
/**
|
||||
* Whether to include the value 'external' in the 'rel' attribute.
|
||||
*
|
||||
* Often this is used to be matched on in JavaScript for dynamically adding
|
||||
* the 'target' attribute which is deprecated in HTML 4.01. In HTML 5 it has
|
||||
* been undeprecated and thus the 'target' attribute can be used. If this is
|
||||
* set to false then the 'target' attribute will be output.
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
protected $external = true;
|
||||
|
||||
/**
|
||||
* The scope to open the link in.
|
||||
*
|
||||
* Support for the 'target' attribute was deprecated in HTML 4.01 but has
|
||||
* since been reinstated in HTML 5. To output the 'target' attribute you
|
||||
* must disable the adding of the string 'external' to the 'rel' attribute.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $target = '_blank';
|
||||
|
||||
/**
|
||||
* attribute for invisible span tag
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
protected $invisibleTagAttrs = "style='position:absolute;left:-9999px;'";
|
||||
|
||||
/**
|
||||
*
|
||||
* @var Extractor
|
||||
*/
|
||||
protected $extractor = null;
|
||||
|
||||
/**
 * Provides fluent method chaining.
 *
 * Instantiates the called class (late static binding) and always asks the
 * constructor to escape the tweet; $full_encode only chooses between
 * htmlentities() and htmlspecialchars() inside the constructor.
 *
 * @param string $tweet       The tweet to be converted.
 * @param bool   $full_encode Whether to encode all special characters.
 *
 * @see __construct()
 *
 * @return Autolink
 */
public static function create($tweet = null, $full_encode = false)
{
    // The constructor signature is ($tweet, $escape, $full_encode). Passing
    // $full_encode as the second argument would land it in the $escape slot,
    // silently disabling escaping for the default call.
    return new static($tweet, true, $full_encode);
}
|
||||
|
||||
/**
 * Stores the tweet (optionally HTML-escaped) and prepares link settings.
 *
 * When $escape is true and the tweet is non-empty, the text is run through
 * htmlentities() (if $full_encode) or htmlspecialchars() before being handed
 * to the parent constructor; existing entities are not double-encoded.
 * The URL bases for user, list, hashtag and cashtag links are derived from
 * the 'app.url' configuration value.
 *
 * @see htmlspecialchars()
 *
 * @param string $tweet       The tweet to be converted.
 * @param bool   $escape      Whether to escape the tweet (default: true).
 * @param bool   $full_encode Whether to encode all special characters.
 */
public function __construct($tweet = null, $escape = true, $full_encode = false)
{
    $text = $tweet;
    if ($escape && !empty($tweet)) {
        $text = $full_encode
            ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
            : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
    }
    parent::__construct($text);

    $this->extractor = Extractor::create();

    $appUrl = config('app.url');
    $this->url_base_user = $appUrl . '/';
    $this->url_base_list = $appUrl . '/';
    $this->url_base_hash = $appUrl . "/discover/tags/";
    $this->url_base_cash = $appUrl . '/search?q=%24';
}
|
||||
|
||||
/**
 * Returns the CSS class applied to auto-linked URLs.
 *
 * @return string CSS class for URL links (empty string when none is set).
 */
public function getURLClass()
{
    return $this->class_url;
}
|
||||
|
||||
/**
 * Sets the CSS class applied to auto-linked URLs.
 *
 * @param string $v CSS class for URL links; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setURLClass($v)
{
    $this->class_url = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Returns the CSS class applied to auto-linked username URLs.
 *
 * @return string CSS class for username links.
 */
public function getUsernameClass()
{
    return $this->class_user;
}
|
||||
|
||||
/**
 * Sets the CSS class applied to auto-linked username URLs.
 *
 * @param string $v CSS class for username links; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setUsernameClass($v)
{
    $this->class_user = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Returns the CSS class applied to auto-linked username/list URLs.
 *
 * @return string CSS class for username/list links.
 */
public function getListClass()
{
    return $this->class_list;
}
|
||||
|
||||
/**
 * Sets the CSS class applied to auto-linked username/list URLs.
 *
 * @param string $v CSS class for username/list links; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setListClass($v)
{
    $this->class_list = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Returns the CSS class applied to auto-linked hashtag URLs.
 *
 * @return string CSS class for hashtag links.
 */
public function getHashtagClass()
{
    return $this->class_hash;
}
|
||||
|
||||
/**
 * Sets the CSS class applied to auto-linked hashtag URLs.
 *
 * @param string $v CSS class for hashtag links; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setHashtagClass($v)
{
    $this->class_hash = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Returns the CSS class applied to auto-linked cashtag URLs.
 *
 * @return string CSS class for cashtag links.
 */
public function getCashtagClass()
{
    return $this->class_cash;
}
|
||||
|
||||
/**
 * Sets the CSS class applied to auto-linked cashtag URLs.
 *
 * @param string $v CSS class for cashtag links; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setCashtagClass($v)
{
    $this->class_cash = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Reports whether 'nofollow' is included in the 'rel' attribute of
 * generated links.
 *
 * @return bool Whether to add 'nofollow' to the 'rel' attribute.
 */
public function getNoFollow()
{
    return $this->nofollow;
}
|
||||
|
||||
/**
 * Chooses whether 'nofollow' is included in the 'rel' attribute of
 * generated links.
 *
 * @param bool $v Whether to add 'nofollow' to the 'rel' attribute.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setNoFollow($v)
{
    $this->nofollow = $v;

    return $this;
}
|
||||
|
||||
/**
 * Reports whether 'external' is included in the 'rel' attribute of
 * generated links.
 *
 * The value is often matched on in JavaScript to add the 'target' attribute
 * dynamically; 'target' was deprecated in HTML 4.01 and reinstated in HTML 5.
 *
 * @return bool Whether to add 'external' to the 'rel' attribute.
 */
public function getExternal()
{
    return $this->external;
}
|
||||
|
||||
/**
 * Chooses whether 'external' is included in the 'rel' attribute of
 * generated links.
 *
 * The value is often matched on in JavaScript to add the 'target' attribute
 * dynamically; 'target' was deprecated in HTML 4.01 and reinstated in HTML 5.
 *
 * @param bool $v Whether to add 'external' to the 'rel' attribute.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setExternal($v)
{
    $this->external = $v;

    return $this;
}
|
||||
|
||||
/**
 * Returns the browsing context generated links open in.
 *
 * Support for the 'target' attribute was deprecated in HTML 4.01 but has
 * since been reinstated in HTML 5.
 *
 * @return string The value to add to the 'target' attribute.
 */
public function getTarget()
{
    return $this->target;
}
|
||||
|
||||
/**
 * Sets the browsing context generated links open in.
 *
 * Support for the 'target' attribute was deprecated in HTML 4.01 but has
 * since been reinstated in HTML 5. linkToText() emits the attribute only
 * when the stored value is truthy.
 *
 * @param string $v The value to add to the 'target' attribute; surrounding whitespace is trimmed.
 *
 * @return Autolink This instance, for method chaining.
 */
public function setTarget($v)
{
    $this->target = trim($v);

    return $this;
}
|
||||
|
||||
/**
 * Autolink with entities.
 *
 * Walks the entities in the order given, copying the plain text between
 * them and replacing each entity with the anchor produced by the matching
 * linkTo*() method. Entities are dispatched on the first key present among
 * 'url', 'hashtag', 'screen_name' and 'cashtag'; anything else contributes
 * no markup but still advances the cursor to its end index.
 *
 * @param string $tweet    Text to link; defaults to the tweet held by this instance.
 * @param array  $entities Entities carrying an 'indices' pair of [start, end];
 *                         null is treated as "no entities".
 * @return string The text with anchors inserted.
 * @since 1.1.0
 */
public function autoLinkEntities($tweet = null, $entities = null)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }
    // The documented default is null; iterate over an empty list instead of
    // handing null to foreach (which raises a warning).
    if (is_null($entities)) {
        $entities = array();
    }

    $text = '';
    $beginIndex = 0;
    foreach ($entities as $entity) {
        // Plain text between the previous entity and this one. For mentions
        // one extra character is copied verbatim - presumably the leading
        // "@" sigil, which linkToMentionAndList() leaves out of the anchor.
        $gapLength = $entity['indices'][0] - $beginIndex;
        if (isset($entity['screen_name'])) {
            $gapLength++;
        }
        $text .= StringUtils::substr($tweet, $beginIndex, $gapLength);

        if (isset($entity['url'])) {
            $text .= $this->linkToUrl($entity);
        } elseif (isset($entity['hashtag'])) {
            $text .= $this->linkToHashtag($entity, $tweet);
        } elseif (isset($entity['screen_name'])) {
            $text .= $this->linkToMentionAndList($entity);
        } elseif (isset($entity['cashtag'])) {
            $text .= $this->linkToCashtag($entity, $tweet);
        }
        $beginIndex = $entity['indices'][1];
    }

    // Remainder after the last entity (the length is deliberately generous).
    $text .= StringUtils::substr($tweet, $beginIndex, StringUtils::strlen($tweet));

    return $text;
}
|
||||
|
||||
/**
 * Auto-link hashtags, URLs, usernames and lists, with JSON entities.
 *
 * The JSON value (object or array) is expected to group entity lists under
 * arbitrary keys; all groups are flattened into one list. Any entity with a
 * non-empty 'text' key gets that value copied to 'hashtag' so that
 * autoLinkEntities() treats it as a hashtag. Overlapping entities are
 * removed by the extractor before linking.
 *
 * @param string $tweet The tweet to be converted.
 * @param mixed  $json  The entities info (object or array of entity lists).
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLinkWithJson($tweet = null, $json = null)
{
    if (is_object($json)) {
        $json = $this->object2array($json);
    }

    // Flatten every entity group into a single list.
    $entities = array();
    if (is_array($json)) {
        foreach ($json as $group) {
            $entities = array_merge($entities, $group);
        }
    }

    // Map the JSON 'text' key onto the twitter-text 'hashtag' key.
    foreach (array_keys($entities) as $i) {
        if (!empty($entities[$i]['text'])) {
            $entities[$i]['hashtag'] = $entities[$i]['text'];
        }
    }

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->removeOverlappingEntities($entities)
    );
}
|
||||
|
||||
/**
 * Recursively converts an object (or array) into nested arrays.
 *
 * The value is cast to array and every element that is itself an object or
 * array is converted the same way; scalar elements are left untouched.
 *
 * @param mixed $obj
 * @return array
 */
protected function object2array($obj)
{
    $result = (array) $obj;
    foreach ($result as $key => $value) {
        if (!is_object($value) && !is_array($value)) {
            continue;
        }
        $result[$key] = $this->object2array($value);
    }

    return $result;
}
|
||||
|
||||
/**
 * Auto-link hashtags, URLs, usernames and lists.
 *
 * Entities are extracted with extractURLWithoutProtocol(false) set on the
 * extractor (presumably so that URLs lacking a protocol are not linked).
 *
 * @param string $tweet The tweet to be converted; defaults to the tweet held by this instance.
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLink($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->extractURLWithoutProtocol(false)->extractEntitiesWithIndices($tweet)
    );
}
|
||||
|
||||
/**
 * Auto-link the @username and @username/list references in the provided
 * text. Links to @username references get the usernameClass CSS class;
 * links to @username/list references get the listClass CSS class.
 *
 * @param string $tweet The text to convert; defaults to the tweet held by this instance.
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLinkUsernamesAndLists($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->extractMentionsOrListsWithIndices($tweet)
    );
}
|
||||
|
||||
/**
 * Auto-link #hashtag references in the provided text. The #hashtag links
 * get the hashtagClass CSS class.
 *
 * @param string $tweet The text to convert; defaults to the tweet held by this instance.
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLinkHashtags($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->extractHashtagsWithIndices($tweet)
    );
}
|
||||
|
||||
/**
 * Auto-link URLs in the text provided.
 *
 * The extractor is called with extractURLWithoutProtocol(false); per the
 * original documentation this only auto-links URLs with a protocol.
 *
 * @param string $tweet The text to convert; defaults to the tweet held by this instance.
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLinkURLs($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->extractURLWithoutProtocol(false)->extractURLsWithIndices($tweet)
    );
}
|
||||
|
||||
/**
 * Auto-link $cashtag references in the provided text. The $cashtag links
 * get the cashtagClass CSS class.
 *
 * @param string $tweet The text to convert; defaults to the tweet held by this instance.
 * @return string The text with auto-link HTML added.
 * @since 1.1.0
 */
public function autoLinkCashtags($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    return $this->autoLinkEntities(
        $tweet,
        $this->extractor->extractCashtagsWithIndices($tweet)
    );
}
|
||||
|
||||
/**
 * Builds the anchor for a URL entity.
 *
 * The href is the entity's 'url'. The visible text is the escaped 'url'
 * unless a 'display_url' is supplied; when an 'expanded_url' is supplied as
 * well, the expanded URL becomes the title and, if the display URL is a
 * substring of it, the hidden remainder is emitted in invisible spans so
 * that copy and paste yields the full URL. Every piece of entity data placed
 * in the link text is HTML-escaped, because linkToText() outputs the text
 * verbatim.
 *
 * @param array $entity URL entity; reads 'url' and the optional
 *                      'display_url' and 'expanded_url' keys.
 * @return string The anchor HTML.
 */
public function linkToUrl($entity)
{
    $attributes = array();
    if (!empty($this->class_url)) {
        $attributes['class'] = $this->class_url;
    }
    $attributes['href'] = $entity['url'];
    $linkText = $this->escapeHTML($entity['url']);

    if (!empty($entity['display_url']) && !empty($entity['expanded_url'])) {
        // Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste
        // should contain the full original URL (expanded_url), not the display URL.
        //
        // Method: Whenever possible, we actually emit HTML that contains expanded_url, and use
        // font-size:0 to hide those parts that should not be displayed (because they are not part of display_url).
        // Elements with font-size:0 get copied even though they are not visible.
        // Note that display:none doesn't work here. Elements with display:none don't get copied.
        // (This class hides those parts with $invisibleTagAttrs rather than a literal font-size:0.)
        //
        // Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we
        // wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on
        // everything with the tco-ellipsis class.
        //
        // As an example: The user tweets "hi http://longdomainname.com/foo"
        // This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo"
        // This will get rendered as:
        // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
        //   …
        //   <!-- There's a chance the onCopy event handler might not fire. In case that happens,
        //        we include an &nbsp; here so that the … doesn't bump up against the URL and ruin it.
        //        The &nbsp; is inside the tco-ellipsis span so that when the onCopy handler *does*
        //        fire, it doesn't get copied. Otherwise the copied text would have two spaces in a row,
        //        e.g. "hi  http://longdomainname.com/foo".
        //   <span style='font-size:0'>&nbsp;</span>
        // </span>
        // <span style='font-size:0'> <!-- This stuff should get copied but not displayed -->
        //   http://longdomai
        // </span>
        // <span class='js-display-url'> <!-- This stuff should get displayed *and* copied -->
        //   nname.com/foo
        // </span>
        // <span class='tco-ellipsis'> <!-- This stuff should get displayed but not copied -->
        //   <span style='font-size:0'>&nbsp;</span>
        //   …
        // </span>
        //
        // Exception: pic.socialhub.dev images, for which expandedUrl = "https://socialhub.dev/#!/username/status/1234/photo/1
        // For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts.
        // For a pic.socialhub.dev URL, the only elided part will be the "https://", so this is fine.
        $displayURL = $entity['display_url'];
        $expandedURL = $entity['expanded_url'];
        $displayURLSansEllipses = preg_replace('/…/u', '', $displayURL);
        $displayURLIndex = mb_strpos($expandedURL, $displayURLSansEllipses);

        if ($displayURLIndex !== false) {
            $beforeDisplayURL = mb_substr($expandedURL, 0, $displayURLIndex);
            $afterDisplayURL = mb_substr($expandedURL, $displayURLIndex + mb_strlen($displayURLSansEllipses));
            $precedingEllipsis = (preg_match('/\A…/u', $displayURL)) ? '…' : '';
            $followingEllipsis = (preg_match('/…\z/u', $displayURL)) ? '…' : '';

            $invisibleSpan = "<span {$this->invisibleTagAttrs}>";

            $linkText = "<span class='tco-ellipsis'>" . $precedingEllipsis . $invisibleSpan . "&nbsp;</span></span>";
            $linkText .= $invisibleSpan . $this->escapeHTML($beforeDisplayURL) . "</span>";
            $linkText .= "<span class='js-display-url'>" . $this->escapeHTML($displayURLSansEllipses) . "</span>";
            $linkText .= $invisibleSpan . $this->escapeHTML($afterDisplayURL) . "</span>";
            $linkText .= "<span class='tco-ellipsis'>" . $invisibleSpan . "&nbsp;</span>" . $followingEllipsis . "</span>";
        } else {
            // display_url comes from entity data; escape it like every other piece of link text.
            $linkText = $this->escapeHTML($entity['display_url']);
        }
        $attributes['title'] = $entity['expanded_url'];
    } elseif (!empty($entity['display_url'])) {
        $linkText = $this->escapeHTML($entity['display_url']);
    }

    return $this->linkToText($entity, $linkText, $attributes);
}
|
||||
|
||||
/**
 * Builds the anchor for a hashtag entity.
 *
 * The link text is the hash character found in the tweet at the entity's
 * start index followed by the hashtag; the href is the hashtag URL base
 * plus the hashtag and '?src=hash'. The 'rtl' class is added when the link
 * text matches the 'rtl_chars' pattern. Hashtag anchors are rendered
 * without a 'target' attribute; the configured target is restored
 * afterwards so other links rendered by this instance keep it.
 *
 * @param array  $entity Hashtag entity; reads 'hashtag' and 'indices'.
 * @param string $tweet  Text the entity was extracted from; defaults to the tweet held by this instance.
 * @return string The anchor HTML.
 * @since 1.1.0
 */
public function linkToHashtag($entity, $tweet = null)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }
    $attributes = array();
    $class = array();
    $hash = StringUtils::substr($tweet, $entity['indices'][0], 1);
    $linkText = $hash . $entity['hashtag'];

    $attributes['href'] = $this->url_base_hash . $entity['hashtag'] . '?src=hash';
    $attributes['title'] = '#' . $entity['hashtag'];
    if (!empty($this->class_hash)) {
        $class[] = $this->class_hash;
    }
    if (preg_match(self::$patterns['rtl_chars'], $linkText)) {
        $class[] = 'rtl';
    }
    if (!empty($class)) {
        $attributes['class'] = join(' ', $class);
    }

    // Suppress 'target' for this anchor only. Leaving $this->target set to
    // false would strip the attribute from every link rendered afterwards.
    $previousTarget = $this->target;
    $this->target = false;
    $link = $this->linkToText($entity, $linkText, $attributes);
    $this->target = $previousTarget;

    return $link;
}
|
||||
|
||||
/**
 * Builds the anchor for a mention or a username/list entity.
 *
 * With a non-empty 'list_slug' the link text is the screen name followed by
 * the slug and the list class / list URL base are used; otherwise the link
 * text is the screen name alone with the user class / user URL base.
 *
 * @param array $entity Mention entity; reads 'screen_name' and the optional 'list_slug'.
 * @return string The anchor HTML.
 * @since 1.1.0
 */
public function linkToMentionAndList($entity)
{
    $isList = !empty($entity['list_slug']);

    $linkText = $isList
        ? $entity['screen_name'] . $entity['list_slug']
        : $entity['screen_name'];
    $class = $isList ? $this->class_list : $this->class_user;
    $urlBase = $isList ? $this->url_base_list : $this->url_base_user;

    $attributes = array();
    if (!empty($class)) {
        $attributes['class'] = $class;
    }
    $attributes['href'] = $urlBase . $linkText;

    return $this->linkToText($entity, $linkText, $attributes);
}
|
||||
|
||||
/**
 * Builds the anchor for a cashtag entity.
 *
 * The link text (also used as the title) is the character found in the
 * tweet at the entity's start index followed by the cashtag; the href is
 * the cashtag URL base plus the cashtag.
 *
 * @param array  $entity Cashtag entity; reads 'cashtag' and 'indices'.
 * @param string $tweet  Text the entity was extracted from; defaults to the tweet held by this instance.
 * @return string The anchor HTML.
 * @since 1.1.0
 */
public function linkToCashtag($entity, $tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    $sigil = StringUtils::substr($tweet, $entity['indices'][0], 1);
    $linkText = $sigil . $entity['cashtag'];

    $attributes = array(
        'href' => $this->url_base_cash . $entity['cashtag'],
        'title' => $linkText,
    );
    if (!empty($this->class_cash)) {
        $attributes['class'] = $this->class_cash;
    }

    return $this->linkToText($entity, $linkText, $attributes);
}
|
||||
|
||||
/**
 * Renders an anchor element around the given text.
 *
 * Adds a 'rel' attribute built from the enabled external / nofollow /
 * noopener flags (in that order) and a 'target' attribute when a target is
 * set. Attribute values are HTML-escaped; $text is output verbatim, so the
 * caller is responsible for escaping it.
 *
 * @param array  $entity     The entity being linked (not read here).
 * @param string $text       Inner HTML of the anchor.
 * @param array  $attributes Attribute name => value pairs.
 * @return string The anchor HTML.
 * @since 1.1.0
 */
public function linkToText(array $entity, $text, $attributes = array())
{
    $flags = array(
        'external' => $this->external,
        'nofollow' => $this->nofollow,
        'noopener' => $this->noopener,
    );
    $rel = array();
    foreach ($flags as $value => $enabled) {
        if ($enabled) {
            $rel[] = $value;
        }
    }
    if (!empty($rel)) {
        $attributes['rel'] = join(' ', $rel);
    }
    if ($this->target) {
        $attributes['target'] = $this->target;
    }

    $renderedAttributes = '';
    foreach ($attributes as $name => $value) {
        $renderedAttributes .= ' ' . $name . '="' . $this->escapeHTML($value) . '"';
    }

    return '<a' . $renderedAttributes . '>' . $text . '</a>';
}
|
||||
|
||||
/**
 * HTML-escapes text for use in markup.
 *
 * Uses htmlspecialchars() with ENT_QUOTES and UTF-8, with double encoding
 * disabled so existing entities are left as they are.
 *
 * @param string $text
 * @return string
 */
protected function escapeHTML($text)
{
    $escaped = htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);

    return $escaped;
}
|
||||
}
|
548
app/Util/Lexer/Extractor.php
Executable file
548
app/Util/Lexer/Extractor.php
Executable file
|
@ -0,0 +1,548 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Mike Cochrane, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
use App\Util\Lexer\Regex;
|
||||
use App\Util\Lexer\StringUtils;
|
||||
|
||||
/**
|
||||
* Twitter Extractor Class
|
||||
*
|
||||
* Parses tweets and extracts URLs, usernames, username/list pairs and
|
||||
* hashtags.
|
||||
*
|
||||
* Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
|
||||
* is based on code by {@link http://github.com/mzsanford Matt Sanford} and
|
||||
* heavily modified by {@link http://github.com/ngnpope Nick Pope}.
|
||||
*
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Mike Cochrane, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
class Extractor extends Regex
|
||||
{
|
||||
|
||||
/**
|
||||
* @var boolean
|
||||
*/
|
||||
protected $extractURLWithoutProtocol = true;
|
||||
|
||||
/**
 * Provides fluent method chaining.
 *
 * Uses late static binding so that a subclass calling create() receives an
 * instance of itself, matching Autolink::create().
 *
 * @param string $tweet The tweet to be converted.
 *
 * @see __construct()
 *
 * @return Extractor
 */
public static function create($tweet = null)
{
    return new static($tweet);
}
|
||||
|
||||
/**
 * Reads in a tweet to be parsed and extracts elements from it.
 *
 * The tweet is handed unchanged to the parent constructor.
 *
 * @param string $tweet The tweet to extract.
 */
public function __construct($tweet = null)
{
    parent::__construct($tweet);
}
|
||||
|
||||
/**
 * Extracts all parts of a tweet and returns an associative array containing
 * the extracted elements.
 *
 * Keys: 'hashtags', 'urls', 'mentions', 'replyto', 'hashtags_with_indices',
 * 'urls_with_indices' and 'mentions_with_indices'.
 *
 * @param string $tweet The tweet to extract; defaults to the tweet held by this instance.
 * @return array The elements in the tweet.
 */
public function extract($tweet = null)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }
    return array(
        'hashtags' => $this->extractHashtags($tweet),
        'urls' => $this->extractURLs($tweet),
        // Kept as-is: this deprecated wrapper also stores $tweet on the instance.
        'mentions' => $this->extractMentionedUsernames($tweet),
        // Pass the tweet explicitly. The zero-argument extractRepliedUsernames()
        // ignores any argument and only saw the right text because the
        // 'mentions' call above had stored it on the instance.
        'replyto' => $this->extractReplyScreenname($tweet),
        'hashtags_with_indices' => $this->extractHashtagsWithIndices($tweet),
        'urls_with_indices' => $this->extractURLsWithIndices($tweet),
        'mentions_with_indices' => $this->extractMentionedUsernamesWithIndices($tweet),
    );
}
|
||||
|
||||
/**
 * Extract URLs, @mentions, lists, #hashtags and $cashtags from a given
 * text/tweet.
 *
 * The per-type results are concatenated in that order (hashtags are
 * extracted with the URL-overlap check disabled) and overlapping entities
 * are then removed across the combined list.
 *
 * @param string $tweet The tweet to extract; defaults to the tweet held by this instance.
 * @return array list of extracted entities
 */
public function extractEntitiesWithIndices($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    $combined = array_merge(
        array(),
        $this->extractURLsWithIndices($tweet),
        $this->extractHashtagsWithIndices($tweet, false),
        $this->extractMentionsOrListsWithIndices($tweet),
        $this->extractCashtagsWithIndices($tweet)
    );

    return $this->removeOverlappingEntities($combined);
}
|
||||
|
||||
/**
 * Extracts all the hashtags from the tweet.
 *
 * Collects the 'hashtag' value of every entity returned by
 * extractHashtagsWithIndices().
 *
 * @param string $tweet The tweet to extract.
 * @return array The hashtag elements in the tweet.
 */
public function extractHashtags($tweet = null)
{
    $tags = array();
    foreach ($this->extractHashtagsWithIndices($tweet) as $entity) {
        $tags[] = $entity['hashtag'];
    }

    return $tags;
}
|
||||
|
||||
/**
 * Extracts all the cashtags from the tweet.
 *
 * Collects the 'cashtag' value of every entity returned by
 * extractCashtagsWithIndices().
 *
 * @param string $tweet The tweet to extract.
 * @return array The cashtag elements in the tweet.
 */
public function extractCashtags($tweet = null)
{
    $tags = array();
    foreach ($this->extractCashtagsWithIndices($tweet) as $entity) {
        $tags[] = $entity['cashtag'];
    }

    return $tags;
}
|
||||
|
||||
/**
 * Extracts all the URLs from the tweet.
 *
 * Collects the 'url' value of every entity returned by
 * extractURLsWithIndices().
 *
 * @param string $tweet The tweet to extract.
 * @return array The URL elements in the tweet.
 */
public function extractURLs($tweet = null)
{
    $urls = array();
    foreach ($this->extractURLsWithIndices($tweet) as $entity) {
        $urls[] = $entity['url'];
    }

    return $urls;
}
|
||||
|
||||
/**
 * Extract all the usernames from the tweet.
 *
 * A mention is an occurrence of a username anywhere in a tweet. Names are
 * lower-cased and returned once each, in order of first appearance.
 *
 * @param string $tweet The tweet to extract.
 * @return array The usernames elements in the tweet.
 */
public function extractMentionedScreennames($tweet = null)
{
    $usernamesOnly = array();
    $mentionsWithIndices = $this->extractMentionsOrListsWithIndices($tweet);

    foreach ($mentionsWithIndices as $mentionWithIndex) {
        $screen_name = mb_strtolower($mentionWithIndex['screen_name']);
        // Compare against the empty string explicitly: empty() would also
        // discard the screen name "0".
        if ($screen_name === '') {
            continue;
        }
        // Strict comparison: loose in_array() treats numeric-looking strings
        // such as "1e3" and "1000" as equal and would drop one of them.
        if (in_array($screen_name, $usernamesOnly, true)) {
            continue;
        }
        $usernamesOnly[] = $screen_name;
    }
    return $usernamesOnly;
}
|
||||
|
||||
/**
 * Extract all the usernames from the tweet.
 *
 * A mention is an occurrence of a username anywhere in a tweet. Unlike
 * extractMentionedScreennames(), this wrapper also stores the given tweet on
 * the instance before delegating.
 *
 * @param string $tweet The tweet to extract.
 * @return array The usernames elements in the tweet.
 * @deprecated since version 1.1.0
 */
public function extractMentionedUsernames($tweet)
{
    $this->tweet = $tweet;

    $usernames = $this->extractMentionedScreennames($tweet);

    return $usernames;
}
|
||||
|
||||
/**
 * Extract the username replied to from the tweet.
 *
 * A reply is an occurrence of a username at the beginning of a tweet. The
 * tweet is matched against the 'valid_reply' pattern; the match is rejected
 * when its second capture group matches 'end_mention_match'.
 *
 * @param string $tweet The tweet to extract; defaults to the tweet held by this instance.
 * @return string|null The first capture group of the match, or null when there is no valid reply.
 */
public function extractReplyScreenname($tweet = null)
{
    $tweet = is_null($tweet) ? $this->tweet : $tweet;

    if (!preg_match(self::$patterns['valid_reply'], $tweet, $matches)) {
        return null;
    }
    // Reject a username whose trailing context matches end_mention_match.
    if (preg_match(self::$patterns['end_mention_match'], $matches[2])) {
        return null;
    }

    return $matches[1];
}
|
||||
|
||||
/**
 * Extracts the username replied to from the stored tweet.
 *
 * A reply is an occurrence of a username at the beginning of a tweet.
 *
 * @return string|null  Whatever extractReplyScreenname() returns for $this->tweet.
 * @deprecated since version 1.1.0
 */
public function extractRepliedUsernames()
{
    // Called without an argument, extractReplyScreenname() falls back to $this->tweet.
    $reply = $this->extractReplyScreenname();

    return $reply;
}
|
||||
|
||||
/**
 * Extracts all the hashtags and the indices they occur at from the tweet.
 *
 * @param  string   $tweet            The tweet to extract from (defaults to $this->tweet).
 * @param  boolean  $checkUrlOverlap  If true, hashtags that overlap an extracted URL are removed.
 * @return array  List of array('hashtag' => string, 'indices' => array(start, end));
 *                the indices are character (not byte) offsets.
 */
public function extractHashtagsWithIndices($tweet = null, $checkUrlOverlap = true)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }

    // Cheap pre-check before running the full pattern. The class previously
    // listed '#' twice; the second entry is written as U+FF03 (full-width
    // number sign), which is presumably what was intended.
    if (!preg_match('/[#\x{FF03}]/u', $tweet)) {
        return array();
    }

    preg_match_all(self::$patterns['valid_hashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    $tags = array();

    foreach ($matches as $match) {
        // Pad to the five groups destructured here (was 3), so a short match
        // can never leave $hashtag or $outer undefined.
        list(, , $hash, $hashtag, $outer) = array_pad($match, 5, array('', 0));

        if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
            continue;
        }

        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character offset.
        $start_position = $hash[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $hash[1])) : $hash[1];
        $end_position = $start_position + StringUtils::strlen($hash[0] . $hashtag[0]);

        $tags[] = array(
            'hashtag' => $hashtag[0],
            'indices' => array($start_position, $end_position)
        );
    }

    if (!$checkUrlOverlap) {
        return $tags;
    }

    // Merge with the URLs, drop overlapping entities, then keep only the hashtags.
    $urls = $this->extractURLsWithIndices($tweet);
    $entities = $this->removeOverlappingEntities(array_merge($tags, $urls));

    $validTags = array();
    foreach ($entities as $entity) {
        if (empty($entity['hashtag'])) {
            continue;
        }
        $validTags[] = $entity;
    }

    return $validTags;
}
|
||||
|
||||
/**
 * Extracts all the cashtags and the indices they occur at from the tweet.
 *
 * @param  string  $tweet  The tweet to extract from (defaults to $this->tweet).
 * @return array  List of array('cashtag' => string, 'indices' => array(start, end));
 *                the indices are character (not byte) offsets.
 */
public function extractCashtagsWithIndices($tweet = null)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }

    // Cheap pre-check: no dollar sign means no cashtag.
    if (!preg_match('/\$/iu', $tweet)) {
        return array();
    }

    preg_match_all(self::$patterns['valid_cashtag'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    $tags = array();

    foreach ($matches as $match) {
        // Pad to the five groups destructured here (was 3), so a short match
        // can never leave $cash_text or $outer undefined.
        list(, , $dollar, $cash_text, $outer) = array_pad($match, 5, array('', 0));

        // NOTE(review): this reuses the 'end_hashtag_match' pattern for
        // cashtags — confirm that is intended.
        if (preg_match(self::$patterns['end_hashtag_match'], $outer[0])) {
            continue;
        }

        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character offset.
        $start_position = $dollar[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $dollar[1])) : $dollar[1];
        $end_position = $start_position + StringUtils::strlen($dollar[0] . $cash_text[0]);

        $tags[] = array(
            'cashtag' => $cash_text[0],
            'indices' => array($start_position, $end_position)
        );
    }

    return $tags;
}
|
||||
|
||||
/**
 * Extracts all the URLs and the indices they occur at from the tweet.
 *
 * @param  string  $tweet  The tweet to extract from (defaults to $this->tweet).
 * @return array  List of array('url' => string, 'indices' => array(start, end));
 *                the indices are character (not byte) offsets.
 */
public function extractURLsWithIndices($tweet = null)
{
    $text = is_null($tweet) ? $this->tweet : $tweet;

    // Cheap pre-check: look for '.' when protocol-less URLs are extracted, ':' otherwise.
    $marker = $this->extractURLWithoutProtocol() ? '.' : ':';
    if (strpos($text, $marker) === false) {
        return array();
    }

    preg_match_all(self::$patterns['valid_url'], $text, $hits, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

    $found = array();
    foreach ($hits as $hit) {
        list(, $before, $url, $protocol, $domain, , $path) = array_pad($hit, 8, array(''));

        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character offset.
        $byteOffset = $url[1];
        $start = $byteOffset > 0 ? StringUtils::strlen(substr($text, 0, $byteOffset)) : $byteOffset;
        $end = $start + StringUtils::strlen($url[0]);

        $prefixText = $before[0];
        $urlText = $url[0];
        $protocolText = $protocol[0];
        $domainText = $domain[0];
        $pathText = $path[0];

        if (!empty($protocolText)) {
            // In the case of t.co URLs, don't allow additional path characters.
            if (preg_match(self::$patterns['valid_tco_url'], $urlText, $tco)) {
                $urlText = $tco[0];
                $end = $start + StringUtils::strlen($urlText);
            }
            $found[] = array(
                'url' => $urlText,
                'indices' => array($start, $end),
            );
            continue;
        }

        // Protocol is missing: only ASCII-only domains are extracted, and only
        // when the feature is enabled and the preceding characters allow it.
        if (!$this->extractURLWithoutProtocol
            || preg_match(self::$patterns['invalid_url_without_protocol_preceding_chars'], $prefixText)) {
            continue;
        }

        // No ASCII-only domain found: skip the entire URL.
        if (!preg_match(self::$patterns['valid_ascii_domain'], $domainText, $ascii)) {
            continue;
        }

        $candidate = preg_replace('/' . preg_quote($domainText, '/') . '/u', $ascii[0], $urlText);
        $asciiStart = StringUtils::strpos($domainText, $candidate, 0);
        $asciiEnd = $asciiStart + StringUtils::strlen($candidate);
        $entity = array(
            'url' => $candidate,
            'indices' => array($start + $asciiStart, $start + $asciiEnd),
        );

        $hasPath = !empty($pathText);
        if ($hasPath
            || preg_match(self::$patterns['valid_special_short_domain'], $candidate)
            || !preg_match(self::$patterns['invalid_short_domain'], $candidate)) {
            $found[] = $entity;
        }

        if ($hasPath) {
            // NOTE(review): the entity was already appended (by value) above,
            // so these two assignments do not reach the returned list. Kept
            // as-is to preserve behaviour — confirm whether the entity was
            // meant to be updated before being added.
            $entity['url'] = preg_replace('/' . preg_quote($domainText, '/') . '/u', $entity['url'], $urlText);
            $entity['indices'][1] = $end;
        }
    }

    return $found;
}
|
||||
|
||||
/**
 * Extracts all the usernames and the indices they occur at from the tweet.
 *
 * Works on the result of extractMentionsOrListsWithIndices() and removes the
 * 'list_slug' entry from every entity.
 *
 * @param  string  $tweet  The tweet to extract from (defaults to $this->tweet).
 * @return array  The username entities in the tweet.
 */
public function extractMentionedScreennamesWithIndices($tweet = null)
{
    $text = is_null($tweet) ? $this->tweet : $tweet;

    $stripped = array();
    foreach ($this->extractMentionsOrListsWithIndices($text) as $entity) {
        if (isset($entity['list_slug'])) {
            unset($entity['list_slug']);
        }
        array_push($stripped, $entity);
    }

    return $stripped;
}
|
||||
|
||||
/**
 * Extracts all the usernames and the indices they occur at from the stored tweet.
 *
 * @return array  Whatever extractMentionedScreennamesWithIndices() returns for $this->tweet.
 * @deprecated since version 1.1.0
 */
public function extractMentionedUsernamesWithIndices()
{
    // Called without an argument, the delegate falls back to $this->tweet.
    $entities = $this->extractMentionedScreennamesWithIndices();

    return $entities;
}
|
||||
|
||||
/**
 * Extracts all the usernames (and list slugs) and the indices they occur at.
 *
 * @param  string  $tweet  The tweet to extract from (defaults to $this->tweet).
 * @return array  List of array('screen_name' => string, 'list_slug' => string,
 *                'indices' => array(start, end)); the indices are character
 *                (not byte) offsets and cover the list slug when one is present.
 */
public function extractMentionsOrListsWithIndices($tweet = null)
{
    if (is_null($tweet)) {
        $tweet = $this->tweet;
    }

    // Cheap pre-check before running the full pattern. The class previously
    // listed '@' twice; the second entry is written as U+FF20 (full-width
    // commercial at), which is presumably what was intended.
    if (!preg_match('/[@\x{FF20}]/u', $tweet)) {
        return array();
    }

    preg_match_all(self::$patterns['valid_mentions_or_lists'], $tweet, $matches, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
    $results = array();

    foreach ($matches as $match) {
        list(, , $at, $username, $list_slug, $outer) = array_pad($match, 6, array('', 0));

        if (preg_match(self::$patterns['end_mention_match'], $outer[0])) {
            continue;
        }

        // PREG_OFFSET_CAPTURE reports byte offsets; convert to a character offset.
        $start_position = $at[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $at[1])) : $at[1];
        $end_position = $start_position + StringUtils::strlen($at[0]) + StringUtils::strlen($username[0]);

        // A list slug extends the entity past the username.
        if (!empty($list_slug[0])) {
            $end_position += StringUtils::strlen($list_slug[0]);
        }

        $results[] = array(
            'screen_name' => $username[0],
            'list_slug' => $list_slug[0],
            'indices' => array($start_position, $end_position),
        );
    }

    return $results;
}
|
||||
|
||||
/**
 * Extracts all the usernames (and list slugs) and their indices from the stored tweet.
 *
 * @return array  Whatever extractMentionsOrListsWithIndices() returns for $this->tweet.
 * @deprecated since version 1.1.0
 */
public function extractMentionedUsernamesOrListsWithIndices()
{
    // Called without an argument, the delegate falls back to $this->tweet.
    $entities = $this->extractMentionsOrListsWithIndices();

    return $entities;
}
|
||||
|
||||
/**
 * Combined setter/getter for the extractURLWithoutProtocol flag.
 *
 * @param  boolean  $flag  New value; omit (or pass null) to read the current value.
 * @return boolean|Extractor  The current flag when called without an argument,
 *                            otherwise $this for method chaining.
 */
public function extractURLWithoutProtocol($flag = null)
{
    if (!is_null($flag)) {
        // Setter: the value is cast to bool before being stored.
        $this->extractURLWithoutProtocol = (bool) $flag;

        return $this;
    }

    return $this->extractURLWithoutProtocol;
}
|
||||
|
||||
/**
 * Remove overlapping entities.
 *
 * The entities are sorted by start index with sortEntites(); an entity is
 * then dropped when it starts before the end index of the last entity kept.
 * The caller's array is not modified.
 *
 * @param  array  $entities  Entities, each carrying an 'indices' => array(start, end) entry.
 * @return array  A new list without overlapping entities.
 */
public function removeOverlappingEntities($entities)
{
    usort($entities, array($this, 'sortEntites'));

    $kept = array();
    $last = null;
    foreach ($entities as $candidate) {
        $overlaps = isset($last) && $candidate['indices'][0] < $last['indices'][1];
        if (!$overlaps) {
            $last = $candidate;
            $kept[] = $candidate;
        }
    }

    return $kept;
}
|
||||
|
||||
/**
 * Comparison callback that orders entities by their start index.
 *
 * @param  array  $a  Entity with an 'indices' entry.
 * @param  array  $b  Entity with an 'indices' entry.
 * @return int  0 when both start at the same index, -1 when $a starts first, 1 otherwise.
 */
protected function sortEntites($a, $b)
{
    $left = $a['indices'][0];
    $right = $b['indices'][0];

    if ($left == $right) {
        return 0;
    }
    if ($left < $right) {
        return -1;
    }

    return 1;
}
|
||||
}
|
202
app/Util/Lexer/HitHighlighter.php
Executable file
202
app/Util/Lexer/HitHighlighter.php
Executable file
|
@ -0,0 +1,202 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
use App\Util\Lexer\Regex;
|
||||
use App\Util\Lexer\StringUtils;
|
||||
|
||||
/**
|
||||
* Twitter HitHighlighter Class
|
||||
*
|
||||
* Performs "hit highlighting" on tweets that have been auto-linked already.
|
||||
* Useful with the results returned from the search API.
|
||||
*
|
||||
* Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
|
||||
* is based on code by {@link http://github.com/mzsanford Matt Sanford} and
|
||||
* heavily modified by {@link http://github.com/ngnpope Nick Pope}.
|
||||
*
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
class HitHighlighter extends Regex
{

    /**
     * The tag to surround hits with.
     *
     * @var string
     */
    protected $tag = 'em';

    /**
     * Provides fluent method chaining.
     *
     * NOTE(review): the constructor signature is ($tweet, $escape, $full_encode),
     * so $full_encode is received there as $escape. Behaviour is kept as-is —
     * confirm which of the two was intended.
     *
     * @param  string  $tweet        The tweet to be hit highlighted.
     * @param  bool    $full_encode  Forwarded as the constructor's second argument.
     *
     * @see __construct()
     *
     * @return HitHighlighter
     */
    public static function create($tweet = null, $full_encode = false)
    {
        $instance = new self($tweet, $full_encode);

        return $instance;
    }

    /**
     * Reads in a tweet to be parsed and hit highlighted.
     *
     * A non-empty tweet is escaped (unless $escape is false) before being
     * handed to the parent constructor.
     *
     * @see htmlspecialchars()
     * @see htmlentities()
     *
     * @param  string  $tweet        The tweet to be hit highlighted.
     * @param  bool    $escape       Whether to escape the tweet (default: true).
     * @param  bool    $full_encode  Use htmlentities() instead of htmlspecialchars().
     */
    public function __construct($tweet = null, $escape = true, $full_encode = false)
    {
        $text = $tweet;
        if ($escape && !empty($tweet)) {
            $text = $full_encode
                ? htmlentities($tweet, ENT_QUOTES, 'UTF-8', false)
                : htmlspecialchars($tweet, ENT_QUOTES, 'UTF-8', false);
        }

        parent::__construct($text);
    }

    /**
     * Get the highlighting tag that surrounds hits. The default tag is 'em'.
     *
     * @return string  The tag name.
     */
    public function getTag()
    {
        return $this->tag;
    }

    /**
     * Set the highlighting tag to surround hits with. The default tag is 'em'.
     *
     * @param  string  $v  The tag name.
     *
     * @return HitHighlighter  Fluid method chaining.
     */
    public function setTag($v)
    {
        $this->tag = $v;

        return $this;
    }

    /**
     * Hit highlights the tweet.
     *
     * Text without any '<' gets the tags spliced in directly; text that
     * already contains markup is split on '<' and '>' and processed chunk by
     * chunk.
     *
     * @param  string  $tweet  The tweet to be hit highlighted (defaults to $this->tweet).
     * @param  array   $hits   An array containing the start and end index pairs
     *                         for the highlighting.
     *
     * @return string  The hit highlighted tweet; the input text itself when $hits is empty.
     */
    public function highlight($tweet = null, array $hits = null)
    {
        $text = is_null($tweet) ? $this->tweet : $tweet;
        if (empty($hits)) {
            return $text;
        }

        $tags = array('<' . $this->tag . '>', '</' . $this->tag . '>');

        // Check whether we can simply replace or whether we need to chunk...
        if (strpos($text, '<') === false) {
            return $this->highlightPlainText($text, $hits, $tags);
        }

        return $this->highlightMarkup($text, $hits, $tags);
    }

    /**
     * Inserts the tags into text that contains no '<'.
     *
     * @param  string  $text  The text to highlight.
     * @param  array   $hits  Start/end index pairs.
     * @param  array   $tags  array(opening tag, closing tag).
     *
     * @return string  The highlighted text.
     */
    private function highlightPlainText($text, array $hits, array $tags)
    {
        $openLength = StringUtils::strlen($tags[0]);
        $closeLength = StringUtils::strlen($tags[1]);

        // Every inserted tag shifts the positions of all later hits.
        $shift = 0;
        foreach ($hits as $hit) {
            $text = StringUtils::substrReplace($text, $tags[0], $hit[0] + $shift, 0);
            $shift += $openLength;
            $text = StringUtils::substrReplace($text, $tags[1], $hit[1] + $shift, 0);
            $shift += $closeLength;
        }

        return $text;
    }

    /**
     * Inserts the tags into text that already contains '<' / '>' delimited markup.
     *
     * After splitting on '<' and '>', even-numbered chunks are treated as
     * text and odd-numbered chunks are re-wrapped in '<' ... '>' on output.
     *
     * @param  string  $text  The text to highlight.
     * @param  array   $hits  Start/end index pairs.
     * @param  array   $tags  array(opening tag, closing tag).
     *
     * @return string  The highlighted text.
     */
    private function highlightMarkup($text, array $hits, array $tags)
    {
        $output = '';
        $chunks = preg_split('/[<>]/iu', $text);
        $chunk = $chunks[0];
        $chunkIndex = 0;
        $cursor = 0;       // position inside the current chunk already emitted
        $offset = 0;       // combined length of the text chunks consumed so far
        $openedInChunk = false;

        // Flatten the multidimensional hits array:
        $positions = array();
        foreach ($hits as $pair) {
            $positions = array_merge($positions, $pair);
        }
        $total = count($positions);

        // Loop over the hit indices (even = opening tag, odd = closing tag):
        for ($n = 0; $n < $total; $n++) {
            $position = $positions[$n];
            $tag = $tags[$n % 2];
            $placed = false;

            while ($chunk !== null) {
                $chunkEnd = $offset + StringUtils::strlen($chunk);
                if ($position < $chunkEnd) {
                    break;
                }
                // The hit lies at or beyond the end of this chunk: flush it.
                $output .= StringUtils::substr($chunk, $cursor);
                if ($openedInChunk && $position === $chunkEnd) {
                    $output .= $tag;
                    $placed = true;
                }
                if (isset($chunks[$chunkIndex + 1])) {
                    $output .= '<' . $chunks[$chunkIndex + 1] . '>';
                }
                $offset = $chunkEnd;
                $cursor = 0;
                $chunkIndex += 2;
                $chunk = isset($chunks[$chunkIndex]) ? $chunks[$chunkIndex] : null;
                $openedInChunk = false;
            }

            if (!$placed && $chunk !== null) {
                $spot = $position - $offset;
                $output .= StringUtils::substr($chunk, $cursor, $spot - $cursor) . $tag;
                $cursor = $spot;
                $openedInChunk = ($n % 2 === 0);
                $placed = true;
            }

            // Ultimate fallback - hits that run off the end get a closing tag:
            if (!$placed) {
                $output .= $tag;
            }
        }

        // Emit whatever is left after the last hit.
        if ($chunk !== null) {
            if ($cursor < StringUtils::strlen($chunk)) {
                $output .= StringUtils::substr($chunk, $cursor);
            }
            $chunkCount = count($chunks);
            for ($k = $chunkIndex + 1; $k < $chunkCount; $k++) {
                if ($k % 2 === 0) {
                    $output .= $chunks[$k];
                } else {
                    $output .= '<' . $chunks[$k] . '>';
                }
            }
        }

        return $output;
    }

    /**
     * Hit highlights the stored tweet.
     *
     * @param  array  $hits  An array containing the start and end index pairs
     *                       for the highlighting.
     *
     * @return string  The hit highlighted tweet.
     * @deprecated since version 1.1.0
     */
    public function addHitHighlighting(array $hits)
    {
        return $this->highlight($this->tweet, $hits);
    }
}
|
348
app/Util/Lexer/LooseAutolink.php
Executable file
348
app/Util/Lexer/LooseAutolink.php
Executable file
|
@ -0,0 +1,348 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @author Takashi Nojima
|
||||
* @copyright Copyright 2014 Mike Cochrane, Nick Pope, Takashi Nojima
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
use App\Util\Lexer\Autolink;
|
||||
|
||||
/**
|
||||
* Twitter LooseAutolink Class
|
||||
*
|
||||
* Parses tweets and generates HTML anchor tags around URLs, usernames,
|
||||
* username/list pairs and hashtags.
|
||||
*
|
||||
* Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
|
||||
* is based on code by {@link http://github.com/mzsanford Matt Sanford} and
|
||||
* heavily modified by {@link http://github.com/ngnpope Nick Pope}.
|
||||
*
|
||||
* @author Mike Cochrane <mikec@mikenz.geek.nz>
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @author Takashi Nojima
|
||||
* @copyright Copyright 2014 Mike Cochrane, Nick Pope, Takashi Nojima
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
* @since 1.8.0
|
||||
* @deprecated since version 1.9.0
|
||||
*/
|
||||
class LooseAutolink extends Autolink
{

    /**
     * Auto-link hashtags, URLs, usernames and lists.
     *
     * @param  string  $tweet  The tweet to be converted; when given it replaces the stored tweet.
     * @return string  The tweet with auto-link HTML added.
     * @deprecated since version 1.9.0
     */
    public function autoLink($tweet = null)
    {
        if (!is_null($tweet)) {
            $this->tweet = $tweet;
        }
        return $this->addLinks();
    }

    /**
     * Auto-link the @username and @username/list references in the provided text. Links to @username references
     * use the class_user CSS classes; links to @username/list references use the class_list CSS classes.
     *
     * @param  string  $tweet  The tweet to be converted; when given it replaces the stored tweet.
     * @return string  The tweet with auto-link HTML added.
     */
    public function autoLinkUsernamesAndLists($tweet = null)
    {
        if (!is_null($tweet)) {
            $this->tweet = $tweet;
        }
        return $this->addLinksToUsernamesAndLists();
    }

    /**
     * Auto-link #hashtag references in the provided Tweet text. The #hashtag links use the class_hash CSS classes.
     *
     * @param  string  $tweet  The tweet to be converted; when given it replaces the stored tweet.
     * @return string  The tweet with auto-link HTML added.
     */
    public function autoLinkHashtags($tweet = null)
    {
        if (!is_null($tweet)) {
            $this->tweet = $tweet;
        }
        return $this->addLinksToHashtags();
    }

    /**
     * Auto-link URLs in the Tweet text provided.
     *
     * This only auto-links URLs with protocol.
     *
     * @param  string  $tweet  The tweet to be converted; when given it replaces the stored tweet.
     * @return string  The tweet with auto-link HTML added.
     */
    public function autoLinkURLs($tweet = null)
    {
        if (!is_null($tweet)) {
            $this->tweet = $tweet;
        }
        return $this->addLinksToURLs();
    }

    /**
     * Auto-link $cashtag references in the provided Tweet text. The $cashtag links use the class_cash CSS classes.
     *
     * @param  string  $tweet  The tweet to be converted; when given it replaces the stored tweet.
     * @return string  The tweet with auto-link HTML added.
     */
    public function autoLinkCashtags($tweet = null)
    {
        if (!is_null($tweet)) {
            $this->tweet = $tweet;
        }
        return $this->addLinksToCashtags();
    }

    /**
     * Adds links to all elements in the tweet.
     *
     * Each pass reads and rewrites $this->tweet, so the order matters: URLs,
     * hashtags, cashtags, then usernames/lists. The stored tweet is restored
     * to its original value before returning.
     *
     * @return string  The modified tweet.
     * @deprecated since version 1.9.0
     */
    public function addLinks()
    {
        $original = $this->tweet;
        $this->tweet = $this->addLinksToURLs();
        $this->tweet = $this->addLinksToHashtags();
        $this->tweet = $this->addLinksToCashtags();
        $this->tweet = $this->addLinksToUsernamesAndLists();
        $modified = $this->tweet;
        $this->tweet = $original;
        return $modified;
    }

    /**
     * Adds links to hashtag elements in the tweet.
     *
     * @return string  The modified tweet.
     */
    public function addLinksToHashtags()
    {
        return preg_replace_callback(
            self::$patterns['valid_hashtag'],
            array($this, '_addLinksToHashtags'),
            $this->tweet
        );
    }

    /**
     * Adds links to cashtag elements in the tweet.
     *
     * @return string  The modified tweet.
     */
    public function addLinksToCashtags()
    {
        return preg_replace_callback(
            self::$patterns['valid_cashtag'],
            array($this, '_addLinksToCashtags'),
            $this->tweet
        );
    }

    /**
     * Adds links to URL elements in the tweet.
     *
     * @return string  The modified tweet.
     */
    public function addLinksToURLs()
    {
        return preg_replace_callback(self::$patterns['valid_url'], array($this, '_addLinksToURLs'), $this->tweet);
    }

    /**
     * Adds links to username/list elements in the tweet.
     *
     * @return string  The modified tweet.
     */
    public function addLinksToUsernamesAndLists()
    {
        return preg_replace_callback(
            self::$patterns['valid_mentions_or_lists'],
            array($this, '_addLinksToUsernamesAndLists'),
            $this->tweet
        );
    }

    /**
     * Wraps a tweet element in an HTML anchor tag using the provided URL.
     *
     * This is a helper function to perform the generation of the link. The
     * arguments are concatenated into the markup as given; no escaping is
     * applied here.
     *
     * @param  string  $url      The URL to use as the href.
     * @param  string  $class    The CSS class(es) to apply (space separated).
     * @param  string  $element  The tweet element to wrap.
     *
     * @return string  The tweet element with a link applied.
     * @deprecated since version 1.1.0
     */
    protected function wrap($url, $class, $element)
    {
        $link = '<a';
        if ($class) {
            $link .= ' class="' . $class . '"';
        }
        $link .= ' href="' . $url . '"';
        $rel = array();
        if ($this->external) {
            $rel[] = 'external';
        }
        if ($this->nofollow) {
            $rel[] = 'nofollow';
        }
        if (!empty($rel)) {
            $link .= ' rel="' . implode(' ', $rel) . '"';
        }
        if ($this->target) {
            $link .= ' target="' . $this->target . '"';
        }
        $link .= '>' . $element . '</a>';
        return $link;
    }

    /**
     * Wraps a tweet element in an HTML anchor tag using the provided URL.
     *
     * This is a helper function to perform the generation of the hashtag
     * link. Unlike wrap(), it also emits a title attribute. The arguments are
     * concatenated into the markup as given; no escaping is applied here.
     *
     * @param  string  $url      The URL to use as the href.
     * @param  string  $class    The CSS class(es) to apply (space separated).
     * @param  string  $element  The tweet element to wrap.
     *
     * @return string  The tweet element with a link applied.
     */
    protected function wrapHash($url, $class, $element)
    {
        // Normalise the full-width number sign (U+FF03) to '#' for the title.
        // The pattern previously replaced '#' with '#', which did nothing.
        $title = preg_replace('/\x{FF03}/u', '#', $element);
        $link = '<a';
        $link .= ' href="' . $url . '"';
        $link .= ' title="' . $title . '"';
        if ($class) {
            $link .= ' class="' . $class . '"';
        }
        $rel = array();
        if ($this->external) {
            $rel[] = 'external';
        }
        if ($this->nofollow) {
            $rel[] = 'nofollow';
        }
        if (!empty($rel)) {
            $link .= ' rel="' . implode(' ', $rel) . '"';
        }
        if ($this->target) {
            $link .= ' target="' . $this->target . '"';
        }
        $link .= '>' . $element . '</a>';
        return $link;
    }

    /**
     * Callback used by the method that adds links to hashtags.
     *
     * @see addLinksToHashtags()
     * @param  array  $matches  The regular expression matches.
     * @return string  The link-wrapped hashtag, or the untouched match when it is rejected.
     */
    protected function _addLinksToHashtags($matches)
    {
        list($all, $before, $hash, $tag, $after) = array_pad($matches, 5, '');
        // Leave the match alone when the trailing text disqualifies it, when a
        // quote follows without one preceding, or when a closing tag follows.
        if (preg_match(self::$patterns['end_hashtag_match'], $after)
            || (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) || preg_match('!\A</!', $after)) {
            return $all;
        }
        $replacement = $before;
        $element = $hash . $tag;
        $url = $this->url_base_hash . $tag;
        $class_hash = $this->class_hash;
        if (preg_match(self::$patterns['rtl_chars'], $element)) {
            $class_hash .= ' rtl';
        }
        $replacement .= $this->wrapHash($url, $class_hash, $element);
        return $replacement;
    }

    /**
     * Callback used by the method that adds links to cashtags.
     *
     * @see addLinksToCashtags()
     * @param  array  $matches  The regular expression matches.
     * @return string  The link-wrapped cashtag, or the untouched match when it is rejected.
     */
    protected function _addLinksToCashtags($matches)
    {
        list($all, $before, $cash, $tag, $after) = array_pad($matches, 5, '');
        // NOTE(review): relies on an 'end_cashtag_match' pattern — confirm it
        // is defined in Regex.
        if (preg_match(self::$patterns['end_cashtag_match'], $after)
            || (!preg_match('!\A["\']!', $before) && preg_match('!\A["\']!', $after)) || preg_match('!\A</!', $after)) {
            return $all;
        }
        $replacement = $before;
        $element = $cash . $tag;
        $url = $this->url_base_cash . $tag;
        $replacement .= $this->wrapHash($url, $this->class_cash, $element);
        return $replacement;
    }

    /**
     * Callback used by the method that adds links to URLs.
     *
     * Matches without a protocol are returned unchanged.
     *
     * @see addLinksToURLs()
     * @param  array  $matches  The regular expression matches.
     * @return string  The link-wrapped URL.
     */
    protected function _addLinksToURLs($matches)
    {
        // Only the first four groups are used here.
        list($all, $before, $url, $protocol) = array_pad($matches, 4, '');
        if (!$protocol) {
            return $all;
        }
        $url = htmlspecialchars($url, ENT_QUOTES, 'UTF-8', false);
        return $before . $this->wrap($url, $this->class_url, $url);
    }

    /**
     * Callback used by the method that adds links to username/list pairs.
     *
     * @see addLinksToUsernamesAndLists()
     * @param  array  $matches  The regular expression matches.
     * @return string  The link-wrapped username/list pair.
     */
    protected function _addLinksToUsernamesAndLists($matches)
    {
        list($all, $before, $at, $username, $slash_listname, $after) = array_pad($matches, 6, '');
        # If $after is not empty, there is an invalid character.
        if (!empty($slash_listname)) {
            # Replace the list and username
            $element = $username . $slash_listname;
            $class = $this->class_list;
            $url = $this->url_base_list . $element;
        } else {
            if (preg_match(self::$patterns['end_mention_match'], $after)) {
                return $all;
            }
            # Replace the username
            $element = $username;
            $class = $this->class_user;
            $url = $this->url_base_user . $element;
        }
        # XXX: Due to use of preg_replace_callback() for multiple replacements in a
        #      single tweet and also as only the match is replaced and we have to
        #      use a look-ahead for $after because there is no equivalent for the
        #      $' (dollar apostrophe) global from Ruby, we MUST NOT append $after.
        return $before . $at . $this->wrap($url, $class, $element);
    }
}
|
337
app/Util/Lexer/Regex.php
Executable file
337
app/Util/Lexer/Regex.php
Executable file
File diff suppressed because one or more lines are too long
104
app/Util/Lexer/StringUtils.php
Executable file
104
app/Util/Lexer/StringUtils.php
Executable file
|
@ -0,0 +1,104 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Takashi Nojima
|
||||
* @copyright Copyright 2014, Takashi Nojima
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
/**
|
||||
* String utility
|
||||
*
|
||||
* @author Takashi Nojima
|
||||
* @copyright Copyright 2014, Takashi Nojima
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter
|
||||
*/
|
||||
class StringUtils
{

    /**
     * Multibyte substring; wraps mb_substr().
     *
     * @param  string   $str
     * @param  integer  $start
     * @param  integer  $length    Omit (or pass null) to read up to the end of the string.
     * @param  string   $encoding
     * @return string
     */
    public static function substr($str, $start, $length = null, $encoding = 'UTF-8')
    {
        // A null length is replaced by the full string length (for PHP <= 5.4.7).
        $count = is_null($length) ? mb_strlen($str, $encoding) : $length;

        return mb_substr($str, $start, $count, $encoding);
    }

    /**
     * Multibyte string length; wraps mb_strlen().
     *
     * @param  string  $str
     * @param  string  $encoding
     * @return integer
     */
    public static function strlen($str, $encoding = 'UTF-8')
    {
        $characters = mb_strlen($str, $encoding);

        return $characters;
    }

    /**
     * Multibyte position lookup; wraps mb_strpos().
     *
     * @param  string   $haystack
     * @param  string   $needle
     * @param  integer  $offset
     * @param  string   $encoding
     * @return integer|false  Whatever mb_strpos() returns.
     */
    public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8')
    {
        $position = mb_strpos($haystack, $needle, $offset, $encoding);

        return $position;
    }

    /**
     * A multibyte-aware substring replacement function.
     *
     * @param  string  $string       The string to modify.
     * @param  string  $replacement  The replacement string.
     * @param  int     $start        The start of the replacement; negative counts from the end.
     * @param  int     $length       The number of characters to replace; negative leaves that
     *                               many characters at the end untouched, null replaces up to
     *                               the end of the string.
     * @param  string  $encoding     The encoding of the string.
     *
     * @return string  The modified string.
     *
     * @see http://www.php.net/manual/en/function.substr-replace.php#90146
     */
    public static function substrReplace($string, $replacement, $start, $length = null, $encoding = 'UTF-8')
    {
        // Without mbstring fall back to the byte-based built-in.
        if (extension_loaded('mbstring') !== true) {
            if (is_null($length) === true) {
                return substr_replace($string, $replacement, $start);
            }

            return substr_replace($string, $replacement, $start, $length);
        }

        $total = static::strlen($string, $encoding);

        // Normalise $start into the range 0..$total.
        if ($start < 0) {
            $start = max(0, $total + $start);
        } elseif ($start > $total) {
            $start = $total;
        }

        // Normalise $length so that $start + $length never exceeds $total.
        if ($length < 0) {
            $length = max(0, $total - $start + $length);
        } elseif ((is_null($length) === true) || ($length > $total)) {
            $length = $total;
        }
        if (($start + $length) > $total) {
            $length = $total - $start;
        }

        $head = static::substr($string, 0, $start, $encoding);
        $tail = static::substr($string, $start + $length, $total - $start - $length, $encoding);

        return $head . $replacement . $tail;
    }
}
|
388
app/Util/Lexer/Validator.php
Executable file
388
app/Util/Lexer/Validator.php
Executable file
|
@ -0,0 +1,388 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
|
||||
namespace App\Util\Lexer;
|
||||
|
||||
use App\Util\Lexer\Regex;
|
||||
use App\Util\Lexer\Extractor;
|
||||
use App\Util\Lexer\StringUtils;
|
||||
|
||||
/**
|
||||
* Twitter Validator Class
|
||||
*
|
||||
* Performs "validation" on tweets.
|
||||
*
|
||||
* Originally written by {@link http://github.com/mikenz Mike Cochrane}, this
|
||||
* is based on code by {@link http://github.com/mzsanford Matt Sanford} and
|
||||
* heavily modified by {@link http://github.com/ngnpope Nick Pope}.
|
||||
*
|
||||
* @author Nick Pope <nick@nickpope.me.uk>
|
||||
* @copyright Copyright © 2010, Nick Pope
|
||||
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License v2.0
|
||||
* @package Twitter.Text
|
||||
*/
|
||||
class Validator extends Regex
{
    /**
     * The maximum length of a tweet.
     *
     * @var int
     */
    const MAX_LENGTH = 140;

    /**
     * The length of a short URL beginning with http:
     *
     * @var int
     */
    protected $short_url_length = 23;

    /**
     * The length of a short URL beginning with https:
     *
     * @var int
     */
    protected $short_url_length_https = 23;

    /**
     * Extractor used to pull mentions, hashtags and URLs out of the text
     * being validated.
     *
     * @var Extractor
     */
    protected $extractor = null;

    /**
     * Provides fluent method chaining.
     *
     * @param  string  $tweet   The tweet to be validated.
     * @param  mixed   $config  Setup short URL length from Twitter API /help/configuration response.
     *
     * @see __construct()
     *
     * @return Validator
     */
    public static function create($tweet = null, $config = null)
    {
        return new self($tweet, $config);
    }

    /**
     * Reads in a tweet to be parsed and validates it.
     *
     * @param  string  $tweet   The tweet to validate.
     * @param  mixed   $config  Optional array or object carrying
     *                          short_url_length / short_url_length_https;
     *                          ignored when empty.
     */
    public function __construct($tweet = null, $config = null)
    {
        parent::__construct($tweet);
        if (!empty($config)) {
            $this->setConfiguration($config);
        }
        $this->extractor = Extractor::create();
    }

    /**
     * Setup short URL length from Twitter API /help/configuration response
     *
     * Accepts either an array or an object; only the keys/properties
     * short_url_length and short_url_length_https are read. Any other kind
     * of value is silently ignored.
     *
     * @param mixed $config
     * @return Validator
     * @link https://dev.twitter.com/docs/api/1/get/help/configuration
     */
    public function setConfiguration($config)
    {
        if (is_array($config)) {
            // setup from array
            if (isset($config['short_url_length'])) {
                $this->setShortUrlLength($config['short_url_length']);
            }
            if (isset($config['short_url_length_https'])) {
                $this->setShortUrlLengthHttps($config['short_url_length_https']);
            }
        } elseif (is_object($config)) {
            // setup from object
            if (isset($config->short_url_length)) {
                $this->setShortUrlLength($config->short_url_length);
            }
            if (isset($config->short_url_length_https)) {
                $this->setShortUrlLengthHttps($config->short_url_length_https);
            }
        }

        return $this;
    }

    /**
     * Set the length of a short URL beginning with http:
     *
     * @param mixed $length  Converted to an integer with intval().
     * @return Validator
     */
    public function setShortUrlLength($length)
    {
        $this->short_url_length = intval($length);
        return $this;
    }

    /**
     * Get the length of a short URL beginning with http:
     *
     * @return int
     */
    public function getShortUrlLength()
    {
        return $this->short_url_length;
    }

    /**
     * Set the length of a short URL beginning with https:
     *
     * @param mixed $length  Converted to an integer with intval().
     * @return Validator
     */
    public function setShortUrlLengthHttps($length)
    {
        $this->short_url_length_https = intval($length);
        return $this;
    }

    /**
     * Get the length of a short URL beginning with https:
     *
     * @return int
     */
    public function getShortUrlLengthHttps()
    {
        return $this->short_url_length_https;
    }

    /**
     * Check whether a tweet is valid.
     *
     * A tweet is valid when it is non-empty, its computed length (see
     * getTweetLength()) does not exceed MAX_LENGTH, and it contains none of
     * the characters matched by the 'invalid_characters' pattern.
     *
     * @param string $tweet The tweet to validate; defaults to the stored tweet.
     * @return boolean  Whether the tweet is valid.
     */
    public function isValidTweetText($tweet = null)
    {
        if (is_null($tweet)) {
            $tweet = $this->tweet;
        }
        // Test emptiness explicitly: a loose !$tweet would also reject the
        // legitimate one-character tweet "0". Doing it before measuring also
        // keeps null away from the string functions.
        if ($tweet === null || $tweet === '') {
            return false;
        }
        $length = $this->getTweetLength($tweet);
        if (!$length || $length > self::MAX_LENGTH) {
            return false;
        }
        if (preg_match(self::$patterns['invalid_characters'], $tweet)) {
            return false;
        }
        return true;
    }

    /**
     * Check whether a tweet is valid.
     *
     * @return boolean  Whether the tweet is valid.
     * @deprecated since version 1.1.0
     */
    public function validateTweet()
    {
        return $this->isValidTweetText();
    }

    /**
     * Check whether a username is valid.
     *
     * The input must consist of exactly one mention: the extractor has to
     * find a single screen name equal to the input minus its first character.
     *
     * @param string $username The username to validate; defaults to the stored tweet.
     * @return boolean  Whether the username is valid.
     */
    public function isValidUsername($username = null)
    {
        if (is_null($username)) {
            $username = $this->tweet;
        }
        $length = StringUtils::strlen($username);
        if (empty($username) || !$length) {
            return false;
        }
        $extracted = $this->extractor->extractMentionedScreennames($username);
        // Drop the leading sigil by character, not by byte: a multibyte sigil
        // (e.g. a full-width at sign) would otherwise be cut in half and the
        // comparison could never succeed.
        return count($extracted) === 1 && $extracted[0] === StringUtils::substr($username, 1);
    }

    /**
     * Check whether a username is valid.
     *
     * @return boolean  Whether the username is valid.
     * @deprecated since version 1.1.0
     */
    public function validateUsername()
    {
        return $this->isValidUsername();
    }

    /**
     * Check whether a list is valid.
     *
     * The 'valid_mentions_or_lists' pattern must match with nothing captured
     * before the mention (group 1), a non-empty list slug (group 4) and
     * nothing captured after it (group 5).
     *
     * @param string $list The list name to validate; defaults to the stored tweet.
     * @return boolean  Whether the list is valid.
     */
    public function isValidList($list = null)
    {
        if (is_null($list)) {
            $list = $this->tweet;
        }
        $length = StringUtils::strlen($list);
        if (empty($list) || !$length) {
            return false;
        }
        preg_match(self::$patterns['valid_mentions_or_lists'], $list, $matches);
        // Indices 0..5 are read below, so pad to six entries; padding to five
        // would leave index 5 undefined.
        $matches = array_pad($matches, 6, '');
        return $matches[1] === '' && !empty($matches[4]) && $matches[5] === '';
    }

    /**
     * Check whether a list is valid.
     *
     * @return boolean  Whether the list is valid.
     * @deprecated since version 1.1.0
     */
    public function validateList()
    {
        return $this->isValidList();
    }

    /**
     * Check whether a hashtag is valid.
     *
     * The input must consist of exactly one hashtag: the extractor has to
     * find a single hashtag equal to the input minus its first character.
     *
     * @param string $hashtag The hashtag to validate; defaults to the stored tweet.
     * @return boolean  Whether the hashtag is valid.
     */
    public function isValidHashtag($hashtag = null)
    {
        if (is_null($hashtag)) {
            $hashtag = $this->tweet;
        }
        $length = StringUtils::strlen($hashtag);
        if (empty($hashtag) || !$length) {
            return false;
        }
        $extracted = $this->extractor->extractHashtags($hashtag);
        // Drop the leading sigil by character, not by byte, so a multibyte
        // sigil (e.g. a full-width number sign) is removed whole.
        return count($extracted) === 1 && $extracted[0] === StringUtils::substr($hashtag, 1);
    }

    /**
     * Check whether a hashtag is valid.
     *
     * @return boolean  Whether the hashtag is valid.
     * @deprecated since version 1.1.0
     */
    public function validateHashtag()
    {
        return $this->isValidHashtag();
    }

    /**
     * Check whether a URL is valid.
     *
     * The whole input must match the 'validate_url_unencoded' pattern; its
     * scheme, authority, path, query and fragment are then checked one by one.
     *
     * @param  string   $url               The url to validate; defaults to the stored tweet.
     * @param  boolean  $unicode_domains   Consider the domain to be unicode.
     * @param  boolean  $require_protocol  Require a protocol for valid domain?
     *
     * @return boolean  Whether the URL is valid.
     */
    public function isValidURL($url = null, $unicode_domains = true, $require_protocol = true)
    {
        if (is_null($url)) {
            $url = $this->tweet;
        }
        $length = StringUtils::strlen($url);
        if (empty($url) || !$length) {
            return false;
        }
        preg_match(self::$patterns['validate_url_unencoded'], $url, $matches);
        // Element 0 is the full match; what remains are the URL components.
        $match = array_shift($matches);
        if (!$matches || $match !== $url) {
            return false;
        }
        list($scheme, $authority, $path, $query, $fragment) = array_pad($matches, 5, '');
        # Check scheme, path, query, fragment:
        if ($require_protocol && !(
            self::isValidMatch($scheme, self::$patterns['validate_url_scheme']) && preg_match('/^https?$/i', $scheme)
        )) {
            return false;
        }
        if (!self::isValidMatch($path, self::$patterns['validate_url_path'])
            || !self::isValidMatch($query, self::$patterns['validate_url_query'], true)
            || !self::isValidMatch($fragment, self::$patterns['validate_url_fragment'], true)) {
            return false;
        }
        # Check authority:
        $authority_pattern = $unicode_domains ? 'validate_url_unicode_authority' : 'validate_url_authority';
        return self::isValidMatch($authority, self::$patterns[$authority_pattern]);
    }

    /**
     * Check whether a URL is valid.
     *
     * @param  boolean  $unicode_domains   Consider the domain to be unicode.
     * @param  boolean  $require_protocol  Require a protocol for valid domain?
     *
     * @return boolean  Whether the URL is valid.
     * @deprecated since version 1.1.0
     */
    public function validateURL($unicode_domains = true, $require_protocol = true)
    {
        return $this->isValidURL(null, $unicode_domains, $require_protocol);
    }

    /**
     * Determines the length of a tweet.  Takes shortening of URLs into account.
     *
     * Each URL found by the extractor is counted as the configured short URL
     * length (https or http variant) instead of its literal length.
     *
     * @param string $tweet The tweet to measure; defaults to the stored tweet.
     * @return int  the length of a tweet.
     */
    public function getTweetLength($tweet = null)
    {
        if (is_null($tweet)) {
            $tweet = $this->tweet;
        }
        $length = StringUtils::strlen($tweet);
        $urls_with_indices = $this->extractor->extractURLsWithIndices($tweet);
        foreach ($urls_with_indices as $entity) {
            // Subtract the URL's own span (start - end is negative) ...
            $length += $entity['indices'][0] - $entity['indices'][1];
            // ... and add the fixed length of its shortened form.
            $length += stripos($entity['url'], 'https://') === 0 ? $this->short_url_length_https : $this->short_url_length;
        }
        return $length;
    }

    /**
     * Determines the length of a tweet.  Takes shortening of URLs into account.
     *
     * @return int  the length of a tweet.
     * @deprecated since version 1.1.0
     */
    public function getLength()
    {
        return $this->getTweetLength();
    }

    /**
     * A helper function to check for a valid match.  Used in URL validation.
     *
     * A string that is present must be matched by the pattern in its
     * entirety. A value that is not a string counts as absent: it fails a
     * compulsory check and passes an optional one.
     *
     * @param  string   $string    The subject string to test.
     * @param  string   $pattern   The pattern to match against.
     * @param  boolean  $optional  Whether a match is compulsory or not.
     *
     * @return boolean  Whether an exact match was found.
     */
    protected static function isValidMatch($string, $pattern, $optional = false)
    {
        // Presence is decided by type, not truthiness, so the string "0" is
        // validated like any other component instead of counting as absent.
        if (!is_string($string)) {
            return (bool) $optional;
        }
        $found = preg_match($pattern, $string, $matches);
        return ($found && $matches[0] === $string);
    }
}
|
Loading…
Reference in a new issue