Update lexer util

This commit is contained in:
Daniel Supernault 2019-05-20 19:43:12 -06:00
parent 4bece20954
commit 7260ca2100
No known key found for this signature in database
GPG key ID: 0DEF1C662C9033F7
3 changed files with 15 additions and 4 deletions

View file

@@ -9,6 +9,8 @@
namespace App\Util\Lexer;
use Illuminate\Support\Str;
/**
* Twitter Autolink Class.
*
@@ -413,7 +415,11 @@ class Autolink extends Regex
$beginIndex = 0;
foreach ($entities as $entity) {
if (isset($entity['screen_name'])) {
$text .= StringUtils::substr($tweet, $beginIndex, $entity['indices'][0] - $beginIndex + 1);
if(Str::startsWith($entity['screen_name'], '@')) {
$text .= StringUtils::substr($tweet, $beginIndex, $entity['indices'][0] - $beginIndex);
} else {
$text .= StringUtils::substr($tweet, $beginIndex, $entity['indices'][0] - $beginIndex + 1);
}
} else {
$text .= StringUtils::substr($tweet, $beginIndex, $entity['indices'][0] - $beginIndex);
}
@@ -704,7 +710,7 @@ class Autolink extends Regex
if (!empty($entity['list_slug'])) {
// Replace the list and username
$linkText = $entity['screen_name'].$entity['list_slug'];
$linkText = $entity['screen_name'];
$class = $this->class_list;
$url = $this->url_base_list.$linkText;
} else {

View file

@@ -9,6 +9,8 @@
namespace App\Util\Lexer;
use Illuminate\Support\Str;
/**
* Twitter Extractor Class.
*
@@ -452,8 +454,9 @@ class Extractor extends Regex
list($all, $before, $at, $username, $list_slug, $outer) = array_pad($match, 6, ['', 0]);
$start_position = $at[1] > 0 ? StringUtils::strlen(substr($tweet, 0, $at[1])) : $at[1];
$end_position = $start_position + StringUtils::strlen($at[0]) + StringUtils::strlen($username[0]);
$screenname = trim($all[0]) == '@'.$username[0] ? $username[0] : trim($all[0]);
$entity = [
'screen_name' => $username[0],
'screen_name' => $screenname,
'list_slug' => $list_slug[0],
'indices' => [$start_position, $end_position],
];

View file

@@ -161,7 +161,9 @@ abstract class Regex
// $after in the following regular expression. Note that we only use a
// look-ahead capture here and don't append $after when we return.
$tmp['valid_mention_preceding_chars'] = '([^a-zA-Z0-9_!#\$%&*@\/]|^|(?:^|[^a-z0-9_+~.-])RT:?)';
$re['valid_mentions_or_lists'] = '/'.$tmp['valid_mention_preceding_chars'].'(['.$tmp['at_signs'].'])([a-z0-9_]{1,20})(\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))/iu';
$re['valid_mentions_or_lists'] = '/'.$tmp['valid_mention_preceding_chars'].'(['.$tmp['at_signs'].'])([a-z0-9_]{1,20})((\/[a-z][a-z0-9_\-]{0,24})?(?=(.*|$))(?:@[a-z0-9\.\-]+[a-z0-9]+)?)/i';
$re['valid_reply'] = '/^(?:['.$tmp['spaces'].'])*['.$tmp['at_signs'].']([a-z0-9_]{1,20})(?=(.*|$))/iu';
$re['end_mention_match'] = '/\A(?:['.$tmp['at_signs'].']|['.$tmp['latin_accents'].']|:\/\/)/iu';