init sqlite implementation

This commit is contained in:
yggverse 2024-05-04 08:25:29 +03:00
parent 423f88e94e
commit 13ebc7bcbc
8 changed files with 672 additions and 152 deletions

View file

@ -7,126 +7,354 @@ $semaphore = sem_get(
), 1
);
// Exit immediately when the semaphore cannot be acquired;
// the second argument `true` asks sem_acquire() not to wait for it.
// NOTE(review): the braced guard and the one-line guard below are the same check written twice;
// this looks like both sides of the diff rendered together - confirm that only one remains in the real file.
if (false === sem_acquire($semaphore, true))
{
exit;
}
if (false === sem_acquire($semaphore, true)) exit;
// Load dependencies
// (vendor/autoload.php is looked up one directory above this script)
require_once implode(
    DIRECTORY_SEPARATOR,
    [
        __DIR__,
        '..',
        'vendor',
        'autoload.php'
    ]
);
// Init profile argument
// The first CLI argument is the config profile (absolute path or a name relative to ../config);
// fail with an explicit message instead of an empty exception when it is missing.
if (empty($argv[1]))
{
    throw new \Exception(
        'Config profile argument required'
    );
}
// Init config
// Reads a JSON file and decodes it into $config.
// NOTE(review): the single-line 'config.json' path and the str_starts_with() expression after it
// are not joined by any operator; this looks like the removed and added sides of the diff
// rendered together - confirm which path expression is the current one.
$config = json_decode(
file_get_contents(
__DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'config.json'
// Profile path from the first CLI argument: used as-is when it starts with
// DIRECTORY_SEPARATOR, otherwise resolved against the ../config directory
str_starts_with(
$argv[1],
DIRECTORY_SEPARATOR
) ? $argv[1] // absolute
: __DIR__ . // relative
DIRECTORY_SEPARATOR . '..'.
DIRECTORY_SEPARATOR . 'config'.
DIRECTORY_SEPARATOR . $argv[1]
)
// Abort when decoding produced a falsy value
); if (!$config) throw new \Exception();
// Init database
// The configured location is used verbatim when it starts with DIRECTORY_SEPARATOR,
// otherwise it is resolved against the ../config directory
$databaseLocation = str_starts_with($config->database->location, DIRECTORY_SEPARATOR)
    ? $config->database->location
    : __DIR__ .
      DIRECTORY_SEPARATOR . '..' .
      DIRECTORY_SEPARATOR . 'config' .
      DIRECTORY_SEPARATOR . $config->database->location;

$database = new \Yggverse\Pulsar\Model\Database(
    $databaseLocation,
    $config->database->username,
    $config->database->password
);
// Update feeds
foreach ($config->feed as $feed)
// Begin channels crawl
foreach ($config->crawler->channel as $channel)
{
// NOTE(review): this section works with `$feed` and `$filename` (file-based output);
// it appears to be the removed side of the diff - confirm before relying on it.
// Init feed location
// (target is used as-is when it starts with DIRECTORY_SEPARATOR, otherwise resolved one directory above this script)
$filename = str_starts_with(
$feed->target,
DIRECTORY_SEPARATOR
) ? $feed->target : __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . $feed->target;
// Init destination storage
// (recursive mkdir with mode 0755; `@` suppresses the warning mkdir() would otherwise raise)
@mkdir(
dirname(
$filename
),
0755,
true
);
// Get feed data
// NOTE(review): this `if` header has no body of its own in this view; the next statement becomes its body
if (!$channel = simplexml_load_file($feed->source)->channel)
// Check channel enabled
if (!$channel->enabled)
{
    // Report the skipped source only when info output is switched on for this channel
    if ($channel->debug->info)
    {
        // The format already ends with PHP_EOL; nothing is appended to printf()'s return value
        printf(
            _('[%s] [info] skip disabled channel "%s"') . PHP_EOL,
            date('c'),
            $channel->source
        );
    }

    continue;
}
// NOTE(review): statements that use `$feed`, `$filename`, `$title`, `$i` and `$item` (writing a text file)
// are interleaved here with the "channel not accessible" check that uses `$remoteChannel`;
// this looks like the removed and added sides of the diff rendered together - confirm which lines are current.
// Update title
if (!empty($channel->title))
// Get channel data
// NOTE(review): simplexml_load_file() returns false on failure, in which case `->channel`
// is read from a non-object - consider checking the return value before dereferencing.
if (!$remoteChannel = simplexml_load_file($channel->source)->channel)
{
// Title text: entities decoded, tags stripped, surrounding whitespace trimmed
$title = trim(
strip_tags(
html_entity_decode(
$channel->title
)
)
);
}
else
{
// Fall back to the host part of the feed source
$title = parse_url(
$feed->source,
PHP_URL_HOST
);
}
// Write the title as the first line of the target file (no FILE_APPEND flag is passed here)
file_put_contents(
$filename,
sprintf(
'# %s',
$title
) . PHP_EOL
);
// Append description
if (!empty($channel->description))
{
file_put_contents(
$filename,
PHP_EOL . trim(
strip_tags(
html_entity_decode(
$channel->description
)
)
) . PHP_EOL,
FILE_APPEND | LOCK_EX
);
}
// Append items
$i = 1; foreach ($channel->item as $item)
{
// Apply items limit
if ($i > $feed->item->limit)
if ($channel->debug->warning)
{
break;
printf(
_('[%s] [warning] channel "%s" not accessible') . PHP_EOL,
date('c'),
$channel->source
) . PHP_EOL;
}
// Format item
// Placeholders in the item template are substituted with item fields,
// then runs of three or more whitespace characters are collapsed into two line breaks
file_put_contents(
$filename,
PHP_EOL . trim(
preg_replace(
'/[\s]{3,}/ui',
PHP_EOL . PHP_EOL,
str_replace(
[
'{nl}',
'{link}',
'{guid}',
'{pubDate}',
'{title}',
'{description}'
],
[
PHP_EOL,
!empty($item->link) ? trim($item->link) : '',
!empty($item->guid) ? trim($item->guid) : '',
!empty($item->pubDate) ? trim($item->pubDate) : '',
!empty($item->title) ? trim(strip_tags(html_entity_decode($item->title))) : '',
!empty($item->description) ? trim(strip_tags(html_entity_decode($item->description))) : ''
],
$feed->item->template
) . PHP_EOL
)
) . PHP_EOL,
FILE_APPEND | LOCK_EX
// NOTE(review): the file_put_contents() call above is not closed before this `continue`
continue;
}
// Init channel
// Look the channel up by its configured source; a falsy id leads to registration below
if (!$channelId = $database->getChannelIdBySource($channel->source))
{
// Create new one if not exists
// link, title and description are cast to string when the remote channel provides them, otherwise null is passed
$channelId = $database->addChannel(
$channel->source,
isset($remoteChannel->link) ? (string) $remoteChannel->link : null,
isset($remoteChannel->title) ? (string) $remoteChannel->title : null,
isset($remoteChannel->description) ? (string) $remoteChannel->description : null
);
// NOTE(review): `$i` is only used by the file-writing statements elsewhere in this view;
// this increment appears to be a removed-side line of the diff - confirm
$i++;
if ($channel->debug->info)
{
// NOTE(review): the format already ends with PHP_EOL; the trailing `. PHP_EOL` below
// is concatenated onto printf()'s return value and the result is discarded
printf(
_('[%s] [info] channel "%s" registered as #%d') . PHP_EOL,
date('c'),
$channel->source,
$channelId
) . PHP_EOL;
}
}
// Process items
if (!empty($remoteChannel->item))
{
foreach ($remoteChannel->item as $remoteChannelItem)
{
// Prepare link
// $link stays null unless link processing is enabled for this channel
// and the feed item carries a <link> element.
$link = null;

if ($channel->item->link->enabled)
{
    if (isset($remoteChannelItem->link))
    {
        // Cast the SimpleXML node to a plain string
        $link = (string) $remoteChannelItem->link;
    }

    elseif ($channel->debug->info)
    {
        printf(
            _('[%s] [info] item link enabled but not defined in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }

    // Skip this item when the link is mandatory but missing or empty
    if ($channel->item->link->required && !$link)
    {
        if ($channel->debug->warning)
        {
            printf(
                _('[%s] [warning] could not get item link for channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }

        continue;
    }
}
// Prepare guid or define it from link
if (isset($remoteChannelItem->guid))
{
    // Cast the SimpleXML node to a plain string
    $guid = (string) $remoteChannelItem->guid;
}

else
{
    // NOTE(review): $link is null when link processing is disabled or the item has no <link>,
    // so $guid can end up null here - confirm that the existence check and insert handle that
    $guid = $link;

    if ($channel->debug->warning)
    {
        printf(
            _('[%s] [warning] item guid defined as link in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }
}
// Prepare title
// $title stays null unless title processing is enabled for this channel
// and the feed item carries a <title> element.
$title = null;

if ($channel->item->title->enabled)
{
    if (isset($remoteChannelItem->title))
    {
        // Cast the SimpleXML node to a plain string
        $title = (string) $remoteChannelItem->title;
    }

    elseif ($channel->debug->info)
    {
        printf(
            _('[%s] [info] item title enabled but not defined in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }

    // Skip this item when the title is mandatory but missing or empty;
    // compared explicitly so that a literal "0" title is not treated as missing
    if ($channel->item->title->required && (null === $title || '' === $title))
    {
        if ($channel->debug->warning)
        {
            printf(
                _('[%s] [warning] could not get item title in channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }

        continue;
    }
}
// Prepare description
// $description stays null unless description processing is enabled for this channel
// and the feed item carries a <description> element.
$description = null;

if ($channel->item->description->enabled)
{
    if (isset($remoteChannelItem->description))
    {
        // Cast the SimpleXML node to a plain string
        $description = (string) $remoteChannelItem->description;
    }

    elseif ($channel->debug->info)
    {
        printf(
            _('[%s] [info] item description enabled but not defined in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }

    // Skip this item when the description is mandatory but missing or empty;
    // compared explicitly so that a literal "0" description is not treated as missing
    if ($channel->item->description->required && (null === $description || '' === $description))
    {
        if ($channel->debug->warning)
        {
            printf(
                _('[%s] [warning] could not get item description in channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }

        continue;
    }
}
// Prepare content
// $content stays null unless content processing is enabled for this channel
// and the feed item carries an <encoded> element under the "content" namespace prefix.
$content = null;

if ($channel->item->content->enabled)
{
    // Second argument `true` makes children() treat 'content' as a namespace prefix
    $_content = $remoteChannelItem->children('content', true);

    if ($_content && isset($_content->encoded))
    {
        // Cast the SimpleXML node to a plain string
        $content = (string) $_content->encoded;
    }

    // Compared explicitly so that a literal "0" body is not treated as missing
    $isContentMissing = null === $content || '' === $content;

    if ($isContentMissing && $channel->debug->info)
    {
        printf(
            _('[%s] [info] item content enabled but not defined in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }

    // Skip this item when the content is mandatory but missing or empty
    if ($channel->item->content->required && $isContentMissing)
    {
        if ($channel->debug->warning)
        {
            printf(
                _('[%s] [warning] could not get item content in channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }

        continue;
    }
}
// Prepare pubDate
// $pubTime stays null unless pubDate processing is enabled for this channel
// and the item's <pubDate> can be converted to a Unix timestamp.
$pubTime = null;

if ($channel->item->pubDate->enabled)
{
    if (isset($remoteChannelItem->pubDate))
    {
        // strtotime() returns false for unparsable input; compare strictly
        // so that a valid timestamp of 0 is not mistaken for a failure
        $_pubTime = strtotime((string) $remoteChannelItem->pubDate);

        if (false !== $_pubTime)
        {
            $pubTime = $_pubTime;
        }

        elseif ($channel->debug->warning)
        {
            // Tagged [warning] to match the debug level that guards this message
            printf(
                _('[%s] [warning] could not convert item pubDate to pubTime in channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }
    }

    elseif ($channel->debug->info)
    {
        printf(
            _('[%s] [info] item pubDate enabled but not defined in channel #%d') . PHP_EOL,
            date('c'),
            $channelId
        );
    }

    // Skip this item when the publication time is mandatory but could not be determined
    if ($channel->item->pubDate->required && null === $pubTime)
    {
        if ($channel->debug->warning)
        {
            printf(
                _('[%s] [warning] could not get item pubDate in channel #%d') . PHP_EOL,
                date('c'),
                $channelId
            );
        }

        continue;
    }
}
// Check item not registered yet
if (!$database->isChannelItemExist($channelId, $guid))
{
    // Create new one if not exists
    $channelItemId = $database->addChannelItem(
        $channelId,
        $guid,
        $link,
        $title,
        $description,
        $content,
        $pubTime
    );

    // Report the new record only when an id came back and info output is enabled
    if ($channelItemId && $channel->debug->info)
    {
        printf(
            _('[%s] [info] registered new item #%d for channel #%d') . PHP_EOL,
            date('c'),
            $channelItemId,
            $channelId
        );
    }
}
}
}
}