mirror of
https://github.com/YGGverse/Pulsar.git
synced 2026-03-31 17:55:37 +00:00
initial commit
This commit is contained in:
parent
ba2d31fb51
commit
a09f941a55
5 changed files with 210 additions and 1 deletions
132
src/crawler.php
Normal file
132
src/crawler.php
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
<?php
|
||||
|
||||
// Prevent multi-thread execution
|
||||
$semaphore = sem_get(
|
||||
crc32(
|
||||
__DIR__
|
||||
), 1
|
||||
);
|
||||
|
||||
if (false === sem_acquire($semaphore, true))
|
||||
{
|
||||
exit;
|
||||
}
|
||||
|
||||
// Init config
|
||||
$config = json_decode(
|
||||
file_get_contents(
|
||||
__DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . 'config.json'
|
||||
)
|
||||
);
|
||||
|
||||
// Update feeds
|
||||
foreach ($config->feed as $feed)
|
||||
{
|
||||
// Init feed location
|
||||
$filename = str_starts_with(
|
||||
$feed->target,
|
||||
DIRECTORY_SEPARATOR
|
||||
) ? $feed->target : __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . $feed->target;
|
||||
|
||||
// Init destination storage
|
||||
@mkdir(
|
||||
dirname(
|
||||
$filename
|
||||
),
|
||||
0755,
|
||||
true
|
||||
);
|
||||
|
||||
// Get feed data
|
||||
if (!$channel = simplexml_load_file($feed->source)->channel)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// Update title
|
||||
if (!empty($channel->title))
|
||||
{
|
||||
$title = trim(
|
||||
strip_tags(
|
||||
html_entity_decode(
|
||||
$channel->title
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
$title = parse_url(
|
||||
$feed->source,
|
||||
PHP_URL_HOST
|
||||
);
|
||||
}
|
||||
|
||||
file_put_contents(
|
||||
$filename,
|
||||
sprintf(
|
||||
'# %s',
|
||||
$title
|
||||
) . PHP_EOL
|
||||
);
|
||||
|
||||
// Append description
|
||||
if (!empty($channel->description))
|
||||
{
|
||||
file_put_contents(
|
||||
$filename,
|
||||
PHP_EOL . trim(
|
||||
strip_tags(
|
||||
html_entity_decode(
|
||||
$channel->description
|
||||
)
|
||||
)
|
||||
) . PHP_EOL,
|
||||
FILE_APPEND | LOCK_EX
|
||||
);
|
||||
}
|
||||
|
||||
// Append items
|
||||
$i = 1; foreach ($channel->item as $item)
|
||||
{
|
||||
// Apply items limit
|
||||
if ($i > $feed->item->limit)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Format item
|
||||
file_put_contents(
|
||||
$filename,
|
||||
PHP_EOL . trim(
|
||||
preg_replace(
|
||||
'/[\s]{3,}/ui',
|
||||
PHP_EOL . PHP_EOL,
|
||||
str_replace(
|
||||
[
|
||||
'{nl}',
|
||||
'{link}',
|
||||
'{guid}',
|
||||
'{pubDate}',
|
||||
'{title}',
|
||||
'{description}'
|
||||
],
|
||||
[
|
||||
PHP_EOL,
|
||||
!empty($item->link) ? trim($item->link) : '',
|
||||
!empty($item->guid) ? trim($item->guid) : '',
|
||||
!empty($item->pubDate) ? trim($item->pubDate) : '',
|
||||
!empty($item->title) ? trim(strip_tags(html_entity_decode($item->title))) : '',
|
||||
!empty($item->description) ? trim(strip_tags(html_entity_decode($item->description))) : ''
|
||||
],
|
||||
$feed->item->template
|
||||
) . PHP_EOL
|
||||
)
|
||||
) . PHP_EOL,
|
||||
FILE_APPEND | LOCK_EX
|
||||
);
|
||||
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue