feat(refresh): implement if-modified-since detection to only refresh newer content

Also autoformatted with PHP Tools on VS Code
This commit is contained in:
eapl.mx 2024-12-02 22:25:36 -06:00
parent ee7e6558b2
commit d583632c48
No known key found for this signature in database

View file

@ -1,4 +1,5 @@
<?php
declare(strict_types=1);
$config = parse_ini_file('private/config.ini');
@ -9,7 +10,8 @@ if ($config['debug_mode']) {
error_reporting(E_ALL);
}
class TwtxtFile {
class TwtxtFile
{
public $mainURL = ''; // First found URL
public $URLs = [];
public $nick = '';
@ -22,7 +24,8 @@ class TwtxtFile {
public $twts = [];
}
class Twt {
class Twt
{
public $originalTwtStr;
public $hash;
public $timestamp;
@ -64,13 +67,14 @@ curl_setopt($curl, CURLOPT_SSLVERSION, 4);
* found, the function returns the value of the key as a string after trimming any whitespace. If no
* match is found, the function returns null.
*/
function getSingleParameter($keyToFind, $string) {
function getSingleParameter($keyToFind, $string)
{
if (!str_contains($string, $keyToFind)) {
return null;
}
$pattern = '/\s*(?<!\S)' . $keyToFind . '\s*=\s*([^#\n]+)/';
// Fix: not matching with nick as in: `# follow = dbucklin@www.davebucklin.com https://www.davebucklin.com/twtxt.txt?nick=dbucklin`
// Fix: not matching with nick as in: `# follow = dbucklin@www.davebucklin.com https://www.davebucklin.com/twtxt.txt?nick=dbucklin`
//$pattern = '/\s*' . $keyToFind . '\s*=\s*([^#\n]+)/';
//$pattern = '/\s*' . $keyToFind . '\s*=\s*([^\s#]+)/'; // Only matches the first word
preg_match($pattern, $string, $matches);
@ -82,7 +86,8 @@ function getSingleParameter($keyToFind, $string) {
return null;
}
function getDoubleParameter($keywordToFind, $string) {
function getDoubleParameter($keywordToFind, $string)
{
// Returns string or null
$pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/';
// Matches "# <keyword> = <value> <value>"
@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) {
return null;
}
function getReplyHashFromTwt(string $twtString): string {
function getReplyHashFromTwt(string $twtString): string
{
// Extract the text between parentheses using regular expressions
$pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)"
preg_match($pattern, $twtString, $matches);
@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string {
return '';
}
function getImagesFromTwt(string $twtString) {
function getImagesFromTwt(string $twtString)
{
$pattern = '/(<img[^>]+>)/i';
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) {
return $result;
}
function getTagsFromTwt(string $twtString) {
function getTagsFromTwt(string $twtString)
{
//$pattern = '/(?<!\()\B#\w+(?!\))/iu';
//$pattern = '/(?<=\B)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -144,7 +152,8 @@ function getTagsFromTwt(string $twtString) {
return $result;
}
function getMentionsFromTwt(string $twtString) {
function getMentionsFromTwt(string $twtString)
{
$pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>"
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -159,13 +168,14 @@ function getMentionsFromTwt(string $twtString) {
return $result;
}
function replaceMentionsFromTwt(string $twtString): string {
function replaceMentionsFromTwt(string $twtString): string
{
// Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>';
// Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a>
$pattern = '/@<([^ ]+)\s([^>]+)>/';
//$replacement = '<a href="/?url=$2">@$1</a>';
$replacement = '<a href="'.str_replace("/index.php", "", $_SERVER["SCRIPT_NAME"]).'/profile?url=$2">@$1</a>';
$replacement = '<a href="' . str_replace("/index.php", "", $_SERVER["SCRIPT_NAME"]) . '/profile?url=$2">@$1</a>';
$replacement .= '<a href="$2" class="webmention"></a>'; // Adds a hidden link direcly to the twtxt.txt of the mentioned target
#$twtString = '@<nick https://eapl.mx/twtxt.txt>';
#$pattern = '/@<([^ ]+) ([^>]+)>/';
@ -175,10 +185,11 @@ function replaceMentionsFromTwt(string $twtString): string {
// from https://github.com/hxii/picoblog/blob/master/picoblog.php
//$pattern = '/\@<([a-zA-Z0-9\.]+)\W+(https?:\/\/[^>]+)>/';
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
}
function replaceLinksFromTwt(string $twtString) {
function replaceLinksFromTwt(string $twtString)
{
// TODO: Make this NOT match with `inline code` to avoid links in code-snippets
// 1. Look into how yarnd handles this
@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) {
return $result;
}
function replaceMarkdownLinksFromTwt(string $twtString) {
function replaceMarkdownLinksFromTwt(string $twtString)
{
$pattern = '/\[([^\]]+)\]\(([^)]+)\)/';
$replacement = '<a href="$2">$1</a>';
@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) {
return $result;
}
function replaceImagesFromTwt(string $twtString) {
function replaceImagesFromTwt(string $twtString)
{
$pattern = '/!\[(.*?)\]\((.*?)\)/';
//$replacement = '<img src="$2" alt="$1">';
$replacement = '<a href="$2"><img src="$2" alt="$1"></a>';
@ -212,7 +225,8 @@ function replaceImagesFromTwt(string $twtString) {
return $result;
}
function replaceTagsFromTwt(string $twtString) {
function replaceTagsFromTwt(string $twtString)
{
//$pattern = '/#(\w+)?/';
//$pattern = '/(?<=\s)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -224,17 +238,18 @@ function replaceTagsFromTwt(string $twtString) {
return $result;
}
function embedYoutubeFromTwt(string $twtString) {
function embedYoutubeFromTwt(string $twtString)
{
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
$pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi';
if(preg_match_all($pattern, $twtString, $youtubeLinks)) {
if (preg_match_all($pattern, $twtString, $youtubeLinks)) {
$youtubeLinks = array_unique($youtubeLinks[1]); // Remove dublicate cause by raw URLs conceverter to links
foreach ($youtubeLinks as $videoID) {
$twtString .= '<iframe loading="lazy" src="https://www.youtube.com/embed/'.$videoID.'" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
$twtString .= '<iframe loading="lazy" src="https://www.youtube.com/embed/' . $videoID . '" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
}
}
@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) {
}
function getTimeElapsedString($timestamp, $full = false) {
function getTimeElapsedString($timestamp, $full = false)
{
$now = new DateTime;
$ago = new DateTime;
$ago->setTimestamp($timestamp);
@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) {
return $string ? implode(', ', $string) . " $agoText" : 'just now';
}
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15)
{
# TODO: Process the Warning
# Warning: file_get_contents(https://eapl.mx/twtxt.net):
# failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in
@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
return $contents;
}
function getCachedFileContents($filePath) {
function getCachedFileContents($filePath)
{
$cacheFile = getCachedFileName($filePath);
// Check if cache file exists and it's not expired
@ -320,31 +338,65 @@ function getCachedFileContents($filePath) {
return null;
}
/**
 * Refreshes the cached copy of a remote file, downloading it only when it changed.
 *
 * Sends a conditional GET (If-Modified-Since, based on the cache file's mtime)
 * and writes the response body to the cache only when the server returned a
 * successful (2xx) response that is not older than the copy already on disk.
 * Failures are best-effort: on any error the existing cache is left untouched.
 *
 * @param string $filePath URL of the remote file; also the key used to derive the cache file name.
 *
 * @return void
 */
function updateCachedFile($filePath)
{
    $cacheFilePath = getCachedFileName($filePath);
    # TODO: Report down URLs and stop loading them after a few tries

    // The mtime of the cached copy is the reference date for the conditional request.
    $lastModifiedTime = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false;

    $requestHeaders = '';
    if ($lastModifiedTime !== false) {
        $requestHeaders = 'If-Modified-Since: ' . gmdate('D, d M Y H:i:s', $lastModifiedTime) . " GMT\r\n";
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            'header' => $requestHeaders,
        ],
    ]);

    // Warnings are suppressed on purpose: unreachable feeds are expected and handled below.
    $response = @file_get_contents($filePath, false, $context);

    // No body or no headers means the request failed (or was not HTTP at all): keep the old cache.
    if ($response === false || empty($http_response_header)) {
        return;
    }

    // $http_response_header holds the headers of every response in a redirect chain.
    // Only the final response matters, so both values are reset on each status line.
    $statusCode = null;
    $remoteModifiedTime = null;
    foreach ($http_response_header as $header) {
        if (preg_match('/^HTTP\/\S+\s+(\d{3})/i', $header, $matches)) {
            $statusCode = (int) $matches[1];
            $remoteModifiedTime = null;
            continue;
        }

        if (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) {
            $parsedDate = strtotime(trim($matches[1]));
            // An unparseable date is treated as "unknown" rather than as "not modified".
            $remoteModifiedTime = $parsedDate === false ? null : $parsedDate;
        }
    }

    // A 304 Not Modified (or any other non-2xx status) arrives with an empty body that the
    // HTTP wrapper still reports as success; saving it would wipe the cached content.
    if ($statusCode !== null && ($statusCode < 200 || $statusCode >= 300)) {
        return;
    }

    // Servers that ignore If-Modified-Since answer 200; skip the write when they
    // report that the remote file is not newer than the cached copy.
    if ($lastModifiedTime !== false && $remoteModifiedTime !== null && $remoteModifiedTime <= $lastModifiedTime) {
        return;
    }

    file_put_contents($cacheFilePath, $response);
}
function getTwtsFromTwtxtString($url) {
function getTwtsFromTwtxtString($url)
{
$fileContent = getCachedFileContents($url);
if (is_null($fileContent)) {
@ -398,8 +450,8 @@ function getTwtsFromTwtxtString($url) {
// mostly for (re)feeds from Mastodon etc.
if (str_contains($twtxtData->nick, "@")) {
$str = $twtxtData->nick;
$str = ltrim($str,"@");
$twtxtData->nick = explode("@",$str)[0]; // take the first [0] from splitting the nick at "@"
$str = ltrim($str, "@");
$twtxtData->nick = explode("@", $str)[0]; // take the first [0] from splitting the nick at "@"
}
// Fallback for nick and url if not set in twtxt.txt
@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) {
return $twtxtData;
}
function insertFollowingURL($urlString) {
function insertFollowingURL($urlString)
{
// Check if it's a valid URL
// Retrieve the nickname, if didn't find a nick, ask for one
@ -509,22 +562,26 @@ function insertFollowingURL($urlString) {
echo $result;
}
/**
 * Builds the path of the cache entry that belongs to a file path or URL.
 *
 * The entry name is the SHA-256 hex digest of $filePath, placed under the
 * private/cache/ directory that sits one level above this file's directory.
 *
 * @param string $filePath Path or URL identifying the cached resource.
 *
 * @return string Path of the corresponding cache file.
 */
function getCachedFileName($filePath)
{
    // TODO: make better path
    $cacheDirectory = __DIR__ . '/../private/cache/';
    $entryName = hash('sha256', $filePath);

    return $cacheDirectory . $entryName;
}
if (!function_exists('str_starts_with')) {
    /**
     * Polyfill for PHP 8's str_starts_with() on older runtimes.
     *
     * Mirrors the native semantics, including the empty needle: every string
     * starts with the empty string, so '' as needle yields true.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Prefix to look for.
     *
     * @return bool True when $haystack begins with $needle.
     */
    function str_starts_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // strncmp() with a length of 0 returns 0, which covers the empty-needle case.
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }
}
if (!function_exists('str_ends_with')) {
    /**
     * Polyfill for PHP 8's str_ends_with() on older runtimes.
     *
     * Mirrors the native semantics, including the empty needle: every string
     * ends with the empty string, so '' as needle yields true.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Suffix to look for.
     *
     * @return bool True when $haystack ends with $needle.
     */
    function str_ends_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;
        $needleLength = strlen($needle);

        if ($needleLength === 0) {
            return true;
        }

        // A needle longer than the haystack can never be its suffix.
        if ($needleLength > strlen($haystack)) {
            return false;
        }

        return substr_compare($haystack, $needle, -$needleLength) === 0;
    }
}
if (!function_exists('str_contains')) {
    /**
     * Polyfill for PHP 8's str_contains() on older runtimes.
     *
     * Mirrors the native semantics, including the empty needle: every string
     * contains the empty string, so '' as needle yields true. The check is
     * byte-based (strpos), as in the native function, so it does not require
     * the mbstring extension.
     *
     * @param string $haystack String to search in.
     * @param string $needle   Substring to look for.
     *
     * @return bool True when $needle occurs anywhere in $haystack.
     */
    function str_contains($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        return $needle === '' || strpos($haystack, $needle) !== false;
    }
}