feat(refresh): implement if-modified-since detection to only refresh newer content

Also autoformatted with PHP Tools on VS Code
This commit is contained in:
eapl.mx 2024-12-02 22:25:36 -06:00
parent ee7e6558b2
commit d583632c48
No known key found for this signature in database

View file

@ -1,4 +1,5 @@
<?php <?php
declare(strict_types=1); declare(strict_types=1);
$config = parse_ini_file('private/config.ini'); $config = parse_ini_file('private/config.ini');
@ -9,7 +10,8 @@ if ($config['debug_mode']) {
error_reporting(E_ALL); error_reporting(E_ALL);
} }
class TwtxtFile { class TwtxtFile
{
public $mainURL = ''; // First found URL public $mainURL = ''; // First found URL
public $URLs = []; public $URLs = [];
public $nick = ''; public $nick = '';
@ -22,7 +24,8 @@ class TwtxtFile {
public $twts = []; public $twts = [];
} }
class Twt { class Twt
{
public $originalTwtStr; public $originalTwtStr;
public $hash; public $hash;
public $timestamp; public $timestamp;
@ -64,7 +67,8 @@ curl_setopt($curl, CURLOPT_SSLVERSION, 4);
* found, the function returns the value of the key as a string after trimming any whitespace. If no * found, the function returns the value of the key as a string after trimming any whitespace. If no
* match is found, the function returns null. * match is found, the function returns null.
*/ */
function getSingleParameter($keyToFind, $string) { function getSingleParameter($keyToFind, $string)
{
if (!str_contains($string, $keyToFind)) { if (!str_contains($string, $keyToFind)) {
return null; return null;
} }
@ -82,7 +86,8 @@ function getSingleParameter($keyToFind, $string) {
return null; return null;
} }
function getDoubleParameter($keywordToFind, $string) { function getDoubleParameter($keywordToFind, $string)
{
// Returns string or null // Returns string or null
$pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/'; $pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/';
// Matches "# <keyword> = <value> <value>" // Matches "# <keyword> = <value> <value>"
@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) {
return null; return null;
} }
function getReplyHashFromTwt(string $twtString): string { function getReplyHashFromTwt(string $twtString): string
{
// Extract the text between parentheses using regular expressions // Extract the text between parentheses using regular expressions
$pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)" $pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)"
preg_match($pattern, $twtString, $matches); preg_match($pattern, $twtString, $matches);
@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string {
return ''; return '';
} }
function getImagesFromTwt(string $twtString) { function getImagesFromTwt(string $twtString)
{
$pattern = '/(<img[^>]+>)/i'; $pattern = '/(<img[^>]+>)/i';
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER); preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) {
return $result; return $result;
} }
function getTagsFromTwt(string $twtString) { function getTagsFromTwt(string $twtString)
{
//$pattern = '/(?<!\()\B#\w+(?!\))/iu'; //$pattern = '/(?<!\()\B#\w+(?!\))/iu';
//$pattern = '/(?<=\B)#(\w+)/'; //$pattern = '/(?<=\B)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u'; $pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -144,7 +152,8 @@ function getTagsFromTwt(string $twtString) {
return $result; return $result;
} }
function getMentionsFromTwt(string $twtString) { function getMentionsFromTwt(string $twtString)
{
$pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>" $pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>"
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER); preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -159,7 +168,8 @@ function getMentionsFromTwt(string $twtString) {
return $result; return $result;
} }
function replaceMentionsFromTwt(string $twtString): string { function replaceMentionsFromTwt(string $twtString): string
{
// Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>'; // Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>';
// Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a> // Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a>
@ -178,7 +188,8 @@ function replaceMentionsFromTwt(string $twtString): string {
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString); //return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
} }
function replaceLinksFromTwt(string $twtString) { function replaceLinksFromTwt(string $twtString)
{
// TODO: Make this NOT match with `inline code` to avoid links in code-snippets // TODO: Make this NOT match with `inline code` to avoid links in code-snippets
// 1. Look into how yarnd handles this // 1. Look into how yarnd handles this
@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) {
return $result; return $result;
} }
function replaceMarkdownLinksFromTwt(string $twtString) { function replaceMarkdownLinksFromTwt(string $twtString)
{
$pattern = '/\[([^\]]+)\]\(([^)]+)\)/'; $pattern = '/\[([^\]]+)\]\(([^)]+)\)/';
$replacement = '<a href="$2">$1</a>'; $replacement = '<a href="$2">$1</a>';
@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) {
return $result; return $result;
} }
function replaceImagesFromTwt(string $twtString) { function replaceImagesFromTwt(string $twtString)
{
$pattern = '/!\[(.*?)\]\((.*?)\)/'; $pattern = '/!\[(.*?)\]\((.*?)\)/';
//$replacement = '<img src="$2" alt="$1">'; //$replacement = '<img src="$2" alt="$1">';
$replacement = '<a href="$2"><img src="$2" alt="$1"></a>'; $replacement = '<a href="$2"><img src="$2" alt="$1"></a>';
@ -212,7 +225,8 @@ function replaceImagesFromTwt(string $twtString) {
return $result; return $result;
} }
function replaceTagsFromTwt(string $twtString) { function replaceTagsFromTwt(string $twtString)
{
//$pattern = '/#(\w+)?/'; //$pattern = '/#(\w+)?/';
//$pattern = '/(?<=\s)#(\w+)/'; //$pattern = '/(?<=\s)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u'; $pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -224,7 +238,8 @@ function replaceTagsFromTwt(string $twtString) {
return $result; return $result;
} }
function embedYoutubeFromTwt(string $twtString) { function embedYoutubeFromTwt(string $twtString)
{
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992 // original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
$pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi'; $pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi';
@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) {
} }
function getTimeElapsedString($timestamp, $full = false) { function getTimeElapsedString($timestamp, $full = false)
{
$now = new DateTime; $now = new DateTime;
$ago = new DateTime; $ago = new DateTime;
$ago->setTimestamp($timestamp); $ago->setTimestamp($timestamp);
@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) {
return $string ? implode(', ', $string) . " $agoText" : 'just now'; return $string ? implode(', ', $string) . " $agoText" : 'just now';
} }
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) { function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15)
{
# TODO: Process the Warning # TODO: Process the Warning
# Warning: file_get_contents(https://eapl.mx/twtxt.net): # Warning: file_get_contents(https://eapl.mx/twtxt.net):
# failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in # failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in
@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
return $contents; return $contents;
} }
function getCachedFileContents($filePath) { function getCachedFileContents($filePath)
{
$cacheFile = getCachedFileName($filePath); $cacheFile = getCachedFileName($filePath);
// Check if cache file exists and it's not expired // Check if cache file exists and it's not expired
@ -320,31 +338,65 @@ function getCachedFileContents($filePath) {
return null; return null;
} }
/**
 * Refreshes the cached copy of a remote file, saving it only when the server
 * returns content newer than what is already cached.
 *
 * The cache file's modification time is sent as an `If-Modified-Since` request
 * header. Nothing is written when:
 *  - the request produced no HTTP response at all (server unreachable, non-HTTP path),
 *  - the final response is not a 2xx (notably `304 Not Modified`, which has no body),
 *  - the final response's `Last-Modified` date is not newer than the cached copy,
 *  - the body could not be read.
 *
 * An absent or unparseable `Last-Modified` header is treated as "unknown", and the
 * downloaded body is saved.
 *
 * @param string $filePath URL of the remote file; also the key the cache file name is derived from.
 * @return void
 */
function updateCachedFile($filePath)
{
    $cacheFilePath = getCachedFileName($filePath);
    # TODO: Report down URLs and stop loading them after a few tries

    # Modification time of the cached copy, or false when nothing is cached yet
    $lastModifiedTime = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false;

    # Only send the conditional header when there is a cached copy to compare against
    $requestHeader = '';
    if ($lastModifiedTime) {
        $requestHeader = 'If-Modified-Since: ' . gmdate('D, d M Y H:i:s', $lastModifiedTime) . " GMT\r\n";
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            'header' => $requestHeader,
        ],
    ]);

    # Warnings are suppressed on purpose: unreachable feeds are expected and are simply skipped
    $response = @file_get_contents($filePath, false, $context);

    # $http_response_header is only populated when an HTTP response was actually received
    if (!isset($http_response_header) || !is_array($http_response_header)) {
        return;
    }

    # Walk the headers; after redirects the array holds one header set per hop,
    # so every status line resets what was collected for the previous hop.
    $statusCode = null;
    $remoteLastModified = null;
    foreach ($http_response_header as $header) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', $header, $matches)) {
            $statusCode = (int) $matches[1];
            $remoteLastModified = null;
        } elseif (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) {
            $parsedDate = strtotime(trim($matches[1]));
            $remoteLastModified = ($parsedDate === false) ? null : $parsedDate;
        }
    }

    # 304 Not Modified (and any other non-2xx answer) has no usable body:
    # saving it would replace the cached feed with an empty file.
    if ($statusCode !== null && ($statusCode < 200 || $statusCode >= 300)) {
        return;
    }

    # The server ignored If-Modified-Since but still reports content no newer than the cache
    if ($lastModifiedTime !== false && $remoteLastModified !== null && $remoteLastModified <= $lastModifiedTime) {
        return;
    }

    # Save the content if it was successfully retrieved
    if ($response !== false) {
        file_put_contents($cacheFilePath, $response);
    }
}
function getTwtsFromTwtxtString($url)
{
$fileContent = getCachedFileContents($url); $fileContent = getCachedFileContents($url);
if (is_null($fileContent)) { if (is_null($fileContent)) {
@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) {
return $twtxtData; return $twtxtData;
} }
function insertFollowingURL($urlString) { function insertFollowingURL($urlString)
{
// Check if it's a valid URL // Check if it's a valid URL
// Retrieve the nickname, if didn't find a nick, ask for one // Retrieve the nickname, if didn't find a nick, ask for one
@ -509,22 +562,26 @@ function insertFollowingURL($urlString) {
echo $result; echo $result;
} }
/**
 * Builds the path of the cache file that corresponds to a remote file.
 *
 * The name is the SHA-256 hex digest of $filePath, placed in the
 * `private/cache/` directory one level above this script's directory.
 *
 * @param string $filePath URL or path used as the cache key.
 * @return string Path of the cache file (the file itself may not exist yet).
 */
function getCachedFileName($filePath)
{
    // TODO: make better path
    $cacheDirectory = __DIR__ . '/../private/cache/';
    $cacheKey = hash('sha256', $filePath);

    return $cacheDirectory . $cacheKey;
}
if (!function_exists('str_starts_with')) {
    /**
     * Polyfill for PHP 8's str_starts_with() on older runtimes.
     *
     * Byte-wise, case-sensitive prefix test. An empty needle matches every
     * haystack, exactly like the native function, so results do not depend on
     * which PHP version is running.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Prefix to look for.
     * @return bool True when $haystack begins with $needle.
     */
    function str_starts_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Comparing zero bytes yields 0, so the empty needle matches as it does natively
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }
}
if (!function_exists('str_ends_with')) {
    /**
     * Polyfill for PHP 8's str_ends_with() on older runtimes.
     *
     * Byte-wise, case-sensitive suffix test. An empty needle matches every
     * haystack, exactly like the native function.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Suffix to look for.
     * @return bool True when $haystack ends with $needle.
     */
    function str_ends_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        if ($needle === '') {
            return true;
        }

        $needleLength = strlen($needle);

        // A needle longer than the haystack can never be its suffix
        return $needleLength <= strlen($haystack)
            && substr_compare($haystack, $needle, -$needleLength) === 0;
    }
}
if (!function_exists('str_contains')) {
    /**
     * Polyfill for PHP 8's str_contains() on older runtimes.
     *
     * Byte-wise, case-sensitive substring test that needs no extension. An empty
     * needle is contained in every haystack, exactly like the native function.
     *
     * @param string $haystack String to search in.
     * @param string $needle   Substring to look for.
     * @return bool True when $needle occurs anywhere in $haystack.
     */
    function str_contains($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Short-circuit first: strpos() rejects an empty needle on PHP 7
        return $needle === '' || strpos($haystack, $needle) !== false;
    }
}