feat(refresh): implement If-Modified-Since detection so that only newer remote content is refreshed

Also auto-formatted with the PHP Tools extension for VS Code.
This commit is contained in:
eapl.mx 2024-12-02 22:25:36 -06:00
parent ee7e6558b2
commit d583632c48
No known key found for this signature in database

View file

@ -1,4 +1,5 @@
<?php
declare(strict_types=1);
$config = parse_ini_file('private/config.ini');
@ -9,7 +10,8 @@ if ($config['debug_mode']) {
error_reporting(E_ALL);
}
class TwtxtFile {
class TwtxtFile
{
public $mainURL = ''; // First found URL
public $URLs = [];
public $nick = '';
@ -22,7 +24,8 @@ class TwtxtFile {
public $twts = [];
}
class Twt {
class Twt
{
public $originalTwtStr;
public $hash;
public $timestamp;
@ -64,7 +67,8 @@ curl_setopt($curl, CURLOPT_SSLVERSION, 4);
* found, the function returns the value of the key as a string after trimming any whitespace. If no
* match is found, the function returns null.
*/
function getSingleParameter($keyToFind, $string) {
function getSingleParameter($keyToFind, $string)
{
if (!str_contains($string, $keyToFind)) {
return null;
}
@ -82,7 +86,8 @@ function getSingleParameter($keyToFind, $string) {
return null;
}
function getDoubleParameter($keywordToFind, $string) {
function getDoubleParameter($keywordToFind, $string)
{
// Returns string or null
$pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/';
// Matches "# <keyword> = <value> <value>"
@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) {
return null;
}
function getReplyHashFromTwt(string $twtString): string {
function getReplyHashFromTwt(string $twtString): string
{
// Extract the text between parentheses using regular expressions
$pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)"
preg_match($pattern, $twtString, $matches);
@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string {
return '';
}
function getImagesFromTwt(string $twtString) {
function getImagesFromTwt(string $twtString)
{
$pattern = '/(<img[^>]+>)/i';
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) {
return $result;
}
function getTagsFromTwt(string $twtString) {
function getTagsFromTwt(string $twtString)
{
//$pattern = '/(?<!\()\B#\w+(?!\))/iu';
//$pattern = '/(?<=\B)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -144,7 +152,8 @@ function getTagsFromTwt(string $twtString) {
return $result;
}
function getMentionsFromTwt(string $twtString) {
function getMentionsFromTwt(string $twtString)
{
$pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>"
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
@ -159,7 +168,8 @@ function getMentionsFromTwt(string $twtString) {
return $result;
}
function replaceMentionsFromTwt(string $twtString): string {
function replaceMentionsFromTwt(string $twtString): string
{
// Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>';
// Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a>
@ -178,7 +188,8 @@ function replaceMentionsFromTwt(string $twtString): string {
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
}
function replaceLinksFromTwt(string $twtString) {
function replaceLinksFromTwt(string $twtString)
{
// TODO: Make this NOT match with `inline code` to avoid links in code-snippets
// 1. Look into how yarnd handles this
@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) {
return $result;
}
function replaceMarkdownLinksFromTwt(string $twtString) {
function replaceMarkdownLinksFromTwt(string $twtString)
{
$pattern = '/\[([^\]]+)\]\(([^)]+)\)/';
$replacement = '<a href="$2">$1</a>';
@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) {
return $result;
}
function replaceImagesFromTwt(string $twtString) {
function replaceImagesFromTwt(string $twtString)
{
$pattern = '/!\[(.*?)\]\((.*?)\)/';
//$replacement = '<img src="$2" alt="$1">';
$replacement = '<a href="$2"><img src="$2" alt="$1"></a>';
@ -212,7 +225,8 @@ function replaceImagesFromTwt(string $twtString) {
return $result;
}
function replaceTagsFromTwt(string $twtString) {
function replaceTagsFromTwt(string $twtString)
{
//$pattern = '/#(\w+)?/';
//$pattern = '/(?<=\s)#(\w+)/';
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
@ -224,7 +238,8 @@ function replaceTagsFromTwt(string $twtString) {
return $result;
}
function embedYoutubeFromTwt(string $twtString) {
function embedYoutubeFromTwt(string $twtString)
{
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
$pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi';
@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) {
}
function getTimeElapsedString($timestamp, $full = false) {
function getTimeElapsedString($timestamp, $full = false)
{
$now = new DateTime;
$ago = new DateTime;
$ago->setTimestamp($timestamp);
@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) {
return $string ? implode(', ', $string) . " $agoText" : 'just now';
}
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15)
{
# TODO: Process the Warning
# Warning: file_get_contents(https://eapl.mx/twtxt.net):
# failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in
@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
return $contents;
}
function getCachedFileContents($filePath) {
function getCachedFileContents($filePath)
{
$cacheFile = getCachedFileName($filePath);
// Check if cache file exists and it's not expired
@ -320,31 +338,65 @@ function getCachedFileContents($filePath) {
return null;
}
/**
 * Refreshes the cached copy of a remote file using a conditional HTTP GET.
 *
 * The modification time of the existing cache file is sent as an
 * `If-Modified-Since` header. The cache file is rewritten only when the
 * request succeeds and the server does not report the content as unchanged,
 * i.e. it neither answers `304 Not Modified` nor sends a `Last-Modified` date
 * that is not newer than the cached copy. Every failure is silent and leaves
 * the existing cache file untouched.
 *
 * @param string $filePath URL of the remote file; also passed to
 *                         getCachedFileName() to derive the cache location.
 * @return void
 */
function updateCachedFile($filePath)
{
    $cacheFilePath = getCachedFileName($filePath);

    # TODO: Report down URLs and stop loading them after a few tries

    // false means "nothing cached yet", which turns this into an unconditional GET.
    $cachedAt = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false;
    $conditionalHeader = $cachedAt
        ? 'If-Modified-Since: ' . gmdate('D, d M Y H:i:s', $cachedAt) . " GMT\r\n"
        : '';

    $context = stream_context_create([
        'http' => [
            'method' => 'GET',
            'header' => $conditionalHeader,
        ],
    ]);

    // Warnings are suppressed on purpose: an unreachable feed must not break the page.
    $response = @file_get_contents($filePath, false, $context);

    // $http_response_header is only set when an HTTP exchange actually happened.
    if (!isset($http_response_header) || $response === false) {
        return;
    }

    $statusCode = null;
    $remoteModifiedAt = false;
    foreach ($http_response_header as $header) {
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', $header, $matches)) {
            // When redirects are followed the array can hold several header
            // sets; restart on each status line so only the final one counts.
            $statusCode = (int) $matches[1];
            $remoteModifiedAt = false;
        } elseif (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) {
            // strtotime() yields false for an unparsable date, treated as "unknown".
            $remoteModifiedAt = strtotime(trim($matches[1]));
        }
    }

    // A 304 reply carries no body, so $response may be an empty string rather
    // than false; writing it would wipe the cached copy.
    if ($statusCode === 304) {
        return;
    }

    // Server ignored the conditional header but still reports content that is
    // not newer than the cache: nothing to update. An unknown remote date must
    // not block the write, otherwise a cold cache could never be seeded.
    if ($cachedAt !== false && $remoteModifiedAt !== false && $remoteModifiedAt <= $cachedAt) {
        return;
    }

    file_put_contents($cacheFilePath, $response);
}
function getTwtsFromTwtxtString($url)
{
$fileContent = getCachedFileContents($url);
if (is_null($fileContent)) {
@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) {
return $twtxtData;
}
function insertFollowingURL($urlString) {
function insertFollowingURL($urlString)
{
// Check if it's a valid URL
// Retrieve the nickname, if didn't find a nick, ask for one
@ -509,22 +562,26 @@ function insertFollowingURL($urlString) {
echo $result;
}
/**
 * Builds the path of the cache file that belongs to a remote file.
 *
 * The file name is the SHA-256 hex digest of the given string, placed in the
 * `private/cache` directory one level above this script's directory.
 *
 * @param string $filePath URL (or path) identifying the remote file.
 * @return string Path of the corresponding cache file.
 */
function getCachedFileName(string $filePath): string
{
    // TODO: make better path
    $cacheDirectory = __DIR__ . '/../private/cache/';

    return $cacheDirectory . hash('sha256', $filePath);
}
if (!function_exists('str_starts_with')) {
    /**
     * Polyfill for PHP 8's str_starts_with() on older runtimes.
     *
     * Mirrors the native function: the comparison is byte-wise and
     * case-sensitive, and an empty needle always matches.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Prefix to look for.
     * @return bool True when $haystack begins with $needle.
     */
    function str_starts_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Native behaviour: every string starts with the empty string.
        if ($needle === '') {
            return true;
        }

        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }
}
if (!function_exists('str_ends_with')) {
    /**
     * Polyfill for PHP 8's str_ends_with() on older runtimes.
     *
     * Mirrors the native function: the comparison is byte-wise and
     * case-sensitive, and an empty needle always matches.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Suffix to look for.
     * @return bool True when $haystack ends with $needle.
     */
    function str_ends_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Native behaviour: every string ends with the empty string.
        if ($needle === '') {
            return true;
        }

        $needleLength = strlen($needle);

        // A needle longer than the haystack can never be a suffix.
        return $needleLength <= strlen($haystack)
            && substr($haystack, -$needleLength) === $needle;
    }
}
if (!function_exists('str_contains')) {
    /**
     * Polyfill for PHP 8's str_contains() on older runtimes.
     *
     * Mirrors the native function: the search is byte-wise and
     * case-sensitive, and an empty needle is always considered contained.
     * Uses strpos() so the polyfill does not depend on the mbstring extension.
     *
     * @param string $haystack String to search in.
     * @param string $needle   Substring to search for.
     * @return bool True when $needle occurs anywhere in $haystack.
     */
    function str_contains($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Native behaviour: the empty string is contained in every string.
        if ($needle === '') {
            return true;
        }

        return strpos($haystack, $needle) !== false;
    }
}