From d583632c482edb57e535c3873cbb1d7ec82d848a Mon Sep 17 00:00:00 2001 From: "eapl.mx" Date: Mon, 2 Dec 2024 22:25:36 -0600 Subject: [PATCH] feat(refresh): implement if-modified-since detection to only refresh newer content Also autoformatted with PHP Tools on VS Code --- libs/twtxt.php | 155 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 49 deletions(-) diff --git a/libs/twtxt.php b/libs/twtxt.php index a94ed36..5e0a5f3 100644 --- a/libs/twtxt.php +++ b/libs/twtxt.php @@ -1,4 +1,5 @@ = " @@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) { return null; } -function getReplyHashFromTwt(string $twtString): string { +function getReplyHashFromTwt(string $twtString): string +{ // Extract the text between parentheses using regular expressions $pattern = '/\(#([^\)]+)\)/'; // Matches "(#)" preg_match($pattern, $twtString, $matches); @@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string { return ''; } -function getImagesFromTwt(string $twtString) { +function getImagesFromTwt(string $twtString) +{ $pattern = '/(]+>)/i'; preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER); @@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) { return $result; } -function getTagsFromTwt(string $twtString) { +function getTagsFromTwt(string $twtString) +{ //$pattern = '/(?]+)\s([^>]+)>/'; // Matches "@" preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER); @@ -159,13 +168,14 @@ function getMentionsFromTwt(string $twtString) { return $result; } -function replaceMentionsFromTwt(string $twtString): string { +function replaceMentionsFromTwt(string $twtString): string +{ // Example input: 'Hello @, how are you? @'; // Example output: Hello @eapl.mx@eapl.mx/twtxt.txt, how are you? @nick@server.com/something/twtxt.txt $pattern = '/@<([^ ]+)\s([^>]+)>/'; //$replacement = '@$1'; - $replacement = '@$1'; + $replacement = '@$1'; $replacement .= ''; // Adds a hidden link direcly to the twtxt.txt of the mentioned target #$twtString = '@'; #$pattern = '/@<([^ ]+) ([^>]+)>/'; @@ -175,10 +185,11 @@ function replaceMentionsFromTwt(string $twtString): string { // from https://github.com/hxii/picoblog/blob/master/picoblog.php //$pattern = '/\@<([a-zA-Z0-9\.]+)\W+(https?:\/\/[^>]+)>/'; - //return preg_replace($pattern,'@$1',$twtString); + //return preg_replace($pattern,'@$1',$twtString); } -function replaceLinksFromTwt(string $twtString) { +function replaceLinksFromTwt(string $twtString) +{ // TODO: Make this NOT match with `inline code` to avoid links in code-snippets // 1. Look into how yarnd handles this @@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) { return $result; } -function replaceMarkdownLinksFromTwt(string $twtString) { +function replaceMarkdownLinksFromTwt(string $twtString) +{ $pattern = '/\[([^\]]+)\]\(([^)]+)\)/'; $replacement = '$1'; @@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) { return $result; } -function replaceImagesFromTwt(string $twtString) { +function replaceImagesFromTwt(string $twtString) +{ $pattern = '/!\[(.*?)\]\((.*?)\)/'; //$replacement = '$1'; $replacement = '$1'; @@ -212,11 +225,12 @@ function replaceImagesFromTwt(string $twtString) { return $result; } -function replaceTagsFromTwt(string $twtString) { +function replaceTagsFromTwt(string $twtString) +{ //$pattern = '/#(\w+)?/'; //$pattern = '/(?<=\s)#(\w+)/'; $pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u'; - + //$replacement = '#\1'; // Dummy link $replacement = '#${1}'; $result = preg_replace($pattern, $replacement, $twtString); @@ -224,17 +238,18 @@ function replaceTagsFromTwt(string $twtString) { return $result; } -function embedYoutubeFromTwt(string $twtString) { +function embedYoutubeFromTwt(string $twtString) +{ - // original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992 + // original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992 $pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi'; - if(preg_match_all($pattern, $twtString, $youtubeLinks)) { - + if (preg_match_all($pattern, $twtString, $youtubeLinks)) { + $youtubeLinks = array_unique($youtubeLinks[1]); // Remove dublicate cause by raw URLs conceverter to links foreach ($youtubeLinks as $videoID) { - $twtString .= ''; + $twtString .= ''; } } @@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) { } -function getTimeElapsedString($timestamp, $full = false) { +function getTimeElapsedString($timestamp, $full = false) +{ $now = new DateTime; $ago = new DateTime; $ago->setTimestamp($timestamp); @@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) { return $string ? implode(', ', $string) . " $agoText" : 'just now'; } -function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) { +function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) +{ # TODO: Process the Warning # Warning: file_get_contents(https://eapl.mx/twtxt.net): # failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in @@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) { return $contents; } -function getCachedFileContents($filePath) { +function getCachedFileContents($filePath) +{ $cacheFile = getCachedFileName($filePath); // Check if cache file exists and it's not expired @@ -320,31 +338,65 @@ function getCachedFileContents($filePath) { return null; } -function updateCachedFile($filePath, $cacheDurationSecs = 15) { +function updateCachedFile($filePath) +{ $cacheFilePath = getCachedFileName($filePath); + # TODO: Report down URLs and stop loading them after a few tries - // File doesn't exist in cache or has expired, so fetch and cache it - // TODO: Seems it's not working right! - $fileDoesntExist = !file_exists($cacheFilePath); - $fileIsOld = false; - if (!$fileDoesntExist) { - $fileIsOld = !((time() - filemtime($cacheFilePath)) < $cacheDurationSecs); + # Get the last modification time of the local file + $lastModifiedTime = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false; + $lastModifiedHeader = $lastModifiedTime ? gmdate('D, d M Y H:i:s', $lastModifiedTime) . ' GMT' : null; + + # echo "lastModifiedHeader: $lastModifiedHeader
\n"; + + # Set up the HTTP context with the 'If-Modified-Since' header + $options = [ + 'http' => [ + 'method' => 'GET', + 'header' => $lastModifiedHeader ? "If-Modified-Since: $lastModifiedHeader\r\n" : '', + ] + ]; + + $context = stream_context_create($options); + + $response = @file_get_contents($filePath, false, $context); + + # Check if HTTP headers are available, usually when the server is available + if (!isset($http_response_header)) { + # echo "Failed to fetch headers. No HTTP request was made.\n"; + return; } - if ($fileDoesntExist || $fileIsOld) { - #echo "Loading Cached file $cacheFilePath
\n"; - $contents = @file_get_contents($filePath); + if ($http_response_header) { + # var_dump($http_response_header); - if ($contents === false) { - // File loaded with errors, skip saving it - return; + foreach ($http_response_header as $header) { + # Look for the Last-Modified header + if (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) { + $dateString = $matches[1]; // Extracted date + # echo "Extracted Date: $dateString\n"; + + // Convert to Unix timestamp + $lastModifiedTimestamp = strtotime($dateString); + if ($lastModifiedTimestamp > $lastModifiedTime) { + # echo "Remote file is newer. Load it!
\n"; + } else { + # echo "Not modified since last request. No update needed.
\n"; + return; + } + } } + } - file_put_contents($cacheFilePath, $contents); + # Save the content if it was successfully retrieved + if ($response !== false) { + file_put_contents($cacheFilePath, $response); + #echo "File updated successfully.\n"; } } -function getTwtsFromTwtxtString($url) { +function getTwtsFromTwtxtString($url) +{ $fileContent = getCachedFileContents($url); if (is_null($fileContent)) { @@ -398,8 +450,8 @@ function getTwtsFromTwtxtString($url) { // mosty for (re)feeds from Mastodon etc. if (str_contains($twtxtData->nick, "@")) { $str = $twtxtData->nick; - $str = ltrim($str,"@"); - $twtxtData->nick = explode("@",$str)[0]; // take the first [0] from splitting the nick at "@" + $str = ltrim($str, "@"); + $twtxtData->nick = explode("@", $str)[0]; // take the first [0] from splitting the nick at "@" } // Fallback for nick and url if not set in twtxt.txt @@ -436,7 +488,7 @@ function getTwtsFromTwtxtString($url) { //$twtContent = str_replace("\u{2028}", "\n
\n", $twtContent); $twtContent = str_replace("\u{2028}", "\n", $twtContent); - $twtContent = embedYoutubeFromTwt($twtContent); + $twtContent = embedYoutubeFromTwt($twtContent); // Get and remove the hash $hash = getReplyHashFromTwt($twtContent); @@ -454,7 +506,7 @@ function getTwtsFromTwtxtString($url) { //$twtContent = replaceLinksFromTwt($twtContent); // TODO: Make ?tag= filtering feature - $twtContent = replaceTagsFromTwt($twtContent); + $twtContent = replaceTagsFromTwt($twtContent); // TODO: Get mentions $mentions = getMentionsFromTwt($twtContent); @@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) { return $twtxtData; } -function insertFollowingURL($urlString) { +function insertFollowingURL($urlString) +{ // Check if it's a valid URL // Retrieve the nickname, if didn't find a nick, ask for one @@ -509,22 +562,26 @@ function insertFollowingURL($urlString) { echo $result; } -function getCachedFileName($filePath) { +function getCachedFileName($filePath) +{ return __DIR__ . '/../private/cache/' . hash('sha256', $filePath); // TODO: make better path } if (!function_exists('str_starts_with')) { - function str_starts_with($haystack, $needle) { + function str_starts_with($haystack, $needle) + { return (string)$needle !== '' && strncmp($haystack, $needle, strlen($needle)) === 0; } } if (!function_exists('str_ends_with')) { - function str_ends_with($haystack, $needle) { + function str_ends_with($haystack, $needle) + { return $needle !== '' && substr($haystack, -strlen($needle)) === (string)$needle; } } if (!function_exists('str_contains')) { - function str_contains($haystack, $needle) { + function str_contains($haystack, $needle) + { return $needle !== '' && mb_strpos($haystack, $needle) !== false; } }