mirror of
https://github.com/sorenpeter/timeline.git
synced 2025-12-15 19:07:01 +00:00
feat(refresh): implement if-modified-since detection to only refresh newer content
Also autoformatted with PHP Tools on VS Code
This commit is contained in:
parent
ee7e6558b2
commit
d583632c48
1 changed files with 106 additions and 49 deletions
131
libs/twtxt.php
131
libs/twtxt.php
|
|
@ -1,4 +1,5 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
$config = parse_ini_file('private/config.ini');
|
||||
|
|
@ -9,7 +10,8 @@ if ($config['debug_mode']) {
|
|||
error_reporting(E_ALL);
|
||||
}
|
||||
|
||||
class TwtxtFile {
|
||||
class TwtxtFile
|
||||
{
|
||||
public $mainURL = ''; // First found URL
|
||||
public $URLs = [];
|
||||
public $nick = '';
|
||||
|
|
@ -22,7 +24,8 @@ class TwtxtFile {
|
|||
public $twts = [];
|
||||
}
|
||||
|
||||
class Twt {
|
||||
class Twt
|
||||
{
|
||||
public $originalTwtStr;
|
||||
public $hash;
|
||||
public $timestamp;
|
||||
|
|
@ -64,7 +67,8 @@ curl_setopt($curl, CURLOPT_SSLVERSION, 4);
|
|||
* found, the function returns the value of the key as a string after trimming any whitespace. If no
|
||||
* match is found, the function returns null.
|
||||
*/
|
||||
function getSingleParameter($keyToFind, $string) {
|
||||
function getSingleParameter($keyToFind, $string)
|
||||
{
|
||||
if (!str_contains($string, $keyToFind)) {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -82,7 +86,8 @@ function getSingleParameter($keyToFind, $string) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function getDoubleParameter($keywordToFind, $string) {
|
||||
function getDoubleParameter($keywordToFind, $string)
|
||||
{
|
||||
// Returns string or null
|
||||
$pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/';
|
||||
// Matches "# <keyword> = <value> <value>"
|
||||
|
|
@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function getReplyHashFromTwt(string $twtString): string {
|
||||
function getReplyHashFromTwt(string $twtString): string
|
||||
{
|
||||
// Extract the text between parentheses using regular expressions
|
||||
$pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)"
|
||||
preg_match($pattern, $twtString, $matches);
|
||||
|
|
@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string {
|
|||
return '';
|
||||
}
|
||||
|
||||
function getImagesFromTwt(string $twtString) {
|
||||
function getImagesFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/(<img[^>]+>)/i';
|
||||
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
|
||||
|
||||
|
|
@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function getTagsFromTwt(string $twtString) {
|
||||
function getTagsFromTwt(string $twtString)
|
||||
{
|
||||
//$pattern = '/(?<!\()\B#\w+(?!\))/iu';
|
||||
//$pattern = '/(?<=\B)#(\w+)/';
|
||||
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
|
||||
|
|
@ -144,7 +152,8 @@ function getTagsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function getMentionsFromTwt(string $twtString) {
|
||||
function getMentionsFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>"
|
||||
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
|
||||
|
||||
|
|
@ -159,7 +168,8 @@ function getMentionsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceMentionsFromTwt(string $twtString): string {
|
||||
function replaceMentionsFromTwt(string $twtString): string
|
||||
{
|
||||
// Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>';
|
||||
// Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a>
|
||||
|
||||
|
|
@ -178,7 +188,8 @@ function replaceMentionsFromTwt(string $twtString): string {
|
|||
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
|
||||
}
|
||||
|
||||
function replaceLinksFromTwt(string $twtString) {
|
||||
function replaceLinksFromTwt(string $twtString)
|
||||
{
|
||||
|
||||
// TODO: Make this NOT match with `inline code` to avoid links in code-snippets
|
||||
// 1. Look into how yarnd handles this
|
||||
|
|
@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceMarkdownLinksFromTwt(string $twtString) {
|
||||
function replaceMarkdownLinksFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/\[([^\]]+)\]\(([^)]+)\)/';
|
||||
|
||||
$replacement = '<a href="$2">$1</a>';
|
||||
|
|
@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceImagesFromTwt(string $twtString) {
|
||||
function replaceImagesFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/!\[(.*?)\]\((.*?)\)/';
|
||||
//$replacement = '<img src="$2" alt="$1">';
|
||||
$replacement = '<a href="$2"><img src="$2" alt="$1"></a>';
|
||||
|
|
@ -212,7 +225,8 @@ function replaceImagesFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceTagsFromTwt(string $twtString) {
|
||||
function replaceTagsFromTwt(string $twtString)
|
||||
{
|
||||
//$pattern = '/#(\w+)?/';
|
||||
//$pattern = '/(?<=\s)#(\w+)/';
|
||||
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
|
||||
|
|
@ -224,7 +238,8 @@ function replaceTagsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function embedYoutubeFromTwt(string $twtString) {
|
||||
function embedYoutubeFromTwt(string $twtString)
|
||||
{
|
||||
|
||||
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
|
||||
$pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi';
|
||||
|
|
@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) {
|
|||
}
|
||||
|
||||
|
||||
function getTimeElapsedString($timestamp, $full = false) {
|
||||
function getTimeElapsedString($timestamp, $full = false)
|
||||
{
|
||||
$now = new DateTime;
|
||||
$ago = new DateTime;
|
||||
$ago->setTimestamp($timestamp);
|
||||
|
|
@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) {
|
|||
return $string ? implode(', ', $string) . " $agoText" : 'just now';
|
||||
}
|
||||
|
||||
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
|
||||
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15)
|
||||
{
|
||||
# TODO: Process the Warning
|
||||
# Warning: file_get_contents(https://eapl.mx/twtxt.net):
|
||||
# failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in
|
||||
|
|
@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
|
|||
return $contents;
|
||||
}
|
||||
|
||||
function getCachedFileContents($filePath) {
|
||||
function getCachedFileContents($filePath)
|
||||
{
|
||||
$cacheFile = getCachedFileName($filePath);
|
||||
|
||||
// Check if cache file exists and it's not expired
|
||||
|
|
@ -320,31 +338,65 @@ function getCachedFileContents($filePath) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function updateCachedFile($filePath, $cacheDurationSecs = 15) {
|
||||
function updateCachedFile($filePath)
|
||||
{
|
||||
$cacheFilePath = getCachedFileName($filePath);
|
||||
# TODO: Report down URLs and stop loading them after a few tries
|
||||
|
||||
// File doesn't exist in cache or has expired, so fetch and cache it
|
||||
// TODO: Seems it's not working right!
|
||||
$fileDoesntExist = !file_exists($cacheFilePath);
|
||||
$fileIsOld = false;
|
||||
if (!$fileDoesntExist) {
|
||||
$fileIsOld = !((time() - filemtime($cacheFilePath)) < $cacheDurationSecs);
|
||||
}
|
||||
# Get the last modification time of the local file
|
||||
$lastModifiedTime = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false;
|
||||
$lastModifiedHeader = $lastModifiedTime ? gmdate('D, d M Y H:i:s', $lastModifiedTime) . ' GMT' : null;
|
||||
|
||||
if ($fileDoesntExist || $fileIsOld) {
|
||||
#echo "Loading Cached file $cacheFilePath<br>\n";
|
||||
$contents = @file_get_contents($filePath);
|
||||
# echo "lastModifiedHeader: $lastModifiedHeader<br>\n";
|
||||
|
||||
if ($contents === false) {
|
||||
// File loaded with errors, skip saving it
|
||||
# Set up the HTTP context with the 'If-Modified-Since' header
|
||||
$options = [
|
||||
'http' => [
|
||||
'method' => 'GET',
|
||||
'header' => $lastModifiedHeader ? "If-Modified-Since: $lastModifiedHeader\r\n" : '',
|
||||
]
|
||||
];
|
||||
|
||||
$context = stream_context_create($options);
|
||||
|
||||
$response = @file_get_contents($filePath, false, $context);
|
||||
|
||||
# Check if HTTP headers are available, usually when the server is available
|
||||
if (!isset($http_response_header)) {
|
||||
# echo "Failed to fetch headers. No HTTP request was made.\n";
|
||||
return;
|
||||
}
|
||||
|
||||
file_put_contents($cacheFilePath, $contents);
|
||||
if ($http_response_header) {
|
||||
# var_dump($http_response_header);
|
||||
|
||||
foreach ($http_response_header as $header) {
|
||||
# Look for the Last-Modified header
|
||||
if (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) {
|
||||
$dateString = $matches[1]; // Extracted date
|
||||
# echo "Extracted Date: $dateString\n";
|
||||
|
||||
// Convert to Unix timestamp
|
||||
$lastModifiedTimestamp = strtotime($dateString);
|
||||
if ($lastModifiedTimestamp > $lastModifiedTime) {
|
||||
# echo "Remote file is newer. Load it!<br>\n";
|
||||
} else {
|
||||
# echo "Not modified since last request. No update needed.<br>\n";
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function getTwtsFromTwtxtString($url) {
|
||||
# Save the content if it was successfully retrieved
|
||||
if ($response !== false) {
|
||||
file_put_contents($cacheFilePath, $response);
|
||||
#echo "File updated successfully.\n";
|
||||
}
|
||||
}
|
||||
|
||||
function getTwtsFromTwtxtString($url)
|
||||
{
|
||||
$fileContent = getCachedFileContents($url);
|
||||
|
||||
if (is_null($fileContent)) {
|
||||
|
|
@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) {
|
|||
return $twtxtData;
|
||||
}
|
||||
|
||||
function insertFollowingURL($urlString) {
|
||||
function insertFollowingURL($urlString)
|
||||
{
|
||||
// Check if it's a valid URL
|
||||
// Retrieve the nickname, if didn't find a nick, ask for one
|
||||
|
||||
|
|
@ -509,22 +562,26 @@ function insertFollowingURL($urlString) {
|
|||
echo $result;
|
||||
}
|
||||
|
||||
function getCachedFileName($filePath) {
|
||||
function getCachedFileName($filePath)
|
||||
{
|
||||
return __DIR__ . '/../private/cache/' . hash('sha256', $filePath); // TODO: make better path
|
||||
}
|
||||
|
||||
if (!function_exists('str_starts_with')) {
|
||||
function str_starts_with($haystack, $needle) {
|
||||
function str_starts_with($haystack, $needle)
|
||||
{
|
||||
return (string)$needle !== '' && strncmp($haystack, $needle, strlen($needle)) === 0;
|
||||
}
|
||||
}
|
||||
if (!function_exists('str_ends_with')) {
|
||||
function str_ends_with($haystack, $needle) {
|
||||
function str_ends_with($haystack, $needle)
|
||||
{
|
||||
return $needle !== '' && substr($haystack, -strlen($needle)) === (string)$needle;
|
||||
}
|
||||
}
|
||||
if (!function_exists('str_contains')) {
|
||||
function str_contains($haystack, $needle) {
|
||||
function str_contains($haystack, $needle)
|
||||
{
|
||||
return $needle !== '' && mb_strpos($haystack, $needle) !== false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue