mirror of
https://github.com/sorenpeter/timeline.git
synced 2025-12-15 10:57:01 +00:00
feat(refresh): implement if-modified-since detection to only refresh newer content
Also autoformatted with PHP Tools on VS Code
This commit is contained in:
parent
ee7e6558b2
commit
d583632c48
1 changed files with 106 additions and 49 deletions
155
libs/twtxt.php
155
libs/twtxt.php
|
|
@ -1,4 +1,5 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
$config = parse_ini_file('private/config.ini');
|
||||
|
|
@ -9,7 +10,8 @@ if ($config['debug_mode']) {
|
|||
error_reporting(E_ALL);
|
||||
}
|
||||
|
||||
class TwtxtFile {
|
||||
class TwtxtFile
|
||||
{
|
||||
public $mainURL = ''; // First found URL
|
||||
public $URLs = [];
|
||||
public $nick = '';
|
||||
|
|
@ -22,7 +24,8 @@ class TwtxtFile {
|
|||
public $twts = [];
|
||||
}
|
||||
|
||||
class Twt {
|
||||
class Twt
|
||||
{
|
||||
public $originalTwtStr;
|
||||
public $hash;
|
||||
public $timestamp;
|
||||
|
|
@ -64,13 +67,14 @@ curl_setopt($curl, CURLOPT_SSLVERSION, 4);
|
|||
* found, the function returns the value of the key as a string after trimming any whitespace. If no
|
||||
* match is found, the function returns null.
|
||||
*/
|
||||
function getSingleParameter($keyToFind, $string) {
|
||||
function getSingleParameter($keyToFind, $string)
|
||||
{
|
||||
if (!str_contains($string, $keyToFind)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$pattern = '/\s*(?<!\S)' . $keyToFind . '\s*=\s*([^#\n]+)/';
|
||||
// Fix: not matching with nick as in: `# follow = dbucklin@www.davebucklin.com https://www.davebucklin.com/twtxt.txt?nick=dbucklin`
|
||||
// Fix: not matching with nick as in: `# follow = dbucklin@www.davebucklin.com https://www.davebucklin.com/twtxt.txt?nick=dbucklin`
|
||||
//$pattern = '/\s*' . $keyToFind . '\s*=\s*([^#\n]+)/';
|
||||
//$pattern = '/\s*' . $keyToFind . '\s*=\s*([^\s#]+)/'; // Only matches the first word
|
||||
preg_match($pattern, $string, $matches);
|
||||
|
|
@ -82,7 +86,8 @@ function getSingleParameter($keyToFind, $string) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function getDoubleParameter($keywordToFind, $string) {
|
||||
function getDoubleParameter($keywordToFind, $string)
|
||||
{
|
||||
// Returns string or null
|
||||
$pattern = '/#\s*' . preg_quote($keywordToFind, '/') . '\s*=\s*(\S+)\s*(\S+)/';
|
||||
// Matches "# <keyword> = <value> <value>"
|
||||
|
|
@ -96,7 +101,8 @@ function getDoubleParameter($keywordToFind, $string) {
|
|||
return null;
|
||||
}
|
||||
|
||||
function getReplyHashFromTwt(string $twtString): string {
|
||||
function getReplyHashFromTwt(string $twtString): string
|
||||
{
|
||||
// Extract the text between parentheses using regular expressions
|
||||
$pattern = '/\(#([^\)]+)\)/'; // Matches "(#<text>)"
|
||||
preg_match($pattern, $twtString, $matches);
|
||||
|
|
@ -109,7 +115,8 @@ function getReplyHashFromTwt(string $twtString): string {
|
|||
return '';
|
||||
}
|
||||
|
||||
function getImagesFromTwt(string $twtString) {
|
||||
function getImagesFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/(<img[^>]+>)/i';
|
||||
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
|
||||
|
||||
|
|
@ -122,7 +129,8 @@ function getImagesFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function getTagsFromTwt(string $twtString) {
|
||||
function getTagsFromTwt(string $twtString)
|
||||
{
|
||||
//$pattern = '/(?<!\()\B#\w+(?!\))/iu';
|
||||
//$pattern = '/(?<=\B)#(\w+)/';
|
||||
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
|
||||
|
|
@ -144,7 +152,8 @@ function getTagsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function getMentionsFromTwt(string $twtString) {
|
||||
function getMentionsFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/@<([^>]+)\s([^>]+)>/'; // Matches "@<nick url>"
|
||||
preg_match_all($pattern, $twtString, $matches, PREG_SET_ORDER);
|
||||
|
||||
|
|
@ -159,13 +168,14 @@ function getMentionsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceMentionsFromTwt(string $twtString): string {
|
||||
function replaceMentionsFromTwt(string $twtString): string
|
||||
{
|
||||
// Example input: 'Hello @<eapl.mx https://eapl.mx/twtxt.txt>, how are you? @<nick https://server.com/something/twtxt.txt>';
|
||||
// Example output: Hello <a href="?url=https://eapl.mx/twtxt.txt">@eapl.mx@eapl.mx/twtxt.txt</a>, how are you? <a href="?url=https://server.com/something/twtxt.txt">@nick@server.com/something/twtxt.txt</a>
|
||||
|
||||
$pattern = '/@<([^ ]+)\s([^>]+)>/';
|
||||
//$replacement = '<a href="/?url=$2">@$1</a>';
|
||||
$replacement = '<a href="'.str_replace("/index.php", "", $_SERVER["SCRIPT_NAME"]).'/profile?url=$2">@$1</a>';
|
||||
$replacement = '<a href="' . str_replace("/index.php", "", $_SERVER["SCRIPT_NAME"]) . '/profile?url=$2">@$1</a>';
|
||||
$replacement .= '<a href="$2" class="webmention"></a>'; // Adds a hidden link direcly to the twtxt.txt of the mentioned target
|
||||
#$twtString = '@<nick https://eapl.mx/twtxt.txt>';
|
||||
#$pattern = '/@<([^ ]+) ([^>]+)>/';
|
||||
|
|
@ -175,10 +185,11 @@ function replaceMentionsFromTwt(string $twtString): string {
|
|||
|
||||
// from https://github.com/hxii/picoblog/blob/master/picoblog.php
|
||||
//$pattern = '/\@<([a-zA-Z0-9\.]+)\W+(https?:\/\/[^>]+)>/';
|
||||
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
|
||||
//return preg_replace($pattern,'<a href="$2">@$1</a>',$twtString);
|
||||
}
|
||||
|
||||
function replaceLinksFromTwt(string $twtString) {
|
||||
function replaceLinksFromTwt(string $twtString)
|
||||
{
|
||||
|
||||
// TODO: Make this NOT match with `inline code` to avoid links in code-snippets
|
||||
// 1. Look into how yarnd handles this
|
||||
|
|
@ -194,7 +205,8 @@ function replaceLinksFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceMarkdownLinksFromTwt(string $twtString) {
|
||||
function replaceMarkdownLinksFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/\[([^\]]+)\]\(([^)]+)\)/';
|
||||
|
||||
$replacement = '<a href="$2">$1</a>';
|
||||
|
|
@ -203,7 +215,8 @@ function replaceMarkdownLinksFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceImagesFromTwt(string $twtString) {
|
||||
function replaceImagesFromTwt(string $twtString)
|
||||
{
|
||||
$pattern = '/!\[(.*?)\]\((.*?)\)/';
|
||||
//$replacement = '<img src="$2" alt="$1">';
|
||||
$replacement = '<a href="$2"><img src="$2" alt="$1"></a>';
|
||||
|
|
@ -212,11 +225,12 @@ function replaceImagesFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function replaceTagsFromTwt(string $twtString) {
|
||||
function replaceTagsFromTwt(string $twtString)
|
||||
{
|
||||
//$pattern = '/#(\w+)?/';
|
||||
//$pattern = '/(?<=\s)#(\w+)/';
|
||||
$pattern = '/(?<=\B)#([\p{L}\p{N}_]+)/u';
|
||||
|
||||
|
||||
//$replacement = '<a href="#">#\1</a>'; // Dummy link
|
||||
$replacement = '<a href="?search=$1" class="tag">#${1}</a>';
|
||||
$result = preg_replace($pattern, $replacement, $twtString);
|
||||
|
|
@ -224,17 +238,18 @@ function replaceTagsFromTwt(string $twtString) {
|
|||
return $result;
|
||||
}
|
||||
|
||||
function embedYoutubeFromTwt(string $twtString) {
|
||||
function embedYoutubeFromTwt(string $twtString)
|
||||
{
|
||||
|
||||
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
|
||||
// original regex source: https://gist.github.com/afeld/1254889#gistcomment-1253992
|
||||
$pattern = '/(?:youtube(?:-nocookie)?\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})/mi';
|
||||
|
||||
if(preg_match_all($pattern, $twtString, $youtubeLinks)) {
|
||||
|
||||
if (preg_match_all($pattern, $twtString, $youtubeLinks)) {
|
||||
|
||||
$youtubeLinks = array_unique($youtubeLinks[1]); // Remove dublicate cause by raw URLs conceverter to links
|
||||
|
||||
foreach ($youtubeLinks as $videoID) {
|
||||
$twtString .= '<iframe loading="lazy" src="https://www.youtube.com/embed/'.$videoID.'" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
|
||||
$twtString .= '<iframe loading="lazy" src="https://www.youtube.com/embed/' . $videoID . '" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -244,7 +259,8 @@ function embedYoutubeFromTwt(string $twtString) {
|
|||
}
|
||||
|
||||
|
||||
function getTimeElapsedString($timestamp, $full = false) {
|
||||
function getTimeElapsedString($timestamp, $full = false)
|
||||
{
|
||||
$now = new DateTime;
|
||||
$ago = new DateTime;
|
||||
$ago->setTimestamp($timestamp);
|
||||
|
|
@ -290,7 +306,8 @@ function getTimeElapsedString($timestamp, $full = false) {
|
|||
return $string ? implode(', ', $string) . " $agoText" : 'just now';
|
||||
}
|
||||
|
||||
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
|
||||
function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15)
|
||||
{
|
||||
# TODO: Process the Warning
|
||||
# Warning: file_get_contents(https://eapl.mx/twtxt.net):
|
||||
# failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in
|
||||
|
|
@ -309,7 +326,8 @@ function getCachedFileContentsOrUpdate($fileURL, $cacheDurationSecs = 15) {
|
|||
return $contents;
|
||||
}
|
||||
|
||||
function getCachedFileContents($filePath) {
|
||||
function getCachedFileContents($filePath)
|
||||
{
|
||||
$cacheFile = getCachedFileName($filePath);
|
||||
|
||||
// Check if cache file exists and it's not expired
|
||||
|
|
@ -320,31 +338,65 @@ function getCachedFileContents($filePath) {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Refresh the cached copy of a remote file using a conditional HTTP GET.
 *
 * The modification time of the existing cache file (if any) is sent as an
 * `If-Modified-Since` header. The cache file is rewritten only when the
 * request succeeds with a body and the server does not report the content
 * as unchanged.
 *
 * The function returns without touching the cache when:
 *  - no HTTP response headers are available (no HTTP exchange took place),
 *  - the final response status is 304 Not Modified,
 *  - a `Last-Modified` response header is not newer than the cache file,
 *  - the download itself failed.
 *
 * @param string $filePath URL of the file to fetch; also the key handed to getCachedFileName().
 * @return void
 */
function updateCachedFile($filePath)
{
    $cacheFilePath = getCachedFileName($filePath);
    # TODO: Report down URLs and stop loading them after a few tries

    # Get the last modification time of the local file (false when there is no cache yet)
    $lastModifiedTime = file_exists($cacheFilePath) ? filemtime($cacheFilePath) : false;
    $lastModifiedHeader = $lastModifiedTime ? gmdate('D, d M Y H:i:s', $lastModifiedTime) . ' GMT' : null;

    # Set up the HTTP context with the 'If-Modified-Since' header
    $options = [
        'http' => [
            'method' => 'GET',
            'header' => $lastModifiedHeader ? "If-Modified-Since: $lastModifiedHeader\r\n" : '',
        ],
    ];

    $context = stream_context_create($options);

    # Best-effort fetch: warnings are suppressed on purpose, failures are handled below
    $response = @file_get_contents($filePath, false, $context);

    # $http_response_header is populated by the HTTP wrapper in the local scope;
    # when it is missing no HTTP exchange happened, so there is nothing to cache
    if (!isset($http_response_header)) {
        return;
    }

    $statusCode = null;

    foreach ($http_response_header as $header) {
        # After redirects the array holds several status lines; the last one wins
        if (preg_match('#^HTTP/\S+\s+(\d{3})#', $header, $matches)) {
            $statusCode = (int) $matches[1];
            continue;
        }

        # Look for the Last-Modified header
        if (preg_match('/^Last-Modified:\s*(.+)$/i', $header, $matches)) {
            // Convert to Unix timestamp
            $lastModifiedTimestamp = strtotime($matches[1]);

            if (!($lastModifiedTimestamp > $lastModifiedTime)) {
                # Not modified since last request. No update needed.
                return;
            }
        }
    }

    # A 304 reply has no body: file_get_contents() yields '' (not false), and
    # writing that would wipe the cached content. Keep the existing cache.
    if ($statusCode === 304) {
        return;
    }

    # Save the content if it was successfully retrieved
    if ($response !== false) {
        file_put_contents($cacheFilePath, $response);
    }
}
|
||||
|
||||
function getTwtsFromTwtxtString($url) {
|
||||
function getTwtsFromTwtxtString($url)
|
||||
{
|
||||
$fileContent = getCachedFileContents($url);
|
||||
|
||||
if (is_null($fileContent)) {
|
||||
|
|
@ -398,8 +450,8 @@ function getTwtsFromTwtxtString($url) {
|
|||
// mostly for (re)feeds from Mastodon etc.
|
||||
if (str_contains($twtxtData->nick, "@")) {
|
||||
$str = $twtxtData->nick;
|
||||
$str = ltrim($str,"@");
|
||||
$twtxtData->nick = explode("@",$str)[0]; // take the first [0] from splitting the nick at "@"
|
||||
$str = ltrim($str, "@");
|
||||
$twtxtData->nick = explode("@", $str)[0]; // take the first [0] from splitting the nick at "@"
|
||||
}
|
||||
|
||||
// Fallback for nick and url if not set in twtxt.txt
|
||||
|
|
@ -436,7 +488,7 @@ function getTwtsFromTwtxtString($url) {
|
|||
//$twtContent = str_replace("\u{2028}", "\n<br>\n", $twtContent);
|
||||
$twtContent = str_replace("\u{2028}", "\n", $twtContent);
|
||||
|
||||
$twtContent = embedYoutubeFromTwt($twtContent);
|
||||
$twtContent = embedYoutubeFromTwt($twtContent);
|
||||
|
||||
// Get and remove the hash
|
||||
$hash = getReplyHashFromTwt($twtContent);
|
||||
|
|
@ -454,7 +506,7 @@ function getTwtsFromTwtxtString($url) {
|
|||
//$twtContent = replaceLinksFromTwt($twtContent);
|
||||
|
||||
// TODO: Make ?tag= filtering feature
|
||||
$twtContent = replaceTagsFromTwt($twtContent);
|
||||
$twtContent = replaceTagsFromTwt($twtContent);
|
||||
|
||||
// TODO: Get mentions
|
||||
$mentions = getMentionsFromTwt($twtContent);
|
||||
|
|
@ -493,7 +545,8 @@ function getTwtsFromTwtxtString($url) {
|
|||
return $twtxtData;
|
||||
}
|
||||
|
||||
function insertFollowingURL($urlString) {
|
||||
function insertFollowingURL($urlString)
|
||||
{
|
||||
// Check if it's a valid URL
|
||||
// Retrieve the nickname, if didn't find a nick, ask for one
|
||||
|
||||
|
|
@ -509,22 +562,26 @@ function insertFollowingURL($urlString) {
|
|||
echo $result;
|
||||
}
|
||||
|
||||
/**
 * Build the path of the cache file that belongs to a given source path or URL.
 *
 * The file name is the SHA-256 hex digest of $filePath, placed inside the
 * `private/cache/` directory one level above this file's directory.
 *
 * @param string $filePath Source path or URL used as the cache key.
 * @return string Path of the corresponding cache file.
 */
function getCachedFileName($filePath)
{
    // TODO: make better path
    $cacheDirectory = __DIR__ . '/../private/cache/';
    $digest = hash('sha256', $filePath);

    return $cacheDirectory . $digest;
}
|
||||
|
||||
if (!function_exists('str_starts_with')) {
    /**
     * Polyfill for PHP 8's str_starts_with() on older runtimes.
     *
     * Mirrors the native function: the comparison is byte-wise and
     * case-sensitive, and an empty needle always matches.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Prefix to look for.
     * @return bool True when $haystack begins with $needle.
     */
    function str_starts_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // strncmp() with length 0 compares nothing and returns 0, so an empty
        // needle yields true, matching the native implementation.
        return strncmp($haystack, $needle, strlen($needle)) === 0;
    }
}
|
||||
if (!function_exists('str_ends_with')) {
    /**
     * Polyfill for PHP 8's str_ends_with() on older runtimes.
     *
     * Mirrors the native function: the comparison is byte-wise and
     * case-sensitive, and an empty needle always matches.
     *
     * @param string $haystack String to inspect.
     * @param string $needle   Suffix to look for.
     * @return bool True when $haystack ends with $needle.
     */
    function str_ends_with($haystack, $needle)
    {
        $haystack = (string) $haystack;
        $needle = (string) $needle;

        // Native str_ends_with() treats the empty string as a suffix of everything.
        if ($needle === '') {
            return true;
        }

        $needleLength = strlen($needle);

        // A needle longer than the haystack can never be a suffix; checking the
        // length first also keeps the negative offset within bounds.
        return $needleLength <= strlen($haystack)
            && substr_compare($haystack, $needle, -$needleLength) === 0;
    }
}
|
||||
if (!function_exists('str_contains')) {
    /**
     * Polyfill for PHP 8's str_contains() on older runtimes.
     *
     * Mirrors the native function: the search is byte-wise and case-sensitive,
     * and an empty needle always matches. Uses strpos() rather than
     * mb_strpos() so it needs no extension and does not depend on the
     * configured internal encoding.
     *
     * @param string $haystack String to search in.
     * @param string $needle   Substring to search for.
     * @return bool True when $needle occurs anywhere in $haystack.
     */
    function str_contains($haystack, $needle)
    {
        $needle = (string) $needle;

        // Native str_contains() reports the empty string as contained in any string.
        return $needle === '' || strpos((string) $haystack, $needle) !== false;
    }
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue