Replace Slimdown.php with Parsedown.php, so Markdown now works

This commit is contained in:
sørenpeter 2024-11-30 16:11:33 +01:00
parent e1f6072b31
commit f9f0934570
6 changed files with 2034 additions and 324 deletions

1994
libs/Parsedown.php Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,155 +0,0 @@
<?php
/**
* Slimdown - A simple regex-based Markdown parser in PHP. Supports the
* following elements (and can be extended via `Slimdown::add_rule()`):
*
* - Headers
* - Links
* - Bold
* - Emphasis
* - Deletions
* - Quotes
* - Code blocks
* - Inline code
* - Blockquotes
* - Ordered/unordered lists
* - Horizontal rules
* - Images
*
* Author: Johnny Broadway <johnny@johnnybroadway.com>
* Website: https://github.com/jbroadway/slimdown
* License: MIT
*/
/**
 * Minimal regex-driven Markdown renderer.
 *
 * Rendering is a single pass over an ordered rule table: every rule is a PCRE
 * pattern mapped either to a replacement string or to the name of a static
 * callback on this class. Fenced code blocks are swapped for `{{{n}}}`
 * placeholders by the first rule and swapped back by the last one, so none of
 * the rules in between can rewrite their contents.
 */
class Slimdown {
    /**
     * Ordered map of PCRE pattern => replacement.
     *
     * A replacement is either a plain replacement string (with `\1`-style
     * back-references) or a callable string; render() decides with
     * is_callable(). Order matters: later rules see the output of earlier ones.
     */
    public static $rules = array (
        '/```(.*?)```/s' => self::class .'::code_parse', // fenced code blocks -> {{{n}}} placeholders
        '/\n(#+)\s+(.*)/' => self::class .'::header', // headers (whitespace after the #s is required)
        '/\!\[([^\[]*?)\]\(([^\)]+)\)/' => self::class .'::img', // images
        '/\[([^\[]+)\]\(([^\)]+)\)/' => self::class .'::link', // links
        '/(\*\*|__)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<strong>\2</strong>', // bold
        '/(\*|_)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<em>\2</em>', // emphasis
        '/(\~\~)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<del>\2</del>', // del
        '/\:\"(.*?)\"\:/' => '<q>\1</q>', // quote
        '/`(.*?)`/' => '<code>\1</code>', // inline code
        '/\n\*(.*)/' => self::class .'::ul_list', // ul lists
        '/\n[0-9]+\.(.*)/' => self::class .'::ol_list', // ol lists
        '/\n(&gt;|\>)(.*)/' => self::class .'::blockquote', // blockquotes
        '/\n-{5,}/' => "\n<hr />", // horizontal rule
        '/\n([^\n]+)\n/' => self::class .'::para', // paragraph pass (currently only trims lines)
        '/<\/ul>\s?<ul>/' => '', // merge adjacent single-item <ul>s
        '/<\/ol>\s?<ol>/' => '', // merge adjacent single-item <ol>s
        '/<\/blockquote><blockquote>/' => "\n", // merge adjacent blockquotes
        '/<a href=\'(.*?)\'>/' => self::class .'::fix_link', // restore _ and * in link URLs
        '/<img src=\'(.*?)\'/' => self::class .'::fix_img', // restore _ and * in image URLs
        // The <p> wrapper is optional: para() does not emit <p> tags, so a
        // pattern that insisted on them would leave the raw placeholder behind.
        '/(?:<p>)?\{\{\{([0-9]+)\}\}\}(?:<\/p>)?/' => self::class .'::reinsert_code_blocks' // re-insert code blocks
    );

    /** Rendered `<pre><code>` blocks of the current render() call, indexed by placeholder number. */
    private static $code_blocks = [];

    /**
     * Escape a fenced code block, store it, and return its placeholder.
     *
     * @param array $regs Match groups; $regs[1] is the text between the fences.
     * @return string Placeholder of the form `{{{n}}}`.
     */
    private static function code_parse ($regs) {
        $item = htmlentities ($regs[1], ENT_COMPAT);
        $item = str_replace ("\n\n", '<br>', $item);
        $item = str_replace ("\n", '<br>', $item);

        // Drop the line breaks that directly follow / precede the fences.
        // The marker is plain ASCII, so byte-wise functions are safe on UTF-8.
        $marker = '<br>';
        $markerLength = strlen ($marker);
        while (strncmp ($item, $marker, $markerLength) === 0) {
            $item = (string) substr ($item, $markerLength);
        }
        while (substr ($item, -$markerLength) === $marker) {
            $item = (string) substr ($item, 0, -$markerLength);
        }

        // Store code blocks with placeholders to avoid other regexes affecting them
        self::$code_blocks[] = sprintf ("<pre><code>%s</code></pre>", trim ($item));
        return sprintf ("{{{%d}}}", count (self::$code_blocks) - 1);
    }

    /**
     * Swap a `{{{n}}}` placeholder back for the stored code block.
     *
     * @param array $regs Match groups; $regs[1] is the placeholder number.
     * @return string The stored HTML, or the untouched match when no block
     *                with that number exists (e.g. the text literally
     *                contained `{{{7}}}`).
     */
    private static function reinsert_code_blocks ($regs) {
        $index = (int) $regs[1];
        if (! isset (self::$code_blocks[$index])) {
            return $regs[0];
        }
        return self::$code_blocks[$index];
    }

    /**
     * Normalise a single line of text.
     *
     * Lines that already start with a block-level tag are returned unchanged;
     * other non-empty lines are trimmed. No `<p>` wrapper is added.
     *
     * @param array $regs Match groups; $regs[1] is the line without newlines.
     * @return string
     */
    private static function para ($regs) {
        $line = $regs[1];
        $trimmed = trim ($line);
        if (preg_match ('/^<\/?(ul|ol|li|h|p|bl|table|tr|th|td|code)/', $trimmed)) {
            return "\n" . $line . "\n";
        }
        if (! empty ($trimmed)) {
            return sprintf ("\n%s\n", $trimmed);
        }
        return $trimmed;
    }

    /**
     * Wrap one `*` list line in its own `<ul>`; adjacent lists are merged by a later rule.
     *
     * @param array $regs Match groups; $regs[1] is the item text.
     * @return string
     */
    private static function ul_list ($regs) {
        $item = $regs[1];
        return sprintf ("\n<ul>\n\t<li>%s</li>\n</ul>", trim ($item));
    }

    /**
     * Wrap one `1.` list line in its own `<ol>`; adjacent lists are merged by a later rule.
     *
     * @param array $regs Match groups; $regs[1] is the item text.
     * @return string
     */
    private static function ol_list ($regs) {
        $item = $regs[1];
        return sprintf ("\n<ol>\n\t<li>%s</li>\n</ol>", trim ($item));
    }

    /**
     * Wrap one `>` line in a `<blockquote>`.
     *
     * @param array $regs Match groups; $regs[2] is the quoted text.
     * @return string
     */
    private static function blockquote ($regs) {
        $item = $regs[2];
        return sprintf ("\n<blockquote>%s</blockquote>", trim ($item));
    }

    /**
     * Build an `<hN>` element from a header line.
     *
     * @param array $regs Match groups; $regs[1] is the run of `#`, $regs[2] the title.
     * @return string
     */
    private static function header ($regs) {
        // HTML only defines h1..h6, so deeper levels are clamped.
        $level = min (strlen ($regs[1]), 6);
        return sprintf ('<h%d>%s</h%d>', $level, trim ($regs[2]), $level);
    }

    /**
     * Build an `<a>` element.
     *
     * @param array $regs Match groups; $regs[1] is the link text, $regs[2] the URL.
     * @return string
     */
    private static function link ($regs) {
        list ($tmp, $text, $link) = $regs;
        // Substitute _ and * in links so they don't break the URLs
        $link = str_replace (['_', '*'], ['{^^^}', '{~~~}'], $link);
        return sprintf ('<a href=\'%s\'>%s</a>', $link, $text);
    }

    /**
     * Build an `<img>` element.
     *
     * @param array $regs Match groups; $regs[1] is the alt text, $regs[2] the URL.
     * @return string
     */
    private static function img ($regs) {
        list ($tmp, $text, $link) = $regs;
        // Substitute _ and * in links so they don't break the URLs
        $link = str_replace (['_', '*'], ['{^^^}', '{~~~}'], $link);
        return sprintf ('<img src=\'%s\' alt=\'%s\' />', $link, $text);
    }

    /**
     * Undo the `_` / `*` substitution made by link().
     *
     * @param array $regs Match groups; $regs[1] is the href value.
     * @return string
     */
    private static function fix_link ($regs) {
        // Replace substitutions so links are preserved
        $fixed_link = str_replace (['{^^^}', '{~~~}'], ['_', '*'], $regs[1]);
        return sprintf ('<a href=\'%s\'>', $fixed_link);
    }

    /**
     * Undo the `_` / `*` substitution made by img().
     *
     * @param array $regs Match groups; $regs[1] is the src value.
     * @return string
     */
    private static function fix_img ($regs) {
        // Replace substitutions so links are preserved
        $fixed_link = str_replace (['{^^^}', '{~~~}'], ['_', '*'], $regs[1]);
        return sprintf ('<img src=\'%s\'', $fixed_link);
    }

    /**
     * Add a rule, or replace the rule registered under the same pattern.
     *
     * @param string $regex       PCRE pattern including delimiters.
     * @param mixed  $replacement Replacement string or callable.
     */
    public static function add_rule ($regex, $replacement) {
        self::$rules[$regex] = $replacement;
    }

    /**
     * Render some Markdown into HTML.
     *
     * @param string $text Markdown source.
     * @return string Rendered HTML, trimmed.
     */
    public static function render ($text) {
        self::$code_blocks = [];
        // Most block rules anchor on "\n", so pad both ends.
        $text = "\n" . $text . "\n";
        foreach (self::$rules as $regex => $replacement) {
            $result = is_callable ($replacement)
                ? preg_replace_callback ($regex, $replacement, $text)
                : preg_replace ($regex, $replacement, $text);
            // preg_* return null on failure (e.g. backtrack limit); skip that
            // rule rather than discard the whole text.
            if ($result !== null) {
                $text = $result;
            }
        }
        return trim ($text);
    }
}

View file

@ -1,156 +0,0 @@
<?php
/**
* Slimdown - A simple regex-based Markdown parser in PHP. Supports the
* following elements (and can be extended via `Slimdown::add_rule()`):
*
* - Headers
* - Links
* - Bold
* - Emphasis
* - Deletions
* - Quotes
* - Code blocks
* - Inline code
* - Blockquotes
* - Ordered/unordered lists
* - Horizontal rules
* - Images
*
* Author: Johnny Broadway <johnny@johnnybroadway.com>
* Website: https://github.com/jbroadway/slimdown
* License: MIT
*/
/**
 * Minimal regex-driven Markdown renderer (project-local variant of Slimdown).
 *
 * render() walks an ordered table of PCRE rules. Each rule maps a pattern to a
 * replacement string or to the name of a static callback on this class.
 * Fenced code blocks are replaced by `{{{n}}}` placeholders up front and put
 * back by the last rule, which shields their contents from every other rule.
 */
class Slimdown {
    /**
     * Ordered map of PCRE pattern => replacement.
     *
     * Values are either replacement strings (with `\1`-style back-references)
     * or callable strings; render() tells them apart with is_callable().
     * Rules run top to bottom, each on the previous rule's output.
     */
    public static $rules = array (
        '/```(.*?)```/s' => self::class .'::code_parse', // fenced code blocks -> {{{n}}} placeholders
        // Headers need whitespace between the #s and the title so `#hashtags`
        // are left alone. Only the #s are captured, because header() derives
        // the level from the capture's length.
        '/\n(#+)\s+(.*)/' => self::class .'::header', // headers
        '/\!\[(.*?)\]\(([^\)]+)\)/' => self::class .'::img', // images (alt text may be empty)
        '/\[([^\[]+)\]\(([^\)]+)\)/' => self::class .'::link', // links
        '/(\*\*|__)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<strong>\2</strong>', // bold
        '/(\*|_)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<em>\2</em>', // emphasis
        '/(\~\~)(?=(?:(?:[^`]*`[^`\r\n]*`)*[^`]*$))(?![^\/<]*>.*<\/.+>)(.*?)\1/' => '<del>\2</del>', // del
        '/\:\"(.*?)\"\:/' => '<q>\1</q>', // quote
        '/`(.*?)`/' => '<code>\1</code>', // inline code
        '/\n\*(.*)/' => self::class .'::ul_list', // ul lists
        '/\n[0-9]+\.(.*)/' => self::class .'::ol_list', // ol lists
        '/\n(&gt;|\>)(.*)/' => self::class .'::blockquote', // blockquotes
        '/\n-{5,}/' => "\n<hr />", // horizontal rule
        '/\n([^\n]+)\n/' => self::class .'::para', // paragraph pass (currently only trims lines)
        '/<\/ul>\s?<ul>/' => '', // merge adjacent single-item <ul>s
        '/<\/ol>\s?<ol>/' => '', // merge adjacent single-item <ol>s
        '/<\/blockquote><blockquote>/' => "\n", // merge adjacent blockquotes
        '/<a href=\'(.*?)\'>/' => self::class .'::fix_link', // restore _ and * in link URLs
        '/<img src=\'(.*?)\'/' => self::class .'::fix_img', // restore _ and * in image URLs
        // para() adds no <p> tags, so the wrapper must be optional here or the
        // placeholders would never be replaced.
        '/(?:<p>)?\{\{\{([0-9]+)\}\}\}(?:<\/p>)?/' => self::class .'::reinsert_code_blocks' // re-insert code blocks
    );

    /** Rendered `<pre><code>` blocks of the current render() call, indexed by placeholder number. */
    private static $code_blocks = [];

    /**
     * Escape a fenced code block, remember it, and hand back its placeholder.
     *
     * @param array $regs Match groups; $regs[1] is the text between the fences.
     * @return string Placeholder of the form `{{{n}}}`.
     */
    private static function code_parse ($regs) {
        $item = htmlentities ($regs[1], ENT_COMPAT);
        $item = str_replace ("\n\n", '<br>', $item);
        $item = str_replace ("\n", '<br>', $item);

        // Strip the breaks produced by the newlines next to the fences.
        // '<br>' is ASCII, so byte-wise string functions are UTF-8 safe here.
        $marker = '<br>';
        $markerLength = strlen ($marker);
        while (strncmp ($item, $marker, $markerLength) === 0) {
            $item = (string) substr ($item, $markerLength);
        }
        while (substr ($item, -$markerLength) === $marker) {
            $item = (string) substr ($item, 0, -$markerLength);
        }

        // Store code blocks with placeholders to avoid other regexes affecting them
        self::$code_blocks[] = sprintf ("<pre><code>%s</code></pre>", trim ($item));
        return sprintf ("{{{%d}}}", count (self::$code_blocks) - 1);
    }

    /**
     * Replace a `{{{n}}}` placeholder with the code block stored under n.
     *
     * @param array $regs Match groups; $regs[1] is the placeholder number.
     * @return string The stored HTML, or the match itself when no such block
     *                exists (the text merely looked like a placeholder).
     */
    private static function reinsert_code_blocks ($regs) {
        $index = (int) $regs[1];
        if (! isset (self::$code_blocks[$index])) {
            return $regs[0];
        }
        return self::$code_blocks[$index];
    }

    /**
     * Normalise a single line of text.
     *
     * Lines already starting with a block-level tag pass through untouched;
     * other non-empty lines are trimmed. No `<p>` tags are added, which
     * avoids the extra vertical margins they would bring.
     *
     * @param array $regs Match groups; $regs[1] is the line without newlines.
     * @return string
     */
    private static function para ($regs) {
        $line = $regs[1];
        $trimmed = trim ($line);
        if (preg_match ('/^<\/?(ul|ol|li|h|p|bl|table|tr|th|td|code)/', $trimmed)) {
            return "\n" . $line . "\n";
        }
        if (! empty ($trimmed)) {
            return sprintf ("\n%s\n", $trimmed);
        }
        return $trimmed;
    }

    /**
     * Wrap one `*` list line in its own `<ul>`; a later rule merges neighbours.
     *
     * @param array $regs Match groups; $regs[1] is the item text.
     * @return string
     */
    private static function ul_list ($regs) {
        $item = $regs[1];
        return sprintf ("\n<ul>\n\t<li>%s</li>\n</ul>", trim ($item));
    }

    /**
     * Wrap one `1.` list line in its own `<ol>`; a later rule merges neighbours.
     *
     * @param array $regs Match groups; $regs[1] is the item text.
     * @return string
     */
    private static function ol_list ($regs) {
        $item = $regs[1];
        return sprintf ("\n<ol>\n\t<li>%s</li>\n</ol>", trim ($item));
    }

    /**
     * Wrap one `>` line in a `<blockquote>`.
     *
     * @param array $regs Match groups; $regs[2] is the quoted text.
     * @return string
     */
    private static function blockquote ($regs) {
        $item = $regs[2];
        return sprintf ("\n<blockquote>%s</blockquote>", trim ($item));
    }

    /**
     * Build an `<hN>` element from a header line.
     *
     * @param array $regs Match groups; $regs[1] is the run of `#`, $regs[2] the title.
     * @return string
     */
    private static function header ($regs) {
        // One level per '#', capped at 6 because HTML stops at <h6>.
        $level = min (strlen ($regs[1]), 6);
        return sprintf ('<h%d>%s</h%d>', $level, trim ($regs[2]), $level);
    }

    /**
     * Build an `<a>` element.
     *
     * @param array $regs Match groups; $regs[1] is the link text, $regs[2] the URL.
     * @return string
     */
    private static function link ($regs) {
        list ($tmp, $text, $link) = $regs;
        // Substitute _ and * in links so they don't break the URLs
        $link = str_replace (['_', '*'], ['{^^^}', '{~~~}'], $link);
        return sprintf ('<a href=\'%s\'>%s</a>', $link, $text);
    }

    /**
     * Build an `<img>` element.
     *
     * @param array $regs Match groups; $regs[1] is the alt text, $regs[2] the URL.
     * @return string
     */
    private static function img ($regs) {
        list ($tmp, $text, $link) = $regs;
        // Substitute _ and * in links so they don't break the URLs
        $link = str_replace (['_', '*'], ['{^^^}', '{~~~}'], $link);
        return sprintf ('<img src=\'%s\' alt=\'%s\' />', $link, $text);
    }

    /**
     * Undo the `_` / `*` substitution made by link().
     *
     * @param array $regs Match groups; $regs[1] is the href value.
     * @return string
     */
    private static function fix_link ($regs) {
        // Replace substitutions so links are preserved
        $fixed_link = str_replace (['{^^^}', '{~~~}'], ['_', '*'], $regs[1]);
        return sprintf ('<a href=\'%s\'>', $fixed_link);
    }

    /**
     * Undo the `_` / `*` substitution made by img().
     *
     * @param array $regs Match groups; $regs[1] is the src value.
     * @return string
     */
    private static function fix_img ($regs) {
        // Replace substitutions so links are preserved
        $fixed_link = str_replace (['{^^^}', '{~~~}'], ['_', '*'], $regs[1]);
        return sprintf ('<img src=\'%s\'', $fixed_link);
    }

    /**
     * Add a rule, or replace the rule registered under the same pattern.
     *
     * @param string $regex       PCRE pattern including delimiters.
     * @param mixed  $replacement Replacement string or callable.
     */
    public static function add_rule ($regex, $replacement) {
        self::$rules[$regex] = $replacement;
    }

    /**
     * Render some Markdown into HTML.
     *
     * @param string $text Markdown source.
     * @return string Rendered HTML, trimmed.
     */
    public static function render ($text) {
        self::$code_blocks = [];
        // Most block rules anchor on "\n", so pad both ends.
        $text = "\n" . $text . "\n";
        foreach (self::$rules as $regex => $replacement) {
            $result = is_callable ($replacement)
                ? preg_replace_callback ($regex, $replacement, $text)
                : preg_replace ($regex, $replacement, $text);
            // A null result means the PCRE call failed; keep the text produced
            // so far instead of wiping it.
            if ($result !== null) {
                $text = $result;
            }
        }
        return trim ($text);
    }
}

View file

@ -141,11 +141,6 @@ img.avatar {
border: none;
}
.embed-video {
width: 100%;
aspect-ratio: 16/9;
}
a.author {
text-decoration: none;
color: var(--text);
@ -174,7 +169,6 @@ a.author {
line-height: 1;
}
.profile p {
margin: 0.2rem 0;
color: var(--text-light);
@ -278,14 +272,33 @@ article .twt-msg {
overflow-wrap: anywhere;
}
article .twt-msg p {
margin: 0.25rem 0;
}
article .twt-msg > blockquote {
margin: 0;
margin: 0.5rem 0;
border-left: thick solid grey;
padding: 0.25rem 0.5rem;
display: inline-block;
font-style: italic;
}
article .twt-msg ul,
article .twt-msg ol {
padding-left: 2rem;
}
article .twt-msg li {
padding-left: 0.5rem;
}
article .twt-msg pre {
padding: 0.25rem 0.5rem;
border-radius: 0.25rem;
}
article .twt-msg img {
margin: 0.25rem -0.25rem;
display: block;
@ -299,6 +312,14 @@ article .twt-msg > img:last-child {
margin-bottom: 0;
}
article .embed-video {
margin-top: 0.5rem;
display: block;
width: 100%;
aspect-ratio: 16/9;
border-radius: 0.25rem;
}
article small {
font-size: small;
}

View file

@ -231,7 +231,7 @@ function embedYoutubeFromTwt(string $twtString) {
//echo "</pre>";
foreach ($youtubeLinks as $videoID) {
$twtString .= '<br><iframe loading="lazy" src="https://www.youtube.com/embed/'.$videoID.'" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
$twtString .= '<iframe loading="lazy" src="https://www.youtube.com/embed/'.$videoID.'" class="embed-video" allow="encrypted-media" title="" allowfullscreen="allowfullscreen" frameborder="0"></iframe>';
}
}
@ -430,13 +430,18 @@ function getTwtsFromTwtxtString($url) {
// For some reason I was having trouble finding this nomenclature
// that's why I leave the UTF-8 representation for future reference
$twtContent = str_replace("\u{2028}", "\n<br>\n", $twtContent);
//$twtContent = str_replace("\u{2028}", "\n<br>\n", $twtContent);
$twtContent = str_replace("\u{2028}", "\n", $twtContent);
$twtContent = replaceMarkdownLinksFromTwt($twtContent);
$twtContent = replaceImagesFromTwt($twtContent);
//$twtContent = replaceMarkdownLinksFromTwt($twtContent);
//$twtContent = replaceImagesFromTwt($twtContent);
//$twtContent = Slimdown::render($twtContent);
$twtContent = embedYoutubeFromTwt($twtContent); // TODO: Find the right order to embed youtube, so we don't get two video due to links containing URL as link texts
$twtContent = replaceLinksFromTwt($twtContent); // TODO
$Parsedown = new Parsedown();
$twtContent = $Parsedown->text($twtContent);
$twtContent = embedYoutubeFromTwt($twtContent);
//$twtContent = replaceLinksFromTwt($twtContent);
// Get and remove the hash
$hash = getReplyHashFromTwt($twtContent);

View file

@ -21,6 +21,7 @@ require_once('libs/session.php');
require_once('libs/twtxt.php');
require_once('libs/hash.php');
require_once('libs/Slimdown.php');
require_once('libs/Parsedown.php');
const TWTS_PER_PAGE = 50;