devdaily home | apple | java | perl | unix | directory | blog

What this is

This file is included in the DevDaily.com "PHP Source Code Warehouse" project. The intent of this project is to help you "Learn PHP by Example" TM.

Other links

The source code

<?php
/*
Plugin Name: MarkDown
Plugin URI: http://daringfireball.net/projects/markdown/
Description: Markdown is a text-to-HTML conversion tool for web writers. <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format, then convert it to structurally valid XHTML. This plugin <strong>enables Markdown for your posts and comments</strong>. Written by <a href="http://daringfireball.net/">John Gruber</a> in Perl, translated to PHP by <a href="http://www.michelf.com/">Michel Fortin</a>, and made a WP plugin by <a href="http://photomatt.net/">Matt</a>. If you use this you should disable Textile 1 and 2 because the syntax conflicts.
Version: 1.0b4
Author: John Gruber
Author URI: http://daringfireball.net/
*/ 


/*
Note to code readers: I've stripped most of the comments from the source, see the original at http://www.michelf.com/php-markdown/?code to get the unaltered version. --Matt
*/

#
# Global configuration and shared state for the Markdown converter.
#

# Version stamps: this PHP port, and the Markdown syntax it implements.
$MarkdownPHPVersion    = '1.0b4.1'; # Sun 4 Apr 2004
$MarkdownSyntaxVersion = '1.0b4'; # Thu 25 Mar 2004

$g_empty_element_suffix = " />";     # Change to ">" for HTML output
$g_tab_width = 4;

# Regex fragment matching text that may contain balanced [brackets],
# nested up to $g_nested_brackets_depth levels.
$g_nested_brackets_depth = 6;
$g_nested_brackets = 
	str_repeat('(?>[^\[\]]+|\[', $g_nested_brackets_depth).
	str_repeat('\])*', $g_nested_brackets_depth);

# Maps each Markdown-significant character to its MD5 digest. The digest is
# substituted into the text while it is being processed and swapped back by
# _UnescapeSpecialChars() at the end.
$g_escape_table = array(
	"\\" => md5("\\"),
	"`" => md5("`"),
	"*" => md5("*"),
	"_" => md5("_"),
	"{" => md5("{"),
	"}" => md5("}"),
	"[" => md5("["),
	"]" => md5("]"),
	"(" => md5("("),
	")" => md5(")"),
	"#" => md5("#"),
	"." => md5("."),
	"!" => md5("!")
);

# Same digests, keyed by the backslash-escaped form of each character
# (e.g. "\*"). Initialised explicitly instead of being grown from an
# undefined variable.
$g_backslash_escape_table = array();
foreach ($g_escape_table as $key => $char)
	$g_backslash_escape_table["\\$key"] = $char;

# Per-document state; Markdown() resets these at the start of every call.
$g_urls = array();
$g_titles = array();
$g_html_blocks = array();

#
# Main entry point: converts Markdown-formatted $text and returns the result.
# Per-document link and HTML-block tables are reset on every call.
#
function Markdown($text) {
	global $g_urls, $g_titles, $g_html_blocks;

	# Start each document with empty lookup tables.
	$g_html_blocks = array();
	$g_titles = array();
	$g_urls = array();

	# Normalise line endings to "\n" and make sure the text ends with a
	# blank line, then expand tabs and empty out whitespace-only lines.
	$text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";
	$text = preg_replace('/^[ \t]+$/m', '', _Detab($text));

	# The remaining stages each take the text and return the transformed text.
	$stages = array(
		'_HashHTMLBlocks',
		'_StripLinkDefinitions',
		'_EscapeSpecialChars',
		'_RunBlockGamut',
		'_UnescapeSpecialChars',
	);
	foreach ($stages as $stage) {
		$text = $stage($text);
	}

	return $text . "\n";
}

#
# Removes reference-style link definitions from $text. Each definition is
# handed to _StripLinkDefinitions_callback(), which records it and replaces
# it with an empty string.
#
function _StripLinkDefinitions($text) {
	$definition_re = '{
						^[ \t]*\[(.+)\]:	# id = $1
						  [ \t]*
						  \n?				# maybe *one* newline
						  [ \t]*
						(\S+)				# url = $2
						  [ \t]*
						  \n?				# maybe one newline
						  [ \t]*
						(?:
							# Todo: Titles are delimited by "quotes" or (parens).
							["(]
							(.+?)			# title = $3
							[")]
							[ \t]*
						)?	# title is optional
						(?:\n+|\Z)
		}xm';

	return preg_replace_callback(
		$definition_re,
		'_StripLinkDefinitions_callback',
		$text);
}
#
# Stores one link definition: the lower-cased id ($matches[1]) becomes the
# key into $g_urls and, when a title was captured, into $g_titles.
# Returns '' so the definition disappears from the text.
#
function _StripLinkDefinitions_callback($matches) {
	global $g_urls, $g_titles;

	$id = strtolower($matches[1]);
	$g_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

	# No title captured: nothing more to record.
	if (!isset($matches[3])) {
		return '';
	}

	$g_titles[$id] = htmlentities($matches[3]);
	return '';
}

#
# Replaces block-level HTML in $text with placeholders. Three patterns are
# applied in order; every match goes through _HashHTMLBlocks_callback().
#
function _HashHTMLBlocks($text) {
	$block_tag_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script';

	$passes = array(
		# Pass 1: the closing tag sits at the start of its own line.
		"{
				(						# save in $1
					^					# start of line  (with /m)
					<($block_tag_re)	# start tag = $2
					\\b					# word break
					(.*\\n)*?			# any number of lines, minimally matching
					</\\2>				# the matching end tag
					[ \\t]*				# trailing spaces/tabs
					(?=\\n+|\\Z)	# followed by a newline or end of document
				)
		}xm",

		# Pass 2: same, but the closing tag may appear anywhere on its line.
		"{
				(						# save in $1
					^					# start of line  (with /m)
					<($block_tag_re)	# start tag = $2
					\\b					# word break
					(.*\\n)*?			# any number of lines, minimally matching
					.*</\\2>				# the matching end tag
					[ \\t]*				# trailing spaces/tabs
					(?=\\n+|\\Z)	# followed by a newline or end of document
				)
		}xm",

		# Pass 3: a stand-alone <hr> tag surrounded by blank lines.
		'{
				(?:
					(?<=\n\n)		# Starting after a blank line
					|				# or
					\A\n?			# the beginning of the doc
				)
				(						# save in $1
					[ \t]*
					<(hr)				# start tag = $2
					\b					# word break
					([^<>])*?			# 
					/?>					# the matching end tag
					(?=\n{2,}|\Z)		# followed by a blank line or end of document
				)
		}x',
	);

	foreach ($passes as $pattern) {
		$text = preg_replace_callback($pattern, '_HashHTMLBlocks_callback', $text);
	}

	return $text;
}
#
# Stores the matched HTML block ($matches[1]) in $g_html_blocks under its
# MD5 digest and returns the digest wrapped in blank lines as a placeholder.
#
function _HashHTMLBlocks_callback($matches) {
	global $g_html_blocks;

	$block = $matches[1];
	$digest = md5($block);
	$g_html_blocks[$digest] = $block;

	return "\n\n" . $digest . "\n\n";
}

#
# Applies all block-level transformations to $text, in a fixed order, and
# returns the result.
#
function _RunBlockGamut($text) {
	global $g_empty_element_suffix;

	$text = _DoHeaders($text);

	# Horizontal rules: a line made of three or more "*" or "-" markers.
	$rule = "\n<hr$g_empty_element_suffix\n";
	$text = preg_replace(
		array('/^( ?\* ?){3,}$/m',
			  '/^( ?- ?){3,}$/m'),
		$rule,
		$text);

	# Remaining block stages; each maps text to text.
	$stages = array(
		'_DoLists',
		'_DoCodeBlocks',
		'_DoBlockQuotes',
		'_DoAutoLinks',
		'_HashHTMLBlocks',
		'_FormParagraphs',
	);
	foreach ($stages as $stage) {
		$text = $stage($text);
	}

	return $text;
}


#
# Applies all span-level (inline) transformations to $text, in a fixed
# order, and returns the result.
#
function _RunSpanGamut($text) {
	global $g_empty_element_suffix;

	$stages = array(
		'_DoCodeSpans',
		'_EncodeAmpsAndAngles',
		'_DoImages',
		'_DoAnchors',
		'_DoItalicsAndBold',
	);
	foreach ($stages as $stage) {
		$text = $stage($text);
	}

	# Hard line break: two or more spaces at the end of a line.
	return preg_replace('/ {2,}\n/', "<br$g_empty_element_suffix\n", $text);
}


#
# Rebuilds $text from the token list produced by _TokenizeHTML():
#   - inside 'tag' tokens, "*" and "_" are replaced by their digests from
#     $g_escape_table;
#   - every other token is passed through _EncodeBackslashEscapes().
# Returns the rebuilt text.
#
# The original declared an $in_pre flag ("keep track of when we're inside
# <pre> or <code> tags") and a $tags_to_skip pattern, but the flag was never
# set and the pattern never used, so text tokens were always processed.
# That dead state is removed here; behaviour is unchanged.
#
function _EscapeSpecialChars($text) {
	global $g_escape_table;
	$tokens = _TokenizeHTML($text);

	$text = '';   # rebuild $text from the tokens

	foreach ($tokens as $cur_token) {
		if ($cur_token[0] == 'tag') {
			$text .= str_replace(array('*', '_'),
				array($g_escape_table['*'], $g_escape_table['_']),
				$cur_token[1]);
		} else {
			$text .= _EncodeBackslashEscapes($cur_token[1]);
		}
	}
	return $text;
}


#
# Turns Markdown links in $text into anchors. Reference-style links
# ([text][id]) are handled first, then inline links ([text](url "title")).
#
function _DoAnchors($text) {
	global $g_nested_brackets;

	$reference_re = "{
		(					# wrap whole match in $1
		  \\[
		    ($g_nested_brackets)	# link text = $2
		  \\]

		  [ ]?				# one optional space
		  (?:\\n[ ]*)?		# one optional newline followed by spaces

		  \\[
		    (.*?)		# id = $3
		  \\]
		)
		}xs";

	$inline_re = "{
		(				# wrap whole match in $1
		  \\[
			($g_nested_brackets)	# link text = $2
		  \\]
		  \\(			# literal paren
			[ \\t]*
			(.+?)		# href = $3
			[ \\t]*
			(			# title = $4
			  (['\"])	# quote char = $5
			  .*?
			  \\5		# matching quote
			)?			# title is optional
		  \\)
		)
		}xs";

	$with_references = preg_replace_callback(
		$reference_re, '_DoAnchors_reference_callback', $text);

	return preg_replace_callback(
		$inline_re, '_DoAnchors_inline_callback', $with_references);
}
#
# Builds the anchor for one reference-style link.
#
# $matches[1] = whole match, [2] = link text, [3] = link id (may be empty,
# in which case the link text is used as the id). Ids are compared
# lower-cased. Returns the <a> element when the id is present in $g_urls,
# otherwise the original text unchanged.
#
function _DoAnchors_reference_callback($matches) {
	global $g_urls, $g_titles;
	$whole_match = $matches[1];
	$link_text   = $matches[2];
	$link_id     = strtolower($matches[3]);

	if ($link_id == "") {
		$link_id = strtolower($link_text); # for shortcut links like [this][].
	}

	if (isset($g_urls[$link_id])) {
		$url = $g_urls[$link_id];
		# Encode "*" and "_" so the italics/bold pass does not touch them.
		$url = str_replace(array('*',     '_'),
						   array('&#42;', '&#95;'), $url);
		$result = "<a href='$url'";
		# Fixed: the original tested the undefined $g_title, so the title
		# attribute was never emitted.
		if ( isset( $g_titles[$link_id] ) ) {
			$title = $g_titles[$link_id];
			$title = str_replace(array('*',     '_'),
								 array('&#42;', '&#95;'), $title);
			$result .=  " title=\"$title\"";
		}
		$result .= ">$link_text</a>";
	}
	else {
		$result = $whole_match;
	}
	return $result;
}
#
# Builds the anchor for one inline link.
#
# $matches[2] = link text, [3] = href, [4] = optional title *including* its
# surrounding quote characters. Returns the <a> element.
#
function _DoAnchors_inline_callback($matches) {
	$link_text = $matches[2];
	$url       = $matches[3];
	# The title group is optional and last in the pattern; PCRE omits
	# trailing unmatched groups, so the index may not exist.
	$title     = isset($matches[4]) ? $matches[4] : '';

	# We've got to encode these to avoid conflicting with italics/bold.
	$url = str_replace(array('*',     '_'),
					   array('&#42;', '&#95;'), $url);
	$result = "<a href=\"$url\"";
	if ($title !== '') {
		$title = str_replace(array('*',     '_'),
							 array('&#42;', '&#95;'), $title);
		$result .=  " title=$title"; # $title already quoted
	}
	$result .= ">$link_text</a>";

	return $result;
}


#
# Turns Markdown image syntax in $text into <img> tags. Reference-style
# images (![alt][id]) are handled first, then inline images
# (![alt](url "optional title")).
#
function _DoImages($text) {
	$reference_re = '{
		(				# wrap whole match in $1
		  !\[
		    (.*?)		# alt text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
		    (.*?)		# id = $3
		  \]

		)
		}xs';

	$inline_re = "{
		(				# wrap whole match in $1
		  !\\[
			(.*?)		# alt text = $2
		  \\]
		  \\(			# literal paren
			[ \\t]*
			(\\S+)		# src url = $3
			[ \\t]*
			(			# title = $4
			  (['\"])	# quote char = $5
			  .*?
			  \\5		# matching quote
			  [ \\t]*
			)?			# title is optional
		  \\)
		)
		}xs";

	$with_references = preg_replace_callback(
		$reference_re, '_DoImages_reference_callback', $text);

	return preg_replace_callback(
		$inline_re, '_DoImages_inline_callback', $with_references);
}
#
# Builds the <img> tag for one reference-style image.
#
# $matches[1] = whole match, [2] = alt text, [3] = link id (an empty id
# falls back to the alt text). Ids are compared lower-cased. Returns the
# tag when the id is present in $g_urls, otherwise the original text.
#
function _DoImages_reference_callback($matches) {
	global $g_urls, $g_titles, $g_empty_element_suffix;

	$alt = $matches[2];
	$id  = strtolower($matches[3]);
	if ($id == "") {
		$id = strtolower($alt); # shortcut form: ![this][]
	}

	# Unknown id: leave the source text as it was.
	if (!isset($g_urls[$id])) {
		return $matches[1];
	}

	# "*" and "_" are entity-encoded in the src and title attributes.
	$plain = array('*',     '_');
	$coded = array('&#42;', '&#95;');

	$src = str_replace($plain, $coded, $g_urls[$id]);
	$tag = "<img src=\"$src\" alt=\"$alt\"";
	if (isset($g_titles[$id])) {
		$tag .= " title=\"" . str_replace($plain, $coded, $g_titles[$id]) . "\"";
	}

	return $tag . $g_empty_element_suffix;
}
#
# Builds the <img> tag for one inline image.
#
# $matches[2] = alt text, [3] = src url, [4] = optional title *including*
# its surrounding quote characters. Returns the tag.
#
function _DoImages_inline_callback($matches) {
	global $g_empty_element_suffix;
	$alt_text = $matches[2];
	$url      = $matches[3];
	# The title group is optional and last in the pattern; PCRE omits
	# trailing unmatched groups, so the index may not exist.
	$title    = isset($matches[4]) ? $matches[4] : '';

	# Encode "*" and "_" so the italics/bold pass does not touch them.
	$url = str_replace(array('*',     '_'),
					   array('&#42;', '&#95;'), $url);
	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	if ($title !== '') {
		$title = str_replace(array('*',     '_'),
							 array('&#42;', '&#95;'), $title);
		$result .=  " title=$title"; # $title already quoted
	}
	$result .= $g_empty_element_suffix;

	return $result;
}


#
# Converts Markdown headers in $text to <h1>..<h6> elements:
#   - Setext style: a line underlined with "=" (h1) or "-" (h2);
#   - ATX style: a line starting with one to six "#" characters.
# Header text is run through _RunSpanGamut().
#
# The original used preg_replace() with the /e (eval) modifier, which was
# removed in PHP 7. The patterns are unchanged; the replacement expressions
# are now named callbacks. _UnslashQuotes() is no longer needed here because
# callbacks receive the captured text without the slashes /e added.
#
function _DoHeaders($text) {
	$text = preg_replace_callback("/(.+)[ \t]*\n=+[ \t]*\n+/",
		'_DoHeaders_callback_setext_h1', $text);
	$text = preg_replace_callback("/(.+)[ \t]*\n-+[ \t]*\n+/",
		'_DoHeaders_callback_setext_h2', $text);

	$text = preg_replace_callback("{
			^(\\#{1,6})	# $1 = string of #'s
			[ \\t]*
			(.+?)		# $2 = Header text
			[ \\t]*
			\\#*			# optional closing #'s (not counted)
			\\n+
		}xm",
		'_DoHeaders_callback_atx', $text);

	return $text;
}
# Setext header underlined with "=": $matches[1] is the header text.
function _DoHeaders_callback_setext_h1($matches) {
	return '<h1>' . _RunSpanGamut($matches[1]) . "</h1>\n\n";
}
# Setext header underlined with "-": $matches[1] is the header text.
function _DoHeaders_callback_setext_h2($matches) {
	return '<h2>' . _RunSpanGamut($matches[1]) . "</h2>\n\n";
}
# ATX header: the number of "#" in $matches[1] is the level, $matches[2] the text.
function _DoHeaders_callback_atx($matches) {
	$level = strlen($matches[1]);
	return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
}


#
# Finds whole lists in $text (items marked with "*" or "1.") and passes each
# one to _DoLists_callback(). The marker may be indented by fewer spaces
# than one tab width.
#
function _DoLists($text) {
	global $g_tab_width;
	$less_than_tab = $g_tab_width - 1;

	$whole_list_re = "{
			(
			  (
			    ^[ ]{0,$less_than_tab}
			    (\\*|\\d+[.])
			    [ \\t]+
			  )
			  (?s:.+?)
			  (
			      \\z
			    |
				  \\n{2,}
				  (?=\\S)
				  (?![ \\t]* (\\*|\\d+[.]) [ \\t]+)
			  )
			)
		}xm";

	return preg_replace_callback($whole_list_re, '_DoLists_callback', $text);
}
#
# Wraps one matched list in <ul> or <ol>. $matches[1] is the whole list and
# $matches[3] its first marker: "*" gives <ul>, anything else <ol>.
#
function _DoLists_callback($matches) {
	$tag = ($matches[3] != "*") ? "ol" : "ul";

	# Every run of two or more newlines becomes exactly three before the
	# items are processed.
	$spaced = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
	$items  = _ProcessListItems($spaced);

	return "<$tag>\n" . $items . "</$tag>\n";
}


#
# Splits the body of a list into items and passes each one to
# _ProcessListItems_callback(). Trailing blank lines are collapsed to a
# single newline first.
#
function _ProcessListItems($list_str) {
	$trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	$item_re = '{
		(\n)?							# leading line = $1
		(^[ \t]*)						# leading whitespace = $2
		(\*|\d+[.]) [ \t]+				# list marker = $3
		((?s:.+?)						# list item text   = $4
		(\n{1,2}))
		(?= \n* (\z | \2 (\*|\d+[.]) [ \t]+))
		}xm';

	return preg_replace_callback($item_re, '_ProcessListItems_callback', $trimmed);
}
#
# Renders one list item as <li>. $matches[1] is the optional blank line
# before the item and $matches[4] the item text.
#
# An item preceded by a blank line, or containing one, gets full block
# processing. Otherwise only nested lists and span-level markup are handled.
#
function _ProcessListItems_callback($matches) {
	$body = $matches[4];
	$needs_blocks = $matches[1] || preg_match('/\n{2,}/', $body);

	if (!$needs_blocks) {
		# Recurse for sub-lists, drop trailing newlines, then inline markup.
		$body = rtrim(_DoLists(_Outdent($body)), "\n");
		$body = _RunSpanGamut($body);
	}
	else {
		$body = _RunBlockGamut(_Outdent($body));
	}

	return "<li>" . $body . "</li>\n";
}


#
# Finds code blocks in $text and passes each one to _DoCodeBlocks_callback().
# In this syntax revision a code block is introduced by a colon followed by
# newlines, then one or more lines indented by a tab or a tab-width of spaces.
#
# The original wrote the indent quantifier as "[ ]\{$g_tab_width}". Since
# PHP 5.1.1 the backslash in "\{$var}" is kept in the string, so the pattern
# became "[ ]\{4}" (a space followed by the literal text "{4}") and
# space-indented blocks never matched. The quantifier is now built by
# concatenation.
#
function _DoCodeBlocks($text) {
	global $g_tab_width;

	# A tab-width of spaces, e.g. "[ ]{4}".
	$indent_re = '[ ]{' . $g_tab_width . '}';

	$text = preg_replace_callback("{
			(.?)			# $1 = preceding character
			(:)				# $2 = colon delimiter
			(\\n+)			# $3 = newlines after colon
			(	            # $4 = the code block -- one or more lines, starting with a space/tab
			  (?:
			    (?:$indent_re | \\t)  # Lines must start with a tab or a tab-width of spaces
			    .*\\n+
			  )+
			)
			((?=^[ ]{0,$g_tab_width}\\S)|\\Z)	# Lookahead for non-space at line-start, or end of doc
		}xm",
		'_DoCodeBlocks_callback', $text);

	return $text;
}
#
# NOTE(review): everything from here to the end of the file was damaged when
# this listing was extracted from a web page. HTML tag and entity literals
# inside strings had been rendered away and several functions were collapsed
# onto single lines. The literals below (<pre>, <code>, <strong>, <em>,
# <blockquote>, <p>, <a href>, &amp;, &lt;, &gt;), the comment pattern in
# _TokenizeHTML() and the body of _DoCodeSpans() with its callback are
# reconstructed; verify against the upstream PHP Markdown 1.0b4.1 source.
#

#
# Renders one code block. $matches[1] is the character before the colon
# delimiter and $matches[4] the indented block. The preceding character and
# the colon are kept in the output unless that character is whitespace or
# absent.
#
function _DoCodeBlocks_callback($matches) {
	$prevchar  = $matches[1];
	$codeblock = $matches[4];

	$prefix = "";
	if (!(preg_match('/\s/', $prevchar) || ($prevchar == ""))) {
		$prefix = "$prevchar:";
	}
	$codeblock = _EncodeCode(_Outdent($codeblock));
	$codeblock = _Detab($codeblock);
	# trim leading newlines and trailing whitespace
	$codeblock = preg_replace(array('/\A\n+/', '/\s+\z/'), '', $codeblock);

	return $prefix . "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
}


#
# Converts `code spans` in $text. The opening and closing runs of backticks
# must have the same length.
#
function _DoCodeSpans($text) {
	$text = preg_replace_callback("@
			(`+)		# Opening run of `
			(.+?)		# the code block
			(?<!`)
			\\1
			(?!`)
		@xs",
		'_DoCodeSpans_callback', $text);

	return $text;
}
# Renders one code span: $matches[2] is the text between the backticks.
function _DoCodeSpans_callback($matches) {
	$c = $matches[2];
	$c = preg_replace('/^[ \t]*/', '', $c); # leading whitespace
	$c = preg_replace('/[ \t]*$/', '', $c); # trailing whitespace
	$c = _EncodeCode($c);
	return "<code>$c</code>";
}


#
# Makes $_ safe to place inside <code>: encodes "&", "<" and ">" as
# entities, then replaces Markdown-significant characters with their digests
# from $g_escape_table.
#
function _EncodeCode($_) {
	global $g_escape_table;

	# Encode all ampersands; HTML entities are not
	# entities within a Markdown code span.
	$_ = str_replace('&', '&amp;', $_);

	# Do the angle bracket song and dance:
	$_ = str_replace(array('<',    '>'),
					 array('&lt;', '&gt;'), $_);

	# Now, escape characters that are magic in Markdown:
	$_ = str_replace(array_keys($g_escape_table),
					 array_values($g_escape_table), $_);

	return $_;
}


#
# Converts **bold** / __bold__ and *italic* / _italic_ markers in $text.
#
function _DoItalicsAndBold($text) {
	# <strong> must go first:
	$text = preg_replace('{ (\*\*|__) (?=\S) (.+?) (?<=\S) \1 }sx',
		'<strong>\2</strong>', $text);
	# Then <em>:
	$text = preg_replace('{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
		'<em>\2</em>', $text);

	return $text;
}


#
# Finds blockquotes in $text (runs of lines beginning with ">") and passes
# each one to _DoBlockQuotes_callback().
#
function _DoBlockQuotes($text) {
	$text = preg_replace_callback('/
		  (								# Wrap whole match in $1
			(
			  ^[ \t]*>[ \t]?			# ">" at the start of a line
			    .+\n					# rest of the first line
			  (.+\n)*					# subsequent consecutive lines
			  \n*						# blanks
			)+
		  )
		/xm',
		'_DoBlockQuotes_callback', $text);

	return $text;
}
# Renders one blockquote: strips one level of ">" quoting, runs the block
# gamut on the content, and indents every line with a tab.
function _DoBlockQuotes_callback($matches) {
	$bq = $matches[1];
	$bq = preg_replace('/^[ \t]*>[ \t]?/m', '', $bq);
	$bq = _RunBlockGamut($bq);		# recurse
	$bq = preg_replace('/^/m', "\t", $bq);

	return "<blockquote>\n$bq\n</blockquote>\n\n";
}


#
# Splits $text on blank lines and wraps each chunk in <p>...</p> after span
# processing. A chunk that is a key of $g_html_blocks is replaced by the
# stored HTML block instead.
#
function _FormParagraphs($text) {
	global $g_html_blocks;

	# Strip leading and trailing lines:
	$text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

	$grafs = preg_split('/\n{2,}/', $text);
	# The original also ran count() on an undefined $graph and never used the
	# result; count(null) is a TypeError on PHP 8, so that line is removed.

	# Wrap everything that is not a hashed HTML block in <p> tags.
	foreach ($grafs as $key => $value) {
		if (!isset( $g_html_blocks[$value] )) {
			$value = _RunSpanGamut($value);
			$value = preg_replace('/^([ \t]*)/', '<p>', $value);
			$value .= "</p>";
			$grafs[$key] = $value;
		}
	}

	# Swap the placeholders back for the stored HTML blocks.
	foreach ($grafs as $key => $value) {
		if (isset( $g_html_blocks[$value] )) {
			$grafs[$key] = $g_html_blocks[$value];
		}
	}

	return implode("\n\n", $grafs);
}


#
# Encodes "&" that does not start an entity, and "<" that does not start a
# tag, as &amp; and &lt; respectively.
#
function _EncodeAmpsAndAngles($text) {
	$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w{1,8});)/',
						 '&amp;', $text);

	# Encode naked <'s
	$text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);

	return $text;
}


#
# Replaces backslash-escaped characters in $text with the digests from
# $g_backslash_escape_table.
#
function _EncodeBackslashEscapes($text) {
	global $g_escape_table, $g_backslash_escape_table;
	# Must process escaped backslashes first.
	return str_replace(array_keys($g_backslash_escape_table),
					   array_values($g_backslash_escape_table), $text);
}


#
# Converts <http://...>, <https://...>, <ftp://...> and <user@host.tld> in
# $text into links.
#
function _DoAutoLinks($text) {
	$text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
						 '<a href="\1">\1</a>', $text);

	# Email addresses: <address@domain.foo>
	# The original used the /e modifier (removed in PHP 7); it is now a
	# named callback.
	$text = preg_replace_callback('{
		<
		(
			[-.\w]+
			\@
			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
		)
		>
		}xi',
		'_DoAutoLinks_email_callback', $text);

	return $text;
}
# Renders one <address@domain> auto-link; $matches[1] is the address.
function _DoAutoLinks_email_callback($matches) {
	return _EncodeEmailAddress(_UnescapeSpecialChars($matches[1]));
}


#
# Returns a mailto: anchor for $addr in which most characters are written as
# randomly chosen decimal or hex entities. The output differs between calls.
#
function _EncodeEmailAddress($addr) {
	$addr = "mailto:" . $addr;

	# leave ':' alone (to spot mailto: later)
	$addr = preg_replace_callback('/([^\:])/',
								  '_EncodeEmailAddress_callback', $addr);

	$addr = "<a href=\"$addr\">$addr</a>";
	# strip the mailto: from the visible part
	$addr = preg_replace('/">.+?:/', '">', $addr);

	return $addr;
}
# Encodes a single character ($matches[1]) of an e-mail address.
function _EncodeEmailAddress_callback($matches) {
	$char = $matches[1];
	$r = rand(0, 100);
	# roughly 10% raw, 45% hex, 45% dec; '@' is never left raw.
	if ($r > 90 && $char != '@') return $char;
	if ($r < 45) return '&#x'.dechex(ord($char)).';';
	return '&#'.ord($char).';';
}


#
# Swaps the digests from $g_escape_table back to their original characters.
#
function _UnescapeSpecialChars($text) {
	global $g_escape_table;
	return str_replace(array_values($g_escape_table),
					   array_keys($g_escape_table), $text);
}


#
# Splits $str into a list of tokens, each array(type, string) where type is
# 'tag' (HTML comment, processing instruction or tag) or 'text' (everything
# in between). Tags may contain nested angle brackets up to $depth levels.
#
function _TokenizeHTML($str) {
	$pos = 0;
	$len = strlen($str);
	$tokens = array();

	$depth = 6;
	$nested_tags = str_repeat('(?:<[a-z\/!$](?:[^<>]|',$depth)
				   .str_repeat(')*>)', $depth);
	$match = "(?s:<!(?:--.*?--\s*)+>)|".  # comment
			 "(?s:<\?.*?\?>)|".            # processing instruction
			 "$nested_tags";               # nested tags

	preg_match_all("/($match)/", $str, $matches,
				   PREG_SET_ORDER | PREG_OFFSET_CAPTURE);

	foreach ($matches as $element) {
		$whole_tag = $element[0][0];
		$tag_start = $element[0][1];
		$sec_start = $tag_start + strlen($whole_tag);
		# Text between the previous tag and this one.
		if ($pos < $tag_start) {
			array_push($tokens, array('text',
				substr($str, $pos, $tag_start - $pos)));
		}
		array_push($tokens, array('tag', $whole_tag));
		$pos = $sec_start;
	}

	# Text after the last tag.
	if ($pos < $len)
		array_push($tokens, array('text',
			substr($str, $pos, $len - $pos)));
	return $tokens;
}


#
# Removes one level of indentation (a tab, or up to a tab-width of spaces)
# from the start of every line of $text.
#
function _Outdent($text) {
	global $g_tab_width;
	return preg_replace("/^(\\t|[ ]{1,$g_tab_width})/m", "", $text);
}


#
# Expands each tab in $text to spaces up to the next multiple of
# $g_tab_width.
#
# The original used the /e modifier, which was removed in PHP 7. It also
# added slashes to quotes in the captured text, which skewed the column
# count. A named callback avoids both problems.
#
function _Detab($text) {
	return preg_replace_callback("/(.*?)\t/", '_Detab_callback', $text);
}
# Pads the text before one tab ($matches[1]) out to the next tab stop.
function _Detab_callback($matches) {
	global $g_tab_width;
	$before = $matches[1];
	return $before . str_repeat(' ', $g_tab_width - strlen($before) % $g_tab_width);
}


#
# Turns \" back into ". The /e modifier added those slashes; the helper is
# kept for any remaining callers.
#
function _UnslashQuotes($text) {
	return str_replace('\"', '"', $text);
}


// And now for the filters
remove_filter('the_content', 'wpautop');
remove_filter('the_excerpt', 'wpautop');
remove_filter('comment_text', 'wpautop');

add_filter('the_content', 'Markdown');
add_filter('the_excerpt', 'Markdown');
// NOTE(review): the plugin description says comments are converted too, yet
// this call removes rather than adds the comment filter. Left as found;
// confirm whether add_filter() was intended.
remove_filter('comment_text', 'Markdown');

?>




Copyright 1998-2008 Alvin Alexander
All Rights Reserved.
 
devdaily.com is based in louisville, kentucky, and this web site is hosted by godaddy.com