#!/usr/bin/perl -w
#
# getazlyric.pl - gets a lyric from azlyrics.com
#
# SYNTAX
#
# getazlyric.pl <artist> <song> <file>
#
# OPERATION
# This is a wrapper for html2text, azlyricfilter, and tee.
# The lyric is retrieved by html2text with HTML markup
# converted to text formatting. The azlyricfilter program
# strips the remaining text formatting, including text
# tagging added by azlyrics.com ("[name] LYRICS" header
# and www.azlyrics.com link text at the bottom).
#
# The cleaned text file is saved to <file> and dumped to
# stdout.
#
# The "Done" message that is sent to stdout is not included
# in the saved file.
#
# PARAMETERS
#
# The format of artist names and of lyric file/song title
# file names at azlyrics.com is all lower case, with no spaces
# or special characters.
#
# The base URL for azlyrics.com lyric files is
#
# http://www.azlyrics.com/lyrics/<artist>/<song>.html
#
# so to get a file, pass the name of the artist and the
# name of the song formatted as per above (no spaces, lower
# case - do not include the .html on the song name).
#
# Note that it might be a good idea to confirm that the
# artist/song file exists before trying to retrieve it,
# since the default behaviour of azlyrics.com when a
# file is not found is to return an index of some kind.
#
# The filename passed as <file> should be a writable filename
# - no warning is given if <file> is being overwritten.
#
# Revision History:
# 2005-11-01: pdwilso@gmail.com - initial revision
#
# TO DO:
#
# Add command line switch processing to use a "quiet" mode
# where no output is sent to stdout.
#
# Add a switch to output only to stdout - for now,
# use /dev/null as localfile to suppress file output.
#
# Add a test to assure that <artist>/<song>.html exists on
# azlyrics.com - fail with warning if URL doesn't exist.
#
# Add a switch to issue a warning/query if <file> exists.
#
# ##############################################################
#
# "constants" - command variables
use strict;
use warnings;

# "constants" - the external commands that make up the pipeline
my $tee       = "tee";
my $html2text = "html2text -nobs";
my $azfilter  = "/usr/local/bin/azlyricfilter.pl";

# shell_quote($text)
#
# Returns $text wrapped in single quotes, with any embedded single
# quote rewritten as '\'' so that the shell passes the value through
# as exactly one literal word.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

#
# get parameter values - exactly three are required; a usage error is
# reported on stderr with a non-zero exit status so callers can detect it
unless (scalar(@ARGV) == 3) {
    print STDERR "Usage: " . __FILE__ . " artist song localfile\n";
    exit 1;
}
my ($artist, $song, $localfile) = @ARGV;

#
# construct the URL of the song from the artist and song names
my $url = "http://www.azlyrics.com/lyrics/$artist/$song.html";

#
# The command line is handed to the shell, so the two values that come
# from the user (the URL and the output file name) are quoted first;
# otherwise an argument such as "x; rm -rf ~" would be run as a command.
# The "--" stops tee from treating a file name that starts with "-" as
# an option.
my $quoted_url  = shell_quote($url);
my $quoted_file = shell_quote($localfile);

#
# backtick execution returns the output of the pipeline - tee saves the
# cleaned text to the local file and also passes it through, and the
# print statement below places it on stdout
my $lyric = `$html2text $quoted_url | $azfilter | $tee -- $quoted_file`;
die "Could not run the lyric pipeline: $!\n" unless defined $lyric;
warn "Warning: lyric pipeline exited with status " . ($? >> 8) . "\n"
    if $? != 0;
print $lyric;

#
# done - might want to remove this
print "\n *** Done.\n";
;; Emacs initialization file
;;
;; turn off the scrollbar, the toolbar, and tooltips in menus
;; Minor-mode functions called from Lisp with no argument ENABLE the
;; mode on current Emacs versions (older versions toggled it), so a
;; negative argument is passed to switch the tool bar and tooltips off
;; reliably.  The fboundp guards keep the file loadable on builds that
;; lack these functions.
(when (fboundp 'set-scroll-bar-mode)
  (set-scroll-bar-mode nil))
(when (fboundp 'tool-bar-mode)
  (tool-bar-mode -1))
(when (fboundp 'tooltip-mode)
  (tooltip-mode -1))
;;
;; From the Red Hat Linux default .emacs
;;
;; Are we running XEmacs or Emacs?
(defvar running-xemacs (string-match "XEmacs\\|Lucid" emacs-version))
;; Set up the keyboard so the delete key on both the regular keyboard
;; and the keypad delete the character under the cursor and to the right
;; under X, instead of the default, backspace behavior.
(global-set-key [delete] 'delete-char)
(global-set-key [kp-delete] 'delete-char)
;; Turn on font-lock mode for Emacs (not done for XEmacs)
(unless running-xemacs
  (global-font-lock-mode t))
;; Enable wheelmouse support by default
;;(cond (window-system
;; (mwheel-install)
;;))
;; Always end a file with a newline
(setq require-final-newline t)
;; Stop at the end of the file, not just add lines
(setq next-line-add-newlines nil)
;; took this out because of errors in the php mode file
;; due to having emailed it. Need to reget the file.
;; (load-file "~/lisp/php-mode.el")
;;
;; below is from http://www.dotfiles.com/files/6/128_.emacs
;;
;; -*-emacs-lisp-*-
;; Author: K. Arun
;; File : $HOME/.emacs
;; Revision: $Id: .emacs,v 1.25 2000/06/17 07:07:44 kar Exp $
;; Paths
;; Put the private and site-wide Lisp directories at the front of the
;; library search path (the private directory is searched first).
(setq load-path
      (append '("/home/earthsid/lisp/"
                "/usr/local/share/emacs/site-lisp")
              load-path))
;; Directories searched for Info manuals.
(setq Info-default-directory-list
      (list "~/lisp/info"
            "/usr/local/lib/info"
            "/usr/local/info"
            "/usr/info"
            "/usr/share/info"))
;; Key bindings
;; Release the default meanings of M-g and M-n before they are reused.
(dolist (key '("\M-g" "\M-n"))
  (global-unset-key key))
;; Table of global bindings: (KEY . COMMAND).  insert-date,
;; region-length and dos2unix are forward references to commands
;; defined further down in this file.
(dolist (binding
         '(([f1]    . manual-entry)
           ([f2]    . info)
           ;; ([f3] . vm)
           ([f3]    . gnus)
           ([f4]    . kill-buffer)
           ([f5]    . find-file)
           ([f6]    . comment-region)
           ([f7]    . delete-frame)
           ([f8]    . insert-date)
           ([f9]    . ispell-buffer)
           ([f10]   . byte-compile-file) ; for .emacs, .gnus etc.
           ([f11]   . auto-fill-mode)
           ([f12]   . font-lock-mode)
           ("\M-g"  . goto-line)
           ("\M-n"  . make-frame-command) ; Mozilla's influence ;-)
           ("\C-cl" . region-length)
           ;; ("\C-cp" . cperl-perldoc)
           ("\C-cd" . dos2unix)))
  (global-set-key (car binding) (cdr binding)))
;; DEFAULTS
;; Highlight marked region
;; Modes switched on at startup:
;;   - highlight the marked region
;;   - transparently read compressed files
;;   - show line and column numbers in the mode line
;;   - font lock in all major modes
(transient-mark-mode t)
(auto-compression-mode 1)
(line-number-mode t)
(column-number-mode t)
(global-font-lock-mode 1)
;; Auto fill in all major modes
(setq-default auto-fill-function #'do-auto-fill)
;; Plain variable settings
(setq bookmark-save-flag 1             ; save bookmarks automatically
      mouse-yank-at-point t            ; mouse yanks at point, not at click
      case-fold-search t               ; case-insensitive searches
      Man-notify-method 'pushy         ; man page pushiness
      font-lock-maximum-decoration t   ; as much fontification as possible
      next-line-add-newlines nil)      ; no new lines when scrolling past end
;; Insert date into buffer
(defun insert-date ()
  "Insert the current date and time at point.
The text consists of the weekday name, month name, day of month,
year, time of day and numeric time zone."
  (interactive)
  (let ((stamp (format-time-string "%A, %B %e, %Y %k:%M:%S %z")))
    (insert stamp)))
;; Compute the length of the marked region
(defun region-length ()
  "Show the number of characters in the region in the echo area."
  (interactive)
  (let ((size (- (region-end) (region-beginning))))
    (message "%d" size)))
;;
;; dos2unix and unix2dos from Benjamin Rutt's .emacs
;;
;; Convert a buffer from dos ^M end of lines to unix end
;; of lines...
(defun dos2unix ()
  "Delete every carriage return (^M) in the accessible part of the buffer.
Point is moved: the scan starts at the beginning of the buffer."
  (interactive)
  (goto-char (point-min))
  ;; search-forward leaves point just after the CR it found, so
  ;; deleting one character backwards removes exactly that CR.
  (while (search-forward "\r" nil t)
    (delete-char -1)))
;; ...vice versa
(defun unix2dos ()
  "Convert the line endings of the current buffer to DOS style (CR LF).
A newline that is already preceded by a carriage return is left
alone, so running the command on a buffer that is partly or wholly
in DOS format does not produce doubled carriage returns.
Point is moved: the scan starts at the beginning of the buffer."
  (interactive)
  (goto-char (point-min))
  (while (search-forward "\n" nil t)
    ;; Point is now just after the newline, so (1- (point)) is the
    ;; position of the newline and `char-before' of that position is
    ;; the character preceding it (nil at the start of the buffer).
    (unless (eq (char-before (1- (point))) ?\r)
      (replace-match "\r\n" t t))))
;; Settings written by the Customize interface.  Each entry below is a
;; quoted list whose first element is the variable name and whose second
;; element is the value; some entries carry further elements, the last of
;; which names a feature in parentheses, e.g. (jka-compr).  Lines that
;; start with ";;" inside the form are entries that have been commented
;; out and are therefore not applied.
(custom-set-variables
;; custom-set-variables was added by Custom -- don't edit or cut/paste it!
;; Your init file should contain only one such instance.
'(auto-compression-mode t nil (jka-compr))
'(case-fold-search t)
'(current-language-environment "Latin-9")
'(default-input-method "latin-9-prefix")
;; '(generic-define-unix-modes nil)
'(global-font-lock-mode t nil (font-lock))
;; '(show-paren-mode t nil (paren))
'(tab-width 4)
;; '(tooltip-mode f nil (tooltip))
'(transient-mark-mode t)
'(uniquify-buffer-name-style (quote forward) nil (uniquify)))
;; No faces have been customized; the form is kept, empty, as written
;; by Custom.
(custom-set-faces
;; custom-set-faces was added by Custom -- don't edit or cut/paste it!
;; Your init file should contain only one such instance.
)
;; This is from the Linux kernel documentation
;; (Documentation/CodingStyle)....
(defun linux-c-mode ()
  "Switch to C mode set up for Linux kernel sources.
Selects the \"K&R\" style and then sets `c-basic-offset' to 8."
  (interactive)
  (c-mode)
  (c-set-style "K&R")
  ;; The offset is set after the style so that the style's own value
  ;; does not override it.
  (let ((kernel-indent 8))
    (setq c-basic-offset kernel-indent)))
;; This will define the M-x linux-c-mode command. When hacking on a
;; module, if you put the string -*- linux-c -*- somewhere on the
;; first two lines, this mode will be automatically invoked. Also, you
;; may want to add
(push '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode) auto-mode-alist)
;; to your .emacs file if you want to have linux-c-mode switched on
;; automagically when you edit source files under /usr/src/linux.
;; .... and this one MIGHT switch it on when editing under any linux
;; source tree?
(push '("linux.*/.*\\.[ch]$" . linux-c-mode) auto-mode-alist)
;; from the GNU Emacs FAQ
;; (http://www.gnu.org/software/emacs/windows/big.html).
(defun set-buffer-file-eol-type (eol-type)
  "Set the end-of-line conversion of the current buffer's file to EOL-TYPE.
When the buffer is next saved, its line endings are converted
according to EOL-TYPE, which is one of three symbols:

  unix (LF)
  dos (CRLF)
  mac (CR)

The buffer is marked modified so that the next \\[save-buffer]
really writes the file with EOL-TYPE.  From a program, if the
buffer should not be marked modified, call
`coding-system-change-eol-conversion' directly [weikart]."
  (interactive "SEOL type for visited file (unix, dos, or mac): ")
  ;; Derive the new coding system from the current one, changing only
  ;; its end-of-line convention.
  (let ((converted (coding-system-change-eol-conversion
                    buffer-file-coding-system eol-type)))
    (setq buffer-file-coding-system converted))
  (set-buffer-modified-p t)
  (force-mode-line-update))
;; C-c u / C-c d / C-c m switch the current buffer's file to unix, dos
;; or mac line endings.
;; NOTE(review): C-c d is also bound to `dos2unix' earlier in this file;
;; the binding made here is the later one and therefore wins - confirm
;; which of the two is wanted.
(dolist (entry '(("\C-cu" . unix)
                 ("\C-cd" . dos)
                 ("\C-cm" . mac)))
  (global-set-key (car entry)
                  `(lambda () (interactive)
                     (set-buffer-file-eol-type ',(cdr entry)))))
;; Make the mode-line display the standard EOL-TYPE symbols (used
;; above)...
(setq eol-mnemonic-undecided "(?)")   ; unknown EOL type
(setq eol-mnemonic-unix "(unix)")     ; LF
(setq eol-mnemonic-dos "(dos)")       ; CRLF
(setq eol-mnemonic-mac "(mac)")       ; CR
/* Page-wide font */
body {
    font-family: Verdana,Arial,Helvetica,sans-serif;
    font-size: 11pt;
}

/* Headings: each level is slightly smaller than the one above it */
h1 {
    margin-left: 0.5em;
    font-size: 120%;
}

h2 {
    margin: 0.5em 2.5em 0.5em 1.0em;
    font-size: 110%;
}
/* Third-level heading: bold monospace, indented further than h2.
   All four declarations belong to this one rule; the stray closing
   braces that used to follow the first two declarations ended the rule
   early and left the rest outside any selector. */
h3 {
    margin: 0.5em 2.5em 0.5em 1.75em;
    font-size: 105%;
    font-weight: bold;
    font-family: monospace;
}
/* Common horizontal indent for body text, lists and tables */
p,ul,table {
    margin: 0.5em 2.5em 0.5em 2.5em;
}

/* Grid lines: the table draws the left and top edges, each cell draws
   its own right and bottom edges. */
table {
    border-left: 1px gray solid;
    border-top: 1px gray solid;
    padding: 0;
}

td, th {
    border-right: 1px gray solid;
    border-bottom: 1px gray solid;
    text-align: left;
    vertical-align: top;
    margin: 0;
    padding: 2pt 4pt 4pt 4pt;
    font-size: 80%;
}

/* Header cells */
th {
    font-family: monospace;
    font-weight: bold;
}

/* Header cells in the top row: sans-serif on a light gray background */
th.toprow {
    font-family: sans-serif;
    vertical-align: bottom;
    background: rgb(240,240,240);
}
<?php
////////////////////////////////////////////////////////////////
//
// src.php - display HTMLized source code listing or an index
//
// This script wraps the functionality of the source code
// highlighting module syntax_hilight.php written by Scott
// Yang.
//
// The name of a source code file is passed in as a query
// value (f=filename). The file must have an extension
// that matches the regex given in $pattern at the top of
// the program (below).
//
// The source file must exist in the CWD where this script
// is being run.
//
// If no query string is given - or if the query string
// appears to be invalid - a hyperlinked index of source
// files in the current directory is created and output.
//
// Usage is 'src.php?f=filename' where filename is the name
// of a source file in the current directory.
//
// The source code listing hilighting code is by Scott Yang
// (http://scott.yang.id.au/2004/05/syntax-hilight-enscript/)
// and uses GNU enscript(1).
//
// Note that Yang's module is designed to work within the
// WordPress environment, but handles this application well.
//
// REVISION HISTORY
// 2005-10-02: pdwilso@gmail.com - Initial version (beta)
//
// TO DO:
// Need to do some checks on incoming parameter to assure
// validity before this script is released.
//
// The directory reading code should be abstracted out into
// its own module as a function.
//
// Add last modification date to footer of listing files.
//
// Add size and date info to index page.
//
// Make the script output XHTML compliant.
//
// BUGS / PROBLEMS
// There are some limitations inherent in the use of enscript
// to produce the hilighting - specifically, there is no easy
// way to combine HTML 'span' elements into CSS styles using
// source code language component syntax. This means that we
// get many 'span' elements for colorization, and many "bad"
// elements ('i' and 'b') mixed in and overlapping. Some of
// these may be replaced using e.g. preg_replace(), but
// combining e.g. span (for colorization) and i for italics
// into a span.comment style to apply to comments in the
// source listing - this becomes problematic since we don't
// have direct access to the "parts of speech" detection used
// by the underlying engine.
//
// SEE ALSO
// enscript(1), states(1), genscript(1)
//
////////////////////////////////////////////////////////////////
//
// The pattern and the types data are (mostly) 'constant'
//
// GLOBAL - source code files will be matched against this
$pattern = '/\.(pl|php|c|h|inc)$/';
// GLOBAL - maps each file extension accepted by $pattern to the
// language name passed on to the highlighter
$types = array(
    'pl'  => 'perl',
    'inc' => 'php',
    'php' => 'php',
    'c'   => 'c',
    'h'   => 'c',
);
// GLOBAL - CSS rules shared by the index page and the listing page
$css = '
h1
{
font-size: 120%;
border-top: 1px gray solid;
}
h1
{
padding-top: 0.75em;
margin-top: 0.25em;
}
p
{
margin: 0;
font-size: small;
text-align: center
}
a:hover
{
text-decoration: underline;
}
a
{
text-decoration: none;
color: blue;
font-weight: bold;
}
';
// GLOBAL - filename from query string (?f=filename).  A missing or
// non-string parameter is treated as "no file requested" instead of
// raising an undefined-index notice.
$fn = (isset($_GET['f']) && is_string($_GET['f'])) ? $_GET['f'] : "";
// GLOBAL - source file type: the highlighter language name looked up
// in $types, or "" when the request is not for a known source file
$typ = "";
// Accept the parameter only when ALL of the following hold:
//   - it is a bare file name (basename() leaves it unchanged), so a
//     value such as "../../secret.php" cannot reach outside the
//     current directory;
//   - it names an existing regular file;
//   - its extension matches $pattern and has an entry in $types.
$matches = array();
if ($fn !== ""
    && basename($fn) === $fn
    && is_file($fn)
    && preg_match($pattern, $fn, $matches)
    && array_key_exists($matches[1], $types)) {
    $typ = $types[$matches[1]];
}
// If we didn't recognize the file type, clear the parameter
// so that when we check it below we get the default indexing
// behaviour. If there is no (useful) query value, we want
// to always generate and display an index.
if (!$typ) { $fn = ""; }
if (!$fn)
{
    // Index mode: generate and display an index of all source files in
    // the current directory (according to $pattern).
    $files = array();   // stays an empty list when nothing matches
    $dh = opendir(".");
    if ($dh !== false) {
        // Strict comparison: a directory entry named "0" is a valid
        // name and must not end the loop.
        while (($f = readdir($dh)) !== false) {
            if (preg_match($pattern, $f) && is_file($f)) {
                $files[] = $f;
            }
        }
        closedir($dh);
    }
    sort($files);       // readdir() order is unspecified
    $ndxcss = $css;
    // PHP_SELF is derived from the request URL, so it is escaped
    // before being placed in an attribute.
    $self = htmlspecialchars($_SERVER['PHP_SELF']);
    // display the index:
    print "<title>Source code index</title>\n".
          "<style><!-- $ndxcss --></style>\n".
          "<p><a href=\"./\">dir listing</a></p>\n".
          "<h1>Source code index:</h1>\n".
          "<ol>\n";
    foreach ($files as $f) {
        // The name is URL-encoded inside the link target and
        // HTML-escaped where it is shown as text.
        $query = $self . "?f=" . urlencode($f);
        print "<li><a href=\"$query\">" . htmlspecialchars($f) . "</a></li>\n";
    }
    print "</ol>\n";
}
else
{
    // Listing mode: show the HTMLized source code listing.
    // Add listing-specific CSS
    $srccss = $css . "
pre
{
width: 92%;
background: rgb(220,220,220);
border: 1px blue solid;
padding: 8pt;
margin-left: 1em;
}
";
    // include the source code hilighting module.
    require "syntax_hilight.php";
    // File name and script path are escaped before being written
    // into the page.
    $title = htmlspecialchars($fn);
    $self  = htmlspecialchars($_SERVER['PHP_SELF']);
    // output the page content, including hilighted listing
    print "<title>$title</title>\n".
          "<style><!-- $srccss --></style>\n".
          "<p><a href=\"./\">dir listing</a> | \n".
          "<a href=\"".$self.
          "\">src index</a></p>\n".
          "<h1><tt>$title:</tt></h1>\n".
          "<pre>".
          SyntaxHilighter::hilight_file($fn, $typ).
          "\n</pre>\n";
}
?>
<?php
/*
Plugin Name: Syntax Highlighter Enscript
Plugin URI: http://scott.yang.id.au/category/syntax-hilite/
Description: Adds syntax highlighting to your pre tags with an optional lang
attribute to specify which language to base the syntax on. It uses "enscript"
and PHP's built-in highlight_string() to perform the syntax highlight.
Version: 1.3a-pdw
Author: Scott Yang
Author URI: http://scott.yang.id.au/
Revisions by Paul Wilson (pdwilso@gmail.com)
as of 2005-11-02, version 1.3a-pdw:
1. Corrected an apparent typo in the return value for highlight_file() that
referenced "SyntaxHilight" as the package name instead of "SyntaxHilighter"
2. Added additional preg_replace filters to highlight() to downcase HTML
elements and replace obnoxious color in PHP code output.
3. Added 'a-pdw' to the 1.3 version number
4. Modified line lengths to get sane wrapping behaviour when displaying code
in a web browser.
5. Not sure if the hex syntax for specifying colors is valid with the most
recent versions of the CSS and XHTML standards - in any case, the code that
replaces these hex strings needs to be fixed to include context matching,
since the hex string format is legal content that can and does occur - the
intent is to change the markup only, not the page content - this is a bug
introduced by my changes.
TODO:
- Fix the bug described in 5 above.
- Add settings for syntax highlighting color values.
- Add setting to not colorize output
*/
/**
 * Source code highlighter that produces HTML markup, using PHP's own
 * highlight_string() for PHP code and the external "enscript" program
 * for every other language.
 *
 * All methods are called statically (SyntaxHilighter::method()), so
 * they are declared static; this requires PHP 5 or later.
 */
class SyntaxHilighter {

    /**
     * Highlight a string of source code and tidy up the markup.
     *
     * @param string $code Source code to highlight.
     * @param string $lang Language name; 'php' selects
     *                     highlight_string(), anything else is handed
     *                     to enscript's --highlight option.
     * @return string HTML fragment (without a surrounding <pre>).
     */
    public static function hilight($code, $lang) {
        if ($lang == 'php') {
            $code = SyntaxHilighter::hilight_php($code);
        } else {
            $code = SyntaxHilighter::hilight_enscript($code, $lang);
        }
        // Making it XHTML compatible: <font color=...> becomes a styled
        // <span>, and tag names are lower-cased.
        $code = preg_replace('/<FONT COLOR="/i',
                             '<span style="color:', $code);
        $code = preg_replace('/<\/FONT>/i', '</span>', $code);
        // A callback is used instead of the /e modifier, which no
        // longer exists in PHP 7 and later.
        $code = preg_replace_callback('/(<\/?)(\w+)([^">]*>)/',
                                      array('SyntaxHilighter', 'lowercase_tag_name'),
                                      $code);
        // NOTE(review): pattern and replacement are identical, so this
        // call currently changes nothing; the intended replacement
        // colour is not recoverable from this file.
        $code = preg_replace("/#8C8C00/", "#8C8C00", $code);
        return $code;
    }

    /**
     * preg_replace_callback() helper for hilight(): lower-cases the tag
     * name (capture group 2) and leaves the rest of the tag unchanged.
     *
     * @param array $match Match array: [1] is "<" or "</", [2] the tag
     *                     name, [3] the remainder up to and including ">".
     * @return string The rebuilt tag.
     */
    public static function lowercase_tag_name($match) {
        return $match[1] . strtolower($match[2]) . $match[3];
    }

    /**
     * Highlight code by piping it through enscript.
     *
     * @param string $code Source code to highlight.
     * @param string $lang Language name for enscript's --highlight option.
     * @return string The text found between enscript's <PRE> and </PRE>,
     *                wrapped in BEGIN/END marker comments.
     */
    public static function hilight_enscript($code, $lang) {
        // $lang can come from page content or a query string, so it is
        // quoted as a single shell argument before being put on the
        // command line.
        $argv = 'enscript -q -p - --highlight=' . escapeshellarg($lang)
              . ' --language=html --color';
        // Calling enscript to format it. Note that proc_open requires
        // PHP 4.3. Otherwise, we will use a temp file and then popen().
        if (function_exists('proc_open')) {
            $desc = array(
                0 => array("pipe", "r"),
                1 => array("pipe", "w"),
                2 => array("pipe", "w"),
            );
            $proc = proc_open($argv, $desc, $pipe);
            if (is_resource($proc)) {
                fwrite($pipe[0], $code);
                fclose($pipe[0]);
                // Read enscript's standard output up to end of file.
                $code = '';
                while (!feof($pipe[1])) {
                    $chunk = fgets($pipe[1], 4096);
                    if ($chunk === false) {
                        break;
                    }
                    $code .= $chunk;
                }
                fclose($pipe[1]);
                fclose($pipe[2]);
                proc_close($proc);
            }
        } else {
            // Use the system's temporary directory when PHP can report
            // it, falling back to /tmp otherwise.
            $tmpdir = function_exists('sys_get_temp_dir')
                    ? sys_get_temp_dir() : '/tmp';
            $file = tempnam($tmpdir, '_syntax');
            $handle = fopen($file, 'w');
            fwrite($handle, $code);
            fclose($handle);
            // The file name is passed as one quoted argument.
            $argv .= ' ' . escapeshellarg($file) . ' 2>&1';
            $proc = popen($argv, 'r');
            $code = '';
            while (!feof($proc)) {
                $chunk = fgets($proc, 4096);
                if ($chunk === false) {
                    break;
                }
                $code .= $chunk;
            }
            pclose($proc);
            unlink($file);
        }
        // Keep only what lies between <PRE> and </PRE>.  preg_* is used
        // because the ereg family was removed in PHP 7; the "s" flag
        // lets "." match newlines as it did with the POSIX functions.
        $code = preg_replace('/^.*<PRE>\n/is', '', $code);
        $code = preg_replace('/\n?<\/PRE>.*$/is', '', $code);
        return '<!--BEGIN enscript-->'.$code.'<!--END enscript-->';
    }

    /**
     * Highlight the contents of a file.
     *
     * @param string $filename Path of the file to read.
     * @param string $lang     Language name, as for hilight().
     * @return string HTML fragment; an unreadable file is treated as
     *                empty input.
     */
    public static function hilight_file($filename, $lang) {
        $code = file_get_contents($filename);
        if ($code === false) {
            $code = '';
        }
        return SyntaxHilighter::hilight($code, $lang);
    }

    /**
     * Highlight PHP code with highlight_string().
     *
     * Code that does not start with "<?" is wrapped in "<?php ... ?>"
     * first, because highlight_string() only colours text inside PHP
     * tags.  NOTE(review): the added tags are not removed again, so
     * they appear in the returned listing.
     *
     * @param string $code PHP source code.
     * @return string HTML fragment with one "\n" per source line.
     */
    public static function hilight_php($code) {
        if (!preg_match('/^<\?/', $code)) {
            $code = "<?php\n".$code."\n?>";
        }
        // Using PHP's highlight_string to do the syntax highlighting.
        // However, we need to tidy up the result for line breaks.
        $code = highlight_string($code, true);
        // Drop everything up to and including the opening <code ...>
        // tag and everything from </code> onwards.
        $code = preg_replace('/^.*<code[^>]*>/is', '', $code);
        $code = preg_replace('/<\/code>.*$/is', '', $code);
        if (version_compare(PHP_VERSION, '8.3.0', '<')) {
            // Before PHP 8.3 the output marks line breaks with <br />
            // and spaces with &nbsp;, and any literal newline is only
            // markup formatting.  Join the lines, restore plain spaces,
            // then turn each <br /> back into a newline for use in <pre>.
            $code = str_replace("\n", "", $code);
            $code = str_replace("&nbsp;", " ", $code);
            $code = implode("\n", explode("<br />", $code));
        }
        // From PHP 8.3 on, the output already uses real newlines and
        // plain spaces, so it is returned as it is.
        return $code;
    }

    /**
     * Reverse HTML entity encoding.
     *
     * Named entities are translated through PHP's HTML_ENTITIES table;
     * numeric entities below 256 become the corresponding single byte.
     * Numeric entities of 256 and above are left in the text unchanged.
     *
     * @param string $code Text containing HTML entities.
     * @return string Decoded text.
     */
    public static function htmlunspecialchars($code) {
        $tran = get_html_translation_table(HTML_ENTITIES);
        $tran = array_flip($tran);
        $code = strtr($code, $tran);
        $code = preg_replace_callback("/&#([0-9]{1,5});/is",
                                      array('SyntaxHilighter', 'decode_numeric_entity'),
                                      $code);
        return $code;
    }

    /**
     * preg_replace_callback() helper for htmlunspecialchars().
     *
     * @param array $match Match array: [0] is the whole entity, [1] its
     *                     decimal number.
     * @return string The decoded byte, or the original entity when the
     *                number does not fit in one byte.
     */
    public static function decode_numeric_entity($match) {
        $value = intval($match[1]);
        return ($value < 256) ? chr($value) : $match[0];
    }
}
// WordPress integration: the filter is registered only when the file is
// loaded in an environment that provides add_filter().
if (function_exists('add_filter')) {

    /**
     * 'the_content' filter: passes every <pre ...>...</pre> element of
     * the post to __syntax_hilight_callback().
     *
     * @param string $content Post content.
     * @return string Post content with the <pre> elements rewritten.
     */
    function __syntax_hilight($content) {
        return preg_replace_callback("/<pre([^>]*)>(.*?)<\/pre>/is",
                                     '__syntax_hilight_callback',
                                     $content);
    }

    /**
     * Rewrites one <pre> element; highlights its body when the tag has
     * a lang="..." attribute.
     *
     * @param array $match Match array: [1] holds the attributes of the
     *                     <pre> tag, [2] its body.
     * @return string The replacement <pre> element.
     */
    function __syntax_hilight_callback($match) {
        global $wp_version;
        $attr = $match[1];
        $code = $match[2];
        // version_compare() compares the version components as numbers;
        // a plain string "<" does not order version strings reliably.
        if (version_compare($wp_version, '1.5', '<')) {
            // Fix up the formatting that WordPress has put into the HTML
            // code. (only 1.2.x is messing with the <pre/> output)
            $code = str_replace("<br />", "", $code);
            $code = preg_replace("/\\s*<p>/s", "\r\n\r\n", $code);
            $code = preg_replace("/<\/p>/s", "", $code);
            // Turn typographic quotes and dashes back into plain ASCII.
            // Both the literal characters and their numeric entities
            // are handled.  NOTE(review): the entity forms are presumed
            // to be what WordPress 1.2 actually emits - confirm.
            $code = str_replace(array("&#8220;", "“", "&#8221;", "”"),
                                '"', $code);
            $code = str_replace(array("&#8216;", "‘", "&#8217;", "’"),
                                "'", $code);
            $code = str_replace(array("&#8211;", "–"), "--", $code);
        } else {
            $code = str_replace('\"', '"', $code);
        }
        // Try to match the <pre lang="..."> tag, to determine what
        // programming language we need to hilight for,
        $re_lang = '/\s+lang\s*=\s*["\']?([^"\']+)["\']?/xi';
        $num = preg_match($re_lang, $attr, $lang);
        if ($num) {
            // If we need to hilight the code, we will reverse the
            // htmlspecialchars, to convert XML entities back to the right
            // character.
            $code = SyntaxHilighter::htmlunspecialchars($code);
            $code = SyntaxHilighter::hilight($code, $lang[1]);
            // The lang attribute is removed from the emitted tag.
            $attr = preg_replace($re_lang, '', $attr);
        }
        return "<pre$attr>$code</pre>";
    }

    add_filter('the_content', '__syntax_hilight');
}
?>
2004/09 2005/03 2005/04 2005/05 2005/06 2005/07 2005/08 2005/09 2005/10 2005/11 2006/01 2006/02 2006/04 2006/05 2006/06 2008/01
Subscribe to Posts [Atom]