UPDATE (and intentionally reinserted into the feed):
I've made a bunch of changes to this code, and updated it.
It's quite a bit slower, but I really don't care (-:
It uses my new pet project, the tokalizer.
You'll probably want to grab the newly-compiled diff-php as this is the one I'll be "maintaining" (ie, when someone complains, or when it breaks for me).
(end update)
I've told a few people that I'd blog about this "soon" and that was a while ago, so I figured I'd better get on the ball.
I tweeted this almost two weeks ago:
Derick responded saying that diff -p does this for C. I tried it with PHP, and it gave me the outermost block where the change occurred (ie, the class, not the function). The
interesting thing, though, is that it changed the @@ line:
@@ -32,7 +32,7 @@ class Foo2 {
Almost what I was looking for, but not quite. I really wanted a PHP-aware diff that could tell me context.
So, what's a developer with almost no spare time on his hands (but an idea of how to actually accomplish this pet project) to do? Write it himself, of course! (-:
So, I did. Here's an example of the output:
--- tmp/left.php
+++ tmp/right.php
@@ -1,7 +1,7 @@ (root)
<?php
class Foo {
function bar() {
- // baz!
+ // bax!
}
}
@@ -32,7 +32,7 @@ (root):Foo2(class)
// k
// l
function bar2() {
- // baz2!
+ // bax2!
}
}
@@ -63,7 +63,7 @@ (root):Foo3(class):bar3(function)
// k
// l
$test = "foo {$test}";
- // baz2!
+ // bax2!
}
function bar4() {
@@ -93,7 +93,7 @@ (root):Foo3(class):bar4(function):bar5(function)
// k
// l
$test = "foo {$test}";
- //baz5
+ //bax5
// a
// b
// c
Here's the code for my php-aware diff. I use it as my default svn diff command now (see comments). Hope you find it useful, I sure do.
#!/usr/bin/php
<?php
//
// PHP-Aware diff
// Copyright 2008, Sean Coates
// Usage of the works is permitted provided that this instrument is retained
// with the works, so that any entity that uses the works is notified of this
// instrument.
// DISCLAIMER: THE WORKS ARE WITHOUT WARRANTY.
// (Fair License - http://www.opensource.org/licenses/fair.php )
// Short license: do whatever you like with this.
//
// save this file as diff-php
// and make sure /path/to/diff-php is chmod +x
//
// TO USE from cli:
//   /path/to/diff-php leftfile rightfile # (compares files, as diff does)
//
// TO USE from svn:
//   in ~/.subversion/config, add: diff-cmd = /path/to/diff-php
//
// You might need to adjust DIFF_PATH, below

// the tokenizer scares me a bit (-:

/**
 * Runs the external diff program on two files and, when the left file looks
 * like PHP, appends the enclosing class/function context to every "@@" chunk
 * identifier line of the unified diff.
 *
 * All work happens in the constructor; the result is echoed to stdout.
 */
class DiffPHP
{
    // set to true to get syntax error data (== broken diffs)
    const DEBUG_SYNTAX = false;

    // path of the external diff executable
    const DIFF_PATH = '/usr/bin/diff';

    // options handed to the diff executable (unified output)
    const DIFF_OPTS = '-u';

    /**
     * The "left" file, as passed by svn (or cli)
     */
    protected $left;

    /**
     * The "right" file, as passed by svn (or cli)
     */
    protected $right;

    /**
     * A "nice" version of the left file.
     *
     * Instead of foo/bar/.svn/base/whatever.php, it would just be whatever.php
     */
    protected $niceLeft;

    /**
     * A "nice" version of the right file.
     *
     * Instead of foo/bar/.svn/base/whatever.php, it would just be whatever.php
     */
    protected $niceRight;

    /**
     * Captured file contents (prevents reading the file twice + diff)
     */
    protected $fileContents;

    /**
     * The output from the diff executable (a string; splitDiff() turns it
     * into an array of lines)
     */
    protected $diff;

    /**
     * Each chunk of the diff goes in here (begins with a @@ identifier line)
     */
    protected $chunks;

    /**
     * Array of tokens from the Left file
     */
    protected $tokens;

    /**
     * Mapping of source lines to source class/functions
     */
    protected $lineMap;

    /**
     * Current context (used to construct line map)
     */
    protected $context;

    /**
     * Brace depth (used to determine if we're still in the current context);
     * indexed like $context
     */
    protected $braceDepth;

    /**
     * Bool flag to indicate that syntax is somehow broken
     */
    protected $isBroken;

    /**
     * Object-wide index to keep track of the current token number
     */
    protected $tokenIndex;

    /**
     * Currently parsing token value
     */
    protected $currentValue;

    /**
     * Constructor. The magic happens here. Once instantiated, the entire
     * process runs
     */
    public function __construct()
    {
        $this->parseArgs();

        // doDiff() verifies readability, so run it before reading the file
        $this->doDiff();
        $this->fileContents = (string) file_get_contents($this->left);
        $rawDiff = $this->diff;

        // $_ENV is empty unless variables_order contains "E", so also ask getenv()
        $disabled = isset($_ENV['NODIFFPHP']) || getenv('NODIFFPHP') !== false;

        // subject (probably) IS a PHP file:
        if (!$disabled && stripos($this->fileContents, '<?') !== false) {
            $this->splitDiff();
            if (count($this->chunks) > 0) {
                $this->determineHierarchy();
                $this->reconstructDiff();
                return;
            }
        }

        // not a PHP file, or nothing that looks like a unified diff chunk
        // (identical files, "Binary files differ", ...); return regular diff:
        echo $rawDiff;
    }

    /**
     * Parses the passed arguments.
     *
     * Determines if it's svn (7 args) or cli (2 args), and stores the parsed
     * arguments.
     */
    protected function parseArgs()
    {
        $argc = $_SERVER['argc'];
        $argv = $_SERVER['argv'];

        if (8 == $argc) {
            // called from svn: 7 arguments (the 8th is argv 0 == this script);
            // the display names are read from argv 3 and 5, the real paths
            // from argv 6 and 7
            $this->niceLeft = $argv[3];
            $this->niceRight = $argv[5];
            $this->left = $argv[6];
            $this->right = $argv[7];
        } else if (3 == $argc) {
            // 2 arguments means a regular diff
            $this->niceLeft = $argv[1];
            $this->niceRight = $argv[2];
            $this->left = $this->niceLeft;
            $this->right = $this->niceRight;
        } else {
            die("See " . __FILE__ . " for details on how to use this script\n");
        }
    }

    /**
     * Calls the external diff program to get the base diff
     */
    protected function doDiff()
    {
        if (!is_readable($this->left) || !is_readable($this->right)) {
            die("{$this->left} or {$this->right} is not readable\n");
        }

        // file names come from the command line: escape them so spaces and
        // shell metacharacters cannot alter the command
        $diffCmd = self::DIFF_PATH . ' ' . self::DIFF_OPTS
            . ' ' . escapeshellarg($this->left)
            . ' ' . escapeshellarg($this->right);

        // shell_exec() yields null when there is no output (identical files)
        $this->diff = (string) shell_exec($diffCmd);
    }

    /**
     * Takes an identifier line (looks like: @@ -30,23 +30,79 @@) and returns
     * the begin line number
     *
     * @param string $identifier chunk identifier line
     * @return int left-hand start line, or 0 if the line is not an identifier
     */
    protected function parseLineNum($identifier)
    {
        if (preg_match('/^@@ -(\d+)/', $identifier, $match)) {
            return (int) $match[1];
        }
        return 0;
    }

    /**
     * Sanitizes CRLF or CR into just LF
     *
     * @param string $data
     * @return string
     */
    protected function sanitizeLineEndings($data)
    {
        // CRLF first, so the lone-CR pass does not double the line breaks
        $data = str_replace("\r\n", "\n", $data);
        $data = str_replace("\r", "\n", $data);
        return $data;
    }

    /**
     * Tells whether a diff line is a chunk identifier ("@@ ... @@")
     *
     * @param string $line
     * @return bool
     */
    protected function isChunkIdentifier($line)
    {
        return substr($line, 0, 2) == '@@' && substr($line, -2) == '@@';
    }

    /**
     * Actually splits the diff into chunks and stores chunks + line numbers
     */
    protected function splitDiff()
    {
        $diff = $this->sanitizeLineEndings($this->diff);

        // drop the single terminating newline, otherwise explode() leaves an
        // empty element that shows up as an extra blank line in the last chunk
        if (substr($diff, -1) === "\n") {
            $diff = substr($diff, 0, -1);
        }

        // now split:
        $this->diff = explode("\n", $diff);
        $this->chunks = array();
        $maxLine = count($this->diff);

        // skip first 2 lines as left, right files
        $line = 2;

        // descend into data chunks
        while ($line < $maxLine) {
            // next line is the chunk identifier
            $dataChunk = array();
            $dataChunk['identifier'] = $this->diff[$line++];
            $dataChunk['line'] = $this->parseLineNum($dataChunk['identifier']);
            $dataChunk['data'] = array();
            while ($line < $maxLine && !$this->isChunkIdentifier($this->diff[$line])) {
                $dataChunk['data'][] = $this->diff[$line++];
            }
            $this->chunks[] = $dataChunk;
        }
    }

    /**
     * Reconstructs the diff (with adjusted identifier lines, and outputs the
     * result)
     */
    protected function reconstructDiff()
    {
        $out = "--- {$this->niceLeft}\n+++ {$this->niceRight}\n";
        foreach ($this->chunks as $chunk) {
            $out .= $chunk['identifier'] . "\n";
            $out .= implode("\n", $chunk['data']) . "\n";
        }
        echo $out;
    }

    /**
     * Advances $tokenIndex by one and loads that token into $currentValue
     *
     * @return int|null|false the token id, null for a single-character token,
     *                        or false when there are no tokens left
     */
    protected function nextToken()
    {
        if (++$this->tokenIndex >= count($this->tokens)) {
            $this->currentValue = '';
            return false;
        }

        $current = $this->tokens[$this->tokenIndex];
        if (is_array($current)) {
            $this->currentValue = $current[1];
            return $current[0];
        }

        $this->currentValue = $current;
        return null;
    }

    /**
     * Descends into a deeper context
     *
     * Called with $tokenIndex on the "class"/"function" keyword. Consumes the
     * tokens up to and including the opening brace of the body.
     *
     * @param string $type friendly name, either class or function
     */
    protected function enterContext($type)
    {
        // next comes whitespace:
        $token = $this->nextToken();
        if ($token !== T_WHITESPACE) {
            // syntax is broken, let's get out of here
            if (self::DEBUG_SYNTAX) {
                die("Syntax broken in whitespace assertion, " . $this->context[count($this->context) - 1] . "\n");
            }
            $this->isBroken = true;
            // was "break", which is a fatal error outside a loop or switch
            return;
        }
        $this->checkLineBreak();

        // next comes the name:
        if ($this->nextToken() === false) {
            $this->isBroken = true;
            return;
        }
        $this->context[] = $this->currentValue . "({$type})";

        // these are all valid before the brace (null == single character):
        $allowed = array(
            null,
            T_WHITESPACE,
            T_VARIABLE,
            T_EXTENDS,
            T_IMPLEMENTS,
            T_STRING,
            T_ARRAY,
            T_CONSTANT_ENCAPSED_STRING,
            T_LNUMBER,
        );

        // chew through the next few tokens until we get a "{"
        while ($this->currentValue != '{') {
            $token = $this->nextToken();
            if ($token === false) {
                // ran out of tokens before any body started
                $this->isBroken = true;
                return;
            }
            $this->checkLineBreak();

            if ($token === null && $this->currentValue === ';') {
                // declaration without a body (abstract or interface method):
                // there is no context to descend into
                array_pop($this->context);
                return;
            }

            if (!in_array($token, $allowed, true)) {
                // if another token is found, then there's a syntax error
                // (this was added to prevent really deep looping)
                if (self::DEBUG_SYNTAX) {
                    die("Syntax broken in token assertion, " . $this->context[count($this->context) - 1] . "," . token_name($token) . "\n");
                }
                $this->isBroken = true;
                return;
            }
        }

        // found the starting brace
        $this->braceDepth[count($this->context) - 1] = 1;
    }

    /**
     * Tokenizes the code and creates a line map
     */
    protected function tokenizeHierarchy()
    {
        $this->context = array('(root)');
        // the root context starts outside any brace and is never left
        $this->braceDepth = array(0);
        // index 0 is a placeholder so that line numbers are 1-based
        $this->lineMap = array('');
        $this->tokens = token_get_all($this->sanitizeLineEndings($this->fileContents));
        $this->isBroken = false;

        $tokenCount = count($this->tokens);
        for ($this->tokenIndex = 0; $this->tokenIndex < $tokenCount; $this->tokenIndex++) {
            if ($this->isBroken) {
                // syntax is somehow broken; return progress, but don't go further
                return;
            }

            $current = $this->tokens[$this->tokenIndex];
            if (is_array($current)) {
                list($token, $this->currentValue) = $current;
            } else {
                $token = null;
                $this->currentValue = $current;
            }

            if ($token === T_CLASS) {
                // found "class"
                $this->enterContext('class');
                continue;
            }
            if ($token === T_FUNCTION) {
                // found "function"
                $this->enterContext('function');
                continue;
            }

            $idx = count($this->context) - 1;

            // "{$" and "${" inside strings are closed by a plain "}", so they
            // must count as opening braces; they are recognised by token id
            // because the text of T_DOLLAR_OPEN_CURLY_BRACES is "${", not "{"
            $opens = ($token === null && $this->currentValue === '{')
                || $token === T_CURLY_OPEN
                || $token === T_DOLLAR_OPEN_CURLY_BRACES;
            $closes = ($token === null && $this->currentValue === '}');

            if ($opens) {
                ++$this->braceDepth[$idx];
            } else if ($closes) {
                --$this->braceDepth[$idx];
                if ($this->braceDepth[$idx] == 0 && $idx > 0) {
                    // we're out of this context
                    array_pop($this->context);
                } else if ($this->braceDepth[$idx] < 0) {
                    // bad stuff!
                    if (self::DEBUG_SYNTAX) {
                        die("Syntax broken in brace close assertion, " . $this->context[count($this->context) - 1] . "\n");
                    }
                    $this->isBroken = true;
                }
            } else {
                $this->checkLineBreak();
            }
        }
    }

    /**
     * Determines if the currently processing token contains line breaks, and
     * if so, adjusts the lineMap accordingly
     */
    protected function checkLineBreak()
    {
        $breaks = substr_count((string) $this->currentValue, "\n");
        if ($breaks > 0) {
            // every line that ends inside this token gets the current context
            $label = implode(':', $this->context);
            for ($j = 0; $j < $breaks; $j++) {
                $this->lineMap[] = $label;
            }
        }
    }

    /**
     * Matches the chunk map to the line map
     */
    protected function determineHierarchy()
    {
        $this->tokenizeHierarchy();
        foreach ($this->chunks as $chunknum => $chunk) {
            $line = $chunk['line'];
            // the map can be short when tokenizing stopped early, and has no
            // context for line 0; leave such identifiers untouched
            if (isset($this->lineMap[$line]) && $this->lineMap[$line] !== '') {
                $this->chunks[$chunknum]['identifier'] .= ' ' . $this->lineMap[$line];
            }
        }
    }
}

new DiffPHP();
// komode: le=unix language=php codepage=utf8 tab=4 notabs indent=4
The most up-to-date version of this file can also be found in my personal svn repository: https://svn.caedmon.net/svn/public/diff-php/diff-php.
Please let me know if you run into any bugs. I'm sure there are a few, but it works pretty well for me.
S