#!/usr/bin/env python
#
# Homepage: http://code.google.com/p/coderev
# License: GPLv2, see "COPYING"
#
# $Id: codediff.py 3 2008-08-19 04:06:17Z mattwyl $
'''Diff two files/directories and produce HTML pages.'''
import sys, os, stat, errno, time, re, difflib, filecmp
# Script name as invoked; used in the usage hint and in the index page footer.
_myname = os.path.basename(sys.argv[0])
# Revision number extracted from the SVN keyword string ('$Revision: N $' -> 'N').
_revision = '$Revision: 3 $'.split()[1]
########## globals & templates begin ##########
# index page layout:
#
# h1: dir1 vs dir2
# summary_info: Files Changed/Deleted/Added: xx/yy/zz
#
# Filename C/D/A Summary Diffs Sources
# Pathname x/y/z Cdiffs Udiffs Sdiffs Fdiffs Old New
# Pathname x/y/z Cdiffs Udiffs Sdiffs Fdiffs Old New
# Pathname x/y/z - - - - - New
# Pathname x/y/z - - - - Old -
#
# Legends:
# Changed Deleted Added
# <hr>
# footer_info
#
# Skeleton of the generated index page.  Every %(name)s placeholder is filled
# in by diffDirByList() when it writes index.html.
_file_template = """
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" />
<title>
%(title)s
</title>
<style type="text/css">
%(styles)s
</style>
</head>
<body>
%(header_info)s
%(summary_info)s
%(data_rows)s
%(legend)s
<hr>
%(footer_info)s
</body>
</html>"""
# CSS substituted into the %(styles)s placeholder of the index page.  The
# .diff/.added/.deleted classes are the ones referenced by the row templates
# and by the legend table.
_styles = """
body {font-family: monospace; font-size: 9pt;}
#summary_info {font-style: italic;}
#summary_table {
text-align:left;font-family:monospace;
border: 1px solid #ccc; border-collapse: collapse
}
td {padding-left:5px;padding-right:5px;}
#legend {border:medium;text-align:center;}
#footer_info {color:#333; font-size:8pt;}
.diff {background-color:#ffd;}
.added {background-color:#afa;}
.deleted {background-color:#faa;}
.table_header th {
text-align:center;
background-color:#f0f0f0;
border-bottom: 1px solid #aaa;
padding: 4px 4px 4px 4px;
}
"""
# Page heading; takes the two compared directory names (dir1, dir2).
_header_info_template = """
<h1>%(dir1)s vs %(dir2)s</h1>"""
# Per-run totals; formatted with a dict holding the integer counters
# 'changed', 'deleted' and 'added'.
_summary_info_template = """
<p id="summary_info">
Files Changed/Deleted/Added: %(changed)d/%(deleted)d/%(added)d
</p>"""
# Table wrapper; %(data_rows)s receives the concatenated per-file rows.
_data_rows_template = """
<table id="summary_table" cellspacing="1" border="1" nowrap="nowrap">
<tr class="table_header">
<th>Filename</th>
<th><abbr title="Changed/Deleted/Added">C/D/A</abbr> Summary</th>
<th colspan="4">Diffs</th>
<th colspan="2">Sources</th>
</tr>
%(data_rows)s
</table>"""
# Index table rows, one template per file state.  All href values are quoted
# so that path names containing spaces still produce working links (and so
# the markup is valid for the XHTML doctype the page declares).

# Row for a file present on both sides with differing content; needs
# 'pathname' plus the per-file 'changed', 'deleted' and 'added' line counts.
_diff_data_row_template = """
<tr class="diff">
<td>%(pathname)s</td>
<td><abbr title="Changed/Deleted/Added">%(changed)s/%(deleted)s/%(added)s</abbr></td>
<td><a href="%(pathname)s.cdiff.html" title="context diffs">Cdiffs</a></td>
<td><a href="%(pathname)s.udiff.html" title="unified diffs">Udiffs</a></td>
<td><a href="%(pathname)s.sdiff.html" title="side-by-side context diffs">Sdiffs</a></td>
<td><a href="%(pathname)s.fdiff.html" title="side-by-side full diffs">Fdiffs</a></td>
<td><a href="%(pathname)s-.html" title="old file">Old</a></td>
<td><a href="%(pathname)s.html" title="new file">New</a></td>
</tr>"""
# Row for a file that only exists on the old side; needs 'pathname'.
_deleted_data_row_template = """
<tr class="deleted">
<td>%(pathname)s</td>
<td>-/-/-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td><a href="%(pathname)s-.html" title="old file">Old</a></td>
<td>-</td>
</tr>"""
# Row for a file that only exists on the new side; needs 'pathname'.
_added_data_row_template = """
<tr class="added">
<td>%(pathname)s</td>
<td>-/-/-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td>-</td>
<td><a href="%(pathname)s.html" title="new file">New</a></td>
</tr>"""
# Colour legend shown below the table.  This string is inserted verbatim as
# the %(legend)s value of the page template and is never %-formatted itself,
# so percent signs must NOT be doubled here.
_legend = """
<br><em>Legends:</em>
<table id="legend">
<tr>
<td width="20%" class="diff">Changed</td>
<td width="20%" class="deleted">Deleted</td>
<td width="20%" class="added">Added</td>
</tr>
</table>"""
# Footer line; takes 'myname' (script name), 'revision' and 'time'
# (generation timestamp, already formatted as a string).
_footer_info_template = """
<i id="footer_info">
Generated by %(myname)s r%(revision)s at %(time)s
</i>"""
# Regular expressions for directory names that grabDir() must not descend
# into.  They are applied with re.match() to the bare directory name, so they
# are implicitly anchored at the start.
# Raw strings are required: in a normal string literal '\b' is a backspace
# character, which made the old patterns match nothing.  A regex \b would
# also be wrong in front of '.svn' (there is no word boundary between the
# start of the string and '.'), so it is simply omitted.
_global_dir_ignore_list = [
    r'CVS$',
    r'SCCS$',
    r'\.svn$',
]
# Regular expressions for file names that grabDir() leaves out (object files,
# editor swap/backup files, CVS metadata); also applied with re.match().
_global_file_ignore_list = [
    r'.*\.o$',
    r'.*\.swp$',
    r'.*\.bak$',
    r'.*\.old$',
    r'.*\~$',
    r'\.cvsignore$',
]
########## globals & templates end ##########
def makeTitle(pathname, width):
    '''Abbreviate `pathname' so that it fits into `width' columns.

    An empty pathname yields the string 'None'.  A missing, zero or negative
    width disables abbreviation.  A name longer than `width' keeps only its
    tail, prefixed with '...' when the width leaves room for the dots.
    '''
    if not pathname:
        return 'None'
    unlimited = not width or width <= 0
    if unlimited or len(pathname) <= width:
        return pathname
    if width <= 3:
        # Too narrow for the ellipsis: just keep the last `width' characters.
        return pathname[-width:]
    return '...' + pathname[-(width - 3):]
def getLines(file):
    '''Return the content of `file' as a list of lines (line ends kept).

    An empty or None file name yields an empty list.  On an I/O error the
    error message is written to stderr and the program exits with status 2.
    '''
    if not file:
        return []
    try:
        # `with' closes the handle even if readlines() fails part-way.
        with open(file, 'r') as fp:
            return fp.readlines()
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
def sdiffLines(fomeLines, toLines, outFile, fromTitle, toTitle,
               context, wrapnum, lines):
    '''Write a side-by-side HTML diff of two line lists to `outFile'.

    fomeLines/toLines -- old and new text, each a list of lines
    fromTitle/toTitle -- column headings for the old and the new side
    context           -- True: show only changes plus `lines' lines of
                         context; False: show the full files
    wrapnum           -- column at which long lines are wrapped (None: never)
    lines             -- number of context lines, passed to
                         difflib.HtmlDiff.make_file

    Returns None.  On an I/O error the message is written to stderr and the
    program exits with status 2.
    '''
    d = difflib.HtmlDiff(wrapcolumn=wrapnum)
    # Extend the stylesheet on this instance only (+= rebinds the attribute
    # on `d'), leaving the class-wide default untouched.
    d._styles += '''
/* customized style */
body { font-family:monospace; font-size: 9pt; }
table.diff {font-family:monospace; border:medium;}'''
    html = d.make_file(fomeLines, toLines, fromTitle, toTitle, context, lines)
    try:
        # `with' guarantees the handle is closed even if write() fails.
        with open(outFile, 'w') as fp:
            fp.write(html)
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
def cdiffLines(fomeLines, toLines, outFile, fromName, toName,
               fromDate, toDate, context):
    '''Write a context diff of two line lists, rendered as HTML, to `outFile'.

    fomeLines/toLines -- old and new text, each a list of lines
    fromName/toName   -- names shown in the diff header and the page title
    fromDate/toDate   -- date strings shown in the diff header
    context           -- number of context lines around each change

    Returns the summary dict produced by cdiffToHtml() (keys 'changed',
    'deleted', 'added').  On an I/O error the message is written to stderr
    and the program exits with status 2.
    '''
    d = difflib.context_diff(fomeLines, toLines, fromName, toName,
                             fromDate, toDate, n=context)
    title = 'Cdiffs of %s and %s' % (fromName, toName)
    summary, html = cdiffToHtml(d, title)
    try:
        # `with' guarantees the handle is closed even if write() fails.
        with open(outFile, 'w') as fp:
            fp.write(html)
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
    return summary
def udiffLines(fomeLines, toLines, outFile, fromName, toName,
               fromDate, toDate, n):
    '''Write a unified diff of two line lists, rendered as HTML, to `outFile'.

    fomeLines/toLines -- old and new text, each a list of lines
    fromName/toName   -- names shown in the diff header and the page title
    fromDate/toDate   -- date strings shown in the diff header
    n                 -- number of context lines around each change

    Returns None.  On an I/O error the message is written to stderr and the
    program exits with status 2.
    '''
    d = difflib.unified_diff(fomeLines, toLines, fromName, toName,
                             fromDate, toDate, n)
    title = 'Udiffs of %s and %s' % (fromName, toName)
    html = udiffToHtml(d, title)
    try:
        # `with' guarantees the handle is closed even if write() fails.
        with open(outFile, 'w') as fp:
            fp.write(html)
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
# Used only when two plain files are given on the command line.
def diffFile(file1, file2, outFile, context=False, wrapnum=None, lines=3):
    '''Render the side-by-side HTML diff of two files into `outFile'.

    Both files are read with getLines(), their names are shortened with
    makeTitle() to serve as column headings, and the page itself is written
    by sdiffLines().  Nothing is returned.
    '''
    oldText = getLines(file1)
    newText = getLines(file2)
    oldHeading, newHeading = [makeTitle(name, wrapnum)
                              for name in (file1, file2)]
    sdiffLines(oldText, newText, outFile, oldHeading, newHeading,
               context, wrapnum, lines)
def inIgnoreList(name, ignorelist):
    '''Tell whether `name' matches any regular expression in `ignorelist'.

    Patterns are tried with re.match(), i.e. anchored at the start of `name'.
    Returns True on the first hit, False when nothing matches.
    '''
    return any(re.match(regex, name) for regex in ignorelist)
def grabDir(dir):
    '''Return the files below `dir' as paths relative to `dir'.

    Directories matching _global_dir_ignore_list are not descended into and
    files matching _global_file_ignore_list are left out.  An empty `dir'
    yields an empty list.
    '''
    # Drop trailing slashes, but keep a bare '/' (root) intact instead of
    # reducing it to an empty string.
    dir = dir.rstrip('/') or dir[:1]
    # Length of the leading "dir/" part that is cut from every result.
    prefix = dir if dir.endswith('/') else dir + '/'
    plen = len(prefix)
    flist = []
    for root, dirs, files in os.walk(dir):
        # Prune with slice assignment: os.walk only honours in-place changes,
        # and calling remove() while iterating over `dirs' would skip the
        # entry that follows each removed one.
        dirs[:] = [d for d in dirs
                   if not inIgnoreList(d, _global_dir_ignore_list)]
        for f in files:
            if not inIgnoreList(f, _global_file_ignore_list):
                flist.append(os.path.join(root, f)[plen:])
    return flist
def mergeList(list1, list2):
    '''Return a NEW list: the items of list1 followed by those items of list2
    that are not present yet (first occurrence wins, order preserved).

    Neither argument is modified.  Items must be hashable (they are path
    name strings in this program).
    '''
    merged = list(list1)    # copy: the caller's list1 must stay untouched
    seen = set(merged)      # O(1) membership instead of scanning list1
    for item in list2:
        if item not in seen:
            seen.add(item)
            merged.append(item)
    return merged
def sourceToHtml(src, outfile):
    '''Read file `src' and write it to `outfile' as a minimal HTML page.

    The text is placed in a <pre> element with the HTML special characters
    escaped; the page title is the (escaped) source path.  Returns None.
    On an I/O error the message is written to stderr and the program exits
    with status 2.
    '''
    def escape(text):
        # '&' goes first, otherwise the '&' of the entities produced for
        # '>' and '<' would be escaped a second time.
        return (text.replace("&", "&amp;")
                    .replace(">", "&gt;")
                    .replace("<", "&lt;"))

    try:
        with open(src, 'r') as infp:
            body = infp.read()
        # TODO: convert to syntax highlighted html
        with open(outfile, 'w') as outfp:
            outfp.write('''<html><head><title>%s</title></head><body>
<pre style="font-family:monospace; font-size:9pt;">%s</pre>
</body></html>''' % (escape(src), escape(body)))
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
def diffDir(dir1, dir2, outdir, wrapnum=None, lines=3):
    '''Diff two directory trees and generate the HTML pages under `outdir'.

    The file lists of both trees are collected with grabDir() (which already
    applies the ignore lists), merged, and handed to diffDirByList().  The
    return value is whatever diffDirByList() returns.
    '''
    oldFiles = grabDir(dir1)
    newFiles = grabDir(dir2)
    return diffDirByList(dir1, dir2, outdir, mergeList(oldFiles, newFiles),
                         wrapnum, lines)
def lstatFile(obj):
    '''Return the os.lstat() result for `obj', or None if it does not exist.

    ENOTDIR is treated like ENOENT: when a leading component of the path is
    a regular file (e.g. `a' is a file in one tree and a directory in the
    other), the object simply does not exist on that side.  Any other error
    is written to stderr and the program exits with status 2.
    '''
    try:
        return os.lstat(obj)
    except OSError as dig:
        if dig.errno in (errno.ENOENT, errno.ENOTDIR):
            return None
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
def diffDirByList(dir1, dir2, outdir, flist, wrapnum=None, lines=3):
    '''Diff the files named in `flist' between two trees and write HTML.

    dir1/dir2 -- roots of the old and the new tree
    outdir    -- output directory; sub directories are created as needed
    flist     -- path names relative to dir1/dir2 (not modified)
    wrapnum   -- wrap column for the side-by-side pages (None: no wrapping)
    lines     -- number of context lines for the diffs

    For every changed file the cdiff/udiff/sdiff/fdiff pages plus HTML copies
    of the old and new source are written; for deleted or added files only
    the source copy is written.  One status line per handled file is printed
    to stdout.  When at least one difference was found, index.html is written
    to `outdir' and True is returned; otherwise False is returned and no
    index page is produced.  I/O errors are reported on stderr and terminate
    the program with status 2.
    '''
    def report(name, message):
        # One status line per file: "<name padded to 40 columns> | <message>".
        sys.stdout.write('%-40s | %s\n' % (name, message))

    def unsupported(path, info):
        # Only regular, non-binary files can be rendered and diffed.
        return not stat.S_ISREG(info.st_mode) or isBinaryFile(path)

    rows = []
    summary = {'changed': 0, 'added': 0, 'deleted': 0}  # counts FILES
    has_diff = False

    # sorted() instead of flist.sort(): leave the caller's list alone.
    for f in sorted(flist):
        target = os.path.join(outdir, f)
        obj1 = os.path.join(dir1, f)
        obj2 = os.path.join(dir2, f)

        # make output dir and sub dir
        try:
            os.makedirs(os.path.join(outdir, os.path.dirname(f)))
        except OSError as dig:
            if dig.errno != errno.EEXIST:
                sys.stderr.write(str(dig) + '\n')
                sys.exit(2)

        stat1 = lstatFile(obj1)
        stat2 = lstatFile(obj2)

        if stat1 and not stat2:  # deleted
            if unsupported(obj1, stat1):
                report(f, 'File removed (skipped dir/special/binary)')
                continue
            report(f, 'File removed')
            sourceToHtml(obj1, target + '-.html')
            rows.append(_deleted_data_row_template % {'pathname': f})
            summary['deleted'] += 1
            has_diff = True

        elif stat2 and not stat1:  # added
            if unsupported(obj2, stat2):
                report(f, 'New file (skipped special/binary)')
                continue
            report(f, 'New file')
            sourceToHtml(obj2, target + '.html')
            rows.append(_added_data_row_template % {'pathname': f})
            summary['added'] += 1
            has_diff = True

        elif stat1 and stat2:  # same or diff
            # do not compare special or binary files
            if unsupported(obj1, stat1):
                report(f, '(skipped, former file is special/binary)')
                continue
            if unsupported(obj2, stat2):
                report(f, '(skipped, latter file is special/binary)')
                continue
            if filecmp.cmp(obj1, obj2):
                continue  # identical: no output, no table row
            has_diff = True

            fromDate = time.ctime(stat1.st_mtime)
            toDate = time.ctime(stat2.st_mtime)
            fromLines = getLines(obj1)
            toLines = getLines(obj2)

            # Cdiffs (also yields the per-file LINE counts)
            file_summary = cdiffLines(fromLines, toLines,
                                      target + '.cdiff.html', obj1, obj2,
                                      fromDate, toDate, lines)
            # Udiffs
            udiffLines(fromLines, toLines, target + '.udiff.html',
                       obj1, obj2, fromDate, toDate, lines)
            # Sdiffs (side-by-side, context only)
            sdiffLines(fromLines, toLines, target + '.sdiff.html',
                       obj1, obj2, True, wrapnum, lines)
            # Fdiffs (side-by-side, full files)
            sdiffLines(fromLines, toLines, target + '.fdiff.html',
                       obj1, obj2, False, wrapnum, lines)

            report(f, 'Changed/Deleted/Added: %d/%d/%d' % (
                file_summary['changed'],
                file_summary['deleted'],
                file_summary['added']))
            sourceToHtml(obj1, target + '-.html')
            sourceToHtml(obj2, target + '.html')
            rows.append(_diff_data_row_template % dict(
                pathname=f,
                changed=file_summary['changed'],
                deleted=file_summary['deleted'],
                added=file_summary['added'],
            ))
            summary['changed'] += 1

        else:  # listed in the master file list but present in neither tree
            report(f, 'Not found')

    if not has_diff:
        return False

    # Generate footer info
    footer_info = _footer_info_template % dict(
        time=time.strftime('%a %b %d %X %Z %Y', time.localtime()),
        myname=_myname,
        revision=_revision,
    )
    page = _file_template % dict(
        title='%s vs %s' % (dir1, dir2),
        styles=_styles,
        header_info=_header_info_template % dict(dir1=dir1, dir2=dir2),
        summary_info=_summary_info_template % summary,
        data_rows=_data_rows_template % {'data_rows': ''.join(rows)},
        legend=_legend,
        footer_info=footer_info,
    )

    # Now write the index page; `with' closes the handle on every path and
    # write errors are reported the same way as open errors.
    try:
        with open(os.path.join(outdir, 'index.html'), 'w') as index:
            index.write(page)
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
    return True
def warnOverwrite(pathname):
    '''Ask on stderr whether `pathname' may be overwritten.

    Reads answers from stdin until one is `yes' (returns True) or `no'
    (returns False); surrounding white space is ignored.  End of input is
    treated as `no', so a closed or exhausted stdin never overwrites
    anything and never raises.
    '''
    msg = "`%s' exists, are you sure you want to overwrite it (yes/no)? "
    while True:
        sys.stderr.write(msg % pathname)
        sys.stderr.flush()
        # readline() works on Python 2 and 3 alike and returns '' at EOF.
        answer = sys.stdin.readline()
        if not answer:
            sys.stderr.write('\n')
            return False
        answer = answer.strip()
        if answer == 'yes':
            return True
        if answer == 'no':
            return False
        # anything else: prompt again
def isBinaryFile(file):
    '''Guess whether `file' is a binary file.

    The first 1024 bytes are read and the non-text bytes (values below 8, or
    from 14 to 31) are counted; the file is considered binary as soon as at
    least 8 of them were seen.  This is not very reliable but is effective.
    An empty or None file name yields False.  On an I/O error the message is
    written to stderr and the program exits with status 2.
    '''
    if not file:
        return False
    target_count = 8
    try:
        with open(file, 'rb') as fp:
            data = fp.read(1024)
    except IOError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)
    non_text = 0
    # bytearray yields integers on Python 2 and 3 alike, so no ord() needed.
    for a in bytearray(data):
        if a < 8 or 13 < a < 32:  # not printable
            non_text += 1
            if non_text >= target_count:
                return True
    return False
def cdiffToHtml(cdiff, title):
    '''Render a context diff as an HTML page.

    cdiff -- iterable of context diff lines, as generated by
             difflib.context_diff
    title -- page title

    Returns (summary, html).  summary is a dict with the number of changed,
    added and deleted lines (keys 'changed', 'added', 'deleted'); html is the
    complete page as a string with every diff line wrapped in a <span> whose
    class reflects the kind of line.
    '''
    def escape(text):
        # '&' goes first, otherwise the '&' of the entities produced for
        # '>' and '<' would be escaped a second time.
        return (text.replace("&", "&amp;")
                    .replace(">", "&gt;")
                    .replace("<", "&lt;"))

    summary = {'changed': 0, 'added': 0, 'deleted': 0}
    line_pattern = '<span class="%s">%s</span>'
    parts = []          # joined once at the end instead of repeated +=
    old_group = False   # True while inside the "*** from" half of a hunk
    for raw in cdiff:
        line = escape(raw)
        # Classify on the raw line; the prefixes contain no special chars.
        if raw.startswith('*** '):
            old_group = True
            parts.append(line_pattern % ('fromtitle', line))
        elif raw.startswith('--- '):
            old_group = False
            parts.append(line_pattern % ('totitle', line))
        elif raw.startswith('  '):
            parts.append(line_pattern % ('same', line))
        elif raw.startswith('! '):
            parts.append(line_pattern % ('change', line))
            # Changed lines show up in both halves of a hunk; count only the
            # ones in the old half.
            if old_group:
                summary['changed'] += 1
        elif raw.startswith('- '):
            parts.append(line_pattern % ('delete', line))
            summary['deleted'] += 1
        elif raw.startswith('+ '):
            parts.append(line_pattern % ('insert', line))
            summary['added'] += 1
        elif raw.startswith('*' * 15):  # hunk separator
            parts.append('<hr>')
        else:  # shouldn't happen
            parts.append(line)
    # </head> is closed before <body> opens.
    html = '''<html><head>
<title>%s</title>
<style type="text/css">
.fromtitle {color:brown; font:bold 11pt;}
.totitle {color:green; font:bold 11pt;}
.same {color:black; font:9pt;}
.change {color:blue; font:9pt;}
.delete {color:brown; font:9pt;}
.insert {color:green; font:9pt;}
</style>
</head>
<body>
<pre>%s</pre>
</body>
</html>''' % (escape(title), ''.join(parts))
    return summary, html
def udiffToHtml(udiff, title):
    '''Render a unified diff as an HTML page.

    udiff -- iterable of unified diff lines, as generated by
             difflib.unified_diff
    title -- page title

    Returns the complete page as a string with every diff line wrapped in a
    <span> whose class reflects the kind of line.
    '''
    def escape(text):
        # '&' goes first, otherwise the '&' of the entities produced for
        # '>' and '<' would be escaped a second time.
        return (text.replace("&", "&amp;")
                    .replace(">", "&gt;")
                    .replace("<", "&lt;"))

    line_pattern = '<span class="%s">%s</span>'
    parts = []        # joined once at the end instead of repeated +=
    in_hunk = False   # set once the first '@@' hunk header has been seen
    for raw in udiff:
        line = escape(raw)
        # The '---'/'+++' file headers only precede the first hunk.  Inside
        # a hunk such a line is a removed/added line whose text starts with
        # '-- ' or '++ ' and must be styled as content, not as a header.
        if not in_hunk and raw.startswith('--- '):
            parts.append(line_pattern % ('fromtitle', line))
        elif not in_hunk and raw.startswith('+++ '):
            parts.append(line_pattern % ('totitle', line))
        elif raw.startswith('@@ -'):
            in_hunk = True
            parts.append('<hr>')
            parts.append(line_pattern % ('head', line))
        elif raw.startswith(' '):
            parts.append(line_pattern % ('same', line))
        elif raw.startswith('-'):
            parts.append(line_pattern % ('old', line))
        elif raw.startswith('+'):
            parts.append(line_pattern % ('new', line))
        else:  # shouldn't happen
            parts.append(line)
    # </head> is closed before <body> opens.
    html = '''<html><head>
<title>%s</title>
<style type="text/css">
.fromtitle {color:brown; font:bold 11pt;}
.totitle {color:green; font:bold 11pt;}
.head {color:blue; font:bold 9pt;}
.same {color:black; font:9pt;}
.old {color:brown; font:9pt;}
.new {color:green; font:9pt;}
</style>
</head>
<body>
<pre>%s</pre>
</body>
</html>''' % (escape(title), ''.join(parts))
    return html
def stripPrefix(name, p=0):
    '''Remove the first `p' path components of `name', like patch(1) -pNUM.

    A component ends at a run of one or more slashes.  Stripping stops early
    once no slash is left, so a large `p' leaves just the last component.

    eg1: /foo/bar/a/b/x.c
        -p0 gives original name (no change)
        -p1 gives foo/bar/a/b/x.c
        -p2 gives bar/a/b/x.c
        -p9 gives x.c
    eg2: foo/bar/a/b/x.c
        -p0 gives original name (no change)
        -p1 gives bar/a/b/x.c
        -p2 gives a/b/x.c
        -p9 gives x.c
    eg3: ./foo/bar/a/b/x.c
        -p0 gives original name (no change)
        -p1 gives foo/bar/a/b/x.c
        -p2 gives bar/a/b/x.c
        -p9 gives x.c
    '''
    start = 0
    end = len(name)
    remaining = p
    while remaining > 0:
        slash = name.find('/', start)
        if slash < 0:
            break
        # Step over the whole run of consecutive slashes.
        start = slash
        while start < end and name[start] == '/':
            start += 1
        remaining -= 1
    return name[start:]
def readFileList(file, p=0):
    '''Return the path names listed in `file' (one per line).

    file -- name of the list file, or '-' to read the list from stdin; an
            empty or None name yields an empty list
    p    -- number of leading path components to strip from every entry,
            see stripPrefix(); None is treated as 0

    Trailing white space is removed from every entry.  Blank lines are
    skipped.  Entries that are still absolute after stripping are skipped
    with a warning on stderr: joining an absolute name onto the old, new or
    output directory would discard that directory.  On an I/O error the
    message is written to stderr and the program exits with status 2.
    '''
    if not file:
        return []
    if file == '-':  # read from stdin
        flist = sys.stdin.readlines()
    else:
        try:
            # `with' closes the handle even if readlines() fails part-way.
            with open(file, 'r') as fp:
                flist = fp.readlines()
        except IOError as dig:
            sys.stderr.write(str(dig) + '\n')
            sys.exit(2)
    level = p or 0
    outlist = []
    for line in flist:
        name = stripPrefix(line, level).rstrip()
        if not name:
            continue
        if name.startswith('/'):
            sys.stderr.write("ignoring absolute path name `%s'\n" % name)
            continue
        outlist.append(name)
    return outlist
# Command line entry point.
# Exit status: 1 for wrong usage or a declined overwrite, 2 for a missing
# output name, an OS error on the inputs, or inputs that are neither two
# regular files nor two directories.
if __name__ == '__main__':
    import optparse
    usage = '''
%(name)s [options] OLD NEW
%(name)s OLD NEW [options]
Diff two files/directories and produce HTML pages.''' % \
        {'name': os.path.basename(sys.argv[0])}
    parser = optparse.OptionParser(usage)
    # '--outupt' was a typo in earlier revisions; it is still accepted so
    # that existing command lines keep working.
    parser.add_option('-o', '--output', '--outupt', dest='output',
                      help='specify output file or directory name')
    parser.add_option('-c', '--context', action='store_true',
                      dest='context', default=False,
                      help='generate context diff (default is full diff),' +
                           ' only take effect when diffing two files')
    parser.add_option('-f', '--filelist', dest='filelist', metavar='FILE',
                      help='specify a file list to read from, filelist can ' +
                           'be generated by find -type f, specify - to read' +
                           ' from stdin')
    # default=0 (not None) so the strip level is always an integer.
    parser.add_option('-p', '--striplevel', dest='striplevel',
                      type='int', metavar='NUM', default=0,
                      help='for all pathnames in the filelist, delete NUM ' +
                           'path name components from the beginning of each' +
                           ' path name, it is similar to patch(1) -p')
    parser.add_option('-n', '--lines', dest='lines',
                      type='int', metavar='NUM', default=3,
                      help='specify context line count when generating ' +
                           'context diffs or unified diffs, default is 3')
    parser.add_option('-w', '--wrap', dest='wrapnum',
                      type='int', metavar='WIDTH',
                      help='specify column number where lines are broken ' +
                           'and wrapped for sdiff, default is no line wrapping')
    parser.add_option('-y', '--yes', action='store_true',
                      dest='overwrite', default=False,
                      help='do not prompt for overwriting')
    opts, args = parser.parse_args()

    if len(args) != 2:
        sys.stderr.write("Sorry, you must specify two file/directory names\n"
                         + "type `%s -h' for help\n" % _myname)
        sys.exit(1)
    if not opts.output:
        sys.stderr.write("Sorry, you must specify output name (use `-o')\n")
        sys.exit(2)

    try:
        # Note: use stat instead lstat to permit symbolic links
        stat1 = os.stat(args[0])[0]
        stat2 = os.stat(args[1])[0]
    except OSError as dig:
        sys.stderr.write(str(dig) + '\n')
        sys.exit(2)

    # Compare two files
    #
    if stat.S_ISREG(stat1) and stat.S_ISREG(stat2):
        if not opts.overwrite and os.path.exists(opts.output):
            if not warnOverwrite(opts.output):
                sys.exit(1)
        url = 'file://%s\n' % os.path.realpath(opts.output)
        # (was `arg2[1]', a NameError that broke every file comparison)
        if filecmp.cmp(args[0], args[1]):
            print('No difference found.')
        else:
            diffFile(args[0], args[1], opts.output, opts.context,
                     opts.wrapnum, opts.lines)
            print('\nURL:\n%s' % url)

    # Compare two dirs
    #
    elif stat.S_ISDIR(stat1) and stat.S_ISDIR(stat2):
        if not opts.overwrite and os.path.exists(opts.output):
            if opts.filelist == '-':
                # stdin redirected, so we cannot read answer from stdin
                print("`%s' exists, please select another output directory, "
                      "or specify '-y' to force overwriting." % opts.output)
                sys.exit(1)
            elif not warnOverwrite(opts.output):
                sys.exit(1)
        url = 'file://%s/index.html\n' % os.path.realpath(opts.output)
        if opts.filelist:
            # filelist mode, controlled by master file list:
            # if see 'foo/bar/abc', makedirs foo/bar/, then put abc.diffs to it
            flist = readFileList(opts.filelist, opts.striplevel)
            found = diffDirByList(args[0], args[1], opts.output,
                                  flist, opts.wrapnum, opts.lines)
        else:
            found = diffDir(args[0], args[1], opts.output, opts.wrapnum,
                            opts.lines)
        if found:
            print('\nURL:\n%s' % url)
        else:
            print('No difference found.')

    else:
        sys.stderr.write("Sorry, I don't know how to compare `%s' and `%s'\n"
                         % (args[0], args[1]))
        sys.exit(2)
# vim:set et sts=4 sw=4: