blis/build/flatten-headers.py

#!/usr/bin/env python
#
#  BLIS
#  An object-based framework for developing high-performance BLAS-like
#  libraries.
#
#  Copyright (C) 2014, The University of Texas at Austin
#
#  Redistribution and use in source and binary forms, with or without
#  modification, are permitted provided that the following conditions are
#  met:
#   - Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
#   - Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   - Neither the name(s) of the copyright holder(s) nor the names of its
#     contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

# Import modules
import os
import sys
import getopt
import re

def print_usage():

	my_print( " " )
	my_print( " %s" % script_name )
	my_print( " " )
	my_print( " Field G. Van Zee" )
	my_print( " " )
	my_print( " Generate a monolithic header by recursively replacing all #include" )
	my_print( " directives in a selected file with the contents of the header files" )
	my_print( " they reference." )
	my_print( " " )
	my_print( " Usage:" )
	my_print( " " )
	my_print( "   %s header header_out temp_dir dir_list" % script_name )
	my_print( " " )
	my_print( " Arguments:" )
	my_print( " " )
	my_print( "   header        The filepath to the top-level header, which is the file" )
	my_print( "                 that will #include all other header files." )
	my_print( " " )
	my_print( "   header_out    The filepath of the file into which the script will output" )
	my_print( "                 the monolithic header." )
	my_print( " " )
	my_print( "   temp_dir      A directory in which temporary files may be created." )
	my_print( "                 NOTE: No temporary files are created in the current" )
	my_print( "                 implementation, but this argument must still be specified." )
	my_print( " " )
	my_print( "   dir_list      The list of directory paths in which to search for the" )
	my_print( "                 headers that are #included by 'header'. By default, these" )
	my_print( "                 directories are scanned for .h files, but sub-directories" )
	my_print( "                 within the various directories are not inspected. If the" )
	my_print( "                 -r option is given, these directories are recursively" )
	my_print( "                 scanned. In either case, the subset of directories scanned" )
	my_print( "                 that actually contains .h files is then searched whenever" )
	my_print( "                 a #include directive is encountered in 'header' (or any" )
	my_print( "                 file subsequently #included). If a referenced header file" )
	my_print( "                 is not found, the #include directive is left untouched and" )
	my_print( "                 translated directly into 'header_out'." )
	my_print( " " )
	my_print( " The following options are accepted:" )
	my_print( " " )
	my_print( "   -r          recursive" )
	my_print( "                 Scan the directories listed in 'dir_list' recursively when" )
	my_print( "                 searching for .h header files. By default, the directories" )
	my_print( "                 are not searched recursively." )
	my_print( " " )
	my_print( "   -c          strip C-style comments" )
	my_print( "                 Strip comments enclosed in /* */ delimiters from the" )
	my_print( "                 output, including multi-line comments. By default, C-style" )
	my_print( "                 comments are not stripped." )
	my_print( " " )
	my_print( "   -o SCRIPT   output script name" )
	my_print( "                 Use SCRIPT as a prefix when outputting messages instead" )
	my_print( "                 the script's actual name. Useful when the current script" )
	my_print( "                 is going to be called from within another, higher-level" )
	my_print( "                 driver script and seeing the current script's name might" )
	my_print( "                 unnecessarily confuse the user." )
	my_print( " " )
	my_print( "   -v [0|1|2]  verboseness level" )
	my_print( "                 level 0: silent  (no output)" )
	my_print( "                 level 1: default (single character '.' per header)" )
	my_print( "                 level 2: verbose (several lines per header)." )
	my_print( " " )
	my_print( "   -h          help" )
	my_print( "                 Output this information and exit." )
	my_print( " " )


# ------------------------------------------------------------------------------

def canonicalize_ws( s ):

	return re.sub( '\s+', ' ', s ).strip()

# ---

def my_print( s ):

	sys.stdout.write( "%s\n" % s )

# ---

#def echov1( s ):
#
#	if verbose_flag == "1":
#		print "%s: %s" % ( output_name, s )

def echov1_n( s ):

	if verbose_flag == "1":
		sys.stdout.write( s )
		sys.stdout.flush()

def echov1_n2( s ):

	if verbose_flag == "1":
		sys.stdout.write( "%s\n" % s )
		sys.stdout.flush()

# ---

def echov2( s ):

	if verbose_flag == "2":
		sys.stdout.write( "%s: %s\n" % ( output_name, s ) )
		sys.stdout.flush()

def echov2_n( s ):

	if verbose_flag == "2":
		sys.stdout.write( output_name )
		sys.stdout.write( ": " )
		sys.stdout.write( s )
		sys.stdout.flush()

def echov2_n2( s ):

	if verbose_flag == "2":
		sys.stdout.write( "%s\n" % s )
		sys.stdout.flush()

# ------------------------------------------------------------------------------

def list_contains_header( items ):

	rval = False
	for item in items:

		is_h = re.search( "\.h", item )

		if is_h:
			rval = True
			break

	return rval

# ------------------------------------------------------------------------------

def get_header_path( filename, header_dirpaths ):

	filepath = None

	# Search each directory path for the filename given.
	for dirpath in header_dirpaths:

		# Construct a possible path to the sought-after file.
		cur_filepath = "%s/%s" % ( dirpath, filename )

		# Check whether the file exists.
		found = os.path.exists( cur_filepath )
		if found:
			filepath = cur_filepath
			break

	return filepath

# ------------------------------------------------------------------------------

def strip_cstyle_comments( string ):

	return re.sub( "/\*.*?\*/", "", string, flags=re.S )

# ------------------------------------------------------------------------------

def flatten_header( inputfile, header_dirpaths, cursp ):

	# This string is inserted after #include directives after having
	# determined that they are not present in the directory tree.
	skipstr  = "// skipped"
	beginstr = "// begin "
	endstr   = "// end "

	ostring  = ""

	# Open the input file to process.
	ifile = open( inputfile, "r" )

	# Iterate over the lines in the file.
	while True:

		# Read a line in the file.
		line = ifile.readline()

		# Check for EOF.
		if line == '': break

		# Check for the #include directive and isolate the header name within
		# a group (parentheses).
		#result = re.search( '^[\s]*#include (["<])([\w\.\-/]*)([">])', line )
		result = regex.search( line )

		# If the line contained a #include directive, we must try to replace
		# it with the contents of the header referenced by the directive.
		if result:

			# Extract the header file referenced in the #include directive,
			# saved as the second group in the regular expression
			# above.
			header = result.group(2)

			echov2( "%sfound reference to '%s'." % ( cursp, header ) )

			# Search for the path to the header referenced in the #include
			# directive.
			header_path = get_header_path( header, header_dirpaths )

			# First, check if the header is our root header (and if so, ignore it).
			# Otherwise, if the header was found, we recurse. Otherwise, we output
			# the #include directive with a comment indicating that it as skipped
			if header == root_file_name:

				markl = result.group(1)
				markr = result.group(3)

				echov2( "%sthis is the root header '%s'; commenting out / skipping." \
				        % ( cursp, header ) )

				# If the header found is our root header, then we cannot
				# recurse into it lest we enter an infinite loop. Output the
				# line but make sure it's commented out entirely.
				ostring += "%s #include %c%s%c %c" \
				           % ( skipstr, markl, header, markr, '\n' )

			elif header_path:

				echov2( "%slocated file '%s'; recursing." \
				        % ( cursp, header_path ) )

				# Mark the beginning of the header being inserted.
				ostring += "%s%s%c" % ( beginstr, header, '\n' )

				# Recurse on the header, accumulating the string.
				ostring += flatten_header( header_path, header_dirpaths, cursp + "  " )

				# Mark the end of the header being inserted.
				ostring += "%s%s%c" % ( endstr, header, '\n' )

				echov2( "%sheader file '%s' fully processed." \
				        % ( cursp, header_path ) )

			else:

				markl = result.group(1)
				markr = result.group(3)

				echov2( "%scould not locate file '%s'; marking as skipped." \
				        % ( cursp, header ) )

				# If the header was not found, output the line with a
				# comment that the header was skipped.
				ostring += "#include %c%s%c %s%c" \
				           % ( markl, header, markr, skipstr, '\n' )
			# endif

		else:
			# If the line did not contain a #include directive, simply output
			# the line verbatim.
			ostring += "%s" % line

		# endif

	# endwhile

	# Close the input file.
	ifile.close()

	echov1_n( "." )

	return ostring

# ------------------------------------------------------------------------------

def find_header_dirs( dirpath ):

	header_dirpaths = []
	for root, dirs, files in os.walk( dirpath, topdown=True ):

		echov2_n( "scanning contents of %s" % root )

		if list_contains_header( files ):

			echov2_n2( "...found headers" )
			header_dirpaths.append( root )

		else:
			echov2_n2( "" )

		#endif

	#endfor

	return header_dirpaths


# ------------------------------------------------------------------------------

# Global variables.
script_name    = None
output_name    = None
strip_comments = None
recursive_flag = None
verbose_flag   = None
regex          = None
root_file_name = None

def main():

	global script_name
	global output_name
	global strip_comments
	global recursive_flag
	global verbose_flag
	global regex
	global root_file_name

	# Obtain the script name.
	path, script_name = os.path.split(sys.argv[0])

	output_name    = script_name

	strip_comments = False
	recursive_flag = False
	verbose_flag   = "1"

	nestsp         = "  "

	# Process our command line options.
	try:
		opts, args = getopt.getopt( sys.argv[1:], "o:rchv:" )

	except getopt.GetoptError as err:
		# print help information and exit:
		my_print( str(err) ) # will print something like "option -a not recognized"
		print_usage()
		sys.exit(2)

	for opt, optarg in opts:
		if   opt == "-o":
			output_name = optarg
		elif opt == "-r":
			recursive_flag = True
		elif opt == "-c":
			strip_comments = True
		elif opt == "-v":
			verbose_flag = optarg
		elif opt == "-h":
			print_usage()
			sys.exit()
		else:
			print_usage()
			sys.exit()

	# Make sure that the verboseness level is valid.
	if ( verbose_flag != "0" and
	     verbose_flag != "1" and
	     verbose_flag != "2" ):
		my_print( "%s Invalid verboseness argument: %s" \
		                  % output_name, verbose_flag )
		sys.exit()

	# Print usage if we don't have exactly four arguments.
	if len( args ) != 4:
		print_usage()
		sys.exit()

	# Acquire the four required arguments:
	# - the input header file,
	# - the output header file,
	# - the temporary directory in which we can write intermediate files,
	# - the list of directories in which to search for the headers.
	inputfile  = args[0]
	outputfile = args[1]
	temp_dir   = args[2]
	dir_list   = args[3]

	# Save the filename (basename) part of the input file (or root file) into a
	# global variable that we can access later from within flatten_header().
	root_file_name = os.path.basename( inputfile )

	# Separate the directories into distinct strings.
	dir_list = dir_list.split()

	# First, confirm that the directories in dir_list are valid.
	dir_list_checked = []
	for item in dir_list:

		#absitem = os.path.abspath( item )

		echov2_n( "checking " + item )

		if os.path.exists( item ):
			dir_list_checked.append( item )
			echov2_n2( "...directory exists." )
		else:
			echov2_n2( "...invalid directory; omitting." )

	# endfor

	# Overwrite the original dir_list with the updated copy that omits
	# invalid directories.
	dir_list = dir_list_checked

	echov2( "check summary:" )
	echov2( "  accessible directories:" )
	echov2( "  %s" % ' '.join( dir_list ) )

	# Generate a list of directories (header_dirpaths) which will be searched
	# whenever a #include directive is encountered. The method by which
	# header_dirpaths is compiled will depend on whether the recursive flag
	# was given.
	if recursive_flag:

		header_dirpaths = []
		for d in dir_list:

			# For each directory in dir_list, recursively walk that directory
			# and return a list of directories that contain headers.
			d_dirpaths = find_header_dirs( d )

			# Add the list resulting from the current search to the running
			# list of directory paths that contain headers.
			header_dirpaths += d_dirpaths

		# endfor

	else:

		# If the recursive flag was not given, we can just use dir_list
		# as-is, though we opt to filter out the directories that don't
		# contain .h files.

		header_dirpaths = []
		for d in dir_list:

			echov2_n( "scanning %s" % d )

			# Acquire a list of the directory's contents.
			sub_items = os.listdir( d )

			# If there is at least one header present, add the current
			# directory to the list of header directories.
			if list_contains_header( sub_items ):
				header_dirpaths.append( d )
				echov2_n2( "...found headers." )
			else:
				echov2_n2( "...no headers found." )
			# endif

		# endfor

	# endfor

	echov2( "scan summary:" )
	echov2( "  headers found in:" )
	echov2( "  %s" % ' '.join( header_dirpaths ) )

	echov2( "preparing to monolithify '%s'" % inputfile )

	echov2( "new header will be saved to '%s'" % outputfile )

	echov1_n( "." )

	# Open the output file.
	ofile = open( outputfile, "w" )

	# Precompile the main regular expression used to isolate #include
	# directives and the headers they reference. This regex object will
	# get reused over and over again in flatten_header().
	regex = re.compile( '^[\s]*#include (["<])([\w\.\-/]*)([">])' )

	# Recursively substitute headers for occurrences of #include directives.
	final_string = flatten_header( inputfile, header_dirpaths, nestsp )

	# Strip C-style comments from the final output, if requested.
	if strip_comments:
		final_string = strip_cstyle_comments( final_string )

	# Write the lines to the file.
	ofile.write( final_string )

	# Close the output file.
	ofile.close()

	echov2( "substitution complete." )
	echov2( "monolithic header saved as '%s'" % outputfile )

	echov1_n2( "." )

	return 0


if __name__ == "__main__":
	main()