Added bash script for creating monolithic headers.

Details: - Added a new script, monolithify-header.sh, to the 'build' directory. This script recursively replaces all #include directives in a selected file with the contents of the header files referenced by each directive. The idea is to "flatten" a tree of .h files into a single file, with the script acting as a C preprocessor that only processes #include directives.
2026-05-11 17:50:00 +00:00 · 2017-11-17 17:06:42 -06:00
parent c76f77f4cc
commit 870597d166
1 changed files with 441 additions and 0 deletions
--- a/build/monolithify-header.sh
+++ b/build/monolithify-header.sh
@@ -0,0 +1,441 @@
+#!/usr/bin/env bash
+#
+#  BLIS    
+#  An object-based framework for developing high-performance BLAS-like
+#  libraries.
+#
+#  Copyright (C) 2014, The University of Texas at Austin
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are
+#  met:
+#   - Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   - Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   - Neither the name of The University of Texas at Austin nor the names
+#     of its contributors may be used to endorse or promote products
+#     derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+
+#
+# -- Helper functions ----------------------------------------------------------
+#
+
+print_usage()
+{
+	# Echo usage info.
+	echo " "
+	echo " ${script_name}"
+	echo " "
+	echo " Field G. Van Zee"
+	echo " "
+	echo " Generate a monolithic header by recursively replacing all #include"
+	echo " directives in a selected file with the contents of the header files"
+	echo " they reference."
+	echo " "
+	echo " Usage:"
+	echo " "
+	echo "   ${script_name} header header_out root_dir"
+	echo " "
+	echo " Arguments:"
+	echo " "
+	echo "   header        The filepath to the top-level header, which is file that"
+	echo "                 will #include all other header files. NOTE: It is okay if"
+	echo "                 this file resides somewhere in root_dir, described below."
+	echo " "
+	echo "   header_out    The filepath of the file into which the script will output"
+	echo "                 the monolithic header."
+	echo " "
+	echo "   root_dir      The path to the root of the directory tree containing the"
+	echo "                 headers that will be #included by 'header'. 'root_dir' is"
+	echo "                 searched recursively for any directory that contains .h"
+	echo "                 files, and the resulting list of directories is then"
+	echo "                 searched whenever a #include directive is encountered in"
+	echo "                 'header' (or any file subsequently #included). If a"
+	echo "                 referenced header file is not found within 'root_dir',"
+	echo "                 the #include directive is left untouched and translated"
+	echo "                 directly into 'header_out'."
+	echo " "
+	echo " The following options are accepted:"
+	echo " "
+	echo "   -o SCRIPT   output script name"
+	echo "                 Use SCRIPT as a prefix when outputting messages instead"
+	echo "                 the script's actual name. Useful when the current script"
+	echo "                 is going to be called from within another, higher-level"
+	echo "                 driver script and seeing the current script's name might"
+	echo "                 unnecessarily confuse the user."
+	echo " "
+	echo "   -r          remove C-style comments"
+	echo "                 Strip comments enclosed in /* */ delimiters from the"
+	echo "                 output, including multi-line comments. By default, these"
+	echo "                 comments are not stripped."
+	echo " "
+	echo "   -q          quiet"
+	echo "                 Suppress informational output. By default, the script is"
+	echo "                 verbose."
+	echo " "
+	echo "   -h          help"
+	echo "                 Output this information and exit."
+	echo " "
+
+	# Exit with non-zero exit status
+	exit 1
+}
+
+canonicalize_ws()
+{
+	local str="$1"
+
+	# Remove leading and trailing whitespace.
+	str=$(echo -e "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')
+
+	# Remove duplicate spaces between words.
+	str=$(echo -e "${str}" | tr -s " ")
+
+	# Update the input argument.
+	echo "${str}"
+}
+
+echoinfo()
+{
+	if [ -z "${quiet_flag}" ]; then
+
+		# Echo the argument string to stderr instead of stdout.
+		echo "${output_name}: $1" 1>&2;
+	fi
+}
+
+echoninfo()
+{
+	if [ -z "${quiet_flag}" ]; then
+
+		# Echo the argument string to stderr instead of stdout.
+		echo -n "${output_name}: $1" 1>&2;
+	fi
+}
+
+echon2info()
+{
+	if [ -z "${quiet_flag}" ]; then
+
+		# Echo the argument string to stderr instead of stdout.
+		echo "$1" 1>&2;
+	fi
+}
+
+find_header_dirs()
+{
+	local cur_dirpath sub_items result cur_list item child_list
+
+	# Extract the argument: the current directory, and the list of
+	# directories found so far that contain headers.
+	cur_dirpath="$1"
+
+	echoninfo "scanning contents of ${cur_dirpath}"
+
+	# Acquire a list of the directory's contents.
+	sub_items=$(ls ${cur_dirpath})
+
+	# If there is at least one header present, add the current directory to
+	# the list header of directories. Otherwise, the current directory does
+	# not contribute to the list returned to the caller.
+	result=$(echo ${sub_items} | grep "\.h")
+
+	if [ -n "${result}" ]; then
+		cur_list="${cur_dirpath}"
+		echon2info " ...found headers"
+	else
+		cur_list=""
+		echon2info ""
+	fi
+
+	# Iterate over the list of directory contents.
+	for item in ${sub_items}; do
+
+		# If the current item is a directory, recursively accumulate header
+		# directories for that sub-directory.
+		if [ -d "${cur_dirpath}/${item}" ]; then
+
+			# Recursively find header directories within the sub-directory
+			# ${item} and store the directory list to child_list.
+			child_list=$(find_header_dirs "${cur_dirpath}/${item}")
+
+			# Accumulate the sub-directory's header list with the running list
+			# of header directories
+			cur_list="${cur_list} ${child_list}"
+		fi
+
+	done
+
+	# Return the list of header directories.
+	echo "${cur_list}"
+}
+
+get_header_path()
+{
+	local filename dirpaths filepath
+
+	filename="$1"
+	dirpaths="$2"
+	filepath=""
+
+	# Search each directory path for the filename given.
+	for dirpath in ${dirpaths}; do
+
+		if [ -f "${dirpath}/${filename}" ]; then
+
+			filepath="${dirpath}/${filename}"
+			break
+		fi
+	done
+
+	# Return the filepath that was found. Note that if no filepath was found
+	# in the loop above, the empty string gets returned.
+	echo "${filepath}"
+}
+
+replace_pass()
+{
+	local filename dirpaths result header headerlist
+
+	filename="$1"
+	dirpaths="$2"
+
+	headerlist=""
+
+	# This string is inserted after #include directives after having
+	# determined that they are not present in the directory tree and should
+	# be ignored when assessing whether there are still #include directives
+	# that need to be expanded. Note that it is formatted as a comment and
+	# thus will be ignored when the monolithic header is eventually read C
+	# preprocessor and/or compiler.
+	skipstr="\/\/skipped"
+
+	# The way we (optionally) remove C-style comments results in a single
+	# blank line in its place (regardless of how many lines the comment
+	# spanned. When a comment is removed, it is replaced by this string
+	# so that the line can be deleted with a subsequent sed command.
+	commstr="DeLeTeDCsTyLeCoMmEnT"
+
+	# Iterate through each line of the header file, accumulating the names of
+	# header files referenced in #include directives.
+	while read -r curline
+	do
+
+		# Check whether the line begins with a #include directive.
+		result=$(echo ${curline} | grep '^[[:space:]]*#include ' )
+
+		# If the #include directive was found...
+		if [ -n "${result}" ]; then
+
+			# Isolate the header filename.
+			header=$(echo ${curline} | sed -e "s/#include [\"<]\([a-zA-Z0-9.]*\)[\">]/\1/g")
+
+			# Add the header file to a list.
+			headerlist=$(canonicalize_ws "${headerlist} ${header}")
+
+		fi
+	done < "${filename}"
+
+	echoinfo "  found references to: ${headerlist}"
+
+	# Initialize the return value to null.
+	result=""
+
+	# Iterate over each header file found in the previous loop.
+	for header in ${headerlist}; do
+
+		# Find the path to the header.
+		header_filepath=$(get_header_path ${header} "${dirpaths}")
+
+		# If the header file was not found, get_header_path() returns an
+		# empty string. In this case, we assume the file is a system header
+		# and thus we skip it since we don't want to inline the contents of
+		# system headers anyway.
+		if [ -z "${header_filepath}" ]; then
+
+			echoinfo "  could not locate file '${header}'; marking to skip."
+
+			# Insert a comment after the #include so we know to ignore it
+			# later. Notice that we mimic the quotes or angle brackets
+			# around the header name, whichever pair was used in the input.
+			cat ${filename} \
+			    | sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header}\)\([\">]\)/#include \1\2\3 ${skipstr}/" \
+			    > "${filename}.tmp"
+
+			# Overwrite the original file with the updated copy.
+			mv "${filename}.tmp" ${filename}
+
+		else
+
+			echoinfo "  located file '${header_filepath}'; inserting."
+
+			# Strip C-style comments from the file, if requested.
+			if [ -n "${strip_comments}" ]; then
+
+				header_filename=${header_filepath##*/}
+
+				# Make a temporary copy of ${header_filepath} stripped of its
+				# C-style comments. This leaves behind a single blank line,
+				# which is then deleted.
+				cat ${header_filepath} \
+				    | perl -0777 -pe "s/\/\*.*?\*\//${commstr}/gs" \
+				    | sed -e "/${commstr}/d" \
+				    > "${header_filename}.tmp"
+
+				header_to_insert="${header_filename}.tmp"
+			else
+				header_to_insert="${header_filepath}"
+			fi
+
+			# Replace the #include directive for the current header file with the
+			# contents of that header file, saving the result to a temporary file.
+			# We also insert begin and end markers to allow for more readability.
+			cat ${filename} \
+			    | sed -e "/^[[:space:]]*#include \"${header}\"/ {" \
+			          -e "i // begin ${header}" \
+			          -e "r ${header_to_insert}" \
+			          -e "a // end ${header}" \
+			          -e "d" \
+			          -e "}" \
+			    > "${filename}.tmp"
+
+			# Overwrite the original header file with the updated copy.
+			mv "${filename}.tmp" ${filename}
+
+			# If C-style comments were stripped, remove the temporary file.
+			if [ -n "${strip_comments}" ]; then
+				rm "${header_filename}.tmp"
+			fi
+		fi
+	done
+
+	# works, but leaves blank line:
+	#cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h"
+	# works:
+	#cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h"
+	# works:
+	#cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h"
+	#cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}'
+
+	# Search the updated file for #include directives, but ignore any
+	# hits that also contain the skip string (indicating that the header
+	# file referenced by that #include could not be found).
+	result=$(cat ${filename} | grep '^[[:space:]]*#include ' | grep -v "${skipstr}")
+
+	# Return the result so the caller knows if we need to proceed with
+	# another pass.
+	echo ${result}
+}
+
+#
+# -- main function -------------------------------------------------------------
+#
+
+main()
+{
+	# The name of the script, stripped of any preceeding path.
+	script_name=${0##*/}
+
+	# The script name to use in informational output. Defaults to ${script_name}.
+	output_name=${script_name}
+
+	# Whether or not we should suppress informational output. (Default is to
+	# output messages.)
+	quiet_flag=""
+
+	# Whether or not we should strip C-style comments from the outout. (Default
+	# is to not strip C-style comments.)
+	strip_comments=""
+
+	# Process our command line options.
+	while getopts ":ho:qr" opt; do
+	    case $opt in
+	        o  ) output_name=$OPTARG ;;
+	        q  ) quiet_flag="1" ;;
+	        r  ) strip_comments="1" ;;
+	        h  ) print_usage ;;
+	        \? ) print_usage
+	    esac
+	done
+	shift $(($OPTIND - 1))
+
+	# Print usage if we don't have exactly two arguments.
+	if [ $# != "3" ]; then
+
+		print_usage
+	fi
+
+	# Acquire the two required arguments:
+	# - the input header file,
+	# - the output header file,
+	# - the root directory to search when attempting to locate header files
+	#   referenced in #include directives.
+	inputfile="$1"
+	outputfile="$2"
+	rootdir="$3"
+
+	# Starting with the root search directory, recursively search for all
+	# directory paths that contains a header file.
+	dirpaths=$(find_header_dirs ${rootdir})
+	dirpaths=$(canonicalize_ws "${dirpaths}")
+
+	echoinfo "scan summary:"
+	echoinfo "  headers found in:"
+	echoinfo "  ${dirpaths}"
+
+	echoinfo "preparing to monolithify '${inputfile}'."
+
+	# Make a copy of the inputfile.
+	cp ${inputfile} ${outputfile}
+	
+	echoinfo "new header will be saved to '${outputfile}'."
+
+	done_flag="0"
+	while [ ${done_flag} == "0" ]; do
+
+		echoinfo "starting new pass."
+
+		# Perform a replacement pass. The return string is non-null if
+		# additional passes are necessary and null otherwise.
+		result=$(replace_pass ${outputfile} "${dirpaths}")
+
+		if [ -n "${result}" ]; then
+
+			echoinfo "pass finished; result: additional pass(es) needed."
+		else
+			echoinfo "pass finished; result: no further passes needed."
+		fi
+
+		# If the return value was null, then we're done.
+		if [ -z "${result}" ]; then
+			done_flag="1"
+		fi
+	done
+
+	echoinfo "substitution complete."
+	echoinfo "monolithic header saved as '${outputfile}'."
+
+	# Exit peacefully.
+	return 0
+}
+
+
+# The script's main entry point, passing all parameters given.
+main "$@"
+