Files
blis/build/flatten-headers.sh
Field G. Van Zee 216a4cb9cb Minor update to flatten-headers.[py|sh] help text.
Details:
- Fixed a typo and removed some outdated language from the help text of
  flatten-headers.py and flatten-headers.sh.
2018-05-18 18:47:03 -05:00

598 lines
18 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas at Austin nor the names
# of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# -- Helper functions ----------------------------------------------------------
#
print_usage()
{
	# Print the script's help text to stdout and terminate the script.
	#
	# Globals read:
	#   script_name  name shown in the banner and in the usage synopsis
	#
	# NOTE: this function never returns. It always exits with status 1,
	# including when the help text was explicitly requested via -h.
	#
	# The text is emitted with a single printf call, one argument per
	# output line.
	printf '%s\n' \
		" " \
		" ${script_name}" \
		" " \
		" Field G. Van Zee" \
		" " \
		" Generate a monolithic header by recursively replacing all #include" \
		" directives in a selected file with the contents of the header files" \
		" they reference." \
		" " \
		" Usage:" \
		" " \
		" ${script_name} header header_out temp_dir dir_list" \
		" " \
		" Arguments:" \
		" " \
		" header The filepath to the top-level header, which is the file" \
		" that will #include all other header files." \
		" " \
		" header_out The filepath of the file into which the script will output" \
		" the monolithic header." \
		" " \
		" temp_dir A directory in which temporary files may be created." \
		" " \
		" dir_list The list of directory paths in which to search for the" \
		" headers that are #included by 'header'. By default, these" \
		" directories are scanned for .h files, but sub-directories" \
		" within the various directories are not inspected. If the" \
		" -r option is given, these directories are recursively" \
		" scanned. In either case, the subset of directories scanned" \
		" that actually contains .h files is then searched whenever" \
		" a #include directive is encountered in 'header' (or any" \
		" file subsequently #included). If a referenced header file" \
		" is not found, the #include directive is left untouched and" \
		" translated directly into 'header_out'." \
		" " \
		" The following options are accepted:" \
		" " \
		" -r recursive" \
		" Scan the directories listed in 'dir_list' recursively when" \
		" searching for .h header files. By default, the directories" \
		" are not searched recursively." \
		" " \
		" -c strip C-style comments" \
		" Strip comments enclosed in /* */ delimiters from the" \
		" output, including multi-line comments. By default, C-style" \
		" comments are not stripped." \
		" " \
		" -o SCRIPT output script name" \
		" Use SCRIPT as a prefix when outputting messages instead of" \
		" the script's actual name. Useful when the current script" \
		" is going to be called from within another, higher-level" \
		" driver script and seeing the current script's name might" \
		" unnecessarily confuse the user." \
		" " \
		" -v [0|1|2] verboseness level" \
		" level 0: silent (no output)" \
		" level 1: default (single character '.' per header)" \
		" level 2: verbose (several lines per header)." \
		" " \
		" -h help" \
		" Output this information and exit." \
		" "

	# Exit with non-zero exit status
	exit 1
}
canonicalize_ws()
{
	# Normalize the whitespace of a string and print the result on stdout.
	#
	# Arguments:
	#   $1  the string to normalize
	# Output:
	#   $1 with leading/trailing whitespace removed from each line and every
	#   run of consecutive spaces squeezed to a single space.
	#
	# printf '%s\n' is used rather than 'echo -e' so that the data is passed
	# through literally: backslash sequences (e.g. "\n", "\t") are not
	# expanded and strings that look like echo options (e.g. "-n") are not
	# swallowed.
	local str="$1"

	# Remove leading and trailing whitespace.
	str=$(printf '%s\n' "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')

	# Remove duplicate spaces between words.
	str=$(printf '%s\n' "${str}" | tr -s " ")

	# Hand the normalized string back to the caller via stdout.
	printf '%s\n' "${str}"
}
is_word_in_list()
{
	# Test whether a word occurs in a whitespace-separated list.
	#
	# Arguments:
	#   $1  the word to look for
	#   $2  the list, as a single whitespace-separated string
	# Output:
	#   Prints the word if some list element equals it exactly; otherwise
	#   prints an empty line.
	#
	# All working variables are local so that calling this function directly
	# (i.e. not inside a command substitution) cannot clobber same-named
	# variables of the caller, such as a loop variable called 'item'.
	local word list rval item

	word="$1"
	list="$2"
	rval=""

	# ${list} is intentionally unquoted so that it is split into words.
	for item in ${list}; do

		if [ "${item}" == "${word}" ]; then
			rval="${word}"
			break
		fi
	done

	printf '%s\n' "${rval}"
}
echovo()
{
	# Level-1 message with prefix: when verbose_flag is exactly "1", write
	# "<output_name>: <$1>" followed by a newline to stderr. At any other
	# verboseness level nothing is printed.
	[[ "${verbose_flag}" == "1" ]] || return 0

	echo "${output_name}: $1" >&2
}
echovo_n()
{
	# Level-1 progress output: when verbose_flag is exactly "1", write $1 to
	# stderr with neither a prefix nor a trailing newline. Silent otherwise.
	case "${verbose_flag}" in
		1)
			echo -n "$1" >&2
			;;
	esac
}
echovo_n2()
{
	# Level-1 line terminator: when verbose_flag is exactly "1", write $1
	# (without any prefix) plus a newline to stderr. Silent otherwise.
	if [[ "${verbose_flag}" != "1" ]]; then
		return 0
	fi

	echo "$1" >&2
}
# ---
echovt()
{
	# Level-2 message with prefix: when verbose_flag is exactly "2", write
	# "<output_name>: <$1>" followed by a newline to stderr. At any other
	# verboseness level nothing is printed.
	case "${verbose_flag}" in
		2)
			echo "${output_name}: $1" >&2
			;;
	esac
}
echovt_n()
{
	# Level-2 message start: when verbose_flag is exactly "2", write
	# "<output_name>: <$1>" to stderr WITHOUT a trailing newline, so that a
	# later echovt_n2 call can finish the line. Silent otherwise.
	[[ "${verbose_flag}" == "2" ]] || return 0

	echo -n "${output_name}: $1" >&2
}
echovt_n2()
{
	# Level-2 message end: when verbose_flag is exactly "2", write $1
	# (without any prefix) plus a newline to stderr. Silent otherwise.
	if [[ "${verbose_flag}" != "2" ]]; then
		return 0
	fi

	echo "$1" >&2
}
find_header_dirs()
{
	# Recursively collect the directories below (and including) $1 whose
	# listing contains the text ".h".
	#
	# Arguments:
	#   $1  directory at which to start scanning
	# Globals read:
	#   ignore_list  whitespace-separated entry names that are skipped
	# Output:
	#   A space-separated list of directory paths on stdout. The list may
	#   carry leading, trailing or repeated spaces.
	local root listing hits found entry skip nested

	root="$1"

	echovt_n "scanning contents of ${root}"

	# Snapshot of the names inside the directory being scanned.
	listing=$(ls ${root})

	# The directory itself contributes to the result only if ".h" shows up
	# somewhere in its listing.
	hits=$(echo ${listing} | grep "\.h")

	if [ -z "${hits}" ]; then
		found=""
		echovt_n2 ""
	else
		found="${root}"
		echovt_n2 " ...found headers"
	fi

	# Visit every entry; only non-ignored sub-directories are descended into.
	for entry in ${listing}; do

		# Entries named in ignore_list are passed over entirely.
		skip=$(is_word_in_list "${entry}" "${ignore_list}")

		if [ -n "${skip}" ]; then
			echovt "ignoring directory '${entry}'."
			continue
		fi

		# Anything that is not a directory has nothing to recurse into.
		[ -d "${root}/${entry}" ] || continue

		# Gather the header directories beneath this sub-directory and
		# append them to the running result.
		nested=$(find_header_dirs "${root}/${entry}")
		found="${found} ${nested}"
	done

	# Report the accumulated list to the caller.
	echo "${found}"
}
get_header_path()
{
	# Locate a header file in a list of directories.
	#
	# Arguments:
	#   $1  header name as written in the #include directive
	#   $2  whitespace-separated list of directories to search, in order
	# Output:
	#   Prints "<dir>/<name>" for the first directory in which the file
	#   exists as a regular file; prints an empty line if none contains it.
	#
	# The loop variable 'dirpath' is declared local along with the other
	# working variables so that a direct call cannot overwrite a same-named
	# variable of the caller.
	local filename dirpaths filepath dirpath

	filename="$1"
	dirpaths="$2"
	filepath=""

	# Search each directory path for the filename given. ${dirpaths} is
	# intentionally unquoted so that it is split into individual paths.
	for dirpath in ${dirpaths}; do

		if [ -f "${dirpath}/${filename}" ]; then
			filepath="${dirpath}/${filename}"
			break
		fi
	done

	# Return the filepath that was found. Note that if no filepath was found
	# in the loop above, the empty string gets returned.
	printf '%s\n' "${filepath}"
}
replace_pass()
{
	# Produce a flattened copy of one header file, recursively inlining the
	# headers it #includes.
	#
	# Arguments:
	#   $1  path of the header file to process
	#   $2  whitespace-separated list of directories searched for #included
	#       headers
	#   $3  indentation prefix used in level-2 verbose messages
	# Globals read:
	#   temp_dir        directory in which intermediate files are created
	#   strip_comments  if non-empty, C-style comments are stripped (perl)
	#   nestsp          indentation added to $3 for each level of recursion
	# Output:
	#   Prints the path of the intermediate file holding the flattened copy
	#   of $1 ("<temp_dir>/<basename of $1>.interm"). The caller is expected
	#   to move or remove that file.
	#
	# NOTE: the intermediate file name is derived from the basename of $1
	# only, so two headers with the same filename map to the same path.
	# NOTE: a located header is inlined only where it is referenced as
	# #include "name"; the substitution below does not match <name>.
	#
	# All working variables (including cursp and the read-loop variable) are
	# local so that a direct call cannot clobber the caller's variables.
	local inputfile dirpaths cursp intermfile skipstr result curline
	local header headerlist header_filepath header_esc subintermfile

	inputfile="$1"
	dirpaths="$2"
	cursp="$3"

	# Set the output filename, which we will return to the caller. Starting
	# with the input filepath, we strip it down to just the filename and
	# reconstruct it with the .interm suffix in temp_dir.
	intermfile="${inputfile##*/}"
	intermfile="${temp_dir}/${intermfile}.interm"

	# This string is inserted after #include directives after having
	# determined that they are not present in the directory tree. The
	# slashes are escaped because the string is used in a sed replacement.
	skipstr="\/\/ skipped"

	# Initialize the list of headers referenced in #include directives
	# found in the current header file.
	headerlist=""

	result=$(grep '^[[:space:]]*#include ' "${inputfile}")

	# Only iterate through the file line-by-line if it contains at least
	# one #include directive. If it does not contain any #include directives,
	# then we can leave headerlist initialized to empty and proceed.
	if [ -n "${result}" ]; then

		# Iterate through each line of the header file, accumulating the names
		# of header files referenced in #include directives.
		while read -r curline
		do

			# Check whether the line begins with a #include directive.
			# ${curline} is intentionally unquoted here and below: word
			# splitting collapses runs of whitespace to single spaces, which
			# the single-space sed pattern that follows relies upon.
			result=$(echo ${curline} | grep '^[[:space:]]*#include ')

			# If the #include directive was found...
			if [ -n "${result}" ]; then

				# Isolate the header filename. We must take care to include all
				# characters that might appear between the "" or <>.
				header=$(echo ${curline} | sed -e "s/#include [\"<]\([a-zA-Z0-9\_\.\/\-]*\)[\">].*/\1/g")

				# Add the header file to a list.
				headerlist=$(canonicalize_ws "${headerlist} ${header}")
			fi

		done < "${inputfile}"
	fi

	if [ -n "${headerlist}" ]; then
		echovt "${cursp}found references to: ${headerlist}"
	else
		echovt "${cursp}no header references found."
	fi

	# Before we go any further, we strip C-style comments from the file,
	# if requested.
	if [ -n "${strip_comments}" ]; then

		# Make a copy of inputfile stripped of its C-style comments and
		# save it to intermfile. This substitution leaves behind a single
		# blank line.
		cat "${inputfile}" \
		| perl -0777 -pe "s/\/\*.*?\*\///gs" \
		> "${intermfile}"
	else

		# Otherwise, just copy inputfile to intermfile verbatim.
		cp "${inputfile}" "${intermfile}"
	fi

	# Iterate over each header file found in the previous loop.
	for header in ${headerlist}; do

		# Find the path to the header.
		header_filepath=$(get_header_path "${header}" "${dirpaths}")

		# If the header has a slash, escape it so that sed doesn't get confused
		# (since we use '/' as our search-and-replace delimiter).
		header_esc=$(echo "${header}" | sed -e 's/\//\\\//g')

		# If the header file was not found, get_header_path() returns an
		# empty string. This probably means that the header file is a
		# system header and thus we skip it since we don't want to inline
		# the contents of system headers anyway.
		if [ -z "${header_filepath}" ]; then

			echovt "${cursp}could not locate file '${header}'; marking as skipped."

			# Insert a comment after the #include so we know it was ignored.
			# Notice that we mimic the quotes or angle brackets around the
			# header name, whichever pair was used in the input.
			cat "${intermfile}" \
			| sed -e "s/^[[:space:]]*#include \([\"<]\)\(${header_esc}\)\([\">]\).*/#include \1\2\3 ${skipstr}/" \
			> "${intermfile}.tmp"

			mv "${intermfile}.tmp" "${intermfile}"

		else

			echovt "${cursp}located file '${header_filepath}'; recursing."

			# Recursively produce an inlined/flattened intermediate file at
			# ${header_filepath}.
			subintermfile=$(replace_pass "${header_filepath}" "${dirpaths}" "${cursp}${nestsp}")

			echovt "${cursp}inserting '${subintermfile}'."

			# Replace the #include directive for the current header file with the
			# contents of that header file, saving the result to a temporary file.
			# We also insert begin and end markers to allow for more readability.
			# NOTE: We use the 'i\...' and 'a\...' notation with '$', which causes
			# bash to interpret '\n' as a newline, as needed for the 'a\' and 'i\'
			# commands in POSIX (e.g. OS X) sed. (GNU sed allows a much more
			# natural usage that does not require the backslash or newline.)
			cat "${intermfile}" \
			| sed -e "/^[[:space:]]*#include \"${header_esc}\"/ {" \
			      -e 'i\'$'\n'"// begin ${header}"$'\n' \
			      -e "r ${subintermfile}" \
			      -e 'a\'$'\n'"// end ${header}"$'\n' \
			      -e "d" \
			      -e "}" \
			> "${intermfile}.tmp"

			mv "${intermfile}.tmp" "${intermfile}"

			echovt "${cursp}removing intermediate file '${subintermfile}'."

			# Remove the recursive call's intermediate file now that it has been
			# inserted into this level's intermediate.
			rm "${subintermfile}"
		fi
	done

	# Alternative sed formulations that were tried (kept for reference):
	# works, but leaves blank line:
	#cat "test.h" | sed -e "/^#include \"foo.h\"/r foo.h" -e "s///" > "test.new.h"
	# works:
	#cat "test.h" | sed -e '/^#include \"foo.h\"/ {' -e 'r foo.h' -e 'd' -e '}' > "test.new.h"
	# works:
	#cat "test.h" | sed -e '/^#include \"foo.h\"/r foo.h' -e '/^#include \"foo.h\"/d' > "test.new.h"
	#cat zorn/header.h | sed -e '/^#include \"header1.h\"/ {' -e 'i // begin insertion' -e 'r alice/header1.h' -e 'a // end insertion' -e 'd' -e '}'

	echovt "${cursp}header file '${inputfile}' fully processed."
	echovt "${cursp}returning via '${intermfile}'."

	echovo_n "."

	# Return the intermediate filename so the caller knows the name of this
	# invocation's output file.
	echo "${intermfile}"
}
#
# -- main function -------------------------------------------------------------
#
main()
{
	# Parse the command line, build the list of header search directories
	# and flatten the top-level header into the requested output file.
	#
	# Arguments (after options; see print_usage):
	#   $1  top-level header to flatten
	#   $2  path of the monolithic header to write
	#   $3  directory for intermediate files
	#   $4  whitespace-separated list of directories to search for headers
	# Globals written (read by the helper functions):
	#   script_name output_name strip_comments recursive_flag verbose_flag
	#   ignore_list nestsp temp_dir
	# Returns:
	#   0 on success, 1 if the final output file could not be written.
	#   Exits with status 1 on invalid options or arguments.

	# Scratch variables used only within this function.
	local opt item dir_list2 item_dirpaths sub_items result
	local inputfile outputfile dir_list dirpaths intermfile

	# The name of the script, stripped of any preceding path.
	script_name=${0##*/}

	# The script name to use in informational output. Defaults to ${script_name}.
	output_name=${script_name}

	# Whether or not we should strip C-style comments from the output. (Default
	# is to not strip C-style comments.)
	strip_comments=""

	# Whether or not we search the directories in dir_list recursively. (Default
	# is to not search recursively.)
	recursive_flag=""

	# The verboseness level. Level 1 is the documented default, so it must be
	# set here; otherwise the validity check below rejects every invocation
	# that does not pass -v explicitly.
	verbose_flag="1"

	# The list of directories to ignore
	ignore_list="old other temp test testsuite windows"

	# The amount to nest each level of recursion in the output.
	nestsp=" "

	# Process our command line options. The leading ':' in the option string
	# selects getopts' silent mode, in which a missing option argument is
	# reported as opt=":" and an unknown option as opt="?".
	while getopts ":o:rchv:" opt; do
		case $opt in
			o  ) output_name=$OPTARG ;;
			r  ) recursive_flag="1" ;;
			c  ) strip_comments="1" ;;
			v  ) verbose_flag=$OPTARG ;;
			h  ) print_usage ;;
			:  ) echo "${output_name}: Option -${OPTARG} requires an argument." 1>&2
			     exit 1 ;;
			\? ) print_usage ;;
		esac
	done
	shift $((OPTIND - 1))

	# Make sure that the verboseness level is valid.
	if [ "${verbose_flag}" != "0" ] &&
	   [ "${verbose_flag}" != "1" ] &&
	   [ "${verbose_flag}" != "2" ]; then

		echo "${output_name}: Invalid verboseness argument '${verbose_flag}'." 1>&2;
		exit 1
	fi

	# Print usage if we don't have exactly four arguments.
	if [ "$#" -ne 4 ]; then
		print_usage
	fi

	# Acquire the four required arguments:
	# - the input header file,
	# - the output header file,
	# - the temporary directory in which we can write intermediate files,
	# - the list of directories in which to search for the headers
	inputfile="$1"
	outputfile="$2"
	temp_dir="$3"
	dir_list="$4"

	# First, confirm that the directories in dir_list are valid.
	dir_list2=""
	for item in ${dir_list}; do

		# Strip a trailing slash from the path, if it has one.
		item=${item%/}

		echovt_n "checking ${item} "

		if [ -d "${item}" ]; then
			echovt_n2 " ...directory exists."
			dir_list2="${dir_list2} ${item}"
		else
			echovt_n2 " ...invalid directory; omitting."
		fi
	done
	dir_list2=$(canonicalize_ws "${dir_list2}")

	# Overwrite the original dir_list with the updated copy that omits
	# invalid directories.
	dir_list="${dir_list2}"

	echovt "check summary:"
	echovt " accessible directories:"
	echovt " ${dir_list}"

	# Generate a list of directories (dirpaths) which will be searched whenever
	# a #include directive is encountered. The method by which dirpaths is
	# compiled will depend on whether the recursive flag was given.
	if [ -n "${recursive_flag}" ]; then

		# If the recursive flag was given, we need to recursively scan each
		# directory in dir_list for directories with headers via the
		# function find_header_dirs().
		dirpaths=""
		for item in ${dir_list}; do

			item_dirpaths=$(find_header_dirs "${item}")
			dirpaths="${dirpaths} ${item_dirpaths}"
		done
		dirpaths=$(canonicalize_ws "${dirpaths}")

	else

		# If the recursive flag was not given, we can just use dir_list
		# as-is, though we opt to filter out the directories that don't
		# contain .h files.
		dirpaths=""
		for item in ${dir_list}; do

			echovt_n "scanning ${item}"

			# Acquire a list of the directory's contents.
			sub_items=$(ls "${item}")

			# If there is at least one header present, add the current
			# directory to the list of header directories.
			result=$(echo ${sub_items} | grep "\.h")
			if [ -n "${result}" ]; then
				dirpaths="${dirpaths} ${item}"
				echovt_n2 " ...found headers."
			else
				echovt_n2 " ...no headers found."
			fi
		done
		dirpaths=$(canonicalize_ws "${dirpaths}")
	fi

	echovt "scan summary:"
	echovt " headers found in:"
	echovt " ${dirpaths}"

	echovt "preparing to monolithify '${inputfile}'."

	echovt "new header will be saved to '${outputfile}'."

	echovo_n "."

	# Recursively substitute headers for occurrences of #include directives.
	intermfile=$(replace_pass "${inputfile}" "${dirpaths}" "${nestsp}")

	# Rename the intermediate file(path) to the output file(path). A failure
	# here means no output was produced, so report it instead of claiming
	# success.
	if ! mv "${intermfile}" "${outputfile}"; then
		echo "${output_name}: Failed to save monolithic header as '${outputfile}'." 1>&2
		return 1
	fi

	echovt "substitution complete."
	echovt "monolithic header saved as '${outputfile}'."

	echovo_n2 "."

	# Exit peacefully.
	return 0
}
# The script's main entry point, passing all parameters given.
# "$@" forwards every command-line argument to main() as a separate word,
# so arguments that contain whitespace (such as a quoted directory list)
# arrive intact. Because this is the last command in the file, the script's
# exit status is whatever main() returns or exits with.
main "$@"