diff --git a/CMakeLists.txt b/CMakeLists.txt index 9610482..4752d4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,6 +25,7 @@ add_subdirectory(third_party/pybind11) add_subdirectory(third_party/variant) set(CMAKE_CXX_STANDARD 14) +add_subdirectory(utils) add_subdirectory(src) add_subdirectory(doc) add_subdirectory(test) \ No newline at end of file diff --git a/doc/doc_check/README.md b/doc/doc_check/README.md index 62cb34d..61923ff 100644 --- a/doc/doc_check/README.md +++ b/doc/doc_check/README.md @@ -2,33 +2,61 @@ ### Goal -DocCheck provides a set of utilities to enforce invariant properties of artifacts (e.g., code snippets or -output of execution) presented in the software documentation. They can be used to ensure that these -artifacts are always compatible and up-to-date with the state of software development. +It is always desirable to ensure that every piece of knowledge has a single, unambiguous, authoritative representation +in our codebase. However, sometimes violating such principle can result in improved overall quality of the software +project. For instance, when we write documentation containing example code snippets, it is desirable to write tests +for them - however, if we do so, the same code example will exist both in documentation and in tests! Such duplication +of knowledge has tangible adverse consequences - when documentation is updated to new examples, tests become obsolete. +Moreover, the discrepancy between multiple copies of the same knowledge (e.g., code example) can only be spotted with +manual inspection. + +Under such circumstances, to establish a single source of trough in an enforceable manner, we can turn to the DocCheck +tool. Simply put, DocCheck enforces the consistency constraints as specified by the users between textual artifacts in +our codebase. Textual artifacts can be: +- Sections in documentation +- Content of a file +- Output of command execution +- ... + +Specifically, DocCheck allows us to precisely specify how a textual artifact is derived from another. Such +specification is then parsed and verified by our software testing infrastructure to ensure the consistency between +derived textual artifact and the original one. This overall workflow provides an enforceable way to establish a single, +unambiguous and authoritative representation of knowledge in our codebase. ### Directives -DocCheck provides a set of directives that can be used in documentations to enforce desired invariants. -A directive is a comment with a specific format/syntax to communicate the intent to check certain invariants to the -DocCheck checker. Generally, a directive has the following syntax in markdown: +Directives can be used to communicate the relationship between derived and original textual artifacts to DocCheck. +DocCheck will perform consistency constraints checking according to the specification. In this section, supported +directives are explained in details. + +Currently, directives can be specified either in a Markdown file or in a standalone DocCheck configuration file (a file +ending with `.dc` suffix). For markdown file, specify directive using the following syntax: ```markdown [{directive}]: <> ({configuration}) ``` -Where {directive} specifies the type of invariance checking intended and {configuration} expresses the specific +For standalone DocCheck configuration file, use the following syntax: +``` +{directive}({configuration}) +``` + +where `{directive}` is the name of the directive and `{configuration}` expresses the specific parameters of this directive. In general, a directive configuration is expressed using a python dictionary literal, -but special shorthands exist for each directive individually. +with supported configuration parameter name as keys and the desired state of configuration as values. + +Special shorthands exist for each directive individually. ##### `same-as-file`: Use `same-as-file` directive to ensure that the code section following this directive is the same as a source file. This is useful primarily because testing code snippet in documentation directly is often impossible. However, unit tests can be written utilizing an exact copy of the code snippet content. We can use `same-as-file` directive -to ensure the code snippet is always the same as its copy used in some unit tests, +to ensure the code snippet is always the same as its copy used in some unit tests. -`same-as-file` directive supports a convenient short-hand configuration format where the directive configuration can be fully specified using the name of the reference file to check against. -For example, to ensure a code snippet is the same as a unit-tested file `reference.cpp`, use the following directive as shown in the documentation snippet: +`same-as-file` directive supports a convenient short-hand configuration format where the directive configuration can +be fully specified using the name of the reference file to check against. For example, to ensure a code snippet is the +same as a unit-tested file `reference.cpp`, use the following directive as shown in the documentation snippet: [same-as-file]: <> (doc/doc_check/test/same-as-file/simple/README.md) ````markdown @@ -74,4 +102,26 @@ int main() { return 0; } ``` -```` \ No newline at end of file +```` + +#### `file-same-as-stdout` + +Use `file-same-as-stdout` to ensure that file content is the same as the output of executing a command. +This directive supports these parameters in it: + +`file` (string): file to compare with. + +`cmd` (List[str]): the command (expressed as a list of command components), e.g. `["ls", "-l"]`. + +For example, to ensure that the content of a file `test.in`: + +[same-as-file]: <> (doc/doc_check/test/file-same-as-stdout/success/test.in) +``` +dog +``` + +is exactly the same as the output of command execution `echo dog`, one can use the following directive: +[same-as-file]: <> (doc/doc_check/test/file-same-as-stdout/success/test.in.dc) +``` +file-same-as-stdout({"file": "test.in", "cmd": ["echo", "dog"]}) +``` \ No newline at end of file diff --git a/doc/doc_check/check.py b/doc/doc_check/check.py index 94e101e..65c2d53 100644 --- a/doc/doc_check/check.py +++ b/doc/doc_check/check.py @@ -9,6 +9,7 @@ import argparse import os, sys +from itertools import chain from pathlib import Path from utils import setup_logger, DocCheckerCtx @@ -35,7 +36,7 @@ def main(root_dir, exclude_dirs): exclude_dirs[i] = os.path.join(root_dir, exclude_dir) ctx = DocCheckerCtx(root_dir) - for doc_file in Path(root_dir).rglob('*.md'): + for doc_file in chain(Path(root_dir).rglob('*.md'), Path(root_dir).rglob('*.dc')): # Skip, if doc file is in directories to be excluded. if any([str(doc_file).startswith(exclude_dir) for exclude_dir in exclude_dirs]): continue diff --git a/doc/doc_check/directive.py b/doc/doc_check/directive.py index 884e289..61bcde1 100644 --- a/doc/doc_check/directive.py +++ b/doc/doc_check/directive.py @@ -37,23 +37,19 @@ class Directive(object): self.handler = handler def try_parse_directive( - self, ctx: DocCheckerCtx, + self, line: str, doc_file_ext: str, directive_config: DirectiveConfigList) -> Tuple[str, Any]: """ - :param ctx: parser context. + :param line: next line to try parse a directive from. + :param doc_file_ext: file extention. :param directive_config: a list used to output parsed directive configuration. :return: parse result. """ - try: - line = ctx.doc_file.next_non_empty_line() - except RuntimeError as e: - # Do not raise exception when next non-empty line - # does not exist. Instead, return failure. - if str(e) != "Enf of file.": - raise + + if doc_file_ext not in self.ext_to_patterns: return failure() - matches = self.ext_to_patterns[ctx.doc_file_ext()].findall(line) + matches = self.ext_to_patterns[doc_file_ext].findall(line) if len(matches) > 1: raise ValueError("more than one directives in a line") diff --git a/doc/doc_check/directive_impl/file_same_as_stdout.py b/doc/doc_check/directive_impl/file_same_as_stdout.py new file mode 100644 index 0000000..bd0cc28 --- /dev/null +++ b/doc/doc_check/directive_impl/file_same_as_stdout.py @@ -0,0 +1,51 @@ +# ===------- file_same_as_stdout.py - File Same as stdout Directive -------===// +# +# Copyright 2019-2020 The IBM Research Authors. +# +# ============================================================================= +# +# Verifies that a file is the same as stdout of some command execution. +# +# ===----------------------------------------------------------------------===// + +import logging +import subprocess +import difflib +import sys + +logger = logging.getLogger('doc-check') + +from doc_parser import * +from utils import * + + +def handle(config, ctx): + logger.debug( + "Handling a file-same-as-stdout directive with config {}".format( + config)) + + # Read in file content. + file = config["file"] + with open(os.path.join(ctx.root_dir, file)) as f: + file_content = f.read() + + # Execute command and retrieve output. + cmd = config["cmd"] + cmd_stdout = subprocess.run(cmd, stdout=subprocess.PIPE, + cwd=ctx.root_dir).stdout.decode('utf-8') + + # Compute diff. + diff = difflib.unified_diff(file_content.splitlines(keepends=True), + cmd_stdout.splitlines(keepends=True), + fromfile=file, + tofile="$({})".format(" ".join(cmd))) + diff = list(diff) + + # If diff is non-trivial, raise error and display diff. + if len(diff): + print("The folloing diff is detected:") + sys.stdout.writelines(diff) + raise ValueError("Check file-same-as-stdout failed") + + +ext_to_patterns = {'.dc': 'file-same-as-stdout\\(([^)]*)\\)'} diff --git a/doc/doc_check/doc_parser.py b/doc/doc_check/doc_parser.py index 38d9358..13a5caf 100644 --- a/doc/doc_check/doc_parser.py +++ b/doc/doc_check/doc_parser.py @@ -6,11 +6,14 @@ # # ===----------------------------------------------------------------------===// +import logging + from typing import List from utils import * from directive import Directive, generic_config_parser +logger = logging.getLogger('doc-check') def parse_code_section_delimiter(ctx): assert ctx.doc_file_ext() == ".md" @@ -20,15 +23,26 @@ def parse_code_section_delimiter(ctx): def try_parse_and_handle_directive(ctx): from directive_impl import same_as_file + from directive_impl import file_same_as_stdout + + try: + line = ctx.doc_file.next_non_empty_line() + except RuntimeError as e: + # Do not raise exception when next non-empty line + # does not exist. Instead, return failure. + if str(e) != "Enf of file.": + raise + return failure() # Register all directives. all_directives: List[Directive] = [ - Directive(same_as_file.ext_to_patterns, [generic_config_parser, same_as_file.parse], same_as_file.handle) + Directive(same_as_file.ext_to_patterns, [generic_config_parser, same_as_file.parse], same_as_file.handle), + Directive(file_same_as_stdout.ext_to_patterns, [generic_config_parser], file_same_as_stdout.handle), ] for directive in all_directives: directive_config = [] - if succeeded(directive.try_parse_directive(ctx, directive_config)): + if succeeded(directive.try_parse_directive(line, ctx.doc_file_ext(), directive_config)): directive.handle(directive_config.pop(), ctx) return success(directive_config) diff --git a/doc/doc_check/test/file-same-as-stdout/failure/test.in b/doc/doc_check/test/file-same-as-stdout/failure/test.in new file mode 100644 index 0000000..18a619c --- /dev/null +++ b/doc/doc_check/test/file-same-as-stdout/failure/test.in @@ -0,0 +1 @@ +dog diff --git a/doc/doc_check/test/file-same-as-stdout/failure/test.in.dc b/doc/doc_check/test/file-same-as-stdout/failure/test.in.dc new file mode 100644 index 0000000..5cfe0ab --- /dev/null +++ b/doc/doc_check/test/file-same-as-stdout/failure/test.in.dc @@ -0,0 +1 @@ +file-same-as-stdout({"file": "test.in", "cmd": ["echo", "'cat'"]}) \ No newline at end of file diff --git a/doc/doc_check/test/file-same-as-stdout/success/test.in b/doc/doc_check/test/file-same-as-stdout/success/test.in new file mode 100644 index 0000000..18a619c --- /dev/null +++ b/doc/doc_check/test/file-same-as-stdout/success/test.in @@ -0,0 +1 @@ +dog diff --git a/doc/doc_check/test/file-same-as-stdout/success/test.in.dc b/doc/doc_check/test/file-same-as-stdout/success/test.in.dc new file mode 100644 index 0000000..bcff399 --- /dev/null +++ b/doc/doc_check/test/file-same-as-stdout/success/test.in.dc @@ -0,0 +1 @@ +file-same-as-stdout({"file": "test.in", "cmd": ["echo", "dog"]}) \ No newline at end of file diff --git a/doc/doc_check/test/test_file-same-as-stdout.py b/doc/doc_check/test/test_file-same-as-stdout.py new file mode 100644 index 0000000..5649677 --- /dev/null +++ b/doc/doc_check/test/test_file-same-as-stdout.py @@ -0,0 +1,33 @@ +# ===------- test-same-as-file.py - Test for same-as-file directive -------===// +# +# Copyright 2019-2020 The IBM Research Authors. +# +# ============================================================================= +# +# ===----------------------------------------------------------------------===// + +import unittest +import os +import sys + +# Make common utilities visible by adding them to system paths. +test_dir = os.path.dirname(os.path.realpath(__file__)) +doc_check_base_dir = os.path.abspath(os.path.join(test_dir, os.pardir)) +print(doc_check_base_dir) +sys.path.append(doc_check_base_dir) + +import check + + +class TestStringMethods(unittest.TestCase): + def test_basic(self): + check.main('./file-same-as-stdout/success/', []) + + def test_failure(self): + with self.assertRaises(ValueError) as context: + check.main('./file-same-as-stdout/failure/', []) + self.assertTrue('Check file-same-as-stdout failed' in str( + context.exception)) + +if __name__ == '__main__': + unittest.main() diff --git a/src/Builder/CMakeLists.txt b/src/Builder/CMakeLists.txt index 573abd8..a85c00e 100644 --- a/src/Builder/CMakeLists.txt +++ b/src/Builder/CMakeLists.txt @@ -2,8 +2,7 @@ add_library(OMBuilder FrontendDialectHelper.cpp FrontendDialectHelper.hpp FrontendDialectTransformer.cpp - FrontendDialectTransformer.hpp - OpBuildTable.inc) + FrontendDialectTransformer.hpp) target_include_directories(OMBuilder PRIVATE ${ONNX_MLIR_SRC_ROOT}) target_include_directories(OMBuilder PRIVATE ${CMAKE_BINARY_DIR}) diff --git a/src/Builder/OpBuildTable.inc b/src/Builder/OpBuildTable.inc index d23416a..e876398 100644 --- a/src/Builder/OpBuildTable.inc +++ b/src/Builder/OpBuildTable.inc @@ -1,5 +1,4 @@ //******************************************************** -// This file is generated on UTC-02/24/2020, 06:29:01. // Do not modify this file directly. // This file is automatically generated via script. // Details can be found in doc/readonnxdefs.md . diff --git a/src/Builder/OpBuildTable.inc.dc b/src/Builder/OpBuildTable.inc.dc new file mode 100644 index 0000000..ba1d8d1 --- /dev/null +++ b/src/Builder/OpBuildTable.inc.dc @@ -0,0 +1 @@ +file-same-as-stdout({"file": "src/Builder/OpBuildTable.inc", "cmd": ["python", "utils/gen_doc.py", "--dry-run-op-build-table"]}) \ No newline at end of file diff --git a/src/Dialect/ONNX/ONNXOps.td.inc b/src/Dialect/ONNX/ONNXOps.td.inc index 20cbfaf..8476986 100644 --- a/src/Dialect/ONNX/ONNXOps.td.inc +++ b/src/Dialect/ONNX/ONNXOps.td.inc @@ -1,5 +1,4 @@ //******************************************************** -// This file is generated on UTC-04/03/2020, 03:41:28. // Do not modify this file directly. // This file is automatically generated via script. // Details can be found in doc/readonnxdefs.md . diff --git a/src/Dialect/ONNX/ONNXOps.td.inc.dc b/src/Dialect/ONNX/ONNXOps.td.inc.dc new file mode 100644 index 0000000..5b19365 --- /dev/null +++ b/src/Dialect/ONNX/ONNXOps.td.inc.dc @@ -0,0 +1 @@ +file-same-as-stdout({"file": "src/Dialect/ONNX/ONNXOps.td.inc", "cmd": ["python", "utils/gen_doc.py", "--dry-run-onnx-ops"]}) \ No newline at end of file diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt new file mode 100644 index 0000000..d973650 --- /dev/null +++ b/utils/CMakeLists.txt @@ -0,0 +1,29 @@ +# Invoke gen_doc.py to obtain ONNXOps.td.inc, OpBuildTable.inc. +add_custom_command(OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/ONNXOps.td.inc + ${CMAKE_CURRENT_SOURCE_DIR}/OpBuildTable.inc + COMMAND python ${CMAKE_CURRENT_SOURCE_DIR}/gen_doc.py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gen_doc.py) + +# Copy the generated files to respective destinations: +# ONNXOps.td.inc -> src/Dialect/ONNX/ONNXOps.td.inc +add_custom_command(OUTPUT ${ONNX_MLIR_SRC_ROOT}/src/Dialect/ONNX/ONNXOps.td.inc + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/ONNXOps.td.inc + ${ONNX_MLIR_SRC_ROOT}/src/Dialect/ONNX/ONNXOps.td.inc + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/ONNXOps.td.inc) + +# OpBuildTable.inc -> src/Builder/OpBuildTable.inc +add_custom_command(OUTPUT ${ONNX_MLIR_SRC_ROOT}/src/Builder/OpBuildTable.inc + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/OpBuildTable.inc + ${ONNX_MLIR_SRC_ROOT}/src/Builder/OpBuildTable.inc + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/OpBuildTable.inc) + +add_custom_target(OMONNXOpsTableGenIncGen + DEPENDS ${ONNX_MLIR_SRC_ROOT}/src/Dialect/ONNX/ONNXOps.td.inc) +add_custom_target(OMONNXOpsBuildTableIncGen + DEPENDS ${ONNX_MLIR_SRC_ROOT}/src/Builder/OpBuildTable.inc) + +add_custom_target(OMONNXOpsIncTranslation + DEPENDS OMONNXOpsTableGenIncGen + OMONNXOpsBuildTableIncGen) \ No newline at end of file diff --git a/doc/gen_doc.py b/utils/gen_doc.py similarity index 93% rename from doc/gen_doc.py rename to utils/gen_doc.py index 2d13937..db52898 100644 --- a/doc/gen_doc.py +++ b/utils/gen_doc.py @@ -5,10 +5,12 @@ from __future__ import print_function from __future__ import unicode_literals from collections import defaultdict, OrderedDict +from io import StringIO import io import os import sys import datetime +import argparse import numpy as np # type: ignore @@ -18,6 +20,17 @@ from onnx.backend.test.case import collect_snippets from onnx.backend.sample.ops import collect_sample_implementations from typing import Any, Text, Sequence, Dict, List, Type, Set, Tuple +parser = argparse.ArgumentParser() +parser.add_argument("--dry-run-onnx-ops", + help="Output ONNXOps.td.inc content to stdout.", + action="store_true", + default=False) +parser.add_argument("--dry-run-op-build-table", + help="Output OpBuildTable.inc content to stdout.", + action="store_true", + default=False) +args = parser.parse_args() + # Manual specification of attribute defaults. special_attr_defaults = dict([ # ("AveragePool.kernel_shape", ('ints', '{}')), @@ -76,7 +89,7 @@ SAMPLE_IMPLEMENTATIONS = collect_sample_implementations() ONNX_ML = not bool(os.getenv('ONNX_ML') == '0') ONNX_ML = False -print("ONNX_ML", ONNX_ML) +sys.stderr.write("ONNX_ML {}\n".format(ONNX_ML)) if ONNX_ML: ext = '-ml.md' @@ -253,8 +266,8 @@ def get_operands_or_results(schema, is_input): types = ["Variadic<{}>".format(any_type_of(types))] else: #TODO handle(variadic, heterogeneous) " - print("warning: (variadic, heterogeneous) for" + schema.name + - ' ' + value.name) + sys.stderr.write("warning: (variadic, heterogeneous) for" + schema.name + + ' ' + value.name + "\n") # Since output name can coincide with that of an input, we explicitly # append a suffix "_out" to such names for disambiguation. @@ -518,17 +531,16 @@ def main(args): # type: (Type[Args]) -> None datetime.timezone.utc).strftime("%m/%d/%Y, %H:%M:%S") autogen_warning = ( '//********************************************************\n' - '// This file is generated on UTC-{}.\n' '// Do not modify this file directly.\n' '// This file is automatically generated via script.\n' '// Details can be found in doc/readonnxdefs.md .\n' '//********************************************************\n\n') autogen_warning = autogen_warning.format(curr_utc_time) - op_def = io.open(args.op_def_file, 'w', newline='') + op_def = args.op_def op_def.write(autogen_warning) - op_importer = io.open(args.op_importer_file, 'w', newline='') + op_importer = args.op_importer op_importer.write(autogen_warning) for domain, supportmap in build_operator_schemas(): @@ -538,12 +550,25 @@ def main(args): # type: (Type[Args]) -> None r = gen_op_def(schema) op_def.write(r) - if __name__ == '__main__': curr_dir = os.path.dirname(os.path.realpath(__file__)) class Args(object): - op_def_file = os.path.join(curr_dir, 'ONNXOps.td.inc') - op_importer_file = os.path.join(curr_dir, 'OpBuildTable.inc') + if args.dry_run_onnx_ops: + op_def = StringIO() + else: + op_def_file_path = os.path.join(curr_dir, 'ONNXOps.td.inc') + op_def = io.open(op_def_file_path, 'w', newline='') + if args.dry_run_op_build_table: + op_importer = StringIO() + else: + op_importer_file_path = os.path.join(curr_dir, 'OpBuildTable.inc') + op_importer = io.open(op_importer_file_path, 'w', newline='') main(Args) + + if args.dry_run_onnx_ops: + sys.stdout.write(Args.op_def.getvalue()) + if args.dry_run_op_build_table: + sys.stdout.write(Args.op_importer.getvalue()) +