diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index d6709bd20..5bd16ebb2 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -5,7 +5,7 @@ on:
     types: [created]
 
 jobs:
-  benchmark_main:
+  benchmark:
     if: github.event.issue.pull_request != '' && contains(github.event.comment.body, '/run-benchmark')
     runs-on: self-hosted
 
@@ -14,21 +14,77 @@ jobs:
       uses: actions/checkout@v2
       with:
         ref: main
+
+    - name: Get base branch commit ID
+      id: get_base_commit
+      run: echo "BASE_COMMIT_ID=$(git rev-parse HEAD)" >> $GITHUB_ENV
+
     - name: Set up Python
       uses: actions/setup-python@v2
       with:
         python-version: '3.9'
 
+    - name: Activate virtual environment and install dependencies
+      run: |
+        source bitblas_benchmark/bin/activate
+        python -m pip install --upgrade pip
+        if [ -f requirements-dev.txt ]; then python -m pip install -r requirements-dev.txt; fi
+
+    - name: Install project in wheel mode
+      run: |
+        source bitblas_benchmark/bin/activate
+        python -m pip install .
+
+    - name: Matmul Benchmark
+      run: |
+        source bitblas_benchmark/bin/activate
+        cd benchmark/operators
+        python ./benchmark_ops_matmul.py
+
+    - name: Get PR branch commit ID
+      id: get_pr_commit
+      run: echo "PR_COMMIT_ID=$(git rev-parse HEAD)" >> $GITHUB_ENV
+
     - name: Create virtual environment
       run: python -m venv bitblas_benchmark
 
     - name: Activate virtual environment and install dependencies
       run: |
-        source bitblas_ci/bin/activate
+        source bitblas_benchmark/bin/activate
         python -m pip install --upgrade pip
         if [ -f requirements-dev.txt ]; then python -m pip install -r requirements-dev.txt; fi
 
     - name: Install project in wheel mode
       run: |
-        source bitblas_ci/bin/activate
-        python -m pip install .
\ No newline at end of file
+        source bitblas_benchmark/bin/activate
+        python -m pip install .
+
+    - name: Matmul Benchmark
+      run: |
+        source bitblas_benchmark/bin/activate
+        cd benchmark/operators
+        python ./benchmark_ops_matmul.py
+
+    - name: Compare benchmark results
+      run: |
+        source bitblas_benchmark/bin/activate
+        cd benchmark/operators
+        python ./compare_benchmark.py --base ${{ env.BASE_COMMIT_ID }} --head ${{ env.PR_COMMIT_ID }} 2>&1 | tee compare_results.txt
+
+    - name: Install GitHub CLI
+      run: |
+        sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-key C99B11DEB97541F0
+        sudo apt-add-repository https://cli.github.com/packages
+        sudo apt update
+        sudo apt install gh
+
+    - name: Authenticate GitHub CLI
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        gh auth login --with-token <<< $GITHUB_TOKEN
+
+    - name: Post benchmark results
+      run: |
+        cat compare_results.txt
+        gh pr comment ${{ github.event.issue.number }} --body "$(cat compare_results.txt)"
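The job above benchmarks the same runner twice: once on main (recorded as BASE_COMMIT_ID) and once for the PR head (PR_COMMIT_ID), then hands both IDs to compare_benchmark.py. The two runs meet through commit-scoped log directories; the sketch below illustrates that contract. commit_log_dir is a hypothetical helper, and the cache root shown is an assumption (the real root is whatever get_default_cache_path() returns), not a fixed path.

    # Hypothetical helper; mirrors commit_id_path = f"CommitID_{commit_id}"
    # in the deserializer in the next diff. Cache root is an assumption.
    from os import path

    def commit_log_dir(commit_id: str,
                       cache_root: str = path.expanduser("~/.cache/bitblas")) -> str:
        # benchmark_ops_matmul.py serializes each run under this directory;
        # compare_benchmark.py --base/--head reads the same directories back.
        return path.join(cache_root, "benchmark", f"CommitID_{commit_id}")

    print(commit_log_dir("abc1234"))  # e.g. .../benchmark/CommitID_abc1234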
diff --git a/benchmark/operators/benchmark_ops_matmul.py b/benchmark/operators/benchmark_ops_matmul.py
index 2e1ac362b..db83be28a 100644
--- a/benchmark/operators/benchmark_ops_matmul.py
+++ b/benchmark/operators/benchmark_ops_matmul.py
@@ -8,7 +8,7 @@
 from tabulate import tabulate
 import json
 from os import path, makedirs
-from typing import Tuple
+from typing import Tuple, Dict, List, Union
 
 set_log_level("DEBUG")
 
@@ -87,9 +87,16 @@ def serialize_results(self) -> None:
         )
 
         # Save benchmark shapes into JSON
-        shapes = [(config.M, config.N, config.K)
-                  for name, results in self.benchmark_results.items() for i, _ in enumerate(results)
-                  for config in [self.benchmark_sets[name][i][1]]]
+        shapes: Dict[str, List[List[Union[int, Tuple, Dict, None]]]] = {}
+
+        # Iterate through the benchmark results to extract the shapes
+        for name, results in self.benchmark_results.items():
+            shapes[name] = []
+            for i, _ in enumerate(results):
+                config = self.benchmark_sets[name][i][1]
+                dyn_prof_shape = self.benchmark_sets[name][i][2]
+                shapes[name].append([config.M, config.N, config.K, dyn_prof_shape])
+
         self._save_json(shapes, path.join(log_commit_path, self.BENCHMARK_SHAPES_FILE))
 
         # Save device info into JSON
@@ -103,20 +110,40 @@ def _save_json(self, data, file_path):
         with open(file_path, "w") as f:
             json.dump(data, f)
 
-    def deserialize_results(self, log_path: str) -> None:
+    @classmethod
+    def deserialize_from_logs(cls, commit_id: str) -> "BitblasMatmulOpsBenchmark":
         """Deserialize benchmark results from JSON files."""
-        self.benchmark_results = self._load_json(path.join(log_path, self.BENCHMARK_RESULTS_FILE))
+        benchmark = cls()
+        commit_id_path = f"CommitID_{commit_id}"
+        log_commit_path = path.join(benchmark.log_path, commit_id_path)
 
-        shapes_file = path.join(log_path, self.BENCHMARK_SHAPES_FILE)
-        with open(shapes_file, "r") as f:
-            shapes = json.load(f)
-            # TODO: Reconstruction of benchmark_sets from shapes
-            del shapes
+        benchmark.benchmark_results = cls._load_json(
+            path.join(log_commit_path, cls.BENCHMARK_RESULTS_FILE))
 
-        self.benchmark_target = self._load_json(path.join(log_path,
-                                                          self.BENCHMARK_DEVICE_FILE))["device"]
+        shapes_file = path.join(log_commit_path, cls.BENCHMARK_SHAPES_FILE)
 
-    def _load_json(self, file_path):
+        with open(shapes_file, "r") as f:
+            shapes = json.load(f)
+            for name, shape_list in shapes.items():
+                for shape in shape_list:
+                    M, N, K, dyn_prof_shape = shape
+                    benchmark.add_benchmark_set(
+                        name,
+                        [
+                            benchmark.generate_op_unit(
+                                benchmark.generate_operator_config(name, M, N, K),
+                                dynamic_profiling_shape=dyn_prof_shape,
+                            )
+                        ],
+                    )
+
+        benchmark.benchmark_target = cls._load_json(
+            path.join(log_commit_path, cls.BENCHMARK_DEVICE_FILE))["device"]
+
+        return benchmark
+
+    @staticmethod
+    def _load_json(file_path):
         """Helper function to load JSON data from a file."""
         with open(file_path, "r") as f:
             return json.load(f)
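With this change the shapes file stops being a flat list of (M, N, K) tuples and becomes a per-tag mapping that also carries the dynamic profiling shape, which is exactly what deserialize_from_logs needs to rebuild benchmark_sets without re-running anything. A made-up payload for illustration (the tag and sizes are invented; dyn_prof_shape is None for static-M configs):

    # Invented example of the serialized shapes JSON; each entry is
    # [M, N, K, dyn_prof_shape]. M may be an int or a list of candidates.
    example_shapes = {
        "matmul_fp16xfp16": [
            [16384, 16384, 16384, None],               # static M
            [[1, 32, 1024], 16384, 16384, {"M": 32}],  # dynamic M + profile point
        ],
    }

Deserialization is also now a classmethod that returns a fresh, fully populated instance (bench = BitblasMatmulOpsBenchmark.deserialize_from_logs(commit_id)) instead of mutating self and discarding the shapes it just read.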
diff --git a/benchmark/operators/compare_benchmark.py b/benchmark/operators/compare_benchmark.py
new file mode 100644
index 000000000..080d49dca
--- /dev/null
+++ b/benchmark/operators/compare_benchmark.py
@@ -0,0 +1,102 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import argparse
+from benchmark_ops_matmul import BitblasMatmulOpsBenchmark, HELPER_MESSAGE
+from tabulate import tabulate
+from typing import Tuple
+
+
+def compare(base: BitblasMatmulOpsBenchmark, head: BitblasMatmulOpsBenchmark):
+    """Generate and print a report of the benchmark results."""
+    for name, results in head.benchmark_results.items():
+        table_data = [
+            ["TAG:", name, "Device:", head.benchmark_target],
+            [
+                "Shape (M-N-K / N-K_M)",
+                "Time (ms)",
+                "Throughput (TFLOPS)",
+                "Tune Time (s)",
+            ],
+        ]
+
+        def get_suffix(base, head):
+            symbol = "↑" if head > base else "↓" if head < base else "="
+            ratio = f"{((head - base) / base) * 100:.2f}%" if base is not None else "N/A"
+            return f"{symbol}({ratio})"
+
+        def legalize_shape(M, N, K, dyn_prof_shape):
+            """Generate a string representation of the operator shape.
+
+            Args:
+                M: The M dimension (can be an int or a tuple).
+                N: The N dimension (must be an int).
+                K: The K dimension (must be an int).
+                dyn_prof_shape: The dynamic profiling shape (dict with 'M' key if M is dynamic).
+
+            Returns:
+                A string representing the shape in either 'M-N-K' or 'N-K_M' format.
+            """
+            if isinstance(M, int):
+                return f"{M}-{N}-{K}"
+            elif dyn_prof_shape and "M" in dyn_prof_shape:
+                return f"{N}-{K}_{dyn_prof_shape['M']}"
+            else:
+                # Calculate the average of tuple M
+                opt_m = sum(M) / len(M)
+                return f"{N}-{K}_{opt_m}"
+
+        for i, (latency, tuning_time) in enumerate(results):
+            op_config = head.benchmark_sets[name][i][1]
+            dyn_prof_shape = head.benchmark_sets[name][i][2]
+            shape = legalize_shape(op_config.M, op_config.N, op_config.K, dyn_prof_shape)
+
+            benchmark_M = (
+                sum(op_config.M) /
+                len(op_config.M) if isinstance(op_config.M, Tuple) else op_config.M)
+
+            base_latency = base.benchmark_results[name][i][0]
+            if latency is not None and base_latency is not None:
+                throughput = (2 * benchmark_M * op_config.N * op_config.K / (latency * 1e-3) / 1e12)
+                base_throughput = (2 * benchmark_M * op_config.N * op_config.K /
+                                   (base_latency * 1e-3) / 1e12)
+                throughput = f"{throughput:.3f}{get_suffix(base_throughput, throughput)}"
+            else:
+                throughput = "N/A"
+
+            if latency is not None and base_latency is not None:
+                latency_str = f"{latency:.3f}{get_suffix(base_latency, latency)}"
+            else:
+                latency_str = "N/A"
+
+            base_tuning_time = base.benchmark_results[name][i][1]
+            if tuning_time is not None and base_tuning_time is not None:
+                tuning_time_str = f"{tuning_time:.3f}{get_suffix(base_tuning_time, tuning_time)}"
+            else:
+                tuning_time_str = "N/A"
+
+            table_data.append([shape, latency_str, throughput, tuning_time_str])
+
+        print(tabulate(table_data, headers="firstrow", tablefmt="fancy_grid"))
+        print(HELPER_MESSAGE)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--base",
+        type=str,
+        help="the base commit id",
+    )
+    parser.add_argument(
+        "--head",
+        type=str,
+        help="the head commit id",
+    )
+    args = parser.parse_args()
+
+    base_benchmark = BitblasMatmulOpsBenchmark.deserialize_from_logs(args.base)
+
+    head_benchmark = BitblasMatmulOpsBenchmark.deserialize_from_logs(args.head)
+
+    compare(base_benchmark, head_benchmark)
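The ↑/↓(..%) suffixes in the table come from get_suffix, which always measures head relative to base; on a latency column a negative percentage therefore means the PR got faster. A standalone copy with invented numbers shows the arithmetic:

    # Standalone copy of get_suffix from the script above, fed made-up values.
    def get_suffix(base, head):
        symbol = "↑" if head > base else "↓" if head < base else "="
        ratio = f"{((head - base) / base) * 100:.2f}%" if base is not None else "N/A"
        return f"{symbol}({ratio})"

    print(get_suffix(2.0, 1.5))    # ↓(-25.00%): head value 25% below base
    print(get_suffix(10.0, 12.5))  # ↑(25.00%): head value 25% above base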
diff --git a/bitblas/benchmark/operator/__init__.py b/bitblas/benchmark/operator/__init__.py
index 113aab5c5..c5e7852e3 100644
--- a/bitblas/benchmark/operator/__init__.py
+++ b/bitblas/benchmark/operator/__init__.py
@@ -26,7 +26,7 @@ class BitblasOperatorBenchmarkBase(ABC):
     enable_hardware_aware_tuning: bool = False
 
     # Log path
-    log_path: Optional[str] = None
+    log_path: Optional[str] = path.join(get_default_cache_path(), "benchmark")
 
     @abstractmethod
     def prepare_benchmark_sets(self):
@@ -53,7 +53,6 @@ def add_benchmark_set(
 
     def run(self, report=True, serialize=True, enable_tuning: bool = False):
         """Run the benchmark process."""
-        self.log_path = path.join(get_default_cache_path(), "benchmark")
         if not path.exists(self.log_path):
             makedirs(self.log_path)
 
@@ -135,11 +134,6 @@ def serialize_results(self) -> None:
         """Serialize the benchmark results."""
         pass
 
-    @abstractmethod
-    def deserialize_results(self) -> None:
-        """Deserialize the benchmark results."""
-        pass
-
     def enable_tuning(self):
         """Enable hardware-aware tuning."""
         self.enable_hardware_aware_tuning = True
@@ -151,3 +145,12 @@ def disable_tuning(self):
     def set_log_path(self, log_path: str):
         """Set the log path."""
         self.log_path = log_path
+
+    def set_benchmark_target(self, target: str):
+        """Set the benchmark target."""
+        self.benchmark_target = target
+
+    def set_benchmark_results(self, results: Dict[str, List[Tuple[Optional[float],
+                                                                  Optional[float]]]]):
+        """Set the benchmark results."""
+        self.benchmark_results = results
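Net effect of the base-class changes: log_path now picks up its <cache>/benchmark default once, at class-definition time (so run() no longer assigns it and set_log_path stays the per-instance override), the abstract deserialize_results hook is dropped in favor of the concrete deserialize_from_logs classmethod above, and the two new setters let deserialization code restore state through the public surface. A usage sketch, assuming it is run from benchmark/operators/ with logs for the given commit already on disk; the commit ID and target string are placeholders:

    # Sketch only: rebuild a benchmark from serialized logs, then adjust it.
    from benchmark_ops_matmul import BitblasMatmulOpsBenchmark

    bench = BitblasMatmulOpsBenchmark.deserialize_from_logs("0123abc")
    print(bench.log_path)                    # class-level default: <cache>/benchmark
    bench.set_benchmark_target("my-device")  # override the device read from logs
    bench.set_benchmark_results({})          # or swap in results wholesale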