diff --git a/Cargo.lock b/Cargo.lock index 0933fb6..41e7779 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "codem8" -version = "0.7.6" +version = "0.7.7" dependencies = [ "clap", "ignore", diff --git a/Cargo.toml b/Cargo.toml index ed5d217..afd482f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codem8" -version = "0.7.6" +version = "0.7.7" edition = "2021" rust-version = "1.85" license = "MIT" diff --git a/README.md b/README.md index efce9e6..8f72c61 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ codem8 --report-duplicate -file-extension=ts,js -files="src/a.ts,src/b.js" Quoting `-files` values is recommended in PowerShell when paths contain file extensions. -Analyze files changed on the current local Git branch compared to the origin +Analyze lines changed on the current local Git branch compared to the origin base branch: ```bash @@ -111,7 +111,7 @@ The default maximum cognitive complexity is 15, and the default maximum cyclomatic complexity is 10. Use `-max-cognitive-complexity=` and `-max-cyclomatic-complexity=` to adjust them. -Use `-git-branch` to analyze complexity only in supported files changed on the +Use `-git-branch` to analyze complexity only in supported lines changed on the current local branch. The same origin branch resolution and `-files` exclusion rules used by the duplicate report apply. @@ -130,7 +130,7 @@ trailing Unicode whitespace are removed before hashing and comparison. Empty trimmed lines are ignored. CodeM8 currently expects UTF-8 source files; invalid UTF-8 produces a clear error rather than lossy output. -Use `-git-branch` to search duplicate code only in files changed on the current +Use `-git-branch` to search duplicate code only in lines changed on the current local branch. CodeM8 resolves that branch set from `origin/HEAD` with `origin/main` and `origin/master` fallbacks. The option requires a Git repository and cannot be combined with `-files`. diff --git a/src/cli/args.rs b/src/cli/args.rs index 92e0159..aba6817 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -20,8 +20,6 @@ struct ClapCli { verbose: u8, #[arg(long = "codem8-git-branch", action = ArgAction::Count)] git_branch: u8, - #[arg(long = "codem8-git-branch-strict", action = ArgAction::Count)] - git_branch_strict: u8, #[arg( long = "codem8-file-extension", value_name = "extensions", @@ -64,9 +62,7 @@ where let report = selected_report(&parsed)?; validate_repeated_options(&parsed)?; let git_branch = parsed.git_branch != 0; - let git_branch_strict = parsed.git_branch_strict != 0; - let files = selected_files(&parsed, git_branch || git_branch_strict)?; - validate_git_branch_modes(git_branch, git_branch_strict)?; + let files = selected_files(&parsed, git_branch)?; validate_complexity_limits(report, &parsed)?; Ok(CliConfig { report, @@ -74,7 +70,6 @@ where file_extensions: selected_file_extensions(&parsed), files, git_branch, - git_branch_strict, max_cognitive_complexity: parsed .max_cognitive_complexity .unwrap_or(DEFAULT_MAX_COGNITIVE_COMPLEXITY), @@ -114,11 +109,6 @@ fn validate_repeated_options(parsed: &ClapCli) -> Result<()> { "git branch mode was provided more than once", )); } - if parsed.git_branch_strict > 1 { - return Err(CodeM8Error::new( - "strict git branch mode was provided more than once", - )); - } if parsed.file_extensions.len() > 1 { return Err(CodeM8Error::new( "file extensions were provided more than once", @@ -132,15 +122,6 @@ fn validate_repeated_options(parsed: &ClapCli) -> Result<()> { Ok(()) } -fn validate_git_branch_modes(git_branch: bool, git_branch_strict: bool) -> Result<()> { - if git_branch && git_branch_strict { - return Err(CodeM8Error::new( - "git branch mode and strict git branch mode are mutually exclusive", - )); - } - Ok(()) -} - fn selected_files(parsed: &ClapCli, git_branch: bool) -> Result>> { let files = parsed.files.first().cloned(); if git_branch && files.is_some() { @@ -279,8 +260,6 @@ fn normalized_clap_arg(arg: String) -> Result { Ok("--codem8-verbose".to_owned()) } else if arg == "-git-branch" { Ok("--codem8-git-branch".to_owned()) - } else if arg == "-git-branch-strict" { - Ok("--codem8-git-branch-strict".to_owned()) } else if let Some(value) = arg.strip_prefix("-file-extension=") { Ok(format!("--codem8-file-extension={value}")) } else if let Some(value) = arg.strip_prefix("-files=") { @@ -310,7 +289,6 @@ mod tests { assert_eq!(config.file_extensions, supported_file_extensions()); assert_eq!(config.files, None); assert!(!config.git_branch); - assert!(!config.git_branch_strict); assert_eq!( config.max_cognitive_complexity, DEFAULT_MAX_COGNITIVE_COMPLEXITY @@ -358,16 +336,6 @@ mod tests { fn parses_git_branch_duplicate_report_config() { let config = parse_args(["--report-duplicate", "-git-branch"]).expect("config parses"); assert!(config.git_branch); - assert!(!config.git_branch_strict); - assert_eq!(config.files, None); - } - - #[test] - fn parses_strict_git_branch_duplicate_report_config() { - let config = - parse_args(["--report-duplicate", "-git-branch-strict"]).expect("config parses"); - assert!(!config.git_branch); - assert!(config.git_branch_strict); assert_eq!(config.files, None); } @@ -419,7 +387,6 @@ mod tests { "--file-extension=js", "--files=src/a.ts", "--git-branch", - "--git-branch-strict", "--max-cognitive-complexity=20", "--max-cyclomatic-complexity=12", ] { @@ -495,23 +462,10 @@ mod tests { } #[test] - fn rejects_repeated_strict_git_branch_arguments() { - let error = parse_args([ - "--report-duplicate", - "-git-branch-strict", - "-git-branch-strict", - ]) - .expect_err("repeated strict git branch mode fails"); - assert!(error - .to_string() - .contains("strict git branch mode was provided more than once")); - } - - #[test] - fn rejects_git_branch_with_strict_git_branch() { - let error = parse_args(["--report-duplicate", "-git-branch", "-git-branch-strict"]) - .expect_err("exclusive git branch modes fail"); - assert!(error.to_string().contains("mutually exclusive")); + fn rejects_removed_git_branch_strict_argument() { + let error = parse_args(["--report-duplicate", "-git-branch-strict"]) + .expect_err("removed git branch mode fails"); + assert!(error.to_string().contains("unexpected argument")); } #[test] @@ -523,15 +477,6 @@ mod tests { .contains("git branch mode cannot be combined with explicit files")); } - #[test] - fn rejects_strict_git_branch_with_explicit_files() { - let error = parse_args(["--report-duplicate", "-git-branch-strict", "-files=a.ts"]) - .expect_err("exclusive strict file modes fail"); - assert!(error - .to_string() - .contains("git branch mode cannot be combined with explicit files")); - } - #[test] fn parses_explicit_file_list() { let files = parse_file_list("src/a.ts, ./src/b.ts").expect("files parse"); diff --git a/src/cli/help.rs b/src/cli/help.rs index 28dcb8d..b6b3bb5 100644 --- a/src/cli/help.rs +++ b/src/cli/help.rs @@ -1,11 +1,8 @@ -use std::fmt::Write as _; - -use super::version::codem8_version_from_cargo_lock; - const HELP_TEXT_BODY: &str = "\ USAGE: codem8 help codem8 -h + codem8 --version codem8 --report-complexity [OPTIONS] codem8 --report-duplicate [OPTIONS] @@ -14,6 +11,9 @@ COMMANDS: -h Display this detailed documentation. + --version + Display the current CodeM8 version. + REQUIRED REPORT SWITCHES: --report-complexity Analyze supported source files and print a function complexity report. @@ -35,13 +35,9 @@ OPTIONS: Example: -files=\"src/a.ts,src/b.js\" -git-branch - Search only in files changed on the current local Git + Limit the report to lines changed on the current local Git branch. Cannot be combined with -files. - -git-branch-strict - Limit the report to lines changed on the current git branch. - Cannot be combined with -files or -git-branch. - -max-cognitive-complexity= Maximum allowed cognitive complexity for --report-complexity. Defaults to 15. @@ -52,7 +48,7 @@ OPTIONS: -verbose Include analyzed files and timings in report output, plus duplicate block details. - In -git-branch-strict mode, analyzed files include changed line ranges. + In -git-branch mode, analyzed files include changed line ranges. COMPLEXITY REPORT PURPOSE: The complexity report helps you find functions whose cognitive or cyclomatic @@ -69,22 +65,16 @@ EXAMPLES: codem8 --report-complexity codem8 --report-complexity -file-extension=rs -max-cognitive-complexity=12 codem8 --report-complexity -git-branch - codem8 --report-complexity -git-branch-strict codem8 --report-duplicate codem8 --report-duplicate -file-extension=ts,tsx,js,jsx codem8 --report-duplicate -file-extension=ts,js -files=\"src/a.ts,src/b.js\" codem8 --report-duplicate -git-branch - codem8 --report-duplicate -git-branch-strict "; #[must_use] pub fn help_text() -> String { - let version = codem8_version_from_cargo_lock().unwrap_or("unknown"); let mut output = String::new(); - let _ = writeln!( - output, - "CodeM8 {version} - deterministic source code analysis reports." - ); + output.push_str("CodeM8 - deterministic source code analysis reports.\n"); output.push('\n'); output.push_str(HELP_TEXT_BODY); output @@ -93,7 +83,6 @@ pub fn help_text() -> String { #[cfg(test)] mod tests { use super::*; - use crate::cli::version::codem8_version_from_cargo_lock; #[test] fn exposes_detailed_help_text() { @@ -107,7 +96,9 @@ mod tests { fn assert_help_includes_expected_sections(help: &str) { assert!(help.contains("USAGE:")); assert!(help.contains("codem8 -h")); + assert!(help.contains("codem8 --version")); assert!(help.contains(" -h")); + assert!(help.contains(" --version")); assert!(help.contains("--report-duplicate")); assert!(help.contains("--report-complexity")); assert!(help.contains("helps you find repeated code")); @@ -120,7 +111,6 @@ mod tests { assert!(help.contains("-file-extension=")); assert!(help.contains("-files=")); assert!(help.contains("-git-branch")); - assert!(help.contains("-git-branch-strict")); assert!(help.contains("-max-cognitive-complexity=")); assert!(help.contains("-max-cyclomatic-complexity=")); } @@ -130,7 +120,6 @@ mod tests { assert!(!help.contains("--file-extension=")); assert!(!help.contains("--files=")); assert!(!help.contains("--git-branch")); - assert!(!help.contains("--git-branch-strict")); assert!(!help.contains("--max-cognitive-complexity=")); assert!(!help.contains("--max-cyclomatic-complexity=")); } @@ -160,8 +149,8 @@ mod tests { } #[test] - fn help_text_includes_version_from_cargo_lock() { - let version = codem8_version_from_cargo_lock().expect("codem8 version exists"); - assert!(help_text().starts_with(&format!("CodeM8 {version} - "))); + fn help_text_header_excludes_version() { + assert!(help_text().starts_with("CodeM8 - ")); + assert!(!help_text().starts_with("CodeM8 0.")); } } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index beaf6e2..eac5978 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -6,6 +6,7 @@ mod version; pub use args::{parse_args, parse_file_extensions, parse_file_list}; pub use help::help_text; +pub use version::codem8_version_from_cargo_lock; use crate::error::Result; @@ -13,6 +14,7 @@ use crate::error::Result; pub enum CliCommand { Help, Report(CliConfig), + Version, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -28,7 +30,6 @@ pub struct CliConfig { pub file_extensions: Vec, pub files: Option>, pub git_branch: bool, - pub git_branch_strict: bool, pub max_cognitive_complexity: u32, pub max_cyclomatic_complexity: u32, } @@ -48,6 +49,9 @@ where if args.len() == 1 && is_help_argument(&args[0]) { return Ok(CliCommand::Help); } + if args.len() == 1 && args[0] == "--version" { + return Ok(CliCommand::Version); + } parse_args(args).map(CliCommand::Report) } @@ -70,4 +74,10 @@ mod tests { let command = parse_command(["-h"]).expect("short help parses"); assert_eq!(command, CliCommand::Help); } + + #[test] + fn parses_version_option() { + let command = parse_command(["--version"]).expect("version parses"); + assert_eq!(command, CliCommand::Version); + } } diff --git a/src/cli/version.rs b/src/cli/version.rs index 8829c32..403d2d8 100644 --- a/src/cli/version.rs +++ b/src/cli/version.rs @@ -6,7 +6,8 @@ struct CargoLockPackage<'a> { version: &'a str, } -pub(super) fn codem8_version_from_cargo_lock() -> Option<&'static str> { +#[must_use] +pub fn codem8_version_from_cargo_lock() -> Option<&'static str> { cargo_lock_packages(CARGO_LOCK) .find(|package| package.name == "codem8") .map(|package| package.version) diff --git a/src/discovery/git.rs b/src/discovery/git.rs index 14a815c..c793276 100644 --- a/src/discovery/git.rs +++ b/src/discovery/git.rs @@ -1,4 +1,3 @@ -use std::collections::BTreeSet; use std::fs; use std::path::{Path, PathBuf}; use std::process::{Command, Output}; @@ -6,61 +5,6 @@ use std::process::{Command, Output}; use crate::error::{CodeM8Error, Result}; use crate::model::{ChangedFileLines, LineRange}; -/// Lists files changed on the current branch compared to the origin base branch. -/// -/// # Errors -/// -/// Returns an error when `current_dir` is not inside a Git repository, the -/// current branch cannot be resolved, or the origin base branch is missing. -pub fn changed_files_against_origin(current_dir: &Path) -> Result> { - let repo_root = repo_root(current_dir)?; - ensure_named_branch(&repo_root)?; - let origin_ref = origin_base_ref(&repo_root)?; - let merge_base = run_git_text( - &repo_root, - &["merge-base", &origin_ref, "HEAD"], - "find merge base with origin base branch", - )?; - let mut paths = BTreeSet::new(); - collect_nul_paths( - &repo_root, - &[ - "diff", - "--name-only", - "-z", - "--diff-filter=ACMRTUXB", - merge_base.trim(), - "HEAD", - ], - &mut paths, - )?; - collect_nul_paths( - &repo_root, - &[ - "diff", - "--name-only", - "-z", - "--cached", - "--diff-filter=ACMRTUXB", - ], - &mut paths, - )?; - collect_nul_paths( - &repo_root, - &["diff", "--name-only", "-z", "--diff-filter=ACMRTUXB"], - &mut paths, - )?; - collect_nul_paths( - &repo_root, - &["ls-files", "--others", "--exclude-standard", "-z"], - &mut paths, - )?; - Ok(paths - .into_iter() - .filter_map(|path| existing_file_path(&repo_root, current_dir, &path)) - .collect()) -} - /// Lists changed lines on the current branch compared to the origin base branch. /// /// # Errors @@ -144,15 +88,6 @@ fn verify_origin_ref(repo_root: &Path, origin_ref: &str) -> bool { .is_ok_and(|output| output.status.success()) } -fn collect_nul_paths(repo_root: &Path, args: &[&str], paths: &mut BTreeSet) -> Result<()> { - let output = run_git_output(repo_root, args, "list changed git files")?; - let stdout = ensure_git_success(output, "list changed git files")?; - for path in nul_paths(&stdout) { - paths.insert(path); - } - Ok(()) -} - fn extend_changed_lines( repo_root: &Path, current_dir: &Path, @@ -333,7 +268,10 @@ fn parse_hunk_range(line: &str) -> Result> { let count = count .parse::() .map_err(|_| CodeM8Error::new(format!("could not parse changed git hunk: {line}")))?; - Ok((count != 0).then_some(LineRange { + if count == 0 { + return Ok(None); + } + Ok(Some(LineRange { start, end: start + count - 1, })) @@ -393,8 +331,13 @@ fn existing_file_path(repo_root: &Path, current_dir: &Path, path: &Path) -> Opti if !metadata.is_file() || metadata.file_type().is_symlink() { return None; } - let relative = absolute.strip_prefix(current_dir).map(Path::to_path_buf); - Some(relative.unwrap_or(absolute)) + let normalized_absolute = fs::canonicalize(&absolute).unwrap_or(absolute); + let normalized_current_dir = + fs::canonicalize(current_dir).unwrap_or_else(|_| current_dir.to_path_buf()); + let relative = normalized_absolute + .strip_prefix(&normalized_current_dir) + .map(Path::to_path_buf); + Some(relative.unwrap_or(normalized_absolute)) } fn run_git_text(current_dir: &Path, args: &[&str], action: &str) -> Result { @@ -520,13 +463,16 @@ mod tests { #[test] fn rejects_non_git_directory() { + if !git_is_available() { + return; + } let repo = TempGitRepo::new("non-repo"); - let error = changed_files_against_origin(repo.path()).expect_err("non-repo fails"); + let error = changed_lines_against_origin(repo.path()).expect_err("non-repo fails"); assert!(error.to_string().contains("requires the current directory")); } #[test] - fn lists_committed_staged_unstaged_and_untracked_files() { + fn lists_committed_staged_unstaged_and_untracked_changed_lines() { if !git_is_available() { return; } @@ -545,9 +491,13 @@ mod tests { repo.write("src/base.ts", "const value = modified;\n"); repo.write("src/untracked.ts", "const value = untracked;\n"); fs::remove_file(repo.path().join("src/deleted.ts")).expect("delete tracked file"); - let files = changed_files_against_origin(repo.path()).expect("list branch files"); + let files = changed_lines_against_origin(repo.path()).expect("list branch lines"); + let paths = files + .into_iter() + .map(|changed_file| changed_file.path) + .collect::>(); assert_eq!( - files, + paths, [ PathBuf::from("src/base.ts"), PathBuf::from("src/committed.ts"), @@ -595,6 +545,32 @@ mod tests { ); } + #[cfg(unix)] + #[test] + fn reports_relative_paths_from_symlinked_current_dir() { + if !git_is_available() { + return; + } + let repo = TempGitRepo::new("symlinked-current-dir"); + repo.git(&["init"]); + repo.write("src/example.ts", "base\n"); + repo.commit("initial"); + repo.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); + repo.git(&["branch", "-M", "feature"]); + repo.write("src/example.ts", "base\nchanged\n"); + let symlink = repo.path().with_extension("link"); + std::os::unix::fs::symlink(repo.path(), &symlink).expect("create repo symlink"); + let files = changed_lines_against_origin(&symlink).expect("list changed lines"); + assert_eq!( + files, + [ChangedFileLines { + path: PathBuf::from("src/example.ts"), + lines: vec![LineRange { start: 2, end: 2 }], + }] + ); + fs::remove_file(symlink).expect("remove repo symlink"); + } + #[test] fn parses_changed_lines_for_quoted_diff_paths() { let diff = concat!( diff --git a/src/discovery/mod.rs b/src/discovery/mod.rs index e7384e3..f90c3f4 100644 --- a/src/discovery/mod.rs +++ b/src/discovery/mod.rs @@ -4,7 +4,7 @@ mod explicit; mod git; mod recursive; -pub(crate) use git::{changed_files_against_origin, changed_lines_against_origin}; +pub(crate) use git::changed_lines_against_origin; use crate::error::Result; use crate::model::SourceFile; diff --git a/src/lib.rs b/src/lib.rs index f202b0a..6587f45 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,14 +23,13 @@ use crate::model::{ use crate::paths::format_path; struct BranchScope { - files: Option>, lines: Option>, - strict_file_paths: Option>, + file_paths: Option>, } impl BranchScope { fn files(&self) -> Option<&[PathBuf]> { - self.files.as_deref().or(self.strict_file_paths.as_deref()) + self.file_paths.as_deref() } fn lines(&self) -> Option<&[ChangedFileLines]> { @@ -71,17 +70,40 @@ where S: Into, W: Write, { - let status = match cli::parse_command(args)? { - cli::CliCommand::Help => { - write_help(writer)?; - RunStatus::Success - } - cli::CliCommand::Report(config) => match config.report { - cli::ReportKind::Duplicate => run_duplicate_report(&config, current_dir, writer)?, - cli::ReportKind::Complexity => run_complexity_report(&config, current_dir, writer)?, - }, - }; - Ok(status) + run_command(cli::parse_command(args)?, current_dir, writer) +} + +fn run_command( + command: cli::CliCommand, + current_dir: &Path, + writer: &mut W, +) -> Result { + match command { + cli::CliCommand::Help => run_help_command(writer), + cli::CliCommand::Version => run_version_command(writer), + cli::CliCommand::Report(config) => run_report_command(&config, current_dir, writer), + } +} + +fn run_help_command(writer: &mut W) -> Result { + write_help(writer)?; + Ok(RunStatus::Success) +} + +fn run_version_command(writer: &mut W) -> Result { + write_version(writer)?; + Ok(RunStatus::Success) +} + +fn run_report_command( + config: &cli::CliConfig, + current_dir: &Path, + writer: &mut W, +) -> Result { + match config.report { + cli::ReportKind::Duplicate => run_duplicate_report(config, current_dir, writer), + cli::ReportKind::Complexity => run_complexity_report(config, current_dir, writer), + } } fn write_help(writer: &mut W) -> Result<()> { @@ -90,6 +112,12 @@ fn write_help(writer: &mut W) -> Result<()> { .map_err(|error| CodeM8Error::new(format!("could not write help output: {error}"))) } +fn write_version(writer: &mut W) -> Result<()> { + let version = cli::codem8_version_from_cargo_lock().unwrap_or("unknown"); + writeln!(writer, "{version}") + .map_err(|error| CodeM8Error::new(format!("could not write version output: {error}"))) +} + fn run_duplicate_report( config: &cli::CliConfig, current_dir: &Path, @@ -149,7 +177,9 @@ fn run_complexity_report( ) })?; let functions = match branch_scope.lines() { - Some(git_branch_lines) => filtered_strict_complex_functions(functions, git_branch_lines), + Some(git_branch_lines) => { + filtered_changed_line_complex_functions(functions, git_branch_lines) + } None => functions, }; let report = report::ComplexityReport { @@ -170,32 +200,16 @@ fn run_complexity_report( } fn changed_branch_scope(config: &cli::CliConfig, current_dir: &Path) -> Result { - let files = changed_git_branch_files(config, current_dir)?; let lines = changed_git_branch_lines(config, current_dir)?; - let strict_file_paths = lines.as_ref().map(|lines| changed_line_paths(lines)); - Ok(BranchScope { - files, - lines, - strict_file_paths, - }) -} - -fn changed_git_branch_files( - config: &cli::CliConfig, - current_dir: &Path, -) -> Result>> { - if config.git_branch { - discovery::changed_files_against_origin(current_dir).map(Some) - } else { - Ok(None) - } + let file_paths = lines.as_ref().map(|lines| changed_line_paths(lines)); + Ok(BranchScope { lines, file_paths }) } fn changed_git_branch_lines( config: &cli::CliConfig, current_dir: &Path, ) -> Result>> { - if config.git_branch_strict { + if config.git_branch { discovery::changed_lines_against_origin(current_dir).map(Some) } else { Ok(None) @@ -203,7 +217,7 @@ fn changed_git_branch_lines( } fn duplicate_discovery_files(config: &cli::CliConfig) -> Option<&[PathBuf]> { - if config.git_branch || config.git_branch_strict { + if config.git_branch { None } else { config.files.as_deref() @@ -327,7 +341,7 @@ fn filtered_duplicate_blocks_for_scope( None => duplicate_blocks, }; match branch_scope.lines() { - Some(lines) => filtered_strict_duplicate_blocks(duplicate_blocks, lines), + Some(lines) => filtered_changed_line_duplicate_blocks(duplicate_blocks, lines), None => duplicate_blocks, } } @@ -378,7 +392,7 @@ fn changed_lines_for_path( .map(|changed_file| changed_file.lines.clone()) } -fn filtered_strict_duplicate_blocks( +fn filtered_changed_line_duplicate_blocks( duplicate_blocks: Vec, changed_lines: &[ChangedFileLines], ) -> Vec { @@ -409,7 +423,7 @@ fn occurrence_applies_to_changed_lines( }) } -fn filtered_strict_complex_functions( +fn filtered_changed_line_complex_functions( functions: Vec, changed_lines: &[ChangedFileLines], ) -> Vec { @@ -703,7 +717,7 @@ mod tests { } #[test] - fn git_branch_mode_reports_duplicates_for_changed_files_against_repo() { + fn git_branch_mode_reports_duplicates_for_changed_lines_against_repo() { if !git_is_available() { return; } @@ -744,11 +758,11 @@ mod tests { } #[test] - fn strict_git_branch_mode_reports_duplicates_only_on_changed_lines() { + fn git_branch_mode_reports_duplicates_only_on_changed_lines() { if !git_is_available() { return; } - let project = TempGitRepo::new("strict-duplicate-lines"); + let project = TempGitRepo::new("git-branch-duplicate-lines"); project.git(&["init"]); project.write("src/a.ts", "const shared = 1;\nconst branch = 1;\n"); project.write("src/b.ts", "const shared = 1;\n"); @@ -756,26 +770,26 @@ mod tests { project.git(&["update-ref", "refs/remotes/origin/main", "HEAD"]); project.git(&["branch", "-M", "feature"]); project.write("src/a.ts", "const shared = 1;\nconst branch = 2;\n"); - let output = run_in(&project, &["--report-duplicate", "-git-branch-strict"]) - .expect("report succeeds"); + let output = + run_in(&project, &["--report-duplicate", "-git-branch"]).expect("report succeeds"); assert!(output.contains("Number of files analyzed: 1")); assert!(output.contains("Duplicate blocks found: 0")); project.write("src/a.ts", "const changed = 1;\nconst branch = 2;\n"); project.commit("branch change"); project.write("src/b.ts", "const changed = 1;\n"); - let output = run_in(&project, &["--report-duplicate", "-git-branch-strict"]) - .expect("report succeeds"); + let output = + run_in(&project, &["--report-duplicate", "-git-branch"]).expect("report succeeds"); assert!(output.contains("Duplicate blocks found: 1")); assert!(output.contains("- src/a.ts:1-1")); assert!(output.contains("- src/b.ts:1-1")); } #[test] - fn verbose_strict_git_branch_report_lists_changed_line_ranges() { + fn verbose_git_branch_report_lists_changed_line_ranges() { if !git_is_available() { return; } - let project = TempGitRepo::new("strict-verbose-ranges"); + let project = TempGitRepo::new("git-branch-verbose-ranges"); project.git(&["init"]); project.write( "src/a.ts", @@ -788,11 +802,8 @@ mod tests { "src/a.ts", "const one = 1;\nconst two = 20;\nconst three = 30;\n", ); - let output = run_in( - &project, - &["--report-duplicate", "-git-branch-strict", "-verbose"], - ) - .expect("report succeeds"); + let output = run_in(&project, &["--report-duplicate", "-git-branch", "-verbose"]) + .expect("report succeeds"); assert!(output.contains("Files analyzed:\n- src/a.ts (2-3)\n")); } @@ -892,7 +903,7 @@ mod tests { } #[test] - fn git_branch_mode_limits_complexity_search_to_changed_files() { + fn git_branch_mode_limits_complexity_search_to_changed_lines() { if !git_is_available() { return; } @@ -932,11 +943,11 @@ mod tests { } #[test] - fn strict_git_branch_mode_reports_complexity_only_for_changed_functions() { + fn git_branch_mode_reports_complexity_only_for_changed_functions() { if !git_is_available() { return; } - let project = TempGitRepo::new("strict-complexity-lines"); + let project = TempGitRepo::new("git-branch-complexity-lines"); project.git(&["init"]); project.write( "src/lib.rs", @@ -971,7 +982,7 @@ mod tests { &project, &[ "--report-complexity", - "-git-branch-strict", + "-git-branch", "-file-extension=rs", "-max-cognitive-complexity=1", "-max-cyclomatic-complexity=1", @@ -997,7 +1008,7 @@ mod tests { &project, &[ "--report-complexity", - "-git-branch-strict", + "-git-branch", "-file-extension=rs", "-max-cognitive-complexity=1", "-max-cyclomatic-complexity=1", @@ -1025,4 +1036,11 @@ mod tests { assert!(output.contains("USAGE:")); assert!(output.contains("--report-duplicate")); } + + #[test] + fn version_option_prints_current_version() { + let project = TempProject::new("version"); + let output = run_in(&project, &["--version"]).expect("version succeeds"); + assert_eq!(output, "0.7.7\n"); + } } diff --git a/src/report/complexity_detection.rs b/src/report/complexity_detection.rs index c50242b..5403cd6 100644 --- a/src/report/complexity_detection.rs +++ b/src/report/complexity_detection.rs @@ -7,6 +7,7 @@ use rust_code_analysis::{get_from_ext, get_function_spaces, FuncSpace, SpaceKind use crate::error::{CodeM8Error, Result}; use crate::model::{FunctionComplexity, SourceFile}; use crate::paths::format_path; +use crate::report::java_cognitive; const ANONYMOUS_FUNCTION_NAME: &str = ""; @@ -53,63 +54,50 @@ fn detect_file_complex_functions( }; let source = fs::read(&file.path) .map_err(|error| CodeM8Error::io(&file.display_path, "read file", &error))?; - let Some(root_space) = get_function_spaces(&language, source, &file.path, None) else { + let Some(root_space) = get_function_spaces(&language, source.clone(), &file.path, None) else { return Ok(Vec::new()); }; let mut functions = Vec::new(); - collect_complex_functions( + let mut context = ComplexityDetectionContext { file, - &root_space, + source: &source, max_cognitive_complexity, max_cyclomatic_complexity, - &mut functions, - ); + functions: &mut functions, + }; + collect_complex_functions(&mut context, &root_space); Ok(functions) } -fn collect_complex_functions( - file: &SourceFile, - space: &FuncSpace, +struct ComplexityDetectionContext<'a> { + file: &'a SourceFile, + source: &'a [u8], max_cognitive_complexity: u32, max_cyclomatic_complexity: u32, - functions: &mut Vec, -) { + functions: &'a mut Vec, +} + +fn collect_complex_functions(context: &mut ComplexityDetectionContext<'_>, space: &FuncSpace) { if space.kind == SpaceKind::Function { - push_complex_function( - file, - space, - max_cognitive_complexity, - max_cyclomatic_complexity, - functions, - ); + push_complex_function(context, space); } for child in &space.spaces { - collect_complex_functions( - file, - child, - max_cognitive_complexity, - max_cyclomatic_complexity, - functions, - ); + collect_complex_functions(context, child); } } -fn push_complex_function( - file: &SourceFile, - space: &FuncSpace, - max_cognitive_complexity: u32, - max_cyclomatic_complexity: u32, - functions: &mut Vec, -) { - let cognitive_complexity = space.metrics.cognitive.cognitive(); +fn push_complex_function(context: &mut ComplexityDetectionContext<'_>, space: &FuncSpace) { + let cognitive_complexity = + java_cognitive::cognitive_complexity(&context.file.extension, context.source, space) + .unwrap_or_else(|| space.metrics.cognitive.cognitive()); let cyclomatic_complexity = space.metrics.cyclomatic.cyclomatic(); - if cognitive_complexity <= f64::from(max_cognitive_complexity) - && cyclomatic_complexity <= f64::from(max_cyclomatic_complexity) + if cognitive_complexity <= f64::from(context.max_cognitive_complexity) + && cyclomatic_complexity <= f64::from(context.max_cyclomatic_complexity) { return; } - functions.push(FunctionComplexity { - file_path: file.display_path.clone(), + context.functions.push(FunctionComplexity { + file_path: context.file.display_path.clone(), function_name: space .name .clone() @@ -243,6 +231,30 @@ mod tests { fs::remove_file(cyclomatic_only_file.path).expect("cleanup"); } + #[test] + fn computes_java_cognitive_complexity() { + let java_file = source_file( + "java", + "class Sample {\n\ + int risky(int value) {\n\ + if (value > 10 && value < 20) {\n\ + while (value > 0) {\n\ + value--;\n\ + }\n\ + }\n\ + return value;\n\ + }\n\ + }\n", + ); + let functions = + detect_complex_functions(std::slice::from_ref(&java_file), 1, 20).expect("detect"); + assert_eq!(functions.len(), 1); + assert!(functions[0].function_name.contains("risky")); + assert!(functions[0].cognitive_complexity > 1.0); + assert!(functions[0].cyclomatic_complexity <= 20.0); + fs::remove_file(java_file.path).expect("cleanup"); + } + #[test] fn sorts_functions_by_combined_complexity_excess() { let mut functions = [ diff --git a/src/report/java_cognitive.rs b/src/report/java_cognitive.rs new file mode 100644 index 0000000..091b226 --- /dev/null +++ b/src/report/java_cognitive.rs @@ -0,0 +1,457 @@ +use rust_code_analysis::FuncSpace; + +const JAVA_EXTENSION: &str = "java"; + +pub(super) fn cognitive_complexity( + extension: &str, + source: &[u8], + space: &FuncSpace, +) -> Option { + (extension == JAVA_EXTENSION).then(|| { + let contents = String::from_utf8_lossy(source); + let source = function_source(&contents, space); + let tokens = tokenize_java(source); + JavaCognitiveScanner::new(&tokens).scan() + }) +} + +fn function_source<'a>(source: &'a str, space: &FuncSpace) -> &'a str { + let start_line = space.start_line.saturating_sub(1); + let line_count = space.end_line.saturating_sub(start_line); + let start = byte_index_for_line(source, start_line); + let end = byte_index_for_line(source, start_line + line_count); + &source[start..end] +} + +fn byte_index_for_line(source: &str, line: usize) -> usize { + if line == 0 { + return 0; + } + source + .match_indices('\n') + .nth(line.saturating_sub(1)) + .map_or(source.len(), |(index, _)| index + 1) +} + +fn tokenize_java(source: &str) -> Vec { + let mut tokens = Vec::new(); + let mut chars = source.chars().peekable(); + while let Some(ch) = chars.next() { + if let Some(token) = read_java_token(ch, &mut chars) { + tokens.push(token); + } + } + tokens +} + +fn read_java_token( + ch: char, + chars: &mut std::iter::Peekable>, +) -> Option { + if skip_comment_or_quote(ch, chars) { + return None; + } + read_operator_or_symbol(ch, chars).or_else(|| read_identifier_token(ch, chars)) +} + +fn skip_comment_or_quote(ch: char, chars: &mut std::iter::Peekable>) -> bool { + match ch { + '/' if chars.peek() == Some(&'/') => { + skip_line_comment(chars); + true + } + '/' if chars.peek() == Some(&'*') => { + skip_block_comment(chars); + true + } + '"' | '\'' => { + skip_quoted(ch, chars); + true + } + _ => false, + } +} + +fn read_operator_or_symbol( + ch: char, + chars: &mut std::iter::Peekable>, +) -> Option { + match ch { + '&' if chars.peek() == Some(&'&') => Some(read_double_char_operator("&&", chars)), + '|' if chars.peek() == Some(&'|') => Some(read_double_char_operator("||", chars)), + '?' | '{' | '}' | '(' | ')' | ';' => Some(ch.to_string()), + _ => None, + } +} + +fn read_double_char_operator( + token: &str, + chars: &mut std::iter::Peekable>, +) -> String { + chars.next(); + token.to_string() +} + +fn read_identifier_token( + ch: char, + chars: &mut std::iter::Peekable>, +) -> Option { + (ch.is_alphabetic() || ch == '_').then(|| read_identifier(ch, chars)) +} + +fn skip_line_comment(chars: &mut std::iter::Peekable>) { + chars.next(); + for ch in chars.by_ref() { + if ch == '\n' { + break; + } + } +} + +fn skip_block_comment(chars: &mut std::iter::Peekable>) { + chars.next(); + let mut previous = '\0'; + for ch in chars.by_ref() { + if previous == '*' && ch == '/' { + break; + } + previous = ch; + } +} + +fn skip_quoted(quote: char, chars: &mut std::iter::Peekable>) { + let mut escaped = false; + for ch in chars.by_ref() { + if escaped { + escaped = false; + } else if ch == '\\' { + escaped = true; + } else if ch == quote { + break; + } + } +} + +fn read_identifier(first: char, chars: &mut std::iter::Peekable>) -> String { + let mut identifier = first.to_string(); + while let Some(ch) = chars.peek().copied() { + if ch.is_alphanumeric() || ch == '_' { + identifier.push(ch); + chars.next(); + } else { + break; + } + } + identifier +} + +struct JavaCognitiveScanner<'a> { + tokens: &'a [String], + index: usize, + nesting: u32, + pending_blocks: Vec, + blocks: Vec, + previous_non_header_boolean_operator: Option, + after_do_block: bool, + score: u32, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BlockKind { + Control, + Do, + Plain, +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BooleanOperator { + And, + Or, +} + +impl<'a> JavaCognitiveScanner<'a> { + const fn new(tokens: &'a [String]) -> Self { + Self { + tokens, + index: 0, + nesting: 0, + pending_blocks: Vec::new(), + blocks: Vec::new(), + previous_non_header_boolean_operator: None, + after_do_block: false, + score: 0, + } + } + + fn scan(mut self) -> f64 { + while self.index < self.tokens.len() { + self.scan_token(); + self.index += 1; + } + f64::from(self.score) + } + + fn scan_token(&mut self) { + if self.scan_if_after_else() + || self.scan_control_token() + || self.scan_else() + || self.scan_do() + || self.scan_question_mark() + || self.scan_boolean_operator() + || self.scan_block_boundary() + || self.scan_statement_boundary() + { + return; + } + self.after_do_block = false; + } + + fn current(&self) -> &str { + self.tokens[self.index].as_str() + } + + fn previous_is(&self, token: &str) -> bool { + self.index > 0 && self.tokens[self.index - 1] == token + } + + fn scan_if_after_else(&mut self) -> bool { + if self.current() != "if" || !self.previous_is("else") { + return false; + } + self.add_condition_complexity(); + self.pending_blocks.push(BlockKind::Control); + true + } + + fn scan_control_token(&mut self) -> bool { + if !is_control_token(Some(&self.tokens[self.index])) { + return false; + } + if self.current() == "while" && self.after_do_block { + self.add_condition_complexity(); + self.after_do_block = false; + self.reset_non_header_boolean_sequence(); + return true; + } + self.score += self.nesting + 1; + self.add_condition_complexity(); + self.pending_blocks.push(BlockKind::Control); + true + } + + fn scan_else(&mut self) -> bool { + if self.current() != "else" { + return false; + } + self.score += 1; + true + } + + fn scan_do(&mut self) -> bool { + if self.current() != "do" { + return false; + } + self.score += self.nesting + 1; + self.pending_blocks.push(BlockKind::Do); + true + } + + fn scan_question_mark(&mut self) -> bool { + if self.current() != "?" { + return false; + } + self.score += self.nesting + 1; + true + } + + fn scan_boolean_operator(&mut self) -> bool { + if !matches!(self.current(), "&&" | "||") { + return false; + } + self.add_boolean_sequence_complexity(); + true + } + + fn scan_block_boundary(&mut self) -> bool { + if self.current() == "{" { + self.open_block(); + return true; + } + if self.current() == "}" { + self.close_block(); + return true; + } + false + } + + fn scan_statement_boundary(&mut self) -> bool { + if self.current() != ";" || self.is_inside_condition_header() { + return false; + } + self.pending_blocks.clear(); + self.after_do_block = false; + self.reset_non_header_boolean_sequence(); + true + } + + fn open_block(&mut self) { + let block = self.pending_blocks.pop().unwrap_or(BlockKind::Plain); + if matches!(block, BlockKind::Control | BlockKind::Do) { + self.nesting += 1; + } + self.blocks.push(block); + self.reset_non_header_boolean_sequence(); + } + + fn close_block(&mut self) { + if let Some(block) = self.blocks.pop() { + if matches!(block, BlockKind::Control | BlockKind::Do) { + self.nesting = self.nesting.saturating_sub(1); + } + self.after_do_block = block == BlockKind::Do; + if matches!(block, BlockKind::Control | BlockKind::Do) { + self.pending_blocks.clear(); + } + } + self.reset_non_header_boolean_sequence(); + } + + fn add_condition_complexity(&mut self) { + if self.peek() == Some("(") { + let end = self.matching_paren_index(self.index + 1); + self.score += boolean_sequence_complexity(&self.tokens[self.index + 1..end]); + } + } + + fn peek(&self) -> Option<&str> { + self.tokens.get(self.index + 1).map(String::as_str) + } + + fn matching_paren_index(&self, start: usize) -> usize { + let mut depth = 0usize; + for index in start..self.tokens.len() { + match self.tokens[index].as_str() { + "(" => depth += 1, + ")" if close_paren(&mut depth) => return index, + _ => {} + } + } + self.tokens.len() + } + + fn add_boolean_sequence_complexity(&mut self) { + if !self.is_inside_condition_header() { + self.add_non_header_boolean_sequence_complexity(); + } + } + + fn add_non_header_boolean_sequence_complexity(&mut self) { + let operator = BooleanOperator::from_token(self.current()); + if self.previous_non_header_boolean_operator != Some(operator) { + self.score += 1; + } + self.previous_non_header_boolean_operator = Some(operator); + } + + const fn reset_non_header_boolean_sequence(&mut self) { + self.previous_non_header_boolean_operator = None; + } + + fn is_inside_condition_header(&self) -> bool { + let mut depth = 0usize; + for index in (0..self.index).rev() { + match self.tokens[index].as_str() { + ")" => depth += 1, + "(" if depth == 0 => { + return self.is_control_header_start(index); + } + "(" => depth = depth.saturating_sub(1), + "{" | ";" if depth == 0 => return false, + _ => {} + } + } + false + } + + fn is_control_header_start(&self, open_paren_index: usize) -> bool { + let mut index = open_paren_index; + while index > 0 && self.tokens[index - 1] == "(" { + index -= 1; + } + is_control_token(self.tokens.get(index.wrapping_sub(1))) + } +} + +impl BooleanOperator { + const fn from_token(token: &str) -> Self { + match token.as_bytes().first() { + Some(b'&') => Self::And, + _ => Self::Or, + } + } +} + +fn boolean_sequence_complexity(tokens: &[String]) -> u32 { + let mut score = 0; + let mut previous_operator: Option<&str> = None; + for token in tokens { + if (token == "&&" || token == "||") && previous_operator != Some(token.as_str()) { + score += 1; + previous_operator = Some(token.as_str()); + } + } + score +} + +const fn close_paren(depth: &mut usize) -> bool { + *depth = depth.saturating_sub(1); + *depth == 0 +} + +fn is_control_token(token: Option<&String>) -> bool { + matches!( + token.map(String::as_str), + Some("if" | "for" | "while" | "switch" | "catch") + ) +} + +#[cfg(test)] +mod tests { + use super::{tokenize_java, JavaCognitiveScanner}; + + fn assert_java_cognitive_score(source: &str, expected: f64) { + let tokens = tokenize_java(source); + let actual = JavaCognitiveScanner::new(&tokens).scan(); + assert!((actual - expected).abs() < f64::EPSILON); + } + + #[test] + fn keeps_control_nesting_after_nested_plain_block_closes() { + let source = "void f() { if (a) { { helper(); } if (b) { helper(); } } }"; + assert_java_cognitive_score(source, 3.0); + } + + #[test] + fn does_not_leak_braceless_control_nesting_to_unrelated_block() { + let source = "void f() { if (a) helper(); { if (b) { helper(); } } }"; + assert_java_cognitive_score(source, 2.0); + } + + #[test] + fn treats_do_while_as_one_loop() { + let source = "void f() { do { if (a) { helper(); } } while (b); }"; + assert_java_cognitive_score(source, 3.0); + } + + #[test] + fn groups_non_header_boolean_sequences() { + let source = "void f() { return a && b && c; }"; + assert_java_cognitive_score(source, 1.0); + } + + #[test] + fn treats_nested_control_header_parentheses_as_header() { + let source = "void f() { if ((a && b)) { helper(); } }"; + assert_java_cognitive_score(source, 2.0); + } +} diff --git a/src/report/mod.rs b/src/report/mod.rs index 42edd25..d7fb84e 100644 --- a/src/report/mod.rs +++ b/src/report/mod.rs @@ -2,6 +2,7 @@ mod complexity_detection; mod complexity_renderer; mod duplicate_detection; mod duplicate_renderer; +mod java_cognitive; pub(crate) use complexity_detection::{ complexity_supported_file_extensions, detect_complex_functions,