From 22700d95b79974d88ef7bb3a21460f79b1b05d8a Mon Sep 17 00:00:00 2001 From: Amresh-01 Date: Sat, 13 Jun 2026 15:06:52 +0530 Subject: [PATCH] feat: add ref (commit SHA) to search and symbol navigation results --- .../sourcebot-public.openapi.json | 8 ++ packages/backend/src/repoCompileUtils.ts | 44 +++++-- packages/backend/src/zoekt.ts | 14 ++- packages/shared/src/utils.ts | 10 +- packages/web/src/features/codeNav/api.test.ts | 118 ++++++++++++++++++ packages/web/src/features/codeNav/api.ts | 25 ++++ packages/web/src/features/codeNav/types.ts | 1 + packages/web/src/features/search/types.ts | 1 + .../web/src/features/search/zoektSearcher.ts | 10 ++ .../features/tools/findSymbolDefinitions.ts | 2 +- .../features/tools/findSymbolReferences.ts | 2 +- 11 files changed, 217 insertions(+), 18 deletions(-) create mode 100644 packages/web/src/features/codeNav/api.test.ts diff --git a/docs/api-reference/sourcebot-public.openapi.json b/docs/api-reference/sourcebot-public.openapi.json index 745d22233..ddba72cc9 100644 --- a/docs/api-reference/sourcebot-public.openapi.json +++ b/docs/api-reference/sourcebot-public.openapi.json @@ -338,6 +338,10 @@ "type": "string" } }, + "ref": { + "type": "string", + "description": "The git ref/commit SHA of the file." + }, "content": { "type": "string" } @@ -829,6 +833,10 @@ "language": { "type": "string" }, + "ref": { + "type": "string", + "description": "The git ref/commit SHA of the file." 
+ }, "matches": { "type": "array", "items": { diff --git a/packages/backend/src/repoCompileUtils.ts b/packages/backend/src/repoCompileUtils.ts index 04632a796..e09147bba 100644 --- a/packages/backend/src/repoCompileUtils.ts +++ b/packages/backend/src/repoCompileUtils.ts @@ -15,6 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js"; import path from 'path'; import fs from 'fs/promises'; +import { fileURLToPath } from 'node:url'; import { glob } from 'glob'; import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js'; import assert from 'assert'; @@ -106,7 +107,7 @@ export const createGitHubRepoRecord = ({ .replace(/^https?:\/\//, ''); const repoDisplayName = repo.full_name; - const repoName = path.join(repoNameRoot, repoDisplayName); + const repoName = path.posix.join(repoNameRoot, repoDisplayName); const cloneUrl = new URL(repo.clone_url!); const isPublic = repo.private === false; @@ -184,7 +185,7 @@ export const compileGitlabConfig = async ( project.visibility === 'public' || project.visibility === 'internal'; const repoDisplayName = project.path_with_namespace; - const repoName = path.join(repoNameRoot, repoDisplayName); + const repoName = path.posix.join(repoNameRoot, repoDisplayName); // project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available const avatarUrl = project.avatar_url ? 
new URL(`/api/v4/projects/${project.id}/avatar`, hostUrl).toString() @@ -263,7 +264,7 @@ export const compileGiteaConfig = async ( const cloneUrl = new URL(repo.clone_url!); cloneUrl.host = configUrl.host const repoDisplayName = repo.full_name!; - const repoName = path.join(repoNameRoot, repoDisplayName); + const repoName = path.posix.join(repoNameRoot, repoDisplayName); const isPublic = repo.internal === false && repo.private === false; logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`); @@ -326,9 +327,9 @@ export const compileGerritConfig = async ( .replace(/^https?:\/\//, ''); const repos = gerritRepos.map((project) => { - const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name))); + const cloneUrl = new URL(path.posix.join(hostUrl, encodeURIComponent(project.name))); const repoDisplayName = project.name; - const repoName = path.join(repoNameRoot, repoDisplayName); + const repoName = path.posix.join(repoNameRoot, repoDisplayName); const webUrl = (() => { if (!project.web_links || project.web_links.length === 0) { @@ -344,7 +345,7 @@ export const compileGerritConfig = async ( // https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50 if (webUrl.startsWith('/plugins/gitiles/')) { logger.debug(`WebUrl is a gitiles path, joining with hostUrl: ${webUrl}`); - return new URL(path.join(hostUrl, webUrl)).toString(); + return new URL(path.posix.join(hostUrl, webUrl)).toString(); } else { logger.debug(`WebUrl is not a gitiles path, returning as is: ${webUrl}`); return webUrl; @@ -499,7 +500,7 @@ export const compileBitbucketConfig = async ( : (repo as BitbucketCloudRepository).is_private === false; const isArchived = isServer ? (repo as BitbucketServerRepository).archived === true : false; const isFork = isServer ? 
(repo as BitbucketServerRepository).origin !== undefined : (repo as BitbucketCloudRepository).parent !== undefined; - const repoName = path.join(repoNameRoot, displayName); + const repoName = path.posix.join(repoNameRoot, displayName); const cloneUrl = getCloneUrl(repo); const webUrl = getWebUrl(repo); const defaultBranch = isServer ? (repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name; @@ -583,6 +584,16 @@ export const compileGenericGitHostConfig = async ( } } +/** + * Compiles a generic git host configuration backed by a local `file://` URL. + * Resolves the file URL to a native filesystem path using `fileURLToPath` (with a safe fallback), + * applies glob matching, validates each matched path as a git repository root, and produces + * repository records with POSIX-normalized names. + * + * @param config - The generic git host connection configuration with a `file://` URL. + * @param connectionId - The database ID of the connection record. + * @returns A CompileResult containing the resolved repository data and any warnings. + */ export const compileGenericGitHostConfig_file = async ( config: GenericGitHostConnectionConfig, connectionId: number, @@ -590,8 +601,15 @@ export const compileGenericGitHostConfig_file = async ( const configUrl = new URL(config.url); assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL'); + let folderPath: string; + try { + folderPath = fileURLToPath(configUrl).replace(/\\/g, '/'); + } catch { + folderPath = configUrl.pathname; + } + // Resolve the glob pattern to a list of repo-paths - const repoPaths = await glob(configUrl.pathname, { + const repoPaths = await glob(folderPath, { absolute: true, }); @@ -600,7 +618,7 @@ export const compileGenericGitHostConfig_file = async ( // Warn if the glob pattern matched no paths at all if (repoPaths.length === 0) { - const warning = `No paths matched the pattern '${configUrl.pathname}'. 
Please verify the path exists and is accessible.`; + const warning = `No paths matched the pattern '${folderPath}'. Please verify the path exists and is accessible.`; logger.warn(warning); warnings.push(warning); return { @@ -609,7 +627,7 @@ export const compileGenericGitHostConfig_file = async ( }; } - logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${configUrl.pathname}'`); + logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${folderPath}'`); await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => { const stat = await fs.stat(repoPath).catch(() => null); @@ -651,7 +669,7 @@ export const compileGenericGitHostConfig_file = async ( const hostWithPort = extractHostWithPort(origin) ?? remoteUrl.host; // Decode URL-encoded characters (e.g., %20 -> space) to ensure consistent repo names const decodedPathname = decodeURIComponent(remoteUrl.pathname); - const repoName = path.join(hostWithPort, decodedPathname.replace(/\.git$/, '')); + const repoName = path.posix.join(hostWithPort, decodedPathname.replace(/\.git$/, '')); const repo: RepoData = { external_codeHostType: 'genericGitHost', @@ -723,7 +741,7 @@ export const compileGenericGitHostConfig_url = async ( // @note: matches the naming here: // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293 - const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); + const repoName = path.posix.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); const repo: RepoData = { external_codeHostType: 'genericGitHost', @@ -787,7 +805,7 @@ export const compileAzureDevOpsConfig = async ( } const repoDisplayName = `${repo.project.name}/${repo.name}`; - const repoName = path.join(repoNameRoot, repoDisplayName); + const repoName = path.posix.join(repoNameRoot, repoDisplayName); const isPublic = repo.project.visibility === ProjectVisibility.Public; if (!repo.remoteUrl) { diff --git a/packages/backend/src/zoekt.ts 
b/packages/backend/src/zoekt.ts index ab8425333..0a659f401 100644 --- a/packages/backend/src/zoekt.ts +++ b/packages/backend/src/zoekt.ts @@ -8,6 +8,16 @@ import { getShardPrefix } from "./utils.js"; const logger = createLogger('zoekt'); +/** + * Indexes a git repository using Zoekt (zoekt-git-index CLI). + * Standardizes index directory path and repository path parsing for Windows OS. + * + * @param repo - The repository database record to index. + * @param settings - The global or custom configuration settings for the indexing limits. + * @param revisions - The git branch/revision references to index. + * @param signal - Optional AbortSignal to cancel the indexing process. + * @returns A promise that resolves to stdout/stderr of the zoekt process. + */ export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => { const { path: repoPath } = getRepoPath(repo); const shardPrefix = getShardPrefix(repo.orgId, repo.id); @@ -17,7 +27,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio const command = [ 'zoekt-git-index', '-allow_missing_branches', - `-index ${INDEX_CACHE_DIR}`, + `-index "${INDEX_CACHE_DIR}"`, `-max_trigram_count ${settings.maxTrigramCount}`, `-file_limit ${settings.maxFileSize}`, `-branches "${revisions.join(',')}"`, @@ -25,7 +35,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio `-repo_id ${repo.id}`, `-shard_prefix_override ${shardPrefix}`, ...largeFileGlobPatterns.map((pattern) => `-large_file "${pattern}"`), - repoPath + `"${repoPath}"` ].join(' '); return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => { diff --git a/packages/shared/src/utils.ts b/packages/shared/src/utils.ts index 848b941eb..f1114c537 100644 --- a/packages/shared/src/utils.ts +++ b/packages/shared/src/utils.ts @@ -1,6 +1,7 @@ import { readFile } from 'fs/promises'; import stripJsonComments from 'strip-json-comments'; import { z } 
from "zod"; +import { fileURLToPath } from 'node:url'; import { DEFAULT_CONFIG_SETTINGS } from "./constants.js"; import { ConfigSettings } from "./types.js"; import { Org, Repo } from "@sourcebot/db"; @@ -102,13 +103,20 @@ export const getRepoIdFromPath = (repoPath: string): number | undefined => { return isNaN(id) ? undefined : id; } +/** + * Resolves the filesystem path for a given repository. + * If the repository is a local generic git host (cloned via file://), the path is decoded properly. + * + * @param repo - The repository record from the database. + * @returns An object containing the absolute path to the repository and whether it should be treated as read-only. + */ export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => { // If we are dealing with a local repository, then use that as the path. // Mark as read-only since we aren't guaranteed to have write access to the local filesystem. const cloneUrl = new URL(repo.cloneUrl); if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') { return { - path: cloneUrl.pathname, + path: fileURLToPath(cloneUrl), isReadOnly: true, } } diff --git a/packages/web/src/features/codeNav/api.test.ts b/packages/web/src/features/codeNav/api.test.ts new file mode 100644 index 000000000..02a751c44 --- /dev/null +++ b/packages/web/src/features/codeNav/api.test.ts @@ -0,0 +1,118 @@ +import { describe, it, expect, vi, beforeEach } from 'vitest'; + +vi.mock('server-only', () => ({})); + +import { findSearchBasedSymbolReferences, findSearchBasedSymbolDefinitions } from './api'; +import { search } from '@/features/search'; + +vi.mock('@/features/search', () => ({ + search: vi.fn(), +})); + +vi.mock('@/middleware/withAuth', () => ({ + withOptionalAuth: (fn: any) => fn(), +})); + +vi.mock('@/middleware/sew', () => ({ + sew: (fn: any) => fn(), +})); + +const MOCK_SEARCH_RESPONSE = { + stats: { + actualMatchCount: 1, + totalMatchCount: 1, + duration: 100, + fileCount: 1, + 
filesSkipped: 0, + contentBytesLoaded: 100, + indexBytesLoaded: 100, + crashes: 0, + shardFilesConsidered: 1, + filesConsidered: 1, + filesLoaded: 1, + shardsScanned: 1, + shardsSkipped: 0, + shardsSkippedFilter: 0, + ngramMatches: 1, + ngramLookups: 1, + wait: 0, + matchTreeConstruction: 10, + matchTreeSearch: 90, + regexpsConsidered: 0, + flushReason: 'FLUSH_REASON_FINAL_FLUSH', + }, + files: [ + { + fileName: { + text: 'src/index.ts', + matchRanges: [], + }, + repository: 'github.com/owner/repo', + repositoryId: 123, + webUrl: 'https://sourcebot.example.com/browse/github.com/owner/repo/blob/main/src/index.ts', + language: 'TypeScript', + ref: 'abcdef1234567890', + chunks: [ + { + content: 'const a = 1;', + matchRanges: [ + { + start: { byteOffset: 0, lineNumber: 1, column: 1 }, + end: { byteOffset: 12, lineNumber: 1, column: 13 }, + } + ], + } + ], + branches: ['main'], + } + ], + repositoryInfo: [], + isSearchExhaustive: true, +}; + +describe('CodeNav Search-Based APIs', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.mocked(search).mockResolvedValue(MOCK_SEARCH_RESPONSE as any); + }); + + describe('findSearchBasedSymbolReferences', () => { + it('includes the ref (commit SHA) in the returned file results', async () => { + const result = await findSearchBasedSymbolReferences({ + symbolName: 'mySymbol', + repoName: 'github.com/owner/repo', + revisionName: 'HEAD', + }); + + expect(isServiceError(result)).toBe(false); + const response = result as any; + expect(response.files).toHaveLength(1); + expect(response.files[0].ref).toBe('abcdef1234567890'); + }); + }); + + describe('findSearchBasedSymbolDefinitions', () => { + it('includes the ref (commit SHA) in the returned file results', async () => { + const result = await findSearchBasedSymbolDefinitions({ + symbolName: 'mySymbol', + repoName: 'github.com/owner/repo', + revisionName: 'HEAD', + }); + + expect(isServiceError(result)).toBe(false); + const response = result as any; + 
+ expect(response.files).toHaveLength(1); + expect(response.files[0].ref).toBe('abcdef1234567890'); + }); + }); +}); + +/** + * Checks whether a given value looks like a ServiceError by testing for an `errorCode` property. + * + * @param obj - The value to inspect. + * @returns True if the value is a non-null object with an `errorCode` property; false for everything else, including null and undefined. + */ +function isServiceError(obj: any): boolean { + return typeof obj === 'object' && obj !== null && 'errorCode' in obj; +} diff --git a/packages/web/src/features/codeNav/api.ts b/packages/web/src/features/codeNav/api.ts index 60a36ffd7..d32e77e30 100644 --- a/packages/web/src/features/codeNav/api.ts +++ b/packages/web/src/features/codeNav/api.ts @@ -13,6 +13,14 @@ import escapeStringRegexp from "escape-string-regexp"; // The maximum number of matches to return from the search API. const MAX_REFERENCE_COUNT = 1000; +/** + * Finds all search-based symbol references for a given symbol name. + * Constructs a Zoekt IR query filtering by symbol name, branch, language, and repository, + * then parses the search response to extract matching file results (including the commit ref). + * + * @param props - The request parameters including symbolName, language, revisionName, and repoName. + * @returns The matching files with their references, or a ServiceError on failure. + */ export const findSearchBasedSymbolReferences = async (props: FindRelatedSymbolsRequest): Promise => sew(() => withOptionalAuth(async () => { const { @@ -67,6 +75,14 @@ export const findSearchBasedSymbolReferences = async (props: FindRelatedSymbolsR })); +/** + * Finds all search-based symbol definitions for a given symbol name. + * Uses Zoekt's symbol search to locate definition sites, filtering by branch, language, and repository. + * The response includes the commit ref (SHA) for each matched file. + * + * @param props - The request parameters including symbolName, language, revisionName, and repoName.
+ * @returns The matching files with their definitions, or a ServiceError on failure. + */ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbolsRequest): Promise => sew(() => withOptionalAuth(async () => { const { @@ -124,6 +140,14 @@ export const findSearchBasedSymbolDefinitions = async (props: FindRelatedSymbols return parseRelatedSymbolsSearchResponse(searchResult); })); +/** + * Transforms a raw Zoekt SearchResponse into a FindRelatedSymbolsResponse. + * Maps each file's chunks and match ranges into a structured response, including + * the git commit ref (SHA) from the search result. + * + * @param searchResult - The raw search response from the Zoekt searcher. + * @returns A structured response containing stats, matched files with refs, and repository info. + */ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRelatedSymbolsResponse => { return { stats: { @@ -138,6 +162,7 @@ const parseRelatedSymbolsSearchResponse = (searchResult: SearchResponse): FindRe repositoryId: file.repositoryId, webUrl: file.webUrl, language: file.language, + ref: file.ref, matches: chunks.flatMap((chunk) => { return chunk.matchRanges.map((range) => ({ lineContent: chunk.content, diff --git a/packages/web/src/features/codeNav/types.ts b/packages/web/src/features/codeNav/types.ts index 59fd7b22f..24c54d276 100644 --- a/packages/web/src/features/codeNav/types.ts +++ b/packages/web/src/features/codeNav/types.ts @@ -27,6 +27,7 @@ export const findRelatedSymbolsResponseSchema = z.object({ repositoryId: z.number(), webUrl: z.string(), language: z.string(), + ref: z.string().optional().describe('The git ref/commit SHA of the file.'), matches: z.array(z.object({ lineContent: z.string(), range: rangeSchema, diff --git a/packages/web/src/features/search/types.ts b/packages/web/src/features/search/types.ts index e053c8e20..8e4261cf9 100644 --- a/packages/web/src/features/search/types.ts +++ b/packages/web/src/features/search/types.ts @@ 
-78,6 +78,7 @@ export const searchFileSchema = z.object({ })).optional(), })), branches: z.array(z.string()).optional(), + ref: z.string().optional().describe('The git ref/commit SHA of the file.'), // Set if `whole` is true. content: z.string().optional(), }); diff --git a/packages/web/src/features/search/zoektSearcher.ts b/packages/web/src/features/search/zoektSearcher.ts index f3fa6278f..7ae21bb05 100644 --- a/packages/web/src/features/search/zoektSearcher.ts +++ b/packages/web/src/features/search/zoektSearcher.ts @@ -375,6 +375,15 @@ const createReposMapForChunk = async (chunk: ZoektGrpcSearchResponse, reposMapCa return reposMap; } +/** + * Transforms a raw Zoekt gRPC search response into a structured search result. + * Resolves repository metadata, maps file matches to structured SearchResultFile objects + * (including the git commit ref/SHA from the zoekt version field), and computes aggregate stats. + * + * @param response - The raw gRPC search response from zoekt. + * @param reposMapCache - A mutable cache mapping repository IDs to Repo records for efficient lookups. + * @returns An object containing the transformed files array, repository info, and search stats. + */ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, reposMapCache: Map): Promise<{ stats: SearchStats, files: SearchResultFile[], @@ -465,6 +474,7 @@ const transformZoektSearchResponse = async (response: ZoektGrpcSearchResponse, r } }), branches: file.branches, + ref: file.version || undefined, content: file.content ? 
file.content.toString('utf-8') : undefined, } }).filter(file => file !== undefined); diff --git a/packages/web/src/features/tools/findSymbolDefinitions.ts b/packages/web/src/features/tools/findSymbolDefinitions.ts index b77176163..62eb63a61 100644 --- a/packages/web/src/features/tools/findSymbolDefinitions.ts +++ b/packages/web/src/features/tools/findSymbolDefinitions.ts @@ -71,7 +71,7 @@ export const findSymbolDefinitionsDefinition: ToolDefinition< files: response.files.map((file) => ({ fileName: file.fileName, repo: file.repository, - revision, + revision: file.ref ?? revision, })), }; diff --git a/packages/web/src/features/tools/findSymbolReferences.ts b/packages/web/src/features/tools/findSymbolReferences.ts index 0b04c9f91..5698b90f4 100644 --- a/packages/web/src/features/tools/findSymbolReferences.ts +++ b/packages/web/src/features/tools/findSymbolReferences.ts @@ -81,7 +81,7 @@ export const findSymbolReferencesDefinition: ToolDefinition< files: response.files.map((file) => ({ fileName: file.fileName, repo: file.repository, - revision, + revision: file.ref ?? revision, })), };