Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/api-reference/sourcebot-public.openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,10 @@
"type": "string"
}
},
"ref": {
"type": "string",
"description": "The git ref/commit SHA of the file."
},
"content": {
"type": "string"
}
Expand Down Expand Up @@ -829,6 +833,10 @@
"language": {
"type": "string"
},
"ref": {
"type": "string",
"description": "The git ref/commit SHA of the file."
},
"matches": {
"type": "array",
"items": {
Expand Down
44 changes: 31 additions & 13 deletions packages/backend/src/repoCompileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfi
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
import path from 'path';
import fs from 'fs/promises';
import { fileURLToPath } from 'node:url';
import { glob } from 'glob';
import { getLocalDefaultBranch, getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
import assert from 'assert';
Expand Down Expand Up @@ -106,7 +107,7 @@ export const createGitHubRepoRecord = ({
.replace(/^https?:\/\//, '');

const repoDisplayName = repo.full_name;
const repoName = path.join(repoNameRoot, repoDisplayName);
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
const cloneUrl = new URL(repo.clone_url!);
const isPublic = repo.private === false;

Expand Down Expand Up @@ -184,7 +185,7 @@ export const compileGitlabConfig = async (
project.visibility === 'public' ||
project.visibility === 'internal';
const repoDisplayName = project.path_with_namespace;
const repoName = path.join(repoNameRoot, repoDisplayName);
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
// project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available
const avatarUrl = project.avatar_url
? new URL(`/api/v4/projects/${project.id}/avatar`, hostUrl).toString()
Expand Down Expand Up @@ -263,7 +264,7 @@ export const compileGiteaConfig = async (
const cloneUrl = new URL(repo.clone_url!);
cloneUrl.host = configUrl.host
const repoDisplayName = repo.full_name!;
const repoName = path.join(repoNameRoot, repoDisplayName);
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
const isPublic = repo.internal === false && repo.private === false;

logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`);
Expand Down Expand Up @@ -326,9 +327,9 @@ export const compileGerritConfig = async (
.replace(/^https?:\/\//, '');

const repos = gerritRepos.map((project) => {
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
const cloneUrl = new URL(path.posix.join(hostUrl, encodeURIComponent(project.name)));
const repoDisplayName = project.name;
const repoName = path.join(repoNameRoot, repoDisplayName);
const repoName = path.posix.join(repoNameRoot, repoDisplayName);

const webUrl = (() => {
if (!project.web_links || project.web_links.length === 0) {
Expand All @@ -344,7 +345,7 @@ export const compileGerritConfig = async (
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
if (webUrl.startsWith('/plugins/gitiles/')) {
logger.debug(`WebUrl is a gitiles path, joining with hostUrl: ${webUrl}`);
return new URL(path.join(hostUrl, webUrl)).toString();
return new URL(path.posix.join(hostUrl, webUrl)).toString();
} else {
logger.debug(`WebUrl is not a gitiles path, returning as is: ${webUrl}`);
return webUrl;
Expand Down Expand Up @@ -499,7 +500,7 @@ export const compileBitbucketConfig = async (
: (repo as BitbucketCloudRepository).is_private === false;
const isArchived = isServer ? (repo as BitbucketServerRepository).archived === true : false;
const isFork = isServer ? (repo as BitbucketServerRepository).origin !== undefined : (repo as BitbucketCloudRepository).parent !== undefined;
const repoName = path.join(repoNameRoot, displayName);
const repoName = path.posix.join(repoNameRoot, displayName);
const cloneUrl = getCloneUrl(repo);
const webUrl = getWebUrl(repo);
const defaultBranch = isServer ? (repo as BitbucketServerRepository).defaultBranch : (repo as BitbucketCloudRepository).mainbranch?.name;
Expand Down Expand Up @@ -583,15 +584,32 @@ export const compileGenericGitHostConfig = async (
}
}

/**
* Compiles a generic git host configuration backed by a local `file://` URL.
* Resolves the file URL to a native filesystem path using `fileURLToPath` (with a safe fallback),
* applies glob matching, validates each matched path as a git repository root, and produces
* repository records with POSIX-normalized names.
*
* @param config - The generic git host connection configuration with a `file://` URL.
* @param connectionId - The database ID of the connection record.
* @returns A CompileResult containing the resolved repository data and any warnings.
*/
export const compileGenericGitHostConfig_file = async (
config: GenericGitHostConnectionConfig,
connectionId: number,
): Promise<CompileResult> => {
const configUrl = new URL(config.url);
assert(configUrl.protocol === 'file:', 'config.url must be a file:// URL');

let folderPath: string;
try {
folderPath = fileURLToPath(configUrl).replace(/\\/g, '/');
} catch {
folderPath = configUrl.pathname;
}

// Resolve the glob pattern to a list of repo-paths
const repoPaths = await glob(configUrl.pathname, {
const repoPaths = await glob(folderPath, {
absolute: true,
});

Expand All @@ -600,7 +618,7 @@ export const compileGenericGitHostConfig_file = async (

// Warn if the glob pattern matched no paths at all
if (repoPaths.length === 0) {
const warning = `No paths matched the pattern '${configUrl.pathname}'. Please verify the path exists and is accessible.`;
const warning = `No paths matched the pattern '${folderPath}'. Please verify the path exists and is accessible.`;
logger.warn(warning);
warnings.push(warning);
return {
Expand All @@ -609,7 +627,7 @@ export const compileGenericGitHostConfig_file = async (
};
}

logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${configUrl.pathname}'`);
logger.debug(`Found ${repoPaths.length} path(s) matching pattern '${folderPath}'`);

await Promise.all(repoPaths.map((repoPath) => gitOperationLimit(async () => {
const stat = await fs.stat(repoPath).catch(() => null);
Expand Down Expand Up @@ -651,7 +669,7 @@ export const compileGenericGitHostConfig_file = async (
const hostWithPort = extractHostWithPort(origin) ?? remoteUrl.host;
// Decode URL-encoded characters (e.g., %20 -> space) to ensure consistent repo names
const decodedPathname = decodeURIComponent(remoteUrl.pathname);
const repoName = path.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));
const repoName = path.posix.join(hostWithPort, decodedPathname.replace(/\.git$/, ''));

const repo: RepoData = {
external_codeHostType: 'genericGitHost',
Expand Down Expand Up @@ -723,7 +741,7 @@ export const compileGenericGitHostConfig_url = async (

// @note: matches the naming here:
// https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293
const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));
const repoName = path.posix.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, ''));

const repo: RepoData = {
external_codeHostType: 'genericGitHost',
Expand Down Expand Up @@ -787,7 +805,7 @@ export const compileAzureDevOpsConfig = async (
}

const repoDisplayName = `${repo.project.name}/${repo.name}`;
const repoName = path.join(repoNameRoot, repoDisplayName);
const repoName = path.posix.join(repoNameRoot, repoDisplayName);
const isPublic = repo.project.visibility === ProjectVisibility.Public;

if (!repo.remoteUrl) {
Expand Down
14 changes: 12 additions & 2 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ import { getShardPrefix } from "./utils.js";

const logger = createLogger('zoekt');

/**
* Indexes a git repository using Zoekt (zoekt-git-index CLI).
* Wraps the index directory and the repository path in double quotes in the generated command line, for Windows path compatibility.
*
* @param repo - The repository database record to index.
* @param settings - The global or custom configuration settings for the indexing limits.
* @param revisions - The git branch/revision references to index.
* @param signal - Optional AbortSignal to cancel the indexing process.
* @returns A promise that resolves to stdout/stderr of the zoekt process.
*/
export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => {
const { path: repoPath } = getRepoPath(repo);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
Expand All @@ -17,15 +27,15 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, revisio
const command = [
'zoekt-git-index',
'-allow_missing_branches',
`-index ${INDEX_CACHE_DIR}`,
`-index "${INDEX_CACHE_DIR}"`,
`-max_trigram_count ${settings.maxTrigramCount}`,
`-file_limit ${settings.maxFileSize}`,
`-branches "${revisions.join(',')}"`,
`-tenant_id ${repo.orgId}`,
`-repo_id ${repo.id}`,
`-shard_prefix_override ${shardPrefix}`,
...largeFileGlobPatterns.map((pattern) => `-large_file "${pattern}"`),
repoPath
`"${repoPath}"`
].join(' ');

return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
Expand Down
10 changes: 9 additions & 1 deletion packages/shared/src/utils.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { readFile } from 'fs/promises';
import stripJsonComments from 'strip-json-comments';
import { z } from "zod";
import { fileURLToPath } from 'node:url';
import { DEFAULT_CONFIG_SETTINGS } from "./constants.js";
import { ConfigSettings } from "./types.js";
import { Org, Repo } from "@sourcebot/db";
Expand Down Expand Up @@ -102,13 +103,20 @@ export const getRepoIdFromPath = (repoPath: string): number | undefined => {
return isNaN(id) ? undefined : id;
}

/**
* Resolves the filesystem path for a given repository.
* When the repository is a `genericGitHost` whose clone URL uses the `file:` protocol, the local path is obtained by converting that URL with `fileURLToPath`.
*
* @param repo - The repository record from the database.
* @returns An object containing the absolute path to the repository and whether it should be treated as read-only.
*/
export const getRepoPath = (repo: Repo): { path: string, isReadOnly: boolean } => {
// If we are dealing with a local repository, then use that as the path.
// Mark as read-only since we aren't guaranteed to have write access to the local filesystem.
const cloneUrl = new URL(repo.cloneUrl);
if (repo.external_codeHostType === 'genericGitHost' && cloneUrl.protocol === 'file:') {
return {
path: cloneUrl.pathname,
path: fileURLToPath(cloneUrl),
isReadOnly: true,
}
}
Expand Down
118 changes: 118 additions & 0 deletions packages/web/src/features/codeNav/api.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { describe, it, expect, vi, beforeEach } from 'vitest';

// Replace the 'server-only' module with an empty object for this test file.
// NOTE(review): presumably './api' (or one of its dependencies) imports 'server-only' — confirm.
vi.mock('server-only', () => ({}));

import { findSearchBasedSymbolReferences, findSearchBasedSymbolDefinitions } from './api';
import { search } from '@/features/search';

// Swap the search feature's `search` export for a bare vi.fn().
// Its resolved value is configured in the suite's beforeEach hook.
vi.mock('@/features/search', () => ({
search: vi.fn(),
}));

// Stub `withOptionalAuth` so it invokes the wrapped function immediately, with NO arguments,
// and returns whatever that function returns.
// NOTE(review): if the real `withOptionalAuth` passes a context object to its callback, the code
// under test will receive `undefined` here — confirm the callbacks in './api' tolerate that.
vi.mock('@/middleware/withAuth', () => ({
withOptionalAuth: (fn: any) => fn(),
}));

// Stub `sew` the same way: call the wrapped function immediately with no arguments and
// return its result unchanged.
vi.mock('@/middleware/sew', () => ({
sew: (fn: any) => fn(),
}));

// Search statistics portion of the canned response (arbitrary fixture numbers).
const MOCK_SEARCH_STATS = {
    actualMatchCount: 1,
    totalMatchCount: 1,
    duration: 100,
    fileCount: 1,
    filesSkipped: 0,
    contentBytesLoaded: 100,
    indexBytesLoaded: 100,
    crashes: 0,
    shardFilesConsidered: 1,
    filesConsidered: 1,
    filesLoaded: 1,
    shardsScanned: 1,
    shardsSkipped: 0,
    shardsSkippedFilter: 0,
    ngramMatches: 1,
    ngramLookups: 1,
    wait: 0,
    matchTreeConstruction: 10,
    matchTreeSearch: 90,
    regexpsConsidered: 0,
    flushReason: 'FLUSH_REASON_FINAL_FLUSH',
};

// The single matched chunk: one line of content with one match range covering it.
const MOCK_CHUNK = {
    content: 'const a = 1;',
    matchRanges: [
        {
            start: { byteOffset: 0, lineNumber: 1, column: 1 },
            end: { byteOffset: 12, lineNumber: 1, column: 13 },
        },
    ],
};

// The single file result. Its `ref` value is what the tests assert on.
const MOCK_FILE_RESULT = {
    fileName: {
        text: 'src/index.ts',
        matchRanges: [],
    },
    repository: 'github.com/owner/repo',
    repositoryId: 123,
    webUrl: 'https://sourcebot.example.com/browse/github.com/owner/repo/blob/main/src/index.ts',
    language: 'TypeScript',
    ref: 'abcdef1234567890',
    chunks: [MOCK_CHUNK],
    branches: ['main'],
};

/**
 * Canned response resolved by the mocked `search` function:
 * one file with one chunk, plus stats and an empty `repositoryInfo` list.
 */
const MOCK_SEARCH_RESPONSE = {
    stats: MOCK_SEARCH_STATS,
    files: [MOCK_FILE_RESULT],
    repositoryInfo: [],
    isSearchExhaustive: true,
};

describe('CodeNav Search-Based APIs', () => {
    // Produces a fresh argument object for each call so the two tests share no state.
    const buildSymbolQuery = () => ({
        symbolName: 'mySymbol',
        repoName: 'github.com/owner/repo',
        revisionName: 'HEAD',
    });

    // Expectations common to both APIs: the call did not yield a service error, and the
    // single returned file carries the `ref` supplied by the mocked search response.
    const expectRefOnSingleFile = (result: any) => {
        expect(isServiceError(result)).toBe(false);
        const response = result as any;
        expect(response.files).toHaveLength(1);
        expect(response.files[0].ref).toBe('abcdef1234567890');
    };

    // Reset call history, then make the mocked `search` resolve with the canned response.
    beforeEach(() => {
        vi.clearAllMocks();
        vi.mocked(search).mockResolvedValue(MOCK_SEARCH_RESPONSE as any);
    });

    describe('findSearchBasedSymbolReferences', () => {
        it('includes the ref (commit SHA) in the returned file results', async () => {
            const result = await findSearchBasedSymbolReferences(buildSymbolQuery());
            expectRefOnSingleFile(result);
        });
    });

    describe('findSearchBasedSymbolDefinitions', () => {
        it('includes the ref (commit SHA) in the returned file results', async () => {
            const result = await findSearchBasedSymbolDefinitions(buildSymbolQuery());
            expectRefOnSingleFile(result);
        });
    });
});

/**
 * Checks whether a value looks like a ServiceError by testing for an `errorCode` property.
 *
 * Always returns a strict boolean. `null`, `undefined` and primitives yield `false`
 * rather than being passed through as a falsy non-boolean, so the result can be
 * compared safely with `toBe(false)` or `=== false`.
 *
 * @param obj - The value to inspect.
 * @returns True if `obj` is a non-null object that has an `errorCode` property.
 */
function isServiceError(obj: unknown): boolean {
    // `typeof null === 'object'`, so null must be excluded before using the `in` operator.
    return typeof obj === 'object' && obj !== null && 'errorCode' in obj;
}
Loading