Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions Clients/src/application/repository/modelEvaluations.repository.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import { apiServices } from "../../infrastructure/api/networkServices";

/**
 * A single evaluation record as returned by the model-inventory evaluations
 * endpoint. The same shape is used for both kinds of evaluation; `eval_type`
 * tells them apart.
 */
export interface ModelEvaluation {
  // --- Identity ---
  id: string;
  name: string;
  /** Discriminates which kind of evaluation this record describes. */
  eval_type: "experiment" | "bias_audit";

  // --- Lifecycle ---
  /** Free-form status string; the set of possible values is not defined here. */
  status: string;
  /** Creation time as a string — presumably an ISO timestamp; confirm against the API. */
  created_at: string;
  /** Completion time as a string, absent when not provided by the API. */
  completed_at?: string;
  created_by?: string;

  // --- Payload ---
  /** Arbitrary key/value configuration the evaluation was run with. */
  config: Record<string, any>;
  /** Arbitrary key/value results, when present. */
  results?: Record<string, any>;

  // --- Failure details ---
  // NOTE(review): both `error_message` and `error` are declared; presumably the
  // two evaluation kinds name this field differently — confirm with the backend.
  error_message?: string;
  error?: string;

  // --- Model details ---
  /** Numeric id of a model inventory entry — presumably the linked model; confirm. */
  model_inventory_id?: number;
  model_provider?: string;
  model_name?: string;
  model_version?: string;
}

/**
 * Payload shape for the evaluations listing: evaluation records split into
 * two lists, one per evaluation kind.
 */
export interface ModelEvaluationsResponse {
  /** Records listed under the `experiments` key of the payload. */
  experiments: ModelEvaluation[];
  /** Records listed under the `biasAudits` key of the payload. */
  biasAudits: ModelEvaluation[];
}

/**
 * Requests all model evaluations from `/modelInventory/evaluations`.
 *
 * The response body is returned as-is with a type assertion; no runtime
 * validation of its shape is performed here. Any rejection from
 * `apiServices.get` propagates to the caller.
 *
 * @returns The `data` field of the response, typed as {@link ModelEvaluationsResponse}.
 */
export async function getAllModelEvaluations(): Promise<ModelEvaluationsResponse> {
  const { data } = await apiServices.get("/modelInventory/evaluations");
  return data as ModelEvaluationsResponse;
}
2 changes: 2 additions & 0 deletions Clients/src/infrastructure/api/biasAuditService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export interface BiasAuditSummary {
updatedAt: string;
completedAt: string | null;
createdBy: string | null;
modelInventoryId?: number | null;
}

export interface GroupResultRow {
Expand Down Expand Up @@ -214,6 +215,7 @@ export interface CreateBiasAuditConfig {
dataSource?: string;
dataDateRangeStart?: string;
dataDateRangeEnd?: string;
modelInventoryId?: number;
}

// ==================== SERVICE ====================
Expand Down
2 changes: 2 additions & 0 deletions Clients/src/infrastructure/api/evaluationLogsService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ export interface Experiment {
updated_at: string;
tenant: string;
created_by?: number;
model_inventory_id?: number;
}

export interface MetricAggregates {
Expand Down Expand Up @@ -194,6 +195,7 @@ export const experimentsService = {
description?: string;
config: Record<string, any>;
baseline_experiment_id?: string;
model_inventory_id?: number;
}) {
const response = await CustomAxios.post("/deepeval/experiments", data);
return response.data;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,40 @@ const EvaluationTableBody: React.FC<IEvaluationTableBodyProps> = ({
{row.dataset}
</TableCell>

{/* LINKED MODEL - center aligned */}
{row.linkedModel !== undefined && (
<TableCell
sx={{
...singleTheme.tableStyles.primary.body.cell,
paddingLeft: "12px",
paddingRight: "12px",
textTransform: "none",
textAlign: "center",
width: "10%",
}}
>
{row.linkedModel ? (
<Box
sx={{
display: "inline-flex",
alignItems: "center",
px: "8px",
py: "2px",
borderRadius: "4px",
backgroundColor: palette.status.success.bg,
color: palette.status.success.text,
fontSize: "11px",
fontWeight: 500,
}}
>
Linked
</Box>
) : (
<Typography sx={{ fontSize: "11px", color: palette.text.secondary }}>Unlinked</Typography>
)}
</TableCell>
)}

{/* DATE - center aligned */}
{row.date !== undefined && (
<TableCell
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ const columnWidths: Record<string, string> = {
"MODEL": "12%",
"JUDGE/SCORER": "16%",
"# PROMPTS": "8%",
"DATASET": "14%",
"DATE": "16%",
"DATASET": "12%",
"LINKED MODEL": "10%",
"DATE": "14%",
"ACTION": "60px",
};

Expand Down
30 changes: 26 additions & 4 deletions Clients/src/presentation/pages/EvalsDashboard/BiasAuditsList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ type SortConfig = { key: string; direction: SortDirection };
const SORTING_KEY = "verifywise_bias_audits_sorting";

const columns = [
{ id: "framework", label: "FRAMEWORK", sortable: true, width: "25%" },
{ id: "mode", label: "MODE", sortable: true, width: "15%" },
{ id: "status", label: "STATUS", sortable: true, width: "15%" },
{ id: "result", label: "RESULT", sortable: true, width: "15%" },
{ id: "framework", label: "FRAMEWORK", sortable: true, width: "22%" },
{ id: "mode", label: "MODE", sortable: true, width: "12%" },
{ id: "status", label: "STATUS", sortable: true, width: "12%" },
{ id: "result", label: "RESULT", sortable: true, width: "12%" },
{ id: "linkedModel", label: "LINKED MODEL", sortable: false, width: "12%" },
{ id: "date", label: "DATE", sortable: true, width: "20%" },
{ id: "action", label: "ACTION", sortable: false, width: "60px" },
];
Expand Down Expand Up @@ -325,6 +326,27 @@ export default function BiasAuditsList({ orgId, onViewAudit }: BiasAuditsListPro
<TableCell sx={singleTheme.tableStyles.primary.body.cell}>{getModeChip(audit.mode)}</TableCell>
<TableCell sx={singleTheme.tableStyles.primary.body.cell}>{getStatusChip(audit.status)}</TableCell>
<TableCell sx={singleTheme.tableStyles.primary.body.cell}>{getResultSummary(audit)}</TableCell>
<TableCell sx={singleTheme.tableStyles.primary.body.cell}>
{audit.modelInventoryId ? (
<Box
sx={{
display: "inline-flex",
alignItems: "center",
px: "8px",
py: "2px",
borderRadius: "4px",
backgroundColor: palette.status.success.bg,
color: palette.status.success.text,
fontSize: "11px",
fontWeight: 500,
}}
>
Linked
</Box>
) : (
<Typography sx={{ fontSize: "11px", color: palette.text.secondary }}>Unlinked</Typography>
)}
</TableCell>
<TableCell sx={singleTheme.tableStyles.primary.body.cell}>
<Typography sx={{ fontSize: 13, color: theme.palette.text.secondary }}>
{formatDate(audit.createdAt)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
type BiasAuditMetric,
type CreateBiasAuditConfig,
} from "../../../application/repository/deepEval.repository";
import { getAllEntities } from "../../../application/repository/entity.repository";
import { palette } from "../../themes/palette";

/** Parse a single CSV row handling quoted fields (RFC 4180). */
Expand Down Expand Up @@ -215,6 +216,10 @@ const NewBiasAuditModal: React.FC<NewBiasAuditModalProps> = ({
const [alert, setAlert] = useState<{ show: boolean; variant: "success" | "error" | "info"; title: string; body: string } | null>(null);
const presetRequestIdRef = useRef(0);

// Model inventory link
const [modelInventories, setModelInventories] = useState<Array<{ id: number; provider: string; model: string; version: string; status: string }>>([]);
const [selectedModelInventoryId, setSelectedModelInventoryId] = useState<number | null>(null);

// Load presets on modal open
useEffect(() => {
if (!isOpen) return;
Expand All @@ -233,6 +238,18 @@ const NewBiasAuditModal: React.FC<NewBiasAuditModalProps> = ({
.finally(() => setLoadingPresets(false));
}, [isOpen]);

// Load model inventories on modal open.
// Linking is optional, so this stays best-effort: a failed or malformed
// response must not block the form — the dropdown keeps its current list.
useEffect(() => {
  if (!isOpen) return;
  getAllEntities({ routeUrl: "/modelInventory" })
    .then((response) => {
      // The dropdown calls `.map` on this state, so only store a real array;
      // any other payload would crash rendering.
      const inventories = response?.data;
      if (Array.isArray(inventories)) {
        setModelInventories(inventories);
      }
    })
    .catch((error: unknown) => {
      // Non-critical: surface the failure for debugging instead of hiding it.
      console.error("Failed to load model inventories:", error);
    });
}, [isOpen]);

// Handle preset selection (request ID prevents race conditions on rapid clicks)
const handlePresetSelect = async (presetId: string) => {
setSelectedPresetId(presetId);
Expand Down Expand Up @@ -345,6 +362,7 @@ const NewBiasAuditModal: React.FC<NewBiasAuditModalProps> = ({
metadata: {
distribution_date: distributionDate,
},
modelInventoryId: selectedModelInventoryId || undefined,
};
const result = await runBiasAudit(csvFile, config);
const rowCount = csvPreview.length > 0 ? `${csvHeaders.length} columns` : "";
Expand Down Expand Up @@ -388,6 +406,7 @@ const NewBiasAuditModal: React.FC<NewBiasAuditModalProps> = ({
setIntersectionalEnabled(false);
setSubmitError(null);
setPresetsError(null);
setSelectedModelInventoryId(null);
onClose();
};

Expand Down Expand Up @@ -541,6 +560,26 @@ const NewBiasAuditModal: React.FC<NewBiasAuditModalProps> = ({
rows={3}
isOptional
/>

<Box mt="16px">
<Select
id="model-inventory-link"
label="Link to model inventory (optional)"
placeholder="None — don't link to inventory"
value={selectedModelInventoryId !== null ? String(selectedModelInventoryId) : ""}
onChange={(e) =>
setSelectedModelInventoryId(e.target.value === "" ? null : Number(e.target.value))
}
items={[
{ _id: "", name: "None — don't link to inventory" },
...modelInventories.map((m) => ({
_id: String(m.id),
name: `${m.provider} — ${m.model} (v${m.version})`,
})),
]}
sx={{ width: "100%" }}
/>
</Box>
</Stack>
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import {
type LLMApiKey,
type LLMProvider,
} from "../../../application/repository/deepEval.repository";
import { getAllEntities } from "../../../application/repository/entity.repository";
import { PROVIDERS, type ModelInfo } from "../../utils/providers";
import { evalModelsService, type SavedModel } from "../../../infrastructure/api/evalModelsService";
import { useModelPreferences } from "../../../application/hooks/useModelPreferences";
Expand Down Expand Up @@ -139,6 +140,10 @@ export default function NewExperimentModal({
const { preferences: savedPreferences, loading: preferencesLoading, savePreferences } = useModelPreferences(projectId, orgId);
const [preferencesApplied, setPreferencesApplied] = useState(false);

// Model inventory link (optional)
const [modelInventories, setModelInventories] = useState<Array<{ id: number; provider: string; model: string; version: string; status: string }>>([]);
const [selectedModelInventoryId, setSelectedModelInventoryId] = useState<number | null>(null);

// Configuration state - taskType initialized from project's useCase prop
const [config, setConfig] = useState({
// High-level task type for builtin dataset presets - synced with project use case
Expand Down Expand Up @@ -241,6 +246,21 @@ export default function NewExperimentModal({
setApiKeyWarningAcknowledged(false);
}, [config.model.name, config.model.accessMethod]);

// Fetch model inventories when modal opens.
// Linking is optional, so this stays best-effort: a failed or malformed
// response must not block the form — the dropdown keeps its current list.
useEffect(() => {
  if (!isOpen) return;
  getAllEntities({ routeUrl: "/modelInventory" })
    .then((response) => {
      // The dropdown calls `.map` on this state, so only store a real array;
      // any other payload would crash rendering.
      const inventories = response?.data;
      if (Array.isArray(inventories)) {
        setModelInventories(inventories);
      }
    })
    .catch((error: unknown) => {
      // Non-critical — dropdown just stays empty; log so the failure is visible.
      console.error("Failed to load model inventories:", error);
    });
}, [isOpen]);

// Track if selected dataset is multi-turn
const isMultiTurnDataset = selectedUserDataset?.turnType === "multi-turn" ||
(selectedPresetPath && selectedPresetPath.includes("multiturn"));
Expand Down Expand Up @@ -598,6 +618,7 @@ export default function NewExperimentModal({
project_id: projectId,
name: `${experimentModelName} - ${dateTimeStr}`,
description: `Evaluating ${experimentModelName} with ${datasetPrompts.length} prompts`,
model_inventory_id: selectedModelInventoryId || undefined,
config: {
project_id: projectId, // Include in config for runner
model: {
Expand Down Expand Up @@ -767,6 +788,7 @@ export default function NewExperimentModal({
// Reset custom model name toggles
setUseCustomModelName(false);
setUseCustomJudgeModelName(false);
setSelectedModelInventoryId(null);
setConfig({
taskType: useCase,
model: {
Expand Down Expand Up @@ -1346,6 +1368,36 @@ export default function NewExperimentModal({
</Stack>
</Box>
)}

{/* Link to model inventory (optional) */}
<Box sx={{ mt: "16px" }}>
<Typography variant="body2" sx={{ mb: "4px", fontWeight: 500, fontSize: "13px", color: palette.text.secondary }}>
Link to model inventory (optional)
</Typography>
<FormControl fullWidth size="small">
<Select
value={selectedModelInventoryId !== null ? selectedModelInventoryId : ""}
onChange={(e) => {
const val = String(e.target.value);
setSelectedModelInventoryId(val === "" ? null : Number(val));
}}
displayEmpty
sx={{ height: "34px", fontSize: "13px", borderRadius: "4px" }}
>
<MenuItem value="">
<Typography sx={{ fontSize: "13px", color: palette.text.secondary }}>None — don't link to inventory</Typography>
</MenuItem>
{modelInventories.map((m) => (
<MenuItem key={m.id} value={m.id}>
<Stack direction="row" spacing={1} alignItems="center">
<Typography sx={{ fontSize: "13px" }}>{m.provider} — {m.model}</Typography>
<Typography sx={{ fontSize: "11px", color: palette.text.secondary }}>v{m.version}</Typography>
</Stack>
</MenuItem>
))}
</Select>
</FormControl>
</Box>
</Stack>
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ export default function ProjectExperiments({ projectId, orgId, onViewExperiment,
}, [experiments, filterData, searchTerm]);

// Transform to table format
const tableColumns = ["EXPERIMENT NAME", "MODEL", "JUDGE/SCORER", "# PROMPTS", "DATASET", "DATE", "ACTION"];
const tableColumns = ["EXPERIMENT NAME", "MODEL", "JUDGE/SCORER", "# PROMPTS", "DATASET", "LINKED MODEL", "DATE", "ACTION"];

const tableRows: IEvaluationRow[] = filteredExperiments.map((exp) => {
// Get dataset name from config - try multiple sources
Expand Down Expand Up @@ -545,6 +545,7 @@ export default function ProjectExperiments({ projectId, orgId, onViewExperiment,
judge: judgeDisplay,
dataset: datasetName,
prompts: exp.sampleCount || 0,
linkedModel: exp.model_inventory_id ?? null,
date: createdDate,
status:
exp.status === "completed" ? "Completed" :
Expand Down
Loading
Loading