Spaces:
Running
Running
Avijit Ghosh
committed on
Commit
·
928f843
1
Parent(s):
a9a31f7
even prettier cards
Browse files
app/evaluation/[id]/page.client.tsx
CHANGED
|
@@ -11,7 +11,9 @@ import { ArrowLeft, Download, Eye, EyeOff, Info, Database, Globe, Calendar, User
|
|
| 11 |
// Capability icons
|
| 12 |
MessageCircle, Heart, Brain, Lightbulb, BookOpen, Camera, Hand, Search, Bot,
|
| 13 |
// Risk icons
|
| 14 |
-
Skull, AlertCircle, Lock, Zap, Gavel, Users, Leaf, TrendingDown, Scale, Factory
|
|
|
|
|
|
|
| 15 |
import { getAllCategories, getCategoryById, getBenchmarkQuestions, getProcessQuestions } from "@/lib/schema"
|
| 16 |
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"
|
| 17 |
import { naReasonForCategoryFromEval } from "@/lib/na-utils"
|
|
@@ -41,6 +43,19 @@ const loadEvaluationDetails = async (id: string) => {
|
|
| 41 |
return null
|
| 42 |
}
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
// Category icon mapping
|
| 45 |
const getCategoryIcon = (categoryId: string) => {
|
| 46 |
const iconMap: Record<string, any> = {
|
|
@@ -79,6 +94,7 @@ export default function EvaluationDetailsPage() {
|
|
| 79 |
|
| 80 |
const [evaluation, setEvaluation] = useState<any>(null)
|
| 81 |
const [loading, setLoading] = useState(true)
|
|
|
|
| 82 |
const [expandedAreas, setExpandedAreas] = useState<Record<string, boolean>>({})
|
| 83 |
const toggleArea = (area: string) => setExpandedAreas((p) => ({ ...p, [area]: !p[area] }))
|
| 84 |
const [expandedNegatives, setExpandedNegatives] = useState<Record<string, boolean>>({})
|
|
@@ -132,6 +148,33 @@ export default function EvaluationDetailsPage() {
|
|
| 132 |
return naReasonForCategoryFromEval(catEval, benchmarkQs, processQs)
|
| 133 |
}
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
// Compute overall stats from evaluation data dynamically
|
| 136 |
const computedStats = (() => {
|
| 137 |
const strongCategories: string[] = []
|
|
@@ -178,8 +221,12 @@ export default function EvaluationDetailsPage() {
|
|
| 178 |
|
| 179 |
useEffect(() => {
|
| 180 |
const loadData = async () => {
|
| 181 |
-
const
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
setLoading(false)
|
| 184 |
}
|
| 185 |
loadData()
|
|
@@ -211,7 +258,8 @@ export default function EvaluationDetailsPage() {
|
|
| 211 |
}
|
| 212 |
|
| 213 |
return (
|
| 214 |
-
<
|
|
|
|
| 215 |
<div className="mb-6">
|
| 216 |
<div className="flex items-center justify-between">
|
| 217 |
<Button onClick={() => router.push("/")} variant="outline" size="sm">
|
|
@@ -778,34 +826,49 @@ export default function EvaluationDetailsPage() {
|
|
| 778 |
onClick={() => toggleNegatives(key)}
|
| 779 |
className="p-4 hover:bg-muted/20 cursor-pointer transition-colors"
|
| 780 |
>
|
| 781 |
-
<div className="flex items-
|
| 782 |
-
<div className="flex-
|
| 783 |
-
<
|
| 784 |
-
<
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
</Badge>
|
| 799 |
-
)}
|
| 800 |
-
</div>
|
| 801 |
<div className="text-sm text-muted-foreground leading-relaxed">{qText}</div>
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
)}
|
| 807 |
</div>
|
| 808 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 809 |
</div>
|
| 810 |
|
| 811 |
{hasYes && expandedNegatives[key] && (() => {
|
|
@@ -953,42 +1016,125 @@ export default function EvaluationDetailsPage() {
|
|
| 953 |
role="button"
|
| 954 |
tabIndex={0}
|
| 955 |
onClick={() => toggleNegatives(key)}
|
| 956 |
-
className="
|
| 957 |
>
|
| 958 |
-
<div className="flex items-center gap-
|
| 959 |
-
<
|
| 960 |
-
|
| 961 |
-
|
| 962 |
-
|
| 963 |
-
|
| 964 |
-
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
|
| 968 |
-
|
| 969 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 970 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 971 |
</div>
|
| 972 |
|
| 973 |
-
{hasYes && expandedNegatives[key] && (
|
| 974 |
-
<div className="mt-3
|
| 975 |
-
|
| 976 |
-
|
| 977 |
-
<div className="
|
| 978 |
-
|
| 979 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 980 |
</div>
|
| 981 |
-
|
| 982 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 983 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
</div>
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
<span className="text-muted-foreground">Description:</span> {src.description}
|
| 988 |
-
</div>
|
| 989 |
-
)}
|
| 990 |
-
</div>
|
| 991 |
-
))}
|
| 992 |
</div>
|
| 993 |
)}
|
| 994 |
|
|
@@ -1016,5 +1162,6 @@ export default function EvaluationDetailsPage() {
|
|
| 1016 |
)
|
| 1017 |
})}
|
| 1018 |
</div>
|
|
|
|
| 1019 |
)
|
| 1020 |
}
|
|
|
|
| 11 |
// Capability icons
|
| 12 |
MessageCircle, Heart, Brain, Lightbulb, BookOpen, Camera, Hand, Search, Bot,
|
| 13 |
// Risk icons
|
| 14 |
+
Skull, AlertCircle, Lock, Zap, Gavel, Users, Leaf, TrendingDown, Scale, Factory,
|
| 15 |
+
// Process question icons
|
| 16 |
+
FileText, CheckCircle, XCircle, Minus, ChevronUp, ChevronDown } from "lucide-react"
|
| 17 |
import { getAllCategories, getCategoryById, getBenchmarkQuestions, getProcessQuestions } from "@/lib/schema"
|
| 18 |
import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"
|
| 19 |
import { naReasonForCategoryFromEval } from "@/lib/na-utils"
|
|
|
|
| 43 |
return null
|
| 44 |
}
|
| 45 |
|
| 46 |
+
/**
 * Fetches the form-hint definitions served at `/schema/form-hints.json`.
 *
 * @returns The parsed JSON body, or `null` when the request fails, the
 *   response status is not OK, or the body cannot be parsed as JSON.
 *   Failures are logged with `console.error`; this function never rejects.
 */
const loadFormHints = async () => {
  try {
    const response = await fetch('/schema/form-hints.json')
    if (!response.ok) {
      throw new Error(`Failed to load form hints: ${response.statusText}`)
    }
    // Await inside the try block: without it, a rejected json() promise
    // (e.g. a malformed body) would bypass the catch below and reject the
    // caller instead of falling back to null.
    return await response.json()
  } catch (error) {
    console.error('Failed to load form hints:', error)
    return null
  }
}
|
| 58 |
+
|
| 59 |
// Category icon mapping
|
| 60 |
const getCategoryIcon = (categoryId: string) => {
|
| 61 |
const iconMap: Record<string, any> = {
|
|
|
|
| 94 |
|
| 95 |
const [evaluation, setEvaluation] = useState<any>(null)
|
| 96 |
const [loading, setLoading] = useState(true)
|
| 97 |
+
const [formHints, setFormHints] = useState<any>(null)
|
| 98 |
const [expandedAreas, setExpandedAreas] = useState<Record<string, boolean>>({})
|
| 99 |
const toggleArea = (area: string) => setExpandedAreas((p) => ({ ...p, [area]: !p[area] }))
|
| 100 |
const [expandedNegatives, setExpandedNegatives] = useState<Record<string, boolean>>({})
|
|
|
|
| 148 |
return naReasonForCategoryFromEval(catEval, benchmarkQs, processQs)
|
| 149 |
}
|
| 150 |
|
| 151 |
+
// Get question hint from form hints
|
| 152 |
+
/**
 * Looks up the hint text for a question, trying the most specific source first:
 *   1. `formHints.categoryQuestionHints[categoryId][questionId][type]`
 *   2. `formHints.categoryHints[categoryId][type]`
 *   3. `formHints.defaultHints[type]`
 *
 * A tier is skipped when its value is missing or falsy. Returns `undefined`
 * when `formHints` has not been set, or when no tier supplies a value.
 */
const getQuestionHint = (categoryId: string, questionId: string, type: 'benchmark' | 'process'): string | undefined => {
  if (!formHints) return undefined

  // Tier 1: hint written for this exact question within the category.
  const perQuestion = formHints.categoryQuestionHints?.[categoryId]?.[questionId]?.[type]
  if (perQuestion) return perQuestion

  // Tier 2: hint shared by every question in the category.
  const perCategory = formHints.categoryHints?.[categoryId]?.[type]
  if (perCategory) return perCategory

  // Tier 3: global default for this question type (may be undefined).
  return formHints.defaultHints?.[type]
}
|
| 170 |
+
|
| 171 |
+
// Format hint text for tooltip display
|
| 172 |
+
/**
 * Formats a raw hint string for display inside a tooltip.
 *
 * A leading "Hint:" or "Explainer:" label (case-insensitive, optionally
 * preceded by whitespace) is removed, the remainder is trimmed, and the
 * result is prefixed with "Explainer: ". Stripping an existing "Explainer:"
 * label prevents a doubled prefix.
 *
 * @param hint Raw hint text.
 * @returns The hint text prefixed with "Explainer: ".
 */
const formatHintForTooltip = (hint: string): string => {
  const cleanHint = hint.replace(/^\s*(?:Hint|Explainer):\s*/i, '').trim()
  return `Explainer: ${cleanHint}`
}
|
| 177 |
+
|
| 178 |
// Compute overall stats from evaluation data dynamically
|
| 179 |
const computedStats = (() => {
|
| 180 |
const strongCategories: string[] = []
|
|
|
|
| 221 |
|
| 222 |
useEffect(() => {
|
| 223 |
const loadData = async () => {
|
| 224 |
+
const [evalData, hintsData] = await Promise.all([
|
| 225 |
+
loadEvaluationDetails(evaluationId),
|
| 226 |
+
loadFormHints()
|
| 227 |
+
])
|
| 228 |
+
setEvaluation(evalData)
|
| 229 |
+
setFormHints(hintsData)
|
| 230 |
setLoading(false)
|
| 231 |
}
|
| 232 |
loadData()
|
|
|
|
| 258 |
}
|
| 259 |
|
| 260 |
return (
|
| 261 |
+
<TooltipProvider>
|
| 262 |
+
<div className="container mx-auto px-4 py-8 max-w-4xl">
|
| 263 |
<div className="mb-6">
|
| 264 |
<div className="flex items-center justify-between">
|
| 265 |
<Button onClick={() => router.push("/")} variant="outline" size="sm">
|
|
|
|
| 826 |
onClick={() => toggleNegatives(key)}
|
| 827 |
className="p-4 hover:bg-muted/20 cursor-pointer transition-colors"
|
| 828 |
>
|
| 829 |
+
<div className="flex items-center justify-between gap-4">
|
| 830 |
+
<div className="flex items-center gap-3">
|
| 831 |
+
<Tooltip>
|
| 832 |
+
<TooltipTrigger asChild>
|
| 833 |
+
<div className="text-sm font-semibold text-blue-700 dark:text-blue-300 bg-blue-100 dark:bg-blue-900 px-2 py-1 rounded-full border border-blue-300 dark:border-blue-700 min-w-[40px] text-center cursor-help">
|
| 834 |
+
{questionId}
|
| 835 |
+
</div>
|
| 836 |
+
</TooltipTrigger>
|
| 837 |
+
<TooltipContent className="max-w-sm">
|
| 838 |
+
<p className="text-sm">
|
| 839 |
+
{(() => {
|
| 840 |
+
const hint = getQuestionHint(categoryId, questionId, 'benchmark') || q.tooltip
|
| 841 |
+
return hint ? formatHintForTooltip(hint) : 'Explainer: No specific guidance available for this question.'
|
| 842 |
+
})()}
|
| 843 |
+
</p>
|
| 844 |
+
</TooltipContent>
|
| 845 |
+
</Tooltip>
|
|
|
|
|
|
|
|
|
|
| 846 |
<div className="text-sm text-muted-foreground leading-relaxed">{qText}</div>
|
| 847 |
+
</div>
|
| 848 |
+
<div className="flex items-center gap-2">
|
| 849 |
+
{hasYes ? (
|
| 850 |
+
<Badge variant="outline" className="border-green-300 bg-green-50 text-green-700 dark:border-green-700 dark:bg-green-950 dark:text-green-300">
|
| 851 |
+
<CheckCircle className="h-3 w-3 mr-1" />
|
| 852 |
+
Yes
|
| 853 |
+
</Badge>
|
| 854 |
+
) : hasNo ? (
|
| 855 |
+
<Badge variant="outline" className="border-red-300 bg-red-50 text-red-700 dark:border-red-700 dark:bg-red-950 dark:text-red-300">
|
| 856 |
+
<XCircle className="h-3 w-3 mr-1" />
|
| 857 |
+
No
|
| 858 |
+
</Badge>
|
| 859 |
+
) : (
|
| 860 |
+
<Badge variant="outline" className="border-gray-300 bg-gray-50 text-gray-700 dark:border-gray-700 dark:bg-gray-950 dark:text-gray-300">
|
| 861 |
+
<Minus className="h-3 w-3 mr-1" />
|
| 862 |
+
N/A
|
| 863 |
+
</Badge>
|
| 864 |
)}
|
| 865 |
</div>
|
| 866 |
</div>
|
| 867 |
+
{hasYes && (
|
| 868 |
+
<div className="mt-2 text-xs text-blue-600 dark:text-blue-400">
|
| 869 |
+
Click to {expandedNegatives[key] ? 'hide' : 'view'} benchmark details
|
| 870 |
+
</div>
|
| 871 |
+
)}
|
| 872 |
</div>
|
| 873 |
|
| 874 |
{hasYes && expandedNegatives[key] && (() => {
|
|
|
|
| 1016 |
role="button"
|
| 1017 |
tabIndex={0}
|
| 1018 |
onClick={() => toggleNegatives(key)}
|
| 1019 |
+
className="cursor-pointer"
|
| 1020 |
>
|
| 1021 |
+
<div className="flex items-center justify-between gap-4">
|
| 1022 |
+
<div className="flex items-center gap-3">
|
| 1023 |
+
<Tooltip>
|
| 1024 |
+
<TooltipTrigger asChild>
|
| 1025 |
+
<div className="text-sm font-semibold text-purple-700 dark:text-purple-300 bg-purple-100 dark:bg-purple-900 px-2 py-1 rounded-full border border-purple-300 dark:border-purple-700 min-w-[40px] text-center cursor-help">
|
| 1026 |
+
{questionId}
|
| 1027 |
+
</div>
|
| 1028 |
+
</TooltipTrigger>
|
| 1029 |
+
<TooltipContent className="max-w-sm">
|
| 1030 |
+
<p className="text-sm">
|
| 1031 |
+
{(() => {
|
| 1032 |
+
const hint = getQuestionHint(categoryId, questionId, 'process') || q.tooltip
|
| 1033 |
+
return hint ? formatHintForTooltip(hint) : 'Explainer: No specific guidance available for this question.'
|
| 1034 |
+
})()}
|
| 1035 |
+
</p>
|
| 1036 |
+
</TooltipContent>
|
| 1037 |
+
</Tooltip>
|
| 1038 |
+
<div className="text-sm text-muted-foreground leading-relaxed">{qText}</div>
|
| 1039 |
+
</div>
|
| 1040 |
+
<div className="flex items-center gap-2">
|
| 1041 |
+
{hasYes ? (
|
| 1042 |
+
<Badge variant="outline" className="border-green-300 bg-green-50 text-green-700 dark:border-green-700 dark:bg-green-950 dark:text-green-300">
|
| 1043 |
+
<CheckCircle className="h-3 w-3 mr-1" />
|
| 1044 |
+
Yes
|
| 1045 |
+
</Badge>
|
| 1046 |
+
) : hasNo ? (
|
| 1047 |
+
<Badge variant="outline" className="border-red-300 bg-red-50 text-red-700 dark:border-red-700 dark:bg-red-950 dark:text-red-300">
|
| 1048 |
+
<XCircle className="h-3 w-3 mr-1" />
|
| 1049 |
+
No
|
| 1050 |
+
</Badge>
|
| 1051 |
+
) : (
|
| 1052 |
+
<Badge variant="outline" className="border-gray-300 bg-gray-50 text-gray-700 dark:border-gray-700 dark:bg-gray-950 dark:text-gray-300">
|
| 1053 |
+
<Minus className="h-3 w-3 mr-1" />
|
| 1054 |
+
N/A
|
| 1055 |
+
</Badge>
|
| 1056 |
+
)}
|
| 1057 |
+
</div>
|
| 1058 |
</div>
|
| 1059 |
+
{hasYes && sources.length > 0 && (
|
| 1060 |
+
<div className="mt-2 text-xs text-purple-600 dark:text-purple-400">
|
| 1061 |
+
Click to {expandedNegatives[key] ? 'hide' : 'view'} {sources.length} documentation source{sources.length !== 1 ? 's' : ''}
|
| 1062 |
+
</div>
|
| 1063 |
+
)}
|
| 1064 |
</div>
|
| 1065 |
|
| 1066 |
+
{hasYes && expandedNegatives[key] && sources.length > 0 && (
|
| 1067 |
+
<div className="mt-3">
|
| 1068 |
+
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-4">
|
| 1069 |
+
{sources.map((src: any, i: number) => (
|
| 1070 |
+
<div key={i} className="bg-gradient-to-br from-white to-purple-50 dark:from-gray-900 dark:to-purple-950 border border-purple-200 dark:border-purple-800 rounded-lg p-4 hover:shadow-sm transition-shadow">
|
| 1071 |
+
{/* Header */}
|
| 1072 |
+
<div className="mb-3">
|
| 1073 |
+
<div className="text-xs font-medium text-purple-600 dark:text-purple-400 bg-purple-100 dark:bg-purple-900 px-2 py-1 rounded-full inline-flex items-center gap-1">
|
| 1074 |
+
<FileText className="h-3 w-3" />
|
| 1075 |
+
{src.documentType || 'Documentation'}
|
| 1076 |
+
</div>
|
| 1077 |
</div>
|
| 1078 |
+
|
| 1079 |
+
{/* Title */}
|
| 1080 |
+
{src.title && (
|
| 1081 |
+
<div className="mb-3">
|
| 1082 |
+
<h4 className="font-semibold text-lg text-foreground leading-tight">{src.title}</h4>
|
| 1083 |
+
</div>
|
| 1084 |
+
)}
|
| 1085 |
+
|
| 1086 |
+
{/* Details */}
|
| 1087 |
+
<div className="space-y-2 text-sm">
|
| 1088 |
+
{src.author && (
|
| 1089 |
+
<div className="flex items-center gap-2">
|
| 1090 |
+
<span className="text-muted-foreground font-medium min-w-[60px]">Author:</span>
|
| 1091 |
+
<span className="text-foreground">{src.author}</span>
|
| 1092 |
+
</div>
|
| 1093 |
+
)}
|
| 1094 |
+
|
| 1095 |
+
{src.organization && (
|
| 1096 |
+
<div className="flex items-center gap-2">
|
| 1097 |
+
<span className="text-muted-foreground font-medium min-w-[60px]">Org:</span>
|
| 1098 |
+
<span className="text-foreground">{src.organization}</span>
|
| 1099 |
+
</div>
|
| 1100 |
+
)}
|
| 1101 |
+
|
| 1102 |
+
{src.date && (
|
| 1103 |
+
<div className="flex items-center gap-2">
|
| 1104 |
+
<span className="text-muted-foreground font-medium min-w-[60px]">Date:</span>
|
| 1105 |
+
<span className="text-foreground">{src.date}</span>
|
| 1106 |
+
</div>
|
| 1107 |
+
)}
|
| 1108 |
+
|
| 1109 |
+
<div className="flex items-start gap-2">
|
| 1110 |
+
<span className="text-muted-foreground font-medium min-w-[60px]">Source:</span>
|
| 1111 |
+
<div className="flex-1 min-w-0">
|
| 1112 |
+
{src.url ? (
|
| 1113 |
+
<a
|
| 1114 |
+
className="text-primary hover:text-primary/80 underline decoration-primary/30 hover:decoration-primary/60 transition-colors break-all"
|
| 1115 |
+
href={src.url}
|
| 1116 |
+
target="_blank"
|
| 1117 |
+
rel="noreferrer"
|
| 1118 |
+
title={src.url}
|
| 1119 |
+
>
|
| 1120 |
+
{src.url.length > 50 ? `${src.url.substring(0, 50)}...` : src.url}
|
| 1121 |
+
</a>
|
| 1122 |
+
) : (
|
| 1123 |
+
<span className="text-muted-foreground">—</span>
|
| 1124 |
+
)}
|
| 1125 |
+
</div>
|
| 1126 |
+
</div>
|
| 1127 |
</div>
|
| 1128 |
+
|
| 1129 |
+
{/* Description */}
|
| 1130 |
+
{src.description && (
|
| 1131 |
+
<div className="mt-3 p-3 bg-muted/30 dark:bg-muted/10 rounded-lg">
|
| 1132 |
+
<p className="text-sm text-muted-foreground leading-relaxed">{src.description}</p>
|
| 1133 |
+
</div>
|
| 1134 |
+
)}
|
| 1135 |
</div>
|
| 1136 |
+
))}
|
| 1137 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1138 |
</div>
|
| 1139 |
)}
|
| 1140 |
|
|
|
|
| 1162 |
)
|
| 1163 |
})}
|
| 1164 |
</div>
|
| 1165 |
+
</TooltipProvider>
|
| 1166 |
)
|
| 1167 |
}
|
public/evaluations/claude-3-sonnet.json
CHANGED
|
@@ -191,29 +191,55 @@
|
|
| 191 |
"B2": [
|
| 192 |
{
|
| 193 |
"id": "proc-meem78cs-563xw4",
|
| 194 |
-
"url": "https://www.anthropic.com/research",
|
| 195 |
-
"description": "
|
| 196 |
-
"
|
| 197 |
-
"
|
| 198 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
}
|
| 200 |
],
|
| 201 |
"B5": [
|
| 202 |
{
|
| 203 |
-
"id": "proc-meem78cs-
|
| 204 |
-
"url": "https://www.anthropic.com/
|
| 205 |
-
"description": "
|
| 206 |
-
"
|
| 207 |
-
"
|
| 208 |
-
"
|
|
|
|
|
|
|
| 209 |
},
|
| 210 |
{
|
| 211 |
-
"id": "proc-meem78cs-
|
| 212 |
-
"url": "https://www.anthropic.com/
|
| 213 |
-
"description": "
|
| 214 |
-
"
|
| 215 |
-
"
|
| 216 |
-
"
|
|
|
|
|
|
|
| 217 |
}
|
| 218 |
],
|
| 219 |
"B6": [
|
|
|
|
| 191 |
"B2": [
|
| 192 |
{
|
| 193 |
"id": "proc-meem78cs-563xw4",
|
| 194 |
+
"url": "https://www.anthropic.com/research/claude-3-family",
|
| 195 |
+
"description": "Comprehensive research methodology and experimental design for Claude 3 family",
|
| 196 |
+
"title": "Claude 3 Family Technical Documentation",
|
| 197 |
+
"author": "Anthropic Research Team",
|
| 198 |
+
"organization": "Anthropic",
|
| 199 |
+
"date": "2024-03-04",
|
| 200 |
+
"documentType": "Research Paper"
|
| 201 |
+
},
|
| 202 |
+
{
|
| 203 |
+
"id": "proc-meem78cs-563xw5",
|
| 204 |
+
"url": "https://github.com/anthropics/anthropic-cookbook",
|
| 205 |
+
"description": "Open cookbook with evaluation prompts and reproducible examples",
|
| 206 |
+
"title": "Anthropic Evaluation Cookbook",
|
| 207 |
+
"author": "Developer Relations Team",
|
| 208 |
+
"organization": "Anthropic",
|
| 209 |
+
"date": "2024-03-15",
|
| 210 |
+
"documentType": "Code Repository"
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"id": "proc-meem78cs-563xw6",
|
| 214 |
+
"url": "https://www.anthropic.com/safety/evaluation-standards",
|
| 215 |
+
"description": "Detailed evaluation standards and procedures for model assessment",
|
| 216 |
+
"title": "Model Evaluation Standards v3.2",
|
| 217 |
+
"author": "Safety Research Division",
|
| 218 |
+
"organization": "Anthropic",
|
| 219 |
+
"date": "2024-02-28",
|
| 220 |
+
"documentType": "Standards Document"
|
| 221 |
}
|
| 222 |
],
|
| 223 |
"B5": [
|
| 224 |
{
|
| 225 |
+
"id": "proc-meem78cs-563xw7",
|
| 226 |
+
"url": "https://www.anthropic.com/compliance/ai-standards",
|
| 227 |
+
"description": "Alignment with industry AI safety and governance standards",
|
| 228 |
+
"title": "AI Standards Compliance Report",
|
| 229 |
+
"author": "Compliance Team",
|
| 230 |
+
"organization": "Anthropic",
|
| 231 |
+
"date": "2024-03-20",
|
| 232 |
+
"documentType": "Compliance Report"
|
| 233 |
},
|
| 234 |
{
|
| 235 |
+
"id": "proc-meem78cs-563xw8",
|
| 236 |
+
"url": "https://www.anthropic.com/research/constitutional-ai",
|
| 237 |
+
"description": "Constitutional AI methodology and regulatory alignment documentation",
|
| 238 |
+
"title": "Constitutional AI: Regulatory Alignment Framework",
|
| 239 |
+
"author": "AI Safety Research Team",
|
| 240 |
+
"organization": "Anthropic",
|
| 241 |
+
"date": "2024-01-15",
|
| 242 |
+
"documentType": "Framework Document"
|
| 243 |
}
|
| 244 |
],
|
| 245 |
"B6": [
|
public/evaluations/gpt-4-turbo.json
CHANGED
|
@@ -193,20 +193,54 @@
|
|
| 193 |
{
|
| 194 |
"id": "proc-meem78d1-ur2bqa",
|
| 195 |
"url": "https://github.com/openai/evals",
|
| 196 |
-
"description": "Open-source evaluation framework and prompts",
|
| 197 |
-
"
|
| 198 |
-
"
|
| 199 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
}
|
| 201 |
],
|
| 202 |
"B3": [
|
| 203 |
{
|
| 204 |
-
"id": "proc-meem78d1-
|
| 205 |
-
"url": "https://openai.com/research/
|
| 206 |
-
"description": "
|
| 207 |
-
"
|
| 208 |
-
"
|
| 209 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
}
|
| 211 |
],
|
| 212 |
"B4": [
|
|
|
|
| 193 |
{
|
| 194 |
"id": "proc-meem78d1-ur2bqa",
|
| 195 |
"url": "https://github.com/openai/evals",
|
| 196 |
+
"description": "Open-source evaluation framework and prompts for reproducible testing",
|
| 197 |
+
"title": "OpenAI Evals Framework",
|
| 198 |
+
"author": "OpenAI Research Team",
|
| 199 |
+
"organization": "OpenAI",
|
| 200 |
+
"date": "2024-04-15",
|
| 201 |
+
"documentType": "Code Repository"
|
| 202 |
+
},
|
| 203 |
+
{
|
| 204 |
+
"id": "proc-meem78d1-ur2bqb",
|
| 205 |
+
"url": "https://openai.com/research/gpt-4-technical-report",
|
| 206 |
+
"description": "Technical report containing detailed methodologies and experimental procedures",
|
| 207 |
+
"title": "GPT-4 Technical Report - Reproducibility Section",
|
| 208 |
+
"author": "OpenAI Safety Team",
|
| 209 |
+
"organization": "OpenAI",
|
| 210 |
+
"date": "2024-03-20",
|
| 211 |
+
"documentType": "Technical Report"
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"id": "proc-meem78d1-ur2bqc",
|
| 215 |
+
"url": "https://openai.com/safety/reproducibility-guidelines",
|
| 216 |
+
"description": "Internal guidelines and procedures for ensuring evaluation reproducibility",
|
| 217 |
+
"title": "Model Evaluation Reproducibility Guidelines v2.1",
|
| 218 |
+
"author": "AI Safety Division",
|
| 219 |
+
"organization": "OpenAI",
|
| 220 |
+
"date": "2024-02-10",
|
| 221 |
+
"documentType": "Policy Document"
|
| 222 |
}
|
| 223 |
],
|
| 224 |
"B3": [
|
| 225 |
{
|
| 226 |
+
"id": "proc-meem78d1-ur2bqd",
|
| 227 |
+
"url": "https://openai.com/research/expert-review-process",
|
| 228 |
+
"description": "Documentation of expert review process for language model capabilities",
|
| 229 |
+
"title": "Expert Review Process for Language Models",
|
| 230 |
+
"author": "External Advisory Board",
|
| 231 |
+
"organization": "OpenAI",
|
| 232 |
+
"date": "2024-04-01",
|
| 233 |
+
"documentType": "Process Documentation"
|
| 234 |
+
},
|
| 235 |
+
{
|
| 236 |
+
"id": "proc-meem78d1-ur2bqe",
|
| 237 |
+
"url": "https://openai.com/safety/feedback-incorporation",
|
| 238 |
+
"description": "Summary of expert feedback and how it was incorporated into final evaluations",
|
| 239 |
+
"title": "Expert Feedback Integration Report",
|
| 240 |
+
"author": "Safety Research Team",
|
| 241 |
+
"organization": "OpenAI",
|
| 242 |
+
"date": "2024-04-12",
|
| 243 |
+
"documentType": "Review Report"
|
| 244 |
}
|
| 245 |
],
|
| 246 |
"B4": [
|