tscircuit · Feb 17, 2025 · Feb 18, 2025 · Feb 18, 2025
diff --git a/benchmarks/benchmark-local-circuit-error-correction.eval.ts b/benchmarks/benchmark-local-circuit-error-correction.eval.ts
@@ -2,7 +2,7 @@ import path from "node:path"
 import { createLocalCircuitPrompt } from "../lib/prompt-templates/create-local-circuit-prompt"
 import { evalite } from "evalite"
 import { CircuitScorer } from "./scorers/circuit-scorer"
-import { askAboutOutput } from "tests/fixtures/ask-about-output"
+import { askAboutOutput } from "lib/ai/ask-about-output"
 import { cleanupAttemptLogs } from "lib/utils/cleanup-attempt-logs"
 import { savePrompt } from "lib/utils/save-prompt"
 import { loadProblems } from "lib/utils/load-problems"
@@ -16,7 +16,7 @@ evalite("Reasoning Electronics Engineer", {
   data: async () => {
     cleanupAttemptLogs(logsDir)
     const problems = loadProblems(
-      path.join(__dirname, "..", "problem-sets", "problems-2.toml"),
+      path.join(__dirname, "problem-sets", "problems-2.toml"),
     )
     systemPrompt = await createLocalCircuitPrompt()
 

diff --git a/benchmarks/benchmark-local-circuit.eval.ts b/benchmarks/benchmark-local-circuit.eval.ts
@@ -1,19 +1,19 @@
 import path from "node:path"
-import { safeEvaluateCode } from "../lib/code-runner/safe-evaluate-code"
 import { createLocalCircuitPrompt } from "../lib/prompt-templates/create-local-circuit-prompt"
 import { evalite } from "evalite"
 import { CircuitScorer } from "./scorers/circuit-scorer"
-import { askAboutOutput } from "tests/fixtures/ask-about-output"
+import { askAboutOutput } from "lib/ai/ask-about-output"
 import { savePrompt } from "lib/utils/save-prompt"
 import { loadProblems } from "lib/utils/load-problems"
 import { askAi } from "lib/ai/ask-ai"
+import { evaluateTscircuitCode } from "lib/ai/evaluate-tscircuit-code"
 
 let systemPrompt = ""
 
-evalite.experimental_skip("Electronics Engineer", {
+evalite("Electronics Engineer", {
   data: async () => {
     const problems = loadProblems(
-      path.join(__dirname, "..", "problem-sets", "problems-1.toml"),
+      path.join(__dirname, "problem-sets", "problems-1.toml"),
     )
     systemPrompt = await createLocalCircuitPrompt()
 
@@ -37,17 +37,15 @@ evalite.experimental_skip("Electronics Engineer", {
     const code = codeMatch ? codeMatch[1].trim() : ""
     const codeBlockMatch = aiResponse.match(/```tsx[\s\S]*?```/)
     const codeBlock = codeBlockMatch ? codeBlockMatch[0] : ""
-    const evaluation = safeEvaluateCode(code, {
-      outputType: "board",
-      preSuppliedImports: {},
-    })
+    const { success, error: evaluationError } =
+      await evaluateTscircuitCode(code)
 
     const output: {
       results: { result: boolean; expected: boolean }[]
       code: string
     } = { results: [], code: "" }
 
-    if (evaluation.success) {
+    if (success) {
       output.code = codeBlock
       for (const question of input.questions) {
         output.results.push({
@@ -57,7 +55,7 @@ evalite.experimental_skip("Electronics Engineer", {
       }
       return output
     }
-    return `${evaluation.error}. Code:\n${codeBlock}`
+    return `${evaluationError}. Code:\n${codeBlock}`
   },
   experimental_customColumns: async (result) => {
     if (typeof result.output === "string")

diff --git a/problem-sets/problems-1.toml → benchmarks/problem-sets/problems-1.toml b/problem-sets/problems-1.toml → benchmarks/problem-sets/problems-1.toml
diff --git a/problem-sets/problems-2.toml → benchmarks/problem-sets/problems-2.toml b/problem-sets/problems-2.toml → benchmarks/problem-sets/problems-2.toml
diff --git a/...-logs/prompt-2025-02-05T14-02-05-004Z.txt → ...-logs/prompt-2025-02-18T12-39-20-326Z.txt b/...-logs/prompt-2025-02-05T14-02-05-004Z.txt → ...-logs/prompt-2025-02-18T12-39-20-326Z.txt
diff --git a/...-logs/prompt-2025-02-05T14-07-18-242Z.txt → ...-logs/prompt-2025-02-18T12-45-42-158Z.txt b/...-logs/prompt-2025-02-05T14-07-18-242Z.txt → ...-logs/prompt-2025-02-18T12-45-42-158Z.txt
diff --git a/...-logs/prompt-2025-02-05T14-10-53-144Z.txt → ...-logs/prompt-2025-02-18T12-48-05-585Z.txt b/...-logs/prompt-2025-02-05T14-10-53-144Z.txt → ...-logs/prompt-2025-02-18T12-48-05-585Z.txt
diff --git a/...-logs/prompt-2025-02-05T14-16-05-671Z.txt → ...-logs/prompt-2025-02-18T12-48-05-645Z.txt b/...-logs/prompt-2025-02-05T14-16-05-671Z.txt → ...-logs/prompt-2025-02-18T12-48-05-645Z.txt
diff --git a/...-logs/prompt-2025-02-05T14-17-44-810Z.txt → ...-logs/prompt-2025-02-18T12-49-18-664Z.txt b/...-logs/prompt-2025-02-05T14-17-44-810Z.txt → ...-logs/prompt-2025-02-18T12-49-18-664Z.txt
diff --git a/...-logs/prompt-2025-02-05T14-22-33-776Z.txt → ...-logs/prompt-2025-02-18T12-49-18-674Z.txt b/...-logs/prompt-2025-02-05T14-22-33-776Z.txt → ...-logs/prompt-2025-02-18T12-49-18-674Z.txt
diff --git a/...-logs/prompt-2025-02-06T20-55-26-590Z.txt → ...-logs/prompt-2025-02-18T13-00-27-788Z.txt b/...-logs/prompt-2025-02-06T20-55-26-590Z.txt → ...-logs/prompt-2025-02-18T13-00-27-788Z.txt
diff --git a/...-logs/prompt-2025-02-07T16-27-20-957Z.txt → ...-logs/prompt-2025-02-18T13-00-27-860Z.txt b/...-logs/prompt-2025-02-07T16-27-20-957Z.txt → ...-logs/prompt-2025-02-18T13-00-27-860Z.txt
diff --git a/tests/fixtures/ask-about-output.ts → lib/ai/ask-about-output.ts b/tests/fixtures/ask-about-output.ts → lib/ai/ask-about-output.ts
diff --git a/lib/code-runner-utils/get-import-description.ts b/lib/code-runner-utils/get-import-description.ts
diff --git a/lib/code-runner-utils/index.ts b/lib/code-runner-utils/index.ts
diff --git a/lib/code-runner/CodeRunner.ts b/lib/code-runner/CodeRunner.ts
diff --git a/lib/code-runner/code-runner-context.ts b/lib/code-runner/code-runner-context.ts
diff --git a/lib/code-runner/index.ts b/lib/code-runner/index.ts
diff --git a/lib/code-runner/pull-snippet-import.ts b/lib/code-runner/pull-snippet-import.ts
diff --git a/lib/code-runner/run-prompt.tsx b/lib/code-runner/run-prompt.tsx