Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: tscircuit/prompt-benchmarks
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v0.0.40
Choose a base ref
...
head repository: tscircuit/prompt-benchmarks
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v0.0.41
Choose a head ref
  • 3 commits
  • 43 files changed
  • 2 contributors

Commits on Feb 17, 2025

  1. v0.0.40

    actions-user committed Feb 17, 2025
    Copy the full SHA
    ac364f9 View commit details

Commits on Feb 18, 2025

  1. refactored the project to use the new evaluation method

    + removed the old method
    ShiboSoftwareDev committed Feb 18, 2025
    Copy the full SHA
    1f8657b View commit details
  2. Merge pull request #41 from ShiboSoftwareDev/main

    refactored the project to use the new evaluation method
    ShiboSoftwareDev authored Feb 18, 2025
    Copy the full SHA
    224dd55 View commit details
Showing with 15 additions and 944 deletions.
  1. +2 −2 benchmarks/benchmark-local-circuit-error-correction.eval.ts
  2. +8 −10 benchmarks/benchmark-local-circuit.eval.ts
  3. 0 { → benchmarks}/problem-sets/problems-1.toml
  4. 0 { → benchmarks}/problem-sets/problems-2.toml
  5. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-02-05-004Z.txt → prompt-2025-02-18T12-39-20-326Z.txt}
  6. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-07-18-242Z.txt → prompt-2025-02-18T12-45-42-158Z.txt}
  7. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-10-53-144Z.txt → prompt-2025-02-18T12-48-05-585Z.txt}
  8. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-16-05-671Z.txt → prompt-2025-02-18T12-48-05-645Z.txt}
  9. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-17-44-810Z.txt → prompt-2025-02-18T12-49-18-664Z.txt}
  10. 0 benchmarks/prompt-logs/{prompt-2025-02-05T14-22-33-776Z.txt → prompt-2025-02-18T12-49-18-674Z.txt}
  11. 0 benchmarks/prompt-logs/{prompt-2025-02-06T20-55-26-590Z.txt → prompt-2025-02-18T13-00-27-788Z.txt}
  12. 0 benchmarks/prompt-logs/{prompt-2025-02-07T16-27-20-957Z.txt → prompt-2025-02-18T13-00-27-860Z.txt}
  13. 0 {tests/fixtures → lib/ai}/ask-about-output.ts
  14. +0 −19 lib/code-runner-utils/get-import-description.ts
  15. +0 −2 lib/code-runner-utils/index.ts
  16. +0 −70 lib/code-runner/CodeRunner.ts
  17. +0 −32 lib/code-runner/code-runner-context.ts
  18. +0 −8 lib/code-runner/index.ts
  19. +0 −26 lib/code-runner/pull-snippet-import.ts
  20. +0 −83 lib/code-runner/run-prompt.tsx
  21. +0 −126 lib/code-runner/safe-compile-dts.ts
  22. +0 −165 lib/code-runner/safe-evaluate-code.tsx
  23. +0 −1 lib/code-runner/safe-resolve-imports-and-evaluate-code.tsx
  24. +0 −24 lib/code-runner/transpile-code.tsx
  25. +1 −4 lib/index.ts
  26. +0 −54 lib/prompt-templates/create-circuit-board2.ts
  27. +0 −2 lib/prompt-templates/index.ts
  28. 0 lib/{code-runner-utils → utils}/get-imports-from-code.ts
  29. +3 −5 package.json
  30. +0 −1 tests/board-samples/sample1.ts
  31. +0 −1 tests/board-samples/sample2.ts
  32. +0 −2 tests/board-samples/sample3.ts
  33. +0 −1 tests/board-samples/sample4.ts
  34. +0 −27 tests/lib/CodeRunner.test.tsx
  35. +1 −1 tests/lib/get-imports-from-code.test.ts
  36. +0 −28 tests/lib/safe-compile-dts.test.ts
  37. +0 −33 tests/lib/safe-evaluate-code.test.tsx
  38. +0 −65 tests/lib/safe-transpile-code.test.ts
  39. +0 −1 tests/module-samples/sample1-na555.ts
  40. +0 −22 tests/prompts/create-circuit-board1/create-circuit-board1-sample1.test.ts
  41. +0 −22 tests/prompts/create-circuit-module1/create-circuit-module1-sample1.test.ts
  42. +0 −35 tests/smoke/create-circuit-board1-sample1.test.ts
  43. +0 −72 tests/smoke/create-circuit-board1-with-import.test.tsx
4 changes: 2 additions & 2 deletions benchmarks/benchmark-local-circuit-error-correction.eval.ts
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@ import path from "node:path"
import { createLocalCircuitPrompt } from "../lib/prompt-templates/create-local-circuit-prompt"
import { evalite } from "evalite"
import { CircuitScorer } from "./scorers/circuit-scorer"
import { askAboutOutput } from "tests/fixtures/ask-about-output"
import { askAboutOutput } from "lib/ai/ask-about-output"
import { cleanupAttemptLogs } from "lib/utils/cleanup-attempt-logs"
import { savePrompt } from "lib/utils/save-prompt"
import { loadProblems } from "lib/utils/load-problems"
@@ -16,7 +16,7 @@ evalite("Reasoning Electronics Engineer", {
data: async () => {
cleanupAttemptLogs(logsDir)
const problems = loadProblems(
path.join(__dirname, "..", "problem-sets", "problems-2.toml"),
path.join(__dirname, "problem-sets", "problems-2.toml"),
)
systemPrompt = await createLocalCircuitPrompt()

18 changes: 8 additions & 10 deletions benchmarks/benchmark-local-circuit.eval.ts
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import path from "node:path"
import { safeEvaluateCode } from "../lib/code-runner/safe-evaluate-code"
import { createLocalCircuitPrompt } from "../lib/prompt-templates/create-local-circuit-prompt"
import { evalite } from "evalite"
import { CircuitScorer } from "./scorers/circuit-scorer"
import { askAboutOutput } from "tests/fixtures/ask-about-output"
import { askAboutOutput } from "lib/ai/ask-about-output"
import { savePrompt } from "lib/utils/save-prompt"
import { loadProblems } from "lib/utils/load-problems"
import { askAi } from "lib/ai/ask-ai"
import { evaluateTscircuitCode } from "lib/ai/evaluate-tscircuit-code"

let systemPrompt = ""

evalite.experimental_skip("Electronics Engineer", {
evalite("Electronics Engineer", {
data: async () => {
const problems = loadProblems(
path.join(__dirname, "..", "problem-sets", "problems-1.toml"),
path.join(__dirname, "problem-sets", "problems-1.toml"),
)
systemPrompt = await createLocalCircuitPrompt()

@@ -37,17 +37,15 @@ evalite.experimental_skip("Electronics Engineer", {
const code = codeMatch ? codeMatch[1].trim() : ""
const codeBlockMatch = aiResponse.match(/```tsx[\s\S]*?```/)
const codeBlock = codeBlockMatch ? codeBlockMatch[0] : ""
const evaluation = safeEvaluateCode(code, {
outputType: "board",
preSuppliedImports: {},
})
const { success, error: evaluationError } =
await evaluateTscircuitCode(code)

const output: {
results: { result: boolean; expected: boolean }[]
code: string
} = { results: [], code: "" }

if (evaluation.success) {
if (success) {
output.code = codeBlock
for (const question of input.questions) {
output.results.push({
@@ -57,7 +55,7 @@ evalite.experimental_skip("Electronics Engineer", {
}
return output
}
return `${evaluation.error}. Code:\n${codeBlock}`
return `${evaluationError}. Code:\n${codeBlock}`
},
experimental_customColumns: async (result) => {
if (typeof result.output === "string")
File renamed without changes.
File renamed without changes.
File renamed without changes.
19 changes: 0 additions & 19 deletions lib/code-runner-utils/get-import-description.ts

This file was deleted.

2 changes: 0 additions & 2 deletions lib/code-runner-utils/index.ts

This file was deleted.

70 changes: 0 additions & 70 deletions lib/code-runner/CodeRunner.ts

This file was deleted.

32 changes: 0 additions & 32 deletions lib/code-runner/code-runner-context.ts

This file was deleted.

8 changes: 0 additions & 8 deletions lib/code-runner/index.ts

This file was deleted.

26 changes: 0 additions & 26 deletions lib/code-runner/pull-snippet-import.ts

This file was deleted.

83 changes: 0 additions & 83 deletions lib/code-runner/run-prompt.tsx

This file was deleted.

Loading