Commit 992c17e

test: calculate quality score
1 parent 4e4f2ba commit 992c17e

4 files changed: +135, -8 lines

jest.config.js (-1)

@@ -1,6 +1,5 @@
 module.exports = {
   "cache": false,
-  "rootDir": "src/tests/",
   "maxConcurrency": 1,
   "preset": "ts-jest",
   "testEnvironment": "node",
New test file (path not shown in this capture) (+125)

@@ -0,0 +1,125 @@
+import {
+  calculateQualScore,
+  estimateOptimalModel,
+  countTokensOfConversation,
+  gptRelevance,
+} from "./calculate-quality-score";
+import { Comment, Issue, User, UserType } from "../../../../types/payload";
+
+// jest.mock("openai", () => {
+//   return jest.fn().mockImplementation(() => {
+//     return {
+//       chat: {
+//         completions: {
+//           create: jest.fn().mockResolvedValue({
+//             choices: [
+//               {
+//                 message: {
+//                   content: "[1, 1, 0]",
+//                 },
+//               },
+//             ],
+//           }),
+//         },
+//       },
+//     };
+//   });
+// });
+
+describe("(**Real OpenAI API Call**) calculateQualScore", () => {
+  it("should calculate quality score", async () => {
+    const issue = { body: "my topic is about apples" } as Issue;
+    const comments: Comment[] = [
+      { body: "the apple is red", user: { type: UserType.User } as User } as Comment,
+      { body: "it is juicy", user: { type: UserType.User } as User } as Comment,
+      { body: "bananas are great", user: { type: UserType.User } as User } as Comment,
+    ];
+    const result = await calculateQualScore(issue, comments);
+    expect(result).toBeDefined();
+    expect(result.tokens).toBeGreaterThan(0);
+    expect(result.estimatedOptimalModel).toBeDefined();
+  });
+});
+
+describe("(**Real OpenAI API Call**) gptRelevance", () => {
+  it("should calculate gpt relevance", async () => {
+    const result = await gptRelevance("gpt-3.5-turbo", "my topic is about apples", [
+      "the apple is red",
+      "it is juicy",
+      "bananas are great",
+    ]);
+    expect(result[0]).toBeGreaterThan(0);
+    expect(result[1]).toBeGreaterThan(0);
+    expect(result[result.length - 1]).toBe(0);
+  });
+});
+
+describe("countTokensOfConversation", () => {
+  it("should count tokens of conversation", () => {
+    const issue = { body: "my topic is about apples" } as Issue;
+    const comments: Comment[] = [
+      { body: "the apple is red", user: { type: UserType.User } as User } as Comment,
+      { body: "it is juicy", user: { type: UserType.User } as User } as Comment,
+      { body: "bananas are great", user: { type: UserType.User } as User } as Comment,
+    ];
+    const result = countTokensOfConversation(issue, comments);
+    expect(result).toBeGreaterThan(0);
+  });
+});
+
+describe("estimateOptimalModel", () => {
+  it("should estimate optimal model", () => {
+    const result = estimateOptimalModel(5000);
+    expect(result).toBe("gpt-3.5-turbo-16k");
+  });
+});
+
+jest.mock("openai", () => {
+  // mock OPEN AI API
+  // the purpose of this is to test without real API calls in order to isolate issues
+  return jest.fn().mockImplementation(() => {
+    return {
+      chat: {
+        completions: {
+          create: jest.fn().mockResolvedValue({
+            choices: [
+              {
+                message: {
+                  content: "[1, 1, 0]",
+                },
+              },
+            ],
+          }),
+        },
+      },
+    };
+  });
+});
+
+describe("calculateQualScore", () => {
+  it("should calculate quality score", async () => {
+    const issue = { body: "issue body" } as Issue;
+    const comment = { body: "comment body", user: { type: "User" } } as Comment;
+    const comments = [comment, comment, comment] as Comment[];
+    const result = await calculateQualScore(issue, comments);
+    expect(result).toBeDefined();
+    expect(result.tokens).toBeGreaterThan(0);
+    expect(result.estimatedOptimalModel).toBeDefined();
+  });
+});
+
+// describe("countTokensOfConversation", () => {
+//   it("should count tokens of conversation", () => {
+//     const issue = { body: "issue body" } as Issue;
+//     const comments = [{ body: "comment body", user: { type: "User" } }] as Comment[];
+//     const result = countTokensOfConversation(issue, comments);
+//     expect(result).toBeGreaterThan(0);
+//   });
+// });
+
+describe("gptRelevance", () => {
+  it("should calculate gpt relevance", async () => {
+    const result = await gptRelevance("gpt-3.5-turbo", "issue body", ["comment body"]);
+    expect(result).toEqual([1, 1, 0]);
+  });
+});
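
One behavior worth noting when reading this file: Jest hoists jest.mock factory calls above the module's import statements, so the un-commented jest.mock("openai", ...) in the middle of the file is in effect for every suite here, including the two labeled "(**Real OpenAI API Call**)". A minimal sketch of one way to keep genuinely unmocked tests separate, assuming they are moved into a second file (the file name and assertions are hypothetical, not part of this commit):

// calculate-quality-score.real.test.ts (hypothetical file)
// No jest.mock("openai") here, so these suites hit the real API.
import { gptRelevance } from "./calculate-quality-score";

describe("gptRelevance (real OpenAI API call)", () => {
  it("scores the on-topic comment above the off-topic one", async () => {
    const result = await gptRelevance("gpt-3.5-turbo", "my topic is about apples", [
      "the apple is red",
      "bananas are great",
    ]);
    expect(result[0]).toBeGreaterThan(0);
    expect(result[result.length - 1]).toBeLessThan(result[0]);
  });
});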

src/handlers/comment/handlers/issue/calculate-quality-score.ts (+9, -7)
@@ -3,7 +3,8 @@ import OpenAI from "openai";
 import { encodingForModel } from "js-tiktoken";

 // TODO: make a filter to scrub out block quotes
-const botCommandsAndCommentsFilter = (comment) => !comment.body.startsWith("/") && comment.user.type === "User";
+const botCommandsAndCommentsFilter = (comment: Comment) =>
+  !comment.body.startsWith("/") && comment.user.type === "User";

 export async function calculateQualScore(issue: Issue, allComments: Comment[]) {
   const contributorComments = allComments.filter(botCommandsAndCommentsFilter);
@@ -18,12 +19,13 @@ export async function calculateQualScore(issue: Issue, allComments: Comment[]) {
   );

   if (relevance.length != contributorComments.length) {
+    console.log({ relevance, contributorComments });
     throw new Error("Relevance scores returned from OpenAI do not match the number of comments");
   }
   return { tokens: sumOfConversationTokens, estimatedOptimalModel };
 }

-function estimateOptimalModel(sumOfTokens: number) {
+export function estimateOptimalModel(sumOfTokens: number) {
   // we used the gpt-3.5-turbo encoder to estimate the amount of tokens.
   // this also doesn't include the overhead of the prompting etc so this is expected to be a slight underestimate
   if (sumOfTokens <= 4097) {
@@ -38,7 +40,7 @@ function estimateOptimalModel(sumOfTokens: number) {
   }
 }

-function countTokensOfConversation(issue: Issue, comments: Comment[]) {
+export function countTokensOfConversation(issue: Issue, comments: Comment[]) {
   const specificationComment = issue.body;
   if (!specificationComment) {
     throw new Error("Issue specification comment is missing");
@@ -60,12 +62,12 @@ function countTokensOfConversation(issue: Issue, comments: Comment[]) {
   return totalSumOfTokens;
 }

-async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CONVERSATION_STRINGS: string[]) {
+export async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CONVERSATION_STRINGS: string[]) {
   const openai = new OpenAI(); // apiKey: // defaults to process.env["OPENAI_API_KEY"]
   const PROMPT = `I need to evaluate the relevance of GitHub contributors' comments to a specific issue specification. Specifically, I'm interested in how much each comment helps to further define the issue specification or contributes new information or research relevant to the issue. Please provide a float between 0 and 1 to represent the degree of relevance. A score of 1 indicates that the comment is entirely relevant and adds significant value to the issue, whereas a score of 0 indicates no relevance or added value. Each contributor's comment is on a new line.\n\nIssue Specification:\n\`\`\`\n${ISSUE_SPECIFICATION_BODY}\n\`\`\`\n\nConversation:\n\`\`\`\n${CONVERSATION_STRINGS.join(
     "\n"
   )}\n\`\`\`\n\n\nTo what degree are each of the comments in the conversation relevant and valuable to further defining the issue specification? Please reply with an array of float numbers between 0 and 1, corresponding to each comment in the order they appear. Each float should represent the degree of relevance and added value of the comment to the issue.`;
-  console.trace({ PROMPT });
+  // console.trace({ PROMPT });
   const response: OpenAI.Chat.ChatCompletion = await openai.chat.completions.create({
     model: model,
     messages: [
@@ -81,7 +83,7 @@ async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CON
     presence_penalty: 0,
   });

-  const parsedResponse = JSON.parse(response.choices[0].message.content as "[1, 1, 0, 0]");
-  console.trace({ parsedResponse });
+  const parsedResponse = JSON.parse(response.choices[0].message.content as "[1, 1, 0.5, 0]");
+  // console.trace({ parsedResponse });
   return parsedResponse;
 }
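
The bodies of countTokensOfConversation and estimateOptimalModel fall mostly outside these hunks. For orientation, a minimal sketch of how token counting with js-tiktoken's encodingForModel could look, under the assumption (suggested by the in-code comment) that the gpt-3.5-turbo encoder is used and the specification plus each comment body are simply summed; the real implementation in this file may differ:

import { encodingForModel } from "js-tiktoken";

// Sketch only: countTokensOfConversation's actual body is not shown in this diff.
function countTokensSketch(specification: string, commentBodies: string[]): number {
  const encoder = encodingForModel("gpt-3.5-turbo"); // matches the comment about the gpt-3.5-turbo encoder
  return [specification, ...commentBodies].reduce(
    (sum, text) => sum + encoder.encode(text).length,
    0
  );
}

// The new tests expect estimateOptimalModel(5000) to return "gpt-3.5-turbo-16k",
// which is consistent with the 4097-token threshold visible in the hunk above.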

tsconfig.json (+1)
@@ -62,5 +62,6 @@
     "sourceMap": true
   },
   "include": ["src/"],
+  "exclude": ["src/tests/"],
   "compileOnSave": false
 }
