Commit 992c17e

test: calculate quality score
1 parent 4e4f2ba commit 992c17e

4 files changed: +135, -8 lines

jest.config.js (-1)

@@ -1,6 +1,5 @@
 module.exports = {
   "cache": false,
-  "rootDir": "src/tests/",
   "maxConcurrency": 1,
   "preset": "ts-jest",
   "testEnvironment": "node",
New test file (path not shown in this capture) (+125)

@@ -0,0 +1,125 @@
+import {
+  calculateQualScore,
+  estimateOptimalModel,
+  countTokensOfConversation,
+  gptRelevance,
+} from "./calculate-quality-score";
+import { Comment, Issue, User, UserType } from "../../../../types/payload";
+
+// jest.mock("openai", () => {
+//   return jest.fn().mockImplementation(() => {
+//     return {
+//       chat: {
+//         completions: {
+//           create: jest.fn().mockResolvedValue({
+//             choices: [
+//               {
+//                 message: {
+//                   content: "[1, 1, 0]",
+//                 },
+//               },
+//             ],
+//           }),
+//         },
+//       },
+//     };
+//   });
+// });
+
+describe("(**Real OpenAI API Call**) calculateQualScore", () => {
+  it("should calculate quality score", async () => {
+    const issue = { body: "my topic is about apples" } as Issue;
+    const comments: Comment[] = [
+      { body: "the apple is red", user: { type: UserType.User } as User } as Comment,
+      { body: "it is juicy", user: { type: UserType.User } as User } as Comment,
+      { body: "bananas are great", user: { type: UserType.User } as User } as Comment,
+    ];
+    const result = await calculateQualScore(issue, comments);
+    expect(result).toBeDefined();
+    expect(result.tokens).toBeGreaterThan(0);
+    expect(result.estimatedOptimalModel).toBeDefined();
+  });
+});
+
+describe("(**Real OpenAI API Call**) gptRelevance", () => {
+  it("should calculate gpt relevance", async () => {
+    const result = await gptRelevance("gpt-3.5-turbo", "my topic is about apples", [
+      "the apple is red",
+      "it is juicy",
+      "bananas are great",
+    ]);
+    expect(result[0]).toBeGreaterThan(0);
+    expect(result[1]).toBeGreaterThan(0);
+    expect(result[result.length - 1]).toBe(0);
+  });
+});
+
+describe("countTokensOfConversation", () => {
+  it("should count tokens of conversation", () => {
+    const issue = { body: "my topic is about apples" } as Issue;
+    const comments: Comment[] = [
+      { body: "the apple is red", user: { type: UserType.User } as User } as Comment,
+      { body: "it is juicy", user: { type: UserType.User } as User } as Comment,
+      { body: "bananas are great", user: { type: UserType.User } as User } as Comment,
+    ];
+    const result = countTokensOfConversation(issue, comments);
+    expect(result).toBeGreaterThan(0);
+  });
+});
+
+describe("estimateOptimalModel", () => {
+  it("should estimate optimal model", () => {
+    const result = estimateOptimalModel(5000);
+    expect(result).toBe("gpt-3.5-turbo-16k");
+  });
+});
+
+jest.mock("openai", () => {
+  // mock OPEN AI API
+  // the purpose of this is to test without real API calls in order to isolate issues
+  return jest.fn().mockImplementation(() => {
+    return {
+      chat: {
+        completions: {
+          create: jest.fn().mockResolvedValue({
+            choices: [
+              {
+                message: {
+                  content: "[1, 1, 0]",
+                },
+              },
+            ],
+          }),
+        },
+      },
+    };
+  });
+});
+
+describe("calculateQualScore", () => {
+  it("should calculate quality score", async () => {
+    const issue = { body: "issue body" } as Issue;
+    const comment = { body: "comment body", user: { type: "User" } } as Comment;
+    const comments = [comment, comment, comment] as Comment[];
+    const result = await calculateQualScore(issue, comments);
+    expect(result).toBeDefined();
+    expect(result.tokens).toBeGreaterThan(0);
+    expect(result.estimatedOptimalModel).toBeDefined();
+  });
+});
+
+// describe("countTokensOfConversation", () => {
+//   it("should count tokens of conversation", () => {
+//     const issue = { body: "issue body" } as Issue;
+//     const comments = [{ body: "comment body", user: { type: "User" } }] as Comment[];
+//     const result = countTokensOfConversation(issue, comments);
+//     expect(result).toBeGreaterThan(0);
+//   });
+// });
+
+describe("gptRelevance", () => {
+  it("should calculate gpt relevance", async () => {
+    const result = await gptRelevance("gpt-3.5-turbo", "issue body", ["comment body"]);
+    expect(result).toEqual([1, 1, 0]);
+  });
+});
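
One behavior worth noting when reading this file: Jest hoists jest.mock factory calls above the module's import statements, so the un-commented jest.mock("openai", ...) in the middle of the file is in effect for every suite here, including the two labeled "(**Real OpenAI API Call**)". A minimal sketch of one way to keep genuinely unmocked tests separate, assuming they are moved into a second file (the file name and assertions are hypothetical, not part of this commit):

// calculate-quality-score.real.test.ts (hypothetical file)
// No jest.mock("openai") here, so these suites hit the real API.
import { gptRelevance } from "./calculate-quality-score";

describe("gptRelevance (real OpenAI API call)", () => {
  it("scores the on-topic comment above the off-topic one", async () => {
    const result = await gptRelevance("gpt-3.5-turbo", "my topic is about apples", [
      "the apple is red",
      "bananas are great",
    ]);
    expect(result[0]).toBeGreaterThan(0);
    expect(result[result.length - 1]).toBeLessThan(result[0]);
  });
});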

src/handlers/comment/handlers/issue/calculate-quality-score.ts (+9, -7)
@@ -3,7 +3,8 @@ import OpenAI from "openai";
 import { encodingForModel } from "js-tiktoken";

 // TODO: make a filter to scrub out block quotes
-const botCommandsAndCommentsFilter = (comment) => !comment.body.startsWith("/") && comment.user.type === "User";
+const botCommandsAndCommentsFilter = (comment: Comment) =>
+  !comment.body.startsWith("/") && comment.user.type === "User";

 export async function calculateQualScore(issue: Issue, allComments: Comment[]) {
   const contributorComments = allComments.filter(botCommandsAndCommentsFilter);
@@ -18,12 +19,13 @@ export async function calculateQualScore(issue: Issue, allComments: Comment[]) {
   );

   if (relevance.length != contributorComments.length) {
+    console.log({ relevance, contributorComments });
     throw new Error("Relevance scores returned from OpenAI do not match the number of comments");
   }
   return { tokens: sumOfConversationTokens, estimatedOptimalModel };
 }

-function estimateOptimalModel(sumOfTokens: number) {
+export function estimateOptimalModel(sumOfTokens: number) {
   // we used the gpt-3.5-turbo encoder to estimate the amount of tokens.
   // this also doesn't include the overhead of the prompting etc so this is expected to be a slight underestimate
   if (sumOfTokens <= 4097) {
@@ -38,7 +40,7 @@ function estimateOptimalModel(sumOfTokens: number) {
   }
 }

-function countTokensOfConversation(issue: Issue, comments: Comment[]) {
+export function countTokensOfConversation(issue: Issue, comments: Comment[]) {
   const specificationComment = issue.body;
   if (!specificationComment) {
     throw new Error("Issue specification comment is missing");
@@ -60,12 +62,12 @@ function countTokensOfConversation(issue: Issue, comments: Comment[]) {
   return totalSumOfTokens;
 }

-async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CONVERSATION_STRINGS: string[]) {
+export async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CONVERSATION_STRINGS: string[]) {
   const openai = new OpenAI(); // apiKey: // defaults to process.env["OPENAI_API_KEY"]
   const PROMPT = `I need to evaluate the relevance of GitHub contributors' comments to a specific issue specification. Specifically, I'm interested in how much each comment helps to further define the issue specification or contributes new information or research relevant to the issue. Please provide a float between 0 and 1 to represent the degree of relevance. A score of 1 indicates that the comment is entirely relevant and adds significant value to the issue, whereas a score of 0 indicates no relevance or added value. Each contributor's comment is on a new line.\n\nIssue Specification:\n\`\`\`\n${ISSUE_SPECIFICATION_BODY}\n\`\`\`\n\nConversation:\n\`\`\`\n${CONVERSATION_STRINGS.join(
     "\n"
   )}\n\`\`\`\n\n\nTo what degree are each of the comments in the conversation relevant and valuable to further defining the issue specification? Please reply with an array of float numbers between 0 and 1, corresponding to each comment in the order they appear. Each float should represent the degree of relevance and added value of the comment to the issue.`;
-  console.trace({ PROMPT });
+  // console.trace({ PROMPT });
   const response: OpenAI.Chat.ChatCompletion = await openai.chat.completions.create({
     model: model,
     messages: [
@@ -81,7 +83,7 @@ async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CON
     presence_penalty: 0,
   });

-  const parsedResponse = JSON.parse(response.choices[0].message.content as "[1, 1, 0, 0]");
-  console.trace({ parsedResponse });
+  const parsedResponse = JSON.parse(response.choices[0].message.content as "[1, 1, 0.5, 0]");
+  // console.trace({ parsedResponse });
   return parsedResponse;
 }
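
The bodies of countTokensOfConversation and estimateOptimalModel fall mostly outside these hunks. For orientation, a minimal sketch of how token counting with js-tiktoken's encodingForModel could look, under the assumption (suggested by the in-code comment) that the gpt-3.5-turbo encoder is used and the specification plus each comment body are simply summed; the real implementation in this file may differ:

import { encodingForModel } from "js-tiktoken";

// Sketch only: countTokensOfConversation's actual body is not shown in this diff.
function countTokensSketch(specification: string, commentBodies: string[]): number {
  const encoder = encodingForModel("gpt-3.5-turbo"); // matches the comment about the gpt-3.5-turbo encoder
  return [specification, ...commentBodies].reduce(
    (sum, text) => sum + encoder.encode(text).length,
    0
  );
}

// The new tests expect estimateOptimalModel(5000) to return "gpt-3.5-turbo-16k",
// which is consistent with the 4097-token threshold visible in the hunk above.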

tsconfig.json (+1)
@@ -62,5 +62,6 @@
     "sourceMap": true
   },
   "include": ["src/"],
+  "exclude": ["src/tests/"],
   "compileOnSave": false
 }
