feat: calculate quality score seems to work

0x4007 · 0x4007 · commit 4e4f2bacb1f9 · 2023-10-19T16:37:57.000+09:00
diff --git a/.github/workflows/short-files.yml b/.github/workflows/short-files.yml
@@ -13,7 +13,7 @@ jobs:
 
       - name: Check file length
         run: |
-          IGNORE=("src/generatedFile1.ts" "src/generatedFile2.ts") # Add more files to ignore as needed
+          IGNORE=("src/adapters/supabase/types/database.ts" "src/generatedFile2.ts") # Add more files to ignore as needed
           find src -name "*.ts" -type f -exec bash -c '
             for ignore in "${IGNORE[@]}"; do
               if [[ "$1" == "$ignore" ]]; then
diff --git a/src/adapters/supabase/helpers/tables/logs.ts b/src/adapters/supabase/helpers/tables/logs.ts
@@ -1,6 +1,7 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 // This is disabled because logs should be able to log any type of data
 // Normally this is forbidden
+// TODO: break this apart into smaller files.
 
 import { SupabaseClient } from "@supabase/supabase-js";
 import Runtime from "../../../../bindings/bot-runtime";
diff --git a/src/handlers/comment/handlers/issue/calculate-quality-score.ts b/src/handlers/comment/handlers/issue/calculate-quality-score.ts
@@ -1,13 +1,26 @@
 import { Comment, Issue } from "../../../../types/payload";
-// import OpenAI from "openai";
+import OpenAI from "openai";
 import { encodingForModel } from "js-tiktoken";
 
+// TODO: make a filter to scrub out block quotes
 const botCommandsAndCommentsFilter = (comment) => !comment.body.startsWith("/") && comment.user.type === "User";
 
-export function calculateQualScore(issue: Issue, comments: Comment[]) {
-  const tokens = countTokensOfConversation(issue, comments);
-  const estimatedOptimalModel = estimateOptimalModel(tokens);
-  return { tokens, estimatedOptimalModel };
+export async function calculateQualScore(issue: Issue, allComments: Comment[]) {
+  const contributorComments = allComments.filter(botCommandsAndCommentsFilter);
+
+  const sumOfConversationTokens = countTokensOfConversation(issue, contributorComments);
+  const estimatedOptimalModel = estimateOptimalModel(sumOfConversationTokens);
+
+  const relevance = await gptRelevance(
+    estimatedOptimalModel,
+    issue.body,
+    contributorComments.map((comment) => comment.body)
+  );
+
+  if (relevance.length != contributorComments.length) {
+    throw new Error("Relevance scores returned from OpenAI do not match the number of comments");
+  }
+  return { tokens: sumOfConversationTokens, estimatedOptimalModel };
 }
 
 function estimateOptimalModel(sumOfTokens: number) {
@@ -26,40 +39,49 @@ function estimateOptimalModel(sumOfTokens: number) {
 }
 
 function countTokensOfConversation(issue: Issue, comments: Comment[]) {
-  const gpt3TurboEncoder = encodingForModel("gpt-3.5-turbo");
   const specificationComment = issue.body;
   if (!specificationComment) {
     throw new Error("Issue specification comment is missing");
   }
 
-  const contributorCommentsWithTokens = comments.filter(botCommandsAndCommentsFilter).map((comment) => {
+  const gpt3TurboEncoder = encodingForModel("gpt-3.5-turbo");
+  const contributorCommentsWithTokens = comments.map((comment) => {
     return {
       tokens: gpt3TurboEncoder.encode(comment.body),
       comment,
     };
   });
 
-  // const contributorCommentsTokens = contributorCommentsWithTokens.map(({ tokens }) => tokens);
   const sumOfContributorTokens = contributorCommentsWithTokens.reduce((acc, { tokens }) => acc + tokens.length, 0);
   const specificationTokens = gpt3TurboEncoder.encode(specificationComment);
   const sumOfSpecificationTokens = specificationTokens.length;
   const totalSumOfTokens = sumOfSpecificationTokens + sumOfContributorTokens;
-  // const estimatedOptimalModel = estimateOptimalModel(totalSumOfTokens);
 
-  // const buffer = {
-  //   totalSumOfTokens,
-  //   estimatedOptimalModel,
-  // };
-  // return buffer;
   return totalSumOfTokens;
 }
 
-// async function gpt() {
-//   // /v1/chat/completions
-//   const openai = new OpenAI(); // apiKey: // defaults to process.env["OPENAI_API_KEY"]
-//   const params: OpenAI.Chat.ChatCompletionCreateParams = {
-//     messages: [{ role: "user", content: "Say this is a test" }],
-//     model: "gpt-3.5-turbo",
-//   };
-//   const chatCompletion: OpenAI.Chat.ChatCompletion = await openai.chat.completions.create(params);
-// }
+async function gptRelevance(model: string, ISSUE_SPECIFICATION_BODY: string, CONVERSATION_STRINGS: string[]) {
+  const openai = new OpenAI(); // no apiKey passed: the client defaults to process.env["OPENAI_API_KEY"]
+  const PROMPT = `I need to evaluate the relevance of GitHub contributors' comments to a specific issue specification. Specifically, I'm interested in how much each comment helps to further define the issue specification or contributes new information or research relevant to the issue. Please provide a float between 0 and 1 to represent the degree of relevance. A score of 1 indicates that the comment is entirely relevant and adds significant value to the issue, whereas a score of 0 indicates no relevance or added value. Each contributor's comment is on a new line.\n\nIssue Specification:\n\`\`\`\n${ISSUE_SPECIFICATION_BODY}\n\`\`\`\n\nConversation:\n\`\`\`\n${CONVERSATION_STRINGS.join(
+    "\n"
+  )}\n\`\`\`\n\n\nTo what degree are each of the comments in the conversation relevant and valuable to further defining the issue specification? Please reply with an array of float numbers between 0 and 1, corresponding to each comment in the order they appear. Each float should represent the degree of relevance and added value of the comment to the issue.`;
+  console.trace({ PROMPT });
+  const response: OpenAI.Chat.ChatCompletion = await openai.chat.completions.create({
+    model: model,
+    messages: [
+      {
+        role: "system",
+        content: PROMPT,
+      },
+    ],
+    temperature: 1,
+    max_tokens: 1024,
+    top_p: 1,
+    frequency_penalty: 0,
+    presence_penalty: 0,
+  });
+
+  const parsedResponse = JSON.parse(response.choices[0].message.content as "[1, 1, 0, 0]");
+  console.trace({ parsedResponse });
+  return parsedResponse;
+}
diff --git a/src/handlers/comment/handlers/issue/calculate-typings.ts b/src/handlers/comment/handlers/issue/calculate-typings.ts
@@ -0,0 +1,141 @@
+// import { User } from "../../../../types";
+
+// type ContributionLocation = "issue" | "review";
+// type ContributionStyle = "issuer" | "assignee" | "collaborator" | "default";
+// type Role =
+//   | "issueIssuer"
+//   | "issueAssignee"
+//   | "issueCollaborator"
+//   | "issueDefault"
+//   | "reviewIssuer"
+//   | "reviewAssignee"
+//   | "reviewCollaborator"
+//   | "reviewDefault";
+
+// type DevPoolContributor = {
+//   contribution: {
+//     role: Role;
+//     style: ContributionStyle;
+//   };
+//   records: {
+//     comments: [
+//       {
+//         location: ContributionLocation;
+//         issueId: number;
+//         commentId: number;
+//         body: string;
+//         score: {
+//           quantitative: number;
+//           qualitative: string;
+//         };
+//       }
+//     ];
+//     review: [];
+//   };
+//   user: User;
+//   walletAddress: string;
+// };
+
+// type Payments = {
+//   contributors: User[];
+// };
+
+//
+
+type ContributionLocation = "issue" | "review";
+type ContributionStyle = "issuer" | "assignee" | "collaborator" | "default";
+type Role =
+  | "issueIssuer"
+  | "issueAssignee"
+  | "issueCollaborator"
+  | "issueDefault"
+  | "reviewIssuer"
+  | "reviewAssignee"
+  | "reviewCollaborator"
+  | "reviewDefault";
+
+type ReviewState = "commented" | "approved" | "requestChanges" | "dismissed";
+
+type CommentScoringConfig = {
+  // wordCredit: number; // credit per word
+  // listItemCredit: number; // credit per list item
+  // imageCredit: number; // credit per image
+  // linkCredit: number; // credit per link
+  // codeBlockCredit: number; // credit per code block
+};
+
+export type CommentScore = {
+  qualitative: number; // a float between 0 and 1
+  quantitative: number; // calculated based on CommentScoringConfig
+  finalScore: number; // qualitative * quantitative
+};
+
+type LabelAction = {
+  label: string;
+  added: boolean; // true if added, false if removed
+};
+
+type PaymentConfig = {
+  // Define how much each role and action is worth in monetary terms
+  [key in Role]: {
+    comment: number;
+    // labelPriority: number;
+    // labelTime: number;
+    // codeCommit: number;
+    // edit: number;
+    reviewState: {
+      [key in ReviewState]: number;
+    };
+    // timeSpent: number; // Per unit time
+  };
+};
+
+type ContributionRecord = {
+  comments: {
+    location: ContributionLocation;
+    issueId: number;
+    commentId: number;
+    body: string;
+    timestamp: string;
+    score: CommentScore;
+  }[];
+  // labels: {
+  //   issueId: number;
+  //   actions: LabelAction[];
+  //   timestamp: string;
+  // }[];
+  // commits: {
+  //   pullRequestId: number;
+  //   commitId: string;
+  //   timestamp: string;
+  // }[];
+  // edits: {
+  //   location: ContributionLocation;
+  //   issueId: number;
+  //   editedField: "description" | "comment"; // Add more fields if necessary
+  //   timestamp: string;
+  // }[];
+  reviewStates: {
+    pullRequestId: number;
+    state: ReviewState;
+    timestamp: string;
+  }[];
+  // timeSpent: number; // In some unit, e.g., minutes
+};
+
+type DevPoolContributor = {
+  contribution: {
+    role: Role;
+    style: ContributionStyle;
+  };
+  records: ContributionRecord;
+  user: User;
+  walletAddress: string;
+};
+
+type Payments = {
+  contributors: DevPoolContributor[];
+  totalPayment: number;
+};
+
+// Placeholder: the existing logic that uses these types belongs here.
diff --git a/src/handlers/comment/handlers/issue/issue-closed.ts b/src/handlers/comment/handlers/issue/issue-closed.ts