-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrouter_example.py
More file actions
201 lines (168 loc) · 6.91 KB
/
router_example.py
File metadata and controls
201 lines (168 loc) · 6.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
Router/Retriever Logic Example
Demonstrates how the system decides which knowledge sources to query
"""
from enum import Enum
from typing import Dict, List, Optional, Tuple
class KnowledgeSource(Enum):
    """Knowledge source types"""
    # Short codes (M1-M4) identify each backing knowledge store.
    SESSION_MEMORY = "M1"    # what has been said earlier in this conversation
    RESUME_PROFILE = "M2"    # the user's resume / background data
    PREPARED_ANSWERS = "M3"  # pre-written material (STAR stories, technical playbooks)
    COMPANY_ROLE = "M4"      # company and job-requirement information
class Router:
"""
Intelligent router that decides which knowledge sources to prioritize
based on question content and conversation context
"""
def __init__(self):
# Keywords that indicate different question types
self.behavioral_keywords = [
"tell me about a time", "describe a situation", "conflict",
"leadership", "challenge", "difficult", "mistake", "failure"
]
self.experience_keywords = [
"your experience", "your project", "your role", "you worked",
"you have", "your background", "your skills"
]
self.technical_keywords = [
"how would you", "explain", "implement", "design", "architecture",
"algorithm", "optimize", "debug", "troubleshoot"
]
def route_query(
self,
question: str,
context: Dict = None
) -> List[Tuple[KnowledgeSource, float]]:
"""
Route query to knowledge sources with priority scores
Args:
question: The current interview question
context: Conversation context (history, follow-up flags, etc.)
Returns:
List of (KnowledgeSource, priority_score) tuples, sorted by priority
"""
question_lower = question.lower()
context = context or {}
# Check if this is a follow-up question
is_followup = context.get("is_followup", False)
has_history = context.get("has_history", False)
# Behavioral questions → Prioritize prepared answers
if any(keyword in question_lower for keyword in self.behavioral_keywords):
return [
(KnowledgeSource.PREPARED_ANSWERS, 1.0), # STAR stories
(KnowledgeSource.RESUME_PROFILE, 0.8), # User's actual stories
(KnowledgeSource.SESSION_MEMORY, 0.6), # Previous mentions
(KnowledgeSource.COMPANY_ROLE, 0.3)
]
# Experience questions → Prioritize resume and history
elif any(keyword in question_lower for keyword in self.experience_keywords):
return [
(KnowledgeSource.RESUME_PROFILE, 1.0), # User's resume
(KnowledgeSource.SESSION_MEMORY, 0.9), # What they've said
(KnowledgeSource.PREPARED_ANSWERS, 0.5), # Similar scenarios
(KnowledgeSource.COMPANY_ROLE, 0.4)
]
# Follow-up questions → Prioritize conversation history
elif is_followup and has_history:
return [
(KnowledgeSource.SESSION_MEMORY, 1.0), # Recent context
(KnowledgeSource.RESUME_PROFILE, 0.7), # Related info
(KnowledgeSource.PREPARED_ANSWERS, 0.5),
(KnowledgeSource.COMPANY_ROLE, 0.3)
]
# Technical questions → Mix of sources
elif any(keyword in question_lower for keyword in self.technical_keywords):
return [
(KnowledgeSource.PREPARED_ANSWERS, 0.9), # Technical playbooks
(KnowledgeSource.COMPANY_ROLE, 0.8), # Job requirements
(KnowledgeSource.RESUME_PROFILE, 0.7), # User's tech stack
(KnowledgeSource.SESSION_MEMORY, 0.5)
]
# Default: Balanced approach
else:
return [
(KnowledgeSource.RESUME_PROFILE, 0.8),
(KnowledgeSource.PREPARED_ANSWERS, 0.7),
(KnowledgeSource.SESSION_MEMORY, 0.6),
(KnowledgeSource.COMPANY_ROLE, 0.5)
]
def retrieve_from_sources(
self,
question: str,
question_vector: List[float],
routing: List[Tuple[KnowledgeSource, float]],
top_k: int = 5
) -> Dict[KnowledgeSource, List[Dict]]:
"""
Retrieve relevant chunks from prioritized knowledge sources
Args:
question: Original question text
question_vector: Embedding vector of the question
routing: Priority-ordered list of sources
top_k: Number of chunks to retrieve per source
Returns:
Dictionary mapping sources to retrieved chunks
"""
results = {}
for source, priority in routing:
# Adjust top_k based on priority
adjusted_k = int(top_k * priority)
if adjusted_k < 1:
adjusted_k = 1
# Simulate retrieval (replace with actual vector DB query)
chunks = self._query_vector_db(
source=source,
query_vector=question_vector,
top_k=adjusted_k
)
results[source] = chunks
return results
def _query_vector_db(
self,
source: KnowledgeSource,
query_vector: List[float],
top_k: int
) -> List[Dict]:
"""
Query vector database for a specific knowledge source
This is a placeholder - replace with actual vector DB implementation
"""
# Placeholder implementation
# In production, this would:
# 1. Connect to the appropriate vector DB index
# 2. Perform similarity search
# 3. Return chunks with metadata
return [
{
"text": f"Sample chunk from {source.value}",
"score": 0.85,
"metadata": {"source": source.value}
}
] * top_k
# Example usage
if __name__ == "__main__":
    router = Router()

    # (question, context) pairs demonstrating three routing paths:
    # behavioral, experience, and follow-up.
    demos = [
        ("Tell me about a time you faced a conflict with a team member", None),
        ("What was your experience with Kubernetes in your previous role?", None),
        ("Why did you choose that approach?", {"is_followup": True, "has_history": True}),
    ]

    for index, (question, ctx) in enumerate(demos):
        if index:
            # Blank separator line between examples (none after the last).
            print()
        routing = router.route_query(question, context=ctx)
        print(f"Question: {question}")
        print("Routing priority:")
        for source, priority in routing:
            print(f" {source.value}: {priority}")