Skip to content

Commit ddd77de

Browse files
committed
use 4o-mini in more places
1 parent fad216c commit ddd77de

4 files changed

+40
-25
lines changed

app/pipeline/chat/course_chat_pipeline.py

+20-13
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ def get_mastery(progress, confidence):
7070
class CourseChatPipeline(Pipeline):
7171
"""Course chat pipeline that answers course-related questions from students."""
7272

73-
llm: IrisLangchainChatModel
73+
llm_big: IrisLangchainChatModel
74+
llm_small: IrisLangchainChatModel
7475
pipeline: Runnable
7576
lecture_pipeline: LectureChatPipeline
7677
suggestion_pipeline: InteractionSuggestionPipeline
@@ -93,14 +94,20 @@ def __init__(
9394
self.event = event
9495

9596
# Set the langchain chat model
96-
request_handler = CapabilityRequestHandler(
97-
requirements=RequirementList(
98-
gpt_version_equivalent=4.5,
99-
)
100-
)
10197
completion_args = CompletionArguments(temperature=0.5, max_tokens=2000)
102-
self.llm = IrisLangchainChatModel(
103-
request_handler=request_handler, completion_args=completion_args
98+
self.llm_big = IrisLangchainChatModel(
99+
request_handler=CapabilityRequestHandler(
100+
requirements=RequirementList(
101+
gpt_version_equivalent=4.5,
102+
)
103+
), completion_args=completion_args
104+
)
105+
self.llm_small = IrisLangchainChatModel(
106+
request_handler=CapabilityRequestHandler(
107+
requirements=RequirementList(
108+
gpt_version_equivalent=4.25,
109+
)
110+
), completion_args=completion_args
104111
)
105112
self.callback = callback
106113

@@ -110,14 +117,14 @@ def __init__(
110117
self.citation_pipeline = CitationPipeline()
111118

112119
# Create the pipeline
113-
self.pipeline = self.llm | JsonOutputParser()
120+
self.pipeline = self.llm_big | JsonOutputParser()
114121
self.tokens = []
115122

116123
def __repr__(self):
117-
return f"{self.__class__.__name__}(llm={self.llm})"
124+
return f"{self.__class__.__name__}(llm_big={self.llm_big}, llm_small={self.llm_small})"
118125

119126
def __str__(self):
120-
return f"{self.__class__.__name__}(llm={self.llm})"
127+
return f"{self.__class__.__name__}(llm_big={self.llm_big}, llm_small={self.llm_small})"
121128

122129
@traceable(name="Course Chat Pipeline")
123130
def __call__(self, dto: CourseChatPipelineExecutionDTO, **kwargs):
@@ -395,7 +402,7 @@ def lecture_content_retrieval() -> str:
395402
# Unclear why this extra step is needed here (the exercise chat agent works without it), but it resolves the issue.
396403
params.update({"tools": tools})
397404
agent = create_tool_calling_agent(
398-
llm=self.llm, tools=tools, prompt=self.prompt
405+
llm=self.llm_big, tools=tools, prompt=self.prompt
399406
)
400407
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)
401408

@@ -404,7 +411,7 @@ def lecture_content_retrieval() -> str:
404411
for step in agent_executor.iter(params):
405412
print("STEP:", step)
406413
self._append_tokens(
407-
self.llm.tokens, PipelineEnum.IRIS_CHAT_COURSE_MESSAGE
414+
self.llm_big.tokens, PipelineEnum.IRIS_CHAT_COURSE_MESSAGE
408415
)
409416
if step.get("output", None):
410417
out = step["output"]

app/pipeline/chat/exercise_chat_agent_pipeline.py

+18-10
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ def map_message_role(role: IrisMessageRole) -> str:
9494
class ExerciseChatAgentPipeline(Pipeline):
9595
"""Exercise chat agent pipeline that answers exercise-related questions from students."""
9696

97-
llm: IrisLangchainChatModel
97+
llm_big: IrisLangchainChatModel
98+
llm_small: IrisLangchainChatModel
9899
pipeline: Runnable
99100
callback: ExerciseChatStatusCallback
100101
suggestion_pipeline: InteractionSuggestionPipeline
@@ -112,14 +113,22 @@ def __init__(
112113
super().__init__(implementation_id="exercise_chat_pipeline")
113114
# Set the langchain chat model
114115
completion_args = CompletionArguments(temperature=0.5, max_tokens=2000)
115-
self.llm = IrisLangchainChatModel(
116+
self.llm_big = IrisLangchainChatModel(
116117
request_handler=CapabilityRequestHandler(
117118
requirements=RequirementList(
118119
gpt_version_equivalent=4.5,
119120
),
120121
),
121122
completion_args=completion_args,
122123
)
124+
self.llm_small = IrisLangchainChatModel(
125+
request_handler=CapabilityRequestHandler(
126+
requirements=RequirementList(
127+
gpt_version_equivalent=4.25,
128+
),
129+
),
130+
completion_args=completion_args,
131+
)
123132
self.variant = variant
124133
self.event = event
125134
self.callback = callback
@@ -130,15 +139,15 @@ def __init__(
130139
self.retriever = LectureRetrieval(self.db.client)
131140
self.reranker_pipeline = RerankerPipeline()
132141
self.code_feedback_pipeline = CodeFeedbackPipeline()
133-
self.pipeline = self.llm | JsonOutputParser()
142+
self.pipeline = self.llm_big | JsonOutputParser()
134143
self.citation_pipeline = CitationPipeline()
135144
self.tokens = []
136145

137146
def __repr__(self):
138-
return f"{self.__class__.__name__}(llm={self.llm})"
147+
return f"{self.__class__.__name__}(llm_big={self.llm_big}, llm_small={self.llm_small})"
139148

140149
def __str__(self):
141-
return f"{self.__class__.__name__}(llm={self.llm})"
150+
return f"{self.__class__.__name__}(llm_big={self.llm_big}, llm_small={self.llm_small})"
142151

143152
@traceable(name="Exercise Chat Agent Pipeline")
144153
def __call__(self, dto: ExerciseChatPipelineExecutionDTO):
@@ -504,15 +513,14 @@ def lecture_content_retrieval() -> str:
504513
tool_list.append(lecture_content_retrieval)
505514
tools = generate_structured_tools_from_functions(tool_list)
506515
agent = create_tool_calling_agent(
507-
llm=self.llm, tools=tools, prompt=self.prompt
516+
llm=self.llm_big, tools=tools, prompt=self.prompt
508517
)
509518
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)
510519
self.callback.in_progress()
511520
out = None
512521
for step in agent_executor.iter(params):
513-
print("STEP:", step)
514522
self._append_tokens(
515-
self.llm.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_AGENT_MESSAGE
523+
self.llm_big.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_AGENT_MESSAGE
516524
)
517525
if step.get("output", None):
518526
out = step["output"]
@@ -525,13 +533,13 @@ def lecture_content_retrieval() -> str:
525533
]
526534
)
527535

528-
guide_response = (self.prompt | self.llm | StrOutputParser()).invoke(
536+
guide_response = (self.prompt | self.llm_small | StrOutputParser()).invoke(
529537
{
530538
"response": out,
531539
}
532540
)
533541
self._append_tokens(
534-
self.llm.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_AGENT_MESSAGE
542+
self.llm_small.tokens, PipelineEnum.IRIS_CHAT_EXERCISE_AGENT_MESSAGE
535543
)
536544
if "!ok!" in guide_response:
537545
print("Response is ok and not rewritten!!!")

app/pipeline/chat/interaction_suggestion_pipeline.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def __init__(self, variant: str = "default"):
6161
# Set the langchain chat model
6262
request_handler = CapabilityRequestHandler(
6363
requirements=RequirementList(
64-
gpt_version_equivalent=4.5,
64+
gpt_version_equivalent=4.25,
6565
json_mode=True,
6666
)
6767
)

app/pipeline/prompts/citation_prompt.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ Answer without citations:
2828
Paragraphs with their Lecture Names, Unit Names, Links and Page Numbers:
2929
{Paragraphs}
3030

31-
Answer with citations (ensure empty line between the message and the citations):
3231
If the answer actually does not contain any information from the paragraphs, please do not include any citations and return '!NONE!'.
32+
Original answer with citations (ensure two empty lines between the message and the citations):

0 commit comments

Comments
 (0)