@@ -1258,3 +1258,126 @@ def mock_process_chat(*args, **kwargs):
12581258
12591259assert has_chat_info
12601260assert has_error
1261+
1262+
def _token_usages(stream_text: str) -> list[dict]:
    """Collect every token_usage payload from a streamed chat response.

    The streaming endpoint emits JSON objects whose ``message`` field is
    itself a JSON-encoded string.  Messages of type ``token_usage`` carry
    the usage counters in ``content``; falsy or missing payloads are
    skipped.
    """
    usages = []
    for json_obj in helper.extract_stream(stream_text):
        if not json_obj.get("message"):
            continue
        inner_json = json.loads(json_obj["message"])
        if inner_json.get("type") == "token_usage" and inner_json.get("content"):
            usages.append(inner_json["content"])
    return usages


def test_chat_with_token_usage(test_client):
    """Test that token usage is properly stored and retrieved.

    This test verifies that:
    1. Token usage is stored during chat
    2. Token usage appears in chat response
    3. Token usage can be retrieved via get_chat API
    4. Token usage aggregation works correctly for multiple messages
    """
    from langchain_core.messages import AIMessage

    from dive_mcp_host.models.fake import FakeMessageToolModel

    client, app = test_client
    test_chat_id = str(uuid.uuid4())

    # Create FakeMessageToolModel with responses that include usage_metadata.
    # Note: the conftest fixture creates an initial chat which consumes one
    # response, so we provide responses for that plus our two test interactions.
    fake_responses = [
        AIMessage(content="I am a fake model."),  # For conftest's initial chat
        AIMessage(
            content="The answer is 4.",
            usage_metadata={
                "input_tokens": 10,
                "output_tokens": 8,
                "total_tokens": 18,
            },
        ),
        AIMessage(
            content="The answer is 6.",
            usage_metadata={
                "input_tokens": 12,
                "output_tokens": 9,
                "total_tokens": 21,
            },
        ),
    ]
    fake_model = FakeMessageToolModel(responses=fake_responses)

    # Replace the model in the host so the chat uses our scripted responses.
    app.dive_host["default"]._model = fake_model

    # First interaction
    response = client.post(
        "/api/chat",
        data={"chatId": test_chat_id, "message": "What is 2+2?"},
    )
    assert response.status_code == SUCCESS_CODE

    # Verify token usage in the streaming response.
    usages = _token_usages(response.text)
    assert usages, "Token usage not found in stream response"
    for token_usage in usages:
        assert token_usage.get("inputTokens") == 10
        assert token_usage.get("outputTokens") == 8

    # Second interaction
    response = client.post(
        "/api/chat",
        data={"chatId": test_chat_id, "message": "What is 3+3?"},
    )
    assert response.status_code == SUCCESS_CODE

    # The stream reports the latest message's token usage.
    usages = _token_usages(response.text)
    assert usages, "Second token usage not found"
    for token_usage in usages:
        assert token_usage.get("inputTokens") == 12
        assert token_usage.get("outputTokens") == 9

    # Test get_chat API to verify token usage is persisted and aggregated.
    response = client.get(f"/api/chat/{test_chat_id}")
    assert response.status_code == SUCCESS_CODE

    response_data = response.json()
    assert "data" in response_data, "data not in response"

    chat_data = response_data["data"]
    assert "token_usage" in chat_data, "token_usage not in get_chat response"

    token_usage = chat_data["token_usage"]
    assert token_usage is not None, "token_usage is None"
    # Verify aggregated token usage across both test interactions.
    assert token_usage["totalInputTokens"] == 22  # 10 + 12
    assert token_usage["totalOutputTokens"] == 17  # 8 + 9

    # Verify the individual messages also carry resource_usage.
    messages = chat_data.get("messages", [])
    assert len(messages) > 0, "No messages found"

    # Check AI messages have resource_usage
    ai_messages = [msg for msg in messages if msg["role"] == "assistant"]
    assert len(ai_messages) == 2, f"Expected 2 AI messages, got {len(ai_messages)}"

    # Verify first AI message
    first_ai_msg = ai_messages[0]
    assert "resource_usage" in first_ai_msg
    assert first_ai_msg["resource_usage"]["total_input_tokens"] == 10
    assert first_ai_msg["resource_usage"]["total_output_tokens"] == 8

    # Verify second AI message
    second_ai_msg = ai_messages[1]
    assert "resource_usage" in second_ai_msg
    assert second_ai_msg["resource_usage"]["total_input_tokens"] == 12
    assert second_ai_msg["resource_usage"]["total_output_tokens"] == 9