-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathevaluation_results.jsonl
More file actions
5 lines (5 loc) · 18 KB
/
Copy pathevaluation_results.jsonl
File metadata and controls
5 lines (5 loc) · 18 KB
1
2
3
4
5
{"test_id": 1, "user_query": "Can you show me the location of Curry's art supply store on Queen Street West in Toronto on a map?", "full_prompt": "<bos><start_of_turn>developer\nCurrent date and time given in YYYY-MM-DDTHH:MM:SS format: 2026-01-23T21:20:31\nDay of week is Friday\nYou are a model that can do function calling with the following functions<start_function_declaration>declaration:send_email{description:<escape>Sends an email.<escape>,parameters:{properties:{body:{description:<escape>The body of the email.<escape>,type:<escape>STRING<escape>},subject:{description:<escape>The subject of the email.<escape>,type:<escape>STRING<escape>},to:{description:<escape>The email address of the recipient.<escape>,type:<escape>STRING<escape>}},required:[<escape>to<escape>,<escape>subject<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:show_map{description:<escape>Shows a location on the map.<escape>,parameters:{properties:{query:{description:<escape>The location to search for. May be the name of a place, a business, or an address.<escape>,type:<escape>STRING<escape>}},required:[<escape>query<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_calendar_event{description:<escape>Creates a new calendar event.<escape>,parameters:{properties:{datetime:{description:<escape>The date and time of the event in the format YYYY-MM-DDTHH:MM:SS.<escape>,type:<escape>STRING<escape>},title:{description:<escape>The title of the event.<escape>,type:<escape>STRING<escape>}},required:[<escape>title<escape>,<escape>datetime<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_off_flashlight{description:<escape>Turns the flashlight off.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_on_flashlight{description:<escape>Turns the flashlight on.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:open_wifi_settings{description:<escape>Opens the Wi-Fi settings.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_contact{description:<escape>Creates a contact in the phone's contact list.<escape>,parameters:{properties:{email:{description:<escape>The email address of the contact.<escape>,type:<escape>STRING<escape>},first_name:{description:<escape>The first name of the contact.<escape>,type:<escape>STRING<escape>},last_name:{description:<escape>The last name of the contact.<escape>,type:<escape>STRING<escape>},phone_number:{description:<escape>The phone number of the contact.<escape>,type:<escape>STRING<escape>}},required:[<escape>first_name<escape>,<escape>last_name<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nCan you show me the location of Curry's art supply store on Queen Street West in Toronto on a map?<end_of_turn>\n<start_of_turn>model\n", "expected_output": "<start_function_call>call:show_map{query:<escape>Curry's art supply store on Queen Street West in Toronto<escape>}<end_function_call><start_function_response>", "model_output": "<start_function_call>call:show_map{query:<escape>Currys art supply store on Queen Street West in Toronto<escape>}<end_function_call><start_function_response>", "is_exact_match": false}
{"test_id": 2, "user_query": "Please add a new contact named Kenji Tanaka with the phone number 03-3444-5555, and also schedule a calendar event titled 'Lunch with Kenji' for next Tuesday at 12:30 PM.", "full_prompt": "<bos><start_of_turn>developer\nCurrent date and time given in YYYY-MM-DDTHH:MM:SS format: 2026-05-16T18:38:44\nDay of week is Saturday\nYou are a model that can do function calling with the following functions<start_function_declaration>declaration:create_calendar_event{description:<escape>Creates a new calendar event.<escape>,parameters:{properties:{datetime:{description:<escape>The date and time of the event in the format YYYY-MM-DDTHH:MM:SS.<escape>,type:<escape>STRING<escape>},title:{description:<escape>The title of the event.<escape>,type:<escape>STRING<escape>}},required:[<escape>title<escape>,<escape>datetime<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:open_wifi_settings{description:<escape>Opens the Wi-Fi settings.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:show_map{description:<escape>Shows a location on the map.<escape>,parameters:{properties:{query:{description:<escape>The location to search for. May be the name of a place, a business, or an address.<escape>,type:<escape>STRING<escape>}},required:[<escape>query<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_on_flashlight{description:<escape>Turns the flashlight on.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_contact{description:<escape>Creates a contact in the phone's contact list.<escape>,parameters:{properties:{email:{description:<escape>The email address of the contact.<escape>,type:<escape>STRING<escape>},first_name:{description:<escape>The first name of the contact.<escape>,type:<escape>STRING<escape>},last_name:{description:<escape>The last name of the contact.<escape>,type:<escape>STRING<escape>},phone_number:{description:<escape>The phone number of the contact.<escape>,type:<escape>STRING<escape>}},required:[<escape>first_name<escape>,<escape>last_name<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:send_email{description:<escape>Sends an email.<escape>,parameters:{properties:{body:{description:<escape>The body of the email.<escape>,type:<escape>STRING<escape>},subject:{description:<escape>The subject of the email.<escape>,type:<escape>STRING<escape>},to:{description:<escape>The email address of the recipient.<escape>,type:<escape>STRING<escape>}},required:[<escape>to<escape>,<escape>subject<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_off_flashlight{description:<escape>Turns the flashlight off.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nPlease add a new contact named Kenji Tanaka with the phone number 03-3444-5555, and also schedule a calendar event titled 'Lunch with Kenji' for next Tuesday at 12:30 PM.<end_of_turn>\n<start_of_turn>model\n", "expected_output": "<start_function_call>call:create_contact{first_name:<escape>Kenji<escape>,last_name:<escape>Tanaka<escape>,phone_number:<escape>03-3444-5555<escape>}<end_function_call><start_function_call>call:create_calendar_event{datetime:<escape>2026-05-19T12:30:00<escape>,title:<escape>Lunch with Kenji<escape>}<end_function_call><start_function_response>", "model_output": "<start_function_call>call:create_contact{first_name:<escape>Kenji<escape>,last_name:<escape>Tanaka<escape>,phone_number:<escape>03-3444-5555<escape>}<end_function_call><start_function_call>call:create_calendar_event{datetime:<escape>2026-05-19T12:30:00<escape>,title:<escape>Lunch with Kenji<escape>}<end_function_call><start_function_response>", "is_exact_match": true}
{"test_id": 3, "user_query": "Please create a new calendar event titled 'Annual Dental Checkup' on Tuesday, April 22, 2025, at 10:30 AM.", "full_prompt": "<bos><start_of_turn>developer\nCurrent date and time given in YYYY-MM-DDTHH:MM:SS format: 2025-04-19T22:20:39\nDay of week is Saturday\nYou are a model that can do function calling with the following functions<start_function_declaration>declaration:send_email{description:<escape>Sends an email.<escape>,parameters:{properties:{body:{description:<escape>The body of the email.<escape>,type:<escape>STRING<escape>},subject:{description:<escape>The subject of the email.<escape>,type:<escape>STRING<escape>},to:{description:<escape>The email address of the recipient.<escape>,type:<escape>STRING<escape>}},required:[<escape>to<escape>,<escape>subject<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_contact{description:<escape>Creates a contact in the phone's contact list.<escape>,parameters:{properties:{email:{description:<escape>The email address of the contact.<escape>,type:<escape>STRING<escape>},first_name:{description:<escape>The first name of the contact.<escape>,type:<escape>STRING<escape>},last_name:{description:<escape>The last name of the contact.<escape>,type:<escape>STRING<escape>},phone_number:{description:<escape>The phone number of the contact.<escape>,type:<escape>STRING<escape>}},required:[<escape>first_name<escape>,<escape>last_name<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:open_wifi_settings{description:<escape>Opens the Wi-Fi settings.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_off_flashlight{description:<escape>Turns the flashlight off.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_calendar_event{description:<escape>Creates a new calendar event.<escape>,parameters:{properties:{datetime:{description:<escape>The date and time of the event in the format YYYY-MM-DDTHH:MM:SS.<escape>,type:<escape>STRING<escape>},title:{description:<escape>The title of the event.<escape>,type:<escape>STRING<escape>}},required:[<escape>title<escape>,<escape>datetime<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:show_map{description:<escape>Shows a location on the map.<escape>,parameters:{properties:{query:{description:<escape>The location to search for. May be the name of a place, a business, or an address.<escape>,type:<escape>STRING<escape>}},required:[<escape>query<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_on_flashlight{description:<escape>Turns the flashlight on.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nPlease create a new calendar event titled 'Annual Dental Checkup' on Tuesday, April 22, 2025, at 10:30 AM.<end_of_turn>\n<start_of_turn>model\n", "expected_output": "<start_function_call>call:create_calendar_event{datetime:<escape>2025-04-22T10:30:00<escape>,title:<escape>Annual Dental Checkup<escape>}<end_function_call><start_function_response>", "model_output": "<start_function_call>call:create_calendar_event{datetime:<escape>2025-04-22T10:30:00<escape>,title:<escape>Annual Dental Checkup<escape>}<end_function_call><start_function_response>", "is_exact_match": true}
{"test_id": 4, "user_query": "I need to access the Wi-Fi settings now. Could you open them?", "full_prompt": "<bos><start_of_turn>developer\nCurrent date and time given in YYYY-MM-DDTHH:MM:SS format: 2024-08-28T21:16:55\nDay of week is Wednesday\nYou are a model that can do function calling with the following functions<start_function_declaration>declaration:open_wifi_settings{description:<escape>Opens the Wi-Fi settings.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_calendar_event{description:<escape>Creates a new calendar event.<escape>,parameters:{properties:{datetime:{description:<escape>The date and time of the event in the format YYYY-MM-DDTHH:MM:SS.<escape>,type:<escape>STRING<escape>},title:{description:<escape>The title of the event.<escape>,type:<escape>STRING<escape>}},required:[<escape>title<escape>,<escape>datetime<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:send_email{description:<escape>Sends an email.<escape>,parameters:{properties:{body:{description:<escape>The body of the email.<escape>,type:<escape>STRING<escape>},subject:{description:<escape>The subject of the email.<escape>,type:<escape>STRING<escape>},to:{description:<escape>The email address of the recipient.<escape>,type:<escape>STRING<escape>}},required:[<escape>to<escape>,<escape>subject<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_off_flashlight{description:<escape>Turns the flashlight off.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_on_flashlight{description:<escape>Turns the flashlight on.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:show_map{description:<escape>Shows a location on the map.<escape>,parameters:{properties:{query:{description:<escape>The location to search for. May be the name of a place, a business, or an address.<escape>,type:<escape>STRING<escape>}},required:[<escape>query<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_contact{description:<escape>Creates a contact in the phone's contact list.<escape>,parameters:{properties:{email:{description:<escape>The email address of the contact.<escape>,type:<escape>STRING<escape>},first_name:{description:<escape>The first name of the contact.<escape>,type:<escape>STRING<escape>},last_name:{description:<escape>The last name of the contact.<escape>,type:<escape>STRING<escape>},phone_number:{description:<escape>The phone number of the contact.<escape>,type:<escape>STRING<escape>}},required:[<escape>first_name<escape>,<escape>last_name<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nI need to access the Wi-Fi settings now. Could you open them?<end_of_turn>\n<start_of_turn>model\n", "expected_output": "<start_function_call>call:open_wifi_settings{}<end_function_call><start_function_response>", "model_output": "<start_function_call>call:open_wifi_settings{}<end_function_call><start_function_response>", "is_exact_match": true}
{"test_id": 5, "user_query": "Please schedule a calendar event called 'Team Sync Meeting' for September 19th, 2025, at 10:30 AM, and also send an email to sara.khalid@globalcorp.net with the subject 'Meeting scheduled' and the body 'The link will be sent separately.'", "full_prompt": "<bos><start_of_turn>developer\nCurrent date and time given in YYYY-MM-DDTHH:MM:SS format: 2025-09-10T11:23:20\nDay of week is Wednesday\nYou are a model that can do function calling with the following functions<start_function_declaration>declaration:show_map{description:<escape>Shows a location on the map.<escape>,parameters:{properties:{query:{description:<escape>The location to search for. May be the name of a place, a business, or an address.<escape>,type:<escape>STRING<escape>}},required:[<escape>query<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_calendar_event{description:<escape>Creates a new calendar event.<escape>,parameters:{properties:{datetime:{description:<escape>The date and time of the event in the format YYYY-MM-DDTHH:MM:SS.<escape>,type:<escape>STRING<escape>},title:{description:<escape>The title of the event.<escape>,type:<escape>STRING<escape>}},required:[<escape>title<escape>,<escape>datetime<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:send_email{description:<escape>Sends an email.<escape>,parameters:{properties:{body:{description:<escape>The body of the email.<escape>,type:<escape>STRING<escape>},subject:{description:<escape>The subject of the email.<escape>,type:<escape>STRING<escape>},to:{description:<escape>The email address of the recipient.<escape>,type:<escape>STRING<escape>}},required:[<escape>to<escape>,<escape>subject<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:open_wifi_settings{description:<escape>Opens the Wi-Fi settings.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:create_contact{description:<escape>Creates a contact in the phone's contact list.<escape>,parameters:{properties:{email:{description:<escape>The email address of the contact.<escape>,type:<escape>STRING<escape>},first_name:{description:<escape>The first name of the contact.<escape>,type:<escape>STRING<escape>},last_name:{description:<escape>The last name of the contact.<escape>,type:<escape>STRING<escape>},phone_number:{description:<escape>The phone number of the contact.<escape>,type:<escape>STRING<escape>}},required:[<escape>first_name<escape>,<escape>last_name<escape>],type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_on_flashlight{description:<escape>Turns the flashlight on.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><start_function_declaration>declaration:turn_off_flashlight{description:<escape>Turns the flashlight off.<escape>,parameters:{type:<escape>OBJECT<escape>}}<end_function_declaration><end_of_turn>\n<start_of_turn>user\nPlease schedule a calendar event called 'Team Sync Meeting' for September 19th, 2025, at 10:30 AM, and also send an email to sara.khalid@globalcorp.net with the subject 'Meeting scheduled' and the body 'The link will be sent separately.'<end_of_turn>\n<start_of_turn>model\n", "expected_output": "<start_function_call>call:create_calendar_event{datetime:<escape>2025-09-19T10:30:00<escape>,title:<escape>Team Sync Meeting<escape>}<end_function_call><start_function_call>call:send_email{body:<escape>The link will be sent separately.<escape>,subject:<escape>Meeting scheduled<escape>,to:<escape>sara.khalid@globalcorp.net<escape>}<end_function_call><start_function_response>", "model_output": "<start_function_call>call:create_calendar_event{datetime:<escape>2025-09-19T10:30:00<escape>,title:<escape>Team Sync Meeting<escape>}<end_function_call><start_function_call>call:send_email{body:<escape>The link will be sent separately.<escape>,subject:<escape>Meeting scheduled<escape>,to:<escape>sara.khalid@globalcorp.net<escape>}<end_function_call><start_function_response>", "is_exact_match": true}