{ "cells": [ { "cell_type": "markdown", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "<a href=\"https://colab.research.google.com/github/vanderbilt-data-science/lo-achievement/blob/main/grading_from_json.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "kfO7rE64ZTI_" }, "outputs": [], "source": [ "!pip install openai" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "f26sZpe-MCCj" }, "outputs": [], "source": [ "import json\n", "import openai\n", "import os\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 614 }, "id": "BVTr_mR0XIJI", "outputId": "897e41a0-d5e1-4b5f-d254-0a6e0f6aa3fa" }, "outputs": [ { "data": { "text/html": [ "\n", " <div id=\"df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e\">\n", " <div class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>timestamp</th>\n", " <th>author</th>\n", " <th>message</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2023-06-07 08:16:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 1:\\nWhich of the following statements...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2023-06-07 08:16:30+00:00</td>\n", " <td>user</td>\n", " <td>C</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2023-06-07 08:17:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Correct! Option C is the correct answer...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2023-06-07 08:17:30+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 2:\\nWhich of the following expenses a...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2023-06-07 08:18:00+00:00</td>\n", " <td>user</td>\n", " <td>A</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>2023-06-07 08:18:30+00:00</td>\n", " <td>assistant</td>\n", " <td>I'm sorry, but your answer is incorrect...</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>2023-06-07 08:19:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 2 (Revised):\\nWhich of the following ...</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>2023-06-07 08:19:30+00:00</td>\n", " <td>user</td>\n", " <td>D</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>2023-06-07 08:20:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Correct! Option D is the correct answer...</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>2023-06-07 08:20:30+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 3:\\nWhat is the purpose of capitalizi...</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>2023-06-07 08:21:00+00:00</td>\n", " <td>user</td>\n", " <td>C</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>2023-06-07 08:21:30+00:00</td>\n", " <td>assistant</td>\n", " <td>Correct! Option C is the correct answer...</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>2023-06-07 08:22:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 4:\\nWhich financial statement provide...</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>2023-06-07 08:22:30+00:00</td>\n", " <td>user</td>\n", " <td>C</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>2023-06-07 08:23:00+00:00</td>\n", " <td>assistant</td>\n", " <td>Correct! Option C is the correct answer...</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>2023-06-07 08:23:30+00:00</td>\n", " <td>assistant</td>\n", " <td>Question 5:\\nWhat is the purpose of the matchi...</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>2023-06-07 08:24:00+00:00</td>\n", " <td>user</td>\n", " <td>B</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>2023-06-07 08:24:30+00:00</td>\n", " <td>assistant</td>\n", " <td>Correct! Option B is the correct answer...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", " \n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", " </svg>\n", " </button>\n", " \n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " flex-wrap:wrap;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-e24b7014-4d98-4fc5-9ff1-07fa5c26ba5e');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", " </div>\n", " " ], "text/plain": [ " timestamp author \\\n", "0 2023-06-07 08:16:00+00:00 assistant \n", "1 2023-06-07 08:16:30+00:00 user \n", "2 2023-06-07 08:17:00+00:00 assistant \n", "3 2023-06-07 08:17:30+00:00 assistant \n", "4 2023-06-07 08:18:00+00:00 user \n", "5 2023-06-07 08:18:30+00:00 assistant \n", "6 2023-06-07 08:19:00+00:00 assistant \n", "7 2023-06-07 08:19:30+00:00 user \n", "8 2023-06-07 08:20:00+00:00 assistant \n", "9 2023-06-07 08:20:30+00:00 assistant \n", "10 2023-06-07 08:21:00+00:00 user \n", "11 2023-06-07 08:21:30+00:00 assistant \n", "12 2023-06-07 08:22:00+00:00 assistant \n", "13 2023-06-07 08:22:30+00:00 user \n", "14 2023-06-07 08:23:00+00:00 assistant \n", "15 2023-06-07 08:23:30+00:00 assistant \n", "16 2023-06-07 08:24:00+00:00 user \n", "17 2023-06-07 08:24:30+00:00 assistant \n", "\n", " message \n", "0 Question 1:\\nWhich of the following statements... \n", "1 C \n", "2 Correct! Option C is the correct answer... \n", "3 Question 2:\\nWhich of the following expenses a... \n", "4 A \n", "5 I'm sorry, but your answer is incorrect... \n", "6 Question 2 (Revised):\\nWhich of the following ... \n", "7 D \n", "8 Correct! Option D is the correct answer... \n", "9 Question 3:\\nWhat is the purpose of capitalizi... \n", "10 C \n", "11 Correct! Option C is the correct answer... \n", "12 Question 4:\\nWhich financial statement provide... \n", "13 C \n", "14 Correct! Option C is the correct answer... \n", "15 Question 5:\\nWhat is the purpose of the matchi... \n", "16 B \n", "17 Correct! Option B is the correct answer... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_json('demo_json.json')\n", "pd.read_json('demo_json.json')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "anSNlvqlXh6i" }, "outputs": [], "source": [ "openai.api_key = \"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\"" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 627 }, "id": "udujJrX6SryU", "outputId": "9b182162-7c1c-4d5a-be56-16947ddcda33" }, "outputs": [ { "data": { "text/html": [ "\n", " <div id=\"df-5123f950-1dca-46a6-be4d-dab5de1f8899\">\n", " <div class=\"colab-df-container\">\n", " <div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Question</th>\n", " <th>Correct Answer</th>\n", " <th>User Answer</th>\n", " <th>Evaluation</th>\n", " <th>Score</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Question 1:\\nWhich of the following statements...</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>correct.</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Question 2 (Revised):\\nWhich of the following ...</td>\n", " <td>D</td>\n", " <td>D</td>\n", " <td>incorrect. the correct answer is d, software d...</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Question 3:\\nWhat is the purpose of capitalizi...</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>incorrect. the correct answer is b.</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Question 4:\\nWhich financial statement provide...</td>\n", " <td>C</td>\n", " <td>C</td>\n", " <td>correct</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Question 5:\\nWhat is the purpose of the matchi...</td>\n", " <td>B</td>\n", " <td>B</td>\n", " <td>correct</td>\n", " <td>3</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>\n", " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5123f950-1dca-46a6-be4d-dab5de1f8899')\"\n", " title=\"Convert this dataframe to an interactive table.\"\n", " style=\"display:none;\">\n", " \n", " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", " width=\"24px\">\n", " <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n", " <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n", " </svg>\n", " </button>\n", " \n", " <style>\n", " .colab-df-container {\n", " display:flex;\n", " flex-wrap:wrap;\n", " gap: 12px;\n", " }\n", "\n", " .colab-df-convert {\n", " background-color: #E8F0FE;\n", " border: none;\n", " border-radius: 50%;\n", " cursor: pointer;\n", " display: none;\n", " fill: #1967D2;\n", " height: 32px;\n", " padding: 0 0 0 0;\n", " width: 32px;\n", " }\n", "\n", " .colab-df-convert:hover {\n", " background-color: #E2EBFA;\n", " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", " fill: #174EA6;\n", " }\n", "\n", " [theme=dark] .colab-df-convert {\n", " background-color: #3B4455;\n", " fill: #D2E3FC;\n", " }\n", "\n", " [theme=dark] .colab-df-convert:hover {\n", " background-color: #434B5C;\n", " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", " fill: #FFFFFF;\n", " }\n", " </style>\n", "\n", " <script>\n", " const buttonEl =\n", " document.querySelector('#df-5123f950-1dca-46a6-be4d-dab5de1f8899 button.colab-df-convert');\n", " buttonEl.style.display =\n", " google.colab.kernel.accessAllowed ? 'block' : 'none';\n", "\n", " async function convertToInteractive(key) {\n", " const element = document.querySelector('#df-5123f950-1dca-46a6-be4d-dab5de1f8899');\n", " const dataTable =\n", " await google.colab.kernel.invokeFunction('convertToInteractive',\n", " [key], {});\n", " if (!dataTable) return;\n", "\n", " const docLinkHtml = 'Like what you see? Visit the ' +\n", " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", " + ' to learn more about interactive tables.';\n", " element.innerHTML = '';\n", " dataTable['output_type'] = 'display_data';\n", " await google.colab.output.renderOutput(dataTable, element);\n", " const docLink = document.createElement('div');\n", " docLink.innerHTML = docLinkHtml;\n", " element.appendChild(docLink);\n", " }\n", " </script>\n", " </div>\n", " </div>\n", " " ], "text/plain": [ " Question Correct Answer \\\n", "0 Question 1:\\nWhich of the following statements... C \n", "1 Question 2 (Revised):\\nWhich of the following ... D \n", "2 Question 3:\\nWhat is the purpose of capitalizi... C \n", "3 Question 4:\\nWhich financial statement provide... C \n", "4 Question 5:\\nWhat is the purpose of the matchi... B \n", "\n", " User Answer Evaluation Score \n", "0 C correct. 1 \n", "1 D incorrect. the correct answer is d, software d... 1 \n", "2 C incorrect. the correct answer is b. 1 \n", "3 C correct 2 \n", "4 B correct 3 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Initialize necessary variables\n", "prompt = \"\"\n", "question = \"\"\n", "correct_answer = \"\"\n", "user_answer = \"\"\n", "\n", "# Initialize score\n", "score = 0\n", "\n", "# Initialize an empty list to hold row data\n", "row_data = []\n", "\n", "for index, row in df.iterrows():\n", " author = row['author']\n", " message = row['message']\n", "\n", " # Choose the appropriate prompt based on the author\n", " if author == 'assistant':\n", " if 'Question' in message:\n", " question = message\n", " user_answer = '' # Reset user_answer after a new question\n", " elif 'Correct! Option' in message:\n", " correct_answer = message.split('Option ')[1][0]\n", " if user_answer: # If user_answer exists, make the API call\n", " prompt = f\"Given the following question:\\n{question}\\nThe student responded with: {user_answer}\\nIs the student's response correct or incorrect?\"\n", "\n", " # Make an API call to OpenAI\n", " api_response = openai.Completion.create(\n", " engine='text-davinci-003',\n", " prompt=prompt,\n", " max_tokens=100,\n", " temperature=0.7,\n", " n=1,\n", " stop=None\n", " )\n", "\n", " # Extract and evaluate the generated response\n", " generated_response = api_response.choices[0].text.strip().lower()\n", "\n", " # Update score based on generated_response\n", " if 'correct' in generated_response and 'incorrect' not in generated_response:\n", " score += 1\n", "\n", " # Create a dictionary for the current row\n", " row_dict = {\n", " 'Question': question,\n", " 'Correct Answer': correct_answer,\n", " 'User Answer': user_answer,\n", " 'Evaluation': generated_response,\n", " 'Score': score\n", " }\n", " # Append the row dictionary to row_data\n", " row_data.append(row_dict)\n", "\n", " elif author == 'user':\n", " user_answer = message\n", "\n", "# Create a DataFrame from row_data\n", "output_df = pd.DataFrame(row_data)\n", "output_df\n" ] } ], "metadata": { "colab": { "authorship_tag": "ABX9TyOn+FniXzrkHNKH5uAKgyUD", "include_colab_link": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 4 }