<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v14i1e80205</article-id><article-id pub-id-type="doi">10.2196/80205</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Large Language Model&#x2013;Enabled Editing of Patient Audio Interviews From &#x201C;This Is My Story&#x201D; Conversations: Comparative Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Bains</surname><given-names>Bikram</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Rapuri</surname><given-names>Sampath</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Robitaille</surname><given-names>Edgar</given-names></name><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Jonathan</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Khera</surname><given-names>Arnav</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gomez</surname><given-names>Catalina</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Reyes</surname><given-names>Eduardo</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Perry</surname><given-names>Cole</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wilson</surname><given-names>Jason</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Tracey</surname><given-names>Elizabeth</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Biomedical Engineering, Johns Hopkins 
University</institution><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff2"><institution>School of Medicine, Johns Hopkins University</institution><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Computer Science, Johns Hopkins University</institution><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff4"><institution>Technology Innovation Center, Johns Hopkins Medicine</institution><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff5"><institution>Division of Spiritual Care and Chaplaincy, Johns Hopkins Medicine</institution><addr-line>1800 Orleans St</addr-line><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Coristine</surname><given-names>Andrew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Kernberg</surname><given-names>Annessa</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Lin</surname><given-names>Kuan-Hsun</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Thawinwisan</surname><given-names>Nattawipa</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Biswas</surname><given-names>Sandipan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Elizabeth Tracey, MS, Division of Spiritual Care and Chaplaincy, Johns Hopkins Medicine, 1800 Orleans St, Baltimore, MD, 21218, United States, 1 410-215-7749; <email>etracey@jhmi.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>9</day><month>1</month><year>2026</year></pub-date><volume>14</volume><elocation-id>e80205</elocation-id><history><date date-type="received"><day>07</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>04</day><month>11</month><year>2025</year></date><date date-type="accepted"><day>04</day><month>11</month><year>2025</year></date></history><copyright-statement>&#x00A9; Bikram Bains, Sampath Rapuri, Edgar Robitaille, Jonathan Wang, Arnav Khera, Catalina Gomez, Eduardo Reyes, Cole Perry, Jason Wilson, Elizabeth Tracey. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 9.1.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2026/1/e80205"/><abstract><sec><title>Background</title><p>This Is My Story (TIMS) was started by Chaplain Elizabeth Tracey to promote a humanistic approach to medicine. Patients in the TIMS program take part in a guided conversation in which a chaplain interviews either the patient or their loved one. The interviewee is asked four questions designed to elicit clinically actionable information, which has been shown to improve communication between patients and medical providers and to strengthen providers&#x2019; empathy. The original recorded conversation is edited into a condensed audio file approximately 1 minute and 15 seconds in length and placed in the electronic health record, where it is easily accessible to all providers caring for the patient.</p></sec><sec><title>Objective</title><p>TIMS is active at the Johns Hopkins Hospital, has shown value in supporting provider empathy and communication, and is unique in using audio recordings for this purpose. As the program expands, the time and resources required to manually edit audio conversations pose a barrier to adoption. To address this, we propose an automated solution that uses a large language model to create meaningful and concise audio summaries.</p></sec><sec sec-type="methods"><title>Methods</title><p>We analyzed 24 TIMS audio interviews and created three edited versions of each: (1) expert-edited, (2) artificial intelligence (AI)&#x2013;edited using a fully automated large language model pipeline, and (3) novice-edited by two medical students trained by the expert. A second expert, blinded to the editor, rated the audio interviews in a randomized order. This expert scored both the audio quality and content quality of each interview on 5-point Likert scales. We quantified transcript similarity to the expert-edited reference using lexical and semantic similarity metrics and identified omitted content relative to that same reference.</p></sec><sec sec-type="results"><title>Results</title><p>Audio quality (flow, pacing, clarity) and content quality (coherence, relevance, nuance) were each rated on 5-point Likert scales. Expert-edited interviews received the highest mean ratings for both audio quality (4.84) and content quality (4.83). Novice-edited interviews scored moderately (3.84 audio, 3.63 content), while AI-edited interviews scored slightly lower (3.49 audio, 3.20 content). Novice and AI edits were rated significantly lower than the expert edits (<italic>P</italic>&#x003C;.001) but not significantly different from each other. AI- and novice-edited interview transcripts had comparable overlap with the expert reference transcript, while qualitative review found frequent omissions of patient identity, actionable insights, and overall context in both the AI- and novice-edited interviews. AI editing was fully automated and significantly reduced editing time compared to both human editors.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>An AI-based editing pipeline can generate TIMS audio summaries with content and audio quality comparable to that of novice human editors given one hour of training. 
AI significantly reduces editing time and removes the need for manual training; with further validation, it could offer a solution to scale TIMS to a wide range of health care settings.</p></sec></abstract><kwd-group><kwd>audio recording</kwd><kwd>communication</kwd><kwd>This Is My Story</kwd><kwd>TIMS</kwd><kwd>distress reduction</kwd><kwd>empathy</kwd><kwd>patient interview</kwd><kwd>provider/patient communication</kwd><kwd>large language model</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Recent statistics show that health worker burnout is a widespread issue [<xref ref-type="bibr" rid="ref1">1</xref>]. A 2022 Centers for Disease Control and Prevention report found that 46% of health workers &#x201C;often&#x201D; or &#x201C;very often&#x201D; felt burned out, increasing from 32% in 2018. Physicians in the United States also report similarly high burnout rates (56% in 2021, 53% in 2022, and 48% in 2023), with an all-time high physician burnout rate of 63% during the pandemic [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Some reasons for this burnout include excessive work hours, administrative burdens (such as electronic health record documentation), insufficient support staff, and limited organizational and leadership support [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. These chronic stresses impact both patients and clinicians. For example, Andhavarapu et al [<xref ref-type="bibr" rid="ref9">9</xref>] reported that symptoms of depression, anxiety, and posttraumatic stress disorder were reported in 34% of the health care workers surveyed (with 14% reporting severe posttraumatic stress disorder), with the highest prevalence among nursing staff (42.8%) and physicians (25.2%). Similarly, the National Academies&#x2019; 2019 report found that 35% to 54% of US nurses and physicians and 45% to 60% of medical students and residents experience substantial burnout symptoms throughout their careers [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Empathy can serve as a solution, reducing widespread symptoms of burnout while promoting professional fulfillment and strengthening connection with patients [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Already, health care organizations have recognized the value of empathy and designed personal and patient-centered interventions within their clinical workflows [<xref ref-type="bibr" rid="ref15">15</xref>]. For example, the This Is My Story (TIMS) program was developed by Chaplain Elizabeth Tracey at the Johns Hopkins Hospital to bring a more patient-centered and empathetic approach to medicine [<xref ref-type="bibr" rid="ref16">16</xref>]. Patients who participate in the TIMS program take part in a conversation with a chaplain; if the patient is noncommunicative, a chaplain has a conversation with the patient&#x2019;s loved ones. These conversations are guided by four questions: How do you prefer to be addressed? What brings you joy? What does your medical team need to know to care for you best? 
What brings you peace?</p><p>In the words of Dr Charles Cummings, Director Emeritus of Otolaryngology at Johns Hopkins, <italic>&#x201C;TIMS is about helping us get back to the proper essence of medicine...it&#x2019;s essential to get to know the patient if we&#x2019;re going to be able to help that patient as best we can&#x201D;</italic> [<xref ref-type="bibr" rid="ref17">17</xref>]. TIMS conversations have demonstrated clear benefits for clinical communication and empathy, providing an opportunity for meaningful connection with patients to directly target the emotional aspects of burnout [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>]. Past studies by Tracey et al [<xref ref-type="bibr" rid="ref21">21</xref>] document the positive outcomes the program has had on patients, their families, and the care team. For example, one previous study reported a 74% increase in staff empathy for patients and a 99% improvement in interactions between patients&#x2019; loved ones and the medical team. Although the program has also been shown to improve staff empathy and reduce distress by 69%, the process of recording and editing conversations can be labor-intensive [<xref ref-type="bibr" rid="ref21">21</xref>]. By automating the conversation summarization process, these benefits can be made accessible to a wider range of patients and medical institutions.</p><p>In this study, we propose an automated editing pipeline for TIMS interviews using a large language model (LLM) and evaluate whether artificial intelligence (AI)&#x2013;edited interviews are a viable alternative to manual editing. Because medical students were frequently trained to edit TIMS interviews during the pandemic, they provide a reasonable baseline for performance comparison. We designed our analysis around two key hypotheses: (1) that AI-edited interviews maintain similar quality to expert-edited interviews in both audio and content metrics, and (2) that AI-edited interviews can be produced more quickly than those produced by expert or novice editors.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We used a within&#x2010;subjects, single&#x2010;group design in which our reviewer evaluated interviews across three independent editing conditions (expert, AI, novice). Editors were eligible if they had professional experience interpreting patient&#x2013;clinician audio interviews. Two chaplains from the Johns Hopkins Hospital took part in the study: one served as the expert editor and the other as the blinded reviewer. The novice editors were two medical students who joined the study team from the Johns Hopkins School of Medicine, each having completed an hour-long training session on audio editing with the expert editor (Chaplain Elizabeth Tracey). 
The two novice editors each edited 12 randomly assigned audio interviews, mirroring the normal workflow for the TIMS initiative without the AI tool.</p></sec><sec id="s2-2"><title>Patient Audio Dataset</title><p>We used a retrospective dataset of audio recordings from 24 patients admitted to the Johns Hopkins Neurosciences Critical Care Unit, a tertiary intensive care unit for patients with diseases of the central or peripheral nervous systems, drawn from departments such as neurosurgery and neurology.</p></sec><sec id="s2-3"><title>AI Editing Pipeline</title><p>Audio recordings were first transcribed using Nvidia&#x2019;s Parakeet-TDT 0.6B v2 automatic speech recognition (ASR) model (<xref ref-type="fig" rid="figure1">Figure 1</xref>) [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. ChatGPT-4o [<xref ref-type="bibr" rid="ref25">25</xref>] processed the transcript using a custom prompt. This prompt asked the model to extract only patient statements that provided essential information about their condition, experiences, feelings, or personal identity. It was also directed to exclude any filler or repetitive content and to keep humorous or insightful remarks that preserve the patient&#x2019;s identity and humanity.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Overview of artificial intelligence&#x2013;automated workflow for summarizing This Is My Story audio interviews. LLM: large language model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig01.png"/></fig><p>The model was instructed to include the interviewer&#x2019;s four core questions for the TIMS program while excluding interjections or examples provided by the interviewer. It was further instructed to return a processed transcript that it estimated would correspond to approximately 1.5 minutes of edited audio. The full model prompt is available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The timestamps of the selected segments were then used to splice together the final audio file. Examples of cases in which ASR output might have impacted the LLM output are presented in Table S2 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. This process was entirely automated and was run on an NVIDIA GeForce RTX 4090 with 24 GB of VRAM.</p></sec><sec id="s2-4"><title>Survey Design</title><p>To evaluate both audio quality and content quality for each edited TIMS interview, we created an online questionnaire for our reviewer to complete, with each question rated on a 5-point Likert scale (1=poor, 5=excellent). The survey included questions grouped into an audio quality domain (natural flow, pausing/spacing, transitions, pacing, and overall listenability of the interview) and a content quality domain (conversation flow, speaker/topic tracking, patient representation, understanding of patient characteristics, preparation of the providers/care team for interaction, nuance of the patient&#x2019;s life, and relevance of details), with all details shown in <xref ref-type="other" rid="box1">Textbox 1</xref> below. This same survey was used independently for each edited interview for consistent comparison.</p><boxed-text id="box1"><title>Summary of survey domains and questions.</title><p><bold>Audio quality</bold></p><p>1. How natural is the conversation flow?</p><p>2. How effective are the pauses and spaces between answers?</p><p>3. 
How smooth are the transitions between questions and answers?</p><p>4. How does the pacing of the audio feel overall?</p><p>5. Overall, how easy is the interview to listen to?</p><p>6. Can you understand the flow of the conversation?</p><p><bold>Content quality</bold></p><p>7. Can you keep track of the speaker and the topic?</p><p>8. Is the patient well represented in this conversation?</p><p>9. Do you understand the patient&#x2019;s likes/dislikes (proclivities, idiosyncrasies, etc)?</p><p>10. Do you feel prepared to interact with the patient in a meaningful way?</p><p>11. How well does the conversation capture the depth and nuance of the patient&#x2019;s experience?</p><p>12. Does the conversation convey the patient&#x2019;s emotions effectively?</p><p>13. How relevant are the details shared during the conversation for understanding the patient&#x2019;s story?</p><p>14. How engaging is the conversation in terms of sustaining your interest in the patient&#x2019;s story?</p></boxed-text><p>The survey was created and administered using Google Forms. Content experts (ET, JW, CG) provided qualitative feedback on an initial draft of the questions. This feedback focused on improving the clarity and relevance of each item. The questions were then refined based on this input to better capture the intended domains of audio and content quality. Because the survey instrument is novel, its reliability and validity have not yet been formally established. Questions 1, 5, 10, and 11 gauged general sentiment toward the conversation being rated, while the remaining, more pointed questions probed where the edited audios might differ in score (<xref ref-type="other" rid="box1">Textbox 1</xref>). Since assessing the flow of conversation relies on both audio and content quality, a flow question was included in both parts of the survey. The individual survey responses are available in Figure S1 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><p>Before rating each edited version, the reviewer listened to the corresponding raw interview as a baseline reference for context. The reviewer was also asked to listen to two calibration audios before rating: one poorly edited file expected to score low across all questions and one expert-edited interview expected to score highly. This anchored quality expectations at each extreme of the scale. The survey was completed independently for each condition, with the order of the audio files randomized by condition for each patient to reduce potential order effects.</p><p>Editing durations were recorded automatically for the AI pipeline and self&#x2010;reported by each novice editor. Expert editing times were not collected due to the limitations of the retrospective dataset.</p></sec><sec id="s2-5"><title>Content Analysis</title><p>For each edited interview, we generated text transcriptions to study differences in content across the three editing conditions. Text transcriptions were created using the Parakeet transcription model. The novice- and AI-edited interviews were compared to the expert edits, and three members of the study team analyzed the differences. Types of errors were identified for both the AI- and novice-edited conditions across all samples. The most common types of errors were then formalized and described in the results (<xref ref-type="table" rid="table1">Table 1</xref>). 
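</p><p>For reference, the three stages of the automated pipeline described in the AI Editing Pipeline subsection can be sketched as follows. This is an illustrative reconstruction rather than the study&#x2019;s exact implementation: it assumes the NeMo toolkit, the OpenAI Python client, and pydub, and the prompt shown is a placeholder (the full prompt appears in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, and the released code is referenced in the Data Availability section).</p><preformat preformat-type="code">
# Illustrative sketch of the three pipeline stages; not the study's exact code.
import json

import nemo.collections.asr as nemo_asr
from openai import OpenAI
from pydub import AudioSegment

asr_model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
client = OpenAI()

def edit_interview(audio_path, out_path):
    # Stage 1: transcribe the raw interview with segment-level timestamps.
    hyp = asr_model.transcribe([audio_path], timestamps=True)[0]
    numbered = "\n".join(
        "[{:.2f}-{:.2f}] {}".format(s["start"], s["end"], s["segment"])
        for s in hyp.timestamp["segment"]
    )
    # Stage 2: ask the LLM which time spans to keep (placeholder prompt; the
    # real prompt enforces the four TIMS questions and a ~1.5-minute target).
    resp = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system",
             "content": "Select the timestamped segments to keep and return "
                        "a JSON list of [start, end] pairs in seconds."},
            {"role": "user", "content": numbered},
        ],
    )
    spans = json.loads(resp.choices[0].message.content)
    # Stage 3: splice the kept spans into the condensed audio file.
    raw = AudioSegment.from_file(audio_path)
    edited = AudioSegment.empty()
    for start, end in spans:
        edited += raw[int(start * 1000):int(end * 1000)]
    edited.export(out_path, format="wav")
</preformat><p>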
Content similarity between each condition and the expert edits was quantitatively measured using ROUGE-L, ROUGE-1, ROUGE-2, bidirectional encoder representations from transformers (BERT), and METEOR on interview transcripts, following studies of medical document summarization [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. All metrics measure the similarity of words between summaries and assign a score from 0 to 1, with the ROUGE scores measuring lexical overlap [<xref ref-type="bibr" rid="ref28">28</xref>]. METEOR and the BERT scores were used to assess semantic overlap; both factor in semantic similarity between words rather than exact word choice, and METEOR also assigns a penalty for differences in phrasing.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Common omissions and inaccurate portrayals by artificial intelligence and novice editors.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Type of error</td><td align="left" valign="bottom">Artificial intelligence errors</td><td align="left" valign="bottom">Novice errors</td></tr></thead><tbody><tr><td align="left" valign="top">Omission of actionable patient insights</td><td align="left" valign="top">Failure to include specific interests or hobbies of the patient [talking about favorite musical artists bringing her joy]: &#x201C;She likes Anita Baker, Regina Belle, and Gladys Knight.&#x201D;</td><td align="left" valign="top">Failure to include information about the patient&#x2019;s comfort [informing about her medical condition to better care for the patient]: &#x201C;She has had eczema since she was about three or four, so her skin has to stay moisturized.&#x201D;</td></tr><tr><td align="left" valign="top">Omission of patient identity and empathy</td><td align="left" valign="top">Failure to include details relevant to understanding the patient&#x2019;s background [explaining his occupation and hobbies]: &#x201C;On the church side, he loves to teach. He is a pastor.&#x201D;</td><td align="left" valign="top">Failure to mention important characteristics about the patient [claiming that her time at Hopkins has made her more independent and resilient]: &#x201C;She [patient] worked at Johns Hopkins for over 30 years.&#x201D;</td></tr><tr><td align="left" valign="top">Omission of emotional background</td><td align="left" valign="top">Failure to include framing details relevant for a patient&#x2019;s background [talking about what brings the patient joy]: &#x201C;Me [patient&#x2019;s husband] &#x2026; We&#x2019;ve been married 20 years.&#x201D;</td><td align="left" valign="top">Failure to include details relevant for a patient&#x2019;s emotional state and anxiety [explaining how he mainly only trusts his partner for everything]: &#x201C;[He has] a little bit of a trust issue with the medical field.&#x201D;</td></tr><tr><td align="left" valign="top">Poor narrative fluency</td><td align="left" valign="top">Prompting questions fail to be edited out of the interview: &#x201C;Introduce yourself and tell me how you&#x2019;re related to the patient.&#x201D;</td><td align="left" valign="top">Filler words before prompting questions fail to be edited out of the interview: &#x201C;That&#x2019;s great! 
So, what brings the patient peace?&#x201D;</td></tr></tbody></table></table-wrap><p>Relationships between audio length and content quality were also explored through simple linear regression of each ROUGE metric on the duration of the original interview (in minutes).</p></sec><sec id="s2-6"><title>Statistical Analysis</title><p>We conducted a Friedman test to compare audio&#x2010;quality and content&#x2010;quality ratings across conditions, with Bonferroni&#x2010;corrected Wilcoxon signed-rank post hoc tests to adjust for multiple comparisons. Editing times were analyzed with an independent&#x2010;samples <italic>t</italic> test to test for significant differences between the two novice editors. We also examined the relationship between the raw interview length and lexical and semantic score overlap for each editor type using Pearson correlation, testing whether each slope differed from zero. We then performed an analysis of covariance with transcript length, editor type, and their interaction term to determine whether the slope of the length-overlap relationship differed between AI and novice editors.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>Ethical approval was not required for this study as it involved a secondary analysis of anonymized data. The original data collection was reviewed and approved by the Johns Hopkins institutional review board, with informed consent obtained from all subjects; the consent allowed for future data use, and participants who declined such use were not included in this secondary analysis. This study was conducted in accordance with all local, institutional, national, and international regulations on human subject research.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Quantitative Comparison of Editing Quality</title><p>Across the three types of editors, the mean audio quality ratings were 3.49 (AI-edited), 3.84 (novice-edited), and 4.84 (expert-edited). Similarly, the mean content quality ratings were 3.20 (AI-edited), 3.63 (novice-edited), and 4.83 (expert-edited). The AI-edited interviews demonstrated higher variability in rated content quality compared to the novice-edited interviews (SD 0.73 and SD 0.68, respectively). Both interview types were similarly varied in their audio quality ratings (SD 0.77 and 0.78, respectively). <xref ref-type="fig" rid="figure2">Figure 2</xref> highlights the distributions of content and audio quality ratings across each type of edited interview.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>(<bold>A</bold>) A comparison of mean audio quality ratings across the three types of editors aggregated across all 24 audio interviews. (<bold>B</bold>) All editors follow similar trends for the content quality ratings. AI: artificial intelligence.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig02.png"/></fig><p>We observed significant differences in audio and content quality between the novice- and expert-edited interviews (<italic>P</italic>&#x003C;.001) as well as between the AI- and expert-edited interviews (<italic>P</italic>&#x003C;.001). No significant differences were noted between the AI- and novice-edited interviews for either content quality (<italic>P</italic>=.31) or audio quality (<italic>P</italic>=.33). 
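</p><p>These comparisons follow the procedure outlined in the Statistical Analysis subsection. A minimal sketch of that procedure is given below, assuming one mean rating per interview and condition; the values shown are dummy placeholders, not study data.</p><preformat preformat-type="code">
# Illustrative sketch of the rating comparison; dummy values, not study data.
from itertools import combinations

import numpy as np
from scipy.stats import friedmanchisquare, wilcoxon

rng = np.random.default_rng(0)
ratings = {  # one mean quality rating per interview and condition (dummy)
    "expert": rng.uniform(4.5, 5.0, 24),
    "novice": rng.uniform(3.0, 4.5, 24),
    "ai": rng.uniform(2.5, 4.5, 24),
}

# Omnibus Friedman test across the three within-subject conditions.
stat, p = friedmanchisquare(*ratings.values())
print("Friedman: chi2={:.2f}, P={:.3f}".format(stat, p))

# Bonferroni-corrected Wilcoxon signed-rank post hoc tests.
pairs = list(combinations(ratings, 2))
for a, b in pairs:
    _, p_raw = wilcoxon(ratings[a], ratings[b])
    print("{} vs {}: adjusted P={:.3f}".format(a, b, min(1.0, p_raw * len(pairs))))
</preformat><p>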
A detailed breakdown of the ratings for each individual survey question across all patient interviews can be found in Figure S1 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><p>To understand the variability between the novice editors, we broke down the differences in rated audio and content quality in <xref ref-type="fig" rid="figure3">Figure 3</xref>. Between the two novice editors, we found that novice editor 1 demonstrated a mean content quality score of 3.81 (SD 0.83) and a mean audio quality score of 3.52 (SD 0.76). The second editor&#x2019;s mean content quality score was 3.88 (SD 0.52), with a mean audio quality score of 3.75 (SD 0.81). However, these intragroup differences were not significant for either content quality (<italic>P</italic>&#x003E;.99) or audio quality (<italic>P</italic>=.51). All statistical scores of content similarity highlighted the similarities between the AI and novice editors, and we found no statistically significant differences on any metric between the two types of editors (<italic>P</italic>&#x003E;.05); detailed scores for each metric are contained in Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. <xref ref-type="fig" rid="figure4">Figure 4</xref> shows the mean statistical scores across all audio interviews for both the AI and novice editors.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>(<bold>A</bold>) A comparison of mean audio quality ratings between the two novice editors, each of whom edited 12 randomly assigned audio interviews. (<bold>B</bold>) Both editors achieved comparable content quality ratings, but the second novice editor exhibited markedly lower variability.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Mean statistical scores across all 24 audio interviews for both the artificial intelligence and novice editors. AI: artificial intelligence. BERT: bidirectional encoder representations from transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig04.png"/></fig></sec><sec id="s3-2"><title>Qualitative Error Analysis</title><p>We qualitatively compared the AI- and novice-edited interviews to the expert-edited interviews and found four recurring types of errors across many of the interviews, which are described in <xref ref-type="table" rid="table1">Table 1</xref>. Many of the errors were similar across the AI- and novice-edited interviews.</p></sec><sec id="s3-3"><title>Editing Efficiency</title><p>On average, the novice editors took 29.54 minutes to edit each interview (SD 12.69 min). However, we observed a nonsignificant difference (<italic>P</italic>=.06) in editing time between the two novice editors, with the first editor taking 24.50 minutes (SD 11.18 min) and the second taking 34.58 minutes (SD 13.08 min) per interview. <xref ref-type="fig" rid="figure5">Figure 5</xref> highlights the variability in editing time between the novice editors. Mean editing times for the expert editor are unavailable as they were not recorded. Based on anecdotal evidence from the expert editor, each audio interview required around 5 to 10 minutes to edit. 
In contrast to both the expert and novice editors, our automated AI-editing pipeline took less than 10 seconds from ingestion of the raw audio interview to the saving of the edited interview.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Time taken by each novice editor to edit each interview.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig05.png"/></fig></sec><sec id="s3-4"><title>Impact of Interview Length on Editing Quality</title><p>We investigated whether longer raw audio interviews were associated with changes in lexical and semantic overlap with the expert reference, focusing on ROUGE-L. For AI-edited interviews, there was a significant negative correlation between transcript length and ROUGE-L scores (<italic>r</italic>=&#x2212;0.58, <italic>R</italic><sup>2</sup>=0.34; <xref ref-type="fig" rid="figure6">Figure 6</xref>). The same trend was seen for novice-edited interviews, with a negative correlation (<italic>r</italic>=&#x2212;0.52, <italic>R</italic><sup>2</sup>=0.27). Both slopes were significantly different from zero (<italic>P</italic>&#x003C;.05), suggesting that longer interviews were associated with lower transcript overlap with the expert reference for both conditions. Regression plots for ROUGE-1, ROUGE-2, METEOR, and BERT scores showed comparable patterns and are provided in Figure S2 in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Linear regression of the raw transcript length versus ROUGE-L, reflecting the change in lexical similarity to the expert reference with longer interview times.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v14i1e80205_fig06.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>Our study compares the listening experience of patient conversations summarized by an expert editor, novice editors, and ChatGPT-4o. There was no significant difference in content and audio quality between the AI and novice editors, and both showed varying performance across the samples. Further analysis of the edited transcripts revealed that both groups omitted key details. The expert editor had significantly higher audio and content quality ratings than both experimental groups and less variability across samples. The exclusion of natural pauses between phrases and of auditory cues that break up the conversation contributed to lower audio quality scores.</p></sec><sec id="s4-2"><title>Comparison to Prior Work</title><p>As burnout remains high among health care workers, TIMS provides an opportunity for meaningful connections with patients to target the emotional aspect of burnout. However, the manual effort needed to edit audio is a limiting factor in the program&#x2019;s widespread adoption. Although the audio-editing pipeline introduced here was originally developed to expand the TIMS program, these results are also broadly relevant to the use of AI in clinical practice, and this study examines a previously unexplored application of LLMs: editing audio content in a medical context. 
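</p><p>For readers who wish to reproduce the overlap and length analyses reported in the Results, a minimal sketch follows. It assumes the open-source rouge-score package, substitutes dummy durations and overlap values for study data, and uses a toy transcript pair adapted from a quotation in <xref ref-type="table" rid="table1">Table 1</xref>.</p><preformat preformat-type="code">
# Illustrative sketch of the overlap and interview-length analyses.
import numpy as np
from rouge_score import rouge_scorer
from scipy.stats import linregress, pearsonr

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)

def rouge_l(reference, candidate):
    # F-measure of longest-common-subsequence overlap, between 0 and 1.
    return scorer.score(reference, candidate)["rougeL"].fmeasure

# Overlap of an edited transcript with the expert reference (toy example).
expert = "she likes anita baker regina belle and gladys knight"
ai_edit = "she likes anita baker and gladys knight"
print("ROUGE-L:", rouge_l(expert, ai_edit))

# Regress per-interview ROUGE-L on raw interview duration in minutes (dummy).
rng = np.random.default_rng(0)
duration = rng.uniform(5, 30, 24)
overlap = np.clip(0.9 - 0.01 * duration + rng.normal(0, 0.05, 24), 0, 1)
r, _ = pearsonr(duration, overlap)
fit = linregress(duration, overlap)
print("r={:.2f}, R^2={:.2f}, slope P={:.3f}".format(r, r ** 2, fit.pvalue))
</preformat><p>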
Previous studies have investigated ChatGPT as a clinical decision-making tool, a note-taking aid, and a tool for analyzing literature [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref35">35</xref>]. There have also been studies demonstrating that ChatGPT&#x2019;s capability to elicit empathetic feelings in an emergency setting can even exceed that of clinicians [<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref38">38</xref>]. However, using a summarization task to build empathy in a third-party listener has not been investigated. In this study, we evaluated whether an LLM is able to extract emotionally relevant details from a conversation. Additionally, we provide a replicable framework for integrating LLMs into the analysis of patient audio for a broad variety of applications within health care settings. We found that our pipeline faced issues similar to those raised in previous studies examining ChatGPT&#x2019;s ability to summarize in a medical context. Kernberg et al [<xref ref-type="bibr" rid="ref39">39</xref>] reported that 58% of structured medical notes from patient-physician interactions omitted important information. In our study, a manual analysis of the transcripts likewise revealed that details considered important to a patient&#x2019;s story were often omitted, reflecting a key disadvantage of LLMs noted in the literature. ChatGPT also tends to vary widely in the quality of its responses across samples. This variability was also evident in the novice editors&#x2019; audio and content quality ratings (SD 0.78 and SD 0.68, respectively), whereas the expert editor&#x2019;s performance was consistent.</p><p>In addition to the survey, ROUGE scores (ROUGE-1, ROUGE-2, ROUGE-L) for the novice and AI editors indicated a high level of word overlap with the expert-edited transcript, with no significant differences between the two groups. High (~0.9) BERT scores (BERT-recall, BERT-precision, BERT-<italic>F</italic><sub>1</sub>) were also reported, indicating a high degree of semantic overlap that was not necessarily reflected in the ROUGE score. There was a statistically significant negative correlation between interview length and the ROUGE-L score for both AI and novice editors (<italic>P&#x003C;</italic>.05), indicating that longer interviews tended to have less lexical overlap with the expert reference. This implies that there may be an audio length beyond which AI editing becomes less suitable, which will become more apparent as longer audios are recorded. These results from an established tool align with the insights from the survey, suggesting some level of construct validity for the survey questions.</p></sec><sec id="s4-3"><title>Limitations</title><p>This study has key limitations alongside its strengths. First, no standardized or validated survey instrument was available, so we needed to introduce a novel survey to assess the impact of each audio on a listener. However, abstract questions concerning &#x201C;patient representation&#x201D; or &#x201C;nuance&#x201D; are susceptible to subjective interpretation, a weakness amplified by our use of a single blinded reviewer. We attempted to standardize these ratings with the calibration audios, but we cannot exclude the possibility that these subjective quality scores were influenced by rater bias. 
Despite this concern, the consistently high scores awarded to the expert-edited interviews provide some evidence of the survey&#x2019;s validity, as the rater reliably assigned high scores to the gold-standard interviews. Future studies could adopt this survey to measure the efficacy of interventions to increase empathy in medicine.</p><p>Second, the sample size of the study was relatively limited, with only 24 interviews and a single recruited rater, which makes the results prone to bias. To build on this work, a larger set of patient audio interviews and additional experienced raters should be recruited. Previous program volunteers were able to receive iterative feedback on their work over long periods; in comparison, the novice editors had approximately 1 hour of training, so their skills were not as developed. Lastly, we were unable to obtain granular editing time measurements from the expert editor because the expert-edited interviews were drawn from the retrospective dataset. However, the AI pipeline&#x2019;s completion time of under 10 seconds represents a multiple-orders-of-magnitude improvement in efficiency over any manual editing process.</p></sec><sec id="s4-4"><title>Future Directions</title><p>We have presented the groundwork for an audio transcription and editing pipeline for humanistic patient conversations. Future work should test the same pipeline with newer models as they improve, as well as with other currently available models besides ChatGPT-4o. Other strategies to improve performance include fine-tuning the LLM, using AI agents to summarize the transcript, testing other ASR models, introducing patient-specific contextual metadata, and further prompt engineering to optimize the output. Error propagation was not formally tracked through the entire editing pipeline, but we hypothesize an association between ASR errors and the final output quality. Future work should investigate these errors.</p></sec><sec id="s4-5"><title>Conclusions</title><p>We conclude that ChatGPT-4o can create summarized audio files with audio and content quality similar to that of a novice editor in a fraction of the time. However, the expert editor outperformed both the AI editing pipeline and the novice editors on all metrics. After further validation, this tool could be implemented in the TIMS program to reduce workload and overcome adoption barriers.</p></sec></sec></body><back><ack><p>This research has been generously supported with grants from the John Conley Foundation for Ethics and Philosophy in Medicine. We additionally acknowledge support from the Catalyst Award and the Diversity Innovation Grant from Johns Hopkins University. Lastly, we thank the Johns Hopkins Technology Innovation Center for providing access to a protected health information&#x2013;compliant version of ChatGPT-4o.</p></ack><notes><sec><title>Data Availability</title><p>The original interview recordings are not publicly available for privacy protection considerations but are available from the corresponding author on reasonable request. The relevant code is provided here [<xref ref-type="bibr" rid="ref40">40</xref>]. This code corresponds to the automated audio-editing pipeline that processes raw audio, uses ChatGPT-4o to extract key segments, and stitches the selected segments into an AI-edited audio file.</p></sec></notes><fn-group><fn fn-type="con"><p>All authors contributed to the conceptualization and study design. SR, BB, and ER curated the data and conducted the analysis. ET and JW managed and supervised the project. 
All authors reviewed and edited the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ASR</term><def><p>automatic speech recognition</p></def></def-item><def-item><term id="abb3">BERT</term><def><p>bidirectional encoder representations from transformers</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">TIMS</term><def><p>This Is My Story</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Health worker mental health crisis</article-title><source>Centers for Disease Control and Prevention</source><year>2023</year><month>10</month><day>24</day><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/vitalsigns/health-worker-mental-health/index.html">https://www.cdc.gov/vitalsigns/health-worker-mental-health/index.html</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shanafelt</surname><given-names>TD</given-names> </name><name name-style="western"><surname>West</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Dyrbye</surname><given-names>LN</given-names> </name><etal/></person-group><article-title>Changes in burnout and satisfaction with work-life integration in physicians during the first 2 years of the COVID-19 pandemic</article-title><source>Mayo Clin Proc</source><year>2022</year><month>12</month><volume>97</volume><issue>12</issue><fpage>2248</fpage><lpage>2258</lpage><pub-id pub-id-type="doi">10.1016/j.mayocp.2022.09.002</pub-id><pub-id pub-id-type="medline">36229269</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="web"><article-title>Physician burnout statistics 2024: the latest changes and trends in physician burnout by specialty</article-title><source>American Medical Association</source><year>2024</year><month>08</month><day>19</day><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ama-assn.org/practice-management/physician-health/physician-burnout-statistics-2024-latest-changes-and-trends">https://www.ama-assn.org/practice-management/physician-health/physician-burnout-statistics-2024-latest-changes-and-trends</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>West</surname><given-names>CP</given-names> </name><name name-style="western"><surname>Dyrbye</surname><given-names>LN</given-names> </name><name name-style="western"><surname>Shanafelt</surname><given-names>TD</given-names> </name></person-group><article-title>Physician burnout: contributors, consequences and solutions</article-title><source>J Intern Med</source><year>2018</year><month>06</month><volume>283</volume><issue>6</issue><fpage>516</fpage><lpage>529</lpage><pub-id pub-id-type="doi">10.1111/joim.12752</pub-id><pub-id pub-id-type="medline">29505159</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name 
name-style="western"><surname>Singh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Volner</surname><given-names>K</given-names> </name><name name-style="western"><surname>Marlowe</surname><given-names>D</given-names> </name></person-group><article-title>Provider burnout</article-title><source>StatPearls</source><publisher-name>StatPearls Publishing</publisher-name><pub-id pub-id-type="medline">30855914</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Belki&#x0107;</surname><given-names>K</given-names> </name></person-group><article-title>Toward better prevention of physician burnout: insights from individual participant data using the MD-specific Occupational Stressor Index and organizational interventions</article-title><source>Front Public Health</source><year>2025</year><volume>13</volume><fpage>1514706</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2025.1514706</pub-id><pub-id pub-id-type="medline">40177083</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>Health worker burnout</article-title><source>Office of the Surgeon General, US Department of Health and Human Services</source><year>2024</year><month>05</month><day>28</day><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.hhs.gov/surgeongeneral/reports-and-publications/health-worker-burnout/index.html">https://www.hhs.gov/surgeongeneral/reports-and-publications/health-worker-burnout/index.html</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sipos</surname><given-names>D</given-names> </name><name name-style="western"><surname>Goyal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Zapata</surname><given-names>T</given-names> </name></person-group><article-title>Addressing burnout in the healthcare workforce: current realities and mitigation strategies</article-title><source>Lancet Reg Health Eur</source><year>2024</year><month>07</month><volume>42</volume><fpage>100961</fpage><pub-id pub-id-type="doi">10.1016/j.lanepe.2024.100961</pub-id><pub-id pub-id-type="medline">39070752</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andhavarapu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yardi</surname><given-names>I</given-names> </name><name name-style="western"><surname>Bzhilyanskaya</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Post-traumatic stress in healthcare workers during the COVID-19 pandemic: a systematic review and meta-analysis</article-title><source>Psychiatry Res</source><year>2022</year><month>11</month><volume>317</volume><fpage>114890</fpage><pub-id pub-id-type="doi">10.1016/j.psychres.2022.114890</pub-id><pub-id pub-id-type="medline">36260970</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>National Academies of Sciences, Engineering, and Medicine</collab></person-group><source>Taking Action Against Clinician Burnout: A Systems Approach to Professional 
Well-Being</source><year>2019</year><publisher-name>National Academies Press</publisher-name><pub-id pub-id-type="doi">10.17226/25521</pub-id><pub-id pub-id-type="other">9780309495509</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>Empathy: a critical ally in battling physician burnout</article-title><source>American Medical Association</source><year>2019</year><month>02</month><day>5</day><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ama-assn.org/practice-management/physician-health/empathy-critical-ally-battling-physician-burnout">https://www.ama-assn.org/practice-management/physician-health/empathy-critical-ally-battling-physician-burnout</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cairns</surname><given-names>P</given-names> </name><name name-style="western"><surname>Isham</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Zachariae</surname><given-names>R</given-names> </name></person-group><article-title>The association between empathy and burnout in medical students: a systematic review and meta-analysis</article-title><source>BMC Med Educ</source><year>2024</year><month>06</month><day>7</day><volume>24</volume><issue>1</issue><fpage>640</fpage><pub-id pub-id-type="doi">10.1186/s12909-024-05625-6</pub-id><pub-id pub-id-type="medline">38849794</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delgado</surname><given-names>N</given-names> </name><name name-style="western"><surname>Delgado</surname><given-names>J</given-names> </name><name name-style="western"><surname>Betancort</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bonache</surname><given-names>H</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>LT</given-names> </name></person-group><article-title>What is the link between different components of empathy and burnout in healthcare professionals? 
A systematic review and meta-analysis</article-title><source>Psychol Res Behav Manag</source><year>2023</year><volume>16</volume><fpage>447</fpage><lpage>463</lpage><pub-id pub-id-type="doi">10.2147/PRBM.S384247</pub-id><pub-id pub-id-type="medline">36814637</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilkinson</surname><given-names>H</given-names> </name><name name-style="western"><surname>Whittington</surname><given-names>R</given-names> </name><name name-style="western"><surname>Perry</surname><given-names>L</given-names> </name><name name-style="western"><surname>Eames</surname><given-names>C</given-names> </name></person-group><article-title>Examining the relationship between burnout and empathy in healthcare professionals: a systematic review</article-title><source>Burn Res</source><year>2017</year><month>09</month><volume>6</volume><fpage>18</fpage><lpage>29</lpage><pub-id pub-id-type="doi">10.1016/j.burn.2017.06.003</pub-id><pub-id pub-id-type="medline">28868237</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nembhard</surname><given-names>IM</given-names> </name><name name-style="western"><surname>David</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ezzeddine</surname><given-names>I</given-names> </name><name name-style="western"><surname>Betts</surname><given-names>D</given-names> </name><name name-style="western"><surname>Radin</surname><given-names>J</given-names> </name></person-group><article-title>A systematic review of research on empathy in health care</article-title><source>Health Serv Res</source><year>2023</year><month>04</month><volume>58</volume><issue>2</issue><fpage>250</fpage><lpage>263</lpage><pub-id pub-id-type="doi">10.1111/1475-6773.14016</pub-id><pub-id pub-id-type="medline">35765156</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tracey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Crowe</surname><given-names>T</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ponnala</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rodriguez-Hobbs</surname><given-names>J</given-names> </name><name name-style="western"><surname>Teague</surname><given-names>P</given-names> </name></person-group><article-title>An introduction to a novel intervention, &#x201C;This is My Story&#x201D;, to support interdisciplinary medical teams delivering care to non-communicative patients</article-title><source>J Relig Health</source><year>2021</year><month>10</month><volume>60</volume><issue>5</issue><fpage>3282</fpage><lpage>3290</lpage><pub-id pub-id-type="doi">10.1007/s10943-021-01379-3</pub-id><pub-id pub-id-type="medline">34386889</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>This Is My Story</article-title><source>Johns Hopkins Medicine</source><year>2022</year><month>06</month><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.hopkinsmedicine.org/news/articles/2022/06/this-is-my-story">https://www.hopkinsmedicine.org/news/articles/2022/06/this-is-my-story</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tracey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Im</surname><given-names>C</given-names> </name><name name-style="western"><surname>Abshire-Saylor</surname><given-names>M</given-names> </name></person-group><article-title>A brief patient-recorded audio file called TIMS (This Is My Story) improves communication and empathy for healthcare teams in the hospital</article-title><source>J Patient Exp</source><year>2024</year><volume>11</volume><fpage>23743735241274015</fpage><pub-id pub-id-type="doi">10.1177/23743735241274015</pub-id><pub-id pub-id-type="medline">39161418</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Tracey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ponnala</surname><given-names>J</given-names> </name><name name-style="western"><surname>Rodriguez-Hobbs</surname><given-names>J</given-names> </name><name name-style="western"><surname>Crowe</surname><given-names>T</given-names> </name></person-group><article-title>An ICU expansion of a novel chaplain intervention, &#x201C;This is My Story,&#x201D; to support interdisciplinary medical teams delivering care to non-communicative patients in an academic medical center</article-title><source>J Relig Health</source><year>2023</year><month>02</month><volume>62</volume><issue>1</issue><fpage>83</fpage><lpage>97</lpage><pub-id pub-id-type="doi">10.1007/s10943-022-01567-9</pub-id><pub-id pub-id-type="medline">35482270</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tracey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mathur</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hager</surname><given-names>D</given-names> </name></person-group><article-title>Impressions of recording a brief audio file known as a TIMS (This is My Story) file</article-title><source>J Patient Exp</source><year>2025</year><volume>12</volume><fpage>23743735251346585</fpage><pub-id pub-id-type="doi">10.1177/23743735251346585</pub-id><pub-id pub-id-type="medline">40470310</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tracey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Abshire Saylor</surname><given-names>M</given-names> </name><etal/></person-group><article-title>TIMS: a mixed methods evaluation of the impact of a novel chaplain facilitated recorded interview placed in the medical chart for the medical staff in an ICU during the 
COVID-19 pandemic</article-title><source>J Relig Health</source><year>2023</year><month>06</month><volume>62</volume><issue>3</issue><fpage>1532</fpage><lpage>1545</lpage><pub-id pub-id-type="doi">10.1007/s10943-023-01800-z</pub-id><pub-id pub-id-type="medline">37014488</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Rekesh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Koluguri</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Kriman</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Fast conformer with linearly scalable attention for efficient speech recognition</article-title><source>arXiv</source><comment>Preprint posted online on May 9, 2023</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2305.05084</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Jia</surname><given-names>F</given-names> </name><name name-style="western"><surname>Majumdar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Watanabe</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ginsburg</surname><given-names>B</given-names> </name></person-group><article-title>Efficient sequence transduction by jointly predicting tokens and durations</article-title><source>arXiv</source><comment>Preprint posted online on Apr 13, 2023</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2304.06795</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>NVIDIA/Parakeet TDT 0.6B V2 (En)</article-title><source>Hugging Face</source><year>2024</year><access-date>2025-07-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2">https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="other"><person-group person-group-type="author"><collab>OpenAI</collab><name name-style="western"><surname>Hurst</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lerer</surname><given-names>A</given-names> </name><etal/></person-group><article-title>GPT-4o system card</article-title><source>arXiv</source><comment>Preprint posted online on Oct 22, 2024</comment><pub-id pub-id-type="doi">10.48550/ARXIV.2410.21276</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Idnay</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Evaluating large language models on medical evidence summarization</article-title><source>NPJ Digit Med</source><year>2023</year><month>08</month><day>24</day><volume>6</volume><issue>1</issue><fpage>158</fpage><pub-id pub-id-type="doi">10.1038/s41746-023-00896-7</pub-id><pub-id
pub-id-type="medline">37620423</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ju</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name></person-group><article-title>Exploring the potential of ChatGPT in medical dialogue summarization: a study on consistency with human preferences</article-title><source>BMC Med Inform Decis Mak</source><year>2024</year><month>03</month><day>14</day><volume>24</volume><issue>1</issue><fpage>75</fpage><pub-id pub-id-type="doi">10.1186/s12911-024-02481-8</pub-id><pub-id pub-id-type="medline">38486198</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gan</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Uddin</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gan</surname><given-names>AZ</given-names> </name><name name-style="western"><surname>Yew</surname><given-names>YY</given-names> </name><name name-style="western"><surname>Gonz&#x00E1;lez</surname><given-names>PA</given-names> </name></person-group><article-title>ChatGPT&#x2019;s performance before and after teaching in mass casualty incident triage</article-title><source>Sci Rep</source><year>2023</year><month>11</month><day>21</day><volume>13</volume><issue>1</issue><fpage>20350</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-46986-0</pub-id><pub-id pub-id-type="medline">37989755</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Assessing the utility of ChatGPT throughout the entire clinical workflow: development and usability study</article-title><source>J Med Internet Res</source><year>2023</year><month>08</month><day>22</day><volume>25</volume><issue>1</issue><fpage>e48659</fpage><pub-id pub-id-type="doi">10.2196/48659</pub-id><pub-id pub-id-type="medline">37606976</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name></person-group><article-title>Utility of ChatGPT in clinical practice</article-title><source>J Med Internet Res</source><year>2023</year><month>06</month><day>28</day><volume>25</volume><issue>1</issue><fpage>e48568</fpage><pub-id pub-id-type="doi">10.2196/48568</pub-id><pub-id pub-id-type="medline">37379067</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clusmann</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Kolbinger</surname><given-names>FR</given-names> </name><name name-style="western"><surname>Muti</surname><given-names>HS</given-names> </name><etal/></person-group><article-title>The future landscape of large language models in medicine</article-title><source>Commun Med (Lond)</source><year>2023</year><month>10</month><day>10</day><volume>3</volume><issue>1</issue><fpage>141</fpage><pub-id pub-id-type="doi">10.1038/s43856-023-00370-1</pub-id><pub-id pub-id-type="medline">37816837</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sandmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Riepenhausen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Plagwitz</surname><given-names>L</given-names> </name><name name-style="western"><surname>Varghese</surname><given-names>J</given-names> </name></person-group><article-title>Systematic analysis of ChatGPT, Google search and Llama 2 for clinical decision support tasks</article-title><source>Nat Commun</source><year>2024</year><month>03</month><day>6</day><volume>15</volume><issue>1</issue><fpage>2050</fpage><pub-id pub-id-type="doi">10.1038/s41467-024-46411-8</pub-id><pub-id pub-id-type="medline">38448475</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Veen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Van Uden</surname><given-names>C</given-names> </name><name name-style="western"><surname>Blankemeier</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Adapted large language models can outperform medical experts in clinical text summarization</article-title><source>Nat Med</source><year>2024</year><month>04</month><volume>30</volume><issue>4</issue><fpage>1134</fpage><lpage>1142</lpage><pub-id pub-id-type="doi">10.1038/s41591-024-02855-5</pub-id><pub-id pub-id-type="medline">38413730</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fraile Navarro</surname><given-names>D</given-names> </name><name name-style="western"><surname>Coiera</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hambly</surname><given-names>TW</given-names> </name><etal/></person-group><article-title>Expert evaluation of large language models for clinical dialogue summarization</article-title><source>Sci Rep</source><year>2025</year><month>01</month><day>7</day><volume>15</volume><issue>1</issue><fpage>1195</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-84850-x</pub-id><pub-id pub-id-type="medline">39774141</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goh</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gallo</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hom</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Large language model influence on diagnostic reasoning: a randomized clinical trial</article-title><source>JAMA Netw 
Open</source><year>2024</year><month>10</month><day>1</day><volume>7</volume><issue>10</issue><fpage>e2440969</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.40969</pub-id><pub-id pub-id-type="medline">39466245</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ovsyannikova</surname><given-names>D</given-names> </name><name name-style="western"><surname>de Mello</surname><given-names>VO</given-names> </name><name name-style="western"><surname>Inzlicht</surname><given-names>M</given-names> </name></person-group><article-title>Third-party evaluators perceive AI as more compassionate than expert humans</article-title><source>Commun Psychol</source><year>2025</year><month>01</month><day>10</day><volume>3</volume><issue>1</issue><fpage>4</fpage><pub-id pub-id-type="doi">10.1038/s44271-024-00182-6</pub-id><pub-id pub-id-type="medline">39794410</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayers</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Poliak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dredze</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Comparing physician and artificial intelligence chatbot responses to patient questions posted to a public social media forum</article-title><source>JAMA Intern Med</source><year>2023</year><month>06</month><day>1</day><volume>183</volume><issue>6</issue><fpage>589</fpage><lpage>596</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id><pub-id pub-id-type="medline">37115527</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elyoseph</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Hadar-Shoval</surname><given-names>D</given-names> </name><name name-style="western"><surname>Asraf</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lvovsky</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT outperforms humans in emotional awareness evaluations</article-title><source>Front Psychol</source><year>2023</year><volume>14</volume><fpage>1199058</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2023.1199058</pub-id><pub-id pub-id-type="medline">37303897</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kernberg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gold</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Mohan</surname><given-names>V</given-names> </name></person-group><article-title>Using ChatGPT-4 to create structured medical notes from audio recordings of physician-patient encounters: comparative study</article-title><source>J Med Internet Res</source><year>2024</year><month>04</month><day>22</day><volume>26</volume><fpage>e54419</fpage><pub-id pub-id-type="doi">10.2196/54419</pub-id><pub-id pub-id-type="medline">38648636</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation 
citation-type="web"><article-title>Rapuris/TIMS_AI_Editing_Pipeline</article-title><source>GitHub</source><access-date>2025-12-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/Rapuris/TIMS_AI_Editing_Pipeline">https://github.com/Rapuris/TIMS_AI_Editing_Pipeline</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>GPT-4o summarization prompt.</p><media xlink:href="medinform_v14i1e80205_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Supplementary tables.</p><media xlink:href="medinform_v14i1e80205_app2.doc" xlink:title="DOC File, 17 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Supplementary figure.</p><media xlink:href="medinform_v14i1e80205_app3.doc" xlink:title="DOC File, 231 KB"/></supplementary-material></app-group></back></article>