<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">50428</article-id><article-id pub-id-type="doi">10.2196/50428</article-id><title-group><article-title>Examining Linguistic Differences in Electronic Health Records for Diverse Patients With Diabetes: Natural Language Processing Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Bilotta</surname><given-names>Isabel</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tonidandel</surname><given-names>Scott</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Liaw</surname><given-names>Winston R</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>King</surname><given-names>Eden</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Carvajal</surname><given-names>Diana N</given-names></name><degrees>MPH, MD</degrees><xref ref-type="aff" 
rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Taylor</surname><given-names>Ayana</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Thamby</surname><given-names>Julie</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Xiang</surname><given-names>Yang</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tao</surname><given-names>Cui</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff9">9</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hansen</surname><given-names>Michael</given-names></name><degrees>MPH, MS, MD</degrees><xref ref-type="aff" rid="aff10">10</xref></contrib></contrib-group><aff id="aff1"><institution>Deutser</institution>, <addr-line>Houston</addr-line><addr-line>TX</addr-line>, <country>United States</country></aff><aff id="aff2"><institution>Belk College of Business, University of North Carolina at Charlotte</institution>, <addr-line>Charlotte</addr-line><addr-line>NC</addr-line>, <country>United States</country></aff><aff id="aff3"><institution>Department of Health Systems and Population Health Sciences, University of Houston Tilman J. 
Fertitta Family College of Medicine</institution>, <addr-line>Houston</addr-line><addr-line>TX</addr-line>, <country>United States</country></aff><aff id="aff4"><institution>Department of Psychological Sciences, Rice University</institution>, <addr-line>Houston</addr-line><addr-line>TX</addr-line>, <country>United States</country></aff><aff id="aff5"><institution>Department of Family &#x0026; Community Medicine, University of Maryland</institution>, <addr-line>Baltimore</addr-line><addr-line>MD</addr-line>, <country>United States</country></aff><aff id="aff6"><institution>Department of Physical Medicine and Rehabilitation, University of California, Los Angeles</institution>, <addr-line>Los Angeles</addr-line><addr-line>CA</addr-line>, <country>United States</country></aff><aff id="aff7"><institution>Duke University School of Medicine</institution>, <addr-line>Durham</addr-line><addr-line>NC</addr-line>, <country>United States</country></aff><aff id="aff8"><institution>Peng Cheng Laboratory</institution>, <addr-line>Shenzhen</addr-line>, <country>China</country></aff><aff id="aff9"><institution>Department of Artificial Intelligence and Informatics, Mayo Clinic</institution>, <addr-line>Jacksonville</addr-line><addr-line>FL</addr-line>, <country>United States</country></aff><aff id="aff10"><institution>Department of Family and Community Medicine, Baylor College of Medicine</institution>, <addr-line>Houston</addr-line><addr-line>TX</addr-line>, <country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lovis</surname><given-names>Christian</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Senst</surname><given-names>Benjamin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Chatzimina</surname><given-names>Maria</given-names></name></contrib><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Jing</surname><given-names>Xia</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Winston R Liaw, MPH, MD<email>winstonrliaw@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>23</day><month>5</month><year>2024</year></pub-date><volume>12</volume><elocation-id>e50428</elocation-id><history><date date-type="received"><day>30</day><month>06</month><year>2023</year></date><date date-type="rev-recd"><day>26</day><month>09</month><year>2023</year></date><date date-type="accepted"><day>23</day><month>04</month><year>2024</year></date></history><copyright-statement>&#x00A9; Isabel Bilotta, Scott Tonidandel, Winston R Liaw, Eden King, Diana N Carvajal, Ayana Taylor, Julie Thamby, Yang Xiang, Cui Tao, Michael Hansen. Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 23.5.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e50428"/><abstract><sec><title>Background</title><p>Individuals from minoritized racial and ethnic backgrounds experience pernicious and pervasive health disparities that have emerged, in part, from clinician bias.</p></sec><sec><title>Objective</title><p>We used a natural language processing approach to examine whether linguistic markers in electronic health record (EHR) notes differ based on the race and ethnicity of the patient. To validate this methodological approach, we also assessed the extent to which clinicians perceive linguistic markers to be indicative of bias.</p></sec><sec sec-type="methods"><title>Methods</title><p>In this cross-sectional study, we extracted EHR notes for patients who were aged 18 years or older; had more than 5 years of diabetes diagnosis codes; and received care between 2006 and 2014 from family physicians, general internists, or endocrinologists practicing in an urban, academic network of clinics. The race and ethnicity of patients were defined as <italic>White non-Hispanic</italic>, <italic>Black non-Hispanic</italic>, or <italic>Hispanic or Latino</italic>. We hypothesized that Sentiment Analysis and Social Cognition Engine (SEANCE) components (ie, negative adjectives, positive adjectives, joy words, fear and disgust words, politics words, respect words, trust verbs, and well-being words) and mean word count would be indicators of bias if racial differences emerged. We performed linear mixed effects analyses to examine the relationship between the outcomes of interest (the SEANCE components and word count) and patient race and ethnicity, controlling for patient age. 
To validate this approach, we asked clinicians to indicate the extent to which they thought variation in the use of SEANCE language domains for different racial and ethnic groups was reflective of bias in EHR notes.</p></sec><sec sec-type="results"><title>Results</title><p>We examined EHR notes (n=12,905) of Black non-Hispanic, White non-Hispanic, and Hispanic or Latino patients (n=1562), who were seen by 281 physicians. A total of 27 clinicians participated in the validation study. In terms of bias, participants rated negative adjectives as 8.63 (SD 2.06), fear and disgust words as 8.11 (SD 2.15), and positive adjectives as 7.93 (SD 2.46) on a scale of 1 to 10, with 10 being extremely indicative of bias. Notes for Black non-Hispanic patients contained significantly more negative adjectives (coefficient 0.07, SE 0.02) and significantly more fear and disgust words (coefficient 0.007, SE 0.002) than those for White non-Hispanic patients. The notes for Hispanic or Latino patients included significantly fewer positive adjectives (coefficient &#x2212;0.02, SE 0.007), trust verbs (coefficient &#x2212;0.009, SE 0.004), and joy words (coefficient &#x2212;0.03, SE 0.01) than those for White non-Hispanic patients.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This approach may enable physicians and researchers to identify and mitigate bias in medical interactions, with the goal of reducing health disparities stemming from bias.</p></sec></abstract><kwd-group><kwd>bias</kwd><kwd>sociodemographic factors</kwd><kwd>health care disparities</kwd><kwd>natural language processing</kwd><kwd>sentiment analysis</kwd><kwd>diabetes</kwd><kwd>electronic health record</kwd><kwd>racial</kwd><kwd>ethnic</kwd><kwd>diversity</kwd><kwd>Hispanic</kwd><kwd>medical interaction</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Language and communication play a significant, if not 
primary, role in social relations across different cultures [<xref ref-type="bibr" rid="ref1">1</xref>]. Language has increasingly been recognized as a relevant form of data that describe relations and behavior [<xref ref-type="bibr" rid="ref2">2</xref>]. One of the most intimate forms of communication between individuals occurs between clinicians and patients during clinical visits. However, these encounters may be undermined by different forms of bias directed toward patients from certain racial and ethnic minority groups [<xref ref-type="bibr" rid="ref3">3</xref>]. Generally, <italic>bias</italic> refers to an evaluation, decision, perception, or action in favor of or against a person or group compared to another. Bias can be blatant, wherein it is characterized by deliberate actions (eg, racist comments) that are intentionally and overtly discriminatory [<xref ref-type="bibr" rid="ref4">4</xref>]. Bias can also be subtle, including &#x201C;actions that are ambiguous in intent to harm, difficult to detect, low in intensity, and often unintentional but are nevertheless deleterious&#x201D; to targets [<xref ref-type="bibr" rid="ref4">4</xref>]. Subtle bias by health care clinicians is linked to negative outcomes for racial and ethnic minority patients, particularly Black non-Hispanic and Hispanic or Latino patients [<xref ref-type="bibr" rid="ref5">5</xref>].</p></sec><sec id="s1-2"><title>Race and Racial Bias in Medical Interactions</title><p>Health disparities between racial and ethnic groups have historically been attributed to varying levels of socioeconomic status, as well as genetic and biological factors that were thought to predispose groups to different medical conditions. Research has emerged over the past few decades demonstrating that in fact, there is no biological basis for racial and ethnic differences. Humans share 99.9% of their genome, and the 0.1% variation cannot be explained or elucidated by race [<xref ref-type="bibr" rid="ref6">6</xref>]. 
Race describes physical traits considered socially significant, and ethnicity denotes a shared cultural heritage, such as language, practices, and beliefs [<xref ref-type="bibr" rid="ref7">7</xref>]. As such, race and ethnicity are social constructs, and since the landmark report <italic>Unequal Treatment</italic> in 2002 detailed the impact of racial and ethnic discrimination in patient-clinician interactions, research interest in this area has burgeoned [<xref ref-type="bibr" rid="ref8">8</xref>]. Relative to White non-Hispanic patients, Black non-Hispanic and Hispanic or Latino patients are less likely to &#x201C;engender empathic responses from clinicians, establish rapport with clinicians, receive sufficient information, and be encouraged to participate in medical decision making&#x201D; [<xref ref-type="bibr" rid="ref9">9</xref>]. A lack of relationship building [<xref ref-type="bibr" rid="ref10">10</xref>], reduced positive patient and clinician affect [<xref ref-type="bibr" rid="ref11">11</xref>], decreased patient trust [<xref ref-type="bibr" rid="ref12">12</xref>], and fewer patient questions [<xref ref-type="bibr" rid="ref13">13</xref>] are all more likely outcomes for Black non-Hispanic and Hispanic or Latino patients compared to White non-Hispanic patients during medical interactions. Indeed, the 2018 <italic>National Healthcare Disparities Report</italic> revealed that, compared to White non-Hispanic patients, Black non-Hispanic patients receive inferior care on 40% of quality measures, and Hispanic or Latino patients receive worse care on 35% of quality measures, many of which indicate biased and discriminatory behaviors by clinicians [<xref ref-type="bibr" rid="ref14">14</xref>]. 
For example, indicators were worse for Black non-Hispanic and Hispanic or Latino patients than White non-Hispanic patients for measures such as &#x201C;physicians sometimes or never showed respect for what they had to say&#x201D; and &#x201C;physicians sometimes or never spent enough time with them&#x201D; [<xref ref-type="bibr" rid="ref14">14</xref>]. Black non-Hispanic and Hispanic or Latino patients are more likely to report racial and ethnic bias and discrimination during medical encounters compared to White non-Hispanic patients [<xref ref-type="bibr" rid="ref15">15</xref>]. Yet, less is known about the manifestations and details of such experiences during the clinician-patient interaction [<xref ref-type="bibr" rid="ref16">16</xref>] and whether racial and ethnic discrepancies in care can be observed in the content of electronic health records (EHRs). Similar to the thesis described in <italic>Unequal Treatment</italic>, we hypothesized that the mitigation of bias at the clinician level is needed to improve patient outcomes for diverse racial and ethnic populations and narrow the disparities gap. To address bias, researchers need to understand how to measure its existence, and clinicians need to be informed of its manifestations.</p></sec><sec id="s1-3"><title>Research Contributions</title><p>Bias can have many forms&#x2014;blatant, subtle, malevolent, or benevolent&#x2014;all of which can be indicated by language. With increasing access to EHR documentation and advances in natural language processing, we may be better equipped to identify differences in clinician encounters with patients of diverse racial and ethnic backgrounds. This study searched for linguistic discrepancies in EHRs using a natural language processing approach followed by linear mixed effect model analyses. EHRs are digital summaries of the clinician-patient encounter and include the clinician&#x2019;s assessment of the interaction, as well as the patient&#x2019;s health history. 
Since the clinician is responsible for inputting information, as well as reviewing the information inputted by other care clinicians in the EHR for each patient encounter, the contents of the EHR may be particularly useful in illuminating biases that clinicians hold toward patients of different racial and ethnic backgrounds. Although several studies have indicated that clinician bias occurs, particularly in racially and ethnically discordant interactions (ie, when the patient and clinician are of different racial and ethnic backgrounds), relatively little research has examined the ways in which the clinician may be thinking about the patient and how the clinician&#x2019;s sentiment and cognitions are reflected in the language of the EHR [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. EHRs can include many years of patient-clinician interactions, with multiple clinicians having access to them, allowing for biases to be passed on and potentially impact future medical decisions.</p><p>Our data set contained EHR notes for a large sample of White non-Hispanic, Black non-Hispanic, and Hispanic or Latino patients with diabetes in the Southern United States. The natural language processing tool, Sentiment Analysis and Social Cognition Engine (SEANCE), was applied to assess multiple linguistic markers in the EHR text [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. 
We then explored whether 8 of the 20 SEANCE components (see <xref ref-type="table" rid="table1">Table 1</xref>) differed for patients of different races and ethnicities [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Description of SEANCE<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> components.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Component label</td><td align="left" valign="bottom">Indices, n<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="bottom">Key indices<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="bottom">Language examples</td></tr></thead><tbody><tr><td align="left" valign="top">Negative adjectives</td><td align="char" char="." valign="top">18</td><td align="left" valign="top">NRC<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup> negative adjectives, NRC disgust adjectives, NRC anger adjectives, GI<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup> negative adjectives, and Hu-Liu<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup> negative adjectives</td><td align="left" valign="top">Unkind, bad, cruel, hurtful, and intolerant</td></tr><tr><td align="left" valign="top">Positive adjectives</td><td align="char" char="." valign="top">9</td><td align="left" valign="top">Hu-Liu positive adjectives, VADER<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup> positive adjectives, GI positive adjectives, and Lasswell<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup> positive affect adjectives</td><td align="left" valign="top">Supportive, kind, great, and nice</td></tr><tr><td align="left" valign="top">Joy words</td><td align="char" char="." 
valign="top">8</td><td align="left" valign="top">NRC joy adjectives, NRC anticipation adjectives, and NRC surprise adjectives</td><td align="left" valign="top">Admiration, advocacy, elated, glad, liking, and pleased</td></tr><tr><td align="left" valign="top">Fear and disgust words</td><td align="char" char="." valign="top">8</td><td align="left" valign="top">NRC disgust nouns, NRC negative nouns, NRC fear nouns, and NRC anger nouns</td><td align="left" valign="top">Abnormal, adverse, attack, cringe, criticize, distress, intimidate, unequal, and stigma</td></tr><tr><td align="left" valign="top">Politics words</td><td align="char" char="." valign="top">7</td><td align="left" valign="top">GI politics nouns and Lasswell power nouns</td><td align="left" valign="top">Alliance, ally, authorize, civil, concession, consent, and oppose</td></tr><tr><td align="left" valign="top">Respect words</td><td align="char" char="." valign="top">4</td><td align="left" valign="top">Lasswell respect nouns</td><td align="left" valign="top">Status, honor, recognition, and prestige</td></tr><tr><td align="left" valign="top">Trust verbs</td><td align="char" char="." valign="top">5</td><td align="left" valign="top">NRC trust verbs, NRC joy verbs, and NRC positive verbs</td><td align="left" valign="top">Affirm, advise, confide, and cooperating</td></tr><tr><td align="left" valign="top">Well-being words</td><td align="char" char="." 
valign="top">4</td><td align="left" valign="top">Lasswell well-being physical nouns and Lasswell well-being total nouns</td><td align="left" valign="top">Alive, ambulance, adjust, afraid, blood, clinic, and nutrition</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SEANCE: Sentiment Analysis and Social Cognition Engine.</p></fn><fn id="table1fn2"><p><sup>b</sup>Indices refer to the number of dictionary lists from which the component was developed.</p></fn><fn id="table1fn3"><p><sup>c</sup>The key indices came from the following dictionary lists: NRC Emotion Lexicon [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], the Harvard-IV dictionary list used by the General Inquirer [<xref ref-type="bibr" rid="ref23">23</xref>], the Hu-Liu polarity word lists [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], the Valence Aware Dictionary and Sentiment Reasoner [<xref ref-type="bibr" rid="ref24">24</xref>], the Lasswell dictionary lists [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], and the Geneva Affect Label Coder database [<xref ref-type="bibr" rid="ref27">27</xref>]. 
For a thorough review of the SEANCE indices and corresponding dictionaries, see Crossley et al [<xref ref-type="bibr" rid="ref18">18</xref>].</p></fn><fn id="table1fn4"><p><sup>d</sup>NRC: NRC Emotion Lexicon.</p></fn><fn id="table1fn5"><p><sup>e</sup>GI: General Inquirer.</p></fn><fn id="table1fn6"><p><sup>f</sup>Hu-Liu: Hu-Liu polarity word lists.</p></fn><fn id="table1fn7"><p><sup>g</sup>VADER: Valence Aware Dictionary and Sentiment Reasoner.</p></fn><fn id="table1fn8"><p><sup>h</sup>Lasswell: Lasswell dictionary lists.</p></fn></table-wrap-foot></table-wrap><p>We hypothesized that the SEANCE components for negative adjectives, positive adjectives, joy words, fear and disgust words, politics words, respect words, trust verbs, and well-being words and the mean word count in the notes would be indicators of bias, as these concepts have been linked to bias in nonmedical contexts. Ng&#x2019;s [<xref ref-type="bibr" rid="ref28">28</xref>] review of linguistic racial bias in verbiage offers the rationale for our choice of fear and disgust words, politics words, respect words, and trust verbs as indicators of bias, whereas the work of Li et al [<xref ref-type="bibr" rid="ref29">29</xref>] examining gender differences in standardized writing assessment provides further support for our use of SEANCE as a tool for examining biases in language. We selected positive and negative adjectives, well-being words, politics words, and word count indicators as prior research demonstrates that clinicians may be less likely to establish rapport and provide appropriate medications and are more inclined to show negative attitudes and be dismissive toward Black non-Hispanic and Hispanic or Latino patients as a result of their unconscious racial and ethnic biases [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>Specifically, we investigated which aspects of communication differ and whether differences are indicative of biased interactions. 
Any systematic variation in language can convey differential perceptions, attitudes, and expectations. For example, words such as &#x201C;resistant&#x201D; or &#x201C;non-compliant&#x201D; could reflect bias if (all else being equal) they tend to be used more to describe people from some racial or ethnic backgrounds than others. This work aimed to elucidate for clinicians and researchers where discrepancies in communication emerge in the EHR and whether these differences are indicative of racial and ethnic bias. We also assessed the extent to which clinicians perceive linguistic markers to be indicative of bias.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Sample</title><p>This was a cross-sectional study using EHR-derived physician notation of outpatient clinical encounters. We extracted EHR encounters (n=15,460) for patients (n=1647) who were aged 18 years or older; had more than 5 years of diabetes diagnosis codes; and received care between 2006 and 2014 from family physicians, general internists, or endocrinologists practicing in an urban, academic network of clinics. We chose this disease because of its high prevalence (11.3% in the United States) and chose to examine outpatient visits because of the scope of annual outpatient visits (1 billion) relative to hospital admissions (32 million) [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. The demographic variables collected were patient race and ethnicity, sex, and age. 
The race and ethnicity of patients were defined as <italic>White non-Hispanic</italic>, <italic>Black non-Hispanic</italic>, or <italic>Hispanic or Latino</italic> (see <xref ref-type="table" rid="table2">Table 2</xref> for a summary of patient demographics).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Patient demographics of the final sample.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Variable</td><td align="left" valign="bottom">Value (n=1562)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><bold>Age (years)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Mean (SD)</td><td align="left" valign="top">68.74 (13.76)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Range</td><td align="left" valign="top">20-102</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Median (IQR)</td><td align="left" valign="top">69 (61-78)</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Sex, n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Female</td><td align="left" valign="top">871 (55.74)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Male</td><td align="left" valign="top">691 (44.26)</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Race and ethnicity, n (%)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">White non-Hispanic</td><td align="left" valign="top">682 (43.66)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Black non-Hispanic</td><td align="left" valign="top">755 (48.34)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Hispanic or Latino</td><td align="left" valign="top">125 (8)</td></tr></tbody></table></table-wrap></sec><sec id="s2-2"><title>SEANCE Algorithm</title><p>SEANCE is a lexical scoring 
algorithm that includes over 200 word vectors (also referred to as indices or features) designed to assess sentiment, cognition, and social order, which were developed from preexisting and widely used databases such as EmoLex and SenticNet [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. In addition to the core indices, SEANCE allows for several customized indices, including filtering for particular parts of speech and controlling for instances of negation [<xref ref-type="bibr" rid="ref18">18</xref>]. Since SEANCE computes such a large quantity of indices, Crossley et al [<xref ref-type="bibr" rid="ref18">18</xref>] developed 20 components from all the indices using principal component analysis (PCA) [<xref ref-type="bibr" rid="ref18">18</xref>]. These components are essentially clusters of related indices in SEANCE and allow users to interpret the SEANCE output at a more macro level. This process enabled them to summarize the SEANCE indices into a smaller and more interpretable set of variables. In the PCA by Crossley et al [<xref ref-type="bibr" rid="ref18">18</xref>], they retained even the smallest components, setting a conservative cutoff point for inclusion (ie, 1% for variance explained by each component). The analyses for this research were run on a subset of 8 of the 20 <italic>components</italic> that Crossley et al [<xref ref-type="bibr" rid="ref18">18</xref>] developed. We selected these 8 components a priori (see <xref ref-type="table" rid="table1">Table 1</xref> for a description of the selected components).</p><p>We chose SEANCE instead of other natural language processing tools, such as Linguistic Inquiry and Word Count (LIWC), because it contains a larger number of core indices taken from multiple lexicons, as well as 20 components, and is based on the most recent improvements in sentiment analysis [<xref ref-type="bibr" rid="ref18">18</xref>]. 
In their validation of SEANCE, Crossley et al [<xref ref-type="bibr" rid="ref18">18</xref>] found that SEANCE components demonstrated significantly greater accuracy than LIWC indices (<italic>P</italic>&#x003C;.001) for 3 of the 4 review types examined. In addition to the core indices, SEANCE allows for several customized indices, including filtering for parts of speech (also known as &#x201C;parts-of-speech tagging&#x201D;) and controlling for instances of negation, which LIWC does not offer. We analyzed all words in the EHR (ie, <italic>not</italic> single parts of speech), but we controlled for negation. For example, this means that &#x201C;not good&#x201D; would be recognized as <italic>not being positive</italic> by SEANCE, as opposed to LIWC, which would see the word &#x201C;good&#x201D; and count it as positive.</p></sec><sec id="s2-3"><title>Validation of the Sentiment Analysis Approach</title><p>To provide validation of the sentiment analysis approach used in this study, we surveyed subject-matter experts in EHR note writing (ie, physicians, physician assistants, and nurse practitioners) to garner their perspectives on the appropriateness of the linguistic components identified in our pilot study as indicators of subtle racial and ethnic bias in EHR notes. The team of researchers for this study included industrial-organizational psychologists who have expertise in bias and discrimination; however, it was also valuable to garner opinions from clinicians who are experts in EHR note writing and who understand the differences in the types of language used. To recruit participants, we used a combination of opportunistic and snowball sampling, starting with individuals within our personal networks. Through a web-based program, we asked participants to indicate the extent to which they thought the language domains (eg, negative adjectives, fear and disgust words, etc) were reflective of bias in EHR notes. 
Participants were told the following:</p><disp-quote><p>One type of language that could represent bias reflects the amount of NEGATIVE ADJECTIVES contained in the electronic health record. Examples of negative adjectives include &#x201C;unkind,&#x201D; &#x201C;bad,&#x201D; &#x201C;harmful,&#x201D; &#x201C;intolerant,&#x201D; and &#x201C;stupid.&#x201D; If these kinds of words were used to describe Black or LatinX patients more than White patients, to what extent do you think this would be indicative of racial bias? Please indicate the extent of your agreement on the 1 to 10 scale below.</p></disp-quote><p>The same formatting was used for each of the linguistic components, with component-specific language examples offered so participants understood the types of sentiment that each component was designed to assess.</p></sec><sec id="s2-4"><title>Cross-Classified Linear Mixed Effects Models</title><p>We used the <italic>lme4</italic> package in R (R Foundation for Statistical Computing) to perform linear mixed effects analyses of the relationships between the outcomes of interest (SEANCE components and word count) and patient race and ethnicity, controlling for patient age. We ran an identical analysis, treating 8 different SEANCE components and the mean word count in the EHR as the dependent variables, while leaving all other variables consistent across the models. The same steps of entering fixed and random effects were applied across all cross-classified linear mixed effects models with different dependent variables (ie, negative adjectives, positive adjectives, well-being words, trust verbs, fear and disgust words, joy words, politics words, respect words, and mean word count).</p><p>We first ran a null model with only the random intercepts. We then added random effects and applied a crossed design (vs a traditional nested structure), leading us to have intercepts for physicians and patients. 
Then, we ran a model with the random intercepts as well as the fixed effects. As fixed effects, we entered <italic>race and ethnicity</italic> and <italic>age</italic> (without an interaction term) into the model. For all models examined, the intercept variation can be attributed primarily to different physicians rather than patients. We used a 95% CI to determine statistical significance. To be more conservative, given that we ran multiple tests, we also computed an additional set of CIs at the 99th percentile.</p></sec><sec id="s2-5"><title>Ethical Considerations</title><p>We obtained ethics approval from the University of Texas Health Science Center&#x2019;s Committee for the Protection of Human Subjects (HSC-MS-18-0431) and the Rice University Institutional Review Board (IRB-FY2021-325). Participants consented and received a US $25 gift card after completing the survey. EHR data were deidentified prior to the analysis.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Description and Justification for Cross-Classified Analyses</title><p>An initial inspection of the data revealed that 2 physicians were extreme outliers, accounting for 16.53% (2555/15,460) of the notes in our sample. To ensure that the overrepresentation of these physicians would not bias the results, we removed those notes from the data set (taking us from our initial sample of 15,460 visits with 283 physicians and 1647 patients to 12,905 visits with 281 physicians and 1562 patients; <xref ref-type="table" rid="table2">Table 2</xref>). The distribution of visits by patients indicates an average of 8.27 visits per patient with a minimum of 1, a median of 5, and a maximum of 97. Physicians see 11.72 patients on average, with a median of 2 and a maximum of 143, suggesting a skewed distribution. 
Despite the relatively large number of patients seen by some physicians, these physicians accounted for substantially fewer patient notes than the 2 physicians that were previously removed. Patients see 2.11 physicians on average, with a minimum of 1 and a maximum of 12; however, the distribution suggests that 6.6% (109/1647) of patients saw 5 or more physicians. Moreover, 742 (45.1%) of the 1647 patients saw 1 physician, whereas 119 (7.2%) saw 4 physicians. In our data set, patients can have multiple visits to a variety of physicians, indicating that patient visits are not nested within physicians. Further, physicians may see different patients with no consistent overlap of patients between physicians, indicating that physicians are not nested within patients. Thus, there is no clear hierarchical nesting of patients within physicians (or vice versa), which suggests that a cross-classified design is more appropriate than a traditional, hierarchical, multilevel model structure.</p></sec><sec id="s3-2"><title>Cross-Classified Linear Mixed Effects Model Results</title><p>In the negative adjective component model (<xref ref-type="table" rid="table3">Table 3</xref>), the random effects of patient (&#x03C3;<sup>2</sup>=0.02) and physician (&#x03C3;<sup>2</sup>=0.12) indicated that intercept variation in use of negative adjectives is mainly a function of the physician rather than the patient. The physician random effect was over 5 times as large as the random effect for the patient; the intraclass correlation (ICC) for physicians was 0.41 and the ICC for patients was 0.07 (ICC<sub>total</sub>=0.481). This pattern of results in random effects and ICC values for patients and physicians was consistent across the other 8 models. 
Overall, 2 of the 5 race and ethnicity relationships described below (ie, the significant difference in positive adjectives for Hispanic or Latino and White non-Hispanic patient notes, and the significant difference in trust verbs for Hispanic or Latino and White non-Hispanic patient notes) that were significant based on the 95% CIs had 99% CIs that included zero. For 3 of the SEANCE components&#x2014;well-being, politics, and respect words&#x2014;and for the overall word count, there was not a statistically significant difference among the 3 racial and ethnic groups. In contrast, for the remaining SEANCE components, there was a statistically significant race and ethnicity effect for either Black non-Hispanic or Hispanic or Latino patients relative to White non-Hispanic patients. Specifically, notes for Black non-Hispanic patients contained significantly more negative adjectives and fear and disgust words than those for White non-Hispanic patients. Notes for Hispanic or Latino patients included significantly fewer positive adjectives, trust verbs, and joy words than those for White non-Hispanic patients. 
As such, across most of the SEANCE components, we observed favoritism of White non-Hispanic patients in terms of note content.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Fixed effects model results for negative adjectives, positive adjectives, well-being words, trust verbs, joy words, politics words, respect words, fear and disgust words, and word count.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="4">Variables<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">Negative adjectives</td><td align="left" valign="top">Positive adjectives</td><td align="left" valign="top">Well-being words</td><td align="left" valign="top">Trust verbs</td><td align="left" valign="top">Joy words</td><td align="left" valign="top">Politics words</td><td align="left" valign="top">Respect words</td><td align="left" valign="top">Fear and disgust words</td><td align="left" valign="top">Word count</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="13"><bold>Fixed effect estimates</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="12"><bold>Age (years)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top" colspan="2">&#x03B2; (SE)</td><td align="left" valign="top">&#x2212;0.00 (0.00)</td><td align="left" valign="top">0.00 (0.00)</td><td align="left" valign="top">.0002 (0.00009)</td><td align="left" valign="top">&#x2212;0.00007 (0.00008)</td><td align="left" valign="top">0.000002 (0.0002)</td><td align="left" valign="top">&#x2212;0.00009 (0.00004)</td><td align="left" valign="top">&#x2212;0.00004 (0.00005)</td><td align="left" valign="top">0.000005 (0.00)</td><td align="left" valign="top">&#x2212;0.43 (0.68)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top" colspan="2">95% CI</td><td 
align="left" valign="top">&#x2212;0.002 to 0.0003</td><td align="left" valign="top">&#x2212;0.002 to 0.00</td><td align="left" valign="top">0.0006 to 0.0004<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.002 to 0.0008</td><td align="left" valign="top">&#x2212;0.0004 to 0.0004</td><td align="left" valign="top">&#x2212;0.0002 to &#x2212;0.000007<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.0004 to 0.0004</td><td align="left" valign="top">&#x2212;0.0001 to 0.0002</td><td align="left" valign="top">&#x2212;1.76 to 0.90</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="12"><bold>Race and ethnicity</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top" colspan="11"><bold>White non-Hispanic (reference)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x03B2; (SE)</td><td align="left" valign="top">0.42 (0.05)</td><td align="left" valign="top">&#x2212;0.24 (0.017)</td><td align="left" valign="top">0.18 (0.007)</td><td align="left" valign="top">0.16 (0.007)</td><td align="left" valign="top">0.32 (0.02)</td><td align="left" valign="top">0.07 (0.003)</td><td align="left" valign="top">0.05 (0.004)</td><td align="left" valign="top">0.17 (0.007)</td><td align="left" valign="top">868.50 (54.45)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." 
valign="top">95% CI</td><td align="left" valign="top">0.32 to 0.53</td><td align="left" valign="top">&#x2212;0.26 to &#x2212;0.21</td><td align="left" valign="top">0.17 to 0.20</td><td align="left" valign="top">0.14 to 0.17</td><td align="left" valign="top">0.28 to 0.35</td><td align="left" valign="top">0.06 to 0.07</td><td align="left" valign="top">0.04 to 0.05</td><td align="left" valign="top">0.16 to 0.19</td><td align="left" valign="top">761.84 to 975.17</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top" colspan="11"><bold>Black non-Hispanic</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x03B2; (SE)</td><td align="left" valign="top">0.07 (0.02)</td><td align="left" valign="top">0.02 (0.004)</td><td align="left" valign="top">0.004 (0.002)</td><td align="left" valign="top">&#x2212;0.003 (0.002)</td><td align="left" valign="top">&#x2212;0.01 (0.006)</td><td align="left" valign="top">0.001 (0.001)</td><td align="left" valign="top">&#x2212;0.001 (0.001)</td><td align="left" valign="top">0.007 (0.002)</td><td align="left" valign="top">20.61 (19.01)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." 
valign="top">95% CI</td><td align="left" valign="top">0.04 to 0.11<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.006 to 0.01</td><td align="left" valign="top">&#x2212;0.0007 to 0.009</td><td align="left" valign="top">&#x2212;0.007 to 0.001</td><td align="left" valign="top">&#x2212;0.02 to 0.0004</td><td align="left" valign="top">&#x2212;0.001 to 0.004</td><td align="left" valign="top">&#x2212;0.004 to 0.002</td><td align="left" valign="top">0.003 to 0.01<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;16.71 to 57.84</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top" colspan="11"><bold>Hispanic or Latino</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">&#x03B2; (SE)</td><td align="left" valign="top">0.02 (0.03)</td><td align="left" valign="top">&#x2212;0.02 (0.007)</td><td align="left" valign="top">0.002 (0.004)</td><td align="left" valign="top">&#x2212;0.009 (0.004)</td><td align="left" valign="top">&#x2212;0.03 (0.01)</td><td align="left" valign="top">&#x2212;0.0009 (0.003)</td><td align="left" valign="top">0.0006 (0.002)</td><td align="left" valign="top">&#x2212;0.002 (0.004)</td><td align="left" valign="top">15.73 (32.30)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="char" char="." 
valign="top">95% CI</td><td align="left" valign="top">&#x2212;0.03 to 0.08</td><td align="left" valign="top">&#x2212;0.03 to &#x2212;0.004<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.007 to 0.01</td><td align="left" valign="top">&#x2212;0.02 to &#x2212;0.001<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.05 to &#x2212;0.01<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">&#x2212;0.005 to 0.003</td><td align="left" valign="top">&#x2212;0.004 to 0.005</td><td align="left" valign="top">&#x2212;0.01 to 0.006</td><td align="left" valign="top">&#x2212;47.61 to 78.98</td></tr><tr><td align="left" valign="top" colspan="13"><bold>Random effects, estimate (SE)</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="3">U0 patient</td><td align="left" valign="top">0.02 (0.14)</td><td align="left" valign="top">0.0008 (0.03)</td><td align="left" valign="top">0.0004 (0.02)</td><td align="left" valign="top">0.0002 (0.02)</td><td align="left" valign="top">0.0006 (0.02)</td><td align="left" valign="top">0.00001 (0.004)</td><td align="left" valign="top">0.00002 (0.005)</td><td align="left" valign="top">0.0004 (0.02)</td><td align="left" valign="top">27,878 (167.0)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top" colspan="3">U0 physician</td><td align="left" valign="top">0.12 (0.34)</td><td align="left" valign="top">0.006 (0.08)</td><td align="left" valign="top">0.003 (0.05)</td><td align="left" valign="top">0.003 (0.05)</td><td align="left" valign="top">0.02 (0.15)</td><td align="left" valign="top">0.0002 (0.016)</td><td align="left" valign="top">0.0005 (0.02)</td><td align="left" valign="top">0.003 (0.05)</td><td align="left" valign="top">119,489 (345.7)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Random effects are presented as 
estimate and SE. For the fixed effect estimates, cell entries are parameter (&#x03B2;) estimates, SE, and 95% CIs. White non-Hispanic was the reference group for race and ethnicity.</p></fn><fn id="table3fn2"><p><sup>b</sup>Significant effects based on the 95% CIs.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Sentiment Analysis Validation</title><p>In all, 27 participants completed the surveys (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for the demographics of the participants). On a scale of 1 to 10, with 10 being extremely indicative of bias, participants rated negative adjectives as 8.63 (SD 2.06), fear and disgust words as 8.11 (SD 2.15), positive adjectives as 7.93 (SD 2.46), trust verbs as 7.56 (SD 2.64), and joy words as 6.81 (SD 2.47). The means and SDs for each of the components are reported in <xref ref-type="table" rid="table4">Table 4</xref>. The results of this preliminary analysis provide support for the validity of the linguistic components as indicators of bias in EHRs, as our sample of clinicians regard them as highly suggestive of bias if used differently for patients of diverse racial and ethnic backgrounds.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Subject-matter expert assessment of bias based on specific linguistic markers.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Component</td><td align="left" valign="bottom" colspan="2">Score, mean (SD)<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Negative adjectives</td><td align="char" char="." valign="top" colspan="2">8.63 (2.06)</td></tr><tr><td align="left" valign="top">Fear and disgust words</td><td align="char" char="." valign="top" colspan="2">8.11 (2.15)</td></tr><tr><td align="left" valign="top">Positive adjectives</td><td align="char" char="." 
valign="top" colspan="2">7.93 (2.46)</td></tr><tr><td align="left" valign="top">Joy words</td><td align="char" char="." valign="top" colspan="2">6.81 (2.47)</td></tr><tr><td align="left" valign="top">Trust verbs</td><td align="char" char="." valign="top" colspan="2">7.56 (2.64)</td></tr><tr><td align="left" valign="top">Politics words</td><td align="char" char="." valign="top" colspan="2">7.07 (2.32)</td></tr><tr><td align="left" valign="top">Respect nouns</td><td align="char" char="." valign="top" colspan="2">7.56 (2.55)</td></tr><tr><td align="left" valign="top">Well-being words</td><td align="char" char="." valign="top" colspan="2">5.56 (2.55)</td></tr><tr><td align="left" valign="top">Mean word count</td><td align="char" char="." valign="top" colspan="2">6.11 (2.19)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Scale ranges from 1 (<italic>Not at all indicative of bias</italic>) to 10 (<italic>Extremely indicative of bias</italic>).</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We found that the words that physicians use in EHR notes differ based on the racial and ethnic backgrounds of patients. Specifically, for Black non-Hispanic patients, notes consisted of words that convey negativity, fear, and disgust. When seeing Hispanic or Latino patients, physicians used fewer positive words and were less likely to use words that communicate trust and joy. Our findings are consistent with others who have documented that physicians communicate in the EHR differently (ie, more negatively) when caring for patients from some minority groups [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref17">17</xref>], which may ultimately result in adverse and inequitable health outcomes for patients. 
Our results also align with other papers that found that stigmatizing language is more commonly used in EHRs for minority populations [<xref ref-type="bibr" rid="ref38">38</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. Those papers used language guidelines [<xref ref-type="bibr" rid="ref38">38</xref>] and experts [<xref ref-type="bibr" rid="ref39">39</xref>] to identify stigmatizing language. We came to a similar conclusion by using established language dictionaries and contend that our approach allows for a more comprehensive assessment of language. For example, a prior paper used 15 descriptors [<xref ref-type="bibr" rid="ref42">42</xref>]. In contrast, our approach encompasses tens of thousands of words, including multiple word lists, positive and negative sentiments, and emotions. Thus, this method does not merely capture the presence or absence of stigmatizing language, but rather offers a broader glimpse of the clinician-patient relationship. Furthermore, the validation survey confirmed that subject-matter experts perceive the types of words included in this study to be indicative of bias when used differentially for patients of diverse racial and ethnic backgrounds. Taken together, these findings indicate that the language used differs for patients based on racial and ethnic backgrounds and that those differences are suggestive of bias. To our knowledge, our paper is the first to use this particular method to examine outpatient diabetes notes. Because diabetes quality measures already exist, our analysis allows researchers to link bias to differences in quality in future studies [<xref ref-type="bibr" rid="ref43">43</xref>].</p><p>EHR notes are important, although imperfect, assessments of physician attitudes toward their patients. 
With more and more time now being devoted to EHR documentation, physicians are increasingly burned out, which has led to the adoption of more efficient data entry strategies such as using templates, copy-pasting previous text, and inserting preset language [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Consequently, notes can be standardized, limiting our ability to assess physician attitudes and subconscious biases toward patients. Despite these caveats, notes remain the definitive and often sole account of what happened in the examination room, and based on these data, Black non-Hispanic and Hispanic or Latino patients are written about differently than White non-Hispanic patients.</p><p>The method described in this paper offers a scalable blueprint that provides clinicians with data about their interactions with patients and overcomes limitations of other traditional measures of bias. Existing measures require primary data collection through surveys, videotaped encounters, and confederate observations. Surveys assess perceptions of interactions and are prone to retrospective bias and socially desirable responding, whereas the time-consuming nature of encounters and observations limits scalability and the number of clinicians that can receive feedback at any given time. The relevance of alternative measures has also been questioned. For example, critics of the implicit association test have asked whether performance on the test is applicable to real-world contexts [<xref ref-type="bibr" rid="ref46">46</xref>], which may explain why some individuals change their behavior when confronted with their own biases, whereas others do not [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref47">47</xref>]. 
In contrast, our method uses data that are automatically and universally collected through the course of delivering care and generated by physicians in actual encounters.</p></sec><sec id="s4-2"><title>Limitations</title><p>When interpreting our results, several limitations should be considered. First, due to limitations in our data, we are unable to determine which additional team members, including scribes, medical assistants, and residents, contributed to the notes. However, attending physicians are ultimately responsible for the content and have the authority and responsibility to modify language that is inconsistent with their values. Second, we lack information about physicians in this sample and do not have access to physician demographic characteristics (eg, their racial and ethnic backgrounds), although this would be an important next step. We attempted to account for this limitation by comparing language within rather than across physicians. Third, we included all language within notes, including physical exams, medications, and past medical histories. These sections can be guided by templates or not actively entered by physicians. We retained these parts in case the language within these sections contributed to variation. An alternative approach could assess only the history of present illness, assessment, and plan sections of the note and could yield different results. Additional work is needed to determine whether differential word choices reflect attitudes and behaviors toward patients. EHR notes serve a wide range of purposes. They convey medical information to others, remind physicians of their impressions, communicate plans to patients, provide justification for billing codes, and serve as legal evidence [<xref ref-type="bibr" rid="ref44">44</xref>]. Thus, specific phrases (eg, worsening, uncontrolled, or adherence) may be required for billing, compliance, and legal purposes and may not reflect bias toward patients. 
Finally, these results may not be generalizable to other conditions. Our findings may be unique to the language used for diabetes care and by clinicians who manage diabetes. Determining whether these results persist for different diseases (eg, cancer, heart disease, and acute injuries) is an important next step.</p></sec><sec id="s4-3"><title>Directions for Future Research</title><p>Additional research is needed to interpret and provide context for this exploratory work. To determine whether these measures are associated with bias, subject-matter experts could label notes using known patterns of bias (eg, the ratio of collective to personal pronouns, the amount and level of abstraction of speech, and passive vs active voice) [<xref ref-type="bibr" rid="ref48">48</xref>]. Further research is needed to understand whether biased language in notes reflects biased behaviors during encounters as well as inequitable health outcomes for some racial and ethnic minority populations. Conducting further experiments (eg, with research actors as patients in a mock medical visit) could help determine whether biased language in notes reflects manifestations of bias during encounters (eg, less eye contact, hostile language, or less time spent on education and counseling). If bias is confirmed, we need to determine whether clinicians who use differential language provide worse care and quality for minority patients. Ultimately, this tool may be used to identify and mitigate bias. Future studies should assess whether receiving feedback using this method leads to behavior change and whether changing the language used in EHR notes leads to changes in patient interactions. 
Although many strategies for reducing bias exist&#x2014;such as affirming egalitarian goals, seeking common-group identities, perspective taking, and individuation&#x2014;it is unclear which approach best complements our proposed method [<xref ref-type="bibr" rid="ref5">5</xref>].</p></sec><sec id="s4-4"><title>Conclusion</title><p>In this novel, exploratory work, we used natural language processing and found that compared to encounters with White non-Hispanic patients, physicians use language conveying more negativity, fear, and disgust in their encounters with some racial and ethnic minority patients. If confirmed in future studies, these features could be used to make clinicians aware of their biases with the goal of reducing racial and ethnic discrimination and the resulting health inequities.</p></sec></sec></body><back><ack><p>This work was supported by a Rice Anti-Racism Research Grant through Rice University.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb2">ICC</term><def><p>intraclass correlation</p></def></def-item><def-item><term id="abb3">LIWC</term><def><p>Linguistic Inquiry and Word Count</p></def></def-item><def-item><term id="abb4">PCA</term><def><p>principal component analysis</p></def></def-item><def-item><term id="abb5">SEANCE</term><def><p>Sentiment Analysis and Social Cognition Engine</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maass</surname><given-names>A</given-names></name><name name-style="western"><surname>Karasawa</surname><given-names>M</given-names></name><name name-style="western"><surname>Politi</surname><given-names>F</given-names></name><name 
name-style="western"><surname>Suga</surname><given-names>S</given-names></name></person-group><article-title>Do verbs and adjectives play different roles in different cultures? a cross-linguistic analysis of person representation</article-title><source>J Pers Soc Psychol</source><year>2006</year><month>05</month><volume>90</volume><issue>5</issue><fpage>734</fpage><lpage>750</lpage><pub-id pub-id-type="doi">10.1037/0022-3514.90.5.734</pub-id><pub-id pub-id-type="medline">16737371</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Boroditsky</surname><given-names>L</given-names></name><name name-style="western"><surname>Schmidt</surname><given-names>LA</given-names></name><name name-style="western"><surname>Phillips</surname><given-names>W</given-names></name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Gentner</surname><given-names>D</given-names></name><name name-style="western"><surname>Goldin-Meadow</surname><given-names>S</given-names></name></person-group><article-title>Sex, syntax, and semantics</article-title><source>Language in Mind: Advances in the Study of Language and Thought</source><year>2003</year><publisher-name>The MIT Press</publisher-name><pub-id pub-id-type="doi">10.7551/mitpress/4117.001.0001</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hall</surname><given-names>WJ</given-names></name><name name-style="western"><surname>Chapman</surname><given-names>MV</given-names></name><name name-style="western"><surname>Lee</surname><given-names>KM</given-names></name><etal/></person-group><article-title>Implicit racial/ethnic bias among health care professionals and its influence on health care outcomes: a systematic review</article-title><source>Am J Public 
Health</source><year>2015</year><month>12</month><volume>105</volume><issue>12</issue><fpage>e60</fpage><lpage>e76</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2015.302903</pub-id><pub-id pub-id-type="medline">26469668</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jones</surname><given-names>KP</given-names></name><name name-style="western"><surname>Peddie</surname><given-names>CI</given-names></name><name name-style="western"><surname>Gilrane</surname><given-names>VL</given-names></name><name name-style="western"><surname>King</surname><given-names>EB</given-names></name><name name-style="western"><surname>Gray</surname><given-names>AL</given-names></name></person-group><article-title>Not so subtle: a meta-analytic investigation of the correlates of subtle and overt discrimination</article-title><source>J Manag</source><year>2016</year><month>07</month><day>10</day><volume>42</volume><issue>6</issue><fpage>1588</fpage><lpage>1613</lpage><pub-id pub-id-type="doi">10.1177/0149206313506466</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zestcott</surname><given-names>CA</given-names></name><name name-style="western"><surname>Blair</surname><given-names>IV</given-names></name><name name-style="western"><surname>Stone</surname><given-names>J</given-names></name></person-group><article-title>Examining the presence, consequences, and reduction of implicit bias in health care: a narrative review</article-title><source>Group Process Intergroup Relat</source><year>2016</year><month>07</month><volume>19</volume><issue>4</issue><fpage>528</fpage><lpage>542</lpage><pub-id pub-id-type="doi">10.1177/1368430216642029</pub-id><pub-id pub-id-type="medline">27547105</pub-id></nlm-citation></ref><ref 
id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ahn</surname><given-names>SM</given-names></name><name name-style="western"><surname>Kim</surname><given-names>TH</given-names></name><name name-style="western"><surname>Lee</surname><given-names>S</given-names></name><etal/></person-group><article-title>The first Korean genome sequence and analysis: full genome sequencing for a socio-ethnic group</article-title><source>Genome Res</source><year>2009</year><month>09</month><volume>19</volume><issue>9</issue><fpage>1622</fpage><lpage>1629</lpage><pub-id pub-id-type="doi">10.1101/gr.092197.109</pub-id><pub-id pub-id-type="medline">19470904</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Chadha</surname><given-names>N</given-names></name><name name-style="western"><surname>Lim</surname><given-names>B</given-names></name><name name-style="western"><surname>Kane</surname><given-names>M</given-names></name><name name-style="western"><surname>Rowland</surname><given-names>B</given-names></name></person-group><article-title>Toward the abolition of biological race in medicine</article-title><source>Othering &#x0026; Belonging Institute</source><year>2020</year><month>05</month><day>13</day><access-date>2023-06-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://belonging.berkeley.edu/toward-abolition-biological-race-medicine-8">https://belonging.berkeley.edu/toward-abolition-biological-race-medicine-8</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>Institute of Medicine</collab></person-group><source>Unequal Treatment: Confronting Racial and Ethnic Disparities in Health Care</source><year>2003</year><publisher-name>National Academies 
Press</publisher-name><pub-id pub-id-type="doi">10.17226/10260</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ferguson</surname><given-names>WJ</given-names></name><name name-style="western"><surname>Candib</surname><given-names>LM</given-names></name></person-group><article-title>Culture, language, and the doctor-patient relationship</article-title><source>Fam Med</source><year>2002</year><month>05</month><volume>34</volume><issue>5</issue><fpage>353</fpage><lpage>361</lpage><pub-id pub-id-type="medline">12038717</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siminoff</surname><given-names>LA</given-names></name><name name-style="western"><surname>Graham</surname><given-names>GC</given-names></name><name name-style="western"><surname>Gordon</surname><given-names>NH</given-names></name></person-group><article-title>Cancer communication patterns and the influence of patient characteristics: disparities in information-giving and affective behaviors</article-title><source>Patient Educ Couns</source><year>2006</year><month>09</month><volume>62</volume><issue>3</issue><fpage>355</fpage><lpage>360</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2006.06.011</pub-id><pub-id pub-id-type="medline">16860520</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>RL</given-names></name><name name-style="western"><surname>Roter</surname><given-names>D</given-names></name><name name-style="western"><surname>Powe</surname><given-names>NR</given-names></name><name 
name-style="western"><surname>Cooper</surname><given-names>LA</given-names></name></person-group><article-title>Patient race/ethnicity and quality of patient&#x2013;physician communication during medical visits</article-title><source>Am J Public Health</source><year>2004</year><month>12</month><volume>94</volume><issue>12</issue><fpage>2084</fpage><lpage>2090</lpage><pub-id pub-id-type="doi">10.2105/ajph.94.12.2084</pub-id><pub-id pub-id-type="medline">15569958</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jacobs</surname><given-names>EA</given-names></name><name name-style="western"><surname>Rolle</surname><given-names>I</given-names></name><name name-style="western"><surname>Ferrans</surname><given-names>CE</given-names></name><name name-style="western"><surname>Whitaker</surname><given-names>EE</given-names></name><name name-style="western"><surname>Warnecke</surname><given-names>RB</given-names></name></person-group><article-title>Understanding African Americans&#x2019; views of the trustworthiness of physicians</article-title><source>J Gen Intern Med</source><year>2006</year><month>06</month><volume>21</volume><issue>6</issue><fpage>642</fpage><lpage>647</lpage><pub-id pub-id-type="doi">10.1111/j.1525-1497.2006.00485.x</pub-id><pub-id pub-id-type="medline">16808750</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eggly</surname><given-names>S</given-names></name><name name-style="western"><surname>Hamel</surname><given-names>LM</given-names></name><name name-style="western"><surname>Foster</surname><given-names>TS</given-names></name><etal/></person-group><article-title>Randomized trial of a question prompt list to increase patient active participation during interactions with Black patients and their 
oncologists</article-title><source>Patient Educ Couns</source><year>2017</year><month>05</month><volume>100</volume><issue>5</issue><fpage>818</fpage><lpage>826</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2016.12.026</pub-id><pub-id pub-id-type="medline">28073615</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="web"><article-title>National Healthcare Quality &#x0026; Disparities Reports</article-title><source>Agency for Healthcare Research and Quality</source><access-date>2023-06-27</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ahrq.gov/research/findings/nhqrdr/index.html">https://www.ahrq.gov/research/findings/nhqrdr/index.html</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shavers</surname><given-names>VL</given-names></name><name name-style="western"><surname>Fagan</surname><given-names>P</given-names></name><name name-style="western"><surname>Jones</surname><given-names>D</given-names></name><etal/></person-group><article-title>The state of research on racial/ethnic discrimination in the receipt of health care</article-title><source>Am J Public Health</source><year>2012</year><month>05</month><volume>102</volume><issue>5</issue><fpage>953</fpage><lpage>966</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2012.300773</pub-id><pub-id pub-id-type="medline">22494002</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Penner</surname><given-names>LA</given-names></name><name name-style="western"><surname>Dovidio</surname><given-names>JF</given-names></name><name name-style="western"><surname>West</surname><given-names>TV</given-names></name><etal/></person-group><article-title>Aversive racism and medical interactions with Black 
patients: a field study</article-title><source>J Exp Soc Psychol</source><year>2010</year><month>03</month><day>1</day><volume>46</volume><issue>2</issue><fpage>436</fpage><lpage>440</lpage><pub-id pub-id-type="doi">10.1016/j.jesp.2009.11.004</pub-id><pub-id pub-id-type="medline">20228874</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hagiwara</surname><given-names>N</given-names></name><name name-style="western"><surname>Slatcher</surname><given-names>RB</given-names></name><name name-style="western"><surname>Eggly</surname><given-names>S</given-names></name><name name-style="western"><surname>Penner</surname><given-names>LA</given-names></name></person-group><article-title>Physician racial bias and word use during racially discordant medical interactions</article-title><source>Health Commun</source><year>2017</year><month>04</month><volume>32</volume><issue>4</issue><fpage>401</fpage><lpage>408</lpage><pub-id pub-id-type="doi">10.1080/10410236.2016.1138389</pub-id><pub-id pub-id-type="medline">27309596</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Crossley</surname><given-names>SA</given-names></name><name name-style="western"><surname>Kyle</surname><given-names>K</given-names></name><name name-style="western"><surname>McNamara</surname><given-names>DS</given-names></name></person-group><article-title>Sentiment Analysis and Social Cognition Engine (SEANCE): an automatic tool for sentiment, social cognition, and social-order analysis</article-title><source>Behav Res Methods</source><year>2017</year><month>06</month><volume>49</volume><issue>3</issue><fpage>803</fpage><lpage>821</lpage><pub-id pub-id-type="doi">10.3758/s13428-016-0743-z</pub-id><pub-id 
pub-id-type="medline">27193159</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Crossley</surname><given-names>SA</given-names></name><name name-style="western"><surname>Skalicky</surname><given-names>S</given-names></name><name name-style="western"><surname>Dascalu</surname><given-names>M</given-names></name></person-group><article-title>Moving beyond classic readability formulas: new methods and new models</article-title><source>J Res Read</source><year>2019</year><month>11</month><volume>42</volume><issue>3-4</issue><fpage>541</fpage><lpage>561</lpage><pub-id pub-id-type="doi">10.1111/1467-9817.12283</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>M</given-names></name><name name-style="western"><surname>Liu</surname><given-names>B</given-names></name></person-group><article-title>Mining and summarizing customer reviews</article-title><source>KDD &#x2019;04: Proceedings of the Tenth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source><year>2004</year><publisher-name>Association for Computing Machinery</publisher-name><fpage>168</fpage><lpage>177</lpage><pub-id pub-id-type="doi">10.1145/1014052.1014073</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>B</given-names></name><name name-style="western"><surname>Hu</surname><given-names>M</given-names></name><name name-style="western"><surname>Cheng</surname><given-names>J</given-names></name></person-group><article-title>Opinion observer: analyzing and comparing opinions on the web</article-title><source>WWW &#x2019;05: Proceedings of the 14th International Conference on 
World Wide Web</source><year>2005</year><publisher-name>Association for Computing Machinery</publisher-name><fpage>342</fpage><lpage>351</lpage><pub-id pub-id-type="doi">10.1145/1060745.1060797</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Mohammad</surname><given-names>SM</given-names></name><name name-style="western"><surname>Turney</surname><given-names>PD</given-names></name></person-group><article-title>Emotions evoked by common words and phrases: using Mechanical Turk to create an emotion lexicon</article-title><source>Proceedings of the NAACL HLT 2010 Workshop on Computational Approaches to Analysis and Generation of Emotion in Text</source><year>2010</year><access-date>2024-05-10</access-date><publisher-name>Association for Computational Linguistics</publisher-name><fpage>26</fpage><lpage>34</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/W10-0204/">https://aclanthology.org/W10-0204/</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Stone</surname><given-names>PJ</given-names></name><name name-style="western"><surname>Dunphy</surname><given-names>DC</given-names></name><name name-style="western"><surname>Smith</surname><given-names>MS</given-names></name></person-group><source>The General Inquirer: A Computer System for Content Analysis</source><year>1966</year><publisher-name>MIT Press</publisher-name></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hutto</surname><given-names>C</given-names></name><name name-style="western"><surname>Gilbert</surname><given-names>E</given-names></name></person-group><article-title>VADER: a parsimonious 
rule-based model for sentiment analysis of social media text</article-title><source>Proceedings of the International AAAI Conference on Web and Social Media</source><year>2014</year><month>05</month><day>16</day><volume>8</volume><issue>1</issue><fpage>216</fpage><lpage>225</lpage><pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lasswell</surname><given-names>HD</given-names></name><name name-style="western"><surname>Namenwirth</surname><given-names>J</given-names></name></person-group><source>The Lasswell Value Dictionary</source><year>1969</year><publisher-name>Yale University Press</publisher-name></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Namenwirth</surname><given-names>J</given-names></name><name name-style="western"><surname>Weber</surname><given-names>R</given-names></name></person-group><source>Dynamics of Culture</source><year>1987</year><publisher-name>Allen &#x0026; Unwin</publisher-name></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scherer</surname><given-names>KR</given-names></name></person-group><article-title>What are emotions? 
and how can they be measured?</article-title><source>Social Science Information</source><year>2005</year><month>12</month><volume>44</volume><issue>4</issue><fpage>695</fpage><lpage>729</lpage><pub-id pub-id-type="doi">10.1177/0539018405058216</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ng</surname><given-names>SH</given-names></name></person-group><article-title>Language-based discrimination: blatant and subtle forms</article-title><source>J Lang Soc Psychol</source><year>2007</year><month>06</month><volume>26</volume><issue>2</issue><fpage>106</fpage><lpage>122</lpage><pub-id pub-id-type="doi">10.1177/0261927X07300074</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Z</given-names></name><name name-style="western"><surname>Chen</surname><given-names>MY</given-names></name><name name-style="western"><surname>Banerjee</surname><given-names>J</given-names></name></person-group><article-title>Using corpus analyses to help address the DIF interpretation: gender differences in standardized writing assessment</article-title><source>Front Psychol</source><year>2020</year><month>06</month><day>3</day><volume>11</volume><fpage>1088</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2020.01088</pub-id><pub-id pub-id-type="medline">32581944</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blair</surname><given-names>IV</given-names></name><name name-style="western"><surname>Steiner</surname><given-names>JF</given-names></name><name name-style="western"><surname>Fairclough</surname><given-names>DL</given-names></name><etal/></person-group><article-title>Clinicians&#x2019; 
implicit ethnic/racial bias and perceptions of care among Black and Latino patients</article-title><source>Ann Fam Med</source><year>2013</year><volume>11</volume><issue>1</issue><fpage>43</fpage><lpage>52</lpage><pub-id pub-id-type="doi">10.1370/afm.1442</pub-id><pub-id pub-id-type="medline">23319505</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chapman</surname><given-names>EN</given-names></name><name name-style="western"><surname>Kaatz</surname><given-names>A</given-names></name><name name-style="western"><surname>Carnes</surname><given-names>M</given-names></name></person-group><article-title>Physicians and implicit bias: how doctors may unwittingly perpetuate health care disparities</article-title><source>J Gen Intern Med</source><year>2013</year><month>11</month><volume>28</volume><issue>11</issue><fpage>1504</fpage><lpage>1510</lpage><pub-id pub-id-type="doi">10.1007/s11606-013-2441-1</pub-id><pub-id pub-id-type="medline">23576243</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sabin</surname><given-names>JA</given-names></name><name name-style="western"><surname>Greenwald</surname><given-names>AG</given-names></name></person-group><article-title>The influence of implicit bias on treatment recommendations for 4 common pediatric conditions: pain, urinary tract infection, attention deficit hyperactivity disorder, and asthma</article-title><source>Am J Public Health</source><year>2012</year><month>05</month><volume>102</volume><issue>5</issue><fpage>988</fpage><lpage>995</lpage><pub-id pub-id-type="doi">10.2105/AJPH.2011.300621</pub-id><pub-id pub-id-type="medline">22420817</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Sue</surname><given-names>DW</given-names></name><name name-style="western"><surname>Capodilupo</surname><given-names>CM</given-names></name><name name-style="western"><surname>Torino</surname><given-names>GC</given-names></name><etal/></person-group><article-title>Racial microaggressions in everyday life: implications for clinical practice</article-title><source>Am Psychol</source><year>2007</year><volume>62</volume><issue>4</issue><fpage>271</fpage><lpage>286</lpage><pub-id pub-id-type="doi">10.1037/0003-066X.62.4.271</pub-id><pub-id pub-id-type="medline">17516773</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>Statistics about diabetes</article-title><source>American Diabetes Association</source><access-date>2023-06-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://diabetes.org/about-us/statistics/about-diabetes">https://diabetes.org/about-us/statistics/about-diabetes</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="web"><article-title>Ambulatory care use and physician office visits</article-title><source>Centers for Disease Control and Prevention</source><access-date>2023-06-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/nchs/fastats/physician-visits.htm">https://www.cdc.gov/nchs/fastats/physician-visits.htm</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="web"><article-title>Fast facts on U.S. 
hospitals</article-title><source>American Hospital Association</source><access-date>2023-06-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.aha.org/statistics/fast-facts-us-hospitals">https://www.aha.org/statistics/fast-facts-us-hospitals</ext-link></comment></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cambria</surname><given-names>E</given-names></name><name name-style="western"><surname>Havasi</surname><given-names>C</given-names></name><name name-style="western"><surname>Hussain</surname><given-names>A</given-names></name></person-group><article-title>SenticNet 2: a semantic and affective resource for opinion mining and sentiment analysis</article-title><source>Proceedings of the Twenty-Fifth International Florida Artificial Intelligence Research Society Conference (FLAIRS 2012)</source><year>2012</year><access-date>2024-05-10</access-date><publisher-name>AAAI Press</publisher-name><fpage>202</fpage><lpage>207</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://cdn.aaai.org/ocs/4411/4411-21497-1-PB.pdf">https://cdn.aaai.org/ocs/4411/4411-21497-1-PB.pdf</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Himmelstein</surname><given-names>G</given-names></name><name name-style="western"><surname>Bates</surname><given-names>D</given-names></name><name name-style="western"><surname>Zhou</surname><given-names>L</given-names></name></person-group><article-title>Examination of stigmatizing language in the electronic health record</article-title><source>JAMA Netw Open</source><year>2022</year><month>01</month><day>4</day><volume>5</volume><issue>1</issue><fpage>e2144967</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.44967</pub-id><pub-id 
pub-id-type="medline">35084481</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>M</given-names></name><name name-style="western"><surname>Oliwa</surname><given-names>T</given-names></name><name name-style="western"><surname>Peek</surname><given-names>ME</given-names></name><name name-style="western"><surname>Tung</surname><given-names>EL</given-names></name></person-group><article-title>Negative patient descriptors: documenting racial bias in the electronic health record</article-title><source>Health Aff (Millwood)</source><year>2022</year><month>02</month><volume>41</volume><issue>2</issue><fpage>203</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1377/hlthaff.2021.01423</pub-id><pub-id pub-id-type="medline">35044842</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barcelona</surname><given-names>V</given-names></name><name name-style="western"><surname>Scharp</surname><given-names>D</given-names></name><name name-style="western"><surname>Idnay</surname><given-names>BR</given-names></name><etal/></person-group><article-title>A qualitative analysis of stigmatizing language in birth admission clinical notes</article-title><source>Nurs Inq</source><year>2023</year><month>07</month><volume>30</volume><issue>3</issue><fpage>e12557</fpage><pub-id pub-id-type="doi">10.1111/nin.12557</pub-id><pub-id pub-id-type="medline">37073504</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Goddu</surname><given-names>PA</given-names></name><name name-style="western"><surname>O&#x2019;Conor</surname><given-names>KJ</given-names></name><name 
name-style="western"><surname>Lanzkron</surname><given-names>S</given-names></name><etal/></person-group><article-title>Do words matter? stigmatizing language and the transmission of bias in the medical record</article-title><source>J Gen Intern Med</source><year>2018</year><month>05</month><volume>33</volume><issue>5</issue><fpage>685</fpage><lpage>691</lpage><pub-id pub-id-type="doi">10.1007/s11606-017-4289-2</pub-id><pub-id pub-id-type="medline">29374357</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>J</given-names></name><name name-style="western"><surname>Saha</surname><given-names>S</given-names></name><name name-style="western"><surname>Chee</surname><given-names>B</given-names></name><name name-style="western"><surname>Taylor</surname><given-names>J</given-names></name><name name-style="western"><surname>Beach</surname><given-names>MC</given-names></name></person-group><article-title>Physician use of stigmatizing language in patient medical records</article-title><source>JAMA Netw Open</source><year>2021</year><month>07</month><day>1</day><volume>4</volume><issue>7</issue><fpage>e2117052</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.17052</pub-id><pub-id pub-id-type="medline">34259849</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>Comprehensive diabetes care (CDC)</article-title><source>National Committee for Quality Assurance</source><access-date>2017-11-29</access-date><comment><ext-link ext-link-type="uri" xlink:href="http://www.ncqa.org/report-cards/health-plans/state-of-health-care-quality/2016-table-of-contents/diabetes-care">http://www.ncqa.org/report-cards/health-plans/state-of-health-care-quality/2016-table-of-contents/diabetes-care</ext-link></comment></nlm-citation></ref><ref 
id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ho</surname><given-names>YX</given-names></name><name name-style="western"><surname>Gadd</surname><given-names>CS</given-names></name><name name-style="western"><surname>Kohorst</surname><given-names>KL</given-names></name><name name-style="western"><surname>Rosenbloom</surname><given-names>ST</given-names></name></person-group><article-title>A qualitative analysis evaluating the purposes and practices of clinical documentation</article-title><source>Appl Clin Inform</source><year>2014</year><month>02</month><day>26</day><volume>5</volume><issue>1</issue><fpage>153</fpage><lpage>168</lpage><pub-id pub-id-type="doi">10.4338/ACI-2013-10-RA-0081</pub-id><pub-id pub-id-type="medline">24734130</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weis</surname><given-names>JM</given-names></name><name name-style="western"><surname>Levy</surname><given-names>PC</given-names></name></person-group><article-title>Copy, paste, and cloned notes in electronic health records</article-title><source>Chest</source><year>2014</year><month>03</month><volume>145</volume><issue>3</issue><fpage>632</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1378/chest.13-0886</pub-id><pub-id pub-id-type="medline">27845637</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sukhera</surname><given-names>J</given-names></name><name name-style="western"><surname>Wodzinski</surname><given-names>M</given-names></name><name name-style="western"><surname>Rehman</surname><given-names>M</given-names></name><name 
name-style="western"><surname>Gonzalez</surname><given-names>CM</given-names></name></person-group><article-title>The implicit association test in health professions education: a meta-narrative review</article-title><source>Perspect Med Educ</source><year>2019</year><month>10</month><volume>8</volume><issue>5</issue><fpage>267</fpage><lpage>275</lpage><pub-id pub-id-type="doi">10.1007/s40037-019-00533-8</pub-id><pub-id pub-id-type="medline">31535290</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Ryn</surname><given-names>M</given-names></name><name name-style="western"><surname>Hardeman</surname><given-names>R</given-names></name><name name-style="western"><surname>Phelan</surname><given-names>SM</given-names></name><etal/></person-group><article-title>Medical school experiences associated with change in implicit racial bias among 3547 students: a medical student CHANGES study report</article-title><source>J Gen Intern Med</source><year>2015</year><month>12</month><volume>30</volume><issue>12</issue><fpage>1748</fpage><lpage>1756</lpage><pub-id pub-id-type="doi">10.1007/s11606-015-3447-7</pub-id><pub-id pub-id-type="medline">26129779</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von Hippel</surname><given-names>W</given-names></name><name name-style="western"><surname>Sekaquaptewa</surname><given-names>D</given-names></name><name name-style="western"><surname>Vargas</surname><given-names>P</given-names></name></person-group><article-title>The linguistic intergroup bias as an implicit indicator of prejudice</article-title><source>J Exp Soc Psychol</source><year>1997</year><month>09</month><volume>33</volume><issue>5</issue><fpage>490</fpage><lpage>509</lpage><pub-id 
pub-id-type="doi">10.1006/jesp.1997.1332</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Demographics of the validation study participants.</p><media xlink:href="medinform_v12i1e50428_app1.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material></app-group></back></article>