<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="review-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i5e36388</article-id>
      <article-id pub-id-type="pmid">35639450</article-id>
      <article-id pub-id-type="doi">10.2196/36388</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Review</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Review</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Evaluation and Mitigation of Racial Bias in Clinical Machine Learning Models: Scoping Review</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Lovis</surname>
            <given-names>Christian</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Turbe</surname>
            <given-names>Hugues</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Huang</surname>
            <given-names>Jonathan</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3428-9952</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Galal</surname>
            <given-names>Galal</given-names>
          </name>
          <degrees>MD, MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Anesthesiology</institution>
            <institution>Northwestern University Feinberg School of Medicine</institution>
            <addr-line>420 E Superior St</addr-line>
            <addr-line>Chicago, IL, 60611</addr-line>
            <country>United States</country>
            <phone>1 (312) 503 8194</phone>
            <email>galal.galal@nm.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3525-859X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Etemadi</surname>
            <given-names>Mozziyar</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6324-9220</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Vaidyanathan</surname>
            <given-names>Mahesh</given-names>
          </name>
          <degrees>MD, MBA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4311-8896</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Anesthesiology</institution>
        <institution>Northwestern University Feinberg School of Medicine</institution>
        <addr-line>Chicago, IL</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical Engineering</institution>
        <institution>Northwestern University</institution>
        <addr-line>Evanston, IL</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Digital Health &#38; Data Science Curricular Thread</institution>
        <institution>Northwestern University Feinberg School of Medicine</institution>
        <addr-line>Chicago, IL</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Galal Galal <email>galal.galal@nm.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>5</month>
        <year>2022</year>
      </pub-date>
      <volume>10</volume>
      <issue>5</issue>
      <elocation-id>e36388</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>27</day>
          <month>3</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Jonathan Huang, Galal Galal, Mozziyar Etemadi, Mahesh Vaidyanathan. Originally published in JMIR Medical Informatics (https://medinform.jmir.org), 31.05.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on https://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://medinform.jmir.org/2022/5/e36388" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Racial bias is a key concern regarding the development, validation, and implementation of machine learning (ML) models in clinical settings. Despite the potential of bias to propagate health disparities, racial bias in clinical ML has yet to be thoroughly examined and best practices for bias mitigation remain unclear.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>Our objective was to perform a scoping review to characterize the methods by which the racial bias of ML has been assessed and describe strategies that may be used to enhance algorithmic fairness in clinical ML.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A scoping review was conducted in accordance with the Preferred Reporting Items for Systematic Reviews and Meta-analyses (PRISMA) Extension for Scoping Reviews. A literature search using PubMed, Scopus, and Embase databases, as well as Google Scholar, identified 635 records, of which 12 studies were included.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Applications of ML were varied and involved diagnosis, outcome prediction, and clinical score prediction performed on data sets including images, diagnostic studies, clinical text, and clinical variables. Of the 12 studies, 1 (8%) described a model in routine clinical use, 2 (17%) examined prospectively validated clinical models, and the remaining 9 (75%) described internally validated models. In addition, 8 (67%) studies concluded that racial bias was present, 2 (17%) concluded that it was not, and 2 (17%) assessed the implementation of bias mitigation strategies without comparison to a baseline model. Fairness metrics used to assess algorithmic racial bias were inconsistent. The most commonly observed metrics were equal opportunity difference (5/12, 42%), accuracy (4/12, 33%), and disparate impact (2/12, 17%). All 8 (67%) studies that implemented methods for mitigation of racial bias successfully increased fairness, as measured by the authors’ chosen metrics. Preprocessing methods of bias mitigation were most commonly used across all studies that implemented them.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The broad scope of medical ML applications and potential patient harms demand an increased emphasis on evaluation and mitigation of racial bias in clinical ML. However, the adoption of algorithmic fairness principles in medicine remains inconsistent and is limited by poor data availability and ML model reporting. We recommend that researchers and journal editors emphasize standardized reporting and data availability in medical ML studies to improve transparency and facilitate evaluation for racial bias.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>machine learning</kwd>
        <kwd>race</kwd>
        <kwd>bias</kwd>
        <kwd>racial bias</kwd>
        <kwd>scoping review</kwd>
        <kwd>algorithm</kwd>
        <kwd>algorithmic fairness</kwd>
        <kwd>clinical machine learning</kwd>
        <kwd>medical machine learning</kwd>
        <kwd>fairness</kwd>
        <kwd>assessment</kwd>
        <kwd>model</kwd>
        <kwd>diagnosis</kwd>
        <kwd>outcome prediction</kwd>
        <kwd>score prediction</kwd>
        <kwd>prediction</kwd>
        <kwd>mitigation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In recent years, artificial intelligence (AI) has drawn significant attention in medicine as machine learning (ML) techniques show an increasing promise of clinical impact. Driven by unprecedented data accessibility and computational capacity, ML has been reported to reach parity with human clinicians in a variety of tasks [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. ML is poised to benefit patients and physicians by optimizing clinical workflows, enhancing diagnosis, and supporting personalized health care interventions [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. Decision support tools based on ML have already been implemented across health systems [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], and the continued proliferation of clinical ML will impact patients in all fields of medicine.</p>
        <p>However, despite its appeal, significant barriers remain to the full realization of clinically integrated ML. Key concerns include limited model transparency due to the “black box” of ML, inadequate reporting standards, and the need for prospective validation in clinical settings [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Racial bias in clinical ML is a crucial challenge arising from these limitations and must be addressed to ensure fairness in clinical implementation of ML. As ML is premised on prediction of novel outcomes based on previously seen examples, unintended discrimination is a natural consequence of algorithm development involving training data that reflect real-world inequities [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p>Equity in health care remains a continual pursuit [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Bias and disparities along dimensions of race, age, and gender have been shown to impact health care access and delivery, evident in varied settings, such as race correction in clinical algorithms or clinical trial enrollment and adverse event monitoring [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Considering the growing body of literature demonstrating profound adverse impacts of health care inequities on patient outcomes, mitigation of the numerous and insidious sources of potential bias in medicine remains a critical challenge to prevent harm to patients [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Thus, the potential for algorithms to perpetuate health disparities must be carefully weighed when incorporating ML models into clinical practice [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>].</p>
        <p>Algorithmic fairness is an area of ML research guiding model development with the aim of preventing discrimination involving protected groups, which are defined by attributes such as race, gender, religion, physiologic variability, preexisting conditions, physical ability, and sexual orientation [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. However, application of algorithmic fairness principles in the medical ML literature remains nascent [<xref ref-type="bibr" rid="ref20">20</xref>]. Greater awareness of the potential harms of bias in clinical ML as well as methods to evaluate and mitigate them is needed to support clinicians and researchers across the health care and data science disciplines, who must evaluate and implement clinical ML models with a critical eye toward algorithmic fairness. The objective of this study is to characterize the impact and mitigation of racial bias in clinical ML to date and describe best practices for research efforts extending algorithmic fairness to medicine.</p>
      </sec>
      <sec>
        <title>Bias and Fairness in Machine Learning</title>
        <p>In the setting of algorithmic fairness, bias is present when an algorithm systematically favors one outcome over another. Bias may be introduced into an ML algorithm throughout all steps of the development process, which involves data collection, data selection, model training, and model deployment [<xref ref-type="bibr" rid="ref13">13</xref>]. Examples of these sources of bias are shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, and their definitions are given in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Notably, historical bias may be present even if all steps of model development are optimally performed. This is of particular concern in the evaluation of racial bias in clinical ML, given the presence of existing and historical health care disparities [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>Depending on the context, bias in clinical ML may not be harmful and can even be used to overcome inequality [<xref ref-type="bibr" rid="ref13">13</xref>]. In situations in which targeting a well-defined subpopulation above all others is desirable, an ML algorithm biased toward a particular group may be used to proactively mitigate existing disparities. However, bias may arise when ML models designed to serve the needs of a specific clinical population—such as a particular community or high-risk demographic—are inappropriately applied to other populations or when more general models are applied to specific populations. Additionally, ML algorithms tend to overfit to the data on which they are trained, which entails the learning of spurious relationships present in the training data set and may result in a lack of generalizability to other settings. As a result, a model that appears unbiased in one setting may display bias in another. Thus, bias in clinical ML must be considered in the light of the context and particular population of interest.</p>
        <p>Bias in an ML model may lead to unfairness if not appropriately evaluated and accounted for. Fairness in ML is achieved when algorithmic decision-making does not favor an individual or group based on protected attributes. Research efforts have emphasized group fairness over individual fairness, given the need for algorithms that consider existing differences between populations—whether intrinsic or extrinsic—while preventing discrimination between groups [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. Crucially, improving model fairness does not necessarily require compromising accuracy overall [<xref ref-type="bibr" rid="ref22">22</xref>]. For instance, an unfair disease-screening tool might have poor sensitivity for disease detection in one low-risk population subgroup compared to another with higher risk; improving the fairness of this tool would entail adjusting the model to have more similar sensitivities between subgroups. In this study, we examine the racial bias of clinical ML in terms of model fairness with respect to race.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>The clinical machine learning development workflow (orange boxes) offers several opportunities (blue boxes) to evaluate and mitigate potential biases introduced by the data set or model. Preprocessing methods seek to adjust the existing data set to preempt biases resulting from inadequate data representation or labeling. In-processing methods impose fairness constraints as additional metrics optimized by the model during training or present data in a structured manner to avoid biases in the sampling process. Postprocessing methods account for model biases by adjusting model outputs or changing the way they are used.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e36388_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Assessing and Achieving Fairness in Machine Learning</title>
        <p>Group fairness is quantified by evaluating the similarity of a given statistical metric between predictions made for different groups. Group fairness indicators encountered in this review are defined in <xref ref-type="table" rid="table1">Table 1</xref>. Critical examinations of different methods for evaluating fairness in ML, both in general application [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>] and in the context of health care [<xref ref-type="bibr" rid="ref21">21</xref>], have been previously described, though applications in clinical ML remain limited. It is important to note that fairness metrics may be at odds with one another, depending on the context and application [<xref ref-type="bibr" rid="ref25">25</xref>]; thus, evaluation of an appropriate metric, given the clinical situation of interest, is paramount [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
        <p>Approaches to bias mitigation fall into 3 major categories (<xref rid="figure1" ref-type="fig">Figure 1</xref>): <italic>preprocessing</italic>, in which inequities in data are removed prior to model training; <italic>in-processing</italic>, in which the model training process is conducted to actively prevent discrimination; and <italic>postprocessing</italic>, in which outputs of a trained model are adjusted to achieve fairness [<xref ref-type="bibr" rid="ref13">13</xref>]. Preprocessing can be performed by resampling existing data, incorporating new data, or adjusting data labels. In-processing methods use adversarial techniques, impose constraints and regularization, or ensure fairness of underlying representations during training. Finally, postprocessing entails group-specific modification of decision thresholds or outcomes to ensure fairness in the application of model predictions. Different approaches may be optimal depending on the setting and stage of model development.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Group fairness metrics encountered in this review.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="850"/>
            <thead>
              <tr valign="top">
                <td>Term</td>
                <td>Description</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>AUROC<sup>a</sup></td>
                <td>Assesses overall classifier performance by measuring the TPR<sup>b</sup> and FPR<sup>c</sup> of a classifier at different thresholds.</td>
              </tr>
              <tr valign="top">
                <td>Average odds</td>
                <td>Compares the average of the TPR and FPR for the classification outcome between protected and unprotected groups.</td>
              </tr>
              <tr valign="top">
                <td>Balanced accuracy</td>
                <td>A measure of accuracy corrected for data imbalance, calculated as the average of sensitivity and specificity for a group.</td>
              </tr>
              <tr valign="top">
                <td>Calibration</td>
                <td>Assesses how well the risk score or probability predictions reflect actual outcomes.</td>
              </tr>
              <tr valign="top">
                <td>Disparate impact</td>
                <td>Measures deviation from statistical parity, calculated as the ratio of the rate of the positive outcome between protected and unprotected groups. Ideally, the disparate impact is 1.</td>
              </tr>
              <tr valign="top">
                <td>Equal opportunity</td>
                <td>For classification tasks in which one outcome is preferred over the other, equal opportunity is satisfied when the preferred outcome is predicted with equal accuracy between protected and unprotected groups. Ideally, the TPR or FNR<sup>d</sup> disparity between groups is 0.</td>
              </tr>
              <tr valign="top">
                <td>Equalized odds</td>
                <td>The TPR and FPR are equal between protected and unprotected groups.</td>
              </tr>
              <tr valign="top">
                <td>Error rate</td>
                <td>Compares the error rate of predictions, calculated as the number of incorrect predictions divided by the total number of predictions, between protected and unprotected groups. Ideally, the error rate disparity between groups is 0.</td>
              </tr>
              <tr valign="top">
                <td>Statistical parity</td>
                <td>Statistical parity (also known as demographic parity) is satisfied when the rate of positive outcomes is equal between protected and unprotected groups.</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>TPR: true-positive rate.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>FPR: false-positive rate.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>FNR: false-negative rate.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>We performed a scoping review of racial bias and algorithmic fairness in clinical ML models in accordance with the Preferred Reporting Items for Systematic Reviews and Meta-analyses (PRISMA) 2020 guidelines [<xref ref-type="bibr" rid="ref27">27</xref>] and PRISMA Extension for Scoping Reviews [<xref ref-type="bibr" rid="ref28">28</xref>]. The review protocol was not registered and is available upon request to the authors. The PubMed MEDLINE (National Library of Medicine), Scopus (Elsevier), and Embase (Elsevier) databases were queried by combining terminology pertaining to ML, race, and bias as keywords. Additional records were identified using Google Scholar search. The exact search strategy is detailed in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Study Selection</title>
        <p>After duplicate record removal, studies were initially screened by title and abstract and then screened for final inclusion by full text review. All screening was performed independently by 2 reviewers. Studies were selected based on the following inclusion criteria: peer-reviewed original research, English language, full text available, development or evaluation of a clinically relevant ML model, and evaluation of bias of the model regarding racial or ethnic groups. Studies other than full-length papers were excluded. ML was defined as a computer algorithm that improves automatically via training on data [<xref ref-type="bibr" rid="ref4">4</xref>]. Per PRISMA guidelines, any disagreements regarding study inclusion based on these criteria were reconciled by discussion.</p>
      </sec>
      <sec>
        <title>Data Abstraction</title>
        <p>Relevant data were abstracted from included papers by 1 reviewer. Data of interest included the clinical objective of ML models, identification of racial bias, impact of racial bias, metrics for bias assessment, mitigation of racial bias, methods for bias mitigation, data set size, data source, ML model architecture, and availability of computer code used for data preparation and ML model development. The methodological quality of included studies was not assessed, given the scoping nature of this review [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Study Characteristics</title>
        <p>The literature search was performed on September 8, 2021, and identified 635 records (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Of these, 26 (4.1%) full-text papers were reviewed and 12 (46.2%) were included in the final analysis [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref40">40</xref>].</p>
        <p>Characteristics of the included studies are summarized in <xref ref-type="table" rid="table2">Table 2</xref>. Data sets and models used are summarized in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Of the 12 studies, 3 (25%) were published in 2019, 5 (42%) in 2020, and 4 (33%) in 2021. In addition, 9 (75%) studies originated from the United States, 1 (8%) from Canada, 1 (8%) from Sweden, and 1 (8%) from both the United Kingdom and Nigeria. Applications of ML were varied and involved diagnosis, outcome prediction, and clinical score prediction performed on data sets including images, diagnostic studies, clinical text, and clinical variables. Furthermore, 1 (8%) study described a model in routine clinical use [<xref ref-type="bibr" rid="ref36">36</xref>], 2 (17%) examined prospectively validated clinical models [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], and the remaining 9 (75%) described internally validated models.</p>
        <p>Of the 12 studies, 5 (42%) published code used for analysis, 3 (25%) made model development code available [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref39">39</xref>], 2 (17%) published bias analysis code [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], 1 (8%) published code relevant to debiasing [<xref ref-type="bibr" rid="ref30">30</xref>], and 1 (8%) published data selection code [<xref ref-type="bibr" rid="ref33">33</xref>]. In addition, 1 (8%) study used publicly available code for analysis [<xref ref-type="bibr" rid="ref31">31</xref>], and code was specified as available upon request in 1 (8%) study [<xref ref-type="bibr" rid="ref35">35</xref>]. Bias of an ML model was evaluated using an external database in 8 (67%) studies [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], single-institutional data in 3 (25%) studies [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], and data from 2 institutions in 2 (17%) studies [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. No institutional data sets were published. Convolutional neural networks (CNNs) were the predominant ML modeling technique used (5/12, 42%), followed by logistic regression (3/12, 25%), least absolute shrinkage and selection operator (LASSO; 2/12, 17%), and extreme gradient boosting (XGBoost; 2/12, 17%). In addition, 3 (25%) studies evaluated models adapted from existing neural network architectures: ResNet50 in 2 (17%) studies [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>] and DenseNet in the other [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
        <p>Of the 12 studies, 9 (75%) evaluated a model developed internally by the same researchers [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], 2 (17%) evaluated a model developed externally by separate researchers [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], and 1 (8%) evaluated both internally and externally developed models [<xref ref-type="bibr" rid="ref34">34</xref>]. In addition, 8 (67%) studies concluded that racial bias was present [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>-<xref ref-type="bibr" rid="ref39">39</xref>], 2 (17%) concluded that bias was not present [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref40">40</xref>], and 2 (17%) assessed the implementation of bias mitigation strategies without comparison to a baseline model [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. A variety of methods were used to assess the presence of algorithmic racial bias: 3 (25%) studies used multiple metrics to assess fairness [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], while the remaining 9 (75%) used a single metric. 
The most commonly used fairness metrics were equal opportunity difference [<xref ref-type="bibr" rid="ref41">41</xref>], defined either as the difference in the true-positive rate (TPR) or the false-negative rate (FNR) between subgroups (5/12, 42%) [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]; accuracy (4/12, 33%) [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]; and disparate impact (2/12, 17%) [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>The approaches and efficacy of bias mitigation methods used in the studies evaluated are summarized in <xref ref-type="table" rid="table3">Table 3</xref>. All 8 (67%) studies that implemented methods for mitigation of racial bias successfully increased fairness, as measured by the authors’ chosen metrics [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Preprocessing bias mitigation was the most commonly used strategy (7/13, 54%). In addition, 1 (8%) study removed race information from the training data, though superior improvements in disparate impact and equal opportunity difference were achieved by reweighing [<xref ref-type="bibr" rid="ref37">37</xref>]. Furthermore, 2 (17%) studies performed in-processing bias mitigation using the prejudice remover regularizer [<xref ref-type="bibr" rid="ref42">42</xref>] or adversarial debiasing during model training [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. However, in both studies, in-processing was ineffective in reducing bias and was outperformed by other bias mitigation methods. Finally, 1 (8%) study evaluated multiple types of ML models for bias during the development process, concluding that a LASSO model was preferable to conditional random forest, gradient boosting, and ensemble models for racially unbiased dementia ascertainment [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>PRISMA flowchart of study inclusion. ML: machine learning; PRISMA: Preferred Reporting Items for Systematic Reviews and Meta-analyses.</p>
          </caption>
          <graphic xlink:href="medinform_v10i5e36388_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Study characteristics.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="120"/>
            <col width="160"/>
            <col width="160"/>
            <col width="120"/>
            <col width="180"/>
            <col width="120"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Author (year)</td>
                <td>Clinical objective</td>
                <td>How was fairness evaluated?</td>
                <td>Was racial bias identified?</td>
                <td>How was the AI<sup>a</sup> model biased?</td>
                <td>Was racial bias mitigated?</td>
                <td>Protected class</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Abubakar et al (2020) [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                <td>Identification of images of burns vs healthy skin</td>
                <td>Accuracy</td>
                <td>Yes</td>
                <td>Poor accuracy of models trained on a Caucasian data set and validated on an African data set and vice versa</td>
                <td>Yes</td>
                <td>Dark-skinned patients, light-skinned patients</td>
              </tr>
              <tr valign="top">
                <td>Allen et al (2020) [<xref ref-type="bibr" rid="ref30">30</xref>]</td>
                <td>Intensive care unit (ICU) mortality prediction</td>
                <td>Equal opportunity difference (FNR<sup>b</sup> disparity)</td>
                <td>N/A<sup>c</sup></td>
                <td>N/A</td>
                <td>Yes</td>
                <td>Non-White patients</td>
              </tr>
              <tr valign="top">
                <td>Briggs and Hollmén (2020) [<xref ref-type="bibr" rid="ref31">31</xref>]</td>
                <td>Prediction of future health care expenditures of individual patients</td>
                <td>Balanced accuracy, statistical parity, disparate impact, average odds, equal opportunity</td>
                <td>N/A</td>
                <td>N/A</td>
                <td>Yes</td>
                <td>Black patients</td>
              </tr>
              <tr valign="top">
                <td>Burlina et al (2021) [<xref ref-type="bibr" rid="ref32">32</xref>]</td>
                <td>Diagnosis of diabetic retinopathy from fundus photography</td>
                <td>Accuracy</td>
                <td>Yes</td>
                <td>Lower diagnostic accuracy in darker-skinned individuals compared to lighter-skinned individuals</td>
                <td>Yes</td>
                <td>Dark-skinned patients</td>
              </tr>
              <tr valign="top">
                <td>Chen et al (2019) [<xref ref-type="bibr" rid="ref33">33</xref>]</td>
                <td>ICU mortality prediction, psychiatric readmission prediction</td>
                <td>Error rate (0-1 loss)</td>
                <td>Yes</td>
                <td>Differences in error rates in ICU mortality between racial groups</td>
                <td>No</td>
                <td>Non-White patients</td>
              </tr>
              <tr valign="top">
                <td>Gianattasio et al (2020) [<xref ref-type="bibr" rid="ref34">34</xref>]</td>
                <td>Dementia status classification</td>
                <td>Sensitivity, specificity, accuracy</td>
                <td>Yes</td>
                <td>Existing algorithms varying in sensitivity and specificity between race/ethnicity groups</td>
                <td>Yes</td>
                <td>Hispanic, non-Hispanic Black patients</td>
              </tr>
              <tr valign="top">
                <td>Noseworthy et al (2020) [<xref ref-type="bibr" rid="ref35">35</xref>]</td>
                <td>Prediction of left ventricular ejection fraction ≤35% from the electrocardiogram (ECG)</td>
                <td>AUROC<sup>d</sup></td>
                <td>No</td>
                <td>N/A</td>
                <td>No</td>
                <td>Non-White patients</td>
              </tr>
              <tr valign="top">
                <td>Obermeyer et al (2019) [<xref ref-type="bibr" rid="ref36">36</xref>]</td>
                <td>Prediction of future health care expenditures of individual patients</td>
                <td>Calibration</td>
                <td>Yes</td>
                <td>Black patients with a higher burden than White patients at the same algorithmic risk score</td>
                <td>Yes</td>
                <td>Black patients</td>
              </tr>
              <tr valign="top">
                <td>Park et al (2021) [<xref ref-type="bibr" rid="ref37">37</xref>]</td>
                <td>Prediction of postpartum depression and postpartum mental health service utilization</td>
                <td>Disparate impact, equal opportunity difference (TPR<sup>e</sup> disparity)</td>
                <td>Yes</td>
                <td>Black women with a worse health status than White women at the same predicted risk level</td>
                <td>Yes</td>
                <td>Black patients</td>
              </tr>
              <tr valign="top">
                <td>Seyyed-Kalantari et al (2021) [<xref ref-type="bibr" rid="ref38">38</xref>]</td>
                <td>Diagnostic label prediction from chest X-rays</td>
                <td>Equal opportunity difference (TPR disparity)</td>
                <td>Yes</td>
                <td>Greater TPR disparity in Hispanic patients</td>
                <td>No</td>
                <td>Non-White patients</td>
              </tr>
              <tr valign="top">
                <td>Thompson et al (2021) [<xref ref-type="bibr" rid="ref39">39</xref>]</td>
                <td>Identification of opioid misuse from clinical notes</td>
                <td>Equal opportunity difference (FNR disparity)</td>
                <td>Yes</td>
                <td>Greater FNR in the Black subgroup than in the White subgroup</td>
                <td>Yes</td>
                <td>Black patients</td>
              </tr>
              <tr valign="top">
                <td>Wissel et al (2019) [<xref ref-type="bibr" rid="ref40">40</xref>]</td>
                <td>Assignment of surgical candidacy score for patients with epilepsy using clinical notes</td>
                <td>Regression analysis of the impact of the race variable on the candidacy score</td>
                <td>No</td>
                <td>N/A</td>
                <td>No</td>
                <td>Non-White patients</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>AI: artificial intelligence.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>FNR: false-negative rate.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>N/A: not applicable.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>TPR: true-positive rate.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Bias mitigation methods among reviewed studies.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="240"/>
            <col width="730"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Description of strategies used</td>
                <td>Effectiveness</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Preprocessing</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reweighing training data</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>An equal opportunity difference (FNR<sup>a</sup> difference) of 0.016 (<italic>P</italic>=.20) was achieved for intensive care unit (ICU) mortality prediction [<xref ref-type="bibr" rid="ref30">30</xref>].</p>
                    </list-item>
                    <list-item>
                      <p>The mean fairness measure (average of statistical parity difference, disparate impact measure, average odds difference, and equal opportunity difference) improved to 0.06 from 0.12 for prediction of health care costs [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
                    </list-item>
                    <list-item>
                      <p>Disparate impact improved from 0.31 to 0.79, and the equal opportunity (TPR<sup>b</sup>) difference improved from –0.19 to 0.02 for prediction of postpartum depression development; prediction of mental health service use in pregnant individuals improved from 0.45 to 0.85 and –0.11 to –0.02, respectively [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Combining data sets to increase heterogeneity</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>The accuracy of skin burn identification increased to 99.5% using a combined data set compared to 83.4% and 87.5% when trained on an African and evaluated on a Caucasian data set and vice versa [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Generating synthetic minority class data</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Disparity in diabetic retinopathy diagnostic accuracy improved from 12.5% to 7.5% and 0.5% when augmenting with retina appearance-optimized images and diabetic retinopathy status-optimized images created with a generative adversarial network, respectively [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Adjusting label selection</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Improved congruence in health outcomes between groups after developing models to predict other labels for health status besides financial expenditures [<xref ref-type="bibr" rid="ref36">36</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Removing race information from training data</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Disparate impact improved from 0.31 to 0.61 and equal opportunity (TPR) difference improved from –0.19 to –0.05 for prediction of postpartum depression development; respective improvements from 0.45 to 0.63 and –0.11 to –0.04 for prediction of mental health service use in pregnant individuals [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>In-processing</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Use of a regularizer during training</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>Disparate impact improved, but accuracy and the equal opportunity (TPR) difference decreased when implementing the prejudice remover regularizer in prediction of postpartum depression in pregnant individuals [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Adversarial debiasing</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>The mean fairness measure (average of statistical parity difference, disparate impact measure, average odds difference, and equal opportunity difference) worsened to 0.07 from 0.05 for prediction of health care costs [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Postprocessing</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Calibration</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>The equal opportunity (FNR) difference improved from 0.15 to 0.03 for identification of opioid misuse [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Reject option-based classification</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>The mean fairness measure (average of statistical parity difference, disparate impact measure, average odds difference, and equal opportunity difference) improved to 0.09 from 0.15 for prediction of health care costs [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Varying cut-point selection</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>The equal opportunity (FNR) difference improved from 0.15 to 0.04 for identification of opioid misuse [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
                    </list-item>
                    <list-item>
                      <p>The congruence in sensitivity and specificity between groups improved without reduction in accuracy for classification of dementia status [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
                    </list-item>
                  </list>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>FNR: false-negative rate.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>TPR: true-positive rate.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Given the pressing issue of equity in health care and the rapid development of medical ML applications, racial bias must be thoroughly evaluated in clinical ML models in order to protect patient safety and prevent the algorithmic encoding of inequality. Algorithmic fairness is a relatively novel field within the discipline of ML, and its application to medical ML remains nascent. In our evaluation of the literature describing mitigation of racial bias in clinical ML, we identified a variety of bias mitigation methods, which when applied successfully increase fairness and demonstrate the feasibility and importance of racial bias evaluation in the medical ML development process. Based on our findings, there is a need for heightened awareness of algorithmic fairness concepts, increased data availability, and improved reporting transparency in medical ML development to ensure fairness in clinical ML.</p>
      </sec>
      <sec>
        <title>Impact of Racial Bias in Clinical Machine Learning</title>
        <p>The broad scope of medical ML applications and potential patient harms following deployment across health care systems demand an increased emphasis on evaluation and mitigation of racial bias in clinical ML. Screening and outcome prediction tasks are commonly examined among reviewed studies. Racial bias in such tasks is particularly concerning as decisions made from flawed models trained on data, which reflect historical inequities in disease diagnosis and care delivery, may perpetuate inequalities by shaping clinical decision-making [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Evaluation and mitigation of potential biases must occur throughout the model development life cycle to protect patients from algorithmic unfairness.</p>
        <p>Reviewed studies frequently identified racial bias in clinical ML models. Notably, 1 algorithm in clinical use for prediction of future health care expenditures was found to discriminate against Black patients when compared to White patients, potentially contributing to disparities in health care delivery [<xref ref-type="bibr" rid="ref36">36</xref>]. Other ML models that possibly demonstrate racial bias remain in preclinical states of development. Several studies have explicitly studied racial bias against Black patients compared to White patients. For example, 2 studies demonstrated that ML algorithms predicted similar risk scores in Black and White patients, though the Black patients were less healthy [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], and another demonstrated that an opioid misuse classifier had a higher FNR for Black patients [<xref ref-type="bibr" rid="ref39">39</xref>]. Disparities in mortality prediction and X-ray diagnosis were identified in other races and ethnic groups [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], as well as disparities in burn identification and diabetic retinopathy identification in dark-skinned versus lighter-skinned patients [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Although conclusions cannot be drawn regarding the prevalence of racial bias among published clinical ML studies, the broad scope of clinical ML models susceptible to racial bias in this review exposes the potential of racial bias encoded in ML models to negatively impact patients across all aspects of health care.</p>
      </sec>
      <sec>
        <title>Assessment of Racial Bias</title>
        <p>Clinical ML models must be carefully evaluated for potential biases imposed upon patients. Different fairness metrics may highlight different aspects of fairness relevant to a particular clinical setting; therefore, evaluation of all appropriate fairness metrics is needed when evaluating for potential bias. For example, calibration is particularly important to models performing risk prediction, while equal opportunity and disparate impact are relevant to screening and diagnostic settings. Inconsistent choice of fairness metrics among studies included in this review shows the need for a more standardized assessment process of racial bias in clinical ML. Some studies assessed fairness using metrics such as accuracy, area under the receiver operating characteristic curve (AUROC), and correlation of outcome with race, which may not sufficiently evaluate fairness [<xref ref-type="bibr" rid="ref21">21</xref>]. Moreover, there are inherent trade-offs to the use of different fairness metrics [<xref ref-type="bibr" rid="ref25">25</xref>], and static fairness criteria may even lead to delayed harms in the long term [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>Obermeyer et al [<xref ref-type="bibr" rid="ref36">36</xref>] present an example of using model calibration in conjunction with varied outcome labels to successfully de-bias an algorithm used to manage population health, and case studies have examined trade-offs of bias evaluation metrics in other settings, such as criminal justice [<xref ref-type="bibr" rid="ref44">44</xref>], which may also serve as useful frameworks for clinical ML researchers. Use of “causal models,” which allow for closely tailored examination of discriminatory relationships in data, is another opportunity for investigation and mitigation of biased model behavior [<xref ref-type="bibr" rid="ref45">45</xref>]. An increased focus from medical journals on bias evaluation checklists applicable to clinical ML models, such as the Prediction Model Risk of Bias Assessment Tool (PROBAST), is desirable to further emphasize vigilance regarding biased ML models [<xref ref-type="bibr" rid="ref46">46</xref>]. Ultimately, more thorough analysis of fairness criteria in clinical ML will allow researchers to better contextualize and act on potential biases.</p>
        <p>Clinical ML researchers should also be aware of potential barriers to ML fairness when adapting pretrained models and data representations. For instance, deep neural networks performing image processing tasks are frequently pretrained on large data sets and then fine-tuned to adapt to other tasks. Methods for removal of spurious variations from such models have been described, such as joint learning and unlearning algorithms, which account for contributions of undesirable variations during model development [<xref ref-type="bibr" rid="ref47">47</xref>]. Language models trained in an unsupervised manner on vast amounts of text may learn biases present in training data [<xref ref-type="bibr" rid="ref48">48</xref>]. Similarly, biases have been described in word embeddings [<xref ref-type="bibr" rid="ref49">49</xref>], which are vectorized word representations used as inputs to ML models. Identification of bias in embeddings raises concerns about performance disparities in clinical applications of natural language processing if the bias is not screened for and appropriately addressed [<xref ref-type="bibr" rid="ref50">50</xref>]. The lack of interpretability often inherent to ML models heightens the need for thorough evaluation of their potential biases.</p>
      </sec>
      <sec>
        <title>Creating Fair Models</title>
        <p>Preprocessing and postprocessing methods of bias mitigation were successfully implemented among the publications reviewed for this study. Postprocessing methods appear to be easier to implement and may allow tailoring of imperfect models to new settings [<xref ref-type="bibr" rid="ref51">51</xref>]. However, using preprocessing and in-processing to create unbiased data sets and algorithms at the outset of model development is desirable to facilitate the creation of fair, generalizable models. Continued evaluation of these techniques in clinical contexts is needed to inform best practices.</p>
        <p>As data quality is generally the limiting factor to development of robust ML models, improvements to data generally translate directly into model performance improvements. Supplementation of data sets using generative models to synthesize patient data may be a viable approach to address data limitations. A study by Burlina et al [<xref ref-type="bibr" rid="ref32">32</xref>] illustrated this fact by using a generative adversarial network to synthesize fundoscopy images while reducing class imbalance. However, though data limitations may contribute to disparities in model performance across racial groups, algorithmic unfairness may arise from other underlying biases in data as well [<xref ref-type="bibr" rid="ref38">38</xref>]. Publications included in this review demonstrated improved fairness in ML models using multisource data sets, which may mitigate biases in the data collection process of single-source data sets [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Moreover, care must also be taken to ensure that multi-institutional data sets are appropriately prepared and used due to evidence that site-specific signatures contribute to bias in ML models [<xref ref-type="bibr" rid="ref52">52</xref>]. Finally, protected attributes should not simply be ignored during model development, an approach called “fairness through unawareness,” as models may be able to infer protected group membership from other data features. Additionally, omission of protected attributes may cause bias if a legitimate relationship exists between the attribute and outcome of interest [<xref ref-type="bibr" rid="ref19">19</xref>].</p>
        <p>Several online resources aggregate examples and code implementations of published fairness evaluation and bias mitigation methods. Some examples of these resources include Aequitas, Artificial Intelligence Fairness 360 (IBM, Armonk, NY, United States), and Fairlearn (Microsoft Corporation, Redmond, WA, United States) [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. Additionally, TensorFlow, a popular deep learning framework, includes a tool for evaluation of fairness indicators. Work by Briggs et al [<xref ref-type="bibr" rid="ref31">31</xref>] highlights the feasibility and positive impact of standardized methodologies for addressing bias using a variety of performance indicators and mitigation techniques. Greater adoption of these and other strategies in fairness evaluation and bias mitigation will help set standard benchmarks for fairness in clinical ML.</p>
      </sec>
      <sec>
        <title>The Role of Transparency and Data Availability</title>
        <p>ML is often characterized as a black box due to its limited interpretability, which is particularly problematic when attempting to address and prevent racial biases in clinical ML [<xref ref-type="bibr" rid="ref55">55</xref>]. Although research in recent years has yielded significant progress in explainable ML methods [<xref ref-type="bibr" rid="ref56">56</xref>], publication of model development code and data sets remains the most straightforward approach to transparency. Regrettably, medical ML research falls far short of these standards [<xref ref-type="bibr" rid="ref57">57</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. Code and data availability was inconsistent among the publications included in this review, and the majority of studies evaluated racial bias using publicly available data sets, including the Medical Information Mart for Intensive Care (MIMIC) [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], Kaggle EyePACS [<xref ref-type="bibr" rid="ref32">32</xref>], and Dissecting Bias [<xref ref-type="bibr" rid="ref31">31</xref>]. Considering the vast number of private, institutional data sets used to develop clinical ML models, there is a crucial need for future publications to maximize transparency, ensuring the ability to evaluate for fairness in clinical ML.</p>
        <p>Increased publication of institutional data sets would facilitate the interdisciplinary collaboration needed to translate concepts of fairness in ML into the realm of medicine. Improved availability of data sets would also enable researchers to more easily validate existing models and perform fairness evaluations on different patient populations, translating benefits of ML across populations. Additionally, collaboration between institutions to maintain diverse, broadly representative data sets would facilitate the development of generalizable models free of the biases inherent to single-institutional data. However, ethical and patient confidentiality considerations may limit publication of clinical data. In contrast, publication of code and trained models, which are infrequently made available in the clinical ML literature [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref59">59</xref>], would similarly allow researchers to assess clinical ML on diverse populations without limitations imposed by patient privacy standards or institutional data-sharing regulations. Another possible paradigm to mitigate bias by training on diversely representative data sets while maintaining data privacy is federated learning, which involves piecewise training of an ML model on separate data sets and removes the need for data sharing during model development [<xref ref-type="bibr" rid="ref60">60</xref>].</p>
        <p>Moreover, increased emphasis on fairness in clinical ML through adoption of model development and reporting guidelines is needed [<xref ref-type="bibr" rid="ref59">59</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. Reporting guidelines for medical ML studies are inconsistently adopted, due in part to a lack of editorial policies among medical journals [<xref ref-type="bibr" rid="ref1">1</xref>]. Moreover, reporting of demographic information needed to assess biases due to data sets is lacking [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>]. The proposed Minimum Information for Medical AI Reporting guideline addresses these concerns by recommending that clinical ML studies report information necessary for understanding potential biases, including relevant demographic information of patient data used for model development [<xref ref-type="bibr" rid="ref64">64</xref>]. In conjunction with upcoming reporting guidelines tailored to clinical ML [<xref ref-type="bibr" rid="ref61">61</xref>], efforts to improve reporting quality will contribute to a standardized framework for fairness evaluation and bias mitigation in clinical ML.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>As with any literature review, there are limitations to this study. Given the heterogeneity of terminology used to describe ML and racial bias, our search may have overlooked relevant publications. Additionally, we were limited by publication bias as we excluded publications other than full-length manuscripts, and researchers may be less likely to publish results confirming the absence of racial bias in a clinical ML model. Finally, the novelty of ML fairness in medicine and the resulting paucity of literature on this topic, as well as the breadth of relevant subjects encompassed, prevented us from obtaining the quantity and quality of data required to perform a systematic review or meta-analysis. In particular, the lack of standardized methods to evaluate and mitigate bias precludes any definitive conclusions regarding their suitability in clinical ML applications. However, this scoping review provides a methodological framework for critical evaluation of a previously uncharacterized area of research and draws attention to the lack of standardization regarding racial bias mitigation in clinical ML development. We emphasize the need for further work to build on this important aspect of the medical ML literature.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Algorithmic fairness in clinical ML is a primary concern for its ethical adoption. As medical ML applications continue to approach widespread adoption across a multitude of clinical settings, potential racial biases in ML models must be proactively evaluated and mitigated in order to prevent patient harm and propagation of inequities in health care. The adoption of algorithmic fairness principles in medicine remains nascent, and further research is needed to standardize best practices for fairness evaluation and bias mitigation. We recommend that researchers and journal editors emphasize standardized reporting and data availability in ML studies to improve transparency and facilitate future research. Continued interrogation of biases in clinical ML models is needed to ensure fairness and maximize the benefits of ML in medicine.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary data file containing bias definitions, search strategy, and a table with study data set characteristics.</p>
        <media xlink:href="medinform_v10i5e36388_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 215 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">FNR</term>
          <def>
            <p>false-negative rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FPR</term>
          <def>
            <p>false-positive rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LASSO</term>
          <def>
            <p>least absolute shrinkage and selection operator</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PRISMA</term>
          <def>
            <p>Preferred Reporting Items for Systematic Reviews and Meta-analyses</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">TPR</term>
          <def>
            <p>true-positive rate</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <fn-group>
      <fn fn-type="con">
        <p>No part of this work has been previously published.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nagendran</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lovejoy</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Gordon</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Komorowski</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Harvey</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JPA</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Maruthappu</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence versus clinicians: systematic review of design, reporting standards, and claims of deep learning studies</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>03</month>
          <day>25</day>
          <volume>368</volume>
          <fpage>m689</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=32213531"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.m689</pub-id>
          <pub-id pub-id-type="medline">32213531</pub-id>
          <pub-id pub-id-type="pmcid">PMC7190037</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Sieniek</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Godbole</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Godwin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Antropova</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ashrafian</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Back</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chesus</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Etemadi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Vicente</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Halling-Brown</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hassabis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jansen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ledsam</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Melnick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Mostofi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Peng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Reicher</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Romera-Paredes</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sidebottom</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Suleyman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tse</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>De Fauw</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shetty</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>International evaluation of an AI system for breast cancer screening</article-title>
          <source>Nature</source>
          <year>2020</year>
          <month>01</month>
          <day>01</day>
          <volume>577</volume>
          <issue>7788</issue>
          <fpage>89</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1038/s41586-019-1799-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>CJP</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ming</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence versus clinicians in disease diagnosis: systematic review</article-title>
          <source>JMIR Med Inform</source>
          <year>2019</year>
          <month>08</month>
          <day>16</day>
          <volume>7</volume>
          <issue>3</issue>
          <fpage>e10010</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2019/3/e10010/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10010</pub-id>
          <pub-id pub-id-type="medline">31420959</pub-id>
          <pub-id pub-id-type="pii">v7i3e10010</pub-id>
          <pub-id pub-id-type="pmcid">PMC6716335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>High-performance medicine: the convergence of human and artificial intelligence</article-title>
          <source>Nat Med</source>
          <year>2019</year>
          <month>01</month>
          <day>7</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <lpage>56</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0300-7</pub-id>
          <pub-id pub-id-type="medline">30617339</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0300-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abul-Husn</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Kenny</surname>
              <given-names>EE</given-names>
            </name>
          </person-group>
          <article-title>Personalized medicine and the power of electronic health records</article-title>
          <source>Cell</source>
          <year>2019</year>
          <month>03</month>
          <day>21</day>
          <volume>177</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <lpage>69</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0092-8674(19)30222-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cell.2019.02.039</pub-id>
          <pub-id pub-id-type="medline">30901549</pub-id>
          <pub-id pub-id-type="pii">S0092-8674(19)30222-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6921466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obermeyer</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Emanuel</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Predicting the future: big data, machine learning, and clinical medicine</article-title>
          <source>N Engl J Med</source>
          <year>2016</year>
          <month>09</month>
          <day>29</day>
          <volume>375</volume>
          <issue>13</issue>
          <fpage>1216</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27682033"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMp1606181</pub-id>
          <pub-id pub-id-type="medline">27682033</pub-id>
          <pub-id pub-id-type="pmcid">PMC5070532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Domingo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Galal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Soni</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Mukhin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bayer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Byrd</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Caron</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Creamer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gilstrap</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gwardys</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hogue</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kadiyam</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Massa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salamone</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Slavicek</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Suna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ware</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xinos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yuen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Barnard</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Etemadi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Preventing delayed and missed care by applying artificial intelligence to trigger radiology imaging follow-up</article-title>
          <source>NEJM Catalyst</source>
          <year>2022</year>
          <month>03</month>
          <day>16</day>
          <volume>3</volume>
          <issue>4</issue>
          <pub-id pub-id-type="doi">10.1056/cat.21.0469</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Avati</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Harman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Downing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>Improving palliative care with deep learning</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2018</year>
          <month>12</month>
          <day>12</day>
          <volume>18</volume>
          <issue>Suppl 4</issue>
          <fpage>122</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-018-0677-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-018-0677-8</pub-id>
          <pub-id pub-id-type="medline">30537977</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-018-0677-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC6290509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Baxter</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>The practical implementation of artificial intelligence technologies in medicine</article-title>
          <source>Nat Med</source>
          <year>2019</year>
          <month>01</month>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>30</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30617336"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-018-0307-0</pub-id>
          <pub-id pub-id-type="medline">30617336</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-018-0307-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6995276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Arnold</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sippy</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>de Kamps</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Konigorski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lippert</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilthorpe</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Tennant</surname>
              <given-names>PWG</given-names>
            </name>
          </person-group>
          <article-title>Time to reality check the promises of machine learning-powered precision medicine</article-title>
          <source>Lancet Digital Health</source>
          <year>2020</year>
          <month>12</month>
          <volume>2</volume>
          <issue>12</issue>
          <fpage>e677</fpage>
          <lpage>e680</lpage>
          <pub-id pub-id-type="doi">10.1016/s2589-7500(20)30200-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikesalingam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Suleyman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Key challenges for delivering clinical impact with artificial intelligence</article-title>
          <source>BMC Med</source>
          <year>2019</year>
          <month>10</month>
          <day>29</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>195</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-019-1426-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-019-1426-2</pub-id>
          <pub-id pub-id-type="medline">31665002</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-019-1426-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC6821018</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Challen</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pitt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gompels</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tsaneva-Atanasova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence, bias and clinical safety</article-title>
          <source>BMJ Qual Saf</source>
          <year>2019</year>
          <month>03</month>
          <day>12</day>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>231</fpage>
          <lpage>237</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://qualitysafety.bmj.com/lookup/pmidlookup?view=long&#38;pmid=30636200"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjqs-2018-008370</pub-id>
          <pub-id pub-id-type="medline">30636200</pub-id>
          <pub-id pub-id-type="pii">bmjqs-2018-008370</pub-id>
          <pub-id pub-id-type="pmcid">PMC6560460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mehrabi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Morstatter</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Saxena</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Galstyan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A survey on bias and fairness in machine learning</article-title>
          <source>ACM Comput Surv</source>
          <year>2021</year>
          <month>07</month>
          <volume>54</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>35</lpage>
          <pub-id pub-id-type="doi">10.1145/3457607</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>ZD</given-names>
            </name>
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Bassett</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>How structural racism works: racist policies as a root cause of U.S. racial health inequities</article-title>
          <source>N Engl J Med</source>
          <year>2021</year>
          <month>02</month>
          <day>25</day>
          <volume>384</volume>
          <issue>8</issue>
          <fpage>768</fpage>
          <lpage>773</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMms2025396</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>DW</given-names>
            </name>
          </person-group>
          <article-title>Digital health equity as a necessity in the 21st Century Cures Act era</article-title>
          <source>JAMA</source>
          <year>2020</year>
          <month>06</month>
          <day>16</day>
          <volume>323</volume>
          <issue>23</issue>
          <fpage>2381</fpage>
          <lpage>2382</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2020.7858</pub-id>
          <pub-id pub-id-type="medline">32463421</pub-id>
          <pub-id pub-id-type="pii">2766776</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Unger</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Vaidya</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Albain</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>LeBlanc</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Minasian</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Gotay</surname>
              <given-names>CC</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Fisch</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Blanke</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Hershman</surname>
              <given-names>DL</given-names>
            </name>
          </person-group>
          <article-title>Sex differences in risk of severe adverse events in patients receiving immunotherapy, targeted therapy, or chemotherapy in cancer clinical trials</article-title>
          <source>J Clin Oncol</source>
          <year>2022</year>
          <month>05</month>
          <day>01</day>
          <volume>40</volume>
          <issue>13</issue>
          <fpage>1474</fpage>
          <lpage>1486</lpage>
          <pub-id pub-id-type="doi">10.1200/JCO.21.02377</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vyas</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Eisenstein</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Hidden in plain sight: reconsidering the use of race correction in clinical algorithms</article-title>
          <source>N Engl J Med</source>
          <year>2020</year>
          <month>08</month>
          <day>27</day>
          <volume>383</volume>
          <issue>9</issue>
          <fpage>874</fpage>
          <lpage>882</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1056/NEJMms2004740"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/NEJMms2004740</pub-id>
          <pub-id pub-id-type="medline">32853499</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paulus</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Predictably unequal: understanding and addressing concerns that algorithmic clinical prediction may increase health disparities</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <month>07</month>
          <day>30</day>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>99</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0304-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0304-9</pub-id>
          <pub-id pub-id-type="medline">32821854</pub-id>
          <pub-id pub-id-type="pii">304</pub-id>
          <pub-id pub-id-type="pmcid">PMC7393367</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkomar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Howell</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Ensuring fairness in machine learning to advance health equity</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <month>12</month>
          <day>04</day>
          <volume>169</volume>
          <issue>12</issue>
          <fpage>866</fpage>
          <pub-id pub-id-type="doi">10.7326/M18-1990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>IY</given-names>
            </name>
            <name name-style="western">
              <surname>Pierson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ferryman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Ethical machine learning in healthcare</article-title>
          <source>Annu Rev Biomed Data Sci</source>
          <year>2021</year>
          <month>07</month>
          <day>20</day>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>123</fpage>
          <lpage>144</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-biodatasci-092820-114757</pub-id>
          <pub-id pub-id-type="medline">34396058</pub-id>
          <pub-id pub-id-type="pmcid">PMC8362902</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fletcher</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Nakeshimana</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Olubeko</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Addressing fairness, bias, and appropriate use of artificial intelligence and machine learning in global health</article-title>
          <source>Front Artif Intell</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>561802</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/frai.2020.561802"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2020.561802</pub-id>
          <pub-id pub-id-type="medline">33981989</pub-id>
          <pub-id pub-id-type="pii">561802</pub-id>
          <pub-id pub-id-type="pmcid">PMC8107824</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wick</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Panda</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tristan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Unlocking fairness: a trade-off revisited</article-title>
          <year>2019</year>
          <conf-name>Advances in Neural Information Processing Systems 32: Annual Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 2019</conf-date>
          <conf-loc>Vancouver, BC, Canada</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jtristan.github.io/papers/neurips19.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rubin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Fairness definitions explained</article-title>
          <year>2018</year>
          <conf-name>Proceedings of the International Workshop on Software Fairness</conf-name>
          <conf-date>2018</conf-date>
          <conf-loc>Gothenburg, Sweden</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3194770.3194776</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scheidegger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatasubramanian</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhary</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A comparative study of fairness-enhancing interventions in machine learning</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the Conference on Fairness, Accountability, Transparency</conf-name>
          <conf-date>2019</conf-date>
          <conf-loc>Atlanta, GA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3287560.3287589</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kleinberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mullainathan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inherent trade-offs in the fair determination of risk scores</article-title>
          <year>2017</year>
          <conf-name>8th Innovations in Theoretical Computer Science Conference (ITCS 2017)</conf-name>
          <conf-date>January 2017</conf-date>
          <conf-loc>Berkeley, CA</conf-loc>
          <pub-id pub-id-type="doi">10.4230/LIPIcs.ITCS.2017.43</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCradden</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mazwi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Ethical limitations of algorithmic fairness solutions in health care machine learning</article-title>
          <source>Lancet Digit Health</source>
          <year>2020</year>
          <month>05</month>
          <volume>2</volume>
          <issue>5</issue>
          <fpage>e221</fpage>
          <lpage>e223</lpage>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(20)30065-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Page</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>McKenzie</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Bossuyt</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Boutron</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Mulrow</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Shamseer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tetzlaff</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Glanville</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Grimshaw</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Hróbjartsson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lalu</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Loder</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Mayo-Wilson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McGuinness</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Welch</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Whiting</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title>
          <source>BMJ</source>
          <year>2021</year>
          <month>03</month>
          <day>29</day>
          <volume>372</volume>
          <fpage>n71</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.bmj.com/lookup/pmidlookup?view=long&#38;pmid=33782057"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id>
          <pub-id pub-id-type="medline">33782057</pub-id>
          <pub-id pub-id-type="pmcid">PMC8005924</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tricco</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Lillie</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zarin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Levac</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Moher</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Peters</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Horsley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Weeks</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Akl</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hartling</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Aldcroft</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Garritty</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lewin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Godfrey</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Langlois</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Soares-Weiser</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moriarty</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tunçalp</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SE</given-names>
            </name>
          </person-group>
          <article-title>PRISMA Extension for Scoping Reviews (PRISMA-ScR): checklist and explanation</article-title>
          <source>Ann Intern Med</source>
          <year>2018</year>
          <month>09</month>
          <day>04</day>
          <volume>169</volume>
          <issue>7</issue>
          <fpage>467</fpage>
          <pub-id pub-id-type="doi">10.7326/M18-0850</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abubakar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ugail</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bukar</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Assessment of human skin burns: a deep transfer learning approach</article-title>
          <source>J Med Biol Eng</source>
          <year>2020</year>
          <month>04</month>
          <day>24</day>
          <volume>40</volume>
          <issue>3</issue>
          <fpage>321</fpage>
          <lpage>333</lpage>
          <pub-id pub-id-type="doi">10.1007/s40846-020-00520-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mataraso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Siefkas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Burdick</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Braden</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dellinger</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pellegrini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Green-Saxena</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A racially unbiased, machine learning approach to prediction of mortality: algorithm development study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>10</month>
          <day>22</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e22400</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/4/e22400/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22400</pub-id>
          <pub-id pub-id-type="medline">33090117</pub-id>
          <pub-id pub-id-type="pii">v6i4e22400</pub-id>
          <pub-id pub-id-type="pmcid">PMC7644374</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Briggs</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hollmén</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Appice</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tsoumakas</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Manolopoulos</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Matwin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Mitigating discrimination in clinical machine learning decision support using algorithmic processing techniques</article-title>
          <source>International Conference on Discovery Science. Vol 12323</source>
          <year>2020</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer International</publisher-name>
          <fpage>19</fpage>
          <lpage>33</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burlina</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Pacheco</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Bressler</surname>
              <given-names>NM</given-names>
            </name>
          </person-group>
          <article-title>Addressing artificial intelligence bias in retinal diagnostics</article-title>
          <source>Transl Vis Sci Technol</source>
          <year>2021</year>
          <month>02</month>
          <day>05</day>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>13</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tvst.arvojournals.org/article.aspx?doi=10.1167/tvst.10.2.13"/>
          </comment>
          <pub-id pub-id-type="doi">10.1167/tvst.10.2.13</pub-id>
          <pub-id pub-id-type="medline">34003898</pub-id>
          <pub-id pub-id-type="pii">2772266</pub-id>
          <pub-id pub-id-type="pmcid">PMC7884292</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Can AI help reduce disparities in general medical and mental health care?</article-title>
          <source>AMA J Ethics</source>
          <year>2019</year>
          <month>02</month>
          <day>01</day>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>E167</fpage>
          <lpage>179</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journalofethics.ama-assn.org/article/can-ai-help-reduce-disparities-general-medical-and-mental-health-care/2019-02"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/amajethics.2019.167</pub-id>
          <pub-id pub-id-type="medline">30794127</pub-id>
          <pub-id pub-id-type="pii">amajethics.2019.167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gianattasio</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ciarleglio</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Power</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Development of algorithmic dementia ascertainment for racial/ethnic disparities research in the US Health and Retirement Study</article-title>
          <source>Epidemiology (Cambridge, Mass.)</source>
          <year>2020</year>
          <volume>31</volume>
          <issue>1</issue>
          <fpage>126</fpage>
          <lpage>133</lpage>
          <pub-id pub-id-type="doi">10.1097/ede.0000000000001101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noseworthy</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Attia</surname>
              <given-names>ZI</given-names>
            </name>
            <name name-style="western">
              <surname>Brewer</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Hayes</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kapa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez-Jimenez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Assessing and mitigating bias in medical artificial intelligence: the effects of race and ethnicity on a deep learning model for ECG analysis</article-title>
          <source>Circ Arrhythm Electrophysiol</source>
          <year>2020</year>
          <month>03</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>e007988</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32064914"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/CIRCEP.119.007988</pub-id>
          <pub-id pub-id-type="medline">32064914</pub-id>
          <pub-id pub-id-type="pmcid">PMC7158877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obermeyer</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Powers</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vogeli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mullainathan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Dissecting racial bias in an algorithm used to manage the health of populations</article-title>
          <source>Science</source>
          <year>2019</year>
          <month>10</month>
          <day>25</day>
          <volume>366</volume>
          <issue>6464</issue>
          <fpage>447</fpage>
          <lpage>453</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aax2342</pub-id>
          <pub-id pub-id-type="medline">31649194</pub-id>
          <pub-id pub-id-type="pii">366/6464/447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sylla</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dankwa-Mullan</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Koski</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>AK</given-names>
            </name>
          </person-group>
          <article-title>Comparison of methods to reduce bias from clinical prediction models of postpartum depression</article-title>
          <source>JAMA Netw Open</source>
          <year>2021</year>
          <month>04</month>
          <day>01</day>
          <volume>4</volume>
          <issue>4</issue>
          <fpage>e213909</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2021.3909"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2021.3909</pub-id>
          <pub-id pub-id-type="medline">33856478</pub-id>
          <pub-id pub-id-type="pii">2778568</pub-id>
          <pub-id pub-id-type="pmcid">PMC8050742</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seyyed-Kalantari</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>CheXclusion: fairness gaps in deep chest X-ray classifiers</article-title>
          <source>Pac Symp Biocomput</source>
          <year>2021</year>
          <volume>26</volume>
          <fpage>232</fpage>
          <lpage>243</lpage>
          <pub-id pub-id-type="doi">10.1142/9789811232701_0022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thompson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bhalla</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Boley</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>McCluskey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Churpek</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Karnik</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Afshar</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Bias and fairness assessment of a natural language processing opioid misuse classifier: detection and mitigation of electronic health record data disadvantages across racial subgroups</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>10</month>
          <day>12</day>
          <volume>28</volume>
          <issue>11</issue>
          <fpage>2393</fpage>
          <lpage>2403</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34383925"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab148</pub-id>
          <pub-id pub-id-type="medline">34383925</pub-id>
          <pub-id pub-id-type="pii">6349190</pub-id>
          <pub-id pub-id-type="pmcid">PMC8510285</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wissel</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Greiner</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Glauser</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Mangano</surname>
              <given-names>FT</given-names>
            </name>
            <name name-style="western">
              <surname>Santel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pestian</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Szczesniak</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Dexheimer</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>Investigation of bias in an epilepsy machine learning algorithm trained on physician notes</article-title>
          <source>Epilepsia</source>
          <year>2019</year>
          <month>09</month>
          <day>23</day>
          <volume>60</volume>
          <issue>9</issue>
          <fpage>e93</fpage>
          <lpage>e98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31441044"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/epi.16320</pub-id>
          <pub-id pub-id-type="medline">31441044</pub-id>
          <pub-id pub-id-type="pmcid">PMC6731998</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Price</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Srebro</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Equality of opportunity in supervised learning</article-title>
          <year>2016</year>
          <conf-name>Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kamishima</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Akaho</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Asoh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sakuma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Fairness-aware classifier with prejudice remover regularizer</article-title>
          <year>2012</year>
          <conf-name>Joint European Conference on Machine Learning and Knowledge Discovery in Databases</conf-name>
          <conf-date>September 2012</conf-date>
          <conf-loc>Berlin, Heidelberg</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-642-33486-3_3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rolf</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Simchowitz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Delayed impact of fair machine learning</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence</conf-name>
          <conf-date>August 10-16, 2019</conf-date>
          <conf-loc>Macao</conf-loc>
          <pub-id pub-id-type="doi">10.24963/ijcai.2019/862</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rodolfa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salomon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Haynes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mendieta</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ghani</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Case study: predictive fairness to reduce misdemeanor recidivism through social service interventions</article-title>
          <year>2020</year>
          <conf-name>Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency</conf-name>
          <conf-date>2020</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3351095.3372863</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kusner</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Loftus</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>The long road to fairer algorithms</article-title>
          <source>Nature</source>
          <year>2020</year>
          <month>02</month>
          <day>04</day>
          <volume>578</volume>
          <issue>7793</issue>
          <fpage>34</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1038/d41586-020-00274-3</pub-id>
          <pub-id pub-id-type="medline">32020122</pub-id>
          <pub-id pub-id-type="pii">10.1038/d41586-020-00274-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Whiting</surname>
              <given-names>PF</given-names>
            </name>
            <name name-style="western">
              <surname>Westwood</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Kleijnen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mallett</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>PROBAST: a tool to assess the risk of bias and applicability of prediction model studies</article-title>
          <source>Ann Intern Med</source>
          <year>2019</year>
          <month>01</month>
          <day>01</day>
          <volume>170</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <pub-id pub-id-type="doi">10.7326/m18-1376</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alvi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zisserman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nellåker</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Turning a blind eye: explicit removal of biases and variation from deep neural network embeddings</article-title>
          <year>2018</year>
          <conf-name>15th European Conference on Computer Vision</conf-name>
          <conf-date>September 2018</conf-date>
          <conf-loc>Munich, Germany</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-030-11009-3_34</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vig</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gehrmann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Belinkov</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Investigating gender bias in language models using causal mediation analysis</article-title>
          <year>2020</year>
          <conf-name>Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 2020</conf-date>
          <conf-loc>Virtual Conference</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2020/file/92650b2e92217715fe312e6fa7b90d82-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Straw</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Callison-Burch</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence in mental health and the biases of language based models</article-title>
          <source>PLoS One</source>
          <year>2020</year>
          <month>12</month>
          <day>17</day>
          <volume>15</volume>
          <issue>12</issue>
          <fpage>e0240376</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0240376"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0240376</pub-id>
          <pub-id pub-id-type="medline">33332380</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-06032</pub-id>
          <pub-id pub-id-type="pmcid">PMC7745984</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bolukbasi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saligrama</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kalai</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Man is to computer programmer as woman is to homemaker? Debiasing word embeddings</article-title>
          <year>2016</year>
          <conf-name>Advances in Neural Information Processing Systems 29: Annual Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://papers.nips.cc/paper/2016/file/a486cd07e4ac3d270571622f4f316ec5-Paper.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Macheret</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno-Machado</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A tutorial on calibration measurements and calibration models for clinical prediction models</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>04</month>
          <day>01</day>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>621</fpage>
          <lpage>633</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32106284"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocz228</pub-id>
          <pub-id pub-id-type="medline">32106284</pub-id>
          <pub-id pub-id-type="pii">5762806</pub-id>
          <pub-id pub-id-type="pmcid">PMC7075534</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Howard</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Dolezal</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kochanny</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schulte</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Heij</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Huo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nanda</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Olopade</surname>
              <given-names>OI</given-names>
            </name>
            <name name-style="western">
              <surname>Kather</surname>
              <given-names>JN</given-names>
            </name>
            <name name-style="western">
              <surname>Cipriani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Grossman</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>The impact of site-specific digital histology signatures on deep learning model accuracy and bias</article-title>
          <source>Nat Commun</source>
          <year>2021</year>
          <month>07</month>
          <day>20</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>4423</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41467-021-24698-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41467-021-24698-1</pub-id>
          <pub-id pub-id-type="medline">34285218</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41467-021-24698-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8292530</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellamy</surname>
              <given-names>RKE</given-names>
            </name>
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hind</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Houde</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kannan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lohia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Martino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mojsilovic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ramamurthy</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sattigeri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Varshney</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>AI Fairness 360: an extensible toolkit for detecting and mitigating algorithmic bias</article-title>
          <source>IBM J Res Dev</source>
          <year>2019</year>
          <month>7</month>
          <day>1</day>
          <volume>63</volume>
          <issue>4/5</issue>
          <fpage>4:1</fpage>
          <lpage>4:15</lpage>
          <pub-id pub-id-type="doi">10.1147/jrd.2019.2942287</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wexler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pushkarna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bolukbasi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wattenberg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Viegas</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The what-if tool: interactive probing of machine learning models</article-title>
          <source>IEEE Trans Visual Comput Graphics</source>
          <year>2020</year>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>56</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1109/tvcg.2019.2934619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rudin</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Stop explaining black box machine learning models for high stakes decisions and use interpretable models instead</article-title>
          <source>Nat Mach Intell</source>
          <year>2019</year>
          <month>5</month>
          <day>13</day>
          <volume>1</volume>
          <issue>5</issue>
          <fpage>206</fpage>
          <lpage>215</lpage>
          <pub-id pub-id-type="doi">10.1038/s42256-019-0048-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tjoa</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A survey on explainable artificial intelligence (XAI): toward medical XAI</article-title>
          <source>IEEE Trans Neural Netw Learning Syst</source>
          <year>2021</year>
          <month>11</month>
          <volume>32</volume>
          <issue>11</issue>
          <fpage>4793</fpage>
          <lpage>4813</lpage>
          <pub-id pub-id-type="doi">10.1109/tnnls.2020.3027314</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MBA</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marinsek</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganath</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Foschini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Reproducibility in machine learning for health research: still a ways to go</article-title>
          <source>Sci Transl Med</source>
          <year>2021</year>
          <month>03</month>
          <day>24</day>
          <volume>13</volume>
          <issue>586</issue>
          <fpage>eabb1655</fpage>
          <pub-id pub-id-type="doi">10.1126/scitranslmed.abb1655</pub-id>
          <pub-id pub-id-type="medline">33762434</pub-id>
          <pub-id pub-id-type="pii">13/586/eabb1655</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Driggs</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Thorpe</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbey</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ursprung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aviles-Rivero</surname>
              <given-names>AI</given-names>
            </name>
            <name name-style="western">
              <surname>Etmann</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McCague</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Beer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weir-McCall</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Gkrania-Klotsas</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rudd</surname>
              <given-names>JHF</given-names>
            </name>
            <name name-style="western">
              <surname>Sala</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schönlieb</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Common pitfalls and recommendations for using machine learning to detect and prognosticate for COVID-19 using chest radiographs and CT scans</article-title>
          <source>Nat Mach Intell</source>
          <year>2021</year>
          <month>03</month>
          <day>15</day>
          <volume>3</volume>
          <issue>3</issue>
          <fpage>199</fpage>
          <lpage>217</lpage>
          <pub-id pub-id-type="doi">10.1038/s42256-021-00307-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shlobin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>DeCuypere</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lam</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep learning for outcome prediction in neurosurgery: a systematic review of design, reporting, and reproducibility</article-title>
          <source>Neurosurgery</source>
          <year>2022</year>
          <volume>90</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1227/neu.0000000000001736</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sheller</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Reina</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Pati</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kotrotsou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Milchenko</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Marcus</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Colen</surname>
              <given-names>RR</given-names>
            </name>
            <name name-style="western">
              <surname>Bakas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Federated learning in medicine: facilitating multi-institutional collaborations without sharing patient data</article-title>
          <source>Sci Rep</source>
          <year>2020</year>
          <month>07</month>
          <day>28</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>12598</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-020-69250-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-020-69250-1</pub-id>
          <pub-id pub-id-type="medline">32724046</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-020-69250-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC7387485</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wawira Gichoya</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Equity in essence: a call for operationalising fairness in machine learning for healthcare</article-title>
          <source>BMJ Health Care Inform</source>
          <year>2021</year>
          <month>04</month>
          <day>28</day>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>e100289</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://informatics.bmj.com/lookup/pmidlookup?view=long&#38;pmid=33910923"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjhci-2020-100289</pub-id>
          <pub-id pub-id-type="medline">33910923</pub-id>
          <pub-id pub-id-type="pii">bmjhci-2020-100289</pub-id>
          <pub-id pub-id-type="pmcid">PMC8733939</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cahan</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Seneviratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lossio-Ventura</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>JPA</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Reporting of demographic data and representativeness in machine learning models using electronic health records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>12</month>
          <day>09</day>
          <volume>27</volume>
          <issue>12</issue>
          <fpage>1878</fpage>
          <lpage>1884</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32935131"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa164</pub-id>
          <pub-id pub-id-type="medline">32935131</pub-id>
          <pub-id pub-id-type="pii">5906103</pub-id>
          <pub-id pub-id-type="pmcid">PMC7727384</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>LN</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Kassamali</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Mita</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Nambudiri</surname>
              <given-names>VE</given-names>
            </name>
          </person-group>
          <article-title>Bias in, bias out: underreporting and underrepresentation of diverse skin types in machine learning research for skin cancer detection-a scoping review</article-title>
          <source>J Am Acad Dermatol</source>
          <year>2021</year>
          <month>07</month>
          <day>10</day>
          <pub-id pub-id-type="doi">10.1016/j.jaad.2021.06.884</pub-id>
          <pub-id pub-id-type="medline">34252465</pub-id>
          <pub-id pub-id-type="pii">S0190-9622(21)02086-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hernandez-Boussard</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Bozkurt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannidis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>MINIMAR (MINimum Information for Medical AI Reporting): developing reporting standards for artificial intelligence in health care</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>12</month>
          <day>09</day>
          <volume>27</volume>
          <issue>12</issue>
          <fpage>2011</fpage>
          <lpage>2015</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32594179"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa088</pub-id>
          <pub-id pub-id-type="medline">32594179</pub-id>
          <pub-id pub-id-type="pii">5864179</pub-id>
          <pub-id pub-id-type="pmcid">PMC7727333</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
