<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i2e16765</article-id>
      <article-id pub-id-type="pmid">32069213</article-id>
      <article-id pub-id-type="doi">10.2196/16765</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Analysis of Massive Online Medical Consultation Service Data to Understand Physicians’ Economic Return: Observational Data Mining Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sokolova</surname>
            <given-names>Marina</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Benis</surname>
            <given-names>Arriel</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Dotson</surname>
            <given-names>W. David</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Segall</surname>
            <given-names>Richard</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Jiang</surname>
            <given-names>Jinglu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Binghamton University</institution>
            <addr-line>4400 Vestal Pkwy E</addr-line>
            <addr-line>Binghamton, NY, 13902</addr-line>
            <country>United States</country>
            <phone>1 6077773016</phone>
            <email>jingluj@binghamton.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6464-9683</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Cameron</surname>
            <given-names>Ann-Frances</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5003-5282</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Yang</surname>
            <given-names>Ming</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0182-5974</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Binghamton University</institution>
        <addr-line>Binghamton, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>HEC Montreal</institution>
        <addr-line>Montreal, QC</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Central University of Finance and Economics</institution>
        <addr-line>Beijing</addr-line>
        <country>China</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jinglu Jiang <email>jingluj@binghamton.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>2</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>2</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>2</issue>
      <elocation-id>e16765</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>10</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>19</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>14</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>1</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Jinglu Jiang, Ann-Frances Cameron, Ming Yang. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 18.02.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/2/e16765/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Online health care consultation has become increasingly popular and is considered a potential solution to health care resource shortages and inefficient resource distribution. However, many online medical consultation platforms are struggling to attract and retain patients who are willing to pay, and health care providers on the platform have the additional challenge of standing out in a crowd of physicians who can provide comparable services.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study used machine learning (ML) approaches to mine massive service data to (1) identify the important features that are associated with patient payment, as opposed to free trial–only appointments; (2) explore the relative importance of these features; and (3) understand how these features interact, linearly or nonlinearly, in relation to payment.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The dataset is from the largest China-based online medical consultation platform, which covers 1,582,564 consultation records between patient-physician pairs from 2009 to 2018. ML techniques (ie, hyperparameter tuning, model training, and validation) were applied with four classifiers—logistic regression, decision tree (DT), random forest, and gradient boost—to identify the most important features and their relative importance for predicting paid vs free-only appointments.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>After applying the ML feature selection procedures, we identified 11 key features on the platform, which are potentially useful to predict payment. For the binary ML classification task (paid vs free services), the 11 features as a whole system achieved very good prediction performance across all four classifiers. DT analysis further identified five distinct subgroups of patients delineated by five top-ranked features: previous offline connection, total dialog, physician response rate, patient privacy concern, and social return. These subgroups interact with the physician differently, resulting in different payment outcomes.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The results show that, compared with features related to physician reputation, service-related features, such as service delivery quality (eg, consultation dialog intensity and physician response rate), patient source (eg, online vs offline returning patients), and patient involvement (eg, provide social returns and reveal previous treatment), appear to contribute more to the patient’s payment decision. Promoting multiple timely responses in patient-provider interactions is essential to encourage payment.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>Web-based health services</kwd>
        <kwd>remote consultation</kwd>
        <kwd>machine learning</kwd>
        <kwd>data mining</kwd>
        <kwd>decision tree</kwd>
        <kwd>patient involvement</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Online health care solutions are increasingly popular [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>], with reports that they are preferred by more than 70% of patients [<xref ref-type="bibr" rid="ref4">4</xref>]. This study focuses on <italic>multisided online medical consultation platforms</italic> where various health care providers from different hospitals and medical institutes provide remote medical consultation services to patients. This type of digital health care service is experiencing significant growth and research attention [<xref ref-type="bibr" rid="ref5">5</xref>]. These platforms offer many benefits, such as reduced medical costs, improved medical service efficiency, more efficient health care resource distribution, and fewer health care resource shortages in remote areas [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>Despite the popularity and potential benefits, some online medical consultation platforms are struggling to attract and retain patients who are willing to pay for these services. For example, patient dissatisfaction after an initial failed experience, fear that diagnoses are made with limited consideration of patients’ medical history, and concerns about privacy may impede patients’ use of online consultation [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. In addition, online medical consultation usually follows the Pareto principle in that 80% of the services are provided by 20% of the physicians on the platform [<xref ref-type="bibr" rid="ref1">1</xref>], suggesting that many health care service providers on the platform have the challenge of attracting patients and standing out in the crowd of physicians who can provide comparable services [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. To entice patients to their platform and promote payment, many platforms employ a multitiered pricing strategy that allows the coexistence of free (ie, the free trials) and paid versions (ie, the premium) of services [<xref ref-type="bibr" rid="ref13">13</xref>]. As a consequence, patients may be more willing to pay for the service, and physicians may be able to access a broader range of patients.</p>
        <p>Several features associated with patient payment in online medical consultation platforms have been frequently examined by previous research. Physician reputation—both online and offline—is the most frequently examined physician characteristic [<xref ref-type="bibr" rid="ref14">14</xref>]. As medical consultation is highly professional, physicians need to be credible or trustworthy to attract and retain paying patients. A physician’s affiliation, seniority, and location are usually used as proxies for reputation [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Patient evaluation, which is the feedback left on the platform by previous patients about the physician, is also frequently examined [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. It is often displayed in the form of ratings, stars, reviews, and virtual gifts. This feedback is visible to other patients on the platform and may serve as signals of service quality, which impact patients’ willingness to pay. Although less frequently examined, patient-physician interaction may be an important feature as well. The frequency and depth of interaction on the platform (eg, the amount of service or the frequency of service) show the ability and willingness of a physician to provide high-quality service, which may influence patient payment [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      </sec>
      <sec>
        <title>Gaps and Objectives</title>
        <p>This existing research is useful; however, these service and physician-related features are often examined in isolation and often using a linear regression approach. Thus, the understanding of how various features interact to generate impacts is currently lacking—although some features might be important enough to generate impacts on their own, others may only have impacts when combined with other features. To extend existing research, new approaches are needed that take advantage of the massive data on these platforms and help uncover the complex dynamics between these various features and their interactions and payment. Thus, the objectives of this study were to determine (1) the important features of online medical consultation services that are associated with patient payment, as opposed to free trial–only appointments; (2) the relative importance of these features; and (3) how these features interact, linearly or nonlinearly, in relation to payment. We focus on mining feature importance because knowing the features (and their interactions) that influence payment will help platforms and physicians identify high-value online medical consultations. Although many features may impact payment, we are particularly interested in those that are publicly visible on the platform, such as characteristics of physicians and their interaction with patients and patient feedback, rather than nonvisible features, such as patients’ economic status and their general attitude toward technology. This is because publicly visible features contain information and signals that, through observational learning and social influence [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], may influence patient payment.</p>
        <p>To this end, we examine a massive dataset from the largest China-based online medical consultation platform (1.5 million patient-physician consultation records) spanning 10 years. Predictive models are developed by employing classic machine learning (ML) procedures (ie, feature selection, hyperparameter tuning, model training, and validation) with logistic regression (LR), simple decision tree (DT), random forest (RF), and gradient boost (GB) classifiers. The importance ranking of these features is identified through regression coefficients, level of DT splits, and feature importance scores provided by RF and GB algorithms.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Empirical Setting and Dataset</title>
        <p>Our empirical setting is a multisided online medical consultation platform based in China. It is one of the largest medical platforms, and more than half a million physicians from over 9400 hospitals have set up their profiles and provided consultation services on the platform. The platform follows a service model that allows the coexistence of free and paid consultation services (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> for more details).</p>
        <p>Our dataset includes 10 years of consultation records (approximately 2.3 million records from January 2009 to August 2018) between patient-physician pairs from three departments that have received the most visits (ie, pediatrics, gynecology, and dermatology, according to the platform report) across six geographic areas—three of the areas are those with the richest health care resources (Beijing municipality, Guangdong province, and Zhejiang province) and three are remote areas with the fewest health care resources (Shanxi province, Tibet province, and Qinghai province). Each record is a consultation history that includes picture- and text-based dialogs and service purchase records between patient <italic>i</italic> and physician <italic>j</italic> (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Machine Learning Task and Initial Feature Selection</title>
        <p>Our focal outcomes are whether a consultation record includes payment and the relative importance of the features on the platform that can predict payment. Although a consultation record may include multiple payments, we do not consider payment intensity or types. Accordingly, the objective of our ML task is to solve a binary classification problem—classifying consultation records into free services only (labeled as <italic>free</italic>) or those including some type of financial payment (labeled as <italic>paid</italic>). The consultation with a <italic>paid</italic> label is our positive class in ML prediction.</p>
        <p>The initial 18 features were identified by drawing on variables that have been examined in previous studies (see <xref ref-type="table" rid="table1">Table 1</xref> for definition and coding of features) and were consistently visible on the platform. Features that are visible to platform users (eg, visitors, patients, and physicians) may influence payment, as they potentially allow patient learning and valuation to occur before the actual consumption of the consultation service. Although the importance of online physician reputation has been demonstrated in previous studies [<xref ref-type="bibr" rid="ref19">19</xref>], physicians’ online rating was not included in this study. Owing to the changes in platform design, online reputation scores (eg, stars, ratings, and reviews) are not consistent over time. In addition, we observed that most physicians have very good ratings with little variation (mean 3.80, SD 0.34), which would have made this feature less useful as a predictor. This ceiling effect has been reported in previous studies using the same context [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. However, features such as social returns and service intensity were included and can reflect physicians’ online reputation to some extent [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Key predictive features and coding description.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="260"/>
            <col width="530"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Feature</td>
                <td>Description</td>
                <td>Reference</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>Physician reputation related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hospital ranking<sup>a</sup></td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Ranking 1] Equals 1 if primary care hospital, 0 otherwise.</p>
                    </list-item>
                    <list-item>
                      <p>[Ranking 2] Equals 1 if secondary care hospital, 0 otherwise.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Physician seniority</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Title 1] Equals 1 if chief physician, 0 otherwise.</p>
                    </list-item>
                    <list-item>
                      <p>[Title 2] Equals 1 if associate chief physician, 0 otherwise.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hospital location</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Loc] Equals 1 if health care resource–rich areas, 0 otherwise.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Physician tenure</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Tenure] The number of months the physician has been registered on the platform.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Service intensity</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Intensity] The average number of patients served per month during the physician’s tenure (=total patients served/tenure).</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref7">7</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Patient related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Previous formal examination</td>
                <td>A function provided by the platform allowing patients to reveal their medical status:<break/><list list-type="bullet"><list-item><p>Status 1: no formal health care examination before the consultation.</p></list-item><list-item><p>Status 2: a formal health care examination before the consultation.</p></list-item><list-item><p>Status 3: private (ie, detailed consultation information is not directly visible by other patients).</p></list-item></list>(coded into dummies)<break/><list list-type="bullet"><list-item><p>[PriorExam] Equals 1 if none, 0 otherwise.</p></list-item><list-item><p>[Private] Equals 1 if set as private, 0 otherwise.</p></list-item></list></td>
                <td>N/A<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Offline connection</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Offline] A check-in function provided by the platform to indicate patients’ offline connection with the physicians. Equals 1 if the patient used the check-in function, 0 otherwise.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref16">16</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Service delivery related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Service duration</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Duration] Number of days between the initial post and last post of patient i’s interaction with physician j.</p>
                    </list-item>
                  </list>
                </td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total dialog</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[TotalD] Total number of posts within patient i’s interaction with physician j.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref18">18</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Physician posts</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[PhysicianP] Number of posts initiated by physician j within patient i’s interaction with physician j.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref3">3</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Response rate</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Response] The rate of response of a physician (=PhysicianP/TotalD).</p>
                    </list-item>
                  </list>
                </td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Answer frequency</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Answer_frq] The average number of answers (including notifications and reminders) by the physician per day during patient i’s interaction with physician j (=PhysicianP/Duration).</p>
                    </list-item>
                  </list>
                </td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Social return</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Social] A function provided by the platform to allow patients to send virtual gifts to the physician. Equals 1 if patient i gave any virtual gift to physician j at any time during patient i’s interaction with physician j.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Patient involvement related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Patient posts</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[PatientP] Number of posts initiated by patient i within that patient’s interaction with physician j.</p>
                    </list-item>
                  </list>
                </td>
                <td>[<xref ref-type="bibr" rid="ref3">3</xref>]</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Question frequency</td>
                <td>
                  <list list-type="bullet">
                    <list-item>
                      <p>[Question_frq] The average number of posts by the patient per day during patient i’s interaction with physician j (=PatientP/Duration).</p>
                    </list-item>
                  </list>
                </td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Hospital ranking in China is a three-tier system (primary, secondary, and tertiary institutions) based on the hospital’s ability to provide medical care, education, and research; thus, physicians who have been able to secure a position at a primary care hospital are generally considered to be of higher reputation [<xref ref-type="bibr" rid="ref24">24</xref>].</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Cleaning and Analysis Pipeline</title>
        <p>First, data were prepared by removing consultation records that did not fit the scope of the study (eg, consultation occurred before 2009 or after 2018 and samples with unqualified tags). We also excluded records with over 50% of missing values (N=84,582) and outliers using the 95% quantile as the threshold (N=674,767; see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for a detailed description of data cleaning procedure).</p>
        <p>In the second step, four data-driven feature selection techniques were applied to identify the right features to use in the ML classification (low variance filtering, high correlation filtering, backward feature selection, and forward feature selection) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. The objective of this procedure is to find the features that are highly correlated with the outcome but ideally uncorrelated with each other [<xref ref-type="bibr" rid="ref27">27</xref>] so that the resulting features can build a relatively parsimonious model (see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> for a detailed description of feature selection procedure).</p>
        <p>In step 3, the ML model was constructed through three nested procedures: hyperparameter optimization, model training, and validation (see <xref rid="figure1" ref-type="fig">Figure 1</xref>). Four common ML classifiers were purposefully chosen—LR, DT, RF, and GB—because they are mainstream ML techniques for classification problems [<xref ref-type="bibr" rid="ref13">13</xref>] accessible by general data consumers through data analysis tools and platforms (eg, Python, R, SAS, and RapidMiner). LR was used in previous studies with small datasets [<xref ref-type="bibr" rid="ref2">2</xref>], and the latter three are tree-based approaches with different resampling strategies and cost function optimization techniques (ie, boosting vs bagging and gradient descent algorithm). Depending on the ML classifier, different sets of hyperparameters need to be configured to ensure that the algorithm reaches its best classification performance (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for a detailed explanation of optimization and analysis procedures). We conducted our analysis on the KoNstanz Information MinEr platform.</p>
        <p>The performances of the resulting ML models were compared in step 4. We used six evaluation metrics, which are commonly accepted in ML classification and can reflect different aspects of ML model performance (eg, correctly assign the paid services with a paid label vs the probability that an ML classifier will successfully classify a case in the right class) [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> for detailed explanation of our evaluation metrics).</p>
        <p>We investigated research objectives 2 and 3 through step 5, which examines feature importance. The four classifiers that we used provide different feature importance indicators—the regression coefficients in LR, level of splits for DT, and feature importance indices for both GB and RF (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>, and the study by Friedman [<xref ref-type="bibr" rid="ref30">30</xref>]).</p>
        <p>For steps 3 to 5, there are some particularities of our data that may bias our results (eg, imbalanced data). Thus, we performed several additional tests to examine the robustness of the model. The results of these additional analyses indicate that our model is robust to sample distribution (eg, imbalances, classes, and outliers) and potential systematic differences (eg, geographic location and market changes), as indicated by only minor changes in the model performance measures (see <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> for the results of these additional analyses).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Analysis pipeline. AUC: area under the receiver operating characteristic curve; ML: machine learning.</p>
          </caption>
          <graphic xlink:href="medinform_v8i2e16765_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Feature Selection Results and Descriptive Statistics</title>
        <p>After data cleaning, 1,582,564 qualified records remained for further analysis. Among these records, 1,089,662 (68.85%) were free trial–only, whereas 492,902 (31.15%) involved at least one premium payment. After performing four feature selection techniques (step 2, see <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>), we retained the ones that are selected by forward, backward, low variance filtering, and high correlation filtering approaches. In response to our first research objective regarding which features of online medical consultation services are associated with patient payment, our feature selection analysis suggested 11 key features (see <xref ref-type="table" rid="table2">Table 2</xref>)—the seven eliminated features were thus considered as less useful because of either high correlation with the included features (ie, redundant features) or low variance explained (ie, low explanatory power).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Summary statistics of features.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="100"/>
            <col width="120"/>
            <col width="100"/>
            <col width="160"/>
            <col width="160"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Service feature</td>
                <td>All, mean (SD)</td>
                <td>Free-only<sup>a</sup>, mean (SD)</td>
                <td>Paid<sup>a</sup>, mean (SD)</td>
                <td>All (minimum, maximum)</td>
                <td>Free-only (minimum, maximum)</td>
                <td>Paid (minimum, maximum)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Physician reputation related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Hospital ranking 2</td>
                <td>0.02 (0.15)</td>
                <td>0.02 (0.16)</td>
                <td>0.01 (0.12)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Physician title 1</td>
                <td>0.46 (0.5)</td>
                <td>0.43 (0.5)</td>
                <td>0.53 (0.5)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Patient related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>PriorExam</td>
                <td>0.19 (0.39)</td>
                <td>0.06 (0.24)</td>
                <td>0.47 (0.5)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Private</td>
                <td>0.08 (0.27)</td>
                <td>0.07 (0.25)</td>
                <td>0.1 (0.3)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Offline connection</td>
                <td>0.71 (0.45)</td>
                <td>0.87 (0.33)</td>
                <td>0.36 (0.48)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Service delivery related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total dialog</td>
                <td>7.44 (6.38)</td>
                <td>6.27 (5.03)</td>
                <td>10.04 (8.06)</td>
                <td>1, 35</td>
                <td>1, 31</td>
                <td>1, 35</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Response rate</td>
                <td>0.19 (0.16)</td>
                <td>0.18 (0.16)</td>
                <td>0.2 (0.17)</td>
                <td>0, 0.875</td>
                <td>0, 0.75</td>
                <td>0, 0.875</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Answer frequency</td>
                <td>0.22 (0.33)</td>
                <td>0.24 (0.35)</td>
                <td>0.18 (0.29)</td>
                <td>0, 1.25</td>
                <td>0, 1</td>
                <td>0, 1.25</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Social return</td>
                <td>0.18 (0.38)</td>
                <td>0.18 (0.38)</td>
                <td>0.18 (0.38)</td>
                <td>0, 1</td>
                <td>0, 1</td>
                <td>0, 1</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Patient involvement related</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Patient posts</td>
                <td>5.79 (5.10)</td>
                <td>5.05 (4.38)</td>
                <td>7.43 (6.10)</td>
                <td>1, 28</td>
                <td>1, 28</td>
                <td>1, 28</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Question frequency</td>
                <td>1.11 (1.2)</td>
                <td>1.2 (1.24)</td>
                <td>0.92 (1.07)</td>
                <td>0, 5.5</td>
                <td>0, 5.5</td>
                <td>0, 5.5</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>Mean differences between free and paid services are all significant (<italic>P</italic>&#60;.001), except for social return (<italic>P</italic>=.025).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Machine Learning Model Performance and Feature Importance Ranking</title>
        <p>Next, the overall model performance was examined (step 4; see <xref ref-type="table" rid="table3">Table 3</xref>). As we have an imbalanced dataset (ie, the ratio between paid and free-only services is around 1:2), area under the receiver operating characteristic curve (AUC), F measure, and balanced accuracy are less biased and more informative than other measures. GB exhibited the best overall performance (balanced accuracy=0.973, F measure=0.97, and AUC=1). However, all classifiers performed well, indicating that our predictive model with 11 selected features exhibits significant classification performance. Explanation of each measure is presented in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>In investigating our research objective on the relative importance of the 11 features, the four ML classifiers yielded relatively consistent results in the top-ranked and low-ranked features, whereas the ones in the middle were less consistent (<xref ref-type="table" rid="table4">Table 4</xref>). Offline connection, response rate, social return, total dialog, diagnoses from a prior examination, and private status consistently ranked high, whereas physician title, question frequency, and the second-tier hospital ranking were consistently ranked low.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Machine learning model performance evaluation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="430"/>
            <col width="170"/>
            <col width="120"/>
            <col width="140"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Model performance measurement</td>
                <td>Logistic regression</td>
                <td>Decision tree</td>
                <td>Gradient boost</td>
                <td>Random forest</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Recall</td>
                <td>0.851</td>
                <td>0.949</td>
                <td>0.952</td>
                <td>0.908</td>
              </tr>
              <tr valign="top">
                <td>Precision</td>
                <td>0.896</td>
                <td>0.989</td>
                <td>0.988</td>
                <td>0.984</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>0.956</td>
                <td>0.995</td>
                <td>0.995</td>
                <td>0.993</td>
              </tr>
              <tr valign="top">
                <td>F measure</td>
                <td>0.873</td>
                <td>0.969</td>
                <td>0.970</td>
                <td>0.944</td>
              </tr>
              <tr valign="top">
                <td>Balanced accuracy</td>
                <td>0.903</td>
                <td>0.972</td>
                <td>0.973</td>
                <td>0.951</td>
              </tr>
              <tr valign="top">
                <td>Area under the receiver operating characteristic curve</td>
                <td>1.000</td>
                <td>0.988</td>
                <td>1.000</td>
                <td>0.988</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Key features listed in descending order of importance.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="230"/>
            <col width="230"/>
            <col width="220"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td>Rank</td>
                <td>Logistic regression (coefficient<sup>a</sup>)</td>
                <td>Decision tree (level of splits)</td>
                <td>Gradient boost (importance, %)</td>
                <td>Random forest (importance, %)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Response rate (−13.89)</td>
                <td>Offline connection (1)</td>
                <td>Offline connection (30)</td>
                <td>Offline connection (24)</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Offline connection (−4.99)</td>
                <td>Social return (2)</td>
                <td>Total dialog (30)</td>
                <td>PriorExam (20)</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Social return (−3.11)</td>
                <td>Total dialog (2)</td>
                <td>Response rate (25)</td>
                <td>Total dialog (18)</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Patient posts (−2.63)</td>
                <td>Private (3)</td>
                <td>Social return (8)</td>
                <td>Response rate (17)</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Total dialog (2.47)</td>
                <td>Response rate (3)</td>
                <td>Private (6)</td>
                <td>Patient posts (9)</td>
              </tr>
              <tr valign="top">
                <td>6</td>
                <td>PriorExam (1.70)</td>
                <td>PriorExam (4)</td>
                <td>Patient posts (1)</td>
                <td>Social return (7)</td>
              </tr>
              <tr valign="top">
                <td>7</td>
                <td>Private (−0.99)</td>
                <td>Answer_frq (4)</td>
                <td>PriorExam (0)</td>
                <td>Private (2)</td>
              </tr>
              <tr valign="top">
                <td>8</td>
                <td>Ranking 2 (−0.305)</td>
                <td>Patient posts (6)</td>
                <td>Answer_frq (0)</td>
                <td>Answer_frq (2)</td>
              </tr>
              <tr valign="top">
                <td>9</td>
                <td>Answer_frq (−0.14)</td>
                <td>Question_frq (6)</td>
                <td>Question_frq (0)</td>
                <td>Question_frq (1)</td>
              </tr>
              <tr valign="top">
                <td>10</td>
                <td>Question_frq (−0.13)</td>
                <td>Ranking 2 (8)</td>
                <td>Title1 (0)</td>
                <td>Title1 (0)</td>
              </tr>
              <tr valign="top">
                <td>11</td>
                <td>Title1 (−0.089)</td>
                <td>Title1 (9)</td>
                <td>Ranking 2 (0)</td>
                <td>Ranking 2 (0)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>For logistic regression, a regularization procedure (see <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) is applied, so large weight coefficients are penalized for avoiding overfitting. All coefficients are significant (<italic>P</italic>&#60;.001).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Interpreting Key Patient Subcategories Based on Feature Configurations</title>
        <p>To address the third research objective, we examined how these features interact in relation to patient payments. A tree structure was used because it explicitly displays the feature hierarchies and classification outcomes at each tree split. Five key feature configurations emerged, which describe five subgroups of patients who interact with physicians differently, yielding different payment outcomes. By applying the learned tree structure on the full dataset, these five subgroups covered 85.2% of the total population, using a combination of only four key features (ie, offline, total dialog, response rate, and social return). Note that the DT algorithm has the capability to fully classify the whole population (in our case, at 10 layers), but the configurations become complex and practically less useful. Thus, we used the subgroups up to the third layer (see <xref rid="figure2" ref-type="fig">Figure 2</xref> and <xref ref-type="table" rid="table5">Table 5</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Decision tree for identifying patient subgroups with the full dataset.</p>
          </caption>
          <graphic xlink:href="medinform_v8i2e16765_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Decision tree–based configuration of feature contributions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="340"/>
            <col width="220"/>
            <col width="150"/>
            <col width="260"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Top feature configurations<sup>a</sup></td>
                <td>Number of cases in the node, n</td>
                <td>Dominant outcome</td>
                <td>Percentage of dominant cases, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Subgroup 1: These configurations suggest a simple type of follow-up service resulting from previous offline diagnoses.</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Offline AND low total dialog (≤11.5)</td>
                <td>850,338</td>
                <td>Free</td>
                <td>819,363 (96.36)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Offline AND low total dialog (≤11.5) AND low response rate (≤0.35)</td>
                <td>743,165</td>
                <td>Free</td>
                <td>735,571 (98.98)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Subgroup 2: This configuration suggests a complex service extension from the previous offline diagnoses, which requires intensive patient-provider interaction.</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Offline AND high total dialog (&#62;11.5) AND high response rate (&#62;0.25)</td>
                <td>99,355</td>
                <td>Paid</td>
                <td>99,355 (100)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Subgroup 3: These configurations suggest the patient has no offline connection with the physician but is paying a premium for the online consultation rather than using a social return to show gratitude.</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonoffline AND no social return</td>
                <td>335,047</td>
                <td>Paid</td>
                <td>281,247 (83.94)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonoffline AND no social return AND nonprivate</td>
                <td>248,294</td>
                <td>Paid</td>
                <td>242,265 (97.57)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Subgroup 4: This configuration suggests the patient has no offline connection with the physician, has a less intensive online consultation experience, and offers a social return as compensation instead of payment.</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonoffline AND social return AND low total dialog (≤10.5)</td>
                <td>80,804</td>
                <td>Free</td>
                <td>73,839 (91.38)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Subgroup 5: This configuration suggests the patient has no offline connection with the physician and engages in an intensive online interaction, providing both payment and a social return as compensation.</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Nonoffline AND social return AND high total dialog (&#62;10.5)</td>
                <td>36,152</td>
                <td>Paid</td>
                <td>25,899 (71.64)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>For each tree split, if no dominant outcome emerges (ie, free cases &#60;80% or paid cases &#60;70% at the focal split), we do not consider it as an important subgroup because additional service features are required to better classify these cases.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We can observe that patients who have previous offline consultations with the physician are less likely to pay. It is possible that these patients tend to take free opportunities to clarify simple unsolved issues after their offline visits, as indicated by the increasing proportion of free services in the presence of low total dialog and low response rates from the physicians (subgroup 1). However, if complex issues emerge, these patients may still prefer to return to the offline health care channel rather than pay for the premium online service.</p>
        <p>A second type of returning patients (subgroup 2) may have complex issues and decide to stay online and pay. This represents a complex service extension: these returning patients may have complex issues that require highly interactive patient-physician communication. Thus, these returning patients frequently communicate with the physicians (probably because of the complexity of the issue) and receive frequent responses, which, in turn, are associated with a high probability of payment.</p>
        <p>For online patients who have no prior connection with the physician, those who do not provide social returns (eg, thank you letters and virtual gifts) seem more likely to pay (subgroup 3). There may be a psychological compensation effect [<xref ref-type="bibr" rid="ref31">31</xref>] where giving virtual gifts substitutes for the actual payment and balances the sense of <italic>guilt</italic> after receiving free services. However, in cases where the service between patients and physicians with no offline connection is highly interactive (ie, large amount of dialog), patients provide both virtual gifts and premium payment to show their appreciation (subgroup 4 vs subgroup 5).</p>
        <p>The high-level presence of <italic>private</italic> in one of the tree branches deserves more attention. <italic>Privacy</italic> represents a function provided by the platform, which allows patients to set their dialogs as private, so they cannot be viewed by other people. From previous studies, we know that one of the major reasons that patients do not use online health care services is privacy concerns [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. Patients who use this function may have a higher privacy concern than those who do not use it. As online medical consultation requires patients to reveal sensitive health-related information, patients who allow this information to be publicly displayed probably have lower privacy concerns and may be more engaged in the online consultation and subsequent diagnosis. Owing to this heightened engagement, they may be more likely to pay after the initial free interactions (subgroup 3).</p>
        <p>In summary, the source of patients (offline returning or online directly) seems to be a key differentiator for payment, which may be because of the different motivations and service requirements inherent in these two types of patients. Patient-physician interaction representing service delivery quality is another key differentiator (eg, total dialogs, response rate, and patient posts), which also indicates the importance of patient involvement and physician’s timely response during the consultation. Privacy setting and social return, two features pertaining to the platform functionality, play important roles as well.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we focused on online medical consultation, a type of emerging digital health care service that has received much attention in recent years. Our objective was to understand the features of online medical consultation services that contribute to payment so that the platform can identify high-value services and take actions to better manage service providers and their offerings. As an initial study using ML approaches to identify key features and to make predictions, we did not aim to incrementally improve prediction accuracy by engineering the features or developing new algorithms. Rather, our goal was to develop a predictive model that has both sufficient explanatory power and practical interpretability so that it can be used by medical consultation platforms and service providers.</p>
        <p>The high performance across the ML algorithms demonstrates that our 11-feature model is a useful predictive tool (research objective 1). In terms of feature importance (research objectives 2 and 3), our results show that although physician reputation is important, service delivery quality and patient involvement appear to contribute more to the payment. We further identified five patient subgroups based on DT feature configurations. The configurations show how features related to patient characteristics, platform functionalities, and patient-provider interaction are combined to result in different payment outcomes. These configurations highlight the offline connection and responsive service delivery as key differentiators for payment vs free trial–only services.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>First, decisions made during the feature selection procedure may cause bias in the subsequent analysis. Although the results of this study achieved satisfactory overall performance, a different set of features that are comparable with the current ones can be used to cross-validate our model.</p>
        <p>Second, although the platform provides various long- and short-term service options, to ensure consistency in data cleaning and interpretability of results, we only included short-term services based on the service tags available. However, future research should examine long-term service subscription, as patients’ decision-making criteria can be very different than for short-term service subscription.</p>
        <p>Third, considering problems with data quality and limited variability, we did not include the platform’s online physician reputation ratings. However, future research could focus on physicians whose ratings do vary over time to observe how noticeable changes in ratings influence payment.</p>
        <p>Fourth, our analysis was based on the Chinese context. Considering the cultural differences and health care regulations, our results may have limited generalizability to other contexts. However, the mechanisms and types of interactions that have been found are generic enough to be promoted and managed in different online medical consultation platforms and in different countries. Furthermore, the Chinese context itself is quite large and should be of interest on its own.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Although the majority of features in our predictive model were examined in existing research on payment for online medical consultation, several new features specific to this type of platform and some surprising differences from existing research also emerged. Unexpectedly, physicians’ offline reputation, as indicated by the title and the affiliated hospital ranking, does not rank high in the ML algorithms and does not appear in the top three levels of the tree structure. These physician offline reputation features are frequently employed by previous studies in similar contexts [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Although our LR results exhibit significant coefficients for these offline reputation features, in the tree structure, they only play a role in combination with other features in the lower levels. It is likely that patients experience different stages of awareness and learning during the phases of physician selection, free service, and paid service [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. Although physician reputation may increase patients’ initial service awareness and influence physician selection, it seems that service experience (ie, service quality and intensive involvement) is a more important payment differentiator. Thus, our results show that regression may not be the best method to detect the impacts of various predictors and may yield oversimplified interpretation—regression only shows a linear additive relationship and excludes collinearity, whereas in reality, complex interactions and multiple paths to payment may exist.</p>
        <p>In contrast to previous results that show the positive influence of prior physician-patient social ties on payment [<xref ref-type="bibr" rid="ref18">18</xref>], our results show that a prior offline relationship with the physician does not always seem to be a facilitating factor for online payment. Although one subgroup of offline patients with existing social ties with the physician exhibits interactive service experiences and makes online payments, another offline subgroup seems to only use free services for simple follow-ups without deepening the online portion of the relationship and thus avoids payment. Thus, it may be difficult for patients to completely shift their health care practices and habits from the offline to the online setting.</p>
        <p>Previous studies also highlight virtual gifts as a positive signal for payment [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. However, our findings suggest that virtual gifts may be a double-edged sword. For patients who have no prior offline connections with the physician, allowing them to show gratitude with a virtual gift function may not be a good strategy, as this type of patient may substitute this virtual gift for payment. However, if the service is intensive, virtual gifts and payment will be additive rather than substitutive.</p>
        <p>In line with previous literature on online service delivery, responsive service is a key antecedent of payment [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Encouraging patient engagement (eg, encouraging multiple timely interactions with the physician) may help promote payment. As each response to the physician counts as one free trial for the patients, reluctance to consult further may arise at the end of each conversation turn. Persuading patients to keep on responding in a timely manner should be beneficial for establishing long-term patient-physician collaboration and attracting payments.</p>
        <p>Previous studies in similar contexts generally use a linear regression approach; however, we employ ML—with its ability to mine massive fine-grained behavior data [<xref ref-type="bibr" rid="ref37">37</xref>]—to explore the associations and predictive power of various consultation service–related features. The various classifiers based on different ML philosophies for a binary classification problem provide complementary views of how the model can help us understand payment. The feature ranking and configuration results from four ML approaches indicate that these features are not generating linear impacts, a finding that was not evident in previous studies.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Online delivery of health care services is increasingly common and gives patients a new channel and expanded options for accessing health care services. However, many online medical consultation platforms are struggling to attract and retain patients who are willing to pay, and health care providers on the platform have the additional challenge of standing out in a crowd of physicians who can provide comparable services. This study explores the key features that contribute to patient payment in the online health care consultation market. By mining massive consultation data using ML approaches, our results show that features related to service delivery quality (eg, consultation dialog intensity and physician response rate), patient source (eg, online vs offline returning patients), and patient involvement (eg, provide social returns and reveal previous treatment) appear to contribute more to the patient’s payment decision than features related to physician reputation. We further identified five key feature configurations to help classify different interaction patterns between patients and physicians, which result in different payment outcomes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Background information.</p>
        <media xlink:href="medinform_v8i2e16765_app1.docx" xlink:title="DOCX File , 445 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Data cleaning and analysis pipeline.</p>
        <media xlink:href="medinform_v8i2e16765_app2.docx" xlink:title="DOCX File , 33 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Data-driven feature selection.</p>
        <media xlink:href="medinform_v8i2e16765_app3.docx" xlink:title="DOCX File , 20 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Additional analysis results.</p>
        <media xlink:href="medinform_v8i2e16765_app4.docx" xlink:title="DOCX File , 108 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DT</term>
          <def>
            <p>decision tree</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GB</term>
          <def>
            <p>gradient boost</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was undertaken, in part, thanks to funding from the Canada Research Chairs program (awarded to the second author) and the Natural Science Foundation of China (Nos 71301172 and 71571180, awarded to the third author).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>The impact of the internet on health consultation market concentration: an econometric analysis of secondary data</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <month>10</month>
          <day>28</day>
          <volume>18</volume>
          <issue>10</issue>
          <fpage>e276</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/10/e276/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.6423</pub-id>
          <pub-id pub-id-type="medline">27793793</pub-id>
          <pub-id pub-id-type="pii">v18i10e276</pub-id>
          <pub-id pub-id-type="pmcid">PMC5106558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Investigating the effect of paid and free feedback about physicians' telemedicine services on patients' and physicians' behaviors: panel data analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>03</month>
          <day>22</day>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>e12156</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/3/e12156/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/12156</pub-id>
          <pub-id pub-id-type="medline">30900997</pub-id>
          <pub-id pub-id-type="pii">v21i3e12156</pub-id>
          <pub-id pub-id-type="pmcid">PMC6450473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>PK</given-names>
            </name>
          </person-group>
          <article-title>Improving the effectiveness of online healthcare platforms: an empirical study with multi-period patient-doctor consultation data</article-title>
          <source>Int J Prod Econ</source>
          <year>2019</year>
          <volume>207</volume>
          <fpage>70</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ijpe.2018.11.009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cordina</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <source>McKinsey &#38; Company</source>
          <year>2018</year>
          <month>07</month>
          <access-date>2019-05-05</access-date>
          <comment>Healthcare Consumerism 2018: An Update on the Journey<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mckinsey.com/industries/healthcare-systems-and-services/our-insights/healthcare-consumerism-2018">https://www.mckinsey.com/industries/healthcare-systems-and-services/our-insights/healthcare-consumerism-2018</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <source>Garner Insights</source>
          <year>2018</year>
          <month>03</month>
          <access-date>2019-05-05</access-date>
          <comment>Online Doctor Consultation Market: Global Market Synopsis, Growth Factors, Industry Segmentation, Regional Analysis And Competitive Analysis 2017 - 2025<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://garnerinsights.com/Online-Doctor-Consultation-Market-Global-Market-Synopsis-Growth-Factors-Industry-Segmentation-Regional-Analysis-And-Competitive-Analysis-2017---2025">http://garnerinsights.com/Online-Doctor-Consultation-Market-Global-Market-Synopsis-Growth-Factors-Industry-Segmentation-Regional-Analysis-And-Competitive-Analysis-2017---2025</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Online selection of a physician by patients: empirical study from elaboration likelihood perspective</article-title>
          <source>Comput Human Behav</source>
          <year>2017</year>
          <month>08</month>
          <volume>73</volume>
          <fpage>403</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2017.03.060</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>The effect of online effort and reputation of physicians on patients' choice: 3-wave data analysis of China's good doctor website</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>03</month>
          <day>8</day>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>e10170</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/3/e10170/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/10170</pub-id>
          <pub-id pub-id-type="medline">30848726</pub-id>
          <pub-id pub-id-type="pii">v21i3e10170</pub-id>
          <pub-id pub-id-type="pmcid">PMC6429049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xiang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Understanding a Moderating Effect of Physicians' Endorsement to Online Workload: An Empirical Study in Online Health-Care Communities</article-title>
          <source>Proceedings of the 2017 IEEE International Conference on Big Data</source>
          <year>2017</year>
          <conf-name>Big Data'17</conf-name>
          <conf-date>December 11-14, 2017</conf-date>
          <conf-loc>Boston, MA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bigdata.2017.8258570</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Economic success of physicians in the online consultation market: a signaling theory perspective</article-title>
          <source>Int J Electron Comm</source>
          <year>2019</year>
          <month>03</month>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>244</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1080/10864415.2018.1564552</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenhalgh</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Vijayaraghavan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wherton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Byrne</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell-Richards</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hanson</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ramoutar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gutteridge</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hodkinson</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Collard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Virtual online consultations: advantages and limitations (VOCAL) study</article-title>
          <source>BMJ Open</source>
          <year>2016</year>
          <month>01</month>
          <day>29</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>e009388</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://bmjopen.bmj.com/cgi/pmidlookup?view=long&#38;pmid=26826147"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2015-009388</pub-id>
          <pub-id pub-id-type="medline">26826147</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2015-009388</pub-id>
          <pub-id pub-id-type="pmcid">PMC4735312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Online medical consultation: a review</article-title>
          <source>Int J Community Med Public Health</source>
          <year>2018</year>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>1230</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.18203/2394-6040.ijcmph20181195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Exploring the effects of patient-generated and system-generated information on patients’ online search, evaluation and decision</article-title>
          <source>Electron Commer Res Appl</source>
          <year>2015</year>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>192</fpage>
          <lpage>203</lpage>
          <pub-id pub-id-type="doi">10.1016/j.elerap.2015.04.001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Making freemium work</article-title>
          <source>Harv Bus Rev</source>
          <year>2014</year>
          <volume>92</volume>
          <issue>5</issue>
          <fpage>0017</fpage>
          <lpage>9</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The impact of individual and organizational reputation on physicians’ appointments online</article-title>
          <source>Int J Electron Comm</source>
          <year>2016</year>
          <month>06</month>
          <volume>20</volume>
          <issue>4</issue>
          <fpage>551</fpage>
          <lpage>77</lpage>
          <pub-id pub-id-type="doi">10.1080/10864415.2016.1171977</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>How doctors gain social and economic returns in online health-care communities: a professional capital perspective</article-title>
          <source>J Manag Inf Syst</source>
          <year>2017</year>
          <month>08</month>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>487</fpage>
          <lpage>519</lpage>
          <pub-id pub-id-type="doi">10.1080/07421222.2017.1334480</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Jing</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Free for Caring? The effect of offering free online medical-consulting services on physician performance in e-health care</article-title>
          <source>Telemed J E Health</source>
          <year>2019</year>
          <month>10</month>
          <volume>25</volume>
          <issue>10</issue>
          <fpage>979</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.1089/tmj.2018.0216</pub-id>
          <pub-id pub-id-type="medline">30566383</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Exploring the impact of word-of-mouth about physicians' service quality on patient choice based on online health communities</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2016</year>
          <month>11</month>
          <day>26</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>151</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-016-0386-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-016-0386-0</pub-id>
          <pub-id pub-id-type="medline">27888834</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-016-0386-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC5124243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Vogel</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Doctor–patient relationship strength’s impact in an online healthcare community</article-title>
          <source>Inform Technol Dev</source>
          <year>2017</year>
          <month>03</month>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>279</fpage>
          <lpage>300</lpage>
          <pub-id pub-id-type="doi">10.1080/02681102.2017.1283287</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Exploring the effects of online rating and the activeness of physicians on the number of patients in an online health community</article-title>
          <source>Telemed J E Health</source>
          <year>2019</year>
          <month>11</month>
          <volume>25</volume>
          <issue>11</issue>
          <fpage>1090</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1089/tmj.2018.0192</pub-id>
          <pub-id pub-id-type="medline">30676279</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>DY</given-names>
            </name>
          </person-group>
          <article-title>Understanding the social learning effect in contagious switching behavior</article-title>
          <source>Manage Sci</source>
          <year>2019</year>
          <volume>65</volume>
          <issue>10</issue>
          <fpage>4771</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1287/mnsc.2018.3173</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Investor platform choice: herding, platform attributes, and regulations</article-title>
          <source>J Manag Inf Syst</source>
          <year>2018</year>
          <month>03</month>
          <volume>35</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <lpage>116</lpage>
          <pub-id pub-id-type="doi">10.1080/07421222.2018.1440770</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Bagozzi</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Contribution behavior in virtual communities: cognitive, emotional, and social influences</article-title>
          <source>MIS Q</source>
          <year>2014</year>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>143</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.25300/misq/2014/38.1.07</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Exploring the social influence of multichannel access in an online health community</article-title>
          <source>J Assoc Inf Sci Technol</source>
          <year>2018</year>
          <volume>69</volume>
          <issue>1</issue>
          <fpage>98</fpage>
          <lpage>109</lpage>
          <pub-id pub-id-type="doi">10.1002/asi.23928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daemmrich</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The political economy of healthcare reform in China: negotiating public and private</article-title>
          <source>Springerplus</source>
          <year>2013</year>
          <volume>2</volume>
          <fpage>448</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/24052932"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/2193-1801-2-448</pub-id>
          <pub-id pub-id-type="medline">24052932</pub-id>
          <pub-id pub-id-type="pii">512</pub-id>
          <pub-id pub-id-type="pmcid">PMC3776089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Feature Selection for High-dimensional Data: A Fast Correlation-Based Filter Solution</article-title>
          <source>Proceedings of the 20th International Conference on Machine Learning</source>
          <year>2003</year>
          <conf-name>ICML'03</conf-name>
          <conf-date>August 21-24, 2003</conf-date>
          <conf-loc>Washington, DC</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>KNIME</source>
          <year>2015</year>
          <access-date>2019-05-01</access-date>
          <comment>Seven Techniques for Data Dimensionality Reduction<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.knime.com/blog/seven-techniques-for-data-dimensionality-reduction">https://www.knime.com/blog/seven-techniques-for-data-dimensionality-reduction</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Correlation-based Feature Selection for Discrete and Numeric Class Machine Learning</article-title>
          <source>Proceedings of the Seventeenth International Conference on Machine Learning</source>
          <year>2000</year>
          <conf-name>ICML'00</conf-name>
          <conf-date>June 29 - July 2, 2000</conf-date>
          <conf-loc>Stanford, CA, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Batista</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Prati</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Monard</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>A study of the behavior of several methods for balancing machine learning training data</article-title>
          <source>SIGKDD Explor Newsl</source>
          <year>2004</year>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>20</fpage>
          <pub-id pub-id-type="doi">10.1145/1007730.1007735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sokolova</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lapalme</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A systematic analysis of performance measures for classification tasks</article-title>
          <source>Inf Process Manag</source>
          <year>2009</year>
          <volume>45</volume>
          <issue>4</issue>
          <fpage>427</fpage>
          <lpage>37</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2009.03.002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Greedy function approximation: a gradient boosting machine</article-title>
          <source>Ann Stat</source>
          <year>2001</year>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>1189</fpage>
          <lpage>232</lpage>
          <pub-id pub-id-type="doi">10.1214/aos/1013203451</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bäckman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Psychological compensation: a theoretical framework</article-title>
          <source>Psychol Bull</source>
          <year>1992</year>
          <month>09</month>
          <volume>112</volume>
          <issue>2</issue>
          <fpage>259</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="doi">10.1037/0033-2909.112.2.259</pub-id>
          <pub-id pub-id-type="medline">1454895</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Angst</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Adoption of electronic health records in the presence of privacy concerns: the elaboration likelihood model and individual persuasion</article-title>
          <source>MIS Q</source>
          <year>2009</year>
          <month>06</month>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>339</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.2307/20650295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bansal</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zahedi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gefen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The impact of personal dispositions on information sensitivity, privacy concern and trust in disclosing health information online</article-title>
          <source>Decis Support Syst</source>
          <year>2010</year>
          <month>05</month>
          <volume>49</volume>
          <issue>2</issue>
          <fpage>138</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1016/j.dss.2010.01.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>CZ</given-names>
            </name>
            <name name-style="western">
              <surname>Au</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>HS</given-names>
            </name>
          </person-group>
          <article-title>Effects of freemium strategy in the mobile app market: an empirical study of Google Play</article-title>
          <source>J Manag Inf Syst</source>
          <year>2014</year>
          <volume>31</volume>
          <issue>3</issue>
          <fpage>326</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1080/07421222.2014.995564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Storey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cankurtaran</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Papastathopoulou</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hultink</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Success factors for service innovation: a meta-analysis</article-title>
          <source>J Prod Innov Manag</source>
          <year>2016</year>
          <month>09</month>
          <volume>33</volume>
          <issue>5</issue>
          <fpage>527</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.1111/jpim.12307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wakefield</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Blodgett</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Customer response to intangible and tangible service factors</article-title>
          <source>Psychol Mark</source>
          <year>1999</year>
          <month>01</month>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>68</lpage>
          <pub-id pub-id-type="doi">10.1002/(sici)1520-6793(199901)16:1&lt;51::aid-mar4&gt;3.0.co;2-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martens</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Provost</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Fortuny</surname>
              <given-names>EJ</given-names>
            </name>
          </person-group>
          <article-title>Mining massive fine-grained behavior data to improve predictive analytics</article-title>
          <source>MIS Q</source>
          <year>2016</year>
          <volume>40</volume>
          <issue>4</issue>
          <fpage>869</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.25300/misq/2016/40.4.04</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
