<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JMI</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
      <journal-title>JMIR Medical Informatics</journal-title>
      <issn pub-type="epub">2291-9694</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i4e16970</article-id>
      <article-id pub-id-type="pmid">32319959</article-id>
      <article-id pub-id-type="doi">10.2196/16970</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting Inpatient Falls Using Natural Language Processing of Nursing Records Obtained From Japanese Electronic Medical Records: Case-Control Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Aminbeidokhti</surname>
            <given-names>Amirhossein</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Bellei</surname>
            <given-names>Ericles</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Boukhechba</surname>
            <given-names>Mehdi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nakatani</surname>
            <given-names>Hayao</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3112-0406</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Nakao</surname>
            <given-names>Masatoshi</given-names>
          </name>
          <degrees>RN, MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5238-1752</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Uchiyama</surname>
            <given-names>Hidefumi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Pharmaceutical Research Department</institution>
            <institution>Global Pharmaceutical R&#38;D Division</institution>
            <institution>Neopharma Japan Co Ltd</institution>
            <addr-line>Iidabashi Grand Bloom 4F</addr-line>
            <addr-line>2-10-2 Fujimi, Chiyoda-ku</addr-line>
            <addr-line>Tokyo, 102-0071</addr-line>
            <country>Japan</country>
            <phone>81 90 3896 2658</phone>
            <email>huchiyam@gmail.com</email>
          </address>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5767-110X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Toyoshiba</surname>
            <given-names>Hiroyoshi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5952-8473</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Ochiai</surname>
            <given-names>Chikayuki</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3614-1951</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>NTT Medical Center Tokyo</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Pharmaceutical Research Department</institution>
        <institution>Global Pharmaceutical R&#38;D Division</institution>
        <institution>Neopharma Japan Co Ltd</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Research Development Department</institution>
        <institution>Lifescience AI Business Division</institution>
        <institution>FRONTEO Inc</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Tokyo Healthcare University</institution>
        <addr-line>Tokyo</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hidefumi Uchiyama <email>huchiyam@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>4</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>4</month>
        <year>2020</year>
      </pub-date>
      <volume>8</volume>
      <issue>4</issue>
      <elocation-id>e16970</elocation-id>
      <history>
        <date date-type="received">
          <day>7</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>11</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>1</day>
          <month>1</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>22</day>
          <month>1</month>
          <year>2020</year>
        </date>
      </history>
      <copyright-statement>©Hayao Nakatani, Masatoshi Nakao, Hidefumi Uchiyama, Hiroyoshi Toyoshiba, Chikayuki Ochiai. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 22.04.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="http://medinform.jmir.org/2020/4/e16970/" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Falls in hospitals are the most common risk factor that affects the safety of inpatients and can result in severe harm. Therefore, preventing falls is one of the most important areas of risk management for health care organizations. However, existing methods for predicting falls are laborious and costly.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The objective of this study is to verify whether hospital inpatient falls can be predicted through the analysis of a single input—unstructured nursing records obtained from Japanese electronic medical records (EMRs)—using a natural language processing (NLP) algorithm and machine learning.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The nursing records of 335 fallers and 408 nonfallers for a 12-month period were extracted from the EMRs of an acute care hospital and randomly divided into a learning data set and test data set. The former data set was subjected to NLP and machine learning to extract morphemes that contributed to separating fallers from nonfallers to construct a model for predicting falls. Then, the latter data set was used to determine the predictive value of the model using receiver operating characteristic (ROC) analysis.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The prediction of falls using the test data set showed high accuracy, with an area under the ROC curve, sensitivity, specificity, and odds ratio of mean 0.834 (SD 0.005), mean 0.769 (SD 0.013), mean 0.785 (SD 0.020), and mean 12.27 (SD 1.11) for five independent experiments, respectively. The morphemes incorporated into the final model included many words closely related to known risk factors for falls, such as the use of psychotropic drugs, state of consciousness, and mobility, thereby demonstrating that an NLP algorithm combined with machine learning can effectively extract risk factors for falls from nursing records.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>We successfully established that falls among hospital inpatients can be predicted by analyzing nursing records using an NLP algorithm and machine learning. Therefore, it may be possible to develop a fall risk monitoring system that analyzes nursing records daily and alerts health care professionals when the fall risk of an inpatient is increased.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>fall</kwd>
        <kwd>risk factor</kwd>
        <kwd>prediction</kwd>
        <kwd>nursing record</kwd>
        <kwd>natural language processing</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Falls are the most common risk factor affecting the safety of hospital inpatients. They often result in a severe injury, such as a femoral fracture or head trauma, which can be life-threatening or affect the patient’s quality of life. After analyzing data from 1263 hospitals, Bouldin et al [<xref ref-type="bibr" rid="ref1">1</xref>] reported that the rate of falls in the United States was 3.56 per 1000 patient-days during a 27-month study period and that 26.1% of these falls (0.93 per 1000 patient-days) resulted in injury. In Japan, a 2016 report from the Japan Federation of Democratic Medical Institutions indicated that the rates of falls and falls causing injury were 4.40 and 0.29 per 1000 patient-days, respectively [<xref ref-type="bibr" rid="ref2">2</xref>]. Therefore, the prevention of falls is one of the most important areas of risk management for health care organizations. The Joint Commission, which is involved in the accreditation and certification of US health care organizations and programs, has strongly recommended taking strategic action for fall prevention, including the use of a standardized assessment tool to identify risks [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
      </sec>
      <sec>
        <title>Prior Work</title>
        <p>A variety of methods have been developed to predict the risk of falls for hospital inpatients, such as the Morse Fall Scale [<xref ref-type="bibr" rid="ref4">4</xref>], St Thomas’s Risk Assessment Tool in Falling Elderly Inpatients (STRATIFY) [<xref ref-type="bibr" rid="ref5">5</xref>], Hendrich Fall Risk Model (HFRM) [<xref ref-type="bibr" rid="ref6">6</xref>], and the revised Hendrich II Fall Risk Model [<xref ref-type="bibr" rid="ref7">7</xref>]. All these methods have been used and evaluated [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. However, such risk assessment methods invariably involve time-consuming processes, such as interviews, observation, and intervention [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>], which interrupt the work of health care professionals, and the additional workload contributes to an increase in medical costs.</p>
        <p>Moreover, several studies, including systematic reviews, have demonstrated that no single intervention, including patient tags and movement sensors, efficiently reduces fall incidents in any setting, whereas multifactorial assessment linked to appropriate interventions is successful [<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, no common combination of risk factors was discovered in these studies [<xref ref-type="bibr" rid="ref17">17</xref>], indicating that health care professionals still need to conduct multiple assessments for each risk factor in daily practice, including motor function, continence, mental state, and medication. Thus, a less laborious assessment tool that can predict the risk of falls with high precision without initial intervention is desirable.</p>
        <p>With recent advances in information technology, several groups have attempted to apply natural language processing (NLP) to text analysis of electronic medical records (EMRs) to achieve the early diagnosis of conditions such as peripheral arterial disease [<xref ref-type="bibr" rid="ref18">18</xref>], asthma [<xref ref-type="bibr" rid="ref19">19</xref>], and multiple sclerosis [<xref ref-type="bibr" rid="ref20">20</xref>]. In these studies, NLP was used to find specific words or phrases in a predefined dictionary that described the symptoms or signs of each disease. Following these studies, we apply artificial intelligence to EMRs to analyze the risk of falls.</p>
      </sec>
      <sec>
        <title>Goal of This Study</title>
        <p>Our primary objective is to determine whether hospital inpatient falls can be predicted through the analysis of the unstructured text of hospital nursing records in Japanese EMRs using an NLP algorithm and machine learning. In nursing records, nurses write daily information about a patient’s nursing care, the patient’s response, and other events or factors that may affect the patient’s well-being based on observation and experience [<xref ref-type="bibr" rid="ref21">21</xref>]. Thus, nursing records contain valuable information for clinical practice but have not been widely used for any type of risk assessment because they require a technique, such as NLP, to analyze and extract meanings of interest from free text or unstructured documents.</p>
        <p>We constructed a predictive model to assess the linguistic differences between the nursing records of fallers and nonfallers using our proprietary algorithm applying NLP in combination with machine learning and evaluated its performance using receiver operating characteristic (ROC) analysis. The advantages of our approach are that it allows us to assess various risk factors from a single input (nursing records), and it is less laborious and costly than previous approaches because it does not require additional observation or interviews.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>We used a case-control study because of the easy availability of nursing records in EMRs, limited computational capacity, and low rate of falls among inpatients. Because our main objective is to verify the feasibility of using nursing records to predict falls, we used only one hospital and one year of data to limit the cost and time of data extraction. For this study, we considered NTT Medical Center Tokyo (Tokyo, Japan), which is an acute care hospital with 606 beds and an average hospital stay of 11.4 days. The Institutional Review Board of the hospital approved the study (Approval #15-267, June 25, 2015). The study period was from July 2014 to July 2015.</p>
      </sec>
      <sec>
        <title>Data</title>
        <p>Among 18,045 inpatients during the study period, 335 patients with one or more fall incidents (fallers) were identified from the incident reports of the hospital. As a control group, 408 patients without falls (nonfallers) were randomly selected. More nonfallers than fallers were chosen as a contingency if extracted data had to be discarded for unexpected reasons. Data were not discarded; therefore, all usable data were considered in the analysis. We are aware that the substantial difference between the total number of fallers and nonfallers can affect machine learning; however, we believe this is mitigated by the use of a case-control study, which is often used in rare medical cases such as rare diseases.</p>
        <p>Data on the two groups of patients were extracted from the EMR system by the EMR vendor and provided to the researchers after anonymization. The researchers constructed a case data set (fallers) and control data set (nonfallers). The nursing records were written in the EMR once a day or more frequently as necessary by several nurses using the subjective, objective, assessment, and plan style or free description. These contained (1) patients’ statements, (2) observations of the nurses, (3) results of vital check and various assessments, (4) descriptions of medical treatment and administration of drugs (or plan for them), (5) messages to and from patients, and (6) any other comments by nurses. Some parts of (3) and (4) were entered as preset form data, and others were unstructured data. Several records for one patient made on the same day were integrated into one nursing record. Thus, 25,145 nursing records were obtained, which consisted of 18,912 nursing records for fallers and 6233 for nonfallers. The prevalence of falls was 2.61 falls per 1000 patient-days during the study period. The characteristics of the patients and nursing records are shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p>
        <p>The entire nursing record data set was divided into a learning data set and test data set by generating random numbers for patient identification numbers assigned after anonymization.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Characteristics of the patients and nursing records.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="360"/>
            <col width="170"/>
            <col width="170"/>
            <col width="170"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Characteristics</td>
                <td>All patients</td>
                <td>Fallers</td>
                <td>Nonfallers</td>
                <td><italic>P</italic> value<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="2">Patients, n (% of total)</td>
                <td>743 (100)</td>
                <td>335 (45.1)</td>
                <td>408 (54.9)</td>
                <td>—<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <bold>Gender, n (% of total)</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>342 (100)</td>
                <td>156 (45.6)</td>
                <td>186 (54.4)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>401 (100)</td>
                <td>179 (44.6)</td>
                <td>222 (55.4)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">Age (years), mean (SD)</td>
                <td>67.0 (17.1)</td>
                <td>73.3 (13.3)</td>
                <td>65.5 (18.1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Nursing records, n</td>
                <td>25,145</td>
                <td>18,912</td>
                <td>6233</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Nursing records per patient, mean (SD)</td>
                <td>45.3 (43.5)</td>
                <td>68.1 (49.1)</td>
                <td>26.6 (26.4)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="2">Nursing record length,<sup>c</sup> mean (SD)</td>
                <td>5392.1 (4138.2)</td>
                <td>5628.4 (4202.6)</td>
                <td>4675.1 (3848.8)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Welch <italic>t</italic> test between fallers and nonfallers used.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Not applicable.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Number of Japanese or Chinese characters.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Data Exclusion</title>
        <p>The nursing records that did not satisfy the criterion of more than 50 Japanese or Chinese characters were excluded during tokenization and vectorization. This was a requirement of the Concept Encoder, which is described subsequently.</p>
      </sec>
      <sec>
        <title>Data Processing by Concept Encoder</title>
        <p>A model was constructed to sort the nursing records into two groups (“risk” and “no risk”) from the learning data set. The probability of being categorized in the risk group, hereafter referred to as the risk probability, was calculated for each nursing record in the test data set using an in-house algorithm for NLP and machine learning called Concept Encoder (FRONTEO, Inc, Tokyo, Japan; will be published elsewhere), which was constructed on a Python platform.</p>
      </sec>
      <sec>
        <title>Document and Word Embedding</title>
        <p>Concept Encoder performs text analysis by defining the line vector obtained from the document-word matrix as a document vector. First, each document is decomposed into morphemes (the smallest meaningful units of a language) by morphological analysis using MeCab version 0.996 [<xref ref-type="bibr" rid="ref22">22</xref>], and rules are applied to label each element at the morpheme level with a word. Morphemes that were not words were discarded before each element was labeled. Then the word labels are embedded in <italic>k</italic>-dimensional vector space [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Documents can also be embedded in the <italic>k</italic>-dimensional vector space by expanding the word-embedding method. Assuming that there are <italic>m</italic> documents and <italic>n</italic> words in all the nursing records used in the study, and they are embedded, these documents and words can be expressed as matrices <italic>D</italic> and <italic>W</italic>:</p>
        <graphic xlink:href="medinform_v8i4e16970_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <graphic xlink:href="medinform_v8i4e16970_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>where each row vector of matrices <italic>D</italic> and <italic>W</italic> corresponds to <italic>m</italic> documents and <italic>n</italic> words, respectively, from the nursing records in the study.</p>
        <p>It is well known that embedded vectors have interesting features, such as word analogy, and outperform bag of words approaches in several linguistic tasks. These interesting features are retained after two matrices are multiplied because of the linearity of multiplication. For example, if <inline-graphic xlink:href="medinform_v8i4e16970_fig5.png" xlink:type="simple" mimetype="image"/> for two row vectors in <italic>W</italic>, then the inner product with <italic>d,</italic> which is a row vector in matrix <italic>D,</italic> holds <inline-graphic xlink:href="medinform_v8i4e16970_fig6.png" xlink:type="simple" mimetype="image"/>. Expanding this to the word analogy, if <inline-graphic xlink:href="medinform_v8i4e16970_fig7.png" xlink:type="simple" mimetype="image"/>, where <inline-graphic xlink:href="medinform_v8i4e16970_fig8.png" xlink:type="simple" mimetype="image"/> holds for four row vectors in <italic>W</italic>, then <inline-graphic xlink:href="medinform_v8i4e16970_fig9.png" xlink:type="simple" mimetype="image"/> holds for any row vector <italic>d</italic> in <italic>D</italic>. Hence, the product of these two matrices generates the <italic>DW</italic> matrix, which is a document-word matrix that also has these interesting features:</p>
        <graphic xlink:href="medinform_v8i4e16970_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>As seen in previous studies [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>], neural networks have been used to calculate <italic>D</italic> and <italic>W,</italic> and if the number of documents becomes large, then the calculation of these matrices is computationally intensive. Hence, the words included in the neural embedding are restricted to the top 1000 most popular words that occur in the documents in the learning data set, hereafter referred to as the “top 1000 words.”</p>
        <p>In this study, for <italic>W,</italic> the skip-gram algorithm with negative sampling was used. The number of negative samples (a hyperparameter) was set to 5, and the number of dimensions for <italic>W</italic> was set to 300. For <italic>D,</italic> the distributed bag of words version of the paragraph vector (PV-DBOW) was used with the same negative sampling and embedding dimensions as <italic>W.</italic> After obtaining <italic>W</italic> and <italic>D,</italic> the <italic>DW</italic> matrix was calculated using matrix multiplication.</p>
      </sec>
      <sec>
        <title>Construction of the Fall Prediction Model</title>
        <p>For the construction of the fall prediction model, the <italic>DW</italic> matrix was derived from all documents and words in the learning data set. By attaching tags of 1 (for fallers) and 0 (for nonfallers) to each document, each line vector of the <italic>DW</italic> matrix (which corresponds to <italic>m</italic> documents) was associated with a tag of 0 or 1. Each word was subjected to adaptive weighting for optimum separation between fallers and nonfallers using a logistic regression model, and the weighted parameters were estimated by the Markov chain Monte Carlo (MCMC) method with a normal distribution as the prior distribution of weights. For the MCMC approach, the weighted parameters were estimated using posterior distributions, and uncertainty of the estimate was also considered by observing the distribution. The weighted parameters thus obtained were used as the fall prediction model to evaluate the test data set. Random bisection of the learning data set was conducted three times, and six models were constructed using the six bisected data sets. Because the sample size was not balanced between fallers and nonfallers, we used the synthetic minority oversampling technique in this step [<xref ref-type="bibr" rid="ref26">26</xref>] by using the function “imblearn.over_sampling.SMOTE” from the library [<xref ref-type="bibr" rid="ref27">27</xref>] with the default settings and checked that the samples of the minority class (“faller” or “imminent”) were resampled to be equal in number to those of the majority class. Morphemes that significantly contributed to the separation of the fallers and nonfallers in at least four of the six primary models (ie, “significant vocabulary”) were extracted and were used to construct the final model by the generation of the trimmed <italic>DW</italic> matrix followed by MCMC optimization.</p>
      </sec>
      <sec>
        <title>Evaluation of Documents in the Test Data Set</title>
        <p>For evaluation, documents in the test data set were tokenized to generate another matrix (hereafter called “<italic>DW</italic> for test”) using the top 1000 words followed by trimming it down using the significant vocabulary. The risk probability was calculated as the element-wise product of the corresponding line vector of the <italic>DW</italic> for test matrix and the final model. To assess the significance of differences, the Student <italic>t</italic> test was performed using RStudio software (version 1.0.143).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Analysis of the Data Set</title>
        <p>Differences were observed between the groups of fallers and nonfallers for age, number of nursing records per patient (strongly correlated with the duration of hospitalization), and the length of nursing records (<xref ref-type="table" rid="table1">Table 1</xref>; <italic>P</italic>&#60;.001 by Welch <italic>t</italic> test). The ratios of fallers and nonfallers also varied among some clinical divisions of the hospital, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. However, matching for such factors was not performed because our primary aim was to determine whether it was possible to predict falls through comprehensive risk assessment using text analysis of nursing records regardless of risk factors already known or presumed from other information.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Number of inpatients per clinical division.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="460"/>
            <col width="170"/>
            <col width="170"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Clinical division</td>
                <td>Total (N=743), n</td>
                <td>Fallers (n=335), n</td>
                <td>Nonfallers (n=408), n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Gastroenterology</td>
                <td>107</td>
                <td>51</td>
                <td>56</td>
              </tr>
              <tr valign="top">
                <td>Surgery</td>
                <td>104</td>
                <td>42</td>
                <td>62</td>
              </tr>
              <tr valign="top">
                <td>Cardiology</td>
                <td>53</td>
                <td>22</td>
                <td>31</td>
              </tr>
              <tr valign="top">
                <td>Gynecology and obstetrics</td>
                <td>49</td>
                <td>4</td>
                <td>45</td>
              </tr>
              <tr valign="top">
                <td>Stroke unit</td>
                <td>44</td>
                <td>27</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>Orthopedic surgery</td>
                <td>41</td>
                <td>23</td>
                <td>18</td>
              </tr>
              <tr valign="top">
                <td>Respirology</td>
                <td>37</td>
                <td>20</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>Urology</td>
                <td>36</td>
                <td>12</td>
                <td>24</td>
              </tr>
              <tr valign="top">
                <td>Hematology</td>
                <td>32</td>
                <td>27</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td>Neurosurgery</td>
                <td>31</td>
                <td>19</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>Psychiatry</td>
                <td>30</td>
                <td>23</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Pain clinic</td>
                <td>27</td>
                <td>10</td>
                <td>17</td>
              </tr>
              <tr valign="top">
                <td>Otorhinolaryngology</td>
                <td>21</td>
                <td>1</td>
                <td>20</td>
              </tr>
              <tr valign="top">
                <td>Medical cooperation</td>
                <td>17</td>
                <td>7</td>
                <td>10</td>
              </tr>
              <tr valign="top">
                <td>Nephrology</td>
                <td>16</td>
                <td>9</td>
                <td>7</td>
              </tr>
              <tr valign="top">
                <td>Dermatology</td>
                <td>16</td>
                <td>3</td>
                <td>13</td>
              </tr>
              <tr valign="top">
                <td>Ophthalmology</td>
                <td>15</td>
                <td>4</td>
                <td>11</td>
              </tr>
              <tr valign="top">
                <td>Palliative care</td>
                <td>14</td>
                <td>9</td>
                <td>5</td>
              </tr>
              <tr valign="top">
                <td>Gamma knife center</td>
                <td>13</td>
                <td>1</td>
                <td>12</td>
              </tr>
              <tr valign="top">
                <td>Dentistry and oral surgery</td>
                <td>9</td>
                <td>3</td>
                <td>6</td>
              </tr>
              <tr valign="top">
                <td>General thoracic surgery</td>
                <td>8</td>
                <td>4</td>
                <td>4</td>
              </tr>
              <tr valign="top">
                <td>Neurology</td>
                <td>8</td>
                <td>6</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Emergency medicine</td>
                <td>5</td>
                <td>5</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Cardiovascular surgery</td>
                <td>4</td>
                <td>2</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Endocrinology and metabolism</td>
                <td>3</td>
                <td>0</td>
                <td>3</td>
              </tr>
              <tr valign="top">
                <td>General medicine</td>
                <td>2</td>
                <td>0</td>
                <td>2</td>
              </tr>
              <tr valign="top">
                <td>Psychosomatic medicine</td>
                <td>1</td>
                <td>1</td>
                <td>0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Model to Predict Falls</title>
        <p>The entire data set was divided into a learning data set and test data set as shown in <xref ref-type="table" rid="table3">Table 3</xref>. To construct a model to predict falls, tokenization and vectorization were performed on the learning data set. During this step, 12 nursing records (five for fallers and seven for nonfallers) that did not contain more than 50 Japanese or Chinese characters were excluded, leaving 9094 nursing records for fallers and 3513 nursing records for nonfallers. Using NLP and machine learning for the unstructured text of the learning data set, 378 morphemes that corresponded to significant vocabulary (ie, they contributed to separating fallers from nonfallers in at least four of the six primary models) were selected (a partial list is shown in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). To construct the final model, 378 columns that corresponded to the selected morphemes were extracted from the 1000 columns of the <italic>DW</italic> matrix generated using the learning data set and were again subjected to optimization to separate fallers from nonfallers using the MCMC method. Using the final model, the probability of each nursing record in the test data set being in the risk category was evaluated next.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Characteristics of patients and nursing records in the learning data set and test data set for prediction of falls.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="300"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <col width="100"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">Entire data set</td>
                <td>Total</td>
                <td>Fallers</td>
                <td>Nonfallers</td>
                <td><italic>P</italic> value<sup>a</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Learning data set</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Patients, n (% of total)</td>
                <td>371 (100)</td>
                <td>167 (45.0)</td>
                <td>204 (55.0)</td>
                <td>—<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Gender, n (% of total)</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>159 (100)</td>
                <td>78 (49.1)</td>
                <td>81 (50.9)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>212 (100)</td>
                <td>89 (42.0)</td>
                <td>123 (58.0)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Age (years), mean (SD)</td>
                <td>67.0 (17.0)</td>
                <td>73.4 (12.9)</td>
                <td>61.7 (18.1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records, n</td>
                <td>12,619</td>
                <td>9099</td>
                <td>3520</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records per patient, mean (SD)</td>
                <td>45.4 (41.9)</td>
                <td>66.4 (45.3)</td>
                <td>28.2 (29.3)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing record length<sup>c</sup>, mean (SD)</td>
                <td>4879.1 (2212.3)</td>
                <td>5559.4 (1961.9)</td>
                <td>4323.8 (2090.9)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Test data set</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Patients, n (% of total)</td>
                <td>372 (100)</td>
                <td>168 (45.2)</td>
                <td>204 (54.8)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Gender, n (% of total)</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>183 (100)</td>
                <td>78 (42.6)</td>
                <td>105 (57.4)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>189 (100)</td>
                <td>90 (47.6)</td>
                <td>99 (52.4)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Age (years), mean (SD)</td>
                <td>67.1 (17.1)</td>
                <td>73.2 (13.8)</td>
                <td>62.1 (18.1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records, n</td>
                <td>12,526</td>
                <td>9813</td>
                <td>2713</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records per patient, mean (SD)</td>
                <td>45.2 (45.1)</td>
                <td>69.8 (52.6)</td>
                <td>25.0 (23.0)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing record length<sup>c</sup>, mean (SD)</td>
                <td>4739.6 (2127.5)</td>
                <td>5522.9 (2005.8)</td>
                <td>4094.5 (2009.1)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>Welch <italic>t</italic> test between fallers and nonfallers used.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Not applicable.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>Number of Japanese or Chinese characters.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <boxed-text id="box1" position="float">
          <title>Morphemes used in the model for predicting falls. Morphemes related to known or potential risk factors (indicated in brackets) were extracted from 378 morphemes used in the final model of the first experiment.</title>
          <p>
            <bold>[Psychotropics]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Seroquel, Lendormin, Serenace</p>
            </list-item>
          </list>
          <p>
            <bold>[Mental status]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>recognition, dementia, arousal, mental status, somnolence, willingness, cognitive function, orientation, esthesia, sleeplessness, anxiousness, Myslee</p>
            </list-item>
          </list>
          <p>
            <bold>[Motor function]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>postural change, aid, assistance, support, lower limb, rehabilitation, slippers, wheelchair, sitting square, torpor, self-standing, parallel bars, limb, daily life behavior, lumbar region, ride, body posture, dorsal region, gait, extension (of limbs), walking stick</p>
            </list-item>
          </list>
          <p>
            <bold>[Excretion]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>excretion, defecation, constipation, incontinence, Lasix, Pursennid, Biofermine</p>
            </list-item>
          </list>
          <p>
            <bold>[Oropharyngeal]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>mouth, sputum, hospital food, oral, water drinking, nausea, swallowing, vomiting, dentures, fluid, mouth rinse, eat</p>
            </list-item>
          </list>
          <p>
            <bold>[Circulation]</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>WBC (white blood cells), blood pressure, transfusion, anemia, mmHg, oxygen, neutrophil, blood, pulse, vein, bleeding, blood vessel, heartbeat, platelet</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Similar to the process used for the learning data set, nursing records with fewer than 50 characters (13 and 4 nursing records for fallers and nonfallers, respectively) were deleted from the test data set, leaving 9800 nursing records for fallers and 2709 nursing records for nonfallers. For each patient in the test data set, the mean value of the risk probabilities for all their nursing records was calculated as a patient risk score that was used to evaluate the performance for predicting falls by ROC analysis. To draw the ROC curve, we calculated the true positive rate and false positive rate using the patient risk score (a continuous variable that ranges from 0 to 1) and category (faller or nonfaller) for each patient. Scanning the cutoff values from 0 to 1, the true and false positive rates were calculated from the confusion matrix for each cutoff value.</p>
        <p>As shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>A, the area under the ROC curve (AUC) was 0.835, which indicates excellent separation between fallers and nonfallers. Applying a threshold score of 0.5602, corresponding to the point on the ROC curve closest to the coordinate (0, 1), each patient was sorted into risk and no risk categories, as shown in the confusion matrix (<xref ref-type="table" rid="table4">Table 4</xref>). Then the sensitivity, specificity, and odds ratios were calculated (<xref ref-type="table" rid="table5">Table 5</xref>). Sensitivity and specificity are the most commonly used measures for diagnostic performance from the viewpoint of actual medical practice, in which the former is the rate of correct diagnosis among all disease patients and the latter is the rate of correct diagnosis among all normal patients. The odds ratio is the most commonly used measure in case-control studies.</p>
        <p>Next, the reproducibility of the analysis was examined by conducting similar experiments four more times (experiments 2 to 5). The model was constructed with a new learning data set, and the test data set was evaluated by generating random numbers for patient identification numbers, after which scatterplots were drawn to check correlations of patient risk scores between all combinations of two experiments (an example for experiments 1 and 4 is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>B). The analytical indexes for the five independent experiments demonstrated the high precision (<xref ref-type="table" rid="table5">Table 5</xref>) and reproducibility (<xref rid="figure1" ref-type="fig">Figure 1</xref>B and <xref ref-type="table" rid="table6">Table 6</xref>) of the model for the prediction of falls. These results demonstrated that text analysis of nursing records was an efficient method for predicting falls with high reproducibility.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Precision and reproducibility of the model for predicting falls using the test data set. Five independent experiments were conducted for the learning and testing steps. A: receiver operating characteristic (ROC) curve for experiment 1; B: scatterplot of patient risk scores for two of the five experiments (1 and 4). AUC: area under the curve.</p>
          </caption>
          <graphic xlink:href="medinform_v8i4e16970_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Confusion matrix of fall prediction for experiment 1.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="220"/>
            <col width="230"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Prediction</td>
                <td colspan="3">Patients</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Fallers, n</td>
                <td>Nonfallers, n</td>
                <td>Total, N</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Risk</td>
                <td>128</td>
                <td>39</td>
                <td>167</td>
              </tr>
              <tr valign="top">
                <td>No risk</td>
                <td>40</td>
                <td>165</td>
                <td>205</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>168</td>
                <td>204</td>
                <td>372</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Reproducibility of the model for predicting falls. A summary of evaluation indexes for the five experiments is shown.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Statistic</td>
                <td colspan="5">Experiment</td>
                <td>Mean (SD)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>2</td>
                <td>3</td>
                <td>4</td>
                <td>5</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Area under the curve</td>
                <td>0.835</td>
                <td>0.831</td>
                <td>0.832</td>
                <td>0.842</td>
                <td>0.831</td>
                <td>0.834 (0.005)</td>
              </tr>
              <tr valign="top">
                <td>Sensitivity (95% CI)</td>
                <td>0.762<break/>(0.714-0.813)</td>
                <td>0.75<break/>(0.702-0.801)</td>
                <td>0.774<break/>(0.726-0.824)</td>
                <td>0.78<break/>(0.730-0.823)</td>
                <td>0.78<break/>(0.732-0.830)</td>
                <td>0.769<break/>(0.013)</td>
              </tr>
              <tr valign="top">
                <td>Specificity (95% CI)</td>
                <td>0.809<break/>(0.766-0.854)</td>
                <td>0.794<break/>(0.751-0.839)</td>
                <td>0.779<break/>(0.736-0.825)</td>
                <td>0.789<break/>(0.724-0.801)</td>
                <td>0.755<break/>(0.712-0.801)</td>
                <td>0.785<break/>(0.02)</td>
              </tr>
              <tr valign="top">
                <td>Odds ratio (95% CI)</td>
                <td>13.54<break/>(8.23-22.27)</td>
                <td>11.57<break/>(7.11-18.83)</td>
                <td>12.09<break/>(7.40-19.73)</td>
                <td>13.26<break/>(8.07-21.78)</td>
                <td>10.9<break/>(6.72-17.71)</td>
                <td>12.27<break/>(1.11)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Correlations (R<sup>2</sup> for linear regression) of all combinations of two out of five experiments are shown.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Experiment</td>
                <td>1</td>
                <td>2</td>
                <td>3</td>
                <td>4</td>
                <td>5</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>—</td>
                <td>0.939</td>
                <td>0.952</td>
                <td>0.946</td>
                <td>0.945</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>—</td>
                <td>—</td>
                <td>0.932</td>
                <td>0.937</td>
                <td>0.957</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>0.948</td>
                <td>0.957</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>0.945</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
                <td>—</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Imminent Precursors of Falls</title>
        <p>In the next step, the detection of the imminent precursors of falls was attempted by extracting specific features from the nursing records written several days before each incident. For this purpose, nursing records of all fallers were collected as the “faller data set” and then tagged as imminent (1-7 days before the fall) or not imminent (<xref ref-type="table" rid="table7">Table 7</xref>). After bisecting the faller data set into a learning data set and a test data set, the former was used to construct a model for discrimination of the tags by the same method described previously for risk/no risk categorization; that is, the final model was built from morphemes identified in at least four of the six primary models constructed using the learning data set. Then the final model was used to evaluate the probability of each faller nursing record in the test data set being placed in the imminent category, after which the performance of the detection of imminent precursors was evaluated using ROC analysis (<xref rid="figure2" ref-type="fig">Figure 2</xref>A) and the confusion matrix (<xref ref-type="table" rid="table8">Table 8</xref>). After four more independent examinations were performed in the same manner to check reproducibility, the average AUC of the ROC curve was 0.567 for the five experiments (<xref ref-type="table" rid="table9">Table 9</xref>), which demonstrates limited ability to predict imminent falls from nursing records.</p>
        <p>Based on the hypothesis that the medical conditions of long-term inpatients would be stable, and changes in risk factors for falls would be difficult to detect, we also performed separate analyses of long-term and short-term inpatients. Fallers with more than 60 nursing records or 45 or fewer nursing records were selected as long-term and short-term inpatients, respectively, and the prediction of imminent falls was conducted for each group (<xref ref-type="table" rid="table7">Table 7</xref>).</p>
        <p>We found that improved prediction of imminent falls was achieved for short-term inpatients, with a mean AUC of 0.607 (SD 0.009) (for five independent experiments, <xref rid="figure2" ref-type="fig">Figure 2</xref>B and <xref ref-type="table" rid="table9">Tables 9</xref> and <xref ref-type="table" rid="table10">10</xref>), whereas prediction was poor for long-term inpatients (AUC mean 0.496, SD 0.011; summary table for the five experiments not shown). Confusion matrices were constructed for the short-term group, and the sensitivity, specificity, and odds ratios were calculated (<xref ref-type="table" rid="table9">Table 9</xref>). The results suggested that the calculated risk probability could be used to assess the imminent risk of falls for short-term inpatients at the time when each nursing record was written.</p>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Characteristics of patients and nursing records in the faller data set for detection of imminent precursors.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="270"/>
            <col width="220"/>
            <col width="230"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Faller data set</td>
                <td>All fallers</td>
                <td>&#62;60 Nursing records</td>
                <td>≤45 Nursing records</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Learning data set</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Patients, n</td>
                <td>167</td>
                <td>56</td>
                <td>91</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Gender, n</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>78</td>
                <td>32</td>
                <td>38</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>89</td>
                <td>24</td>
                <td>53</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Age (years), mean (SD)</td>
                <td>73.4 (12.9)</td>
                <td>74.7 (11.2)</td>
                <td>73.0 (12.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Nursing records, n</bold>
                </td>
                <td>9094</td>
                <td>5809</td>
                <td>2231</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Imminent<sup>a</sup></td>
                <td>1114</td>
                <td>487</td>
                <td>464</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Not imminent</td>
                <td>7980</td>
                <td>5322</td>
                <td>1767</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records per patient, mean (SD)</td>
                <td>54.5 (45.7)</td>
                <td>103.8 (45.7)</td>
                <td>24.5 (12.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing record length, mean (SD)</td>
                <td>5559.4 (1961.9)</td>
                <td>5363.34 (1879.5)</td>
                <td>5628.6 (2081.0)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Test data set</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Patients, n</td>
                <td>168</td>
                <td>56</td>
                <td>95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Gender, n</bold>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>78</td>
                <td>21</td>
                <td>48</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>90</td>
                <td>35</td>
                <td>47</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Age (years), mean (SD)</td>
                <td>73.2 (12.8)</td>
                <td>72.4 (12.9)</td>
                <td>74.0 (14.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Nursing records, n</bold>
                </td>
                <td>9813</td>
                <td>6693</td>
                <td>2239</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Imminent<sup>a</sup></td>
                <td>984</td>
                <td>424</td>
                <td>463</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Not imminent</td>
                <td>8829</td>
                <td>6269</td>
                <td>1776</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing records per patient, mean (SD)</td>
                <td>58.4 (54.1)</td>
                <td>119.5 (51.9)</td>
                <td>23.6 (12.6)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nursing record length, mean (SD)</td>
                <td>5522.9 (2005.8)</td>
                <td>5022.2 (2187.5)</td>
                <td>5662.8 (1890.6)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>Nursing records registered within seven days before a fall.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Precision of the model for detecting imminent precursors using the faller data set. Five independent experiments were conducted for the learning and testing steps to identify imminent precursors of falls among all fallers (A) and among fallers who were short-term patients (B). Receiver operating characteristic (ROC) curves for experiment 1 out of the five experiments are shown. AUC: area under the curve.</p>
          </caption>
          <graphic xlink:href="medinform_v8i4e16970_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Results of discrimination of imminent precursors of falls among all fallers. Confusion matrix for experiment 1 out of five experiments is shown.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="220"/>
            <col width="230"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Prediction</td>
                <td colspan="3">Nursing records</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Imminent</td>
                <td>Not imminent</td>
                <td>Total</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Imminent</td>
                <td>553</td>
                <td>4281</td>
                <td>4834</td>
              </tr>
              <tr valign="top">
                <td>Not imminent</td>
                <td>429</td>
                <td>4536</td>
                <td>4965</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>982</td>
                <td>8817</td>
                <td>9799</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table9">
          <label>Table 9</label>
          <caption>
            <p>Reproducibility of the model for detecting imminent precursors using the faller data set. Five independent experiments were conducted for the learning and testing steps to identify imminent precursors of falls among all fallers and among fallers who were short-term patients.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="220"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Group and statistic</td>
                <td colspan="5">Experiment</td>
                <td>Mean (SD)</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>1</td>
                <td>2</td>
                <td>3</td>
                <td>4</td>
                <td>5</td>
                <td>
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>Fallers</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Area under the curve</td>
                <td>0.562</td>
                <td>0.576</td>
                <td>0.568</td>
                <td>0.566</td>
                <td>0.564</td>
                <td>0.567<break/>(0.005)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity (95% CI)</td>
                <td>0.563<break/>  
            (0.546-0.581)</td>
                <td>0.543<break/>  
            (0.526-0.560)</td>
                <td>0.611<break/>  
            (0.593-0.630)</td>
                <td>0.576<break/>  
            (0.559-0.594)</td>
                <td>0.536<break/>  
            (0.519-0.553)</td>
                <td>0.566<break/>(0.030)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Specificity (95% CI)</td>
                <td>0.514<break/>  
            (0.509-0.520)</td>
                <td>0.576<break/>  
            (0.571-0.582)</td>
                <td>0.477<break/>  
            (0.472-0.482)</td>
                <td>0.517<break/>  
            (0.512-0.522)</td>
                <td>0.558<break/>  
            (0.552-0.563)</td>
                <td>0.529<break/>(0.039)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Odds ratio (95% CI)</td>
                <td>1.37<break/>  
            (1.20-1.56)</td>
                <td>1.62<break/>  
            (1.42-1.84)</td>
                <td>1.43<break/>  
            (1.25-1.64)</td>
                <td>1.46<break/>  
            (1.27-1.66)</td>
                <td>1.45<break/>  
            (1.27-1.66)</td>
                <td>1.47<break/>(0.09)</td>
              </tr>
              <tr valign="top">
                <td colspan="7">
                  <bold>Fallers who were short-term patients</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Area under the curve</td>
                <td>0.613</td>
                <td>0.607</td>
                <td>0.595</td>
                <td>0.602</td>
                <td>0.618</td>
                <td>0.607<break/>(0.009)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sensitivity (95% CI)</td>
                <td>0.547<break/>  
            (0.522-0.572)</td>
                <td>0.649<break/>  
            (0.621-0.677)</td>
                <td>0.492<break/>  
            (0.470-0.515)</td>
                <td>0.607<break/>  
            (0.581-0.635)</td>
                <td>0.623<break/>  
            (0.596-0.651)</td>
                <td>0.584<break/>(0.063)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Specificity (95% CI)</td>
                <td>0.626<break/>  
            (0.613-0.641)</td>
                <td>0.524<break/>  
            (0.512-0.536)</td>
                <td>0.653<break/>  
            (0.639-0.668)</td>
                <td>0.548<break/>  
            (0.535-0.560)</td>
                <td>0.560<break/>  
            (0.547-0.573)</td>
                <td>0.582<break/>(0.055)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Odds ratio (95% CI)</td>
                <td>2.02<break/>  
            (1.64-2.49)</td>
                <td>2.03<break/>(1.64-2.51)</td>
                <td>1.83<break/>  
            (1.48-2.25)</td>
                <td>1.87<break/>  
            (1.52-2.31)</td>
                <td>2.10<break/>  
            (1.70-2.59)</td>
                <td>1.97<break/>(0.12)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table10">
          <label>Table 10</label>
          <caption>
            <p>Results of discrimination of imminent precursors of falls among fallers who were short-term patients. Confusion matrix for experiment 1 out of five experiments is shown.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="330"/>
            <col width="220"/>
            <col width="230"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td>Prediction</td>
                <td colspan="3">Nursing records</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Imminent</td>
                <td>Not imminent</td>
                <td>Total</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Imminent</td>
                <td>252</td>
                <td>663</td>
                <td>915</td>
              </tr>
              <tr valign="top">
                <td>Not imminent</td>
                <td>209</td>
                <td>1112</td>
                <td>1321</td>
              </tr>
              <tr valign="top">
                <td>Total</td>
                <td>461</td>
                <td>1775</td>
                <td>2236</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Our results confirmed it is possible to predict inpatient falls using text analysis of nursing records in a hospital EMR system, with a mean AUC of 0.834 across five independent experiments. In many previous studies, the prediction of falls was based on specified risk factors, such as the use of psychotropic drugs [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref32">32</xref>], mental state (eg, disorientation, confusion, and delirium) [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref35">35</xref>], impaired motor function (eg, unstable gait and muscle weakness) [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref35">35</xref>], and excretory condition (eg, incontinence and frequent toileting) [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref35">35</xref>]. Additionally, the usefulness of nursing records for inpatient fall prediction was discussed recently [<xref ref-type="bibr" rid="ref36">36</xref>], and it was shown using NLP analysis that nursing records contained words known to be risk factors for inpatient falls and interventions used in daily practice. However, all the words identified in the analysis were preselected using prior reports, risk assessment tools, and subject matter expert’s knowledge. By contrast, we did not focus on any specific factor or emphasize any specific keywords, topics, concepts, or fields throughout our NLP analysis of unstructured text in nursing records and subsequent machine learning. 
Despite this, we found many words closely related to the previously mentioned risk factors in the list of morphemes that contributed to the prediction of fall risk (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). Thus, the Concept Encoder successfully extracted known risk factors for falls as words with a statistically significant correlation to actual incidents. It is possible that several other words (or related concepts) that contribute to the model might be unknown risk factors. These candidate novel risk factors may not only be useful for predicting falls but also for determining the causes of falls or selecting interventions for prevention. In future work, we will conduct further numerical analyses of these candidates, such as cluster analysis or context analysis based on the document-word embedding matrix (<italic>DW</italic>), to examine their similarities or relationships. If it is proven that words related to known and novel risk factors are effective for predicting falls, this might encourage hospital nurses to write nursing records that emphasize these factors, thus improving the quality of nursing records and allowing falls to be predicted with higher precision.</p>
        <p>There was a statistically significant difference between nursing records recorded one to seven days before a fall and others. This suggests that a fall risk monitoring system designed to analyze nursing records daily and alert health care professionals when an increase of fall risk is detected could be an effective tool for the prevention of falls. Recently, the authors developed a new version of Concept Encoder with improved computational capacity and deployed it in an ongoing study using a larger data set (all nursing records for three years; approximately 520,000 nursing records from 900 fallers and 28,000 nonfallers). Encouraged by the early results of the study, which has shown considerable improvement in the prediction for imminent falls (AUC of approximately 0.73), the authors have developed the first version of the fall risk monitoring system.</p>
        <p>Because nursing records contain continuous information covering a broad context regardless of the underlying disease or complications and results of various medical tests and vital signs, this algorithm can be applied to construct models for predicting other specific medical interests, such as a sudden change of the patient’s condition or recurrence of acute illness. It also has the potential to be used as the basis of a multipurpose diagnosis and caregiving support system.</p>
        <p>Recent developments in machine learning technology have enhanced the range of application, but it is still rarely used in the health care field. One reason is that neural network analysis, such as deep learning, cannot provide human-interpretable models or rules because of the numerous layers in the learning process. This “black box problem,” that is, poor traceability of the learning and analysis processes, is one reason that machine learning has not been widely applied in the health care field. The algorithm that we used (Concept Encoder) achieves very efficient transformation from documents to a document-word matrix, after which even simple logistic regression analysis can successfully predict falls. Moreover, the characteristics and probability distribution of the data are provided in an interpretable manner. Thus, even after a machine learning process is used, it can perform statistical analyses with high levels of stability, reproducibility, and verifiability that are required in the health care field. In this field, evidence-based decision making is valued, and vast amounts of medical data have been accumulated over many years for this purpose. It seems possible that Concept Encoder can be applied to mine these precious assets with verifiable analysis.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>The low quantity of data may be a limitation in this study. However, due to the oversampling technique that we used, in which minority data were resampled to balance the two-group data set, we believe that the results of the study were not substantially affected by the low rate of falls. However, meta-analysis and a multicenter study will be considered in future work, which will generate more data. Additionally, we defined imminent as one to seven days before the fall. When we considered shorter time periods, such as one to three or one to five days before the fall, this reduced the number of imminent nursing records, which resulted in poorer prediction. In future work, larger data sets will enable the analysis of shorter time periods. Finally, as this is the first study to analyze nursing records using NLP and machine learning, there is no prior work available for comparison.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>We verified that text analysis of a single input—nursing records—using an NLP algorithm and machine learning was effective for the prediction of falls among hospital inpatients and the detection of imminent precursors of fall incidents. The approach was also able to extract useful information related to various types of fall risk factors, whether they are known or unknown, from the unstructured description of the nursing records. This can serve as a basis for a fall risk monitoring system (eg, screen-based) that can output risk factors for each high-risk patient together with the risk probability. We have already developed a prototype monitoring system and plan to start testing in collaboration with several hospitals. We are also developing an English version of our system for testing in English-speaking countries. Studies have reported that intervention is more successful when various health care professionals are involved as a team rather than taking a nursing-centric approach [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Thus, the output of data and risk factors provided by the system could be helpful for information sharing among teams of health care professionals at safety huddles or during handover.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HFRM</term>
          <def>
            <p>Hendrich Fall Risk Model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">MCMC</term>
          <def>
            <p>Markov chain Monte Carlo</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PV-DBOW</term>
          <def>
            <p>paragraph vector-distributed bag of words</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">STRATIFY</term>
          <def>
            <p>St Thomas’s Risk Assessment Tool in Falling Elderly Inpatients</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>HU was affiliated with FRONTEO Inc. at the time of the study and is currently affiliated with Neopharma Japan Co Ltd, which has no involvement in this study. We thank Hideki Takeda, Kohei Matsumoto, and Hiroki Ego for general support during the study. We thank Maxine Garcia, PhD, from the Edanz Group for rewriting a draft of this manuscript.</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>CO, HN, and MN contributed to the conception and design of the study. HN and MN collected the data. HT designed and developed the system. HU performed the data analysis. HU and HT wrote the manuscript, and all other authors reviewed and provided feedback with each draft. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>HT has patent JP 2017-214388. HT and HU have patents JP2018-088828 and JP2018-088829 pending. HT is and HU was an employee of FRONTEO Inc, which developed and marketed a fall prediction system based on the results of this research. All other authors have no conflicts to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bouldin</surname>
              <given-names>ELD</given-names>
            </name>
            <name name-style="western">
              <surname>Andresen</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Dunton</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Simon</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Waters</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Daniels</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Mion</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Shorr</surname>
              <given-names>RI</given-names>
            </name>
          </person-group>
          <article-title>Falls among adult patients hospitalized in the United States: prevalence and trends</article-title>
          <source>J Patient Saf</source>
          <year>2013</year>
          <month>03</month>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>13</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23143749"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/PTS.0b013e3182699b64</pub-id>
          <pub-id pub-id-type="medline">23143749</pub-id>
          <pub-id pub-id-type="pmcid">PMC3572247</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <source>Japan Federation of Democratic Medical Institutions</source>
          <access-date>2020-04-08</access-date>
          <comment>Rate of fall incident: report of quality improvement of medical care project 2016<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.min-iren.gr.jp/hokoku/hokoku_h28.html">https://www.min-iren.gr.jp/hokoku/hokoku_h28.html</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Joint Commission</collab>
          </person-group>
          <source>Sentinel Event Alert</source>
          <year>2015</year>
          <access-date>2020-04-08</access-date>
          <comment>Preventing falls and fall-related injuries in health care facilities<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jointcommission.org/assets/1/6/SEA_55_Falls_4_26_16.pdf">https://www.jointcommission.org/assets/1/6/SEA_55_Falls_4_26_16.pdf</ext-link>
                                                </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morse</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Tylko</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dixon</surname>
              <given-names>HA</given-names>
            </name>
          </person-group>
          <article-title>Characteristics of the fall-prone patient</article-title>
          <source>Gerontologist</source>
          <year>1987</year>
          <month>08</month>
          <volume>27</volume>
          <issue>4</issue>
          <fpage>516</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1093/geront/27.4.516</pub-id>
          <pub-id pub-id-type="medline">3623149</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Britton</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Seed</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Hopper</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>Development and evaluation of evidence based risk assessment tool (STRATIFY) to predict which elderly inpatients will fall: case-control and cohort studies</article-title>
          <source>BMJ</source>
          <year>1997</year>
          <month>10</month>
          <day>25</day>
          <volume>315</volume>
          <issue>7115</issue>
          <fpage>1049</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/9366729"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.315.7115.1049</pub-id>
          <pub-id pub-id-type="medline">9366729</pub-id>
          <pub-id pub-id-type="pmcid">PMC2127684</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hendrich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhuis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kippenbrock</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Soja</surname>
              <given-names>ME</given-names>
            </name>
          </person-group>
          <article-title>Hospital falls: development of a predictive model for clinical practice</article-title>
          <source>Appl Nurs Res</source>
          <year>1995</year>
          <month>08</month>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>129</fpage>
          <lpage>39</lpage>
          <pub-id pub-id-type="doi">10.1016/s0897-1897(95)80592-3</pub-id>
          <pub-id pub-id-type="medline">7668855</pub-id>
          <pub-id pub-id-type="pii">S0897-1897(95)80592-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hendrich</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Nyhuis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Validation of the Hendrich II Fall Risk Model: a large concurrent case/control study of hospitalized patients</article-title>
          <source>Appl Nurs Res</source>
          <year>2003</year>
          <month>02</month>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>9</fpage>
          <lpage>21</lpage>
          <pub-id pub-id-type="doi">10.1053/apnr.2003.YAPNR2</pub-id>
          <pub-id pub-id-type="medline">12624858</pub-id>
          <pub-id pub-id-type="pii">S0897189702109025</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Daly</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>McMurdo</surname>
              <given-names>MET</given-names>
            </name>
          </person-group>
          <article-title>Risk factors and risk assessment tools for falls in hospital in-patients: a systematic review</article-title>
          <source>Age Ageing</source>
          <year>2004</year>
          <month>03</month>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>122</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1093/ageing/afh017</pub-id>
          <pub-id pub-id-type="medline">14960426</pub-id>
          <pub-id pub-id-type="pii">33/2/122</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Papaioannou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Giangregorio</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Thabane</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Reizgys</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A systematic review and meta-analysis of studies using the STRATIFY tool for prediction of falls in hospital patients: how well does it work?</article-title>
          <source>Age Ageing</source>
          <year>2008</year>
          <month>11</month>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>621</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18829693"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ageing/afn203</pub-id>
          <pub-id pub-id-type="medline">18829693</pub-id>
          <pub-id pub-id-type="pii">afn203</pub-id>
          <pub-id pub-id-type="pmcid">PMC5104555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aranda-Gallardo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Morales-Asencio</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Canca-Sanchez</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Barrero-Sojo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-Jimenez</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Morales-Fernandez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de Luna-Rodriguez</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Moya-Suarez</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Mora-Banderas</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Instruments for assessing the risk of falls in acute hospitalized patients: a systematic review and meta-analysis</article-title>
          <source>BMC Health Serv Res</source>
          <year>2013</year>
          <month>04</month>
          <day>02</day>
          <volume>13</volume>
          <fpage>122</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/1472-6963-13-122"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6963-13-122</pub-id>
          <pub-id pub-id-type="medline">23547708</pub-id>
          <pub-id pub-id-type="pii">1472-6963-13-122</pub-id>
          <pub-id pub-id-type="pmcid">PMC3637640</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Matarese</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ivziku</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bartolozzi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Piredda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>De Marinis</surname>
              <given-names>MG</given-names>
            </name>
          </person-group>
          <article-title>Systematic review of fall risk screening tools for older patients in acute hospitals</article-title>
          <source>J Adv Nurs</source>
          <year>2015</year>
          <month>06</month>
          <volume>71</volume>
          <issue>6</issue>
          <fpage>1198</fpage>
          <lpage>209</lpage>
          <pub-id pub-id-type="doi">10.1111/jan.12542</pub-id>
          <pub-id pub-id-type="medline">25287867</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Connelly</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Victor</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Whitehead</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Genc</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vanoli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>FC</given-names>
            </name>
            <name name-style="western">
              <surname>Gosney</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Strategies to prevent falls and fractures in hospitals and care homes and effect of cognitive impairment: systematic review and meta-analyses</article-title>
          <source>BMJ</source>
          <year>2007</year>
          <month>01</month>
          <day>13</day>
          <volume>334</volume>
          <issue>7584</issue>
          <fpage>82</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/17158580"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.39049.706493.55</pub-id>
          <pub-id pub-id-type="medline">17158580</pub-id>
          <pub-id pub-id-type="pii">bmj.39049.706493.55</pub-id>
          <pub-id pub-id-type="pmcid">PMC1767306</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Healey</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Haines</surname>
              <given-names>TP</given-names>
            </name>
          </person-group>
          <article-title>Preventing falls and fall-related injuries in hospitals</article-title>
          <source>Clin Geriatr Med</source>
          <year>2010</year>
          <month>11</month>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>645</fpage>
          <lpage>92</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cger.2010.06.005</pub-id>
          <pub-id pub-id-type="medline">20934615</pub-id>
          <pub-id pub-id-type="pii">S0749-0690(10)00053-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cameron</surname>
              <given-names>ID</given-names>
            </name>
            <name name-style="western">
              <surname>Gillespie</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Robertson</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>GR</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Cumming</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Kerse</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Interventions for preventing falls in older people in care facilities and hospitals</article-title>
          <source>Cochrane Database Syst Rev</source>
          <year>2012</year>
          <month>12</month>
          <day>12</day>
          <volume>12</volume>
          <fpage>CD005465</fpage>
          <pub-id pub-id-type="doi">10.1002/14651858.CD005465.pub3</pub-id>
          <pub-id pub-id-type="medline">23235623</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miake-Lye</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Hempel</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ganz</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Shekelle</surname>
              <given-names>PG</given-names>
            </name>
          </person-group>
          <article-title>Inpatient fall prevention programs as a patient safety strategy: a systematic review</article-title>
          <source>Ann Intern Med</source>
          <year>2013</year>
          <month>03</month>
          <day>05</day>
          <volume>158</volume>
          <issue>5 Pt 2</issue>
          <fpage>390</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-158-5-201303051-00005</pub-id>
          <pub-id pub-id-type="medline">23460095</pub-id>
          <pub-id pub-id-type="pii">1656443</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sahota</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Drummond</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kendrick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Grainge</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vass</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sach</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gladman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Avis</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>REFINE (REducing Falls in In-patieNt Elderly) using bed and bedside chair pressure sensors linked to radio-pagers in acute hospital care: a randomised controlled trial</article-title>
          <source>Age Ageing</source>
          <year>2014</year>
          <month>03</month>
          <volume>43</volume>
          <issue>2</issue>
          <fpage>247</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://ageing.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=24141253"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ageing/aft155</pub-id>
          <pub-id pub-id-type="medline">24141253</pub-id>
          <pub-id pub-id-type="pii">aft155</pub-id>
          <pub-id pub-id-type="pmcid">PMC3927772</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>O’Riordan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Prevention of falls in hospital</article-title>
          <source>Clin Med</source>
          <year>2017</year>
          <month>08</month>
          <day>01</day>
          <volume>17</volume>
          <issue>4</issue>
          <fpage>360</fpage>
          <lpage>362</lpage>
          <pub-id pub-id-type="doi">10.7861/clinmedicine.17-4-360</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Afzal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abram</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Identifying peripheral arterial disease cases using natural language processing of clinical notes</article-title>
          <source>IEEE EMBS Int Conf Biomed Health Inform</source>
          <year>2016</year>
          <month>02</month>
          <fpage>126</fpage>
          <lpage>131</lpage>
          <pub-id pub-id-type="doi">10.1109/bhi.2016.7455851</pub-id>
          <pub-id pub-id-type="medline">28111640</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wi</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rolfes</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Seabright</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Voge</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bachman</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Kita</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Croghan</surname>
              <given-names>IT</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Juhn</surname>
              <given-names>YJ</given-names>
            </name>
          </person-group>
          <article-title>Application of a Natural Language Processing Algorithm to Asthma Ascertainment. An Automated Chart Review</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2017</year>
          <month>08</month>
          <day>15</day>
          <volume>196</volume>
          <issue>4</issue>
          <fpage>430</fpage>
          <lpage>437</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28375665"/>
          </comment>
          <pub-id pub-id-type="doi">10.1164/rccm.201610-2006OC</pub-id>
          <pub-id pub-id-type="medline">28375665</pub-id>
          <pub-id pub-id-type="pmcid">PMC5564673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chase</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Mitrani</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>GG</given-names>
            </name>
            <name name-style="western">
              <surname>Fulgieri</surname>
              <given-names>DJ</given-names>
            </name>
          </person-group>
          <article-title>Early recognition of multiple sclerosis using natural language processing of the electronic health record</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2017</year>
          <month>02</month>
          <day>28</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>24</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-017-0418-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-017-0418-4</pub-id>
          <pub-id pub-id-type="medline">28241760</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-017-0418-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC5329909</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pickering</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Keeping good nursing records: a guide</article-title>
          <source>Community Eye Health</source>
          <year>2010</year>
          <month>12</month>
          <volume>23</volume>
          <issue>74</issue>
          <fpage>44</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21311663"/>
          </comment>
          <pub-id pub-id-type="medline">21311663</pub-id>
          <pub-id pub-id-type="pmcid">PMC3033612</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kudo</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Project Web Site</source>
          <access-date>2020-04-08</access-date>
          <comment>MeCab: Yet another part-of-speech and morphological analyzer<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://sourceforge.net/projects/mecab/">https://sourceforge.net/projects/mecab/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>arXiv.org</source>
          <year>2013</year>
          <access-date>2020-04-08</access-date>
          <comment>Efficient estimation of word representations in vector space<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781">https://arxiv.org/abs/1301.3781</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pennington</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Socher</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Manning</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>GloVe: Global Vectors for Word Representation</article-title>
          <year>2014</year>
          <month>10</month>
          <conf-name>2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conf-name>
          <conf-date>October 26–28, 2014</conf-date>
          <conf-loc>Doha, Qatar</conf-loc>
          <publisher-loc>Stroudsburg, PA</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aclweb.org/anthology/D14-1162/"/>
          </comment>
          <pub-id pub-id-type="doi">10.3115/v1/d14-1162</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Olah</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Le</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <source>arXiv.org</source>
          <year>2015</year>
          <access-date>2020-04-08</access-date>
          <comment>Document embedding with paragraph vectors<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1507.07998">https://arxiv.org/abs/1507.07998</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: Synthetic Minority Over-sampling Technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>357</lpage>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>imblearn</collab>
          </person-group>
          <source>SMOTE</source>
          <access-date>2020-04-08</access-date>
          <comment>over_sampling<ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.over_sampling.SMOTE.html">https://imbalanced-learn.readthedocs.io/en/stable/generated/imblearn.over_sampling.SMOTE.html</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ballinger</surname>
              <given-names>BR</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsay</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>Accidents and drug treatment in a psychiatric hospital</article-title>
          <source>Br J Psychiatry</source>
          <year>1975</year>
          <month>05</month>
          <volume>126</volume>
          <fpage>462</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1192/bjp.126.5.462</pub-id>
          <pub-id pub-id-type="medline">1125522</pub-id>
          <pub-id pub-id-type="pii">S0007125000041532</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lichtenstein</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Cornell</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Malcolm</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Risk factors for hip fractures occurring in the hospital</article-title>
          <source>Am J Epidemiol</source>
          <year>1994</year>
          <month>11</month>
          <day>01</day>
          <volume>140</volume>
          <issue>9</issue>
          <fpage>830</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1093/oxfordjournals.aje.a117331</pub-id>
          <pub-id pub-id-type="medline">7977293</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Passaro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Volpato</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Romagnoni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Manzoli</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zuliani</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fellin</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Benzodiazepines with different half-life and falling in a hospitalized population: The GIFA study. Gruppo Italiano di Farmacovigilanza nell'Anziano</article-title>
          <source>J Clin Epidemiol</source>
          <year>2000</year>
          <month>12</month>
          <volume>53</volume>
          <issue>12</issue>
          <fpage>1222</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/s0895-4356(00)00254-7</pub-id>
          <pub-id pub-id-type="medline">11146268</pub-id>
          <pub-id pub-id-type="pii">S0895435600002547</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gales</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Menard</surname>
              <given-names>SM</given-names>
            </name>
          </person-group>
          <article-title>Relationship between the administration of selected medications and falls in hospitalized elderly patients</article-title>
          <source>Ann Pharmacother</source>
          <year>1995</year>
          <month>04</month>
          <volume>29</volume>
          <issue>4</issue>
          <fpage>354</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1177/106002809502900402</pub-id>
          <pub-id pub-id-type="medline">7633010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>CK</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Risk factors for falls in hospitalized older medical patients</article-title>
          <source>J Gerontol A Biol Sci Med Sci</source>
          <year>1999</year>
          <month>01</month>
          <volume>54</volume>
          <issue>1</issue>
          <fpage>M38</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.1093/gerona/54.1.m38</pub-id>
          <pub-id pub-id-type="medline">10026661</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmid</surname>
              <given-names>NA</given-names>
            </name>
          </person-group>
          <article-title>1989 Federal Nursing Service Award Winner. Reducing patient falls: a research-based comprehensive fall prevention program</article-title>
          <source>Mil Med</source>
          <year>1990</year>
          <month>05</month>
          <volume>155</volume>
          <issue>5</issue>
          <fpage>202</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="medline">2114579</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salgado</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lord</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Packer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ehrlich</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Factors associated with falling in elderly hospital patients</article-title>
          <source>Gerontology</source>
          <year>1994</year>
          <volume>40</volume>
          <issue>6</issue>
          <fpage>325</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1159/000213607</pub-id>
          <pub-id pub-id-type="medline">7867963</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gluck</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wientjes</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>GS</given-names>
            </name>
          </person-group>
          <article-title>An evaluation of risk factors for in-patient falls in acute and rehabilitation elderly care wards</article-title>
          <source>Gerontology</source>
          <year>1996</year>
          <volume>42</volume>
          <issue>2</issue>
          <fpage>104</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1159/000213779</pub-id>
          <pub-id pub-id-type="medline">9138972</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bjarnadottir</surname>
              <given-names>RI</given-names>
            </name>
            <name name-style="western">
              <surname>Lucero</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>What Can We Learn about Fall Risk Factors from EHR Nursing Notes? A Text Mining Study</article-title>
          <source>eGEMs</source>
          <year>2018</year>
          <month>09</month>
          <day>20</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>21</fpage>
          <pub-id pub-id-type="doi">10.5334/egems.237</pub-id>
          <pub-id pub-id-type="medline">30263902</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Venema</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Nailon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Skinner</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>High</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kennel</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Shifting the paradigm: an assessment of the quality of fall risk reduction in Nebraska hospitals</article-title>
          <source>J Rural Health</source>
          <year>2015</year>
          <volume>31</volume>
          <issue>2</issue>
          <fpage>135</fpage>
          <lpage>45</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1111/jrh.12088"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jrh.12088</pub-id>
          <pub-id pub-id-type="medline">25182938</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cracknell</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lovatt</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Winfield</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Arkhipkina</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>McDonagh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Green</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rooney</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Huddle up for safer healthcare: how frontline teams can work together to improve patient safety</article-title>
          <source>Future Hosp J</source>
          <year>2016</year>
          <month>06</month>
          <day>01</day>
          <volume>3</volume>
          <issue>Suppl 2</issue>
          <fpage>s31</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31098260"/>
          </comment>
          <pub-id pub-id-type="doi">10.7861/futurehosp.3-2s-s31</pub-id>
          <pub-id pub-id-type="medline">31098260</pub-id>
          <pub-id pub-id-type="pii">futurehosp</pub-id>
          <pub-id pub-id-type="pmcid">PMC6465903</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
