<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id><journal-id journal-id-type="publisher-id">medinform</journal-id><journal-id journal-id-type="index">7</journal-id><journal-title>JMIR Medical Informatics</journal-title><abbrev-journal-title>JMIR Med Inform</abbrev-journal-title><issn pub-type="epub">2291-9694</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v13i1e71687</article-id><article-id pub-id-type="doi">10.2196/71687</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Detecting Redundant Health Survey Questions by Using Language-Agnostic Bidirectional Encoder Representations From Transformers Sentence Embedding: Algorithm Development Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Kang</surname><given-names>Sunghoon</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Park</surname><given-names>Hyewon</given-names></name><degrees>BSN, RN</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Taira</surname><given-names>Ricky</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name 
name-style="western"><surname>Kim</surname><given-names>Hyeoneui</given-names></name><degrees>RN, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>College of Nursing, Seoul National University</institution><addr-line>103 Daehak-ro, Jongno-gu</addr-line><addr-line>Seoul</addr-line><country>Republic of Korea</country></aff><aff id="aff2"><institution>The Department of Radiological Sciences, David Geffen School of Medicine, University of California, Los Angeles</institution><addr-line>Los Angeles</addr-line><addr-line>CA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Benis</surname><given-names>Arriel</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Rampadarath</surname><given-names>Anand</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Radvilaite</surname><given-names>Urte</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Hyeoneui Kim, RN, PhD, College of Nursing, Seoul National University, 103 Daehak-ro, Jongno-gu, Seoul, 03080, Republic of Korea, 82 027408483; <email>ifilgood@snu.ac.kr</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>10</day><month>6</month><year>2025</year></pub-date><volume>13</volume><elocation-id>e71687</elocation-id><history><date date-type="received"><day>24</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>16</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>20</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sunghoon Kang, Hyewon Park, Ricky Taira, Hyeoneui Kim. 
Originally published in JMIR Medical Informatics (<ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org">https://medinform.jmir.org</ext-link>), 10.6.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://medinform.jmir.org/">https://medinform.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://medinform.jmir.org/2025/1/e71687"/><abstract><sec><title>Background</title><p>As the importance of person-generated health data (PGHD) in health care and research has increased, efforts have been made to standardize survey-based PGHD to improve its usability and interoperability. Standardization efforts such as the Patient-Reported Outcomes Measurement Information System (PROMIS) and the National Institutes of Health (NIH) Common Data Elements (CDE) repository provide effective tools for managing and unifying health survey questions. 
However, previous methods using ontology-mediated annotation are not only labor-intensive and difficult to scale but also challenging for identifying semantic redundancies in survey questions, especially across multiple languages.</p></sec><sec><title>Objective</title><p>The goal of this work was to compute the semantic similarity among publicly available health survey questions to facilitate the standardization of survey-based PGHD.</p></sec><sec sec-type="methods"><title>Methods</title><p>We compiled various health survey questions authored in both English and Korean from the NIH CDE repository, PROMIS, Korean public health agencies, and academic publications. Questions were drawn from various health lifelog domains. A randomized question pairing scheme was used to generate a semantic text similarity dataset consisting of 1758 question pairs. The similarity scores between each question pair were assigned by 2 human experts. The tagged dataset was then used to build 4 classifiers featuring bag-of-words, sentence-bidirectional encoder representations from transformers (SBERT) with bidirectional encoder representations from transformers (BERT)&#x2013;based embeddings, SBERT with language-agnostic BERT sentence embedding (LaBSE), and GPT-4o. The algorithms were evaluated using traditional contingency statistics.</p></sec><sec sec-type="results"><title>Results</title><p>Among the 3 algorithms, SBERT-LaBSE demonstrated the highest performance in assessing the question similarity across both languages, achieving area under the receiver operating characteristic and precision-recall curves of &#x003E;0.99. Additionally, SBERT-LaBSE proved effective in identifying cross-lingual semantic similarities. The SBERT-LaBSE algorithm excelled at aligning semantically equivalent sentences across both languages but encountered challenges in capturing subtle nuances and maintaining computational efficiency. 
Future research should focus on testing with larger multilingual datasets and on calibrating and normalizing scores across the health lifelog domains to improve consistency.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study introduces the SBERT-LaBSE algorithm for calculating the semantic similarity across 2 languages, showing that it outperforms BERT-based models, the GPT-4o model, and the bag-of-words approach, highlighting its potential in improving the semantic interoperability of survey-based PGHD across language barriers.</p></sec></abstract><kwd-group><kwd>person-generated health data</kwd><kwd>PGHD</kwd><kwd>bidirectional encoder representations from transformers</kwd><kwd>BERT</kwd><kwd>semantic similarity</kwd><kwd>language-agnostic BERT sentence embedding</kwd><kwd>LaBSE</kwd><kwd>sentence-bidirectional encoder representations from transformers</kwd><kwd>SBERT</kwd><kwd>interoperability</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Person-generated health data (PGHD) is becoming increasingly important in managing individual health. PGHD encompass health-related information that individuals create and collect outside traditional clinical environments, helping them monitor and manage their well-being [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Examples of PGHD include biometric data from wearable devices and self-reported information such as patient-reported outcomes. Since PGHD has the potential for continuously capturing health insights beyond health care settings, there is growing interest in leveraging PGHD to support clinical care [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. In parallel, PGHD is increasingly explored as a resource for patient-centered outcomes research [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. 
However, there are several challenges in the effective use of PGHD, including developing robust data management systems, ensuring data security, deploying it seamlessly into clinical workflows, and maintaining high data quality [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>].</p><p>Standardizing survey-based PGHD is a critical step in enabling its broader use [<xref ref-type="bibr" rid="ref9">9</xref>]. An important aspect of standardization is to identify redundancies in the form of semantic equivalencies. These redundancies may arise because the clarity, tone, tense, directness, and formality of the language can be phrased differently for the same purposeful inquiry depending upon the author. For example, emotional symptoms may be captured by questions such as &#x201C;Do you feel like withdrawing from family or friends?&#x201D; or &#x201C;I don&#x2019;t really want to talk to people around me.&#x201D; This variation makes identifying semantically equivalent questions&#x2014;and thus standardizing survey-based PGHD&#x2014;a complex task. Efforts such as the Patient-Reported Outcomes Measurement Information System (PROMIS) and the National Institutes of Health (NIH) Common Data Elements (CDE) repository aim to provide standardized health survey questions. PROMIS, a consensus-based item bank designed for managing patient-reported outcomes, offers standardized measures that are applicable across various diseases and clinical settings [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. 
These measures have helped health care providers across various clinical settings, including pain management [<xref ref-type="bibr" rid="ref13">13</xref>], orthopedics [<xref ref-type="bibr" rid="ref14">14</xref>], and primary care [<xref ref-type="bibr" rid="ref15">15</xref>]; in cancer care [<xref ref-type="bibr" rid="ref16">16</xref>]; in managing patient symptoms; in tailoring treatments; and in improving communication between patients and clinicians. The NIH CDE repository, through metadata tagging, also plays a key role in standardizing data elements, including health surveys [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Both PROMIS and the CDE repository are essential for enhancing the interoperability of health data.</p><p>In practice, the deployment of PGHD acquisition applications requires that survey questions be drawn from these established standardized resources. Data collected using questions outside of these resources still require additional efforts to achieve standardization. Although previous studies have explored ontology-mediated methods to identify semantically equivalent health questions [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>], annotating each question with ontology concepts is labor-intensive and lacks scalability as such knowledge sources expand. As a complementary approach, deep learning and transformer-based methods have been applied to semantic textual similarity (STS) tasks in clinical texts, including radiology and pathology reports [<xref ref-type="bibr" rid="ref19">19</xref>], clinical notes [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>], and medical question-answer pairs [<xref ref-type="bibr" rid="ref23">23</xref>]. 
A range of models has been explored, such as convolutional neural networks [<xref ref-type="bibr" rid="ref19">19</xref>]; transformer-based architectures such as bidirectional encoder representations from transformers (BERT), robustly optimized BERT approach, and XLNet [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]; and the Siamese network [<xref ref-type="bibr" rid="ref23">23</xref>]. Despite their promising performance, most of these models have been limited to single-language settings&#x2014;predominantly English [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>] or Chinese [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Consequently, cross-lingual STS remains underexplored, highlighting the need for standardization efforts that promote semantic interoperability across languages.</p><p>To address these challenges, we developed Standardized PGHD Utilization Resources and Tools (SPURT), which supports the standardization and reuse of survey-based PGHD by identifying semantically equivalent questions and facilitating the storage, retrieval, and sharing of these data. Unlike PROMIS and the NIH CDE repository, SPURT annotates and stores health survey questions in both English and Korean while also detecting semantically redundant questions. This ensures the use of consistent question formats whenever possible. Technically, assessing semantic similarity between texts is well-established and widely applied for managing text resources [<xref ref-type="bibr" rid="ref24">24</xref>]. However, SPURT faces 2 unique challenges in its assigned task. First, it must effectively assess semantic similarities within or between 2 different languages&#x2014;English and Korean. 
Although multilingual embeddings can be used to address this challenge [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], they often perform less effectively for low-resource languages such as Korean compared to high-resource languages such as English [<xref ref-type="bibr" rid="ref27">27</xref>]. One common solution is to translate low-resource languages into high-resource ones before embedding, but this approach risks losing or distorting the original meaning [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Second, it must ensure computational efficiency for real-time semantic comparisons between questions. Calculating semantic similarity by using large language models such as BERT is computationally expensive, with a time complexity that grows quadratically, O(N<sup>2</sup>), in the number of sentences compared. For example, computing the pairwise similarities among a collection of approximately 10,000 sentences can take around 65 hours using a V100 graphics processing unit [<xref ref-type="bibr" rid="ref30">30</xref>]. Given that SPURT is designed to be a real-time, reactive data processing tool, achieving reasonable response times is crucial for its functionality.</p><p>This study presents the development of a novel algorithm for detecting redundant questions, addressing the challenges outlined above. The algorithm utilizes sentence-BERT (SBERT), a variant of BERT designed for efficient sentence-level semantic similarity calculations [<xref ref-type="bibr" rid="ref30">30</xref>] along with language-agnostic BERT sentence embedding (LaBSE) [<xref ref-type="bibr" rid="ref31">31</xref>] to enhance multilingual capability. Sentence-BERT is a model specifically designed for calculating STS between sentences, and LaBSE is an embedding that supports efficient cross-lingual STS by mapping multilingual sentences into a shared embedding space. 
The SBERT-LaBSE algorithm integrates the strengths of both models and facilitates the identification of semantically equivalent questions across languages.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Corpus Description: The STS Dataset</title><p>An STS dataset contains text pairs along with predefined similarity scores that quantify their semantic closeness [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. This study uses an STS dataset to fine-tune pretrained language models and to evaluate our algorithms&#x2019; performance in determining the semantic similarity between health-related questions.</p><p>We collected English and Korean questions from self-reported questionnaires covering 5 health lifelog domains, that is, diet, physical activity, living environment, stress management, and sleep. English questions (n=1222) were sourced from the NIH CDE repository, PROMIS, and academic publications, while Korean questions (n=963) were gathered from web-based resources provided by public health agencies and hospitals in Korea [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>To build the STS dataset, we began by randomly selecting 5 seed questions from each of the 5 health lifelog domains in Korean, resulting in 25 seed questions. For each question, corresponding similar questions in English were identified, resulting in 25 similar seed questions for each language. This correspondence of seed questions was performed to minimize the effects of semantic complexity on algorithm performance. 
We then randomly selected 30 comparison questions for each seed question, which yielded a total of 1500 question pairs (750 in each language).</p><p>The gold standard for semantic similarity between the question pairs was determined by 2 researchers with nursing backgrounds who independently scored the similarity of each question pair, following a standardized scoring protocol (<xref ref-type="table" rid="table1">Table 1</xref>). The agreement between the researchers, as measured by Cohen &#x03BA;, varied by the health lifelog domains: 0.91 for diet, 0.72 for living environment, 0.83 for physical activity, 0.86 for sleep, and 1.0 for stress management, with an average Cohen &#x03BA; of 0.86 across all the health lifelog domains.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Scoring protocol for semantic similarity. The seed question was &#x201C;In the past month, have you ever had chest pain when you were not performing any physical activity?&#x201D;</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Score</td><td align="left" valign="bottom">Scoring protocol</td><td align="left" valign="bottom">Examples</td></tr></thead><tbody><tr><td align="left" valign="top">4</td><td align="left" valign="top">Minor differences in word choice from the seed question but takes the same form of response</td><td align="left" valign="top">In the past month, have you had chest pain when you were not doing physical activity?</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Share the same key topic, although some details may be added, altered, or omitted from the seed question</td><td align="left" valign="top">Do you feel pain in your chest when you do physical activity?</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">The key topics are similar but more specific or general than that of the seed question</td><td align="left" valign="top">Has 
your doctor ever said that you have a heart condition and that you should only perform physical activity recommended by a doctor?</td></tr><tr><td align="left" valign="top">1</td><td align="left" valign="top">Does not share the core topic from the seed question or belongs to a completely different health lifelog domain</td><td align="left" valign="top">Have you done general conditioning exercises in the past 4 weeks?</td></tr></tbody></table></table-wrap><p>Upon completion of this annotation process, we observed that the initial distribution of the similarity scores was imbalanced&#x2014;skewed heavily toward lower similarity scores. Only 2.3% (7/300) of the pairs received a score of 4, and 4.9% (15/300) received a score of 3. To address this imbalance, we supplemented the dataset with an additional 117 English and 142 Korean question pairs from other sources, chosen to increase the frequency of semantically similar (ie, higher scores) samples in the evaluation STS dataset. These additions brought the final evaluation set to 820 question pairs (410 in each language) with the following distribution: 12.2% (99/810), 30.5% (247/810), 26.8% (217/810), and 30.5% (247/810) for scores 4, 3, 2, and 1, respectively.</p><p>Using a similar procedure as described above, we compiled a second English STS dataset for fine-tuning our pretrained language models. This fine-tuning dataset included 938 annotated English question pairs. The fine-tuning set had a distribution of 6.2% (58/938) scoring 4, 14% (131/938) scoring 3, 23.5% (220/938) scoring 2, and 56.4% (529/938) scoring 1.</p><p>In total, the STS dataset consisted of 1758 question pairs, broken down into 820 for evaluation testing (410 English and 410 Korean) and 938 in English for classifier model refinement (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The process of constructing the STS dataset is illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The process of preparing the semantic textual similarity dataset for fine-tuning and evaluation. CDE: common data elements; NIH: National Institutes of Health; PGHD: person-generated health data; PROMIS: Patient-Reported Outcomes Measurement Information System; Rand: random selection; STS: semantic textual similarity.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e71687_fig01.png"/></fig></sec><sec id="s2-2"><title>Semantic Similarity Calculation Algorithms</title><sec id="s2-2-1"><title>Overview</title><p>We developed 4 classifiers to compare their performance capability for distinguishing the binary task of semantic similarity between STS question pairs. These were (1) the bag-of-words (BoW) model, (2) SBERT with BERT-based embeddings (SBERT-BERT), (3) SBERT with LaBSE (SBERT-LaBSE), and (4) the GPT-4o model (GPT-4o). Among these, the SBERT-BERT algorithm was included to serve as a translation-dependent baseline, enabling comparison with multilingual models such as SBERT-LaBSE and GPT-4o. Model fine-tuning and algorithm development were performed using Python (version 3.11).</p></sec><sec id="s2-2-2"><title>BoW Classifier</title><p>The BoW algorithm, a traditional language model that represents sentences by their word frequency, serves as the baseline [<xref ref-type="bibr" rid="ref41">41</xref>]. The BoW model&#x2019;s vocabulary was derived from the STS dataset, comprising 1349 unique word forms after stop-word removal and lemmatization. Each sentence was represented as a 1349D vector based on the vocabulary. Cosine similarity was used to calculate the semantic distance of the question pairs. 
For Korean questions, translation to English was performed using the Google Translator application programming interface prior to similarity calculation [<xref ref-type="bibr" rid="ref42">42</xref>].</p></sec><sec id="s2-2-3"><title>The SBERT-BERT Algorithm</title><p>The SBERT-BERT large language model was derived from the pretrained BERT-based model, which has 12 layers, a 768D hidden layer, 12 attention heads, and 110 million parameters [<xref ref-type="bibr" rid="ref30">30</xref>]. SBERT-BERT supports only English. We fine-tuned the pretrained SBERT-BERT model to optimize its performance for identifying semantic equivalency among health questions by using the 938 English question pairs described above. The fine-tuning was performed with a batch size of 32, 8 epochs, and a learning rate of 2e-5, which were deemed optimal after testing various configurations. The AdamW optimizer was used for model optimization [<xref ref-type="bibr" rid="ref43">43</xref>]. The fine-tuned SBERT-BERT algorithm was then evaluated using the test STS dataset of 410 English question pairs and 410 Korean question pairs. As previously stated, the Korean questions were translated into English using the Google Translator application programming interface to execute the evaluation.</p></sec><sec id="s2-2-4"><title>The SBERT-LaBSE Algorithm</title><p>The SBERT-LaBSE algorithm differs from SBERT-BERT in that it supports multiple languages within a single embedding space [<xref ref-type="bibr" rid="ref31">31</xref>]. The pretrained SBERT-LaBSE model was derived from the LaBSE model, which also consists of 12 layers, a 768D hidden layer, 12 attention heads, and 110 million parameters [<xref ref-type="bibr" rid="ref31">31</xref>]. Fine-tuning was performed in the same manner as for SBERT-BERT. 
Unlike the other models, SBERT-LaBSE can assess the semantic similarity of English and Korean questions without requiring translation.</p></sec><sec id="s2-2-5"><title>The GPT-4o Algorithm</title><p>The GPT-4o model, a state-of-the-art large language model, is designed to understand and generate text in multiple languages, including English and Korean [<xref ref-type="bibr" rid="ref44">44</xref>]. Unlike the SBERT-BERT and SBERT-LaBSE, which rely on fixed embeddings for similarity calculation, the GPT-4o operates as a generative model that dynamically evaluates semantic similarity based on contextual understanding. However, in this study, we utilized GPT-4o in a deterministic manner to predict the score of sentence pairs. Each sentence pair was presented with a specific instruction asking to evaluate the score on a scale from 1 to 4 (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Fine-tuning of the GPT-4o model was conducted using the fine-tuning application programming interface from the OpenAI platform [<xref ref-type="bibr" rid="ref45">45</xref>].</p></sec></sec><sec id="s2-3"><title>Performance Evaluation</title><p>The performance of the similarity calculation algorithms was evaluated as a binary classification problem to simplify interpretation. The 4-point ordinal similarity scores from the STS dataset were converted into binary labels, where scores of 3 and 4 were categorized as similar and scores of 1 and 2 as dissimilar.</p><p>Optimal thresholds for predicting similarity were determined for the continuous similarity scores, which ranged from &#x2013;1 to 1. Precision, recall, and <italic>F</italic><sub>1</sub>-scores were calculated to assess algorithm performance, and the area under the curve for both the receiver operating characteristic and precision-recall curves were examined. 
The processes used by the 4 algorithms to calculate similarity are illustrated in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Similarity calculation with the 4 algorithms. API: application programming interface; BERT: bidirectional encoder representations from transformers; BoW: bag-of-words; Eng: English; Kor: Korean; LaBSE: language-agnostic bidirectional encoder representations from transformers sentence embedding; SBERT: sentence-bidirectional encoder representations from transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e71687_fig02.png"/></fig></sec><sec id="s2-4"><title>Ethical Considerations</title><p>This study does not involve human participants, intervention, or identifiable private information. The analysis was based on publicly available and nonidentifiable health survey questions from open repositories and published sources. As such, it does not fall under the scope of human subject research as defined by the Seoul National University institutional review board. According to Article 2 and Article 8, Paragraph 2 of the Seoul National University institutional review board regulations (regulation 27, effective September 11, 2023), studies that do not involve human participants or human-derived materials are exempt from institutional review board review. Therefore, this study was not submitted for ethical review. 
No informed consent, compensation, or privacy protection measures were applicable, as no human participants were involved, and no personal data were collected or analyzed.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The performance of the 3 models for classifying similar versus dissimilar question pairs when aggregating the 5 health lifelog domains is summarized in <xref ref-type="table" rid="table2">Table 2</xref> and <xref ref-type="fig" rid="figure3">Figure 3</xref>. In the zero-shot trials (ie, without the model refining stage), there were minimal differences in performance among the 3 algorithms for both English and Korean questions. All algorithms exhibited higher recall than precision in both languages. After fine-tuning, the SBERT-BERT algorithm showed substantial improvement, particularly for English questions, in which the <italic>F</italic><sub>1</sub>-score increased from 0.65 to 0.96. For Korean questions, the improvement was moderate, with the <italic>F</italic><sub>1</sub>-score progressing from 0.68 to 0.73. In contrast, SBERT-LaBSE demonstrated significant improvements for both languages post fine-tuning. For English questions, the <italic>F</italic><sub>1</sub>-scores increased from 0.66 to 0.98, while for Korean, the <italic>F</italic><sub>1</sub>-scores increased from 0.68 to 0.98. Fine tuning for both SBERT-BERT and SBERT-LaBSE models resulted in noticeable balanced performance between recall and precision. Similarly, GPT-4o exhibited improved performance following fine-tuning, with its <italic>F</italic><sub>1</sub>-scores increasing from 0.69 to 0.79 for the English questions and from 0.67 to 0.79 for the Korean questions. 
However, the degree of improvement was smaller than that observed in SBERT models.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance metrics for the 3 algorithms, combining the health lifelog domains.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Performance<break/>metrics</td><td align="left" valign="top">BoW<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">GPT-4o pretrained</td><td align="left" valign="top">GPT-4o fine-tuned</td><td align="left" valign="top" colspan="2">SBERT<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> with pretrained</td><td align="left" valign="top" colspan="2">SBERT with fine-tuned</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="top">BERT-base</td><td align="left" valign="top">LaBSE<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">BERT-base</td><td align="left" valign="top">LaBSE</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="9">English question pairs (n=410)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.6112</td><td align="left" valign="top">0.6683</td><td align="left" valign="top">0.8463</td><td align="left" valign="top">0.6308</td><td align="left" valign="top">0.5917</td><td align="left" valign="top">0.9702</td><td align="left" valign="top">0.9853</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.5279</td><td align="left" valign="top">0.5753</td><td align="left" valign="top">0.9590</td><td align="left" 
valign="top">0.5451</td><td align="left" valign="top">0.5111</td><td align="left" valign="top">0.9668</td><td align="left" valign="top">0.9818</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.8161</td><td align="left" valign="top">0.8514</td><td align="left" valign="top">0.6686</td><td align="left" valign="top">0.7989</td><td align="left" valign="top">0.9253</td><td align="left" valign="top">0.9632</td><td align="left" valign="top">0.9839</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.6411</td><td align="left" valign="top">0.6866</td><td align="left" valign="top">0.7879</td><td align="left" valign="top">0.6480</td><td align="left" valign="top">0.6585</td><td align="left" valign="top">0.9649</td><td align="left" valign="top">0.9828</td></tr><tr><td align="left" valign="top" colspan="9">Korean question pairs (n=410)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.6610</td><td align="left" valign="top">0.6512</td><td align="left" valign="top">0.8488</td><td align="left" valign="top">0.6659</td><td align="left" valign="top">0.6878</td><td align="left" valign="top">0.7576</td><td align="left" valign="top">0.9839</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.5732</td><td align="left" valign="top">0.5620</td><td align="left" valign="top">0.9520</td><td align="left" valign="top">0.5760</td><td align="left" valign="top">0.6054</td><td align="left" valign="top">0.6929</td><td align="left" 
valign="top">0.9818</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.8057</td><td align="left" valign="top">0.8286</td><td align="left" valign="top">0.6800</td><td align="left" valign="top">0.8229</td><td align="left" valign="top">0.7714</td><td align="left" valign="top">0.7817</td><td align="left" valign="top">0.9806</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.6698</td><td align="left" valign="top">0.6697</td><td align="left" valign="top">0.7933</td><td align="left" valign="top">0.6776</td><td align="left" valign="top">0.6784</td><td align="left" valign="top">0.7332</td><td align="left" valign="top">0.9812</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>BoW: bag-of-words.</p></fn><fn id="table2fn2"><p><sup>b</sup>SBERT: sentence-bidirectional encoder representations from transformers.</p></fn><fn id="table2fn3"><p><sup>c</sup>LaBSE: language-agnostic bidirectional encoder representations from transformers sentence embedding.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Receiver operating characteristic and precision-recall curves for pretrained and fine-tuned embeddings on English and Korean questions, combining the health lifelog domains. 
AUC: area under the curve; BERT: bidirectional encoder representations from transformers; BoW: bag-of-words; Eng: English; Kor: Korean; LaBSE: language-agnostic bidirectional encoder representations from transformers sentence embedding; PR: precision-recall; ROC: receiver operating characteristic; SBERT: sentence-bidirectional encoder representations from transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e71687_fig03.png"/></fig><p><xref ref-type="table" rid="table3">Table 3</xref> presents the performance of the 2 SBERT algorithms across the 5 health lifelog domains. For all the health lifelog domains, the fine-tuned SBERT-BERT and SBERT-LaBSE algorithms demonstrated high performance on English questions, with receiver operating characteristic and precision-recall area under the curve values exceeding 0.95 and approaching 0.99. However, the SBERT-BERT algorithm struggled with the English-translated Korean questions, particularly in the physical activity domain. 
In contrast, the SBERT-LaBSE algorithm consistently delivered strong performance across all the health lifelog domains even for Korean questions.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Performance metrics of the sentence-bidirectional encoder representations from transformers&#x2013;based algorithms with fine-tuned bidirectional encoder representations from transformers and language-agnostic bidirectional encoder representations from transformers sentence embedding models by the health lifelog domains.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Performance<break/>metrics</td><td align="left" valign="top" colspan="6">English question pairs (n=410)</td><td align="left" valign="top" colspan="6">Korean question pairs (n=410)</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom">DL<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup><break/>(n=80)</td><td align="left" valign="bottom">HLE<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup><break/>(n=80)</td><td align="left" valign="bottom">PA<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup><break/>(n=80)</td><td align="left" valign="bottom">Sleep<break/>(n=85)</td><td align="left" valign="bottom">Stress<break/>(n=85)</td><td align="left" valign="bottom">All</td><td align="left" valign="bottom">DL<break/>(n=80)</td><td align="left" valign="bottom">HLE<break/>(n=80)</td><td align="left" valign="bottom">PA<break/>(n=80)</td><td align="left" valign="bottom">Sleep<break/>(n=85)</td><td align="left" valign="bottom">Stress<break/>(n=85)</td><td align="left" valign="bottom">All</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="14">BoW<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.7215</td><td align="left" valign="top">0.7250</td><td align="left" valign="top">0.6625</td><td align="left" valign="top">0.4118</td><td align="left" valign="top">0.7176</td><td align="left" valign="top">0.6112</td><td align="left" valign="top">0.7250</td><td align="left" valign="top">0.8125</td><td align="left" valign="top">0.7250</td><td align="left" valign="top">0.5765</td><td align="left" valign="top">0.5882</td><td align="left" valign="top">0.6610</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.7000</td><td align="left" valign="top">0.6383</td><td align="left" valign="top">0.5952</td><td align="left" valign="top">0.4118</td><td align="left" valign="top">0.6279</td><td align="left" valign="top">0.5279</td><td align="left" valign="top">0.6585</td><td align="left" valign="top">0.7941</td><td align="left" valign="top">0.6275</td><td align="left" valign="top">0.0000</td><td align="left" valign="top">0.5000</td><td align="left" valign="top">0.5732</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.6176</td><td align="left" valign="top">0.8571</td><td align="left" valign="top">0.7143</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.7714</td><td align="left" valign="top">0.8161</td><td align="left" valign="top">0.7714</td><td align="left" valign="top">0.7714</td><td align="left" valign="top">0.9143</td><td align="left" valign="top">0.0000</td><td align="left" valign="top">0.8286</td><td align="left" valign="top">0.8057</td></tr><tr><td align="left" valign="top" colspan="2"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.6563</td><td align="left" valign="top">0.7317</td><td align="left" valign="top">0.6494</td><td align="left" valign="top">0.5833</td><td align="left" valign="top">0.6923</td><td align="left" valign="top">0.6411</td><td align="left" valign="top">0.7105</td><td align="left" valign="top">0.7826</td><td align="left" valign="top">0.7442</td><td align="left" valign="top">0.0000</td><td align="left" valign="top">0.6237</td><td align="left" valign="top">0.6698</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ROC<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup> AUC<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top">0.7297</td><td align="left" valign="top">0.7457</td><td align="left" valign="top">0.6810</td><td align="left" valign="top">0.5820</td><td align="left" valign="top">0.7611</td><td align="left" valign="top">0.6976</td><td align="left" valign="top">0.7667</td><td align="left" valign="top">0.7937</td><td align="left" valign="top">0.7933</td><td align="left" valign="top">0.5937</td><td align="left" valign="top">0.6609</td><td align="left" valign="top">0.7174</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PR<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> AUC</td><td align="left" valign="top">0.7250</td><td align="left" valign="top">0.6718</td><td align="left" valign="top">0.6301</td><td align="left" valign="top">0.5025</td><td align="left" valign="top">0.6834</td><td align="left" valign="top">0.6036</td><td align="left" valign="top">0.7394</td><td align="left" valign="top">0.7373</td><td align="left" valign="top">0.7519</td><td align="left" valign="top">0.4498</td><td align="left" 
valign="top">0.5985</td><td align="left" valign="top">0.6267</td></tr><tr><td align="left" valign="top" colspan="14">GPT-4o fine-tuned</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.9367</td><td align="left" valign="top">0.9750</td><td align="left" valign="top">0.9625</td><td align="left" valign="top">0.9765</td><td align="left" valign="top">0.9765</td><td align="left" valign="top">0.7873</td><td align="left" valign="top">0.8875</td><td align="left" valign="top">0.9375</td><td align="left" valign="top">0.7750</td><td align="left" valign="top">0.8000</td><td align="left" valign="top">0.8471</td><td align="left" valign="top">0.8293</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.8919</td><td align="left" valign="top">0.9714</td><td align="left" valign="top">0.9444</td><td align="left" valign="top">0.9459</td><td align="left" valign="top">0.9459</td><td align="left" valign="top">0.7403</td><td align="left" valign="top">0.9643</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.8696</td><td align="left" valign="top">0.9500</td><td align="left" valign="top">0.9583</td><td align="left" valign="top">0.7586</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.9706</td><td align="left" valign="top">0.9714</td><td align="left" valign="top">0.9714</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.7701</td><td align="left" valign="top">0.7714</td><td align="left" valign="top">0.8571</td><td align="left" valign="top">0.5714</td><td align="left" valign="top">0.5429</td><td align="left" 
valign="top">0.6571</td><td align="left" valign="top">0.8800</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.9296</td><td align="left" valign="top">0.9714</td><td align="left" valign="top">0.9577</td><td align="left" valign="top">0.9722</td><td align="left" valign="top">0.9722</td><td align="left" valign="top">0.7549</td><td align="left" valign="top">0.8571</td><td align="left" valign="top">0.9231</td><td align="left" valign="top">0.6897</td><td align="left" valign="top">0.6909</td><td align="left" valign="top">0.7797</td><td align="left" valign="top">0.8148</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ROC AUC</td><td align="left" valign="top">0.9598</td><td align="left" valign="top">0.9838</td><td align="left" valign="top">0.9727</td><td align="left" valign="top">0.9863</td><td align="left" valign="top">0.9757</td><td align="left" valign="top">0.8524</td><td align="left" valign="top">0.9340</td><td align="left" valign="top">0.9444</td><td align="left" valign="top">0.8295</td><td align="left" valign="top">0.8823</td><td align="left" valign="top">0.8769</td><td align="left" valign="top">0.8737</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PR AUC</td><td align="left" valign="top">0.9138</td><td align="left" valign="top">0.9672</td><td align="left" valign="top">0.9418</td><td align="left" valign="top">0.9629</td><td align="left" valign="top">0.9354</td><td align="left" valign="top">0.7757</td><td align="left" valign="top">0.8969</td><td align="left" valign="top">0.9344</td><td align="left" valign="top">0.7510</td><td align="left" valign="top">0.7969</td><td align="left" valign="top">0.8182</td><td align="left" 
valign="top">0.7862</td></tr><tr><td align="left" valign="top" colspan="14">SBERT<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> with fine-tuned BERT-base<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.9646</td><td align="left" valign="top">0.9625</td><td align="left" valign="top">0.9800</td><td align="left" valign="top">0.9906</td><td align="left" valign="top">0.9835</td><td align="left" valign="top">0.9702</td><td align="left" valign="top">0.8525</td><td align="left" valign="top">0.8125</td><td align="left" valign="top">0.7025</td><td align="left" valign="top">0.7200</td><td align="left" valign="top">0.7882</td><td align="left" valign="top">0.7576</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.9650</td><td align="left" valign="top">0.9502</td><td align="left" valign="top">0.9784</td><td align="left" valign="top">0.9836</td><td align="left" valign="top">0.9830</td><td align="left" valign="top">0.9668</td><td align="left" valign="top">0.8037</td><td align="left" valign="top">0.7391</td><td align="left" valign="top">0.6175</td><td align="left" valign="top">0.6036</td><td align="left" valign="top">0.7108</td><td align="left" valign="top">0.6929</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.9529</td><td align="left" valign="top">0.9657</td><td align="left" valign="top">0.9771</td><td align="left" valign="top">0.9943</td><td align="left" valign="top">0.9771</td><td align="left" valign="top">0.9632</td><td align="left" valign="top">0.8800</td><td align="left" valign="top">0.8914</td><td 
align="left" valign="top">0.8629</td><td align="left" valign="top">0.9543</td><td align="left" valign="top">0.8229</td><td align="left" valign="top">0.7817</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.9585</td><td align="left" valign="top">0.9571</td><td align="left" valign="top">0.9770</td><td align="left" valign="top">0.9887</td><td align="left" valign="top">0.9799</td><td align="left" valign="top">0.9649</td><td align="left" valign="top">0.8384</td><td align="left" valign="top">0.8062</td><td align="left" valign="top">0.7176</td><td align="left" valign="top">0.7376</td><td align="left" valign="top">0.7622</td><td align="left" valign="top">0.7332</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ROC AUC</td><td align="left" valign="top">0.9859</td><td align="left" valign="top">0.9698</td><td align="left" valign="top">0.9923</td><td align="left" valign="top">0.9929</td><td align="left" valign="top">0.9936</td><td align="left" valign="top">0.9867</td><td align="left" valign="top">0.9125</td><td align="left" valign="top">0.8563</td><td align="left" valign="top">0.7901</td><td align="left" valign="top">0.8411</td><td align="left" valign="top">0.8462</td><td align="left" valign="top">0.8412</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PR AUC</td><td align="left" valign="top">0.9858</td><td align="left" valign="top">0.9480</td><td align="left" valign="top">0.9925</td><td align="left" valign="top">0.9870</td><td align="left" valign="top">0.9918</td><td align="left" valign="top">0.9800</td><td align="left" valign="top">0.9008</td><td align="left" valign="top">0.7969</td><td align="left" valign="top">0.7640</td><td 
align="left" valign="top">0.8109</td><td align="left" valign="top">0.8244</td><td align="left" valign="top">0.8134</td></tr><tr><td align="left" valign="top" colspan="14">SBERT with fine-tuned LaBSE</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">0.9848</td><td align="left" valign="top">0.9900</td><td align="left" valign="top">0.9875</td><td align="left" valign="top">0.9906</td><td align="left" valign="top">0.9906</td><td align="left" valign="top">0.9853</td><td align="left" valign="top">0.9775</td><td align="left" valign="top">0.9975</td><td align="left" valign="top">0.9850</td><td align="left" valign="top">0.9859</td><td align="left" valign="top">0.9835</td><td align="left" valign="top">0.9839</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision</td><td align="left" valign="top">0.9716</td><td align="left" valign="top">0.9889</td><td align="left" valign="top">0.9728</td><td align="left" valign="top">0.9944</td><td align="left" valign="top">0.9889</td><td align="left" valign="top">0.9818</td><td align="left" valign="top">0.9719</td><td align="left" valign="top">0.9944</td><td align="left" valign="top">0.9836</td><td align="left" valign="top">0.9775</td><td align="left" valign="top">0.9886</td><td align="left" valign="top">0.9818</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Recall</td><td align="left" valign="top">0.9941</td><td align="left" valign="top">0.9886</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.9829</td><td align="left" valign="top">0.9886</td><td align="left" valign="top">0.9839</td><td align="left" valign="top">0.9771</td><td align="left" valign="top">1.0000</td><td align="left" 
valign="top">0.9829</td><td align="left" valign="top">0.9886</td><td align="left" valign="top">0.9714</td><td align="left" valign="top">0.9806</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.9826</td><td align="left" valign="top">0.9885</td><td align="left" valign="top">0.9861</td><td align="left" valign="top">0.9884</td><td align="left" valign="top">0.9887</td><td align="left" valign="top">0.9828</td><td align="left" valign="top">0.9743</td><td align="left" valign="top">0.9972</td><td align="left" valign="top">0.9828</td><td align="left" valign="top">0.9829</td><td align="left" valign="top">0.9797</td><td align="left" valign="top">0.9812</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>ROC AUC</td><td align="left" valign="top">0.9965</td><td align="left" valign="top">0.9929</td><td align="left" valign="top">0.9987</td><td align="left" valign="top">0.9989</td><td align="left" valign="top">0.9979</td><td align="left" valign="top">0.9968</td><td align="left" valign="top">0.9893</td><td align="left" valign="top">0.9976</td><td align="left" valign="top">0.9962</td><td align="left" valign="top">0.9971</td><td align="left" valign="top">0.9930</td><td align="left" valign="top">0.9951</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PR AUC</td><td align="left" valign="top">0.9964</td><td align="left" valign="top">0.9901</td><td align="left" valign="top">0.9984</td><td align="left" valign="top">0.9985</td><td align="left" valign="top">0.9975</td><td align="left" valign="top">0.9960</td><td align="left" valign="top">0.9872</td><td align="left" valign="top">0.9958</td><td align="left" valign="top">0.9947</td><td align="left" 
valign="top">0.9957</td><td align="left" valign="top">0.9927</td><td align="left" valign="top">0.9934</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>DL: dietary lifestyle.</p></fn><fn id="table3fn2"><p><sup>b</sup>HLE: human living environment.</p></fn><fn id="table3fn3"><p><sup>c</sup>PA: physical activity.</p></fn><fn id="table3fn4"><p><sup>d</sup>BoW: bag-of-words.</p></fn><fn id="table3fn5"><p><sup>e</sup>ROC: receiver operating characteristic.</p></fn><fn id="table3fn6"><p><sup>f</sup>AUC: area under the curve.</p></fn><fn id="table3fn7"><p><sup>g</sup>PR: precision-recall.</p></fn><fn id="table3fn8"><p><sup>h</sup>SBERT: sentence-bidirectional encoder representations from transformers.</p></fn><fn id="table3fn9"><p><sup>i</sup>BERT: bidirectional encoder representations from transformers.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="table" rid="table4">Table 4</xref> presents the optimal cutoff values for the 3 algorithms. The pretrained SBERT-BERT and SBERT-LaBSE models showed considerable variation in the cutoff values across the 5 health lifelog domains. However, after fine-tuning, these variations decreased, indicating that fine-tuning helped stabilize the algorithms. Despite this improvement, the SBERT-LaBSE algorithm still exhibited more variability in the cutoff values across the health lifelog domains compared to SBERT-BERT, suggesting that further calibration may be required for SBERT-LaBSE. 
<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> provides example question pairs from each health lifelog domain, along with the similarity scores assigned by human reviewers and predicted by the 3 algorithms.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Optimal cutoff for algorithms on bag-of-words and pretrained and fine-tuned SBERT-BERT and SBERT&#x2013;LaBSE in each health lifelog domain.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Health lifelog<break/>domain</td><td align="left" valign="top">Bag-of-words</td><td align="left" valign="top" colspan="2">SBERT<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> with pretrained</td><td align="left" valign="top" colspan="2">SBERT with fine-tuned</td></tr><tr><td align="left" valign="bottom" colspan="2"/><td align="left" valign="bottom"/><td align="left" valign="top">BERT-base<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="top">LaBSE</td><td align="left" valign="top">BERT-base</td><td align="left" valign="top">LaBSE<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7">English question pairs (n=410)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dietary lifestyle</td><td align="left" valign="top">0.2887</td><td align="left" valign="top">0.6274</td><td align="left" valign="top">0.5359</td><td align="left" valign="top">0.6349</td><td align="left" valign="top">0.6262</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Human living environment</td><td align="left" valign="top">0.1291</td><td align="left" valign="top">0.5369</td><td align="left" valign="top">0.3965</td><td align="left" 
valign="top">0.6151</td><td align="left" valign="top">0.6425</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physical activity</td><td align="left" valign="top">0.3162</td><td align="left" valign="top">0.3667</td><td align="left" valign="top">0.4822</td><td align="left" valign="top">0.6304</td><td align="left" valign="top">0.6202</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sleep</td><td align="left" valign="top">0.0000</td><td align="left" valign="top">0.6790</td><td align="left" valign="top">0.2456</td><td align="left" valign="top">0.6617</td><td align="left" valign="top">0.6574</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stress</td><td align="left" valign="top">0.1054</td><td align="left" valign="top">0.5817</td><td align="left" valign="top">0.3807</td><td align="left" valign="top">0.6359</td><td align="left" valign="top">0.5958</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All</td><td align="left" valign="top">0.1291</td><td align="left" valign="top">0.5816</td><td align="left" valign="top">0.3796</td><td align="left" valign="top">0.6278</td><td align="left" valign="top">0.6091</td></tr><tr><td align="left" valign="top" colspan="7">Korean question pairs (n=410)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dietary lifestyle</td><td align="left" valign="top">0.2887</td><td align="left" valign="top">0.5990</td><td align="left" valign="top">0.3103</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.6568</td></tr><tr><td align="left" valign="top" colspan="2"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Human living environment</td><td align="left" valign="top">0.2582</td><td align="left" valign="top">0.5475</td><td align="left" valign="top">0.5603</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.7138</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Physical activity</td><td align="left" valign="top">0.1491</td><td align="left" valign="top">0.4778</td><td align="left" valign="top">0.6004</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.6741</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sleep</td><td align="left" valign="top">0.9354</td><td align="left" valign="top">0.4837</td><td align="left" valign="top">0.9215</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.6849</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Stress</td><td align="left" valign="top">0.1091</td><td align="left" valign="top">0.6647</td><td align="left" valign="top">0.4481</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.6586</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>All</td><td align="left" valign="top">0.1336</td><td align="left" valign="top">0.5320</td><td align="left" valign="top">0.5753</td><td align="left" valign="top">0.5639</td><td align="left" valign="top">0.6531</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SBERT: sentence-bidirectional encoder representations from transformers.</p></fn><fn id="table4fn2"><p><sup>b</sup>BERT: bidirectional encoder representations from transformers.</p></fn><fn 
id="table4fn3"><p><sup>c</sup>LaBSE: language-agnostic bidirectional encoder representations from transformers sentence embedding.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="fig" rid="figure4">Figure 4</xref> illustrates that SBERT-LaBSE effectively determined semantic similarities between the 2 languages, with slightly better performance in identifying the semantic similarities of English questions relative to the Korean seed questions. The complete results of the cross-language semantic similarity analysis are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Performance of the cross-language semantic similarity determination. AUC: area under the curve; BERT: bidirectional encoder representations from transformers; BoW: bag-of-words; Eng: English; Kor: Korean; LaBSE: language-agnostic bidirectional encoder representations from transformers sentence embedding; PR: precision-recall; ROC: receiver operating characteristic; SBERT: sentence-bidirectional encoder representations from transformers.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="medinform_v13i1e71687_fig04.png"/></fig></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study demonstrates the utility of large language models for determining semantic similarities among health questions to facilitate the standardization of survey-based health data. Our results indicate that the fine-tuned SBERT algorithms were significantly more effective than the traditional BoW approach in identifying semantic similarities for both English and Korean questions. 
Furthermore, the SBERT-LaBSE algorithm demonstrated superior performance particularly for Korean questions, suggesting that it is a more effective method than the SBERT-BERT algorithm, which relies on English translation, for assessing semantic similarity in non-English texts. Notably, the SBERT-LaBSE algorithm outperformed the GPT-4o algorithm, particularly in Korean. Although it is possible that the full potential of the GPT-4o algorithm was not realized, the results clearly show that for the specific task examined in this study, the fine-tuned SBERT algorithms achieved better performance than GPT-4o, with significantly lower computational costs [<xref ref-type="bibr" rid="ref46">46</xref>].</p><p>The SBERT-LaBSE algorithm&#x2019;s success with Korean questions can be attributed to its structural design and the limitations of language translation. Structurally, LaBSE aligns semantically equivalent words or sentences from different languages into a unified embedding space, preserving semantic consistency across languages. This allows for more accurate semantic similarity assessments. In contrast, the SBERT-BERT algorithm&#x2019;s lower performance with Korean questions may be due to meaning loss or distortion during translation, which disrupts semantic comparisons between languages [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Although previous studies have noted that LaBSE may struggle with subtle, sentence-level nuances, limiting its performance in fine-grained similarity tasks [<xref ref-type="bibr" rid="ref47">47</xref>], our study shows that the SBERT-LaBSE algorithm effectively captured the meanings in both English and Korean sentences, outperforming the SBERT-BERT model. 
However, this finding should be validated with a larger and more diverse dataset that includes a broader range of syntactic features.</p></sec><sec id="s4-2"><title>Limitations</title><p>When implemented in the SPURT system with 1835 questions in the comparison space, the SBERT-LaBSE algorithm evaluated the similarity of a new question in just 0.03 seconds. This was achieved on a Naver Cloud Platform server with 8GB RAM and no graphics processing unit [<xref ref-type="bibr" rid="ref48">48</xref>]. Despite its impressive performance, LaBSE&#x2019;s 440 million parameters&#x2014;4 times that of BERT base&#x2014;make it a resource-intensive option, potentially increasing costs for complex tasks. This resource demand may limit its applicability on resource-constrained devices such as mobile platforms [<xref ref-type="bibr" rid="ref49">49</xref>]. To address these limitations, future work will explore techniques such as distillation [<xref ref-type="bibr" rid="ref50">50</xref>] and the use of small language models [<xref ref-type="bibr" rid="ref51">51</xref>], with the goal of reducing model size while maintaining performance.</p><p>This study has some limitations. First, the cutoff values for the similarity scores were not uniformly calibrated across the 5 health lifelog domains, leading to inconsistencies in how similarity scores were interpreted. For example, the SBERT-LaBSE algorithm assigned a similarity score of 0.7 to the dietary question pair &#x201C;I&#x2019;ve binge eaten&#x201D; and &#x201C;Do you ever overeat?&#x201D; and identified them as similar. However, the algorithm correctly identified the human living environment questions, that is, &#x201C;Have you moved in the past 5 years?&#x201D; and &#x201C;In the last 5 years, the number of people in this community has?&#x201D; as dissimilar while assigning the same similarity score of 0.7 to the pair. 
These inconsistencies may impact the accurate interpretation of similarity scores, highlighting the need for future work to focus on calibrating and normalizing scores across the health lifelog domains to ensure greater consistency. Second, our evaluation was conducted on a small set of English and Korean question pairs. Future studies should explore the feasibility of applying the SBERT-LaBSE algorithm to a broader range of sentence types from diverse domains. Additionally, by incorporating texts from more diverse languages, future research can investigate the algorithm&#x2019;s potential to overcome language barriers and facilitate semantic interoperability.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>Previous methods that relied on metadata tagging [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] and ontology-mediated annotation [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>] were effective in providing structured mappings between concepts, facilitating interoperability. However, they struggled with comparing the meanings of survey questions composed in multiple languages and addressing semantically redundant questions. This study leverages fine-tuned large language models such as SBERT-BERT and SBERT-LaBSE to assess semantic similarity. In particular, the fine-tuned SBERT-LaBSE algorithm demonstrates the potential to enhance semantic interoperability by capturing semantic similarities across multiple languages with high performance.</p></sec><sec id="s4-4"><title>Conclusion</title><p>This study highlights the potential of large language models in identifying semantic redundancy in survey-based PGHD collections. Specifically, the SBERT-LaBSE algorithm excelled in classifying semantic similarity across diverse question formats in 2 languages. 
Our findings demonstrate that SBERT-LaBSE outperforms the traditional BERT-based algorithm, the GPT-4o algorithm, and the conventional BoW approach in both languages, highlighting its capacity to improve semantic interoperability of PGHD across language barriers.</p></sec></sec></body><back><ack><p>We are thankful to our colleagues Eunyeong Lim and Jeongha Kim for their help with the semantic textual similarity dataset preparation. This study was supported in part by a grant from the National Research Foundation of Korea (grant 2022R1A2C201136011).</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BERT</term><def><p>bidirectional encoder representations from transformers</p></def></def-item><def-item><term id="abb2">BoW</term><def><p>bag-of-words</p></def></def-item><def-item><term id="abb3">CDE</term><def><p>common data elements</p></def></def-item><def-item><term id="abb4">LaBSE</term><def><p>language-agnostic bidirectional encoder representations from transformers sentence embedding</p></def></def-item><def-item><term id="abb5">NIH</term><def><p>National Institutes of Health</p></def></def-item><def-item><term id="abb6">PGHD</term><def><p>person-generated health data</p></def></def-item><def-item><term id="abb7">PROMIS</term><def><p>Patient-Reported Outcomes Measurement Information System</p></def></def-item><def-item><term id="abb8">SBERT</term><def><p>sentence-bidirectional encoder representations from transformers</p></def></def-item><def-item><term id="abb9">SPURT</term><def><p>Standardized Patient-Generated Health Data Utilization Resources and Tools</p></def></def-item><def-item><term id="abb10">STS</term><def><p>semantic textual similarity</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name 
name-style="western"><surname>Shapiro</surname><given-names>M</given-names> </name><name name-style="western"><surname>Johnston</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wald</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mon</surname><given-names>D</given-names> </name></person-group><article-title>Patient-generated health data. White paper</article-title><source>Official Website of the Assistant Secretary for Technology Policy/Office of the National Coordinator for Health IT</source><year>2012</year><access-date>2025-05-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.healthit.gov/sites/default/files/rti_pghd_whitepaper_april_2012.pdf">https://www.healthit.gov/sites/default/files/rti_pghd_whitepaper_april_2012.pdf</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Patient-generated health data</article-title><source>Official Website of the Assistant Secretary for Technology Policy/Office of the National Coordinator for Health IT</source><access-date>2025-05-08</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.healthit.gov/topic/scientific-initiatives/pcor/patient-generated-health-data-pghd">https://www.healthit.gov/topic/scientific-initiatives/pcor/patient-generated-health-data-pghd</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Islind</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Lindroth</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lundin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Steineck</surname><given-names>G</given-names> </name></person-group><article-title>Shift in translations: data work with patient-generated 
health data in clinical practice</article-title><source>Health Informatics J</source><year>2019</year><month>09</month><volume>25</volume><issue>3</issue><fpage>577</fpage><lpage>586</lpage><pub-id pub-id-type="doi">10.1177/1460458219833097</pub-id><pub-id pub-id-type="medline">30866707</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sanger</surname><given-names>PC</given-names> </name><name name-style="western"><surname>Hartzler</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lordon</surname><given-names>RJ</given-names> </name><etal/></person-group><article-title>A patient-centered system in a provider-centered world: challenges of incorporating post-discharge wound data into practice</article-title><source>J Am Med Inform Assoc</source><year>2016</year><month>05</month><volume>23</volume><issue>3</issue><fpage>514</fpage><lpage>525</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocv183</pub-id><pub-id pub-id-type="medline">26977103</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adler-Milstein</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nong</surname><given-names>P</given-names> </name></person-group><article-title>Early experiences with patient generated health data: health system and patient perspectives</article-title><source>J Am Med Inform Assoc</source><year>2019</year><month>10</month><day>1</day><volume>26</volume><issue>10</issue><fpage>952</fpage><lpage>959</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocz045</pub-id><pub-id pub-id-type="medline">31329886</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Chung</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Sandler</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Long</surname><given-names>MD</given-names> </name><etal/></person-group><article-title>Harnessing person-generated health data to accelerate patient-centered outcomes research: the Crohn&#x2019;s and Colitis Foundation of America PCORnet Patient Powered Research Network (CCFA Partners)</article-title><source>J Am Med Inform Assoc</source><year>2016</year><month>05</month><volume>23</volume><issue>3</issue><fpage>485</fpage><lpage>490</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocv191</pub-id><pub-id pub-id-type="medline">26911821</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Austin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Amtmann</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Use of patient-generated health data across healthcare settings: implications for health systems</article-title><source>JAMIA Open</source><year>2020</year><month>04</month><volume>3</volume><issue>1</issue><fpage>70</fpage><lpage>76</lpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooz065</pub-id><pub-id pub-id-type="medline">32607489</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdolkhani</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gray</surname><given-names>K</given-names> </name><name name-style="western"><surname>Borda</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>DeSouza</surname><given-names>R</given-names> </name></person-group><article-title>Patient-generated health data management and quality challenges in remote patient monitoring</article-title><source>JAMIA Open</source><year>2019</year><month>12</month><volume>2</volume><issue>4</issue><fpage>471</fpage><lpage>478</lpage><pub-id pub-id-type="doi">10.1093/jamiaopen/ooz036</pub-id><pub-id pub-id-type="medline">32025644</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hussein</surname><given-names>R</given-names> </name><name name-style="western"><surname>Crutzen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gutenberg</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kulnik</surname><given-names>ST</given-names> </name><name name-style="western"><surname>Sareban</surname><given-names>M</given-names> </name><name name-style="western"><surname>Niebauer</surname><given-names>J</given-names> </name></person-group><article-title>Patient-generated health data (PGHD) interoperability: an integrative perspective</article-title><source>Stud Health Technol Inform</source><year>2021</year><month>05</month><day>27</day><volume>281</volume><fpage>228</fpage><lpage>232</lpage><pub-id pub-id-type="doi">10.3233/SHTI210154</pub-id><pub-id pub-id-type="medline">34042739</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cella</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yount</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rothrock</surname><given-names>N</given-names> </name><etal/></person-group><article-title>The Patient-Reported Outcomes Measurement Information System 
(PROMIS): progress of an NIH roadmap cooperative group during its first two years</article-title><source>Med Care</source><year>2007</year><month>05</month><volume>45</volume><issue>5 Suppl 1</issue><fpage>S3</fpage><lpage>S11</lpage><pub-id pub-id-type="doi">10.1097/01.mlr.0000258615.42478.55</pub-id><pub-id pub-id-type="medline">17443116</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cella</surname><given-names>D</given-names> </name><name name-style="western"><surname>Riley</surname><given-names>W</given-names> </name><name name-style="western"><surname>Stone</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The Patient-Reported Outcomes Measurement Information System (PROMIS) developed and tested its first wave of adult self-reported health outcome item banks: 2005-2008</article-title><source>J Clin Epidemiol</source><year>2010</year><month>11</month><volume>63</volume><issue>11</issue><fpage>1179</fpage><lpage>1194</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2010.04.011</pub-id><pub-id pub-id-type="medline">20685078</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pilkonis</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Reise</surname><given-names>SP</given-names> </name><etal/></person-group><article-title>Item banks for measuring emotional distress from the Patient-Reported Outcomes Measurement Information System (PROMIS&#x00AE;): depression, anxiety, and anger</article-title><source>Assessment</source><year>2011</year><month>09</month><volume>18</volume><issue>3</issue><fpage>263</fpage><lpage>283</lpage><pub-id 
pub-id-type="doi">10.1177/1073191111411667</pub-id><pub-id pub-id-type="medline">21697139</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amtmann</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cook</surname><given-names>KF</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>MP</given-names> </name><etal/></person-group><article-title>Development of a PROMIS item bank to measure pain interference</article-title><source>Pain</source><year>2010</year><month>07</month><volume>150</volume><issue>1</issue><fpage>173</fpage><lpage>182</lpage><pub-id pub-id-type="doi">10.1016/j.pain.2010.04.025</pub-id><pub-id pub-id-type="medline">20554116</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brodke</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Saltzman</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Brodke</surname><given-names>DS</given-names> </name></person-group><article-title>PROMIS for orthopaedic outcomes measurement</article-title><source>J Am Acad Orthop Surg</source><year>2016</year><month>11</month><volume>24</volume><issue>11</issue><fpage>744</fpage><lpage>749</lpage><pub-id pub-id-type="doi">10.5435/JAAOS-D-15-00404</pub-id><pub-id pub-id-type="medline">27661391</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Talib</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Stump</surname><given-names>TE</given-names> 
</name><etal/></person-group><article-title>Incorporating PROMIS symptom measures into primary care practice-a randomized clinical trial</article-title><source>J Gen Intern Med</source><year>2018</year><month>08</month><volume>33</volume><issue>8</issue><fpage>1245</fpage><lpage>1252</lpage><pub-id pub-id-type="doi">10.1007/s11606-018-4391-0</pub-id><pub-id pub-id-type="medline">29623512</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wagner</surname><given-names>LI</given-names> </name><name name-style="western"><surname>Schink</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bass</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Bringing PROMIS to practice: brief and precise symptom screening in ambulatory cancer care</article-title><source>Cancer</source><year>2015</year><month>03</month><day>15</day><volume>121</volume><issue>6</issue><fpage>927</fpage><lpage>934</lpage><pub-id pub-id-type="doi">10.1002/cncr.29104</pub-id><pub-id pub-id-type="medline">25376427</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>Common data elements repository</article-title><source>National Institutes of Health</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cde.nlm.nih.gov/home">https://cde.nlm.nih.gov/home</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>NIH strategic plan for data science</article-title><source>National Institutes of Health</source><access-date>2025-05-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://datascience.nih.gov/nih-strategic-plan-data-science">https://datascience.nih.gov/nih-strategic-plan-data-science</ext-link></comment></nlm-citation></ref><ref 
id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>T</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Detection of medical text semantic similarity based on convolutional neural network</article-title><source>BMC Med Inform Decis Mak</source><year>2019</year><month>08</month><day>7</day><volume>19</volume><issue>1</issue><fpage>156</fpage><pub-id pub-id-type="doi">10.1186/s12911-019-0880-2</pub-id><pub-id pub-id-type="medline">31391038</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>He</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bian</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name></person-group><article-title>Measurement of semantic textual similarity in clinical texts: comparison of transformer-based models</article-title><source>JMIR Med Inform</source><year>2020</year><month>11</month><day>23</day><volume>8</volume><issue>11</issue><fpage>e19735</fpage><pub-id pub-id-type="doi">10.2196/19735</pub-id><pub-id pub-id-type="medline">33226350</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mahajan</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>Poddar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>JJ</given-names> </name><etal/></person-group><article-title>Identification of semantically similar sentences in clinical notes: iterative intermediate training using multi-task learning</article-title><source>JMIR Med Inform</source><year>2020</year><month>11</month><day>27</day><volume>8</volume><issue>11</issue><fpage>e22508</fpage><pub-id pub-id-type="doi">10.2196/22508</pub-id><pub-id pub-id-type="medline">33245284</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ormerod</surname><given-names>M</given-names> </name><name name-style="western"><surname>Mart&#x00ED;nez Del Rinc&#x00F3;n</surname><given-names>J</given-names> </name><name name-style="western"><surname>Devereux</surname><given-names>B</given-names> </name></person-group><article-title>Predicting semantic similarity between clinical sentence pairs using transformer models: evaluation and representational analysis</article-title><source>JMIR Med Inform</source><year>2021</year><month>05</month><day>26</day><volume>9</volume><issue>5</issue><fpage>e23099</fpage><pub-id pub-id-type="doi">10.2196/23099</pub-id><pub-id pub-id-type="medline">34037527</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Q</given-names> </name><name name-style="western"><surname>He</surname><given-names>S</given-names> </name></person-group><article-title>Similarity matching of medical question based on Siamese network</article-title><source>BMC Med Inform Decis Mak</source><year>2023</year><month>04</month><day>6</day><volume>23</volume><issue>1</issue><fpage>55</fpage><pub-id 
pub-id-type="doi">10.1186/s12911-023-02161-z</pub-id><pub-id pub-id-type="medline">37024844</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Mihalcea</surname><given-names>R</given-names> </name><name name-style="western"><surname>Corley</surname><given-names>C</given-names> </name><name name-style="western"><surname>Strapparava</surname><given-names>C</given-names> </name></person-group><article-title>Corpus-based and knowledge-based measures of text semantic similarity</article-title><conf-name>Proceedings of the Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference</conf-name><conf-date>Jul 16-20, 2006</conf-date><conf-loc>Boston, MA</conf-loc><pub-id pub-id-type="doi">10.5555/1597538.1597662</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><conf-name>Proceedings of the North American Chapter of the Association for Computational Linguistics</conf-name><conf-date>Jun 2-9, 2019</conf-date><conf-loc>Minneapolis, MN</conf-loc><pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Artetxe</surname><given-names>M</given-names> 
</name><name name-style="western"><surname>Schwenk</surname><given-names>H</given-names> </name></person-group><article-title>Massively multilingual sentence embeddings for zero-shot cross-lingual transfer and beyond</article-title><source>Trans Assoc Comput Linguist</source><year>2019</year><month>11</month><volume>7</volume><fpage>597</fpage><lpage>610</lpage><pub-id pub-id-type="doi">10.1162/tacl_a_00288</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Miao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>K</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Tsuruoka</surname><given-names>Y</given-names> </name></person-group><article-title>Enhancing cross-lingual sentence embedding for low-resource languages with word alignment</article-title><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Jun 16-21, 2024</conf-date><conf-loc>Mexico City, Mexico</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2024.findings-naacl.204</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Belinkov</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bisk</surname><given-names>Y</given-names> </name></person-group><article-title>Synthetic and natural noise both break neural machine translation</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 24, 2018</comment><pub-id pub-id-type="doi">10.48550/arXiv.1711.02173</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation 
citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Tiyajamorn</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kajiwara</surname><given-names>T</given-names> </name><name name-style="western"><surname>Arase</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Onizuka</surname><given-names>M</given-names> </name></person-group><article-title>Language-agnostic representation from multilingual sentence encoders for cross-lingual similarity estimation</article-title><conf-name>Proceedings of Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Nov 7-11, 2021</conf-date><conf-loc>Online and Punta Cana, Dominican Republic</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.612</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Reimers</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gurevych</surname><given-names>I</given-names> </name></person-group><article-title>Sentence-BERT: sentence embeddings using siamese BERT-networks</article-title><conf-name>Proceedings of Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</conf-name><conf-date>Nov 3-7, 2019</conf-date><conf-loc>Hong Kong, China</conf-loc><pub-id pub-id-type="doi">10.18653/v1/D19-1410</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Feng</surname><given-names>F</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Cer</surname><given-names>DM</given-names> </name><name 
name-style="western"><surname>Arivazhagan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name></person-group><article-title>Language-agnostic BERT sentence embedding</article-title><conf-name>Proceedings of 60th Annual Meeting of the Association for Computational Linguistics (Volume 1)</conf-name><conf-date>May 22-27, 2022</conf-date><conf-loc>Dublin, Ireland</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.62</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Agirre</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cer</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Diab</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Gonzalez-Agirre</surname><given-names>A</given-names> </name></person-group><article-title>SemEval-2012 task 6: a pilot on semantic textual similarity</article-title><access-date>2025-05-13</access-date><conf-name>Proceedings of the International Workshop on Semantic Evaluation</conf-name><conf-date>Jun 7-8, 2012</conf-date><conf-loc>Montr&#x00E9;al, Canada</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/S12-1051.pdf">https://aclanthology.org/S12-1051.pdf</ext-link></comment></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>D</given-names> </name></person-group><article-title>SimCSE: simple contrastive learning of sentence embeddings</article-title><conf-name>Proceedings of the 2021 
Conference on Empirical Methods in Natural Language Processing</conf-name><conf-date>Nov 7-11, 2021</conf-date><conf-loc>Online and Punta Cana, Dominican Republic</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2021.emnlp-main.552</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ham</surname><given-names>J</given-names> </name><name name-style="western"><surname>Choe</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Park</surname><given-names>K</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>I</given-names> </name><name name-style="western"><surname>Soh</surname><given-names>H</given-names> </name></person-group><article-title>KorNLI and KorSTS: new benchmark datasets for Korean natural language understanding</article-title><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Nov 16-20, 2020</conf-date><pub-id pub-id-type="doi">10.18653/v1/2020.findings-emnlp.39</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mutinda</surname><given-names>FW</given-names> </name><name name-style="western"><surname>Yada</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wakamiya</surname><given-names>S</given-names> </name><name name-style="western"><surname>Aramaki</surname><given-names>E</given-names> </name></person-group><article-title>Semantic textual similarity in Japanese clinical domain texts using BERT</article-title><source>Methods Inf Med</source><year>2021</year><month>06</month><volume>60</volume><issue>S 01</issue><fpage>e56</fpage><lpage>e64</lpage><pub-id pub-id-type="doi">10.1055/s-0041-1731390</pub-id><pub-id pub-id-type="medline">34237783</pub-id></nlm-citation></ref><ref 
id="ref36"><label>36</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bi</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>CBLUE: a Chinese biomedical language understanding evaluation benchmark</article-title><conf-name>Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1)</conf-name><conf-date>May 22-27, 2022</conf-date><conf-loc>Dublin, Ireland</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2022.acl-long.544</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><source>HealthMeasures</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.healthmeasures.net">https://www.healthmeasures.net</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="web"><article-title>Introduction to Ministry of Food and Drug Safety [Article in Korean]</article-title><source>Republic of Korea&#x2019;s e-Government</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mfds.go.kr/index.do">https://www.mfds.go.kr/index.do</ext-link></comment></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="web"><article-title>Dietary lifestyle assessment [Article in Korean]</article-title><source>National Cancer Information Center</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cancer.go.kr/lay1/S1T226C228/contents.do">https://www.cancer.go.kr/lay1/S1T226C228/contents.do</ext-link></comment></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="web"><article-title>My 
dietary assessment [Article in Korean]</article-title><source>Seoul Asan Medical Center Health Screening &#x0026; Promotion Center</source><access-date>2024-10-15</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://health.amc.seoul.kr/health/maintain/check.do">https://health.amc.seoul.kr/health/maintain/check.do</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salton</surname><given-names>G</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>CS</given-names> </name></person-group><article-title>A vector space model for automatic indexing</article-title><source>Commun ACM</source><year>1975</year><month>11</month><volume>18</volume><issue>11</issue><fpage>613</fpage><lpage>620</lpage><pub-id pub-id-type="doi">10.1145/361219.361220</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="web"><article-title>Googletrans-py 4.0.0</article-title><source>The Python Package Index</source><access-date>2024-10-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pypi.org/project/googletrans-py">https://pypi.org/project/googletrans-py</ext-link></comment></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="web"><article-title>AdamW documentation</article-title><source>PyTorch</source><access-date>2024-10-10</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html">https://pytorch.org/docs/stable/generated/torch.optim.AdamW.html</ext-link></comment></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="web"><article-title>GPT-4o system 
card</article-title><source>OpenAI</source><access-date>2025-01-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/research/gpt-4o-system-card">https://openai.com/research/gpt-4o-system-card</ext-link></comment></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="web"><article-title>Fine-tuning guide</article-title><source>OpenAI</source><access-date>2025-01-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://platform.openai.com/docs/guides/fine-tuning">https://platform.openai.com/docs/guides/fine-tuning</ext-link></comment></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Brown</surname><given-names>T</given-names> </name><name name-style="western"><surname>Mann</surname><given-names>B</given-names> </name><name name-style="western"><surname>Ryder</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Language models are few-shot learners</article-title><source>arXiv</source><comment>Preprint posted online on  Jul 22, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2005.14165</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>W</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>G</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Han</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name></person-group><article-title>Multilingual sentence transformer as a multilingual word aligner</article-title><conf-name>Findings of the Association for Computational 
Linguistics</conf-name><conf-date>Dec 7-11, 2022</conf-date><conf-loc>Abu Dhabi, United Arab Emirates</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2022.findings-emnlp.215</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="web"><source>Naver Cloud Platform</source><access-date>2024-10-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ncloud.com/product/compute/server#detail">https://www.ncloud.com/product/compute/server#detail</ext-link></comment></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Mao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Nakagawa</surname><given-names>T</given-names> </name></person-group><article-title>LEALLA: learning lightweight language-agnostic sentence embeddings with knowledge distillation</article-title><conf-name>Proceedings of the 17th Conference of the European Chapter of the Association for Computational Linguistics</conf-name><conf-date>May 2-6, 2023</conf-date><conf-loc>Dubrovnik, Croatia</conf-loc><pub-id pub-id-type="doi">10.18653/v1/2023.eacl-main.138</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jiao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shang</surname><given-names>L</given-names> </name><etal/></person-group><article-title>TinyBERT: distilling BERT for natural language understanding</article-title><conf-name>Findings of the Association for Computational Linguistics</conf-name><conf-date>Nov 16-20, 2020</conf-date><pub-id pub-id-type="doi">10.18653/v1/2020.findings-emnlp.372</pub-id></nlm-citation></ref><ref 
id="ref51"><label>51</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Mo</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lu</surname><given-names>Q</given-names> </name><etal/></person-group><article-title>A comprehensive survey of small language models in the era of large language models: techniques, enhancements, applications, collaboration with LLMS, and trustworthiness</article-title><source>arXiv</source><comment>Preprint posted online on  Dec 28, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2411.03350</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Comparison of algorithms' performance on the semantic textual similarity dataset.</p><media xlink:href="medinform_v13i1e71687_app1.docx" xlink:title="DOCX File, 64 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>The instructions used for GPT-4o semantic similarity evaluation.</p><media xlink:href="medinform_v13i1e71687_app2.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Example question pairs with the scores from human review and predictions from the 3 algorithms.</p><media xlink:href="medinform_v13i1e71687_app3.docx" xlink:title="DOCX File, 34 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Performance metrics in the cross-language semantic similarity analysis.</p><media 
xlink:href="medinform_v13i1e71687_app4.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material></app-group></back></article>