<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
    <front>
        <journal-meta>
            <journal-id journal-id-type="publisher-id">JMI</journal-id>
            <journal-id journal-id-type="nlm-ta">JMIR Med Inform</journal-id>
            <journal-title>JMIR Medical Informatics</journal-title>
            <issn pub-type="epub">2291-9694</issn>
            <publisher>
                <publisher-name>Gunther Eysenbach</publisher-name>
                <publisher-loc>JMIR Publications Inc., Toronto, Canada</publisher-loc>
            </publisher>
        </journal-meta>
        <article-meta>
            <article-id pub-id-type="publisher-id">v3i3e27</article-id>
            <article-id pub-id-type="pmid">26232246</article-id>
            <article-id pub-id-type="doi">10.2196/medinform.4211</article-id>
            <article-categories>
                <subj-group subj-group-type="heading">
                    <subject>Original Paper</subject>
                </subj-group>
                <subj-group subj-group-type="article-type">
                    <subject>Original Paper</subject>
                </subj-group>
            </article-categories>
            <title-group>
                <article-title>Context-Sensitive Spelling Correction of Consumer-Generated Content on Health Care</article-title>
            </title-group>
            <contrib-group>
                <contrib contrib-type="editor">
                    <name>
                        <surname>Eysenbach</surname>
                        <given-names>Gunther</given-names>
                    </name>
                </contrib>
            </contrib-group>
            <contrib-group>
                <contrib contrib-type="reviewer">
                    <name>
                        <surname>Fiszman</surname>
                        <given-names>Marcelo</given-names>
                    </name>
                </contrib>
                <contrib contrib-type="reviewer">
                    <name>
                        <surname>Zhang</surname>
                        <given-names>Yaoyun</given-names>
                    </name>
                </contrib>
            </contrib-group>
            <contrib-group>
                <contrib contrib-type="author" id="contrib1" equal-contrib="yes">
                    <name name-style="western">
                        <surname>Zhou</surname>
                        <given-names>Xiaofang</given-names>
                    </name>
                    <xref rid="aff1" ref-type="aff">1</xref>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1155-364X</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib2" equal-contrib="yes">
                    <name name-style="western">
                        <surname>Zheng</surname>
                        <given-names>An</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3689-2712</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib3" equal-contrib="yes">
                    <name name-style="western">
                        <surname>Yin</surname>
                        <given-names>Jiaheng</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <xref rid="aff3" ref-type="aff">3</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-5567-2301</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib4">
                    <name name-style="western">
                        <surname>Chen</surname>
                        <given-names>Rudan</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0002-3293-687X</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib5">
                    <name name-style="western">
                        <surname>Zhao</surname>
                        <given-names>Xianyang</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2380-8099</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib6">
                    <name name-style="western">
                        <surname>Xu</surname>
                        <given-names>Wei</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-5174-8543</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib7">
                    <name name-style="western">
                        <surname>Cheng</surname>
                        <given-names>Wenqing</given-names>
                    </name>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0001-7840-1195</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib8" corresp="yes">
                    <name name-style="western">
                        <surname>Xia</surname>
                        <given-names>Tian</given-names>
                    </name>
                    <degrees>PhD</degrees>
                    <xref rid="aff2" ref-type="aff">2</xref>
                    <address>
                        <institution>Internet Technology and Engineering Research and Development Center</institution>
                        <institution>School of Electronic Information and Communications</institution>
                        <institution>Huazhong University of Science and Technology</institution>
                        <addr-line>Nanyi, Huazhong University of Science and Technology</addr-line>
                        <addr-line>1037 Luoyu Road, Hongshan</addr-line>
                        <addr-line>Wuhan, 430074</addr-line>
                        <country>China</country>
                        <phone>86 02787544704</phone>
                        <fax>86 02787544704</fax>
                        <email>tianxia@hust.edu.cn</email>
                    </address>
                    <xref rid="aff4" ref-type="aff">4</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-1721-6126</ext-link>
                </contrib>
                <contrib contrib-type="author" id="contrib9">
                    <name name-style="western">
                        <surname>Lin</surname>
                        <given-names>Simon</given-names>
                    </name>
                    <degrees>MD</degrees>
                    <xref rid="aff5" ref-type="aff">5</xref>
                    <ext-link ext-link-type="orcid">http://orcid.org/0000-0003-2876-2042</ext-link>
                </contrib>
            </contrib-group>
            <aff id="aff1">
                <sup>1</sup>
                <institution>The Department of Ophthalmology</institution>
                <institution>Wuhan Central Hospital</institution>
                <addr-line>Wuhan</addr-line>
                <country>China</country>
            </aff>
            <aff id="aff2">
                <sup>2</sup>
                <institution>Internet Technology and Engineering Research and Development Center</institution>
                <institution>School of Electronic Information and Communications</institution>
                <institution>Huazhong University of Science and Technology</institution>
                <addr-line>Wuhan</addr-line>
                <country>China</country>
            </aff>
            <aff id="aff3">
                <sup>3</sup>
                <institution>Department of Biostatistics and Computational Biology</institution>
                <institution>School of Life Science</institution>
                <institution>Fudan University</institution>
                <addr-line>Shanghai</addr-line>
                <country>China</country>
            </aff>
            <aff id="aff4">
                <sup>4</sup>
                <institution>Northwestern University Biomedical Informatics Center (NUBIC)</institution>
                <institution>Feinberg School of Medicine</institution>
                <institution>Northwestern University</institution>
                <addr-line>Chicago, IL</addr-line>
                <country>United States</country>
            </aff>
            <aff id="aff5">
                <sup>5</sup>
                <institution>Research Institute</institution>
                <institution>Nationwide Children's Hospital</institution>
                <addr-line>Columbus, OH</addr-line>
                <country>United States</country>
            </aff>
            <author-notes>
                <corresp>Corresponding Author: Tian Xia <email>tianxia@hust.edu.cn</email>
                </corresp>
            </author-notes>
            <pub-date pub-type="collection">
                <season>Jul-Sep</season>
                <year>2015</year>
            </pub-date>
            <pub-date pub-type="epub">
                <day>31</day>
                <month>07</month>
                <year>2015</year>
            </pub-date>
            <volume>3</volume>
            <issue>3</issue>
            <elocation-id>e27</elocation-id>
            <!--history from ojs - api-xml-->
            <history>
                <date date-type="received">
                    <day>17</day>
                    <month>2</month>
                    <year>2015</year>
                </date>
                <date date-type="rev-request">
                    <day>25</day>
                    <month>3</month>
                    <year>2015</year>
                </date>
                <date date-type="rev-recd">
                    <day>6</day>
                    <month>5</month>
                    <year>2015</year>
                </date>
                <date date-type="accepted">
                    <day>31</day>
                    <month>5</month>
                    <year>2015</year>
                </date>
            </history>
            <!--(c) the authors - correct author names and publication date here if necessary. Date in form ', dd.mm.yyyy' after jmir.org-->
            <copyright-statement>&#169;Xiaofang Zhou, An Zheng, Jiaheng Yin, Rudan Chen, Xianyang Zhao, Wei Xu, Wenqing Cheng, Tian Xia, Simon Lin. Originally published in JMIR Medical Informatics (http://medinform.jmir.org), 31.07.2015. </copyright-statement>
            <copyright-year>2015</copyright-year>
            <license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/2.0/">
                <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (http://creativecommons.org/licenses/by/2.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Informatics, is properly cited. The complete bibliographic information, a link to the original publication on http://medinform.jmir.org/, as well as this copyright and license information must be included.</p>
            </license>
            <self-uri xlink:href="http://medinform.jmir.org/2015/3/e27/" xlink:type="simple" />
            <abstract>
                <sec sec-type="background">
                    <title>Background</title>
                    <p>Consumer-generated content, such as postings on social media websites, can serve as an ideal source of information for studying health care from a consumer&#8217;s perspective. However, consumer-generated content on health care topics often contains spelling errors, which, if not corrected, will be obstacles for downstream computer-based text analysis.</p>
                </sec>
                <sec sec-type="objective">
                    <title>Objective</title>
                    <p>In this study, we proposed a framework with a spelling correction system designed for consumer-generated content and a novel ontology-based evaluation system which was used to efficiently assess the correction quality. Additionally, we emphasized the importance of context sensitivity in the correction process, and demonstrated why correction methods designed for electronic medical records (EMRs) failed to perform well with consumer-generated content.</p>
                </sec>
                <sec sec-type="methods">
                    <title>Methods</title>
                    <p>First, we developed our spelling correction system based on Google Spell Checker. The system processed postings acquired from MedHelp, a biomedical bulletin board system (BBS), and saved misspelled words (eg, sertaline) and corresponding corrected words (eg, sertraline) into two separate sets. Second, to reduce the number of words needing manual examination in the evaluation process, we respectively matched the words in the two sets with terms in two biomedical ontologies: RxNorm and Systematized Nomenclature of Medicine -- Clinical Terms (SNOMED CT). The ratio of words which could be matched and appropriately corrected was used to evaluate the correction system&#8217;s overall performance. Third, we categorized the misspelled words according to the types of spelling errors. Finally, we calculated the ratio of abbreviations in the postings, which remarkably differed between EMRs and consumer-generated content and could largely influence the overall performance of spelling checkers.</p>
                </sec>
                <sec sec-type="results">
                    <title>Results</title>
                    <p>An uncorrected word and the corresponding corrected word were called a spelling pair, and the two words in the spelling pair were its members. In our study, there were 271 spelling pairs detected, among which 58 (21.4%) pairs had one or two members matched in the selected ontologies. The ratio of appropriate correction in the 271 overall spelling errors was 85.2% (231/271). The corresponding ratio in the 58 spelling pairs was 86% (50/58), close to the overall ratio. We also found that linguistic errors took up 31.4% (85/271) of all errors detected, and only 0.98% (210/21,358) of words in the postings were abbreviations, which was much lower than the ratio in the EMRs (33.6%).</p>
                </sec>
                <sec sec-type="conclusions">
                    <title>Conclusions</title>
                    <p>We conclude that our system can accurately correct spelling errors in consumer-generated content. Context sensitivity is indispensable in the correction process. Additionally, it can be confirmed that consumer-generated content differs from EMRs in that consumers seldom use abbreviations. Also, the evaluation method, taking advantage of biomedical ontology, can effectively estimate the accuracy of the correction system and reduce manual examination time.</p>
                </sec>
            </abstract>
            <kwd-group>
                <kwd>spelling correction system</kwd>
                <kwd>context sensitive</kwd>
                <kwd>consumer-generated content</kwd>
                <kwd>biomedical ontology</kwd>
            </kwd-group>
        </article-meta>
    </front>
    <body>
        <sec>
            <title>Introduction</title>
            <sec>
                <title>Background</title>
                <p>In the last two decades, spelling correction methods for clinical texts have been studied extensively. Nevertheless, the majority of related studies mainly focused on the electronic medical record (EMR) [<xref ref-type="bibr" rid="ref1">1</xref>], but largely ignored consumer-generated content which has accumulated rapidly because of the development of online media and social networks. The consumers mentioned here include those who describe their symptoms and seek online medical assistance, and those who have been successfully cured and are willing to share their treatment process experience on public websites or forums. Although there is no doubt that the EMR content is worthy of in-depth study, information in consumer-generated content is equally useful and informative, which has been discussed in a US National Research Council Committee Framework [<xref ref-type="bibr" rid="ref2">2</xref>] and in Zeng et al [<xref ref-type="bibr" rid="ref3">3</xref>]. Mining information in consumer-generated content based on large-scale text analysis becomes increasingly important in the context where social networks have become pervasive in recent years. For example, the useful relationship information between biomedical terms can be inferred based on texts extracted from postings in various online health communities written by patients. Obviously, the accuracy of these inferences relies on correctly spelled text. Therefore, the development of spelling correction methods for consumer-generated content is critical for ensuring the accuracy and efficiency of downstream text analysis.</p>
            </sec>
            <sec>
                <title>Related Work</title>
                <sec>
                    <title>Spelling Correction</title>
                    <p>Numerous approaches for correcting spelling errors, such as Levenshtein edit distance [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>] and semantic correction [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], have been proposed. The Levenshtein edit distance model demonstrates a method to measure the edit distance of converting one string to another, which is calculated by counting the number of letter operations of four types&#8212;deletions, insertions, transpositions, and substitutions&#8212;during the conversion. For example, when correcting &#8220;plls&#8221; to &#8220;pills,&#8221; we need to insert the letter &#8220;i&#8221; which increases the edit distance by one. The candidate with the lowest edit distance will be recognized as the best replacement for the misspelled word. The semantic correction model utilizes context-sensitive detection and has been widely applied to studies using natural language processing (NLP). For example, Wong and Glance [<xref ref-type="bibr" rid="ref1">1</xref>] developed a robust system using semantic correction to correct misspelled words, especially abbreviation disambiguation, in progress notes. In addition, according to a study by Ruch et al [<xref ref-type="bibr" rid="ref7">7</xref>], these two models can be combined: first, the Levenshtein edit distance is computed and the resulting candidate words are ranked according to the edit distance. Each word is then examined according to the context using semantic correction. Finally, the most suitable candidate is picked according to both edit distance and semantic meaning. 
There are other extensively used methods such as the Soundex system proposed by Odell and Russell [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>] and the n-gram model [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Some studies applied an integrated spelling correction application programming interface (API), such as GNU Aspell, Yahoo API, etc. Wong and Glance [<xref ref-type="bibr" rid="ref1">1</xref>] adopted and mixed GNU Aspell and Yahoo API corrective interfaces in their systems for real-time abbreviation disambiguation, which has achieved good results. These interfaces have become highly sound and mature after a long period of development.</p>
                </sec>
                <sec>
                    <title>Evaluation Methods</title>
                    <p>The mainstream evaluation methods for spelling correction systems can be ascribed into two types: horizontal comparison and longitudinal comparison. Horizontal comparison means that researchers test several different correction models with the same input, and then compare their performance and accuracy to prove the strength of the newly designed model. For example, in the study by Ruch et al [<xref ref-type="bibr" rid="ref7">7</xref>], they compared the correction results of four different correction models derived from NLP. Longitudinal comparison is generally applied in evaluating methods which are used to improve and perfect existing spelling correction systems. This comparison mainly focuses on the difference between the spelling error correction rate before and after the improvement, as in Crowell et al [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
                    <p>Currently, most of the prevalent evaluation methods for spelling correction are based on manual inspection [<xref ref-type="bibr" rid="ref1">1</xref>]. Although it is accurate, the manual evaluation is time consuming, and not feasible to be applied in large-scale experiments. Therefore, we explored the use of formal ontologies to evaluate the effectiveness of spelling correction.</p>
                </sec>
                <sec>
                    <title>Spelling Error Classifications</title>
                    <p>Spelling errors are usually divided into different categories. Ruch et al classified misspelled words in EMRs into two categories. The first category, called typographical error, refers to spelling mistakes which lead to misspelled words becoming nonexistent in the dictionary. For instance, when a consumer spells &#8220;plls&#8221; instead of &#8220;pills,&#8221; there is no chance of finding &#8220;plls&#8221; in a lexicon. The second category, called <italic>linguistic error</italic>, refers to typing errors which cause a word&#8217;s original meaning to change, but the misspelled word still exists in the dictionary (eg, spelling &#8220;three pills&#8221; as &#8220;tree pills&#8221;). Syntactic and semantic spelling errors are included in this category. Similarly, many other studies, such as those of Jurafsky and James [<xref ref-type="bibr" rid="ref13">13</xref>] and Wilbur et al [<xref ref-type="bibr" rid="ref14">14</xref>], classified spelling errors according to whether misspelled words needed <italic>isolated-word error correction</italic> or <italic>context-dependent error correction</italic>. Our system followed Ruch&#8217;s classification method, categorizing spelling errors into <italic>typographical</italic> and <italic>linguistic errors</italic>.</p>
                </sec>
            </sec>
            <sec>
                <title>Limitations With Existing Approach</title>
                <p>There are several limitations that exist within the current approaches and hinder the correction process from achieving highly efficient performance. To begin with, some existing approaches will become less efficient and require an abundance of training data when processing large amounts of text. For example, according to Ruch et al, correction systems using Levenshtein edit distance require extremely large amounts of training data, which can be scarcely satisfied in real-world situations. Also, the semantic correction process is highly complex when the correction system needs to detect both typographical and linguistic errors [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
                <p>In addition, the context-related errors make up a large ratio of spelling errors in consumer-generated content (shown in the following sections). If we only focus on typographical errors [<xref ref-type="bibr" rid="ref15">15</xref>] in order to achieve high efficiency, then the accuracy of the correction system will be largely sacrificed, and overall system performance will appear much less desirable than approaches considering both linguistic and typographical errors.</p>
                <p>Moreover, unique features of consumer-generated content should also be taken into consideration in the correction process. Consumer-generated content differs from EMR content, in that there are many abbreviations written by clinical professionals in EMRs, which are rarely shown in consumer-generated content. EMRs contain abbreviated terms such as &#8220;VSS&#8221; (vital signs stable), &#8220;PVCs&#8221; (premature ventricular contractions), &#8220;NTG&#8221; (nitroglycerin), and &#8220;gtt&#8221; (guttae) to describe patients&#8217; physical and mental conditions in a quantitative and professional fashion, while consumers prefer to describe their conditions using common language such as &#8220;depressed,&#8221; &#8220;pain,&#8221; and &#8220;feel better.&#8221; This distinct feature leads to differences in spelling correction strategies between EMR and consumer-generated content [<xref ref-type="bibr" rid="ref1">1</xref>].</p>
            </sec>
            <sec>
                <title>Our Approach</title>
                <p>We proposed a spelling correction system based on Google Spell Checker, which is not only able to automatically correct both typographical and linguistic errors, but is also highly efficient thanks to Google Spell Checker&#8217;s core algorithms [<xref ref-type="bibr" rid="ref16">16</xref>]. Our system focuses on correcting spelling errors in daily medical vocabularies, rather than professional, but not commonly used, terminology like the methods proposed by Wong and Glance [<xref ref-type="bibr" rid="ref1">1</xref>], Doan et al [<xref ref-type="bibr" rid="ref17">17</xref>], and Patrick et al [<xref ref-type="bibr" rid="ref18">18</xref>]. It is a real-time and high-performance method that can be easily applied to studies requiring automatic correction of misspelled words.</p>
                <p>In order to shorten the evaluation period and preserve the reliability of the evaluation, we narrowed down the range of words being examined by matching these words with biomedical ontology items, and then manually examining the matched words. Ontologies consist of words and phrases describing and annotating concepts in many fields, such as biomedical informatics and artificial intelligence. To evaluate our system, we selected two biomedical ontologies: Systematized Nomenclature of Medicine -- Clinical Terms (SNOMED CT), which is focused on diseases and symptoms, and RxNorm, which is focused on drugs.</p>
            </sec>
        </sec>
        <sec sec-type="methods">
            <title>Methods</title>
            <sec>
                <title>Dataset</title>
                <p>In this study, we randomly selected 150 postings (21,358 words in total; <xref ref-type="app" rid="app1">Multimedia Appendix 1</xref>) from MedHelp&#8217;s bulletin board system (BBS) [<xref ref-type="bibr" rid="ref19">19</xref>]. This set of postings is related to a drug named <italic>Zoloft</italic> and contains consumers&#8217; descriptions of their symptoms and suggestions from others, such as doctors, pharmacists, and patients, who have already used Zoloft. <xref ref-type="fig" rid="figure1">Figure 1</xref> shows one example from the 150 postings.</p>
                <fig id="figure1" position="float">
                    <label>Figure 1</label>
                    <caption>
                        <p>Screenshot of a sample post from MedHelp's bulletin board system.</p>
                    </caption>
                    <graphic xlink:href="medinform_v3i3e27_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple" />
                </fig>
            </sec>
            <sec>
                <title>Tools Used in Our Study</title>
                <sec>
                    <title>Google Spell Checker</title>
                    <p>We based our system on Google Spell Checker, a state-of-the-art spelling correction tool which is embedded in Google Search and utilizes Web pages as its corpus. Our system can upload text segments that need spell checking onto Google Search, and spelling suggestions will be automatically generated by Google Spell Checker. Google Spell Checker&#8217;s high accuracy and efficiency have been proven by Jacquemont et al and Islam and Inkpen who applied Google&#8217;s search engine and Google Web 1T n-gram&#8212;a language model extracting nearly 1 trillion words from Web pages&#8212;into the spelling correction process.</p>
                </sec>
                <sec>
                    <title>National Center for Biomedical Ontology Annotator</title>
                    <p>To reduce the amount of manual work in the evaluation process, we used the National Center for Biomedical Ontology (NCBO) Annotator [<xref ref-type="bibr" rid="ref20">20</xref>] to match texts with formal ontologies. The NCBO is a website which contains all biomedical ontologies and relevant knowledge; an ontology is a set of terms related to a certain subject, such as biochemistry and movement (eg, &#8220;<italic>Amino Acid Ontology</italic>&#8221; and &#8220;<italic>Cell Ontology</italic>&#8221;). The NCBO&#8217;s Annotator is used to search annotations of biomedicine-related texts in the given ontologies. After selecting ontologies and submitting original texts, users will obtain from the Annotator the matched terms that exist in the designated ontologies. In addition, there is no need for the users to manually submit text one by one in NCBO&#8217;s website. A Web service is provided for all users to accomplish the text-mining jobs programmatically [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
                </sec>
            </sec>
            <sec>
                <title>Framework</title>
                <sec>
                    <title>Construction of Our Spelling Correction System</title>
                    <p>We developed our spelling correction system based on Google Spell Checker. The system works in three steps: text segmentation, text spelling correction, and text reconstruction.</p>
                    <p>In the first step&#8212;text segmentation&#8212;content (eg, a post from MedHelp) is automatically grouped into sets of less than 32 words, since Google&#8217;s search engine can only process 32 words at a time in the correction program. It is worth mentioning that, although our system divides the postings automatically, it does not destroy the complete structure of one sentence. According to the online data [<xref ref-type="bibr" rid="ref22">22</xref>], the average sentence length is 15 to 20 words, which is less than the 32-word requirement in the Google search engine. Additionally, the Google Spell Checker is able to consider the context of the candidates&#8217; suggestions, and evolves in accordance with the update of millions of Web pages [<xref ref-type="bibr" rid="ref16">16</xref>]. All the segments processed are saved in our database. In this way, when context-sensitive texts are separated, this will prevent changes to their original meaning.</p>
                    <p>In the second step&#8212;text spelling correction&#8212;our system uploads the segments saved in the database onto Google Search and downloads the feedback generated by Google Spell Checker. Google Spell Checker not only corrects typographical errors but also proposes suggestions for linguistic errors according to relations of context, including syntactic and semantic relations. The syntactic relation helps in correcting grammatical errors. For example, in some posts, &#8220;<italic>had</italic>&#8221; was misspelt as &#8220;<italic>has,</italic>&#8221; but it turns out that &#8220;<italic>had</italic>&#8221; was more suitable in the contexts. In these circumstances, our system can find this type of problem and deliver the correct output. The semantic relation is used in correcting consumer mistakes that may produce ambiguity (eg, mistakenly writing &#8220;<italic>three</italic>&#8221; as &#8220;<italic>tree</italic>&#8221;). These problems can be resolved using the Google Spell Checker because it can intelligently conclude the most probable text candidate according to the sentence meaning. After correcting the whole text, the system will output and save the corrected text. <xref ref-type="table" rid="table1">Table 1</xref> uses the sentence &#8220;I tooj tree pills last night before bad time&#8221; as an example, and explains how our system works on sentences. Each row shows how our system corrects a single word each time. The number in the second row, such as &#8220;-1&#8221; and &#8220;+3,&#8221; shows the position of each word in this sentence. For example, if we are presently focusing on the word &#8220;tooj,&#8221; then &#8220;-1&#8221; corresponds to &#8220;I&#8221; and &#8220;+3&#8221; corresponds to &#8220;last.&#8221; The column &#8220;Correction&#8221; shows the corrected results. The last column, &#8220;Error type,&#8221; is manually classified, which will be discussed in the Error Classification section.</p>
                    <p>In the third step&#8212;text reconstruction&#8212;our system reconstructs full-text segments in accordance with their original order. Through the above three-step operation, our system will successfully correct the input postings and save both the uncorrected and corrected texts into our database.</p>
                    <p>Thus, we entered the consumer-generated postings collected from MedHelp and followed the steps above. After the correction process, we obtained both misspelled and corresponding corrected words, saved respectively into the uncorrected (U) set and the corrected (C) set. For example, after processing the sentence &#8220;I tooj tree pills last night before bad time,&#8221; &#8220;tooj,&#8221; &#8220;tree,&#8221; and &#8220;bad&#8221; will be saved in set U and &#8220;took,&#8221; &#8220;three,&#8221; and &#8220;bed&#8221; in set C.</p>
                    <table-wrap position="float" id="table1">
                        <label>Table 1</label>
                        <caption>
                            <p>The spelling correction process.</p>
                        </caption>
                        <table width="667" border="1" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                            <col width="73" />
                            <col width="26" />
                            <col width="31" />
                            <col width="39" />
                            <col width="103" />
                            <col width="27" />
                            <col width="25" />
                            <col width="31" />
                            <col width="63" />
                            <col width="86" />
                            <thead>
                                <tr valign="bottom">
                                    <td>Step number</td>
                                    <td colspan="3">Word position<sup>a</sup>
                                    </td>
                                    <td>Misspelled word</td>
                                    <td colspan="3">Word position<sup>a</sup>
                                    </td>
                                    <td>Correction</td>
                                    <td>Error type</td>
                                </tr>
                                <tr valign="top">
                                    <td>
                                        <break />
                                    </td>
                                    <td>-3</td>
                                    <td>-2</td>
                                    <td>-1</td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>+1</td>
                                    <td>+2</td>
                                    <td>+3</td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>
                                        <break />
                                    </td>
                                </tr>
                            </thead>
                            <tbody>
                                <tr valign="top">
                                    <td>1</td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>I</td>
                                    <td>tooj</td>
                                    <td>tree</td>
                                    <td>pills</td>
                                    <td>last</td>
                                    <td>took</td>
                                    <td>typographical</td>
                                </tr>
                                <tr valign="top">
                                    <td>2</td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>I</td>
                                    <td>tooj</td>
                                    <td>tree</td>
                                    <td>pills</td>
                                    <td>last</td>
                                    <td>night</td>
                                    <td>three</td>
                                    <td>linguistic</td>
                                </tr>
                                <tr valign="top">
                                    <td>3</td>
                                    <td>last</td>
                                    <td>night</td>
                                    <td>before</td>
                                    <td>bad</td>
                                    <td>time</td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>
                                        <break />
                                    </td>
                                    <td>bed</td>
                                    <td>linguistic</td>
                                </tr>
                            </tbody>
                        </table>
                        <table-wrap-foot>
                            <fn id="table1fn1">
                                <p>
                                    <sup>a</sup>The number represents the position of each word in the sentence relative to the word presently being focused on.</p>
                            </fn>
                        </table-wrap-foot>
                    </table-wrap>
                </sec>
                <sec>
                    <title>Evaluation Process</title>
                    <p>During the evaluation of our system&#8217;s correction quality, first we used the NCBO Annotator Web service to decrease the number of words examined manually; we input corrected words from set C into the NCBO Annotator, selected the RxNorm and SNOMED CT ontologies, and then ran the Annotator search. We selected these two ontologies because the former, RxNorm, contains all of the terminologies of drugs available on the US market [<xref ref-type="bibr" rid="ref23">23</xref>], and the latter, SNOMED CT, contains a collection of clinical terms and is recognized as the most comprehensive health care terminology resource in the world [<xref ref-type="bibr" rid="ref24">24</xref>]. After the data had been completely scanned and processed, the NCBO Annotator presented the words which could be matched in RxNorm and SNOMED CT in a downloadable Web page (see <xref ref-type="fig" rid="figure2">Figure 2</xref>). We then downloaded and saved the matched words. Similarly, we also input uncorrected words from set U, acquired the words which could be matched in RxNorm and SNOMED CT, and then saved them into our database (see <xref ref-type="fig" rid="figure3">Figure 3</xref>). After this preprocessing, instead of examining all the words in set C and set U, we needed to manually examine only the matched words, count the number of words which were appropriately corrected, and then calculate the ratio of these corrections.</p>
                    <fig id="figure2" position="float">
                        <label>Figure 2</label>
                        <caption>
                            <p>Screenshot of the NCBO Annotator presenting words before spelling correction.</p>
                        </caption>
                        <graphic xlink:href="medinform_v3i3e27_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple" />
                    </fig>
                    <fig id="figure3" position="float">
                        <label>Figure 3</label>
                        <caption>
                            <p>Screenshot of the NCBO Annotator presenting words after spelling correction.</p>
                        </caption>
                        <graphic xlink:href="medinform_v3i3e27_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple" />
                    </fig>
                </sec>
                <sec>
                    <title>Error Classification</title>
                    <p>We manually classified the results into two sets of errors&#8212;typographical errors (set T) and linguistic errors (set L)&#8212;and invited two clinical doctors and a medical researcher to confirm the correctness of our classification.</p>
                </sec>
                <sec>
                    <title>Abbreviation Counts</title>
                    <p>In accordance with the definition in Wong and Glance [<xref ref-type="bibr" rid="ref1">1</xref>], abbreviations in this study refer to shortened forms of words, including acronyms, initialisms, and so on. Following this definition, we manually counted the number of abbreviations in the postings.</p>
                </sec>
            </sec>
        </sec>
        <sec sec-type="results">
            <title>Results</title>
            <p>Our spelling correction system detected 271 spelling errors in the selected postings (see <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>). For ease of explanation, we called an uncorrected word and its corresponding corrected word a spelling pair, and the two words in the spelling pair are its members. For example, &#8220;tooj&#8221; and &#8220;took&#8221; are the two members of one spelling pair. A total of 271 spelling pairs were detected, among which we found that 58 (21.4%) spelling pairs contained one or two matched members in the selected ontologies&#8212;a member able to be matched in the ontologies is called a matched word, and its pair is called a matched pair (see <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>). We ascribed the 58 matched pairs into two groups&#8212;positive and negative impact&#8212;to evaluate the accuracy of our system. Positive refers to misspelled words corrected appropriately and negative refers to those corrected inappropriately. The two impacts contain several different situations and their definitions are shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
            <p>In the correction process, we respectively recorded the number of situations defined above through manual inspection (see <xref ref-type="table" rid="table3">Table 3</xref>) and we asked three senior medical professionals&#8212;two clinical doctors and a medical researcher&#8212;to verify the correctness of our classification.</p>
            <table-wrap position="float" id="table2">
                <label>Table 2</label>
                <caption>
                    <p>Definition for positive and negative impacts.</p>
                </caption>
                <table width="655" border="1" cellpadding="8" cellspacing="0" rules="groups" frame="hsides">
                    <col width="63" />
                    <col width="143" />
                    <col width="399" />
                    <thead>
                        <tr valign="top">
                            <td>Impact</td>
                            <td>Situation</td>
                            <td>Definition</td>
                        </tr>
                    </thead>
                    <tbody>
                        <tr valign="top">
                            <td>
                                <bold>Positive</bold>
                            </td>
                            <td>
                                <break />
                            </td>
                            <td>
                                <break />
                            </td>
                        </tr>
                        <tr valign="top">
                            <td>
                                <break />
                            </td>
                            <td>New match identified</td>
                            <td>Words cannot be found in the ontology before correction, but can be found after correction, and the corrected word is suitable in context.</td>
                        </tr>
                        <tr valign="top">
                            <td>
                                <break />
                            </td>
                            <td>Wrong match identified</td>
                            <td>Words can be found in the ontology before correction, and cannot be found after correction, but the uncorrected word is unsuitable in context.</td>
                        </tr>
                        <tr valign="top">
                            <td>
                                <break />
                            </td>
                            <td>Better match identified</td>
                            <td>Both words before and after correction can be found in the ontology and the corrected word is more suitable in context.</td>
                        </tr>
                        <tr valign="top">
                            <td>
                                <bold>Negative</bold>
                            </td>
                            <td>Right match missed</td>
                            <td>Either or both words before and after correction can be found in the ontology, but the corrected word is inappropriate in context.</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <table-wrap position="float" id="table3">
                <label>Table 3</label>
                <caption>
                    <p>Results of spelling correction experiment (n=58).</p>
                </caption>
                <table width="686" border="1" cellpadding="7" cellspacing="0" rules="groups" frame="hsides">
                    <col width="82" />
                    <col width="190" />
                    <col width="154" />
                    <col width="94" />
                    <col width="94" />
                    <thead>
                        <tr valign="top">
                            <td>Impact</td>
                            <td>Example</td>
                            <td>Effect</td>
                            <td>Ontology, n (%)</td>
                            <td>Representative letter</td>
                        </tr>
                    </thead>
                    <tbody>
                        <tr valign="top">
                            <td>New match identified/<break />positive</td>
                            <td>&#8220;converts to seretonin&#8221; &#8594;<break />&#8220;converts to serotonin&#8221;</td>
                            <td>A match of &#8220;serotonin&#8221; is found</td>
                            <td>37 (64)</td>
                            <td>A</td>
                        </tr>
                        <tr valign="top">
                            <td>Wrong match identified/<break />positive</td>
                            <td>&#8220;I took tree pills&#8221; &#8594;<break />&#8220;I took three pills&#8221;</td>
                            <td>The improper match of &#8220;tree&#8221; is avoided</td>
                            <td>8 (14)</td>
                            <td>B</td>
                        </tr>
                        <tr valign="top">
                            <td>Better match identified/<break />positive</td>
                            <td>&#8220;last night before bad time&#8221; &#8594;<break />&#8220;last night before bedtime&#8221;</td>
                            <td>A better match of &#8220;bedtime&#8221; replaces &#8220;bad time&#8221;</td>
                            <td>5 (9)</td>
                            <td>C</td>
                        </tr>
                        <tr valign="top">
                            <td>Right match missed/<break />negative</td>
                            <td>&#8220;I'm no chemist&#8221; &#8594;<break />&#8220;I'm no chemistry&#8221;</td>
                            <td>A wanted match of &#8220;chemist&#8221; disappears</td>
                            <td>8 (14)</td>
                            <td>D</td>
                        </tr>
                        <tr valign="top">
                            <td>Total</td>
                            <td>
                                <break />
                            </td>
                            <td>
                                <break />
                            </td>
                            <td>58 (100)</td>
                            <td>F</td>
                        </tr>
                    </tbody>
                </table>
            </table-wrap>
            <p>The first column gives the situations that we defined in <xref ref-type="table" rid="table2">Table 2</xref>, including detailed types and their impact. The second column shows one example for each situation about how our system corrects the spelling errors. The third column explains the effect of the correction process on the sentences. The fourth column presents the number of matched pairs that conform to the corresponding situation. The letters in the fifth column represent the corresponding number in the fourth column (ie, A=37, B=8, C=5, D=8, and F=58).</p>
            <p>The results show that 64% (A divided by F, 37/58) of the words could not be found in the ontology before correction but could be found in the ontology after correction, and the corrected words were suitable in the context according to the situation definition.</p>
            <p>Similar to Wong and Glance [<xref ref-type="bibr" rid="ref1">1</xref>], we calculated the following expression to explain the performance of this system:</p>
            <p>Accuracy = (A+B+C)/F</p>
            <p>The accuracy&#8212;the ratio of misspelled words appropriately corrected in the 58 spelling pairs&#8212;was 86% (50/58). Also, we calculated the ratio of appropriate correction in the 271 overall spelling errors to be 85.2% (231/271). We also did a series of random sampling experiments; we randomly sampled 58 spelling pairs each time from the 271 spelling pairs. The trends of mean value and standard deviation are shown in <xref ref-type="fig" rid="figure4">Figure 4</xref>. The figure shows that, as the number of experiments increased, both the trends of mean value and standard deviation gradually became stable, respectively approaching 85.3% and 0.047.</p>
            <p>After the classification according to the types of spelling errors, from a total of 271 errors our system detected 186 (68.6%) typographical errors (saved in set T) and 85 (31.4%) linguistic errors (saved in set L). In addition, there were a total of 210 abbreviations, making up 0.98% of all words in the postings (n=21,358).</p>
            <fig id="figure4" position="float">
                <label>Figure 4</label>
                <caption>
                    <p>Trends of mean value and standard deviation with change in sample size.</p>
                </caption>
                <graphic xlink:href="medinform_v3i3e27_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple" />
            </fig>
        </sec>
        <sec sec-type="discussion">
            <title>Discussion</title>
            <sec>
                <title>System Performance</title>
                <p>From the correction results, we found that 64% (37/58) of the matched words were newly found, which proved that our correction process exerted a positive effect on increasing the accuracy of downstream biomedical research, such as NLP research. Using the same corpus, our system&#8217;s accuracy (50/58, 86%) was higher than that of most of the commonly used spelling checkers, including medical dictionary-based Aspell [<xref ref-type="bibr" rid="ref25">25</xref>], Microsoft Office Word 2013, and Jazzy Spell Checker [<xref ref-type="bibr" rid="ref26">26</xref>]. The result is shown in <xref ref-type="table" rid="table4">Table 4</xref> and the detailed data are included in <xref ref-type="app" rid="app2">Multimedia Appendix 2</xref>. This illustrates that our spelling correction system is a suitable and high-performance tool for consumer-generated content.</p>
                <table-wrap position="float" id="table4">
                    <label>Table 4</label>
                    <caption>
                        <p>Comparison of spell checking tools for finding correct words for misspelled words.</p>
                    </caption>
                    <table width="649" border="1" cellpadding="7" cellspacing="0" rules="groups" frame="hsides">
                        <col width="317" />
                        <col width="302" />
                        <thead>
                            <tr valign="top">
                                <td>Spell checking tool</td>
                                <td>Correct words found, n/n (%)</td>
                            </tr>
                        </thead>
                        <tbody>
                            <tr valign="top">
                                <td>Our method</td>
                                <td>50/58 (86)</td>
                            </tr>
                            <tr valign="top">
                                <td>Aspell [<xref ref-type="bibr" rid="ref25">25</xref>] with general dictionary</td>
                                <td>304/763 (39.8)</td>
                            </tr>
                            <tr valign="top">
                                <td>Aspell [<xref ref-type="bibr" rid="ref25">25</xref>] with medical dictionary</td>
                                <td>353/564 (62.6)</td>
                            </tr>
                            <tr valign="top">
                                <td>Microsoft Office Word 2013</td>
                                <td>313/431 (72.6)</td>
                            </tr>
                            <tr valign="top">
                                <td>Jazzy Spell Checker [<xref ref-type="bibr" rid="ref26">26</xref>]</td>
                                <td>240/574 (41.8)</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
                <p>It is noteworthy that, compared with other spelling checkers which usually provide several spelling suggestions to choose from for a spelling error, our method is more convenient and can directly provide the optimal candidate according to its context. In addition, in contrast to traditional spelling checkers such as Aspell, the corpus in our method does not need manual updates due to its Web page-based corpus. These characteristics are highly meaningful, especially for the automatic spell checking of big data.</p>
                <p>Moreover, unlike the method applied by Ruch et al [<xref ref-type="bibr" rid="ref7">7</xref>] in which spelling errors were artificially added into spelling error-free texts, our system obtained the original text directly from a health forum, which more objectively reflected the real situation of consumer-generated content.</p>
            </sec>
            <sec>
                <title>Classification</title>
                <p>From the classification results of spelling error types, it can be observed that errors in set L took up 31.4% of all spelling errors, which shows that correcting linguistic errors is indispensable when processing consumer-generated content. Systems that focus only on the correction of typographical errors, such as that of Peterson [<xref ref-type="bibr" rid="ref15">15</xref>], ignore a large number of the spelling errors.</p>
            </sec>
            <sec>
                <title>The Number of Abbreviations</title>
                <p>In EMRs, the ratio of abbreviations is 33.6% [<xref ref-type="bibr" rid="ref1">1</xref>], much higher than the ratio in consumer-generated content (0.98%). Therefore, detecting and correcting abbreviations in consumer-generated content appears to be much less important than in EMRs. Instead, from the results of classifying 271 spelling errors according to the meaning of corrected words (see <xref ref-type="table" rid="table5">Table 5</xref>), the correction systems for consumer-generated content should focus more on common vocabularies.</p>
                <p>In <xref ref-type="table" rid="table5">Table 5</xref>, common vocabulary refers to those words people frequently use in daily life (eg, &#8220;good,&#8221; &#8220;hadn&#8217;t,&#8221; and &#8220;loose&#8221;). Medical vocabulary refers to words that cannot be defined as a symptom, drug, or disease but are still used in the medical field such as &#8220;hygiene.&#8221; Extra space refers to situations in which consumers enter extra spaces between words (eg, &#8220;weight__is&#8221; where there are two spaces between &#8220;weight&#8221; and &#8220;is&#8221;).</p>
                <table-wrap position="float" id="table5">
                    <label>Table 5</label>
                    <caption>
                        <p>Classification of misspelled words (n=271).</p>
                    </caption>
                    <table width="649" border="1" cellpadding="7" cellspacing="0" rules="groups" frame="hsides">
                        <col width="317" />
                        <col width="302" />
                        <thead>
                            <tr valign="top">
                                <td>Type of word, or issue</td>
                                <td>Number of words, n (%)</td>
                            </tr>
                        </thead>
                        <tbody>
                            <tr valign="top">
                                <td>Common vocabulary</td>
                                <td>151 (55.7)</td>
                            </tr>
                            <tr valign="top">
                                <td>Symptom</td>
                                <td>8 (3.0)</td>
                            </tr>
                            <tr valign="top">
                                <td>Drug</td>
                                <td>12 (4.4)</td>
                            </tr>
                            <tr valign="top">
                                <td>Medical vocabulary</td>
                                <td>14 (5.2)</td>
                            </tr>
                            <tr valign="top">
                                <td>Disease</td>
                                <td>1 (0.4)</td>
                            </tr>
                            <tr valign="top">
                                <td>Extra space</td>
                                <td>85 (31.4)</td>
                            </tr>
                        </tbody>
                    </table>
                </table-wrap>
            </sec>
            <sec>
                <title>Evaluation</title>
                <p>During the evaluation process, only 58 words matched in the ontology, which was only about one-fifth (21.4%) of the number that would otherwise have needed to be processed (n=271); this largely reduced the manual inspection time. This is the reason why we put forward the idea of using the NCBO Annotator to pick out the words related to the biomedical fields.</p>
                <p>The ratio of misspelled words that were appropriately corrected in the 58 matched pairs was close to that for the overall spelling errors (the difference was 0.97%, less than 1%), and the accuracy (50/58, 86%) fell within the reliable range, within one standard deviation from the mean value of 85.2% (range 80.6% to 89.9%). For these reasons, the matched pairs selected by the NCBO Annotator can well represent the overall performance of our system.</p>
            </sec>
            <sec>
                <title>Future Work</title>
                <p>In future work, different types of ontologies are needed to test and verify whether our evaluation method can be applied in other fields. Moreover, we will add and mix more correction tools in addition to Google Spell Checker to promote the overall performance of our spelling correction system.</p>
            </sec>
            <sec>
                <title>Conclusions</title>
                <p>From this study, the following can be confirmed:</p>
                <p>1. Our system is suitable for spelling correction in consumer-generated content. The unique features in consumer-generated content have been identified and taken into consideration. Google Spell Checker displays high performance in spelling error detection and correction in consumer-generated content.</p>
                <p>2. Context sensitivity is indispensable in the correction process.</p>
                <p>3. Our evaluation method, taking advantage of biomedical ontology, can effectively evaluate the correction system and reduce manual inspection time on a large scale.</p>
                <p>4. In consumer-generated content, consumers rarely use abbreviations, unlike in EMRs.</p>
            </sec>
        </sec>
    </body>
    <back>
        <app-group>
            <app id="app1">
                <title>Multimedia Appendix 1</title>
                <p>Original data: 150 postings.</p>
                <media xlink:href="medinform_v3i3e27_app1.xlsx" xlink:title="XLSX File (Microsoft Excel File), 103KB" />
            </app>
            <app id="app2">
                <title>Multimedia Appendix 2</title>
                <p>Result file.</p>
                <media xlink:href="medinform_v3i3e27_app2.pdf" xlink:title="PDF File (Adobe PDF File), 338KB" />
            </app>
        </app-group>
        <glossary>
            <title>Abbreviations</title>
            <def-list>
                <def-item>
                    <term id="abb1">API</term>
                    <def>
                        <p>application programming interface</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb2">BBS</term>
                    <def>
                        <p>bulletin board system</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb3">C</term>
                    <def>
                        <p>corrected</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb4">EMR</term>
                    <def>
                        <p>electronic medical record</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb5">gtt</term>
                    <def>
                        <p>guttae</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb6">L</term>
                    <def>
                        <p>linguistic error</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb7">NCBO</term>
                    <def>
                        <p>National Center for Biomedical Ontology</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb8">NLP</term>
                    <def>
                        <p>natural language processing</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb9">NTG</term>
                    <def>
                        <p>nitroglycerin</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb10">PVCs</term>
                    <def>
                        <p>premature ventricular contractions</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb11">SNOMED CT</term>
                    <def>
                        <p>Systematized Nomenclature of Medicine &#8211; Clinical Terms</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb12">T</term>
                    <def>
                        <p>typographical error</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb13">U</term>
                    <def>
                        <p>uncorrected</p>
                    </def>
                </def-item>
                <def-item>
                    <term id="abb14">VSS</term>
                    <def>
                        <p>vital signs stable</p>
                    </def>
                </def-item>
            </def-list>
        </glossary>
        <ack>
            <p>The authors would like to thank Dr Pan Du for his comments. This work is supported by the Chinese 1000 Young Talent program.</p>
        </ack>
        <fn-group>
            <fn fn-type="conflict">
                <p>None declared.</p>
            </fn>
        </fn-group>
        <ref-list>
            <ref id="ref1">
                <label>1</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Wong</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Glance</surname>
                            <given-names>D</given-names>
                        </name>
                    </person-group>
                    <article-title>Statistical semantic and clinician confidence analysis for correcting abbreviations and spelling errors in clinical progress notes</article-title>
                    <source>Artif Intell Med</source>
                    <year>2011</year>
                    <month>11</month>
                    <volume>53</volume>
                    <issue>3</issue>
                    <fpage>171</fpage>
                    <lpage>180</lpage>
                    <pub-id pub-id-type="doi">10.1016/j.artmed.2011.08.003</pub-id>
                    <pub-id pub-id-type="medline">21924593</pub-id>
                    <pub-id pub-id-type="pii">S0933-3657(11)00107-2</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref2">
                <label>2</label>
                <nlm-citation citation-type="book">
                    <person-group person-group-type="author">
                        <collab>National Research Council (US) Committee on A Framework for Developing a New Taxonomy of Disease</collab>
                    </person-group>
                    <source>Toward Precision Medicine: Building a Knowledge Network for Biomedical Research and a New Taxonomy of Disease</source>
                    <year>2011</year>
                    <publisher-loc>Washington, DC</publisher-loc>
                    <publisher-name>National Academies Press (US)</publisher-name>
                </nlm-citation>
            </ref>
            <ref id="ref3">
                <label>3</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Zeng</surname>
                            <given-names>Q</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kogan</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Ash</surname>
                            <given-names>N</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Greenes</surname>
                            <given-names>RA</given-names>
                        </name>
                    </person-group>
                    <article-title>Patient and clinician vocabulary: how different are they?</article-title>
                    <source>Stud Health Technol Inform</source>
                    <year>2001</year>
                    <volume>84</volume>
                    <issue>Pt 1</issue>
                    <fpage>399</fpage>
                    <lpage>403</lpage>
                    <pub-id pub-id-type="medline">11604772</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref4">
                <label>4</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Ristad</surname>
                            <given-names>E</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Yianilos</surname>
                            <given-names>P</given-names>
                        </name>
                    </person-group>
                    <article-title>Learning string-edit distance</article-title>
                    <source>IEEE Trans Pattern Anal Mach Intell</source>
                    <year>1998</year>
                    <month>05</month>
                    <volume>20</volume>
                    <issue>5</issue>
                    <fpage>522</fpage>
                    <lpage>532</lpage>
                    <pub-id pub-id-type="doi">10.1109/34.682181</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref5">
                <label>5</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Levenshtein</surname>
                            <given-names>VI</given-names>
                        </name>
                    </person-group>
                    <article-title>Binary codes capable of correcting deletions, insertions and reversals</article-title>
                    <source>Soviet Physics Doklady</source>
                    <year>1966</year>
                    <month>02</month>
                    <volume>10</volume>
                    <fpage>707</fpage>
                </nlm-citation>
            </ref>
            <ref id="ref6">
                <label>6</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Courtin</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Dujardin</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kowarski</surname>
                            <given-names>I</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Genthial</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>De Lima</surname>
                            <given-names>VL</given-names>
                        </name>
                    </person-group>
                    <article-title>Towards a complete detection/correction system</article-title>
                    <source>Proceedings of the International Conference on Current Issues in Computational Linguistics</source>
                    <year>1991</year>
                    <conf-name>International Conference on Current Issues in Computational Linguistics</conf-name>
                    <conf-date>1991</conf-date>
                    <conf-loc>Penang, Malaysia</conf-loc>
                    <fpage>158</fpage>
                    <lpage>173</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref7">
                <label>7</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Ruch</surname>
                            <given-names>P</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Baud</surname>
                            <given-names>R</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Geissb&#252;hler</surname>
                            <given-names>A</given-names>
                        </name>
                    </person-group>
                    <article-title>Using lexical disambiguation and named-entity recognition to improve spelling correction in the electronic patient record</article-title>
                    <source>Artif Intell Med</source>
                    <year>2003</year>
                    <month>10</month>
                    <volume>29</volume>
                    <issue>1-2</issue>
                    <fpage>169</fpage>
                    <lpage>184</lpage>
                    <pub-id pub-id-type="medline">12957786</pub-id>
                    <pub-id pub-id-type="pii">S0933365703000526</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref8">
                <label>8</label>
                <nlm-citation citation-type="web">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Russell</surname>
                            <given-names>RC</given-names>
                        </name>
                    </person-group>
                    <source>United States Patent and Trademark Office</source>
                    <year>1922</year>
                    <month>11</month>
                    <day>14</day>
                    <access-date>2015-07-19</access-date>
                    <publisher-loc>Washington, DC</publisher-loc>
                    <publisher-name>United States Patent Office</publisher-name>
                    <comment>Patent number: US001435663<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.google.com.hk/patents/US1435663?dq=1,435,663&#38;hl=en&#38;sa=X&#38;ved=0CB0Q6AEwAGoVChMI78js_4GAxwIVZOCmCh3IYADK">https://www.google.com.hk/patents/US1435663?dq=1,435,663&#38;hl=en&#38;sa=X&#38;ved=0CB0Q6AEwAGoVChMI78js_4GAxwIVZOCmCh3IYADK</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a99fFxoF</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref9">
                <label>9</label>
                <nlm-citation citation-type="web">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Russell</surname>
                            <given-names>RC</given-names>
                        </name>
                    </person-group>
                    <source>United States Patent and Trademark Office</source>
                    <year>1918</year>
                    <month>04</month>
                    <day>02</day>
                    <access-date>2015-07-17</access-date>
                    <publisher-loc>Washington, DC</publisher-loc>
                    <publisher-name>United States Patent Office</publisher-name>
                    <comment>Patent number: US001261167<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://www.google.com.hk/patents/US1261167?dq=1,261,167&#38;hl=en&#38;sa=X&#38;ved=0CB0Q6AEwAGoVChMIh-mFtIKAxwIVZCemCh3n2wUx">https://www.google.com.hk/patents/US1261167?dq=1,261,167&#38;hl=en&#38;sa=X&#38;ved=0CB0Q6AEwAGoVChMIh-mFtIKAxwIVZCemCh3n2wUx</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a5p0MoqO</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref10">
                <label>10</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Golding</surname>
                            <given-names>A</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Schabes</surname>
                            <given-names>Y</given-names>
                        </name>
                    </person-group>
                    <article-title>Combining trigram-based and feature-based methods for context-sensitive spelling correction</article-title>
                    <source>Proceedings of the 34th Annual Meeting of the Association for Computational Linguistics</source>
                    <year>1996</year>
                    <conf-name>34th Annual Meeting of the Association for Computational Linguistics</conf-name>
                    <conf-date>June 24-27, 1996</conf-date>
                    <conf-loc>Santa Cruz, CA</conf-loc>
                    <publisher-loc>Stroudsburg, PA</publisher-loc>
                    <publisher-name>Association for Computational Linguistics</publisher-name>
                    <fpage>71</fpage>
                    <lpage>78</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://aclweb.org/anthology/P/P96/P96-1000.pdf" />
                    </comment>
                </nlm-citation>
            </ref>
            <ref id="ref11">
                <label>11</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Stevenson</surname>
                            <given-names>M</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Guo</surname>
                            <given-names>Y</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Gaizauskas</surname>
                            <given-names>R</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Martinez</surname>
                            <given-names>D</given-names>
                        </name>
                    </person-group>
                    <article-title>Disambiguation of biomedical text using diverse sources of information</article-title>
                    <source>BMC Bioinformatics</source>
                    <year>2008</year>
                    <volume>9 Suppl 11</volume>
                    <fpage>S7</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.biomedcentral.com/1471-2105/9%20Suppl%2011/S7" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1186/1471-2105-9-S11-S7</pub-id>
                    <pub-id pub-id-type="medline">19025693</pub-id>
                    <pub-id pub-id-type="pii">1471-2105-9-S11-S7</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2586756</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref12">
                <label>12</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Crowell</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Zeng</surname>
                            <given-names>Q</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Ngo</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Lacroix</surname>
                            <given-names>E</given-names>
                        </name>
                    </person-group>
                    <article-title>A frequency-based technique to improve the spelling suggestion rank in medical queries</article-title>
                    <source>J Am Med Inform Assoc</source>
                    <year>2004</year>
                    <month>06</month>
                    <volume>11</volume>
                    <issue>3</issue>
                    <fpage>179</fpage>
                    <lpage>185</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=14764616" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1197/jamia.M1474</pub-id>
                    <pub-id pub-id-type="medline">14764616</pub-id>
                    <pub-id pub-id-type="pii">M1474</pub-id>
                    <pub-id pub-id-type="pmcid">PMC400516</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref13">
                <label>13</label>
                <nlm-citation citation-type="book">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Jurafsky</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Martin</surname>
                            <given-names>JH</given-names>
                        </name>
                    </person-group>
                    <source>Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition</source>
                    <year>2000</year>
                    <month>01</month>
                    <day>26</day>
                    <publisher-loc>Upper Saddle River, NJ</publisher-loc>
                    <publisher-name>Pearson Education</publisher-name>
                </nlm-citation>
            </ref>
            <ref id="ref14">
                <label>14</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Wilbur</surname>
                            <given-names>WJ</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kim</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Xie</surname>
                            <given-names>N</given-names>
                        </name>
                    </person-group>
                    <article-title>Spelling correction in the PubMed search engine</article-title>
                    <source>Inf Retr Boston</source>
                    <year>2006</year>
                    <month>11</month>
                    <volume>9</volume>
                    <issue>5</issue>
                    <fpage>543</fpage>
                    <lpage>564</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/18080004" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1007/s10791-006-9002-8</pub-id>
                    <pub-id pub-id-type="medline">18080004</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2137159</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref15">
                <label>15</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Peterson</surname>
                            <given-names>JL</given-names>
                        </name>
                    </person-group>
                    <article-title>Computer programs for detecting and correcting spelling errors</article-title>
                    <source>Commun ACM</source>
                    <year>1980</year>
                    <month>12</month>
                    <volume>23</volume>
                    <issue>12</issue>
                    <fpage>676</fpage>
                    <lpage>687</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref16">
                <label>16</label>
                <nlm-citation citation-type="confproc">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Jacquemont</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Jacquenet</surname>
                            <given-names>F</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Sebban</surname>
                            <given-names>M</given-names>
                        </name>
                    </person-group>
                    <article-title>Correct your text with Google</article-title>
                    <source>Proceedings of the IEEE/WIC/ACM International Conference on Web Intelligence</source>
                    <year>2007</year>
                    <conf-name>IEEE/WIC/ACM International Conference on Web Intelligence</conf-name>
                    <conf-date>November 2-5, 2007</conf-date>
                    <conf-loc>Fremont, CA</conf-loc>
                    <publisher-loc>Washington, DC</publisher-loc>
                    <publisher-name>IEEE</publisher-name>
                    <fpage>170</fpage>
                    <lpage>176</lpage>
                </nlm-citation>
            </ref>
            <ref id="ref17">
                <label>17</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Doan</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Bastarache</surname>
                            <given-names>L</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Klimkowski</surname>
                            <given-names>S</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Denny</surname>
                            <given-names>JC</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>H</given-names>
                        </name>
                    </person-group>
                    <article-title>Integrating existing natural language processing tools for medication extraction from discharge summaries</article-title>
                    <source>J Am Med Inform Assoc</source>
                    <year>2010</year>
                    <month>10</month>
                    <volume>17</volume>
                    <issue>5</issue>
                    <fpage>528</fpage>
                    <lpage>531</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=20819857" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1136/jamia.2010.003855</pub-id>
                    <pub-id pub-id-type="medline">20819857</pub-id>
                    <pub-id pub-id-type="pii">17/5/528</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2995674</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref18">
                <label>18</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Patrick</surname>
                            <given-names>J</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Li</surname>
                            <given-names>M</given-names>
                        </name>
                    </person-group>
                    <article-title>High accuracy information extraction of medication information from clinical notes: 2009 i2b2 medication extraction challenge</article-title>
                    <source>J Am Med Inform Assoc</source>
                    <year>2010</year>
                    <month>10</month>
                    <volume>17</volume>
                    <issue>5</issue>
                    <fpage>524</fpage>
                    <lpage>527</lpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jamia.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=20819856" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1136/jamia.2010.003939</pub-id>
                    <pub-id pub-id-type="medline">20819856</pub-id>
                    <pub-id pub-id-type="pii">17/5/524</pub-id>
                    <pub-id pub-id-type="pmcid">PMC2995676</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref19">
                <label>19</label>
                <nlm-citation citation-type="web">
                    <source>MedHelp</source>
                    <access-date>2015-07-16</access-date>
                    <comment>Zoloft<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.medhelp.org/tags/show/7661/Zoloft?section=subjects">http://www.medhelp.org/tags/show/7661/Zoloft?section=subjects</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a3pOlVQn</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref20">
                <label>20</label>
                <nlm-citation citation-type="web">
                    <source>BioPortal</source>
                    <access-date>2015-07-16</access-date>
                    <comment>Annotator<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://bioportal.bioontology.org/annotator">http://bioportal.bioontology.org/annotator</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a3pQXekb</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref21">
                <label>21</label>
                <nlm-citation citation-type="journal">
                    <person-group person-group-type="author">
                        <name name-style="western">
                            <surname>Xu</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Wang</surname>
                            <given-names>H</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Cheng</surname>
                            <given-names>W</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Fu</surname>
                            <given-names>D</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Xia</surname>
                            <given-names>T</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Kibbe</surname>
                            <given-names>WA</given-names>
                        </name>
                        <name name-style="western">
                            <surname>Lin</surname>
                            <given-names>SM</given-names>
                        </name>
                    </person-group>
                    <article-title>A framework for annotating human genome in disease context</article-title>
                    <source>PLoS One</source>
                    <year>2012</year>
                    <volume>7</volume>
                    <issue>12</issue>
                    <fpage>e49686</fpage>
                    <comment>
                        <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0049686" />
                    </comment>
                    <pub-id pub-id-type="doi">10.1371/journal.pone.0049686</pub-id>
                    <pub-id pub-id-type="medline">23251346</pub-id>
                    <pub-id pub-id-type="pii">PONE-D-12-18102</pub-id>
                    <pub-id pub-id-type="pmcid">PMC3519466</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref22">
                <label>22</label>
                <nlm-citation citation-type="web">
                    <person-group person-group-type="author">
                        <collab>Nirmaldasan</collab>
                    </person-group>
                    <source>Readability Monitor</source>
                    <year>2008</year>
                    <month>07</month>
                    <day>28</day>
                    <access-date>2015-07-16</access-date>
                    <comment>The average sentence length<ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="https://strainindex.wordpress.com/2008/07/28/the-average-sentence-length/">https://strainindex.wordpress.com/2008/07/28/the-average-sentence-length/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a3pM1oPP</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref23">
                <label>23</label>
                <nlm-citation citation-type="web">
                    <source>US National Library of Medicine</source>
                    <access-date>2015-07-16</access-date>
                    <comment>RxNorm <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.nlm.nih.gov/research/umls/rxnorm/">http://www.nlm.nih.gov/research/umls/rxnorm/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6a3pRpbXj</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref24">
                <label>24</label>
                <nlm-citation citation-type="web">
                    <source>International Health Terminology Standards Development Organisation</source>
                    <access-date>2015-05-06</access-date>
                    <comment>SNOMED CT <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://www.ihtsdo.org/snomed-ct/">http://www.ihtsdo.org/snomed-ct/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6YJiIBBek</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref25">
                <label>25</label>
                <nlm-citation citation-type="web">
                    <source>Aspell.net</source>
                    <access-date>2015-05-06</access-date>
                    <comment>GNU Aspell <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://aspell.net/">http://aspell.net/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6YJhUb44E</pub-id>
                </nlm-citation>
            </ref>
            <ref id="ref26">
                <label>26</label>
                <nlm-citation citation-type="web">
                    <source>sourceforge.net</source>
                    <access-date>2015-05-06</access-date>
                    <comment>Jazzy - Java spell check API <ext-link xmlns:xlink="http://www.w3.org/1999/xlink" xlink:type="simple" xlink:href="http://jazzy.sourceforge.net/">http://jazzy.sourceforge.net/</ext-link>
                    </comment>
                    <pub-id pub-id-type="other">6YJiDnQAK</pub-id>
                </nlm-citation>
            </ref>
        </ref-list>
    </back>
</article>
