<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    <div class="moz-cite-prefix">On 10/12/20 4:54 PM, Shawn Steele
      wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:MN2PR00MB067113EB97319E2B7AC0EF0582071@MN2PR00MB0671.namprd00.prod.outlook.com">
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      <meta name="Generator" content="Microsoft Word 15 (filtered
        medium)">
      <style><!--
/* Font Definitions */
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:"Yu Gothic";
        panose-1:2 11 4 0 0 0 0 0 0 0;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"\@Yu Gothic";
        panose-1:2 11 4 0 0 0 0 0 0 0;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0in;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:blue;
        text-decoration:underline;}
span.EmailStyle19
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:8.5in 11.0in;
        margin:1.0in 1.0in 1.0in 1.0in;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:1913008283;
        mso-list-template-ids:-1333123198;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:1.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:"Courier New";
        mso-bidi-font-family:"Times New Roman";}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:1.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:2.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:3.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.0in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:4.5in;
        mso-level-number-position:left;
        text-indent:-.25in;
        mso-ansi-font-size:10.0pt;
        font-family:Wingdings;}
@list l1
        {mso-list-id:1966889172;
        mso-list-type:hybrid;
        mso-list-template-ids:-1878765158 1864168670 67698691 67698693 67698689 67698691 67698693 67698689 67698691 67698693;}
@list l1:level1
        {mso-level-start-at:0;
        mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;
        mso-fareast-font-family:"Yu Gothic";
        mso-bidi-font-family:"Times New Roman";}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:o;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:"Courier New";}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:none;
        mso-level-number-position:left;
        text-indent:-.25in;
        font-family:Wingdings;}
ol
        {margin-bottom:0in;}
ul
        {margin-bottom:0in;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
      <div class="WordSection1">
        <p class="MsoNormal">I’m having trouble with the attempt to be
          this prescriptive.<br>
          <br>
          These make sense:  “Use Unicode!”<o:p></o:p></p>
        <ul type="disc">
          <li class="MsoNormal"
            style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
            level1 lfo1">
            If possible, mandate use of UTF-8 without a BOM; diagnose
            the presence of a BOM in consumed text as an error, and
            produce text without a BOM.<o:p></o:p></li>
          <li class="MsoNormal"
            style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
            level1 lfo1">
            Alternatively, swallow the BOM if present.<o:p></o:p></li>
        </ul>
        <p class="MsoNormal"
          style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">After
          that the situation is clearly hopeless.  Applications should
          Use Unicode, eg: UTF-8, and clearly there are cases happening
          where that isn’t happening.  Trying to prescribe that
          negotiation should therefore happen, or that BOMs should be
          interpreted or whatever is fairly meaningless at that point.
           Given that the higher-order guidance of “Use Unicode” has
          already been ignored, at this point it’s garbage-in,
          garbage-out.  Clearly the app/whatever is ignoring the “use
          unicode” guidance for some legacy reason.  If they could
          adapt, it should be to use UTF-8.   It *<b>might</b>* be
          helpful to say something about a BOM likely indicating UTF-8
          text in otherwise unspecified data, but prescriptive stuff is
          pointless, it’s legacy stuff that behaves in a legacy fashion
          for a reason and saying they should have done it differently
          20 years ago isn’t going to help
          <span style="font-family:'Segoe UI Emoji',sans-serif">😊</span>  </p>
      </div>
    </blockquote>
    <p>There are applications that, for legacy reasons, are unable to
      change their default encoding to UTF-8, but that also need to
      handle UTF-8 text.  It is not clear to me that such situations are
      hopeless or that they cannot be improved.</p>
    <p>The prescription offered follows what you suggest.  The first
      three cases are all of the "use Unicode!" variety.  The
      distinction between the third and the fourth is to relegate use of
      a BOM as an encoding signature to the last resort option.  The
      intent is to make it clear, with stronger motivation than is
      currently present in the Unicode standard, that use of a BOM in
      UTF-8 is not a best practice today.<br>
    </p>
    <p>Tom.<br>
    </p>
    <blockquote type="cite"
cite="mid:MN2PR00MB067113EB97319E2B7AC0EF0582071@MN2PR00MB0671.namprd00.prod.outlook.com">
      <div class="WordSection1">
        <p class="MsoNormal"
          style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><o:p></o:p></p>
        <p class="MsoNormal"
          style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">-Shawn<o:p></o:p></p>
        <p class="MsoNormal"><o:p> </o:p></p>
        <div>
          <div style="border:none;border-top:solid #E1E1E1
            1.0pt;padding:3.0pt 0in 0in 0in">
            <p class="MsoNormal"><b>From:</b> Unicode
              <a class="moz-txt-link-rfc2396E" href="mailto:unicode-bounces@unicode.org">&lt;unicode-bounces@unicode.org&gt;</a> <b>On Behalf Of
              </b>Tom Honermann via Unicode<br>
              <b>Sent:</b> Monday, October 12, 2020 7:03 AM<br>
              <b>To:</b> Alisdair Meredith <a class="moz-txt-link-rfc2396E" href="mailto:alisdairm@me.com">&lt;alisdairm@me.com&gt;</a><br>
              <b>Cc:</b> <a class="moz-txt-link-abbreviated" href="mailto:sg16@lists.isocpp.org">sg16@lists.isocpp.org</a>; Unicode List
              <a class="moz-txt-link-rfc2396E" href="mailto:unicode@unicode.org">&lt;unicode@unicode.org&gt;</a><br>
              <b>Subject:</b> Re: [SG16] Draft proposal: Clarify
              guidance for use of a BOM as a UTF-8 encoding signature<o:p></o:p></p>
          </div>
        </div>
        <p class="MsoNormal"><o:p> </o:p></p>
        <div>
          <p class="MsoNormal">Great, here is the change I'm making to
            address this:<o:p></o:p></p>
        </div>
        <blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
          <div>
            <p class="MsoNormal">Protocol designers:<o:p></o:p></p>
          </div>
          <div>
            <ul type="disc">
              <li class="MsoNormal"
                style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
                level1 lfo1">
                If possible, mandate use of UTF-8 without a BOM;
                diagnose the presence of a BOM in consumed text as an
                error, and produce text without a BOM.<o:p></o:p></li>
              <li class="MsoNormal"
                style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
                level1 lfo1">
                Otherwise, if possible, mandate use of UTF-8 with or
                without a BOM; accept and discard a BOM in consumed
                text, and produce text without a BOM.<o:p></o:p></li>
              <li class="MsoNormal"
                style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
                level1 lfo1">
                Otherwise, if possible, use UTF-8 as the default
                encoding with use of other encodings negotiated using
                information other than a BOM; accept and discard a BOM
                in consumed text, and produce text without a BOM.<o:p></o:p></li>
              <li class="MsoNormal"
                style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto;mso-list:l0
                level1 lfo1">
                Otherwise, require the presence of a BOM to
                differentiate UTF-8 encoded text in both consumed and
                produced text<b><span style="color:#009900"> unless the
                    absence of a BOM would result in the text being
                    interpreted as an ASCII-based encoding and the UTF-8
                    text contains no non-ASCII characters (the exception
                    is intended to avoid the addition of a BOM to ASCII
                    text thus rendering such text as non-ASCII)</span></b>.
                This approach should be reserved for scenarios in which
                UTF-8 cannot be adopted as a default due to backward
                compatibility concerns.<o:p></o:p></li>
            </ul>
          </div>
        </blockquote>
        <div>
          <p class="MsoNormal">Tom.<o:p></o:p></p>
        </div>
        <div>
          <p class="MsoNormal"><o:p> </o:p></p>
        </div>
        <div>
          <p class="MsoNormal">On 10/12/20 8:40 AM, Alisdair Meredith
            wrote:<o:p></o:p></p>
        </div>
        <blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
          <p class="MsoNormal">That addresses my main concern.
             Essentially, best practice (for UTF-8) would be no BOM
            unless the document contains code points that require
            multiple code units to express.
            <o:p></o:p></p>
          <div>
            <p class="MsoNormal"><o:p> </o:p></p>
          </div>
          <div>
            <p class="MsoNormal">AlisdairM<o:p></o:p></p>
            <div>
              <p class="MsoNormal"><br>
                <br>
                <o:p></o:p></p>
              <blockquote style="margin-top:5.0pt;margin-bottom:5.0pt">
                <div>
                  <p class="MsoNormal">On Oct 11, 2020, at 23:22, Tom
                    Honermann &lt;<a href="mailto:tom@honermann.net"
                      moz-do-not-send="true">tom@honermann.net</a>&gt;
                    wrote:<o:p></o:p></p>
                </div>
                <p class="MsoNormal"><o:p> </o:p></p>
                <div>
                  <div>
                    <div>
                      <p class="MsoNormal">On 10/10/20 7:58 PM, Alisdair
                        Meredith via SG16 wrote:<o:p></o:p></p>
                    </div>
                    <blockquote
                      style="margin-top:5.0pt;margin-bottom:5.0pt">
                      <p class="MsoNormal">One concern I have, that
                        might lead into rationale for the current
                        discouragement,
                        <o:p></o:p></p>
                      <div>
                        <p class="MsoNormal">is that I would hate to see
                          a best practice that pushes a BOM into ASCII
                          files.<o:p></o:p></p>
                      </div>
                      <div>
                        <p class="MsoNormal">One of the nice properties
                          of UTF-8 is that a valid ASCII file (still
                          very common) is<o:p></o:p></p>
                      </div>
                      <div>
                        <p class="MsoNormal">also a valid UTF-8 file.
                           Changing best practice would encourage
                          updating those<o:p></o:p></p>
                      </div>
                      <div>
                        <p class="MsoNormal">files to be no longer
                          ASCII.<o:p></o:p></p>
                      </div>
                    </blockquote>
                    <p class="MsoNormal"
                      style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Thanks,
                      Alisdair.  I think that concern is implicitly
                      addressed by the suggested resolutions, but
                      perhaps that can be made more clear.  One
                      possibility would be to modify the "protocol
                      designer" guidelines to address the case where a
                      protocol's default encoding is ASCII based and to
                      specify that a BOM is only required for UTF-8 text
                      that contains non-ASCII characters.  Would that be
                      helpful?<o:p></o:p></p>
                    <p class="MsoNormal"
                      style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Tom.<o:p></o:p></p>
                    <blockquote
                      style="margin-top:5.0pt;margin-bottom:5.0pt">
                      <div>
                        <p class="MsoNormal"><o:p> </o:p></p>
                      </div>
                      <div>
                        <p class="MsoNormal">AlisdairM<o:p></o:p></p>
                        <div>
                          <p class="MsoNormal"><br>
                            <br>
                            <o:p></o:p></p>
                          <blockquote
                            style="margin-top:5.0pt;margin-bottom:5.0pt">
                            <div>
                              <p class="MsoNormal">On Oct 10, 2020, at
                                14:54, Tom Honermann via SG16 &lt;<a
                                  href="mailto:sg16@lists.isocpp.org"
                                  moz-do-not-send="true">sg16@lists.isocpp.org</a>&gt;
                                wrote:<o:p></o:p></p>
                            </div>
                            <p class="MsoNormal"><o:p> </o:p></p>
                            <div>
                              <div>
                                <p class="MsoNormal"
                                  style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Attached
                                  is a draft proposal for the Unicode
                                  standard that intends to clarify the
                                  current recommendation regarding use
                                  of a BOM in UTF-8 text.  This is
                                  follow up to
                                  <a
                                    href="https://corp.unicode.org/pipermail/unicode/2020-June/008713.html"
                                    moz-do-not-send="true">discussion on
                                    the Unicode mailing list</a> back in
                                  June.<o:p></o:p></p>
                                <p class="MsoNormal"
                                  style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Feedback
                                  is welcome.  I plan to
                                  <a
                                    href="https://www.unicode.org/pending/docsubmit.html"
                                    moz-do-not-send="true">submit</a>
                                  this to the UTC in a week or so
                                  pending review feedback.<o:p></o:p></p>
                                <p class="MsoNormal"
                                  style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto">Tom.<o:p></o:p></p>
                              </div>
                              <p class="MsoNormal">&lt;Unicode-BOM-guidance.pdf&gt;--
                                <br>
                                SG16 mailing list<br>
                                <a href="mailto:SG16@lists.isocpp.org"
                                  moz-do-not-send="true">SG16@lists.isocpp.org</a><br>
                                <a
                                  href="https://lists.isocpp.org/mailman/listinfo.cgi/sg16"
                                  moz-do-not-send="true">https://lists.isocpp.org/mailman/listinfo.cgi/sg16</a><o:p></o:p></p>
                            </div>
                          </blockquote>
                        </div>
                        <p class="MsoNormal"><o:p> </o:p></p>
                      </div>
                      <p class="MsoNormal"><br>
                        <br>
                        <o:p></o:p></p>
                    </blockquote>
                    <p class="MsoNormal"
                      style="mso-margin-top-alt:auto;mso-margin-bottom-alt:auto"><o:p> </o:p></p>
                  </div>
                </div>
              </blockquote>
            </div>
            <p class="MsoNormal"><o:p> </o:p></p>
          </div>
        </blockquote>
        <p><o:p> </o:p></p>
      </div>
    </blockquote>
    <p><br>
    </p>
  </body>
</html>