|
|
<!DOCTYPE html> <html lang="en"> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1"> <meta name="theme-color" content="#375EAB">
<title>utf8 - The Go Programming Language</title>
<link type="text/css" rel="stylesheet" href="../../../lib/godoc/style.css">
<link rel="stylesheet" href="../../../lib/godoc/jquery.treeview.css"> <script type="text/javascript">window.initFuncs = [];</script> </head> <body>
<div id='lowframe' style="position: fixed; bottom: 0; left: 0; height: 0; width: 100%; border-top: thin solid grey; background-color: white; overflow: auto;"> ... </div><!-- #lowframe -->
<div id="topbar" class="wide"><div class="container"> <div class="top-heading" id="heading-wide"><a href="http://localhost:6060/">The Go Programming Language</a></div> <div class="top-heading" id="heading-narrow"><a href="http://localhost:6060/">Go</a></div> <a href="index.html#" id="menu-button"><span id="menu-button-arrow">▽</span></a> <form method="GET" action="http://localhost:6060/search"> <div id="menu"> <a href="http://localhost:6060/doc/">Documents</a> <a href="http://localhost:6060/pkg/">Packages</a> <a href="http://localhost:6060/project/">The Project</a> <a href="http://localhost:6060/help/">Help</a> <a href="http://localhost:6060/blog/">Blog</a>
<input type="text" id="search" name="q" class="inactive" value="Search" placeholder="Search"> </div> </form>
</div></div>
<div id="page" class="wide"> <div class="container">
<h1>Package utf8</h1>
<div id="nav"></div>
<!--
Copyright 2009 The Go Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the LICENSE file. --> <!--
Note: Static (i.e., not template-generated) href and id attributes start with "pkg-" to make it impossible for them to conflict with generated attributes (some of which correspond to Go identifiers). -->
<script type='text/javascript'> document.ANALYSIS_DATA = null; document.CALLGRAPH = null; </script>
<div id="short-nav"> <dl> <dd><code>import "unicode/utf8"</code></dd> </dl> <dl> <dd><a href="index.html#pkg-overview" class="overviewLink">Overview</a></dd> <dd><a href="index.html#pkg-index" class="indexLink">Index</a></dd> <dd><a href="index.html#pkg-examples" class="examplesLink">Examples</a></dd> </dl> </div> <!-- The package's Name is printed as title by the top-level template --> <div id="pkg-overview" class="toggleVisible"> <div class="collapsed"> <h2 class="toggleButton" title="Click to show Overview section">Overview ▹</h2> </div> <div class="expanded"> <h2 class="toggleButton" title="Click to hide Overview section">Overview ▾</h2> <p> Package utf8 implements functions and constants to support text encoded in UTF-8. It includes functions to translate between runes and UTF-8 byte sequences. </p>
</div> </div>
<div id="pkg-index" class="toggleVisible"> <div class="collapsed"> <h2 class="toggleButton" title="Click to show Index section">Index ▹</h2> </div> <div class="expanded"> <h2 class="toggleButton" title="Click to hide Index section">Index ▾</h2>
<!-- Table of contents for API; must be named manual-nav to turn off auto nav. --> <div id="manual-nav"> <dl> <dd><a href="index.html#pkg-constants">Constants</a></dd> <dd><a href="index.html#DecodeLastRune">func DecodeLastRune(p []byte) (r rune, size int)</a></dd> <dd><a href="index.html#DecodeLastRuneInString">func DecodeLastRuneInString(s string) (r rune, size int)</a></dd> <dd><a href="index.html#DecodeRune">func DecodeRune(p []byte) (r rune, size int)</a></dd> <dd><a href="index.html#DecodeRuneInString">func DecodeRuneInString(s string) (r rune, size int)</a></dd> <dd><a href="index.html#EncodeRune">func EncodeRune(p []byte, r rune) int</a></dd> <dd><a href="index.html#FullRune">func FullRune(p []byte) bool</a></dd> <dd><a href="index.html#FullRuneInString">func FullRuneInString(s string) bool</a></dd> <dd><a href="index.html#RuneCount">func RuneCount(p []byte) int</a></dd> <dd><a href="index.html#RuneCountInString">func RuneCountInString(s string) (n int)</a></dd> <dd><a href="index.html#RuneLen">func RuneLen(r rune) int</a></dd> <dd><a href="index.html#RuneStart">func RuneStart(b byte) bool</a></dd> <dd><a href="index.html#Valid">func Valid(p []byte) bool</a></dd> <dd><a href="index.html#ValidRune">func ValidRune(r rune) bool</a></dd> <dd><a href="index.html#ValidString">func ValidString(s string) bool</a></dd> </dl> </div><!-- #manual-nav -->
<div id="pkg-examples"> <h4>Examples</h4> <dl> <dd><a class="exampleLink" href="index.html#example_DecodeLastRune">DecodeLastRune</a></dd> <dd><a class="exampleLink" href="index.html#example_DecodeLastRuneInString">DecodeLastRuneInString</a></dd> <dd><a class="exampleLink" href="index.html#example_DecodeRune">DecodeRune</a></dd> <dd><a class="exampleLink" href="index.html#example_DecodeRuneInString">DecodeRuneInString</a></dd> <dd><a class="exampleLink" href="index.html#example_EncodeRune">EncodeRune</a></dd> <dd><a class="exampleLink" href="index.html#example_FullRune">FullRune</a></dd> <dd><a class="exampleLink" href="index.html#example_FullRuneInString">FullRuneInString</a></dd> <dd><a class="exampleLink" href="index.html#example_RuneCount">RuneCount</a></dd> <dd><a class="exampleLink" href="index.html#example_RuneCountInString">RuneCountInString</a></dd> <dd><a class="exampleLink" href="index.html#example_RuneLen">RuneLen</a></dd> <dd><a class="exampleLink" href="index.html#example_RuneStart">RuneStart</a></dd> <dd><a class="exampleLink" href="index.html#example_Valid">Valid</a></dd> <dd><a class="exampleLink" href="index.html#example_ValidRune">ValidRune</a></dd> <dd><a class="exampleLink" href="index.html#example_ValidString">ValidString</a></dd> </dl> </div>
<h4>Package files</h4> <p> <span style="font-size:90%"> <a href="http://localhost:6060/src/unicode/utf8/utf8.go">utf8.go</a> </span> </p> </div><!-- .expanded --> </div><!-- #pkg-index -->
<div id="pkg-callgraph" class="toggle" style="display: none"> <div class="collapsed"> <h2 class="toggleButton" title="Click to show Internal Call Graph section">Internal call graph ▹</h2> </div> <!-- .collapsed --> <div class="expanded"> <h2 class="toggleButton" title="Click to hide Internal Call Graph section">Internal call graph ▾</h2> <p> In the call graph viewer below, each node is a function belonging to this package and its children are the functions it calls—perhaps dynamically. </p> <p> The root nodes are the entry points of the package: functions that may be called from outside the package. There may be non-exported or anonymous functions among them if they are called dynamically from another package. </p> <p> Click a node to visit that function's source code. From there you can visit its callers by clicking its declaring <code>func</code> token. </p> <p> Functions may be omitted if they were determined to be unreachable in the particular programs or tests that were analyzed. </p> <!-- Zero means show all package entry points. --> <ul style="margin-left: 0.5in" id="callgraph-0" class="treeview"></ul> </div> </div> <!-- #pkg-callgraph -->
<h2 id="pkg-constants">Constants</h2> <pre>const ( <span id="RuneError">RuneError</span> = '\uFFFD' <span class="comment">// the "error" Rune or "Unicode replacement character"</span> <span id="RuneSelf">RuneSelf</span> = 0x80 <span class="comment">// characters below RuneSelf are represented as themselves in a single byte.</span> <span id="MaxRune">MaxRune</span> = '\U0010FFFF' <span class="comment">// Maximum valid Unicode code point.</span> <span id="UTFMax">UTFMax</span> = 4 <span class="comment">// maximum number of bytes of a UTF-8 encoded Unicode character.</span> )</pre> <p> Numbers fundamental to the encoding. </p>
<h2 id="DecodeLastRune">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=8153:8201#L239">DecodeLastRune</a></h2> <pre>func DecodeLastRune(p []<a href="../../builtin/index.html#byte">byte</a>) (r <a href="../../builtin/index.html#rune">rune</a>, size <a href="../../builtin/index.html#int">int</a>)</pre> <p> DecodeLastRune unpacks the last UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> <p> An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p>
<div id="example_DecodeLastRune" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">b := []byte("Hello, 世界")
for len(b) > 0 { r, size := utf8.DecodeLastRune(b) fmt.Printf("%c %v\n", r, size)
b = b[:len(b)-size] } </pre> <p>Output:</p> <pre class="output">界 3 世 3 1 , 1 o 1 l 1 l 1 e 1 H 1 </pre> </div> </div>
<h2 id="DecodeLastRuneInString">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=9163:9219#L279">DecodeLastRuneInString</a></h2> <pre>func DecodeLastRuneInString(s <a href="../../builtin/index.html#string">string</a>) (r <a href="../../builtin/index.html#rune">rune</a>, size <a href="../../builtin/index.html#int">int</a>)</pre> <p> DecodeLastRuneInString is like DecodeLastRune but its input is a string. If s is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> <p> An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p>
<div id="example_DecodeLastRuneInString" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">str := "Hello, 世界"
for len(str) > 0 { r, size := utf8.DecodeLastRuneInString(str) fmt.Printf("%c %v\n", r, size)
str = str[:len(str)-size] } </pre> <p>Output:</p> <pre class="output">界 3 世 3 1 , 1 o 1 l 1 l 1 e 1 H 1 </pre> </div> </div>
<h2 id="DecodeRune">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=5307:5351#L143">DecodeRune</a></h2> <pre>func DecodeRune(p []<a href="../../builtin/index.html#byte">byte</a>) (r <a href="../../builtin/index.html#rune">rune</a>, size <a href="../../builtin/index.html#int">int</a>)</pre> <p> DecodeRune unpacks the first UTF-8 encoding in p and returns the rune and its width in bytes. If p is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> <p> An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p>
<div id="example_DecodeRune" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">b := []byte("Hello, 世界")
for len(b) > 0 { r, size := utf8.DecodeRune(b) fmt.Printf("%c %v\n", r, size)
b = b[size:] } </pre> <p>Output:</p> <pre class="output">H 1 e 1 l 1 l 1 o 1 , 1 1 世 3 界 3 </pre> </div> </div>
<h2 id="DecodeRuneInString">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=6710:6762#L191">DecodeRuneInString</a></h2> <pre>func DecodeRuneInString(s <a href="../../builtin/index.html#string">string</a>) (r <a href="../../builtin/index.html#rune">rune</a>, size <a href="../../builtin/index.html#int">int</a>)</pre> <p> DecodeRuneInString is like DecodeRune but its input is a string. If s is empty it returns (RuneError, 0). Otherwise, if the encoding is invalid, it returns (RuneError, 1). Both are impossible results for correct, non-empty UTF-8. </p> <p> An encoding is invalid if it is incorrect UTF-8, encodes a rune that is out of range, or is not the shortest possible UTF-8 encoding for the value. No other validation is performed. </p>
<div id="example_DecodeRuneInString" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">str := "Hello, 世界"
for len(str) > 0 { r, size := utf8.DecodeRuneInString(str) fmt.Printf("%c %v\n", r, size)
str = str[size:] } </pre> <p>Output:</p> <pre class="output">H 1 e 1 l 1 l 1 o 1 , 1 1 世 3 界 3 </pre> </div> </div>
<h2 id="EncodeRune">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=10279:10316#L333">EncodeRune</a></h2> <pre>func EncodeRune(p []<a href="../../builtin/index.html#byte">byte</a>, r <a href="../../builtin/index.html#rune">rune</a>) <a href="../../builtin/index.html#int">int</a></pre> <p> EncodeRune writes into p (which must be large enough) the UTF-8 encoding of the rune. It returns the number of bytes written. </p>
<div id="example_EncodeRune" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">r := '世' buf := make([]byte, 3)
n := utf8.EncodeRune(buf, r)
fmt.Println(buf) fmt.Println(n) </pre> <p>Output:</p> <pre class="output">[228 184 150] 3 </pre> </div> </div>
<h2 id="FullRune">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=3999:4027#L92">FullRune</a></h2> <pre>func FullRune(p []<a href="../../builtin/index.html#byte">byte</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> FullRune reports whether the bytes in p begin with a full UTF-8 encoding of a rune. An invalid encoding is considered a full Rune since it will convert as a width-1 error rune. </p>
<div id="example_FullRune" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">buf := []byte{228, 184, 150} <span class="comment">// 世</span> fmt.Println(utf8.FullRune(buf)) fmt.Println(utf8.FullRune(buf[:2])) </pre> <p>Output:</p> <pre class="output">true false </pre> </div> </div>
<h2 id="FullRuneInString">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=4448:4484#L114">FullRuneInString</a></h2> <pre>func FullRuneInString(s <a href="../../builtin/index.html#string">string</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> FullRuneInString is like FullRune but its input is a string. </p>
<div id="example_FullRuneInString" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">str := "世" fmt.Println(utf8.FullRuneInString(str)) fmt.Println(utf8.FullRuneInString(str[:2])) </pre> <p>Output:</p> <pre class="output">true false </pre> </div> </div>
<h2 id="RuneCount">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=11033:11061#L362">RuneCount</a></h2> <pre>func RuneCount(p []<a href="../../builtin/index.html#byte">byte</a>) <a href="../../builtin/index.html#int">int</a></pre> <p> RuneCount returns the number of runes in p. Erroneous and short encodings are treated as single runes of width 1 byte. </p>
<div id="example_RuneCount" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">buf := []byte("Hello, 世界") fmt.Println("bytes =", len(buf)) fmt.Println("runes =", utf8.RuneCount(buf)) </pre> <p>Output:</p> <pre class="output">bytes = 13 runes = 9 </pre> </div> </div>
<h2 id="RuneCountInString">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=11705:11745#L399">RuneCountInString</a></h2> <pre>func RuneCountInString(s <a href="../../builtin/index.html#string">string</a>) (n <a href="../../builtin/index.html#int">int</a>)</pre> <p> RuneCountInString is like RuneCount but its input is a string. </p>
<div id="example_RuneCountInString" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">str := "Hello, 世界" fmt.Println("bytes =", len(str)) fmt.Println("runes =", utf8.RuneCountInString(str)) </pre> <p>Output:</p> <pre class="output">bytes = 13 runes = 9 </pre> </div> </div>
<h2 id="RuneLen">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=9883:9907#L313">RuneLen</a></h2> <pre>func RuneLen(r <a href="../../builtin/index.html#rune">rune</a>) <a href="../../builtin/index.html#int">int</a></pre> <p> RuneLen returns the number of bytes required to encode the rune. It returns -1 if the rune is not a valid value to encode in UTF-8. </p>
<div id="example_RuneLen" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">fmt.Println(utf8.RuneLen('a')) fmt.Println(utf8.RuneLen('界')) </pre> <p>Output:</p> <pre class="output">1 3 </pre> </div> </div>
<h2 id="RuneStart">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=12485:12512#L436">RuneStart</a></h2> <pre>func RuneStart(b <a href="../../builtin/index.html#byte">byte</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> RuneStart reports whether the byte could be the first byte of an encoded, possibly invalid rune. Second and subsequent bytes always have the top two bits set to 10. </p>
<div id="example_RuneStart" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">buf := []byte("a界") fmt.Println(utf8.RuneStart(buf[0])) fmt.Println(utf8.RuneStart(buf[1])) fmt.Println(utf8.RuneStart(buf[2])) </pre> <p>Output:</p> <pre class="output">true true false </pre> </div> </div>
<h2 id="Valid">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=12615:12640#L439">Valid</a></h2> <pre>func Valid(p []<a href="../../builtin/index.html#byte">byte</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> Valid reports whether p consists entirely of valid UTF-8-encoded runes. </p>
<div id="example_Valid" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">valid := []byte("Hello, 世界") invalid := []byte{0xff, 0xfe, 0xfd}
fmt.Println(utf8.Valid(valid)) fmt.Println(utf8.Valid(invalid)) </pre> <p>Output:</p> <pre class="output">true false </pre> </div> </div>
<h2 id="ValidRune">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=14008:14035#L504">ValidRune</a></h2> <pre>func ValidRune(r <a href="../../builtin/index.html#rune">rune</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> ValidRune reports whether r can be legally encoded as UTF-8. Code points that are out of range or a surrogate half are illegal. </p>
<div id="example_ValidRune" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">valid := 'a' invalid := rune(0xfffffff)
fmt.Println(utf8.ValidRune(valid)) fmt.Println(utf8.ValidRune(invalid)) </pre> <p>Output:</p> <pre class="output">true false </pre> </div> </div>
<h2 id="ValidString">func <a href="http://localhost:6060/src/unicode/utf8/utf8.go?s=13282:13313#L471">ValidString</a></h2> <pre>func ValidString(s <a href="../../builtin/index.html#string">string</a>) <a href="../../builtin/index.html#bool">bool</a></pre> <p> ValidString reports whether s consists entirely of valid UTF-8-encoded runes. </p>
<div id="example_ValidString" class="toggle"> <div class="collapsed"> <p class="exampleHeading toggleButton">▹ <span class="text">Example</span></p> </div> <div class="expanded"> <p class="exampleHeading toggleButton">▾ <span class="text">Example</span></p> <p>Code:</p> <pre class="code">valid := "Hello, 世界" invalid := string([]byte{0xff, 0xfe, 0xfd})
fmt.Println(utf8.ValidString(valid)) fmt.Println(utf8.ValidString(invalid)) </pre> <p>Output:</p> <pre class="output">true false </pre> </div> </div>
<div id="footer"> Build version go1.6.<br> Except as <a href="https://developers.google.com/site-policies#restrictions">noted</a>, the content of this page is licensed under the Creative Commons Attribution 3.0 License, and code is licensed under a <a href="http://localhost:6060/LICENSE">BSD license</a>.<br> <a href="http://localhost:6060/doc/tos.html">Terms of Service</a> | <a href="http://www.google.com/intl/en/policies/privacy/">Privacy Policy</a> </div>
</div><!-- .container --> </div><!-- #page -->
<!-- TODO(adonovan): load these from <head> using "defer" attribute? --> <script type="text/javascript" src="../../../lib/godoc/jquery.js"></script> <script type="text/javascript" src="../../../lib/godoc/jquery.treeview.js"></script> <script type="text/javascript" src="../../../lib/godoc/jquery.treeview.edit.js"></script>
<script type="text/javascript" src="../../../lib/godoc/godocs.js"></script>
</body> </html>
|