String.prototype.charAt()
The charAt()
method returns the specified character from a string.
Syntax
<var>str</var>.charAt(<var>index</var>)
Parameters
index
- An integer between 0 and 1-less-than the length of the string. If no index is provided,
charAt()
will use0
.
Return value
A string representing the character at the specified index; empty string if index
is out of range.
Description
Characters in a string are indexed from left to right. The index of the first character is 0, and the index of the last character in a string called stringName
is stringName.length - 1
. If the index
you supply is out of range, JavaScript returns an empty string.
If no index is provided to .charAt(), 0 will be used as default.
Examples
Displaying characters at different locations in a string
The following example displays characters at different locations in the string "Brave new world"
:
var anyString = 'Brave new world'; console.log("The character at index 0 is '" + anyString.charAt() + "'"); // No index was provided, used 0 as default console.log("The character at index 0 is '" + anyString.charAt(0) + "'"); console.log("The character at index 1 is '" + anyString.charAt(1) + "'"); console.log("The character at index 2 is '" + anyString.charAt(2) + "'"); console.log("The character at index 3 is '" + anyString.charAt(3) + "'"); console.log("The character at index 4 is '" + anyString.charAt(4) + "'"); console.log("The character at index 999 is '" + anyString.charAt(999) + "'");
These lines display the following:
The character at index 0 is 'B' The character at index 0 is 'B' The character at index 1 is 'r' The character at index 2 is 'a' The character at index 3 is 'v' The character at index 4 is 'e' The character at index 999 is ''
Getting whole characters
The following provides a means of ensuring that going through a string loop always provides a whole character, even if the string contains characters that are not in the Basic Multi-lingual Plane.
var str = 'A \uD87E\uDC04 Z'; // We could also use a non-BMP character directly for (var i = 0, chr; i < str.length; i++) { if ((chr = getWholeChar(str, i)) === false) { continue; } // Adapt this line at the top of each loop, passing in the whole string and // the current iteration and returning a variable to represent the // individual character console.log(chr); } function getWholeChar(str, i) { var code = str.charCodeAt(i); if (Number.isNaN(code)) { return ''; // Position not found } if (code < 0xD800 || code > 0xDFFF) { return str.charAt(i); } // High surrogate (could change last hex to 0xDB7F to treat high private // surrogates as single characters) if (0xD800 <= code && code <= 0xDBFF) { if (str.length <= (i + 1)) { throw 'High surrogate without following low surrogate'; } var next = str.charCodeAt(i + 1); if (0xDC00 > next || next > 0xDFFF) { throw 'High surrogate without following low surrogate'; } return str.charAt(i) + str.charAt(i + 1); } // Low surrogate (0xDC00 <= code && code <= 0xDFFF) if (i === 0) { throw 'Low surrogate without preceding high surrogate'; } var prev = str.charCodeAt(i - 1); // (could change last hex to 0xDB7F to treat high private // surrogates as single characters) if (0xD800 > prev || prev > 0xDBFF) { throw 'Low surrogate without preceding high surrogate'; } // We can pass over low surrogates now as the second component // in a pair which we have already processed return false; }
In an ECMAScript 2016 environment which allows destructured assignment, the following is a more succinct and somewhat more flexible alternative in that it does incrementing for an incrementing variable automatically (if the character warrants it in being a surrogate pair).
var str = 'A\uD87E\uDC04Z'; // We could also use a non-BMP character directly for (var i = 0, chr; i < str.length; i++) { [chr, i] = getWholeCharAndI(str, i); // Adapt this line at the top of each loop, passing in the whole string and // the current iteration and returning an array with the individual character // and 'i' value (only changed if a surrogate pair) console.log(chr); } function getWholeCharAndI(str, i) { var code = str.charCodeAt(i); if (Number.isNaN(code)) { return ''; // Position not found } if (code < 0xD800 || code > 0xDFFF) { return [str.charAt(i), i]; // Normal character, keeping 'i' the same } // High surrogate (could change last hex to 0xDB7F to treat high private // surrogates as single characters) if (0xD800 <= code && code <= 0xDBFF) { if (str.length <= (i + 1)) { throw 'High surrogate without following low surrogate'; } var next = str.charCodeAt(i + 1); if (0xDC00 > next || next > 0xDFFF) { throw 'High surrogate without following low surrogate'; } return [str.charAt(i) + str.charAt(i + 1), i + 1]; } // Low surrogate (0xDC00 <= code && code <= 0xDFFF) if (i === 0) { throw 'Low surrogate without preceding high surrogate'; } var prev = str.charCodeAt(i - 1); // (could change last hex to 0xDB7F to treat high private surrogates // as single characters) if (0xD800 > prev || prev > 0xDBFF) { throw 'Low surrogate without preceding high surrogate'; } // Return the next character instead (and increment) return [str.charAt(i + 1), i + 1]; }
Fixing charAt()
to support non-Basic-Multilingual-Plane (BMP) characters
While the example above may be more frequently useful for those wishing to support non-BMP characters (since it does not require the caller to know where any non-BMP character might appear), in the event that one does wish, in choosing a character by index, to treat the surrogate pairs within a string as the single characters they represent, one can use the following:
function fixedCharAt(str, idx) { var ret = ''; str += ''; var end = str.length; var surrogatePairs = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g; while ((surrogatePairs.exec(str)) != null) { var li = surrogatePairs.lastIndex; if (li - 2 < idx) { idx++; } else { break; } } if (idx >= end || idx < 0) { return ''; } ret += str.charAt(idx); if (/[\uD800-\uDBFF]/.test(ret) && /[\uDC00-\uDFFF]/.test(str.charAt(idx + 1))) { // Go one further, since one of the "characters" is part of a surrogate pair ret += str.charAt(idx + 1); } return ret; }
Specifications
Specification | Status | Comment |
---|---|---|
ECMAScript 1st Edition (ECMA-262) | Standard | Initial definition. |
ECMAScript 5.1 (ECMA-262) The definition of 'String.prototype.charAt' in that specification. |
Standard | |
ECMAScript 2015 (6th Edition, ECMA-262) The definition of 'String.prototype.charAt' in that specification. |
Standard | |
ECMAScript 2017 Draft (ECMA-262) The definition of 'String.prototype.charAt' in that specification. |
Draft |
Browser compatibility
Feature | Chrome | Firefox (Gecko) | Internet Explorer | Opera | Safari |
---|---|---|---|---|---|
Basic support | (Yes) | (Yes) | (Yes) | (Yes) | (Yes) |
Feature | Android | Chrome for Android | Firefox Mobile (Gecko) | IE Mobile | Opera Mobile | Safari Mobile |
---|---|---|---|---|---|---|
Basic support | (Yes) | (Yes) | (Yes) | (Yes) | (Yes) | (Yes) |
See also
String.prototype.indexOf()
String.prototype.lastIndexOf()
String.prototype.charCodeAt()
String.prototype.codePointAt()
String.prototype.split()
String.fromCodePoint()
- JavaScript has a Unicode problem – Mathias Bynens
License
© 2016 Mozilla Contributors
Licensed under the Creative Commons Attribution-ShareAlike License v2.5 or later.
https://developer.mozilla.org/en-us/docs/web/javascript/reference/global_objects/string/charat