public static void main(String[] args) throws Exception { String s0 = new String("helloworld"); String s1 = s0.intern(); // 此時"helloworld"已經存在常量池中,如今只是經過intern方法取出而已 String s2 = "helloworld"; // System.out.println(s0 == s1); // false System.out.println(s2 == s1); // true } // 學過java編譯過程的都知道編譯會進行熱點代碼的優化,如:方法內聯、常量傳播、空值檢查消除、寄存器分配等等,熱點代碼通常經過熱點探測得出,而HotSpotIntrinsicCandidate註解可以直接手段將某個方法直接指定爲熱點代碼,jvm儘快優化它(非絕對優化,優化時機不肯定)。 // 註釋簡言之:new String("helloworld") 是"helloworld"的一個複製;由於String是不可變的,除非須要顯示覆制"hellworld",否則使用構造器來複制字符串是沒必要要的。 /** * Initializes a newly created {@code String} object so that it represents * the same sequence of characters as the argument; in other words, the * newly created string is a copy of the argument string. Unless an * explicit copy of {@code original} is needed, use of this constructor is * unnecessary since Strings are immutable. * * @param original * A {@code String} */ @HotSpotIntrinsicCandidate public String(String original) { this.value = original.value; this.coder = original.coder; this.hash = original.hash; }
// 對應的反編譯源碼
public static void main(java.lang.String[]) throws java.lang.Exception;
  Code:
     // new 建立一個對象,並將其引用值壓入棧頂
     0: new           #2    // class java/lang/String
     // 複製棧頂數值(數值不能是long或double類型的)並將複製值壓入棧頂
     3: dup
     // 從常量池中取出字面量(常量值)
     4: ldc           #3    // String helloworld
     6: invokespecial #4    // Method java/lang/String."<init>":(Ljava/lang/String;)V
     9: astore_1
    10: aload_1
    11: invokevirtual #5    // Method java/lang/String.intern:()Ljava/lang/String;
    14: astore_2
    15: ldc           #3    // String helloworld
    17: astore_3
    18: getstatic     #6    // Field java/lang/System.out:Ljava/io/PrintStream;
    21: aload_1
    22: aload_2
    23: if_acmpne     30
    26: iconst_1
    27: goto          31
    30: iconst_0
    31: invokevirtual #7    // Method java/io/PrintStream.println:(Z)V
    34: getstatic     #6    // Field java/lang/System.out:Ljava/io/PrintStream;
    37: aload_3
    38: aload_2
    39: if_acmpne     46
    42: iconst_1
    43: goto          47
    46: iconst_0
    47: invokevirtual #7    // Method java/io/PrintStream.println:(Z)V
    50: return
}

// String s0 = new String("helloworld") 的反編譯源碼
     0: new           #2    // class java/lang/String
     3: dup
     4: ldc           #3    // String helloworld
     6: invokespecial #4    // Method java/lang/String."<init>":(Ljava/lang/String;)V
     9: astore_1

// String s2 = "helloworld" 的反編譯源碼
    15: ldc           #3    // String helloworld
    17: astore_3

// 對比可知,"helloworld"不會在堆中建立對象,即不調用new指令和String的構造器方法
// 可是隻要看到"helloworld",就會在'靜態常量池'中生成"helloworld"字面量,後面還有個例子能夠比較看看。
關於JVM指令,請參考:https://blog.csdn.net/hudashi/article/details/7062781
https://blog.csdn.net/hudashi/article/details/7062675
關於Classpy,請參考:https://github.com/zxh0/classpy
StringDemo.class magic:0xCAFEBABE ... ... constant_pool: #01 (Methodref): java/lang/Object.<init> #02 (Class): java/lang/String // 符號引用 #03 (String): helloworld tag:8 string_index:26 ... ... // 字面量 #26 (Utf8):helloworld tag:1 length:10 bytes:helloworld
// 註釋簡言之:intern會判斷字符串常量池是否擁有該字符串對象,擁有則返回,反之添加到常量池並返回該字符串對象的引用。 // 其中全部的字面量字符串和字符數值的常量表達式(請參考java規範)都會被常量池保存起來。 // 後面再詳細講解intern方法的內部實現 /** * Returns a canonical representation for the string object. * <p> * A pool of strings, initially empty, is maintained privately by the * class {@code String}. * <p> * When the intern method is invoked, if the pool already contains a * string equal to this {@code String} object as determined by * the {@link #equals(Object)} method, then the string from the pool is * returned. Otherwise, this {@code String} object is added to the * pool and a reference to this {@code String} object is returned. * <p> * It follows that for any two strings {@code s} and {@code t}, * {@code s.intern() == t.intern()} is {@code true} * if and only if {@code s.equals(t)} is {@code true}. * <p> * All literal strings and string-valued constant expressions are * interned. String literals are defined in section 3.10.5 of the * <cite>The Java™ Language Specification</cite>. * * @return a string that has the same contents as this string, but is * guaranteed to be from a pool of unique strings. * @jls 3.10.5 String Literals */ public native String intern();
java規範的下載地址:https://docs.oracle.com/javase/specs/index.html
public static void main(String[] args) throws Exception { // new StringBuilder().append("hello").append("world").toString(); String s0 = new String("hello") + new String("world"); String s1 = s0.intern(); // 此時常量池沒有helloworld,此時放入,放入的是s0的地址。 String s2 = "helloworld"; // 從常量池中取出s0的地址 System.out.println(s0 == s1); // true System.out.println(s2 == s1); // true } public static void main(String[] args) throws Exception { String s2 = "helloworld"; String s0 = new String("hello") + new String("world"); String s1 = s0.intern(); // 此時常量池有helloworld,直接取出,爲s2的地址。 System.out.println(s0 == s1); // false System.out.println(s2 == s1); // true } // 此例證實new StringBuilder().append("hello").append("world").toString()沒有intern的功能 public static void main(String[] args) throws Exception { String s0 = new String("hello") + new String("world"); String s2 = "helloworld"; // 具備intern的功能 System.out.println(s2 == s0); // false } // StringBuilder的toString方法 @Override @HotSpotIntrinsicCandidate public String toString() { // Create a copy, don't share the array return isLatin1() ? StringLatin1.newString(value, 0, count): StringUTF16.newString(value, 0, count); } // StringLatin1.newString方法 ==> String的重載構造器 public static String newString(byte[] val, int index, int len) { return new String(Arrays.copyOfRange(val, index, index + len),LATIN1); }
public static void main(java.lang.String[]) throws java.lang.Exception;
  Code:
     0: new           #2    // class java/lang/StringBuilder
     3: dup
     4: invokespecial #3    // Method java/lang/StringBuilder."<init>":()V
     7: new           #4    // class java/lang/String
    10: dup
    11: ldc           #5    // String hello
    13: invokespecial #6    // Method java/lang/String."<init>":(Ljava/lang/String;)V
    16: invokevirtual #7    // Method java/lang/StringBuilder.append:(Ljava/lang/String;)Ljava/lang/StringBuilder;
    19: new           #4    // class java/lang/String
    22: dup
    23: ldc           #8    // String world
    25: invokespecial #6    // Method java/lang/String."<init>":(Ljava/lang/String;)V
    28: invokevirtual #7    // Method java/lang/StringBuilder.append:(Ljava/lang/String;)Ljava/lang/StringBuilder;
    31: invokevirtual #9    // Method java/lang/StringBuilder.toString:()Ljava/lang/String;
    34: astore_1
    35: aload_1
    36: invokevirtual #10   // Method java/lang/String.intern:()Ljava/lang/String;
    39: astore_2
    40: ldc           #11   // String helloworld
    42: astore_3
    43: getstatic     #12   // Field java/lang/System.out:Ljava/io/PrintStream;
    46: aload_1
    47: aload_2
    48: if_acmpne     55
    51: iconst_1
    52: goto          56
    55: iconst_0
    56: invokevirtual #13   // Method java/io/PrintStream.println:(Z)V
    59: getstatic     #12   // Field java/lang/System.out:Ljava/io/PrintStream;
    62: aload_3
    63: aload_2
    64: if_acmpne     71
    67: iconst_1
    68: goto          72
    71: iconst_0
    72: invokevirtual #13   // Method java/io/PrintStream.println:(Z)V
    75: return
}
代碼經過jdb打斷點中止在 String s1 = s0.intern();這一行。
Console Line中,繼續輸入inspect 0x00000000d61515d0 ,獲得"helloworld",表明new StringBuilder().toString()不會在常量池中放入字符串的引用。
HSDB,請參考:https://blog.csdn.net/kisimple/article/details/45128525
GBD,請參考:https://www.cnblogs.com/rocedu/p/6371262.html
因爲intern方法是native方法,採用了JNI技術。
關於JNI技術,請參考:https://www.cnblogs.com/DengGao/p/jni.html
爲了理解簡單,下面源碼省略了加鎖、內存管理和重哈希的代碼,感興趣能夠下載HotSpot的源碼進行研讀。
經過源碼能夠知道,String的常量池其實就是C++版本的HashMap而已。
下載源碼,請參考:https://www.cnblogs.com/linzhanfly/p/9474173.html
// \openjdk10\jdk\src\share\native\java\lang\String.c // 第二個參數爲返回值 JNIEXPORT jobject JNICALL // jni命名規範(聲明爲native自動生成):java.lang.String:intern => Java_java_lang_String_intern(Java前綴 + 包名 + 方法名,分隔符號採用_) Java_java_lang_String_intern(JNIEnv *env, jobject this){ //(1)JVM_InternString調用 return JVM_InternString(env, this); } // \openjdk10\hotspot\src\share\vm\prims\jvm.h /* * java.lang.String */ JNIEXPORT jstring JNICALL JVM_InternString(JNIEnv *env, jstring str); // \openjdk10\hotspot\src\share\vm\prims\jvm.cpp // String support /////////////////////////////////////////////////////////////////////////// // (2)JVM_InternString的實現 JVM_ENTRY(jstring, JVM_InternString(JNIEnv *env, jstring str)) JVMWrapper("JVM_InternString"); JvmtiVMObjectAllocEventCollector oam; if (str == NULL) return NULL; oop string = JNIHandles::resolve_non_null(str); // (3)StringTable::intern調用 oop result = StringTable::intern(string, CHECK_NULL); return (jstring) JNIHandles::make_local(env, result); JVM_END // \openjdk10\hotspot\src\share\vm\classfile\stringTable.cpp // (4)StringTable::intern的實現 StringTable是HashTable的子類 oop StringTable::intern(oop string, TRAPS){ if (string == NULL) return NULL; int length; Handle h_string (THREAD, string); // 建立Handle jchar* chars = java_lang_String::as_unicode_string(string, length, CHECK_NULL); // (5)StringTable::intern的重載方法 return intern(h_string, chars, length, CHECK_NULL); } // \openjdk10\hotspot\src\share\vm\classfile\stringTable.cpp oop StringTable::intern(Handle string_or_null, jchar* name,int len, TRAPS) { // shared table always uses java_lang_String::hash_code // 我的理解: java_lang_String屬於工具類,提供一些操做string的方法 unsigned int hashValue = java_lang_String::hash_code(name, len); // (6)查詢共享數組 oop found_string = lookup_shared(name, len, hashValue); if (found_string != NULL) return found_string; // the_table()返回StringTable的引用 int index = the_table() -> hash_to_index(hashValue);// 其實就是hashValue % _table_size found_string = the_table() -> lookup_in_main_table(index, 
name, len, hashValue); if (found_string != NULL) return found_string; Handle string; if (!string_or_null.is_null()) string = string_or_null; else string = java_lang_String::create_from_unicode(name, len, CHECK_NULL); // 前面常量池存在該字符串就返回了,不存在則進行添加操做 oop added_or_found = the_table()->basic_add(index,string,name,len,hashValue,CHECK_NULL); return added_or_found; } // \openjdk10\hotspot\src\share\vm\classfile\javaClasses.cpp // hash_code的實現,與jdk源碼String類的HashCode()方法相似 unsigned int java_lang_String::hash_code(oop java_string) { int length = java_lang_String::length(java_string); if (length == 0) return 0; typeArrayOop value = java_lang_String::value(java_string); bool is_latin1 = java_lang_String::is_latin1(java_string); if (is_latin1) { // \openjdk10\hotspot\src\share\vm\classfile\javaClasses.hpp中static修飾的類方法 return java_lang_String::hash_code(value->byte_at_addr(0), length); } else { // \openjdk10\hotspot\src\share\vm\classfile\javaClasses.hpp中static修飾的類方法 return java_lang_String::hash_code(value->char_at_addr(0), length); } } // \openjdk10\hotspot\src\share\vm\classfile\javaClasses.hpp static unsigned int hash_code(const jbyte* s, int len) { unsigned int h = 0; while (len-- > 0) { h = 31*h + (((unsigned int) *s) & 0xFF); s++; } return h; } // package java.lang.StringLatin1類中的hashCode與Openjdk中保持一致 public static int hashCode(byte[] value) { int h = 0; for (byte v : value) { h = 31 * h + (v & 0xff); } return h; } // \openjdk10\hotspot\src\share\vm\classfile\stringTable.cpp oop StringTable::lookup_shared(jchar* name, int len, unsigned int hash) { //(7)共享數組是一個HashTable的子類, CompactHashtable<oop, char> StringTable::_shared_table; return _shared_table.lookup((const char*)name, hash, len); } // \openjdk10\hotspot\src\share\vm\classfile\compactHashtable.inline.hpp template <class T, class N> inline T CompactHashtable<T,N>::lookup(const N* name, unsigned int hash, int len) { if (_entry_count > 0) {// int index = hash % _bucket_count;// _bucket_count爲_buckets數組大小 u4 bucket_info = 
_buckets[index];// bucket_info爲32位,高2位表明類型,低30爲表明偏移量 u4 bucket_offset = BUCKET_OFFSET(bucket_info);// 取出低30位 int bucket_type = BUCKET_TYPE(bucket_info);// 取出高2位 u4* entry = _entries + bucket_offset;// 根據偏移量取出entries數組中值 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { // 只存值的entry,包含一個偏移量 T res = decode_entry(this, entry[0], name, len);// 獲取存放的值,代碼就不貼了 if (res != NULL) return res; } else { // This is a regular bucket, which has more than one // entries. Each entry is a pair of entry (hash, offset). // Seek until the end of the bucket. // 常規bucket,索引0放着hash值,索引1放着偏移量 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);// 獲取下一個_buckets的偏移量做爲尋找entry的最大值 while (entry < entry_max) { if ((unsigned int)(entry[0]) == hash) { T res = decode_entry(this, entry[1], name, len); if (res != NULL) return res; } entry += 2; } } } return NULL; } // \openjdk10\hotspot\src\share\vm\classfile\stringTable.cpp oop StringTable::lookup_in_main_table(int index, jchar* name,int len, unsigned int hash) { // bucket方法位於hashtable.inline.hpp中,與java的HashMap相似,取出HashtableEntry,類比Map.Entry.單向鏈表形式。 // hash碰撞致使index相同,存放形式爲鏈表。因此須要取出來對比hash值和內部值是否相等。 // bucket(index) ==> _buckets[i].get_entry(); for (HashtableEntry<oop, mtSymbol>* l = bucket(index); l != NULL; l = l->next()) { // hash方法 ==> unsigned int hash() const { return _hash; } if (l->hash() == hash) { // literal方法取出oop,即String字面量 ==> T literal() const { return _literal;} if (java_lang_String::equals(l->literal(), name, len)) return l->literal(); } } return NULL; } // \openjdk10\hotspot\src\share\vm\classfile\stringTable.cpp oop StringTable::basic_add(int index_arg, Handle string, jchar* name,int len, unsigned int hashValue_arg, TRAPS) { unsigned int hashValue = hashValue_arg; int index = index_arg; oop test = lookup_in_main_table(index, name, len, hashValue); if (test != NULL) return test; // \openjdk10\hotspot\src\share\vm\utilities\hashtable.cpp // StringTable繼承了HashTable,()是Handle的運算符重載,返回string的對象值 HashtableEntry<oop, mtSymbol>* entry 
= new_entry(hashValue, string()); add_entry(index, entry); return string(); } // \openjdk10\hotspot\src\share\vm\utilities\hashtable.inline.cpp template <MEMFLAGS F> inline void BasicHashtable<F>::add_entry(int index, BasicHashtableEntry<F>* entry) { entry->set_next(bucket(index)); _buckets[index].set_entry(entry); ++_number_of_entries; } // \openjdk10\hotspot\src\share\vm\runtime\handles.hpp class Handle VALUE_OBJ_CLASS_SPEC { private: oop* _handle; protected: oop obj() const { return _handle == NULL ? (oop)NULL : *_handle; }// ()運算符重載 }