數據庫:oracle
數據源:druid
現象:生產環境存在兩套環境,k8s和rancher。k8s環境每隔幾分鐘會出現close connection error、close statement error,或者訪問數據庫時出現Broken pipe,而rancher環境不會。在這種現象下,我們排查的重點放在k8s網絡環境上,但一直沒有解決這個問題。
問題本質:這類錯誤表現爲tcp連接被斷開,排查的對象爲k8s網絡狀況和防火牆狀況。防火牆默認會關閉一定時間內不活躍的連接,具體時間是半小時還是多久要看運維配置。由於問題只發生在k8s節點,這塊一直在等運維同事處理,一直沒有太大頭緒。最終折中的處理方式改爲在客戶端處理:druid配置保活、配置對連接池中連接的檢測、配置驗證sql的超時時間、配置移除不活躍連接等。測試跟蹤一天,確實沒有類似錯誤了,具體網絡層面的原因以後再看。
具體配置:
# Druid connection-pool settings for an Oracle data source.
spring:
  datasource:
    driver-class-name: oracle.jdbc.OracleDriver
    username: xx
    password: xx
    initialSize: 10
    minIdle: 10
    maxActive: 50
    # Timeout for waiting to obtain a connection
    maxWait: 60000
    # Interval between checks for idle connections that need to be closed, in milliseconds
    timeBetweenEvictionRunsMillis: 60000
    # Minimum time a connection stays in the pool, in milliseconds
    minEvictableIdleTimeMillis: 300000
    validationQuery: SELECT 1 FROM DUAL
    testWhileIdle: true
    # Runs validationQuery on every borrow; costs some performance but is needed
    # to avoid handing out connections that were already dropped
    testOnBorrow: true
    testOnReturn: false
    # Enable PSCache and set the PSCache size for each connection
    poolPreparedStatements: true
    maxPoolPreparedStatementPerConnectionSize: 30
    # Filters for monitoring/statistics interception; without them the monitoring UI
    # cannot collect SQL statistics; 'wall' is used for the firewall
    filters: stat,log4j
    # Use connectProperties to enable the mergeSql feature; slow SQL logging
    connectionProperties: druid.stat.mergeSql=true;druid.stat.slowSqlMillis=3000
    # Use an unfair lock
    useUnfairLock: true
    removeAbandoned: true
    removeAbandonedTimeout: 1800
    logAbandoned: false
    # Best set explicitly; otherwise a validation may take as long as 15 minutes
    # and make the whole query time out
    validationQueryTimeout: 1
    # Keep-alive when the connection count is below minIdle, so the firewall
    # does not drop the connections
    keepAlive: true
關鍵配置:
//每次獲取連接後執行validationQuery,一定程度上會降低性能,但爲了規避上述問題,需要配置
testOnBorrow: true
//非公平鎖,重要,減少競爭等待時間
useUnfairLock: true
removeAbandoned: true
removeAbandonedTimeout: 1800
logAbandoned: false
//validationQueryTimeout 最好配置下,否則可能出現長達15分鐘的校驗時間,致使整個查詢超時
validationQueryTimeout: 1
//在連接數小於minIdle的時候執行保活操作,防止防火牆斷開連接
keepAlive: true
druid獲取連接的源碼:
public DruidPooledConnection getConnectionDirect(long maxWaitMillis) throws SQLException { int notFullTimeoutRetryCnt = 0; for (;;) { // handle notFullTimeoutRetry DruidPooledConnection poolableConnection; try { poolableConnection = getConnectionInternal(maxWaitMillis); } catch (GetConnectionTimeoutException ex) { if (notFullTimeoutRetryCnt <= this.notFullTimeoutRetryCount && !isFull()) { notFullTimeoutRetryCnt++; if (LOG.isWarnEnabled()) { LOG.warn("get connection timeout retry : " + notFullTimeoutRetryCnt); } continue; } throw ex; } if (testOnBorrow) { boolean validate = testConnectionInternal(poolableConnection.holder, poolableConnection.conn); if (!validate) { if (LOG.isDebugEnabled()) { LOG.debug("skip not validate connection."); } Connection realConnection = poolableConnection.conn; discardConnection(realConnection); continue; } } else { Connection realConnection = poolableConnection.conn; if (poolableConnection.conn.isClosed()) { discardConnection(null); // 傳入null,避免重複關閉 continue; } if (testWhileIdle) { long currentTimeMillis = System.currentTimeMillis(); long lastActiveTimeMillis = poolableConnection.holder.lastActiveTimeMillis; long idleMillis = currentTimeMillis - lastActiveTimeMillis; long timeBetweenEvictionRunsMillis = this.timeBetweenEvictionRunsMillis; if (timeBetweenEvictionRunsMillis <= 0) { timeBetweenEvictionRunsMillis = DEFAULT_TIME_BETWEEN_EVICTION_RUNS_MILLIS; } if (idleMillis >= timeBetweenEvictionRunsMillis || idleMillis < 0 // unexcepted branch ) { boolean validate = testConnectionInternal(poolableConnection.holder, poolableConnection.conn); if (!validate) { if (LOG.isDebugEnabled()) { LOG.debug("skip not validate connection."); } discardConnection(realConnection); continue; } } } } if (removeAbandoned) { StackTraceElement[] stackTrace = Thread.currentThread().getStackTrace(); poolableConnection.connectStackTrace = stackTrace; poolableConnection.setConnectedTimeNano(); poolableConnection.traceEnable = true; activeConnectionLock.lock(); try 
{ activeConnections.put(poolableConnection, PRESENT); } finally { activeConnectionLock.unlock(); } } if (!this.defaultAutoCommit) { poolableConnection.setAutoCommit(false); } return poolableConnection; } }