Linux Thermal Framework分析及實施

關鍵詞:Zone、Cooling、Governor、Step Wise、Fair Share、trip等等。

 

Linux Thermal的目的是控制系統運行過程中採樣點的溫度,避免溫度過高造成器件損壞,確保芯片長期穩定工作。

整個Thermal框架可以分爲四部分:

  • Thermal Driver負責將獲取溫度的設備註冊成struct thermal_zone_device,比如Temp Sensor、NTC等。
  • Thermal Governor則負責如何控制溫度,註冊成struct thermal_governor,比如Step Wise、Bang Bang等等。
  • Thermal Cooling負責將控制溫度的設備註冊成struct thermal_cooling_device,比如風扇、CPU、DDR、GPU等。
  • Thermal Core則是Thermal Driver、Thermal Governor、Thermal Cooling的粘合劑,同時提供了用戶空間sysfs節點等通用功能。

所以Thermal的工作流程是:通過Thermal Driver獲取溫度,然後經過Thermal Governor決策,最後通過Thermal Cooling執行溫度控制。

下面首先從整體詳細分析Thermal框架以及數據結構、API(1. Thermal框架分析),然後分別分析Thermal Driver實例(2. Thermal Driver實例)、Thermal Governor(Step Wise和Fair Share)(3. Thermal Governor分析)、以及Thermal Cooling實例(4. Thermal Cooling實例)。

最後將這些內容串起來,分析Thermal是如何控制溫度的。

 

1. Thermal框架分析

1.1 Thermal數據結構

struct thermal_zone_device是對獲取溫度設備的抽象,成員ops是對該Thermal Zone操作的抽象;governor是該Thermal Zone所使用的調溫策略;thermal_instances是該Thermal Zone下的Cooling Device列表。

struct thermal_zone_device {
    int id;
    char type[THERMAL_NAME_LENGTH];
    struct device device;
    struct thermal_attr *trip_temp_attrs;
    struct thermal_attr *trip_type_attrs;
    struct thermal_attr *trip_hyst_attrs;
    void *devdata;
    int trips;---------------------------------------------------------thermal zone支持的trip數目。
    unsigned long trips_disabled;    /* bitmap for disabled trips */
    int passive_delay;
    int polling_delay;-------------------------------------------------輪詢讀取溫度的建個,0表示採用中斷形式。 int temperature;---------------------------------------------------當前溫度。 int last_temperature;----------------------------------------------最近一次溫度。 int emul_temperature;
    int passive;
    int prev_low_trip;
    int prev_high_trip;
    unsigned int forced_passive;
    atomic_t need_update;
    struct thermal_zone_device_ops *ops;------------------------------當前thermal zone操做函數集。 struct thermal_zone_params *tzp;----------------------------------當前thermal zone參數。 struct thermal_governor *governor;
    void *governor_data;
    struct list_head thermal_instances;-------------------------------當前thermal zone上thermal_instances列表。 struct idr idr;
    struct mutex lock;
    struct list_head node;
    struct delayed_work poll_queue;
    enum thermal_notify_event notify_event;
};

/*
 * struct thermal_zone_device_ops - callbacks a thermal zone driver provides.
 *
 * As used by the core code in this file: get_temp must be set for
 * thermal_zone_device_update() to do anything; get_trip_type and
 * get_trip_temp are mandatory when the zone has trips; bind, unbind,
 * set_trips, get_trip_hyst and notify are checked for NULL before use.
 */
struct thermal_zone_device_ops {
    /*
     * Bind a cooling device to the thermal zone. The two are linked
     * through a struct thermal_instance created in
     * thermal_zone_bind_cooling_device().
     */
    int (*bind) (struct thermal_zone_device *,
             struct thermal_cooling_device *);
    int (*unbind) (struct thermal_zone_device *,
               struct thermal_cooling_device *);
    int (*get_temp) (struct thermal_zone_device *, int *);
    int (*set_trips) (struct thermal_zone_device *, int, int);
    int (*get_mode) (struct thermal_zone_device *,
             enum thermal_device_mode *);
    int (*set_mode) (struct thermal_zone_device *,
        enum thermal_device_mode);
    int (*get_trip_type) (struct thermal_zone_device *, int,
        enum thermal_trip_type *);
    int (*get_trip_temp) (struct thermal_zone_device *, int, int *);
    int (*set_trip_temp) (struct thermal_zone_device *, int, int);
    int (*get_trip_hyst) (struct thermal_zone_device *, int, int *);
    int (*set_trip_hyst) (struct thermal_zone_device *, int, int);
    int (*get_crit_temp) (struct thermal_zone_device *, int *);
    int (*set_emul_temp) (struct thermal_zone_device *, int);
    int (*get_trend) (struct thermal_zone_device *, int,
              enum thermal_trend *);
    int (*notify) (struct thermal_zone_device *, int,
               enum thermal_trip_type);
};

/*
 * struct thermal_bind_params - one zone <-> cooling-device binding entry.
 *
 * An array of these hangs off thermal_zone_params.tbp and is walked when a
 * zone has no ops->bind/ops->unbind callback of its own.
 */
struct thermal_bind_params {
    /* Cooling device of this entry; reset to NULL when that device is unregistered. */
    struct thermal_cooling_device *cdev;
    int weight;
    /* Passed to __unbind() together with cdev when the cooling device goes away. */
    int trip_mask;
    unsigned long *binding_limits;
    /* NOTE(review): presumably decides whether tz and cdev should be bound - confirm. */
    int (*match) (struct thermal_zone_device *tz,
            struct thermal_cooling_device *cdev);
};

/*
 * struct thermal_zone_params - optional per-zone parameters supplied at
 * registration time (the excerpt elides some members).
 */
struct thermal_zone_params {
    /* Looked up with __find_governor() when the zone is registered. */
    char governor_name[THERMAL_NAME_LENGTH];
    /* When set, thermal_add_hwmon_sysfs() is skipped for the zone. */
    bool no_hwmon;
    int num_tbps;    /* Number of tbp entries */
    struct thermal_bind_params *tbp;
...
    /* NOTE(review): slope/offset are not used in this excerpt - meaning to be confirmed. */
    int slope;
    int offset;
};

/*
 * struct thermal_zone_of_device_ops - sensor callbacks that take an opaque
 * pointer instead of a struct thermal_zone_device.
 *
 * NOTE(review): the parameter names below mirror the same-named callbacks in
 * struct thermal_zone_device_ops; they are descriptive only - confirm against
 * the users of this structure.
 */
struct thermal_zone_of_device_ops {
    int (*get_temp)(void *data, int *temp);
    int (*get_trend)(void *data, int trip, enum thermal_trend *trend);
    int (*set_trips)(void *data, int low, int high);
    int (*set_emul_temp)(void *data, int temp);
    int (*set_trip_temp)(void *data, int trip, int temp);
};

struct thermal_cooling_device是對降溫設備的抽象,對風扇設備就是不同的轉速,對CPU、DDR、GPU就是不同的電壓或者頻率。

struct thermal_cooling_device_ops是Cooling Device操作函數集,其中set_cur_state()是對設備進行溫度控制。

/*
 * struct thermal_cooling_device - abstraction of a device that can cool
 * the system. One instance exists per registered cooling device.
 */
struct thermal_cooling_device {
    int id;                 /* allocated from thermal_cdev_idr; used in the "cooling_device%d" name */
    char type[THERMAL_NAME_LENGTH];
    struct device device;
    struct device_node *np; /* NULL when registered through thermal_cooling_device_register() */
    void *devdata;
    const struct thermal_cooling_device_ops *ops;
    bool updated; /* true if the cooling device does not need update */
    struct mutex lock; /* protect thermal_instances list */
    struct list_head thermal_instances;
    struct list_head node;  /* entry on the global thermal_cdev_list */
};

/*
 * struct thermal_cooling_device_ops - callbacks of a cooling device.
 *
 * All three callbacks shown here are mandatory: registration fails with
 * -EINVAL if any of them is missing. The excerpt elides further members.
 */
struct thermal_cooling_device_ops {
    int (*get_max_state) (struct thermal_cooling_device *, unsigned long *);
    int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *);
    /* Called by thermal_cdev_update() with the selected target state. */
    int (*set_cur_state) (struct thermal_cooling_device *, unsigned long);
...
};

struct thermal_governor是對溫控策略的抽象,也就是根據Thermal Zone的trip來選擇Thermal Cooling設備的行爲。比如,溫度越高風扇轉速越快;溫度越高CPU運行在更低的電壓和頻率上。

struct thermal_governor {
    char name[THERMAL_NAME_LENGTH];
    int (*bind_to_tz)(struct thermal_zone_device *tz);---------------------將一個governor綁定到thermal zone得一個trip上。 void (*unbind_from_tz)(struct thermal_zone_device *tz);----------------將一個governor從thermal zone解綁。 int (*throttle)(struct thermal_zone_device *tz, int trip);-------------根據trip遍歷當前thermal zone下全部的cooling device執行溫控策略。 struct list_head    governor_list;-------------------------------------thermal_governor_list上的一個列表元素。
};

所有的策略選擇都是通過throttle()函數進行的,不同Governor的區別也主要在這裏。內核已經實現了Step Wise、User等等,並且還在演進中。

通過struct thermal_instance可以將thermal zone和thermal cooling設備綁定起來。

struct thermal_instance {
    int id;
    char name[THERMAL_NAME_LENGTH];
    struct thermal_zone_device *tz;-------------------------------------------綁定的thermal zone。 struct thermal_cooling_device *cdev;--------------------------------------綁定的thermal cooling設備。 int trip;-----------------------------------------------------------------對應的thermal zone的trip。 bool initialized;
    unsigned long upper;    /* Highest cooling state for this trip point */---cooling設備的最高降溫狀態。
    unsigned long lower;    /* Lowest cooling state for this trip point */----cooling設備最低降溫狀態。
    unsigned long target;    /* expected cooling state */---------------------cooling設備的當前狀態,也是thermal_cooling_device_ops->set_cur_state()設置後的值。
    char attr_name[THERMAL_NAME_LENGTH];
    struct device_attribute attr;
    char weight_attr_name[THERMAL_NAME_LENGTH];
    struct device_attribute weight_attr;
    struct list_head tz_node; /* node in tz->thermal_instances */-------------thermal_zone_device->thermal_instances上的節點。
    struct list_head cdev_node; /* node in cdev->thermal_instances */---------thermal_cooling_device->thermal_instances上的節點。
    unsigned int weight; /* The weight of the cooling device */
};

thermal_device_mode表示當前的thermal zone是否使能。

thermal_trip_type表示thermal zone的當前trip類型,其中ACTIVE和PASSIVE屬於non-critical類型,交由Governor進行處理;HOT和CRITICAL屬於critical類型,其中CRITICAL會執行orderly_poweroff()。

thermal_trend表示thermal zone的溫度趨勢,是平緩、上升、降低仍是跳躍式的,這就給Governor選擇trip提供依據。

/* Whether a thermal zone is currently enabled. */
enum thermal_device_mode {
    THERMAL_DEVICE_DISABLED = 0,
    THERMAL_DEVICE_ENABLED  = 1,
};

/*
 * Type of a trip point. handle_thermal_trip() routes HOT and CRITICAL to
 * handle_critical_trips() and every other type to the governor.
 */
enum thermal_trip_type {
    THERMAL_TRIP_ACTIVE   = 0,
    THERMAL_TRIP_PASSIVE  = 1,
    THERMAL_TRIP_HOT      = 2,
    THERMAL_TRIP_CRITICAL = 3,
};

/* Temperature trend of a thermal zone, reported through ops->get_trend(). */
enum thermal_trend {
    THERMAL_TREND_STABLE, /* temperature is stable */
    THERMAL_TREND_RAISING, /* temperature is raising */
    THERMAL_TREND_DROPPING, /* temperature is dropping */
    THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */
    THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */
};

/* Reason recorded in tz->notify_event by thermal_zone_device_update(). */
enum thermal_notify_event {
    THERMAL_EVENT_UNSPECIFIED               = 0, /* Unspecified event */
    THERMAL_EVENT_TEMP_SAMPLE               = 1, /* New Temperature sample */
    THERMAL_TRIP_VIOLATED                   = 2, /* TRIP Point violation */
    THERMAL_TRIP_CHANGED                    = 3, /* TRIP Point temperature changed */
    THERMAL_DEVICE_DOWN                     = 4, /* Thermal device is down */
    THERMAL_DEVICE_UP                       = 5, /* Thermal device is up after a down event */
    THERMAL_DEVICE_POWER_CAPABILITY_CHANGED = 6, /* power capability changed */
};

1.2 Thermal Core APIs

Thermal core是Thermal Zone、Thermal Cooling、Thermal Governor的粘合劑。

通過Thermal core提供的API,將這三者相互關聯起來;從Thermal Zone設備獲取溫度,選擇對應的Thermal Governor,Thermal Governor設置Thermal Cooling的狀態,進而達到控制溫度的目的。

通過thermal_zone_device_register()註冊thermal zone設備,創建一系列sysfs節點,並且和governor、cooling進行綁定。

struct thermal_zone_device *thermal_zone_device_register(const char *type,
    int trips, int mask, void *devdata,
    struct thermal_zone_device_ops *ops,
    struct thermal_zone_params *tzp,
    int passive_delay, int polling_delay)
{
    struct thermal_zone_device *tz;
    enum thermal_trip_type trip_type;
    int trip_temp;
    int result;
    int count;
    int passive = 0;
    struct thermal_governor *governor;

    if (type && strlen(type) >= THERMAL_NAME_LENGTH)
        return ERR_PTR(-EINVAL);

    if (trips > THERMAL_MAX_TRIPS || trips < 0 || mask >> trips)
        return ERR_PTR(-EINVAL);

    if (!ops)
        return ERR_PTR(-EINVAL);

    if (trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
        return ERR_PTR(-EINVAL);

    tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL);
    if (!tz)
        return ERR_PTR(-ENOMEM);

    INIT_LIST_HEAD(&tz->thermal_instances);------------------------------初始化thermal_instances鏈表,放置struct thermal_instances實例。經過thermal_instances能夠關聯thermal zone和thermal cooling。
    idr_init(&tz->idr);
    mutex_init(&tz->lock);
    result = get_idr(&thermal_tz_idr, &thermal_idr_lock, &tz->id);
    if (result) {
        kfree(tz);
        return ERR_PTR(result);
    }

    strlcpy(tz->type, type ? : "", sizeof(tz->type));
    tz->ops = ops;
    tz->tzp = tzp;
    tz->device.class = &thermal_class;------------------------------------建立的設備會在/sys/class/thermal下面有個連接。
    tz->devdata = devdata;
    tz->trips = trips;
    tz->passive_delay = passive_delay;
    tz->polling_delay = polling_delay;
    /* A new thermal zone needs to be updated anyway. */
    atomic_set(&tz->need_update, 1);

    dev_set_name(&tz->device, "thermal_zone%d", tz->id);
    result = device_register(&tz->device);--------------------------------建立/sys/devices/virtual/thermal/thermal_zone*設備。 if (result) {
        release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
        kfree(tz);
        return ERR_PTR(result);
    }

    /* sys I/F */---------------------------------------------------------分別建立type、temp、mode、trip等sysfs節點。
    if (type) {
        result = device_create_file(&tz->device, &dev_attr_type);
        if (result)
            goto unregister;
    }
...
    result = create_trip_attrs(tz, mask);-----------------------爲每一個trip建立trip_point_*_temp/hyst/type節點。 if (result)
        goto unregister;
...
/* Update 'this' zone's governor information */
    mutex_lock(&thermal_governor_lock);

    if (tz->tzp)-------------------------------------------------若是指定thermal zone的governor則經過__find_governor()選定;不然使用默認def_governor。
        governor = __find_governor(tz->tzp->governor_name);
    else
        governor = def_governor;

    result = thermal_set_governor(tz, governor);-----------------將governor綁定到tz上,優先使用bind_to_tz()執行綁定;不然直接指定tz->governor爲governor。 if (result) {
        mutex_unlock(&thermal_governor_lock);
        goto unregister;
    }

    mutex_unlock(&thermal_governor_lock);

    if (!tz->tzp || !tz->tzp->no_hwmon) {
        result = thermal_add_hwmon_sysfs(tz);
        if (result)
            goto unregister;
    }

    mutex_lock(&thermal_list_lock);
    list_add_tail(&tz->node, &thermal_tz_list);------------------------將當前thermal zone加入到thermal_tz_list列表上。
    mutex_unlock(&thermal_list_lock);

    /* Bind cooling devices for this zone */ bind_tz(tz);-------------------------------------------------------調用tz->ops->bind()將thermal_cdev_list上的cooling設備綁定到tz上。

    INIT_DELAYED_WORK(&(tz->poll_queue), thermal_zone_device_check);

    thermal_zone_device_reset(tz);-------------------------------------對thermal zone的溫度等復位。 /* Update the new thermal zone and mark it as already updated. */
    if (atomic_cmpxchg(&tz->need_update, 1, 0))
        thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);

    return tz;

unregister:
    release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id);
    device_unregister(&tz->device);
    return ERR_PTR(result);
}

static int thermal_set_governor(struct thermal_zone_device *tz,
                struct thermal_governor *new_gov)
{
    int ret = 0;

    if (tz->governor && tz->governor->unbind_from_tz)
        tz->governor->unbind_from_tz(tz);------------------------------先調用當前governor進行unbind()。 if (new_gov && new_gov->bind_to_tz) {
        ret = new_gov->bind_to_tz(tz);---------------------------------使用當前governor進行bind()。 if (ret) {
            bind_previous_governor(tz, new_gov->name);

            return ret;
        }
    }

    tz->governor = new_gov;--------------------------------------------更新tz->governor。 return ret;
}

/*
 * bind_tz() - bind the already registered cooling devices to a new zone.
 * @tz: thermal zone being registered
 *
 * Does nothing when the zone has neither parameters nor an ops->bind
 * callback. The path used when ops->bind is absent is elided in this
 * excerpt; the locals i and tzp are only used there.
 */
static void bind_tz(struct thermal_zone_device *tz)
{
    int i, ret;
    struct thermal_cooling_device *pos = NULL;
    const struct thermal_zone_params *tzp = tz->tzp;

    if (!tzp && !tz->ops->bind)
        return;

    mutex_lock(&thermal_list_lock);

    /* If there is ops->bind, try to use ops->bind */
    if (tz->ops->bind) {
        /*
         * Walk every cooling device on thermal_cdev_list and offer it
         * to the zone; a failure is reported but does not stop the walk.
         */
        list_for_each_entry(pos, &thermal_cdev_list, node) {
            ret = tz->ops->bind(tz, pos);
            if (ret)
                print_bind_err_msg(tz, pos, ret);
        }
        goto exit;
    }
    /* ... (elided in this excerpt) ... */
exit:
    mutex_unlock(&thermal_list_lock);
}

/*
 * Work handler of tz->poll_queue: recover the owning thermal zone from the
 * embedded work item and run a regular update on it.
 */
static void thermal_zone_device_check(struct work_struct *work)
{
    struct delayed_work *dwork;
    struct thermal_zone_device *zone;

    /* work is the .work member of the zone's poll_queue delayed work. */
    dwork = container_of(work, struct delayed_work, work);
    zone = container_of(dwork, struct thermal_zone_device, poll_queue);

    thermal_zone_device_update(zone, THERMAL_EVENT_UNSPECIFIED);
}

thermal_zone_device_unregister()則執行相反的操作,將thermal zone從thermal_tz_list上摘除,並且和cooling設備解除綁定,以及刪除一系列sysfs節點。

/*
 * thermal_zone_device_unregister() - remove a thermal zone.
 * @tz: zone to remove; NULL is accepted and ignored
 *
 * Takes the zone off thermal_tz_list and unbinds its cooling devices.
 * Returns silently if the zone is not on the list. Parts of the function
 * (including the release of thermal_list_lock on the normal path) are
 * elided in this excerpt.
 */
void thermal_zone_device_unregister(struct thermal_zone_device *tz)
{
    int i;
    const struct thermal_zone_params *tzp;
    struct thermal_cooling_device *cdev;
    struct thermal_zone_device *pos = NULL;

    if (!tz)
        return;

    tzp = tz->tzp;

    mutex_lock(&thermal_list_lock);
    /* Make sure the zone really is registered before touching the list. */
    list_for_each_entry(pos, &thermal_tz_list, node)
        if (pos == tz)
        break;
    if (pos != tz) {
        /* thermal zone device not found */
        mutex_unlock(&thermal_list_lock);
        return;
    }
    list_del(&tz->node);

    /* Unbind all cdevs associated with 'this' thermal zone */
    list_for_each_entry(cdev, &thermal_cdev_list, node) {
        /* A driver-provided unbind callback takes precedence. */
        if (tz->ops->unbind) {
            tz->ops->unbind(tz, cdev);
            continue;
        }
...
    }
...
    return;
}

thermal_cooling_device_register()創建cooling設備並放入thermal_cdev_list中,同時創建相關sysfs節點,並將cooling設備和thermal zone綁定。

thermal_cooling_device_unregister()則進行相反的操做。

/*
 * thermal_cooling_device_register() - register a cooling device that has
 * no device-tree node.
 * @type:    cooling device type string
 * @devdata: private driver data
 * @ops:     cooling device callbacks
 *
 * Thin wrapper around __thermal_cooling_device_register() with a NULL
 * device node. Return: whatever that function returns.
 */
struct thermal_cooling_device *
thermal_cooling_device_register(char *type, void *devdata,
                const struct thermal_cooling_device_ops *ops)
{
    struct device_node *no_node = NULL;

    return __thermal_cooling_device_register(no_node, type, devdata, ops);
}

static struct thermal_cooling_device *
__thermal_cooling_device_register(struct device_node *np,
                  char *type, void *devdata,
                  const struct thermal_cooling_device_ops *ops)
{
    struct thermal_cooling_device *cdev;
    struct thermal_zone_device *pos = NULL;
    int result;

    if (type && strlen(type) >= THERMAL_NAME_LENGTH)
        return ERR_PTR(-EINVAL);

    if (!ops || !ops->get_max_state || !ops->get_cur_state ||
        !ops->set_cur_state)
        return ERR_PTR(-EINVAL);

    cdev = kzalloc(sizeof(struct thermal_cooling_device), GFP_KERNEL);
    if (!cdev)
        return ERR_PTR(-ENOMEM);

    result = get_idr(&thermal_cdev_idr, &thermal_idr_lock, &cdev->id);
    if (result) {
        kfree(cdev);
        return ERR_PTR(result);
    }

    strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
    mutex_init(&cdev->lock);
    INIT_LIST_HEAD(&cdev->thermal_instances);
    cdev->np = np;
    cdev->ops = ops;
    cdev->updated = false;
    cdev->device.class = &thermal_class;---------------------------------cooling設備一樣會在/sys/class/thermal下建立連接。
    cdev->device.groups = cooling_device_attr_groups;--------------------建立cur_state、max_state、type三個sysfs節點。
    cdev->devdata = devdata;
    dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
    result = device_register(&cdev->device);-----------------------------建立/sys/devices/virtual/thermal/cooling_device*設備節點。 if (result) {
        release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
        kfree(cdev);
        return ERR_PTR(result);
    }

    /* Add 'this' new cdev to the global cdev list */
    mutex_lock(&thermal_list_lock);
    list_add(&cdev->node, &thermal_cdev_list);---------------------------將設備放入thermal_cdev_list設備鏈表。
    mutex_unlock(&thermal_list_lock);

    /* Update binding information for 'this' new cdev */
    bind_cdev(cdev);-----------------------------------------------------遍歷thermal_tz_list,將cdev綁定到上面的thermal zone。

    mutex_lock(&thermal_list_lock);
    list_for_each_entry(pos, &thermal_tz_list, node)
        if (atomic_cmpxchg(&pos->need_update, 1, 0))
            thermal_zone_device_update(pos,
                           THERMAL_EVENT_UNSPECIFIED);
    mutex_unlock(&thermal_list_lock);

    return cdev;
}

void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
{
    int i;
    const struct thermal_zone_params *tzp;
    struct thermal_zone_device *tz;
    struct thermal_cooling_device *pos = NULL;

    if (!cdev)
        return;

    mutex_lock(&thermal_list_lock);
    list_for_each_entry(pos, &thermal_cdev_list, node)
        if (pos == cdev)
        break;
    if (pos != cdev) {
        /* thermal cooling device not found */
        mutex_unlock(&thermal_list_lock);
        return;
    }
    list_del(&cdev->node);

    /* Unbind all thermal zones associated with 'this' cdev */
    list_for_each_entry(tz, &thermal_tz_list, node) {
        if (tz->ops->unbind) {
            tz->ops->unbind(tz, cdev);
            continue;
        }

        if (!tz->tzp || !tz->tzp->tbp)
            continue;

        tzp = tz->tzp;
        for (i = 0; i < tzp->num_tbps; i++) {
            if (tzp->tbp[i].cdev == cdev) {
                __unbind(tz, tzp->tbp[i].trip_mask, cdev);
                tzp->tbp[i].cdev = NULL;
            }
        }
    }

    mutex_unlock(&thermal_list_lock);

    if (cdev->type[0])
        device_remove_file(&cdev->device, &dev_attr_cdev_type);
    device_remove_file(&cdev->device, &dev_attr_max_state);
    device_remove_file(&cdev->device, &dev_attr_cur_state);

    release_idr(&thermal_cdev_idr, &thermal_idr_lock, cdev->id);
    device_unregister(&cdev->device);
    return;
}

thermal_register_governor()首先判斷thermal_governor_list上是否有同名governor,然後更新thermal_tz_list上未指定governor的thermal zone。

thermal_unregister_governor()則相反,對使用該governor的thermal zone調用unbind_from_tz()並將其governor置空;最後將governor從thermal_governor_list上摘除。

/*
 * thermal_register_governor() - add a governor to thermal_governor_list.
 * @governor: governor to register
 *
 * After the list update, every thermal zone that has no governor yet and
 * whose tzp->governor_name matches (case-insensitively) is switched to
 * @governor.
 *
 * Return: 0 on success, -EINVAL for a NULL governor, -EBUSY if a governor
 * with the same name is already registered.
 */
int thermal_register_governor(struct thermal_governor *governor)
{
    int err;
    const char *name;
    struct thermal_zone_device *pos;

    if (!governor)
        return -EINVAL;

    mutex_lock(&thermal_governor_lock);

    err = -EBUSY;
    /*
     * Add the governor unless one of the same name is already on
     * thermal_governor_list, and adopt it as def_governor when its name
     * is DEFAULT_THERMAL_GOVERNOR and no default has been chosen yet.
     */
    if (__find_governor(governor->name) == NULL) {
        err = 0;
        list_add(&governor->governor_list, &thermal_governor_list);
        if (!def_governor && !strncmp(governor->name,
            DEFAULT_THERMAL_GOVERNOR, THERMAL_NAME_LENGTH))
            def_governor = governor;
    }

    mutex_lock(&thermal_list_lock);

    list_for_each_entry(pos, &thermal_tz_list, node) {
        /* Skip thermal zones that already have a governor. */
        if (pos->governor)
            continue;

        name = pos->tzp->governor_name;
        if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) {
            int ret;

            /* Assign the governor to this thermal zone. */
            ret = thermal_set_governor(pos, governor);
            if (ret)
                dev_err(&pos->device,
                    "Failed to set governor %s for thermal zone %s: %d\n",
                    governor->name, pos->type, ret);
        }
    }

    mutex_unlock(&thermal_list_lock);
    mutex_unlock(&thermal_governor_lock);

    return err;
}

/*
 * thermal_unregister_governor() - remove a governor.
 * @governor: governor to remove; NULL is accepted and ignored
 *
 * Every thermal zone currently using a governor of that name is switched to
 * "no governor" through thermal_set_governor(pos, NULL), then the governor
 * is taken off thermal_governor_list. Nothing happens if no governor of
 * that name is registered.
 */
void thermal_unregister_governor(struct thermal_governor *governor)
{
    struct thermal_zone_device *pos;

    if (!governor)
        return;

    mutex_lock(&thermal_governor_lock);

    if (__find_governor(governor->name) == NULL)
        goto exit;

    mutex_lock(&thermal_list_lock);

    list_for_each_entry(pos, &thermal_tz_list, node) {
        /*
         * A zone may legitimately have no governor (its requested
         * governor was never registered, or an earlier unregister
         * cleared it), so do not dereference pos->governor blindly.
         */
        if (!pos->governor)
            continue;

        if (!strncasecmp(pos->governor->name, governor->name,
                        THERMAL_NAME_LENGTH))
            thermal_set_governor(pos, NULL);
    }

    mutex_unlock(&thermal_list_lock);
    list_del(&governor->governor_list);
exit:
    mutex_unlock(&thermal_governor_lock);
    return;
}

thermal_zone_bind_cooling_device()通過創建thermal_instance將Thermal Zone和Thermal Cooling綁定,這樣Thermal Zone就可以根據溫度處理Thermal Cooling設備。

thermal_zone_unbind_cooling_device()則將關聯Thermal Zone和Thermal Cooling的thermal_instance從二者的鏈表上摘除。

/*
 * thermal_zone_bind_cooling_device() - bind a cooling device to a trip of a
 * thermal zone by creating a struct thermal_instance.
 * @tz:     thermal zone; must be on thermal_tz_list
 * @trip:   trip index below tz->trips, or THERMAL_TRIPS_NONE
 * @cdev:   cooling device; must be on thermal_cdev_list
 * @upper:  highest cooling state for this trip; THERMAL_NO_LIMIT selects the
 *          device's max_state
 * @lower:  lowest cooling state for this trip; THERMAL_NO_LIMIT selects 0
 * @weight: stored in the instance's weight field
 *
 * Return: -EINVAL for a bad trip, unknown zone/device or an invalid
 * lower/upper range, -ENOMEM on allocation failure, or the error of the
 * failing helper. The tail of the function (list insertion and the error
 * labels) is elided in this excerpt; the local pos is only used there.
 */
int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
                     int trip,
                     struct thermal_cooling_device *cdev,
                     unsigned long upper, unsigned long lower,
                     unsigned int weight)
{
    struct thermal_instance *dev;
    struct thermal_instance *pos;
    struct thermal_zone_device *pos1;
    struct thermal_cooling_device *pos2;
    unsigned long max_state;
    int result, ret;

    if (trip >= tz->trips || (trip < 0 && trip != THERMAL_TRIPS_NONE))
        return -EINVAL;

    /* Both the zone and the cooling device must be registered. */
    list_for_each_entry(pos1, &thermal_tz_list, node) {
        if (pos1 == tz)
            break;
    }
    list_for_each_entry(pos2, &thermal_cdev_list, node) {
        if (pos2 == cdev)
            break;
    }

    if (tz != pos1 || cdev != pos2)
        return -EINVAL;

    /*
     * max_state of the cooling device bounds the lower/upper range of
     * the instance.
     */
    ret = cdev->ops->get_max_state(cdev, &max_state);
    if (ret)
        return ret;

    /* lower default 0, upper default max_state */
    lower = lower == THERMAL_NO_LIMIT ? 0 : lower;
    upper = upper == THERMAL_NO_LIMIT ? max_state : upper;

    if (lower > upper || upper > max_state)
        return -EINVAL;

    dev =
        kzalloc(sizeof(struct thermal_instance), GFP_KERNEL);
    if (!dev)
        return -ENOMEM;
    dev->tz = tz;
    dev->cdev = cdev;
    dev->trip = trip;
    dev->upper = upper;
    dev->lower = lower;
    dev->target = THERMAL_NO_TARGET;
    dev->weight = weight;

    result = get_idr(&tz->idr, &tz->lock, &dev->id);
    if (result)
        goto free_mem;

    /* "cdevX" under the zone is a link to the cooling_deviceX device. */
    sprintf(dev->name, "cdev%d", dev->id);
    result =
        sysfs_create_link(&tz->device.kobj, &cdev->device.kobj, dev->name);
    if (result)
        goto release_idr;

    /* Create the cdevX_trip_point and cdevX_weight nodes. */
    sprintf(dev->attr_name, "cdev%d_trip_point", dev->id);
    sysfs_attr_init(&dev->attr.attr);
    dev->attr.attr.name = dev->attr_name;
    dev->attr.attr.mode = 0444;
    dev->attr.show = thermal_cooling_device_trip_point_show;
    result = device_create_file(&tz->device, &dev->attr);
    if (result)
        goto remove_symbol_link;

    sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id);
    sysfs_attr_init(&dev->weight_attr.attr);
    dev->weight_attr.attr.name = dev->weight_attr_name;
    dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO;
    dev->weight_attr.show = thermal_cooling_device_weight_show;
    dev->weight_attr.store = thermal_cooling_device_weight_store;
    result = device_create_file(&tz->device, &dev->weight_attr);
    if (result)
        goto remove_trip_file;
    /* ... (elided in this excerpt) ... */
}

int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
                       int trip,
                       struct thermal_cooling_device *cdev)
{
    struct thermal_instance *pos, *next;

    mutex_lock(&tz->lock);
    mutex_lock(&cdev->lock);
    list_for_each_entry_safe(pos, next, &tz->thermal_instances, tz_node) {
        if (pos->tz == tz && pos->trip == trip && pos->cdev == cdev) {
            list_del(&pos->tz_node);
            list_del(&pos->cdev_node);
            mutex_unlock(&cdev->lock);
            mutex_unlock(&tz->lock);
            goto unbind;
        }
    }
    mutex_unlock(&cdev->lock);
    mutex_unlock(&tz->lock);

    return -ENODEV;

unbind:
    device_remove_file(&tz->device, &pos->weight_attr);
    device_remove_file(&tz->device, &pos->attr);
    sysfs_remove_link(&tz->device.kobj, pos->name);
    release_idr(&tz->idr, &tz->lock, pos->id);
    kfree(pos);
    return 0;
}

thermal_zone_device_update()一般由Thermal驅動調用,有可能是polling或者中斷觸發。

它首先更新當前Thermal Zone的溫度,最後根據溫度值通過handle_thermal_trip()進行處理。

monitor_thermal_zone()根據passive和polling的設置決定是否啓動thermal_zone_device->poll_queue這個delayed_work。

整個polling流程由thermal_zone_device_update()觸發,流程依次爲:handle_thermal_trip()中啓動monitor_thermal_zone(),monitor_thermal_zone()中調用mod_delayed_work()進行poll_queue延時值的更新。如果thermal zone有多個trip,poll_queue延時值可能被多次更新。poll_queue放入system_freezable_wq後,到達時間後調用thermal_zone_device_check(),進而調用thermal_zone_device_update()完成週期性循環。

/*
 * thermal_zone_device_update() - sample a zone and process all its trips.
 * @tz:    thermal zone to update
 * @event: reason for the update, recorded in tz->notify_event
 *
 * Does nothing while in_suspend is non-zero or when the zone has no
 * get_temp callback.
 */
void thermal_zone_device_update(struct thermal_zone_device *tz,
                enum thermal_notify_event event)
{
    int trip_idx = 0;

    if (atomic_read(&in_suspend) || !tz->ops->get_temp)
        return;

    update_temperature(tz);
    thermal_zone_set_trips(tz);
    tz->notify_event = event;

    while (trip_idx < tz->trips) {
        handle_thermal_trip(tz, trip_idx);
        trip_idx++;
    }
}

/*
 * update_temperature() - read the zone temperature and shift the previous
 * sample into tz->last_temperature.
 *
 * A read failure leaves the stored temperatures untouched; it is reported
 * with dev_warn() unless the error is -EAGAIN.
 */
static void update_temperature(struct thermal_zone_device *tz)
{
    int reading;
    int err = thermal_zone_get_temp(tz, &reading);

    if (err == -EAGAIN)
        return;
    if (err) {
        dev_warn(&tz->device,
             "failed to read out thermal zone (%d)\n",
             err);
        return;
    }

    mutex_lock(&tz->lock);
    tz->last_temperature = tz->temperature;
    tz->temperature = reading;
    mutex_unlock(&tz->lock);

    trace_thermal_temperature(tz);
    if (tz->last_temperature != THERMAL_TEMP_INVALID)
        dev_dbg(&tz->device, "last_temperature=%d, current_temperature=%d\n",
            tz->last_temperature, tz->temperature);
    else
        dev_dbg(&tz->device, "last_temperature N/A, current_temperature=%d\n",
            tz->temperature);
}

/*
 * thermal_zone_set_trips() - program the temperature window around the
 * current temperature.
 *
 * Picks the highest (trip_temp - hysteresis) below tz->temperature and the
 * lowest trip_temp above it, and hands the pair to ops->set_trips() when it
 * differs from the previously programmed pair. Does nothing if the zone
 * lacks set_trips or get_trip_hyst.
 */
void thermal_zone_set_trips(struct thermal_zone_device *tz)
{
    int window_low = -INT_MAX;
    int window_high = INT_MAX;
    int trip_temp, hysteresis;
    int idx, err;

    mutex_lock(&tz->lock);

    if (tz->ops->set_trips && tz->ops->get_trip_hyst) {
        for (idx = 0; idx < tz->trips; idx++) {
            int falling_edge;

            tz->ops->get_trip_temp(tz, idx, &trip_temp);
            tz->ops->get_trip_hyst(tz, idx, &hysteresis);

            falling_edge = trip_temp - hysteresis;

            if (falling_edge < tz->temperature && falling_edge > window_low)
                window_low = falling_edge;

            if (trip_temp > tz->temperature && trip_temp < window_high)
                window_high = trip_temp;
        }

        /* Only touch the hardware when the window actually changed. */
        if (tz->prev_low_trip != window_low ||
            tz->prev_high_trip != window_high) {
            tz->prev_low_trip = window_low;
            tz->prev_high_trip = window_high;

            dev_dbg(&tz->device,
                "new temperature boundaries: %d < x < %d\n",
                window_low, window_high);

            err = tz->ops->set_trips(tz, window_low, window_high);
            if (err)
                dev_err(&tz->device, "Failed to set trips: %d\n", err);
        }
    }

    mutex_unlock(&tz->lock);
}

static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
{
    enum thermal_trip_type type;

    /* Ignore disabled trip points */
    if (test_bit(trip, &tz->trips_disabled))
        return;

    tz->ops->get_trip_type(tz, trip, &type);

    if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
        handle_critical_trips(tz, trip, type);
    else handle_non_critical_trips(tz, trip, type);
    /*
     * Alright, we handled this trip successfully.
     * So, start monitoring again.
     */ monitor_thermal_zone(tz);
}

/*
 * handle_critical_trips() - react to a HOT or CRITICAL trip.
 *
 * Nothing happens unless the trip temperature is positive and the zone
 * temperature has reached it. Otherwise the driver is notified (if it has
 * a notify callback) and, for a CRITICAL trip, an orderly power-off is
 * started.
 */
static void handle_critical_trips(struct thermal_zone_device *tz,
                int trip, enum thermal_trip_type trip_type)
{
    int threshold;

    tz->ops->get_trip_temp(tz, trip, &threshold);

    /* If we have not crossed the trip_temp, we do not care. */
    if (threshold <= 0)
        return;
    if (tz->temperature < threshold)
        return;

    trace_thermal_zone_trip(tz, trip, trip_type);

    if (tz->ops->notify)
        tz->ops->notify(tz, trip, trip_type);

    if (trip_type != THERMAL_TRIP_CRITICAL)
        return;

    dev_emerg(&tz->device,
          "critical temperature reached(%d C),shutting down\n",
          tz->temperature / 1000);
    orderly_poweroff(true);
}

static void handle_non_critical_trips(struct thermal_zone_device *tz,
            int trip, enum thermal_trip_type trip_type)
{
    tz->governor ? tz->governor->throttle(tz, trip) :
               def_governor->throttle(tz, trip);
}

/*
 * monitor_thermal_zone() - (re)arm or cancel the zone's polling work.
 *
 * Under tz->lock, selects the polling delay: passive_delay while tz->passive
 * is non-zero, otherwise polling_delay if it is non-zero, otherwise 0, which
 * makes thermal_zone_device_set_polling() cancel the delayed work.
 */
static void monitor_thermal_zone(struct thermal_zone_device *tz)
{
    mutex_lock(&tz->lock);

    /* Passive and regular polling each use their own delay. */
    if (tz->passive)
        thermal_zone_device_set_polling(tz, tz->passive_delay);
    else if (tz->polling_delay)
        thermal_zone_device_set_polling(tz, tz->polling_delay);
    else
        thermal_zone_device_set_polling(tz, 0);

    mutex_unlock(&tz->lock);
}

/*
 * thermal_zone_device_set_polling() - schedule or cancel tz->poll_queue.
 * @tz:    thermal zone
 * @delay: delay in milliseconds; 0 cancels the delayed work
 *
 * The work is queued on system_freezable_wq. Because mod_delayed_work() is
 * used, repeated calls before the timer expires only re-time the pending
 * work; the last call wins. Delays above 1000 ms are passed through
 * round_jiffies().
 */
static void thermal_zone_device_set_polling(struct thermal_zone_device *tz,
                        int delay)
{
    if (delay > 1000)
        mod_delayed_work(system_freezable_wq, &tz->poll_queue,
                 round_jiffies(msecs_to_jiffies(delay)));
    else if (delay)
        mod_delayed_work(system_freezable_wq, &tz->poll_queue,
                 msecs_to_jiffies(delay));
    else
        /* A delay of 0 cancels the pending poll_queue work. */
        cancel_delayed_work(&tz->poll_queue);
}

thermal_cdev_update()是由Governor調用進行cooling device設置。

/*
 * thermal_cdev_update() - apply the governors' decisions to a cooling device.
 * @cdev: cooling device to update
 *
 * Returns immediately when cdev->updated is already set. Otherwise the
 * largest target among all instances of the device (ignoring
 * THERMAL_NO_TARGET, default 0) is passed to ops->set_cur_state() and the
 * device is marked updated.
 */
void thermal_cdev_update(struct thermal_cooling_device *cdev)
{
    struct thermal_instance *instance;
    unsigned long target = 0;

    mutex_lock(&cdev->lock);
    /* cooling device is updated*/
    if (cdev->updated) {
        mutex_unlock(&cdev->lock);
        return;
    }

    /*
     * Make sure cdev enters the deepest cooling state: walk every
     * instance (one per bound thermal zone trip) of this cooling device.
     */
    list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
        dev_dbg(&cdev->device, "zone%d->target=%lu\n",
                instance->tz->id, instance->target);
        if (instance->target == THERMAL_NO_TARGET)
            continue;
        if (instance->target > target)
            target = instance->target;
    }
    /* Apply the highest requested cooling state to the device. */
    cdev->ops->set_cur_state(cdev, target);
    cdev->updated = true;
    mutex_unlock(&cdev->lock);
    trace_cdev_update(cdev, target);
    dev_dbg(&cdev->device, "set to state %lu\n", target);
}

1.3 Thermal初始化

thermal_init()在內核fs_initcall()階段調用,進行governor、thermal_class、Generic Netlink註冊等操做。

/*
 * Thermal core initialization, run at fs_initcall time: registers the
 * governors, the thermal class and the generic netlink family.
 * The error labels and remaining steps are elided ("...") in this excerpt.
 */
static int __init thermal_init(void)
{
    int result;

    /* register every governor supported by the platform */
    result = thermal_register_governors();
    if (result)
        goto error;

    /* register thermal_class */
    result = class_register(&thermal_class);
    if (result)
        goto unregister_governors;

    /* register the generic netlink family */
    result = genetlink_init();
    if (result)
        goto unregister_class;
...
    return result;
}

/*
 * Thermal core exit: removes the PM notifier, destroys the OF thermal
 * zones, then unregisters generic netlink, the thermal class and the
 * governors. Further steps are elided ("...") in this excerpt.
 */
static void __exit thermal_exit(void)
{
    unregister_pm_notifier(&thermal_pm_nb);
    of_thermal_destroy_zones();
    genetlink_exit();
    class_unregister(&thermal_class);
    thermal_unregister_governors();
 ...
}

fs_initcall(thermal_init);
module_exit(thermal_exit);

2. Thermal Driver實例

 下面首先簡單看一下Temp Sensor的硬件,而後分析DTS,最後分析驅動的實現。

2.1 Temp Sensor硬件

對Temp Sensor的配置能夠經過APB BUS進行,包括兩個Temp Sensor,每一個Temp Sensor中包括3個Trip觸發點設置,以及一個Alarm配置。

Trip達到後會觸發中斷,CPU的INTC收到中斷後,進行中斷處理;Alarm達到後直接致使CPU復位或者關閉PLL。

Temp Sensor默認使用32K時鐘,每32768個時鐘採樣一次。還能夠根據狀況選擇24M做爲時鐘輸入。

一個重要工做就是根據實際狀況,選定Trip溫度以及Alarm溫度。

 

另外一個核心的工做就是肯定如何根據Data寄存器的值計算出溫度值。這就須要計算兩個參數A和B。

 

經過其餘測量手段讀出溫度值,以及當前溫度值下的DBN<11:0>。在獲取一系列數據以後,經過直線數據擬合,得出A和B的值。

2.2 Temp Sensor DTS

 DTS是對硬件的抽象,包括寄存器配置地址和範圍、中斷、3個trip溫度、一個alarm溫度。

        /*
         * Temperature sensor 0: register window, interrupt, three trip
         * temperatures and one alarm temperature.
         * The unit address is written without a leading "0x".
         */
        sensor0: sensor0@fc20a000 {
            compatible = "vsi,dp1000-thermal";
            reg = <0xfc20a000 0x20>;
            interrupts = <38>;
            vsi,temp0 = <90>;
            vsi,temp1 = <95>;
            vsi,temp2 = <100>;
            vsi,alarm_temp = <120>;
            vsi,alarm_en;
        };

從DTS能夠看出,經過配置不一樣trip和alarm的溫度,中斷觸發後,CPU會讀取溫度進行相應處理。

2.3 Temp Sensor驅動

Temp Sensor的驅動首先解析DTS,並進行iomem映射;而後註冊中斷以及下半部workqueue處理;再進行硬件設置;最後註冊thermal zone設備。

在設備正常工做中,根據配置的trip和alarm值觸發中斷,而後進行work處理;中間會用到struct thermal_zone_device_ops提供的成員函數獲取溫度、和cooling設備綁定等等操做。

2.3.1 Thermal Sensor註冊

static int dp1000_thermal_probe(struct platform_device *pdev)
{
    struct dp1000_thermal_priv *priv;
    struct resource *res;
    int ret;

    priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
    if (!priv)
        return -ENOMEM;

    res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    priv->reg_base = devm_ioremap_resource(&pdev->dev, res);
    if (IS_ERR(priv->reg_base))
        return PTR_ERR(priv->reg_base);

    if (device_property_read_u32(&pdev->dev, "vsi,temp0", &priv->temp0) < 0) {
        dev_dbg(&pdev->dev,
            "\"temp0\" property is missing, using default value.\n");
        priv->temp0 = 0;
    }
...
    INIT_DELAYED_WORK(&priv->work, dp1000_thermal_work);---------------------建立delayed_work,對應的處理函數是dp1000_thermal_work()。

    priv->irq = platform_get_irq(pdev, 0);
...
    ret = devm_request_irq(&pdev->dev, priv->irq,
                   dp1000_thermal_interrupt, 0,
                   dev_name(&pdev->dev), priv);-------------------------------irq註冊,中斷處理函數爲dp1000_thermal_interrupt()。 ...
    dp1000_init_thermal(priv);------------------------------------------------硬件初始化。

    priv->zone = thermal_zone_device_register("dp1000_thermal", DP1000_THERMAL_TRIPS, 0,
                           priv, &dp1000_thermal_zone_ops, NULL, 0, 0);-------Thermal Zone註冊。
    #ifdef DP1000_THERMAL_STUB
    if(priv->zone->id == 0)
        thermal_zone_0 = priv->zone;
    else if(priv->zone->id == 1)
        thermal_zone_1 = priv->zone;
    #endif
...
    return 0;
}

/*
 * Platform remove callback. The teardown steps are elided ("...") in this
 * excerpt; only the driver-data lookup and the return value are shown.
 */
static int dp1000_thermal_remove(struct platform_device *pdev)
{
    /* presumably the priv allocated in probe — TODO confirm drvdata is set in the elided probe code */
    struct dp1000_thermal_priv *priv = dev_get_drvdata(&pdev->dev);
...
return 0;
}

/* OF match table; the compatible string is the one used by the DTS node. */
static const struct of_device_id dp1000_thermal_id_table[] = {
    { .compatible = "vsi,dp1000-thermal" },
    { }
};
MODULE_DEVICE_TABLE(of, dp1000_thermal_id_table);

/* Platform driver description: probe/remove callbacks, name and OF match table. */
static struct platform_driver dp1000_thermal_driver = {
    .probe = dp1000_thermal_probe,
    .remove = dp1000_thermal_remove,
    .driver = {
        .name = "dp1000_thermal",
        .of_match_table = dp1000_thermal_id_table,
    },
};

module_platform_driver(dp1000_thermal_driver);

dp1000_thermal_zone_ops函數集是Thermal Sensor的核心,對Thermal Zone的操做都是經過調用這些函數實現的。

/*
 * Thermal zone callbacks of the dp1000 sensor; this table is the ops
 * argument passed to thermal_zone_device_register() in probe.
 */
static struct thermal_zone_device_ops dp1000_thermal_zone_ops = {
    .bind = dp1000_thermal_bind,
    .unbind = dp1000_thermal_unbind,
    .get_trip_type = dp1000_thermal_get_trip_type,
    .get_trip_hyst = dp1000_thermal_get_trip_hyst,
    .get_temp    = dp1000_thermal_get_temp,
    .set_trip_temp = dp1000_thermal_set_trip_temp,
    .get_trip_temp = dp1000_thermal_get_trip_temp,
    .get_crit_temp = dp1000_thermal_get_crit_temp,
};

2.3.2 Thermal Driver中斷觸發流程

當Thermal Sensor的溫度達到trip值時,會觸發中斷。

而後進入dp1000_thermal_interrupt(),在延時300ms後進行dp1000_thermal_work()處理。

/*
 * Interrupt handler of the temperature sensor.
 *
 * @irq: interrupt number (not used in the visible code)
 * @id:  the struct dp1000_thermal_priv given to devm_request_irq()
 *
 * Returns IRQ_NONE when status is 0; otherwise schedules priv->work to run
 * after 300 ms and returns IRQ_HANDLED.
 */
static irqreturn_t dp1000_thermal_interrupt(int irq, void *id)
{
    struct dp1000_thermal_priv *priv = (struct dp1000_thermal_priv *)id;
    unsigned int status;

    /*
     * NOTE(review): status is never assigned in this excerpt, so the test
     * below reads an uninitialized variable. Presumably the read of the
     * sensor's interrupt status register was dropped here — TODO restore
     * it before reusing this handler.
     */
    if (status == 0)
        return IRQ_NONE;
    else {
        schedule_delayed_work(&priv->work, msecs_to_jiffies(300));
    }

    return IRQ_HANDLED;
}

/*
 * Bottom half scheduled by the interrupt handler: recovers the sensor's
 * private data from the delayed work item and asks the thermal core to
 * re-evaluate the zone.
 */
static void dp1000_thermal_work(struct work_struct *work)
{
    struct dp1000_thermal_priv *sensor =
        container_of(work, struct dp1000_thermal_priv, work.work);

    thermal_zone_device_update(sensor->zone, THERMAL_EVENT_UNSPECIFIED);
}

最終的工做交給thermal_zone_device_update()進行,讀取溫度,根據溫度選擇trip。

3. Thermal Governor分析

下面簡單分析兩個Governor:Step Wise和Fair Share。

3.1 Step Wise分析

首先看一下Step Wise的註冊。

/* Step Wise governor descriptor: its only callback is step_wise_throttle(). */
static struct thermal_governor thermal_gov_step_wise = {
    .name        = "step_wise",
    .throttle    = step_wise_throttle,
};

/* Register the Step Wise governor; returns thermal_register_governor()'s result. */
int thermal_gov_step_wise_register(void)
{
    return thermal_register_governor(&thermal_gov_step_wise);
}

/* Unregister the Step Wise governor. */
void thermal_gov_step_wise_unregister(void)
{
    thermal_unregister_governor(&thermal_gov_step_wise);
}

handle_non_critical_trips()中,首先調用當前thermal zone所選governor的throttle()進行處理。

對於Step Wise governor來講,對外的接口只有step_wise_throttle()。

static int step_wise_throttle(struct thermal_zone_device *tz, int trip)
{
    struct thermal_instance *instance;
    thermal_zone_trip_update(tz, trip);-----------------------------根據當前溫度和上次溫度對比,獲得溫度趨勢;而後根據溫度趨勢得出Cooling設備對應的state。 if (tz->forced_passive)
        thermal_zone_trip_update(tz, THERMAL_TRIPS_NONE);

    mutex_lock(&tz->lock);

    list_for_each_entry(instance, &tz->thermal_instances, tz_node)
        thermal_cdev_update(instance->cdev);------------------------遍歷cdev->thermal_instances選擇最深的cooling狀態。而後調用cdev->ops->set_cur_state()中。

    mutex_unlock(&tz->lock);

    return 0;
}

static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
{
    int trip_temp;
    enum thermal_trip_type trip_type;
    enum thermal_trend trend;
    struct thermal_instance *instance;
    bool throttle = false;
    int old_target;

    if (trip == THERMAL_TRIPS_NONE) {
        trip_temp = tz->forced_passive;
        trip_type = THERMAL_TRIPS_NONE;
    } else {
        tz->ops->get_trip_temp(tz, trip, &trip_temp);
        tz->ops->get_trip_type(tz, trip, &trip_type);
    }

    trend = get_tz_trend(tz, trip);----------------------------------------根據當前溫度tz->temperature和tz->last_temperature對比,斷定tend是STABLE/RAISING/DROPPING等中的一種。 if (tz->temperature >= trip_temp) {
        throttle = true;---------------------------------------------------throttle爲true表示須要節流,即降溫。
        trace_thermal_zone_trip(tz, trip, trip_type);
    }
    mutex_lock(&tz->lock);

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
        if (instance->trip != trip)----------------------------------------相同trip不作處理。 continue;

        old_target = instance->target;
        instance->target = get_target_state(instance, trend, throttle);----instance->target是將要設置到Cooling設備的狀態。
if (instance->initialized && old_target == instance->target)
            continue;

        /* Activate a passive thermal instance */
        if (old_target == THERMAL_NO_TARGET &&
            instance->target != THERMAL_NO_TARGET)
            update_passive_instance(tz, trip_type, 1);
        /* Deactivate a passive thermal instance */
        else if (old_target != THERMAL_NO_TARGET &&
            instance->target == THERMAL_NO_TARGET)
            update_passive_instance(tz, trip_type, -1);

        instance->initialized = true;
        mutex_lock(&instance->cdev->lock);
        instance->cdev->updated = false;------------------------------------updated爲false表示Cooling設備須要更新狀態,在thermal_cdev_update()中會進行判斷。
        mutex_unlock(&instance->cdev->lock);
    }

    mutex_unlock(&tz->lock);
}

/*
 * Step Wise: choose the next cooling state for one thermal instance.
 *
 * @instance: binding between a trip point and a cooling device
 * @trend:    temperature trend of the zone
 * @throttle: true when the zone temperature is at or above the trip
 *
 * Returns the new target state, or THERMAL_NO_TARGET when the instance
 * should stop requesting cooling. The result stays within
 * [instance->lower, instance->upper] whenever a state is requested.
 */
static unsigned long get_target_state(struct thermal_instance *instance,
                enum thermal_trend trend, bool throttle)
{
    struct thermal_cooling_device *cdev = instance->cdev;
    unsigned long cur_state;
    unsigned long next_target;

    cdev->ops->get_cur_state(cdev, &cur_state);
    next_target = instance->target;
    /* cur_state is unsigned long, hence %lu */
    dev_dbg(&cdev->device, "cur_state=%lu\n", cur_state);

    /* first evaluation: step up once when throttling, else request nothing */
    if (!instance->initialized) {
        if (throttle) {
            next_target = (cur_state + 1) >= instance->upper ?
                    instance->upper :
                    ((cur_state + 1) < instance->lower ?
                    instance->lower : (cur_state + 1));
        } else {
            next_target = THERMAL_NO_TARGET;
        }

        return next_target;
    }

    switch (trend) {
    case THERMAL_TREND_RAISING:
        /* warming up: cur_state + 1, but never above instance->upper */
        if (throttle) {
            next_target = cur_state < instance->upper ?
                    (cur_state + 1) : instance->upper;
            if (next_target < instance->lower)
                next_target = instance->lower;
        }
        break;
    case THERMAL_TREND_RAISE_FULL:
        if (throttle)
            next_target = instance->upper;
        break;
    case THERMAL_TREND_DROPPING:
        /*
         * cooling down: cur_state - 1; once at or below instance->lower
         * and no longer throttling, the result is THERMAL_NO_TARGET
         */
        if (cur_state <= instance->lower) {
            if (!throttle)
                next_target = THERMAL_NO_TARGET;
        } else {
            next_target = cur_state - 1;
            if (next_target > instance->upper)
                next_target = instance->upper;
        }
        break;
    case THERMAL_TREND_DROP_FULL:
        if (cur_state == instance->lower) {
            if (!throttle)
                next_target = THERMAL_NO_TARGET;
        } else
            next_target = instance->lower;
        break;
    default:
        /* stable: keep the current target */
        break;
    }

    return next_target;
}

/*
 * Adjust the zone's passive counter by @value, but only for passive trips
 * and for the THERMAL_TRIPS_NONE (forced passive) case.
 */
static void update_passive_instance(struct thermal_zone_device *tz,
                enum thermal_trip_type type, int value)
{
    /* every other trip type leaves tz->passive untouched */
    if (type != THERMAL_TRIP_PASSIVE && type != THERMAL_TRIPS_NONE)
        return;

    tz->passive += value;
}

Step Wise在中斷觸發後根據溫度的變化趨勢選擇Cooling狀態。而不是根據trip值選擇Cooling狀態。

 * If the temperature is higher than a trip point,
 *    a. if the trend is THERMAL_TREND_RAISING, use higher cooling
 *       state for this trip point
 *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
 *       state for this trip point
 *    c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit
 *       for this trip point
 *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit
 *       for this trip point
 * If the temperature is lower than a trip point,
 *    a. if the trend is THERMAL_TREND_RAISING, do nothing
 *    b. if the trend is THERMAL_TREND_DROPPING, use lower cooling
 *       state for this trip point, if the cooling state already
 *       equals lower limit, deactivate the thermal instance
 *    c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing
 *    d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit,
 *       if the cooling state already equals lower limit,
 *       deactivate the thermal instance

如上是step_wise.c中關於Step Wise governor的溫控策略。分別對高於或低於trip溫度下不一樣趨勢行爲作出瞭解釋。

3.2 Fair Share分析

FairShare引入了weight概念。若是一個thermal zone中存在多個Cooling設備,不一樣的設備降溫效果可能不一樣,用weight表示降溫的能力。

weight大的設備得分較高,所以能夠選擇更深的Cooling狀態。

/* Fair Share governor descriptor: its only callback is fair_share_throttle(). */
static struct thermal_governor thermal_gov_fair_share = {
    .name        = "fair_share",
    .throttle    = fair_share_throttle,
};

/* Register the Fair Share governor; returns thermal_register_governor()'s result. */
int thermal_gov_fair_share_register(void)
{
    return thermal_register_governor(&thermal_gov_fair_share);
}

/* Unregister the Fair Share governor. */
void thermal_gov_fair_share_unregister(void)
{
    thermal_unregister_governor(&thermal_gov_fair_share);
}

fair_share_throttle()首先根據溫度得出當前trip等級,而後綜合不一樣Cooling的weight等計算出每一個Cooling設備的target。

static int fair_share_throttle(struct thermal_zone_device *tz, int trip)
{
    struct thermal_instance *instance;
    int total_weight = 0;
    int total_instance = 0;
    int cur_trip_level = get_trip_level(tz);------------------------------------根據溫度獲取對應trip等級。

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
        if (instance->trip != trip)
            continue;

        total_weight += instance->weight;
        total_instance++;
    }

    list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
        int percentage;
        struct thermal_cooling_device *cdev = instance->cdev;

        if (instance->trip != trip)
            continue;

        if (!total_weight)
            percentage = 100 / total_instance;----------------------------------在都沒有定義weight的狀況下,每一個Cooling設備一樣percentage。 else
            percentage = (instance->weight * 100) / total_weight;---------------若是存在weight的狀況下,根據權重來劃分percentage。

        instance->target = get_target_state(tz, cdev, percentage,
                            cur_trip_level);------------------------------------獲取當前Cooling設備對應的state。

        mutex_lock(&instance->cdev->lock);
        instance->cdev->updated = false;
        mutex_unlock(&instance->cdev->lock);
        thermal_cdev_update(cdev);
    }
    return 0;
}

/*
 * Map the zone temperature to a trip level.
 *
 * Returns the index of the first trip whose temperature is above
 * tz->temperature, or tz->trips when no trip is above it; 0 when the zone
 * has no trips or no get_trip_temp() callback.
 */
static int get_trip_level(struct thermal_zone_device *tz)
{
    int count = 0;
    int trip_temp;
    enum thermal_trip_type trip_type;

    if (tz->trips == 0 || !tz->ops->get_trip_temp)
        return 0;

    for (count = 0; count < tz->trips; count++) {
        tz->ops->get_trip_temp(tz, count, &trip_temp);
        /* stop at the first trip the zone temperature has not reached */
        if (tz->temperature < trip_temp)
            break;
    }

    if (count > 0) {
        /* only feeds the thermal trace point */
        tz->ops->get_trip_type(tz, count - 1, &trip_type);
        trace_thermal_zone_trip(tz, count - 1, trip_type);
    }

    return count;
}


/*
 * Fair Share: compute the cooling state of one cooling device as
 *   percentage * level * max_state / (100 * tz->trips)
 *
 * @tz:         thermal zone (supplies the number of trips)
 * @cdev:       cooling device (supplies max_state)
 * @percentage: share of this device, 0..100
 * @level:      current trip level of the zone
 *
 * NOTE(review): divides by tz->trips; presumably this is only reached for
 * zones with at least one trip — confirm against the callers.
 */
static long get_target_state(struct thermal_zone_device *tz,
        struct thermal_cooling_device *cdev, int percentage, int level)
{
    unsigned long max_state;

    cdev->ops->get_max_state(cdev, &max_state);

    return (long)(percentage * level * max_state) / (100 * tz->trips);
}

在weight爲0的狀況下,不一樣Cooling設備state均等映射到trip。在只有一個Cooling設備狀況下,若是Cooling最大狀態和ThermalZone trip最大值相等,則能夠trip和狀態一一對應。

 * Parameters used for Throttling:
 * P1. max_state: Maximum throttle state exposed by the cooling device.
 * P2. percentage[i]/100:
 *    How 'effective' the 'i'th device is, in cooling the given zone.
 * P3. cur_trip_level/max_no_of_trips:
 *    This describes the extent to which the devices should be throttled.
 *    We do not want to throttle too much when we trip a lower temperature,
 *    whereas the throttling is at full swing if we trip critical levels.
 *    (Heavily assumes the trip points are in ascending order)
 * new_state of cooling device = P3 * P2 * P1

fair_share.c中給出了計算Cooling設備狀態的計算公式,new_state=percentage*cur_trip_level*max_state/(100*max_no_of_trips)。

4. Thermal Cooling實例

建立一個Dummy Cooling驅動表示Cooling設備,經過thermal_cooling_device_register()註冊Thermal Cooling設備,將其和Thermal Zone綁定。在Thermal Zone中斷觸發後,經過Governor選擇state,而後經過set_cur_state()執行溫控操做。

/* bind to generic thermal layer as cooling device*/
static struct thermal_cooling_device_ops dummy_cooling_ops = {
    .get_max_state = dummy_cooling_get_max_state,---------------------------Cooling設備最深降溫狀態。
    .get_cur_state = dummy_cooling_get_cur_state,---------------------------當前Cooling狀態。
    .set_cur_state = dummy_cooling_set_cur_state,---------------------------根據狀態,執行溫控操做。
};

/*
 * Module init: register the dummy cooling device with the thermal core.
 *
 * Returns 0 on success, or the negative error code carried by the pointer
 * that thermal_cooling_device_register() returned.
 * (The earlier version returned an uninitialized value on success.)
 */
static int __init dummy_cooling_init(void)
{
    dummy_cooling_dev = thermal_cooling_device_register("dummy_cooling", NULL,
                        &dummy_cooling_ops);
    if (IS_ERR(dummy_cooling_dev))
        return PTR_ERR(dummy_cooling_dev);

    return 0;
}
module_init(dummy_cooling_init);

/* Module exit: unregister the cooling device registered in dummy_cooling_init(). */
static void __exit dummy_cooling_exit(void)
{
    thermal_cooling_device_unregister(dummy_cooling_dev);
}
module_exit(dummy_cooling_exit);

5. Thermal調試以及流程分析

首先使能已有Thermal調試手段,並添加proc節點模擬中斷觸發;而後基於log分析Thermal流程。

5.1 Thermal調試手段

對Thermal的調試能夠有兩種方式:

  • 在thermal_core.c和step_wise.c的include以前#define DEBUG打開調試功能。
  • 打開thermal trace point:echo 1 > /sys/kernel/debug/tracing/events/thermal/enable

爲了模擬溫度變化,添加proc節點,而後使用腳本模擬溫度觸發流程。

#ifdef DP1000_THERMAL_STUB
/* temperature value injected through /proc/dp1000_temp_stub */
static int dp1000_temp_stub = 0;
/* zones with id 0 and 1, recorded in probe so the stub can trigger an update */
struct thermal_zone_device *thermal_zone_0, *thermal_zone_1;
/* proc entry of the stub; NULL until it has been created */
struct proc_dir_entry *dp1000_temp_proc = NULL;
#endif

static int dp1000_thermal_get_temp(struct thermal_zone_device *zone, int *temp)
{
#ifdef DP1000_THERMAL_STUB
    *temp = dp1000_temp_stub;--------------------------------------------------------替代從寄存器獲取溫度流程,使用/proc/dp1000_temp_stub輸入的溫度值。 #else
...
#endif
    return 0;
}

#ifdef DP1000_THERMAL_STUB
/* seq_file show callback: prints the current stub temperature followed by a newline. */
static int dp1000_temp_stub_proc_show(struct seq_file *m, void *v)
{
    seq_printf(m, "%d\n", dp1000_temp_stub);
    return 0;
}

/* open callback: sets up a single-record seq_file backed by dp1000_temp_stub_proc_show(). */
static int dp1000_temp_stub_proc_open(struct inode *inode, struct file *file)
{
    return single_open(file, dp1000_temp_stub_proc_show, NULL);
}

/*
 * Write handler of /proc/dp1000_temp_stub: parses an integer temperature
 * from user space, stores it in dp1000_temp_stub and triggers a thermal
 * zone update, which simulates the interrupt-driven flow.
 *
 * Returns @count on success or the error from kstrtoint_from_user().
 */
static ssize_t dp1000_temp_stub_proc_write(struct file *file,
        const char __user *buffer, size_t count, loff_t *ppos)
{
    int rc;

    rc = kstrtoint_from_user(buffer, count, 0, &dp1000_temp_stub);
    if (rc)
        return rc;

    /*
     * The proc node is created in dp1000_init_thermal(), which probe calls
     * before the zone is registered, and thermal_zone_0 is only set for the
     * zone with id 0. Skip the update while the pointer is still NULL.
     */
    if (thermal_zone_0)
        thermal_zone_device_update(thermal_zone_0, THERMAL_EVENT_UNSPECIFIED);
//    thermal_zone_device_update(thermal_zone_1, THERMAL_EVENT_UNSPECIFIED);

    return count;
}

/* File operations of /proc/dp1000_temp_stub: seq_file based read plus the stub write handler. */
static const struct file_operations dp1000_temp_stub_proc_fops = {
    .open        = dp1000_temp_stub_proc_open,
    .read        = seq_read,
    .llseek     = seq_lseek,
    .release    = single_release,
    .write        = dp1000_temp_stub_proc_write,
};
#endif


/*
 * Hardware initialization of the sensor. The register setup is elided
 * ("...") in this excerpt; only the debug stub is shown.
 */
static void dp1000_init_thermal(struct dp1000_thermal_priv *priv)
{
...
    #ifdef DP1000_THERMAL_STUB
    /* create /proc/dp1000_temp_stub once */
    if(!dp1000_temp_proc)
        dp1000_temp_proc = proc_create("dp1000_temp_stub", 0, NULL, &dp1000_temp_stub_proc_fops);
    #endif
...
}

/*
 * Probe excerpt showing only the stub hook: after registration the zones
 * with id 0 and 1 are remembered so the proc stub can trigger updates.
 * Everything else is elided ("...").
 */
static int dp1000_thermal_probe(struct platform_device *pdev)
{
...
    priv->zone = thermal_zone_device_register("dp1000_thermal", DP1000_THERMAL_TRIPS, 0,
                           priv, &dp1000_thermal_zone_ops, NULL, 0, 0);
    /* NOTE(review): no IS_ERR() check on priv->zone is visible before the dereference below — confirm */
    #ifdef DP1000_THERMAL_STUB
    if(priv->zone->id == 0)
        thermal_zone_0 = priv->zone;
    else if(priv->zone->id == 1)
        thermal_zone_1 = priv->zone;
    #endif
...
}

5.2 Thermal流程分析

使用以下腳本進行Thermal流程調試:

# Turn every trace event off, enable only the thermal events, clear the buffer.
echo 0 > /sys/kernel/debug/tracing/events/enable
echo 1 > /sys/kernel/debug/tracing/events/thermal/enable
echo > /sys/kernel/debug/tracing/trace

# Feed a sequence of temperatures into the stub, one per second.
for i in 89 95 100 95 90 95 100
do
    echo -e "\n"
    echo $i > /proc/dp1000_temp_stub
    sleep 1
done

# Dump the collected thermal trace.
cat /sys/kernel/debug/tracing/trace

獲得的結果以下:

[   35.900013] thermal thermal_zone0: last_temperature=0, current_temperature=89
[   35.907277] thermal thermal_zone0: Trip0[type=0,temp=89]:trend=1,throttle=1--------------89度達到trip0的觸發溫度,並且是升溫狀態。trip1和trip2都沒有throttle。
[   35.914290] thermal cooling_device0: cur_state=0
[   35.918933] thermal cooling_device0: old_target=-1, target=1
[   35.924619] thermal cooling_device0: zone0->target=1
[   35.929608] thermal cooling_device0: zone1->target=4294967295
[   35.935383] thermal cooling_device0: set to state 1--------------------------------------Cooling設備當前狀態是0,因此要將狀態設置爲1。
[   35.940293] thermal thermal_zone0: Trip1[type=0,temp=94]:trend=1,throttle=0
[   35.947286] thermal thermal_zone0: Trip2[type=0,temp=99]:trend=1,throttle=0

[   36.999977] thermal thermal_zone0: last_temperature=89, current_temperature=95
[   37.007326] thermal thermal_zone0: Trip0[type=0,temp=89]:trend=1,throttle=1
[   37.014332] thermal cooling_device0: cur_state=1
[   37.018973] thermal cooling_device0: old_target=1, target=2
[   37.024570] thermal cooling_device0: zone0->target=2
[   37.029558] thermal cooling_device0: zone1->target=4294967295
[   37.035336] thermal cooling_device0: set to state 2--------------------------------------95度是trip1的觸發溫度,因此Cooling狀態從當前的1設置到2。trip2沒有throttle。
[   37.040248] thermal thermal_zone0: Trip1[type=0,temp=94]:trend=1,throttle=1
[   37.047240] thermal thermal_zone0: Trip2[type=0,temp=99]:trend=1,throttle=0
...

從thermal trace能夠看出,首先獲取溫度,而後選擇trip,最後設置Cooling設備。

#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION
#              | |       |   ||||       |         |
              sh-156   [000] ....    35.899971: thermal_temperature: thermal_zone=dp1000_thermal id=0 temp_prev=0 temp=89
              sh-156   [000] ....    35.907265: thermal_zone_trip: thermal_zone=dp1000_thermal id=0 trip=0 trip_type=ACTIVE
              sh-156   [000] .n..    35.935374: cdev_update: type=dummy_cooling target=1
              sh-156   [000] ....    36.999933: thermal_temperature: thermal_zone=dp1000_thermal id=0 temp_prev=89 temp=95
              sh-156   [000] .n..    37.007312: thermal_zone_trip: thermal_zone=dp1000_thermal id=0 trip=0 trip_type=ACTIVE
              sh-156   [000] .n..    37.035327: cdev_update: type=dummy_cooling target=2
              sh-156   [000] .n..    37.040238: thermal_zone_trip: thermal_zone=dp1000_thermal id=0 trip=1 trip_type=ACTIVE
              sh-156   [000] ....    38.079912: thermal_temperature: thermal_zone=dp1000_thermal id=0 temp_prev=95 temp=100
              sh-156   [000] .n..    38.087374: thermal_zone_trip: thermal_zone=dp1000_thermal id=0 trip=0 trip_type=ACTIVE
              sh-156   [000] .n..    38.115385: cdev_update: type=dummy_cooling target=3
...

6. 小結

Thermal Framework一共能夠分爲四部分,Thermal Core、Thermal Zone、Thermal Governor、Thermal Cooling。

其中Core很穩定,主要是會使用;Governor也比較穩定,已有的Governor能覆蓋大部分場景;須要開發的主要有Thermal Zone的Driver和降溫設備Cooling。

在開發過程當中,能夠藉助Trace point等措施進行問題定位。

相關文章
相關標籤/搜索