Linux操作系統的一大優勢就是支持數以萬計的芯片設備,大大小小的芯片廠商工程師都在積極地向Linux kernel提交設備驅動代碼。能讓這個目標得以實現,這背後隱藏着一個看不見的技術優勢:Linux內核提供了一套易於擴展和維護的設備驅動框架。Linux內核本身提供一套設備驅動模型,此模型提供了Linux內核對設備的一般性抽象描述,包括設備的電源管理、對象生命週期管理、用戶空間呈現等等。在設備模型的幫助下,設備驅動開發工程師從設備的一般性抽象中解脫出來。但是每個設備的具體功能實現還需要大量開發工作,如果每個設備都從頭開發,那工作量無疑相當巨大。而這些設備可以按功能進行分類,每個設備類在業界或者標準組織中定義了硬件標準規範,所以針對每個設備類,如果有一個針對此設備類的功能性抽象框架,這將大大加快新設備的添加和開發效率。設備標準規範的存在,無疑對設備驅動框架的設計提供了有力的支撐。
但是壞消息也不少:
這些差異性需求下,一個設備類就像一棵樹,其樹幹爲設備標準規範(可能有多個,如下圖),每個分支爲廠商設備類或設備子類規範,而每片樹葉就是每個具體的設備。設備框架的目的就是能幫助驅動工程師簡潔優雅地添加一片樹葉。這些差異性需求對框架設計是一個不小的挑戰,如何很好地支持這些需求,是考驗優秀設備驅動框架的試金石。
本文的目的就是總結一些內核設備驅動框架的優秀設計方案,以供大家參考。如有疏漏,也歡迎大家留言補充。
ATA驅動模塊管理衆多的SATA、PATA設備。以SATA設備爲例,又分爲支持和不支持port multiplier功能的兩類。而且SATA總線又存在多種標準規範定義(ahci、fsl、sil24等),就算是用ahci總線標準的,有些廠商也總在某些地方做得跟標準不一致。
框架設計相關代碼示例:
struct ata_port_operations { /* * Command execution */ int (*qc_defer)(struct ata_queued_cmd *qc); int (*check_atapi_dma)(struct ata_queued_cmd *qc); void (*qc_prep)(struct ata_queued_cmd *qc); unsigned int (*qc_issue)(struct ata_queued_cmd *qc); bool (*qc_fill_rtf)(struct ata_queued_cmd *qc); /* * Configuration and exception handling */ int (*cable_detect)(struct ata_port *ap); unsigned long (*mode_filter)(struct ata_device *dev, unsigned long xfer_mask); void (*set_piomode)(struct ata_port *ap, struct ata_device *dev); void (*set_dmamode)(struct ata_port *ap, struct ata_device *dev); int (*set_mode)(struct ata_link *link, struct ata_device **r_failed_dev); unsigned int (*read_id)(struct ata_device *dev, struct ata_taskfile *tf, u16 *id); void (*dev_config)(struct ata_device *dev); void (*freeze)(struct ata_port *ap); void (*thaw)(struct ata_port *ap); ata_prereset_fn_t prereset; ata_reset_fn_t softreset; ata_reset_fn_t hardreset; ata_postreset_fn_t postreset; ata_prereset_fn_t pmp_prereset; ata_reset_fn_t pmp_softreset; ata_reset_fn_t pmp_hardreset; ata_postreset_fn_t pmp_postreset; void (*error_handler)(struct ata_port *ap); void (*lost_interrupt)(struct ata_port *ap); void (*post_internal_cmd)(struct ata_queued_cmd *qc); /* * Optional features */ int (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val); int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); int (*enable_pm)(struct ata_port *ap, enum link_pm policy); void (*disable_pm)(struct ata_port *ap); /* * Start, stop, suspend and resume */ int (*port_suspend)(struct ata_port *ap, pm_message_t mesg); int (*port_resume)(struct ata_port *ap); int (*port_start)(struct ata_port *ap); void (*port_stop)(struct ata_port *ap); void (*host_stop)(struct ata_host *host); #ifdef CONFIG_ATA_SFF /* * SFF / taskfile oriented ops */ void (*sff_dev_select)(struct ata_port *ap, unsigned int device); u8 
(*sff_check_status)(struct ata_port *ap); u8 (*sff_check_altstatus)(struct ata_port *ap); void (*sff_tf_load)(struct ata_port *ap, const struct ata_taskfile *tf); void (*sff_tf_read)(struct ata_port *ap, struct ata_taskfile *tf); void (*sff_exec_command)(struct ata_port *ap, const struct ata_taskfile *tf); unsigned int (*sff_data_xfer)(struct ata_device *dev, unsigned char *buf, unsigned int buflen, int rw); u8 (*sff_irq_on)(struct ata_port *); void (*sff_irq_clear)(struct ata_port *); void (*bmdma_setup)(struct ata_queued_cmd *qc); void (*bmdma_start)(struct ata_queued_cmd *qc); void (*bmdma_stop)(struct ata_queued_cmd *qc); u8 (*bmdma_status)(struct ata_port *ap); void (*drain_fifo)(struct ata_queued_cmd *qc); #endif /* CONFIG_ATA_SFF */ ssize_t (*em_show)(struct ata_port *ap, char *buf); ssize_t (*em_store)(struct ata_port *ap, const char *message, size_t size); ssize_t (*sw_activity_show)(struct ata_device *dev, char *buf); ssize_t (*sw_activity_store)(struct ata_device *dev, enum sw_activity val); /* * Obsolete */ void (*phy_reset)(struct ata_port *ap); void (*eng_timeout)(struct ata_port *ap); /* * ->inherits must be the last field and all the preceding * fields must be pointers. 
*/ const struct ata_port_operations *inherits; [在對象ata_port_operations 最後一個字段定義一個指向ata_port_operations 的指針。 ata_port_operations 相似於 C++ 中的 vtable, 這裏模仿 C++ 中繼承基類vtable的子類vtable內存佈局。]}; const struct ata_port_operations ata_base_port_ops = { .prereset = ata_std_prereset, .postreset = ata_std_postreset, .error_handler = ata_std_error_handler, }; [基類vtable] const struct ata_port_operations sata_port_ops = { .inherits = &ata_base_port_ops, .qc_defer = ata_std_qc_defer, .hardreset = sata_std_hardreset, };[繼承ata_base_port_ops的子類vtable] const struct ata_port_operations sata_pmp_port_ops = { .inherits = &sata_port_ops, .pmp_prereset = ata_std_prereset, .pmp_hardreset = sata_std_hardreset, .pmp_postreset = ata_std_postreset, .error_handler = sata_pmp_error_handler, }; static struct ata_port_operations ahci_ops = { .inherits = &sata_pmp_port_ops, .qc_defer = sata_pmp_qc_defer_cmd_switch, .qc_prep = ahci_qc_prep, .qc_issue = ahci_qc_issue, .qc_fill_rtf = ahci_qc_fill_rtf, .freeze = ahci_freeze, .thaw = ahci_thaw, .softreset = ahci_softreset, .hardreset = ahci_hardreset, .postreset = ahci_postreset, .pmp_softreset = ahci_softreset, .error_handler = ahci_error_handler, .post_internal_cmd = ahci_post_internal_cmd, .dev_config = ahci_dev_config, .scr_read = ahci_scr_read, .scr_write = ahci_scr_write, .pmp_attach = ahci_pmp_attach, .pmp_detach = ahci_pmp_detach, .enable_pm = ahci_enable_alpm, .disable_pm = ahci_disable_alpm, .em_show = ahci_led_show, .em_store = ahci_led_store, .sw_activity_show = ahci_activity_show, .sw_activity_store = ahci_activity_store, #ifdef CONFIG_PM .port_suspend = ahci_port_suspend, .port_resume = ahci_port_resume, #endif .port_start = ahci_port_start, .port_stop = ahci_port_stop, }; static struct ata_port_operations ahci_vt8251_ops = { .inherits = &ahci_ops, .hardreset = ahci_vt8251_hardreset, };[繼承 ahci_ops的子類 vtable] static const struct ata_port_info ahci_port_info[] = { [board_ahci] = { .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = 
ATA_UDMA6, .port_ops = &ahci_ops, }, [board_ahci_vt8251] = { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP), .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_vt8251_ops,[初始化 對象ahci_port_info[board_ahci_vt8251] 的vtable入口port_ops 爲ahci_vt8251_ops] }, } static void ata_finalize_port_ops(struct ata_port_operations *ops) { static DEFINE_SPINLOCK(lock); const struct ata_port_operations *cur; void **begin = (void **)ops; void **end = (void **)&ops->inherits; void **pp; if (!ops || !ops->inherits) return; spin_lock(&lock); for (cur = ops->inherits; cur; cur = cur->inherits) { void **inherit = (void **)cur; for (pp = begin; pp < end; pp++, inherit++) if (!*pp) *pp = *inherit; } for (pp = begin; pp < end; pp++) if (IS_ERR(*pp)) *pp = NULL; ops->inherits = NULL; [掃描多重繼承的虛函數接口] spin_unlock(&lock); } int ata_host_start(struct ata_host *host) { int have_stop = 0; void *start_dr = NULL; int i, rc; if (host->flags & ATA_HOST_STARTED) return 0; ata_finalize_port_ops(host->ops);[host對象初始化時,調用ata_finalize_port_ops初始化對象vtable指針host->ops] … }
《單板控制領域模型設計與實現》採用此驅動框架設計開發。
PMBus有一套標準規範,其中有些是基本功能,有些是可選功能。基本功能是必須要實現的,而且寄存器接口也進行了標準化。而可選功能由各設備廠商自由決定,而且這些可選功能的寄存器接口也無統一規範,支持PMBus設備廠商的自定義寄存器。
框架設計相關代碼示例:
struct pmbus_data { struct device *dev; struct device *hwmon_dev; u32 flags; /* from platform data */ int exponent; /* linear mode: exponent for output voltages */ const struct pmbus_driver_info *info; int max_attributes; int num_attributes; struct attribute_group group; struct pmbus_sensor *sensors; struct mutex update_lock; bool valid; unsigned long last_updated; /* in jiffies */ /* * A single status register covers multiple attributes, * so we keep them all together. */ u8 status[PB_NUM_STATUS_REG]; u8 status_register; u8 currpage; }; struct pmbus_driver_info { int pages; /* Total number of pages */ enum pmbus_data_format format[PSC_NUM_CLASSES]; /* * Support one set of coefficients for each sensor type * Used for chips providing data in direct mode. */ int m[PSC_NUM_CLASSES]; /* mantissa for direct data format */ int b[PSC_NUM_CLASSES]; /* offset */ int R[PSC_NUM_CLASSES]; /* exponent */ u32 func[PMBUS_PAGES]; /* Functionality, per page */ /* * The following functions map manufacturing specific register values * to PMBus standard register values. Specify only if mapping is * necessary. * Functions return the register value (read) or zero (write) if * successful. A return value of -ENODATA indicates that there is no * manufacturer specific register, but that a standard PMBus register * may exist. Any other negative return value indicates that the * register does not exist, and that no attempt should be made to read * the standard register. */ int (*read_byte_data)(struct i2c_client *client, int page, int reg); int (*read_word_data)(struct i2c_client *client, int page, int reg); int (*write_word_data)(struct i2c_client *client, int page, int reg, u16 word); int (*write_byte)(struct i2c_client *client, int page, u8 value); /* * The identify function determines supported PMBus functionality. * This function is only necessary if a chip driver supports multiple * chips, and the chip functionality is not pre-determined. 
*/ int (*identify)(struct i2c_client *client, struct pmbus_driver_info *info); }; struct pmbus_sensor { struct pmbus_sensor *next; char name[PMBUS_NAME_SIZE]; /* sysfs sensor name */ struct device_attribute attribute; u8 page; /* page number */ u16 reg; /* register */ enum pmbus_sensor_classes class; /* sensor class */ bool update; /* runtime sensor update needed */ int data; /* Sensor data. Negative if there was a read error */ }; static struct pmbus_driver_info tps53667_info = { .pages = 1, .format[PSC_VOLTAGE_IN] = linear, .format[PSC_VOLTAGE_OUT] = vid, .format[PSC_TEMPERATURE] = linear, .format[PSC_CURRENT_IN] = linear, .format[PSC_CURRENT_OUT] = linear, .format[PSC_POWER] = linear, .read_word_data = tps53667_read_word_data, .write_word_data = tps53667_write_word_data, .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_VOUT | PMBUS_HAVE_IIN | PMBUS_HAVE_IOUT | PMBUS_HAVE_PIN | PMBUS_HAVE_POUT | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_VOUT | PMBUS_HAVE_STATUS_IOUT | PMBUS_HAVE_STATUS_INPUT | PMBUS_HAVE_STATUS_TEMP, }; static int tps53667_probe(struct i2c_client *client, const struct i2c_device_id *id) { return pmbus_do_probe(client, id, &tps53667_info); } int pmbus_do_probe(struct i2c_client *client, const struct i2c_device_id *id, struct pmbus_driver_info *info) { struct device *dev = &client->dev; const struct pmbus_platform_data *pdata = dev->platform_data; struct pmbus_data *data; int ret; if (!info) return -ENODEV; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WRITE_BYTE | I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA)) return -ENODEV; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; i2c_set_clientdata(client, data); mutex_init(&data->update_lock); data->dev = dev; if (pdata) data->flags = pdata->flags; data->info = info; ret = pmbus_init_common(client, data, info); if (ret < 0) return ret; ret = pmbus_find_attributes(client, data); if (ret) goto out_kfree; /* * If there are no attributes, something is wrong. 
* Bail out instead of trying to register nothing. */ if (!data->num_attributes) { dev_err(dev, "No attributes found\n"); ret = -ENODEV; goto out_kfree; } /* Register sysfs hooks */ ret = sysfs_create_group(&dev->kobj, &data->group); if (ret) { dev_err(dev, "Failed to create sysfs entries\n"); goto out_kfree; } data->hwmon_dev = hwmon_device_register(dev); if (IS_ERR(data->hwmon_dev)) { ret = PTR_ERR(data->hwmon_dev); dev_err(dev, "Failed to register hwmon device\n"); goto out_hwmon_device_register; } ret = device_create_file(dev, &dev_attr_clear_fault); if (ret) goto out_hwmon_device_register; return 0; out_hwmon_device_register: sysfs_remove_group(&dev->kobj, &data->group); out_kfree: kfree(data->group.attrs); return ret; }
《linux PMBus總線及設備驅動分析》是對PMBus設備的進一步介紹。
USB塊設備有一套標準規範定義,但USB設備類型衆多(如U盤、MP3播放器、手機、GPS設備等等),各廠商水平良莠不齊,實現混亂。而且由於USB規範相對較新,一直在演進變化中,這也加劇了這一混亂。比如:
另外,Linux內核已有一套通用的SCSI塊設備驅動框架,如果USB塊設備能複用這個框架,那是一個最好的選擇。
框架設計相關代碼示例:
/* Driver for USB Mass Storage compliant devices * SCSI layer glue code #define US_DO_ALL_FLAGS \ US_FLAG(SINGLE_LUN, 0x00000001) \ /* allow access to only LUN 0 */ \ US_FLAG(NEED_OVERRIDE, 0x00000002) \ /* unusual_devs entry is necessary */ \ US_FLAG(SCM_MULT_TARG, 0x00000004) \ /* supports multiple targets */ \ US_FLAG(FIX_INQUIRY, 0x00000008) \ /* INQUIRY response needs faking */ \ US_FLAG(FIX_CAPACITY, 0x00000010) \ /* READ CAPACITY response too big */ \ US_FLAG(IGNORE_RESIDUE, 0x00000020) \ /* reported residue is wrong */ \ US_FLAG(BULK32, 0x00000040) \ /* Uses 32-byte CBW length */ \ US_FLAG(NOT_LOCKABLE, 0x00000080) \ /* PREVENT/ALLOW not supported */ \ US_FLAG(GO_SLOW, 0x00000100) \ /* Need delay after Command phase */ \ US_FLAG(NO_WP_DETECT, 0x00000200) \ /* Don't check for write-protect */ \ US_FLAG(MAX_SECTORS_64, 0x00000400) \ /* Sets max_sectors to 64 */ \ US_FLAG(IGNORE_DEVICE, 0x00000800) \ /* Don't claim device */ \ US_FLAG(CAPACITY_HEURISTICS, 0x00001000) \ /* sometimes sizes is too big */ \ US_FLAG(MAX_SECTORS_MIN,0x00002000) \ /* Sets max_sectors to arch min */ \ US_FLAG(BULK_IGNORE_TAG,0x00004000) \ /* Ignore tag mismatch in bulk operations */ \ US_FLAG(SANE_SENSE, 0x00008000) \ /* Sane Sense (> 18 bytes) */ \ US_FLAG(CAPACITY_OK, 0x00010000) \ /* READ CAPACITY response is correct */ \ US_FLAG(BAD_SENSE, 0x00020000) \ /* Bad Sense (never more than 18 bytes) */ \ US_FLAG(NO_READ_DISC_INFO, 0x00040000) \ /* cannot handle READ_DISC_INFO */ \ US_FLAG(NO_READ_CAPACITY_16, 0x00080000) \ /* cannot handle READ_CAPACITY_16 */ \ US_FLAG(INITIAL_READ10, 0x00100000) \ /* Initial READ(10) (and others) must be retried */ \ US_FLAG(WRITE_CACHE, 0x00200000) \ /* Write Cache status is not available */ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; #undef US_FLAG struct us_data { /* The device we're working with * It's important to note: * (o) you must hold dev_mutex to change pusb_dev */ struct mutex dev_mutex; /* protect pusb_dev 
*/ struct usb_device *pusb_dev; /* this usb_device */ struct usb_interface *pusb_intf; /* this interface */ struct us_unusual_dev *unusual_dev; /* device-filter entry */ unsigned long fflags; /* fixed flags from filter */ unsigned long dflags; /* dynamic atomic bitflags */ unsigned int send_bulk_pipe; /* cached pipe values */ unsigned int recv_bulk_pipe; unsigned int send_ctrl_pipe; unsigned int recv_ctrl_pipe; unsigned int recv_intr_pipe; /* information about the device */ char *transport_name; char *protocol_name; __le32 bcs_signature; u8 subclass; u8 protocol; u8 max_lun; u8 ifnum; /* interface number */ u8 ep_bInterval; /* interrupt interval */ /* function pointers for this device */ trans_cmnd transport; /* transport function */ trans_reset transport_reset; /* transport device reset */ proto_cmnd proto_handler; /* protocol handler */ /* SCSI interfaces */ struct scsi_cmnd *srb; /* current srb */ unsigned int tag; /* current dCBWTag */ char scsi_name[32]; /* scsi_host name */ /* control and bulk communications data */ struct urb *current_urb; /* USB requests */ struct usb_ctrlrequest *cr; /* control requests */ struct usb_sg_request current_sg; /* scatter-gather req. */ unsigned char *iobuf; /* I/O buffer */ dma_addr_t iobuf_dma; /* buffer DMA addresses */ struct task_struct *ctl_thread; /* the control thread */ /* mutual exclusion and synchronization structures */ struct completion cmnd_ready; /* to sleep thread on */ struct completion notify; /* thread begin/end */ wait_queue_head_t delay_wait; /* wait during reset */ struct delayed_work scan_dwork; /* for async scanning */ /* subdriver information */ void *extra; /* Any extra data */ extra_data_destructor extra_destructor;/* extra data destructor */ #ifdef CONFIG_PM pm_hook suspend_resume_hook; #endif /* hacks for READ CAPACITY bug handling */ int use_last_sector_hacks; int last_sector_retries; }; struct usb_device_id { /* which fields to match against? 
*/ __u16 match_flags; /* Used for product specific matches; range is inclusive */ __u16 idVendor; __u16 idProduct; __u16 bcdDevice_lo; __u16 bcdDevice_hi; /* Used for device class matches */ __u8 bDeviceClass; __u8 bDeviceSubClass; __u8 bDeviceProtocol; /* Used for interface class matches */ __u8 bInterfaceClass; __u8 bInterfaceSubClass; __u8 bInterfaceProtocol; /* Used for vendor-specific interface matches */ __u8 bInterfaceNumber; /* not matched against */ kernel_ulong_t driver_info __attribute__((aligned(sizeof(kernel_ulong_t)))); }; /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } struct usb_device_id usb_storage_usb_ids[] = { # include "unusual_devs.h" { } /* Terminating entry */ }; UNUSUAL_DEV( 0x22b8, 0x6426, 0x0101, 0x0101, "Motorola", "MSnc.", USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_FIX_INQUIRY | US_FL_FIX_CAPACITY | US_FL_BULK_IGNORE_TAG), static int slave_configure(struct scsi_device *sdev) { ... if (us->fflags & US_FL_FIX_CAPACITY) sdev->fix_capacity = 1; ... } /* * read disk capacity */ static void sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer) { ... /* Some devices are known to return the total number of blocks, * not the highest block number. Some devices have versions * which do this and others which do not. Some devices we might * suspect of doing this but we don't know for certain. * * If we know the reported capacity is wrong, decrement it. If * we can only guess, then assume the number of blocks is even * (usually true but not always) and err on the side of lowering * the capacity. 
*/ if (sdp->fix_capacity || (sdp->guess_capacity && (sdkp->capacity & 0x01))) { sd_printk(KERN_INFO, sdkp, "Adjusting the sector count " "from its reported value: %llu\n", (unsigned long long) sdkp->capacity); --sdkp->capacity; } ... } struct scsi_host_template usb_stor_host_template = { /* basic userland interface stuff */ .name = "usb-storage", .proc_name = "usb-storage", .show_info = show_info, .write_info = write_info, .info = host_info, /* command interface -- queued only */ .queuecommand = queuecommand, /* error and abort handlers */ .eh_abort_handler = command_abort, .eh_device_reset_handler = device_reset, .eh_bus_reset_handler = bus_reset, /* queue commands only, only one command per LUN */ .can_queue = 1, .cmd_per_lun = 1, /* unknown initiator id */ .this_id = -1, .slave_alloc = slave_alloc, .slave_configure = slave_configure, .target_alloc = target_alloc, /* lots of sg segments can be handled */ .sg_tablesize = SCSI_MAX_SG_CHAIN_SEGMENTS, /* limit the total size of a transfer to 120 KB */ .max_sectors = 240, /* merge commands... this seems to help performance, but * periodically someone should test to see which setting is more * optimal. */ .use_clustering = 1, /* emulated HBA */ .emulated = 1, /* we do our own delay after a device or bus reset */ .skip_settle_delay = 1, /* sysfs device attributes */ .sdev_attrs = sysfs_device_attr_list, /* module management */ .module = THIS_MODULE };