起源:
所下載視頻,有音視頻分離者,需要合併起來,採用python之subprocess.Popen()調用ffmpeg實現。python版本爲2.7.13,而音視頻文件路徑,有unicode字符者,合併失敗。
此問題由來已久,終於不能忍受,用盡工夫尋其機現,終於尋得蛛絲馬跡,完成其修復。
其緣由爲:python 2.7.x中subprocess.Popen()函數,最終調用了kernel32.dll中的CreateProcess函數Ansi版本CreateProcessA,傳非Ansi參數給它會被它拒絕,而觸發異常。
測試代碼如下:
# encoding: utf-8 from __future__ import unicode_literals import subprocess file_path = r'D:\Percy Faith [CA US] by chkjns ♫ 175 songs\v.txt' args = u'notepad "%s"' % file_path try: p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') print stderr except Exception as e: print e.message
其異常爲UnicodeEncodeError,表現字串爲:'ascii' codec can't encode character u'\u266b' in position 42: ordinal not in range(128)
一、Python 2.x先天缺陷
此問題百度不到有效答案,因而上stackoverflow,一路摸去,找到了python官方對此bug描述,其連接如下:
Issue 19264: subprocess.Popen doesn't support unicode on Windows - Python tracker
七嘴八舌衆說紛紜,但總算大體捋清原委,即上述其調用了Ansi版本的CreateProcess所致。循此緣由,是否替換爲Unicode函數CreateProcessW便可?
最後一條回覆:
msg289664 - (view) Author: Valentin LAB (Valentin LAB) Date: 2017-03-15 10:39
給出了折中方案,其方案即如所思,作函數層替換。
二、win_subprocess.py
Valentin LAB這哥們,就在github上開放了源碼,解決此問題。其核心爲win_subprocess.py,內容如下:
(其代碼中有用os,他卻忘記import,貼代碼時已作修正)
## issue: https://bugs.python.org/issue19264
##
## Python 2.7 / Windows only.  Importing this module replaces
## ``_subprocess.CreateProcess`` with a ctypes wrapper around the
## wide-character ``CreateProcessW`` so that ``subprocess.Popen`` accepts
## unicode executables, command lines, working directories and environments.

import ctypes
import subprocess
import _subprocess
import os

from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \
    Structure, sizeof, c_wchar, WinError
from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \
    HANDLE


##
## Types
##

CREATE_UNICODE_ENVIRONMENT = 0x00000400

## Kept only so that existing ``from win_subprocess import LPCTSTR, LPTSTR``
## statements keep working; the prototype below uses LPCWSTR instead.
LPCTSTR = c_char_p
LPTSTR = c_wchar_p

## Every string parameter of CreateProcessW is a wide string.
LPCWSTR = c_wchar_p
LPSECURITY_ATTRIBUTES = c_void_p
LPBYTE = ctypes.POINTER(BYTE)


class STARTUPINFOW(Structure):
    """ctypes layout of the Win32 ``STARTUPINFOW`` structure."""

    _fields_ = [
        ("cb", DWORD),
        ("lpReserved", LPWSTR),
        ("lpDesktop", LPWSTR),
        ("lpTitle", LPWSTR),
        ("dwX", DWORD),
        ("dwY", DWORD),
        ("dwXSize", DWORD),
        ("dwYSize", DWORD),
        ("dwXCountChars", DWORD),
        ("dwYCountChars", DWORD),
        ## Misspelled field name left untouched: only the field order and
        ## types matter for the layout, and renaming it could break callers
        ## that set it by keyword.
        ("dwFillAtrribute", DWORD),
        ("dwFlags", DWORD),
        ("wShowWindow", WORD),
        ("cbReserved2", WORD),
        ("lpReserved2", LPBYTE),
        ("hStdInput", HANDLE),
        ("hStdOutput", HANDLE),
        ("hStdError", HANDLE),
    ]


LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW)


class PROCESS_INFORMATION(Structure):
    """ctypes layout of the Win32 ``PROCESS_INFORMATION`` structure."""

    _fields_ = [
        ("hProcess", HANDLE),
        ("hThread", HANDLE),
        ("dwProcessId", DWORD),
        ("dwThreadId", DWORD),
    ]


LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION)


class DUMMY_HANDLE(ctypes.c_void_p):
    """Handle wrapper exposing the ``Close()`` / ``int()`` protocol that
    ``subprocess`` expects from the objects returned by ``CreateProcess``."""

    def __init__(self, *a, **kw):
        super(DUMMY_HANDLE, self).__init__(*a, **kw)
        self.closed = False

    def Close(self):
        """Close the underlying handle once; later calls are no-ops."""
        if not self.closed:
            windll.kernel32.CloseHandle(self)
            self.closed = True

    def __int__(self):
        ## ``value`` is None for a NULL handle; int() must return a number.
        return self.value or 0


CreateProcessW = windll.kernel32.CreateProcessW
CreateProcessW.argtypes = [
    LPCWSTR,                # lpApplicationName
    LPWSTR,                 # lpCommandLine (may be written to by the API)
    LPSECURITY_ATTRIBUTES,  # lpProcessAttributes
    LPSECURITY_ATTRIBUTES,  # lpThreadAttributes
    BOOL,                   # bInheritHandles
    DWORD,                  # dwCreationFlags
    LPVOID,                 # lpEnvironment
    LPCWSTR,                # lpCurrentDirectory
    LPSTARTUPINFOW,         # lpStartupInfo
    LPPROCESS_INFORMATION,  # lpProcessInformation
]
CreateProcessW.restype = BOOL


##
## Patched functions/classes
##

def _handle_or_null(handle):
    """Return ``int(handle)``, or None (a NULL HANDLE) when it is unset."""
    if handle is None:
        return None
    return int(handle)


def CreateProcess(executable, args, _p_attr, _t_attr,
                  inherit_handles, creation_flags, env, cwd,
                  startup_info):
    """Create a process supporting unicode executable and args for win32

    Python implementation of CreateProcess using CreateProcessW for Win32.

    Takes the same positional arguments as ``_subprocess.CreateProcess``;
    ``_p_attr`` and ``_t_attr`` are ignored (NULL security attributes are
    always passed).  Returns ``(process_handle, thread_handle, pid, tid)``
    where both handles are ``DUMMY_HANDLE`` instances.  Raises the
    ``WindowsError`` built by ``ctypes.WinError()`` when the call fails.
    """
    si = STARTUPINFOW(
        dwFlags=startup_info.dwFlags,
        wShowWindow=startup_info.wShowWindow,
        cb=sizeof(STARTUPINFOW),
        ## subprocess leaves these as None when no stream is redirected;
        ## int(None) would raise TypeError, so map None to a NULL handle.
        hStdInput=_handle_or_null(startup_info.hStdInput),
        hStdOutput=_handle_or_null(startup_info.hStdOutput),
        hStdError=_handle_or_null(startup_info.hStdError),
    )

    wenv = None
    if env is not None:
        ## The block is "k=v\0" per entry followed by one extra "\0"; an
        ## empty block must still end with two NULs.
        entries = [unicode("%s=%s\0") % (k, v) for k, v in env.items()]
        block = (unicode("").join(entries) or unicode("\0")) + unicode("\0")
        wenv = (c_wchar * len(block))()
        wenv.value = block

    ## CreateProcessW is documented as possibly modifying lpCommandLine in
    ## place, so hand it a private writable buffer instead of a pointer into
    ## an immutable Python string.
    cmdline = None
    if args is not None:
        cmdline = ctypes.create_unicode_buffer(args)

    pi = PROCESS_INFORMATION()
    creation_flags |= CREATE_UNICODE_ENVIRONMENT

    if CreateProcessW(executable, cmdline, None, None,
                      inherit_handles, creation_flags,
                      wenv, cwd, byref(si), byref(pi)):
        return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread),
                pi.dwProcessId, pi.dwThreadId)
    raise WinError()


class Popen(subprocess.Popen):
    """This supersedes Popen and corrects a bug in cPython 2.7 implem"""

    def _execute_child(self, args, executable, preexec_fn, close_fds,
                       cwd, env, universal_newlines,
                       startupinfo, creationflags, shell, to_close,
                       p2cread, p2cwrite,
                       c2pread, c2pwrite,
                       errread, errwrite):
        """Code from part of _execute_child from Python 2.7 (9fbb65e)

        There are only 2 little changes concerning the construction of
        the final string in shell mode: we preempt the creation of the
        command string when shell is True, because original function
        will try to encode unicode args which we want to avoid to be able
        to sending it as-is to ``CreateProcess``.
        """
        if not isinstance(args, subprocess.types.StringTypes):
            args = subprocess.list2cmdline(args)

        if startupinfo is None:
            startupinfo = subprocess.STARTUPINFO()
        if shell:
            startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = _subprocess.SW_HIDE
            comspec = os.environ.get("COMSPEC", unicode("cmd.exe"))
            args = unicode('{} /c "{}"').format(comspec, args)
            if (_subprocess.GetVersion() >= 0x80000000 or
                    os.path.basename(comspec).lower() == "command.com"):
                w9xpopen = self._find_w9xpopen()
                args = unicode('"%s" %s') % (w9xpopen, args)
                creationflags |= _subprocess.CREATE_NEW_CONSOLE

        ## The shell command line is already built, so the parent is called
        ## with shell=False to keep it from encoding ``args`` again.
        super(Popen, self)._execute_child(
            args, executable, preexec_fn, close_fds, cwd, env,
            universal_newlines, startupinfo, creationflags, False, to_close,
            p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)


## Make the stock subprocess.Popen go through the unicode-aware wrapper too.
_subprocess.CreateProcess = CreateProcess
三、使用方法
若已在.py文件中引入unicode標記(建議自有項目,皆加以unicode支持):
from __future__ import unicode_literals
那麼,直接import win_subprocess就行,其代碼中,已以自定義CreateProcess替換了_subprocess.CreateProcess同名函數。
固然,也可直接以win_subprocess.Popen()調用。
可是,通過驗證,直接引用win_subprocess就能很好工做了,所以推薦直接引過去就行。
參考資料:
Fixing python 2.7 windows unicode issue with 'subprocess.Popen'