Python: subprocess.Popen()不支持unicode問題解決

起源:

所下載視頻,有音視頻分離者,須要合併起來,採用python之subprocess.Popen()調用ffmpeg實現。python版本爲2.7.13,而音視頻文件路徑,有unicode字符者,合併失敗。

此問題由來已久,終於忍無可忍,用盡工夫探尋其機理,終於尋得蛛絲馬跡,完成其修復。

其緣由爲:python 2.7.x中subprocess.Popen()函數,最終調用了kernel32.dll中的CreateProcess函數Ansi版本CreateProcessA,傳非Ansi參數給它會被它拒絕,而觸發異常。

測試代碼如下:

# encoding: utf-8

from __future__ import unicode_literals
import subprocess

file_path = r'D:\Percy Faith [CA US] by chkjns ♫ 175 songs\v.txt'
args = u'notepad "%s"' % file_path

try:
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode != 0:
        stderr = stderr.decode('utf-8', 'replace')
        print stderr
except Exception as e:
    print e.message

其異常爲UnicodeEncodeError,表現字串爲:'ascii' codec can't encode character u'\u266b' in position 42: ordinal not in range(128)

 

一、Python 2.x先天缺陷

此問題百度不到有效答案,因而上stackoverflow,一路摸去,找到了python官方對此bug描述,其連結如下:

Issue 19264: subprocess.Popen doesn't support unicode on Windows - Python tracker

七嘴八舌衆說紛紜,但總算大體捋清原委,即上述其調用了Ansi版本的CreateProcess所致。循此緣由,是否替換爲Unicode函數CreateProcessW即可?

最後一條回覆:

msg289664 - (view)    Author: Valentin LAB (Valentin LAB)    Date: 2017-03-15 10:39

給出了折中方案,其方案即如所思,作函數層替換。

 

二、win_subprocess.py

Valentin LAB這哥們,就在github上開放了源碼,解決此問題。其核心爲win_subprocess.py,內容如下:
(其代碼中有用os,他卻忘記import,貼代碼時已作修正)

## issue: https://bugs.python.org/issue19264

import ctypes
import subprocess
import _subprocess
import os
from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \
     Structure, sizeof, c_wchar, WinError
from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \
     HANDLE


##
## Types
##

## Creation flag OR-ed into every CreateProcessW call made by this module.
CREATE_UNICODE_ENVIRONMENT = 0x00000400
## CreateProcessW is the wide-character entry point, so its read-only string
## parameters (application name, current directory) must be wide strings.
## Mapping this alias to c_char_p made ctypes hand narrow byte strings to a
## function that reads UTF-16, breaking any call that passed ``executable``
## or ``cwd``.
LPCTSTR = c_wchar_p
LPTSTR = c_wchar_p
LPSECURITY_ATTRIBUTES = c_void_p
LPBYTE  = ctypes.POINTER(BYTE)

class STARTUPINFOW(Structure):
    """ctypes structure handed by reference to CreateProcessW as startup info."""

    ## One member per line, in declaration order.  ``dwFillAtrribute`` keeps
    ## its existing spelling so code addressing the field by name still works.
    _fields_ = [
        ("cb", DWORD),
        ("lpReserved", LPWSTR),
        ("lpDesktop", LPWSTR),
        ("lpTitle", LPWSTR),
        ("dwX", DWORD),
        ("dwY", DWORD),
        ("dwXSize", DWORD),
        ("dwYSize", DWORD),
        ("dwXCountChars", DWORD),
        ("dwYCountChars", DWORD),
        ("dwFillAtrribute", DWORD),
        ("dwFlags", DWORD),
        ("wShowWindow", WORD),
        ("cbReserved2", WORD),
        ("lpReserved2", LPBYTE),
        ("hStdInput", HANDLE),
        ("hStdOutput", HANDLE),
        ("hStdError", HANDLE),
    ]


## Pointer type used for the startup-info slot of CreateProcessW.argtypes.
LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW)


class PROCESS_INFORMATION(Structure):
    """ctypes structure that CreateProcessW fills with handles and ids."""

    _fields_ = [
        ("hProcess", HANDLE),
        ("hThread", HANDLE),
        ("dwProcessId", DWORD),
        ("dwThreadId", DWORD),
    ]


## Pointer type used for the last slot of CreateProcessW.argtypes.
LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION)


class DUMMY_HANDLE(ctypes.c_void_p):
    """``c_void_p`` wrapper that adds ``Close()`` and ``int()`` support."""

    def __init__(self, *args, **kwargs):
        super(DUMMY_HANDLE, self).__init__(*args, **kwargs)
        ## Remembers whether Close() has already released the handle.
        self.closed = False

    def Close(self):
        """Call kernel32 ``CloseHandle`` once; later calls do nothing."""
        if self.closed:
            return
        windll.kernel32.CloseHandle(self)
        self.closed = True

    def __int__(self):
        """Return the wrapped ``value`` so ``int(handle)`` works."""
        return self.value


## Bind kernel32's CreateProcessW and declare its prototype.  The trailing
## comments name the value this module passes in each position.
CreateProcessW = windll.kernel32.CreateProcessW
CreateProcessW.restype = BOOL
CreateProcessW.argtypes = [
    LPCTSTR,                # executable
    LPTSTR,                 # command line
    LPSECURITY_ATTRIBUTES,  # process attributes (always None here)
    LPSECURITY_ATTRIBUTES,  # thread attributes (always None here)
    BOOL,                   # inherit handles
    DWORD,                  # creation flags
    LPVOID,                 # environment block
    LPCTSTR,                # working directory
    LPSTARTUPINFOW,         # startup info, passed by reference
    LPPROCESS_INFORMATION,  # process information, passed by reference
]


##
## Patched functions/classes
##

def CreateProcess(executable, args, _p_attr, _t_attr,
                  inherit_handles, creation_flags, env, cwd,
                  startup_info):
    """Create a process supporting unicode executable and args for win32.

    Python implementation of CreateProcess using CreateProcessW for Win32.

    Arguments:

    executable       -- forwarded as the first argument of CreateProcessW
    args             -- command line, forwarded unchanged
    _p_attr, _t_attr -- accepted for signature compatibility and ignored;
                        None is always passed for both attribute slots
    inherit_handles  -- forwarded unchanged
    creation_flags   -- forwarded with CREATE_UNICODE_ENVIRONMENT OR-ed in
    env              -- mapping of variables, or None to pass no block
    cwd              -- forwarded unchanged
    startup_info     -- object providing dwFlags, wShowWindow, hStdInput,
                        hStdOutput and hStdError

    Returns a ``(process_handle, thread_handle, pid, tid)`` tuple whose two
    handles are DUMMY_HANDLE instances.  Raises the error built by
    ``WinError()`` when CreateProcessW reports failure.

    """

    def _handle_value(handle):
        ## The standard handles may be None (no redirection requested);
        ## ``int(None)`` raises TypeError, so hand None through as NULL.
        return None if handle is None else int(handle)

    si = STARTUPINFOW(
        dwFlags=startup_info.dwFlags,
        wShowWindow=startup_info.wShowWindow,
        cb=sizeof(STARTUPINFOW),
        hStdInput=_handle_value(startup_info.hStdInput),
        hStdOutput=_handle_value(startup_info.hStdOutput),
        hStdError=_handle_value(startup_info.hStdError),
    )

    wenv = None
    if env is not None:
        ## Build the block as "k=v\0" entries followed by a final "\0" and
        ## copy it into a c_wchar buffer of exactly that length.
        env = (unicode("").join([
            unicode("%s=%s\0") % (k, v)
            for k, v in env.items()])) + unicode("\0")
        wenv = (c_wchar * len(env))()
        wenv.value = env

    pi = PROCESS_INFORMATION()
    creation_flags |= CREATE_UNICODE_ENVIRONMENT

    if CreateProcessW(executable, args, None, None,
                      inherit_handles, creation_flags,
                      wenv, cwd, byref(si), byref(pi)):
        return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread),
                pi.dwProcessId, pi.dwThreadId)
    raise WinError()


class Popen(subprocess.Popen):
    """Supersedes ``subprocess.Popen`` to work around a cPython 2.7 bug."""

    def _execute_child(self, args, executable, preexec_fn, close_fds,
                       cwd, env, universal_newlines,
                       startupinfo, creationflags, shell, to_close,
                       p2cread, p2cwrite,
                       c2pread, c2pwrite,
                       errread, errwrite):
        """Prepare the command line, then defer to the parent implementation.

        Adapted from part of ``_execute_child`` in Python 2.7 (9fbb65e).
        The shell command string is assembled here, as unicode, and the
        parent is then called with ``shell`` forced to False.  That keeps
        the parent from building (and encoding) the string itself, so the
        text reaches ``CreateProcess`` as-is.

        """
        if isinstance(args, subprocess.types.StringTypes):
            command_line = args
        else:
            command_line = subprocess.list2cmdline(args)

        if startupinfo is None:
            startupinfo = subprocess.STARTUPINFO()

        if shell:
            startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW
            startupinfo.wShowWindow = _subprocess.SW_HIDE
            comspec = os.environ.get("COMSPEC", unicode("cmd.exe"))
            command_line = unicode('{} /c "{}"').format(comspec, command_line)
            needs_w9xpopen = (
                _subprocess.GetVersion() >= 0x80000000
                or os.path.basename(comspec).lower() == "command.com"
            )
            if needs_w9xpopen:
                launcher = self._find_w9xpopen()
                command_line = unicode('"%s" %s') % (launcher, command_line)
                creationflags |= _subprocess.CREATE_NEW_CONSOLE

        ## ``shell`` is passed as False because the shell wrapping, if any,
        ## has already been applied above.
        super(Popen, self)._execute_child(
            command_line, executable, preexec_fn, close_fds,
            cwd, env, universal_newlines,
            startupinfo, creationflags, False, to_close,
            p2cread, p2cwrite,
            c2pread, c2pwrite,
            errread, errwrite)

## Module-level side effect: importing this file replaces
## ``_subprocess.CreateProcess`` with the CreateProcess function defined here.
_subprocess.CreateProcess = CreateProcess

 

三、使用方法

若已在.py文件中引入unicode標記(建議自有項目,皆加以unicode支持):

from __future__ import unicode_literals

那麼,直接import win_subprocess就行,其代碼中,已以自定義CreateProcess替換了_subprocess.CreateProcess同名函數。

當然,也可直接以win_subprocess.Popen()調用。

可是,經過驗證,直接引用win_subprocess就能很好工作了,所以推薦直接引過去就行。

 

 

參考資料:

Fixing python 2.7 windows unicode issue with 'subprocess.Popen'

相關文章
相關標籤/搜索