6_文件IO

1. 基本文件讀取

        常用函數:readline()、readlines()、write()、writelines()

        f.read(size):指定讀取文件的字節數。需要注意的是,對於同一個文件,連續調用這個函數時,文件指針會一直向後移動,直到文件尾。

        所以如果要中途修改文件指針的位置,就需要調用seek()函數。常用的有seek(0):移動到文件頭;seek(n):移動到文件位置n處;seek(0,2):移動到文件尾。

        f.tell()函數返回文件指針當前的位置



2. 文件操作

       2.1 文件複製

              除了手動編寫,也可以使用現成的模塊函數

import shutil

shutil.copyfile('1.txt','2.txt')
## Copies the contents of 1.txt into 2.txt: copyfile(src, dst) takes the source first, then the destination.

       2.2 文件刪除

import os,os.path

filename = 'test.txt'
## Check for the file first: os.remove raises an error when the path is missing.
if not os.path.exists(filename):
    print('not exist!')
else:
    os.remove(filename)

       2.3 文件重命名

import os

## Old name and new name of the file to rename.
filename, rename = 'test.txt', 'write.txt'

## Rename test.txt to write.txt; this raises an OSError if test.txt does not exist.
os.rename(filename, rename)


3. 目錄操作

       3.1 目錄創建

import os

os.listdir('f:/')                           ## list the entries of f:/ (the result is only displayed in an interactive session)

os.mkdir('f:/newdir')                       ## create a single new directory; the parent must already exist

os.makedirs('f:/newdir1/newdir2')           ## create nested directories, including any missing parents (os.mkdirs does not exist)

       3.2 目錄刪除

import os
import shutil                               ## needed for shutil.rmtree below

## The three calls are independent examples. Run in sequence, the last one
## would fail because os.rmdir has already removed f:/newdir.

os.rmdir('f:/newdir')                       ## removes a directory only if it is empty

os.removedirs('f:/newdir1/newdir2')         ## removes newdir2, then each parent directory that is left empty

shutil.rmtree('f:/newdir')                  ## removes a directory tree even when it is not empty

       3.3 目錄遍歷

###遞歸法

import os

def visitdir1(path):
    """Recursively print every entry found under *path*.

    Each file and directory is printed as its name joined onto the
    directory that contains it. A directory is printed before its own
    contents are visited.

    Args:
        path: Directory to start from.
    """
    for entry in os.listdir(path):
        sub_path = os.path.join(path, entry)
        print(sub_path)
        if os.path.isdir(sub_path):
            ## Recurse into this function itself. The previous code called the
            ## undefined name visitdir and raised NameError at the first subdirectory.
            visitdir1(sub_path)

​`````````````````````````````````````````````````````````````````````````````````````
###os.walk法

import os
def visitdir2(path):
    """Print every directory and file below *path* using os.walk.

    For each directory visited, its sub-directories are printed first and
    its files second, each joined onto the directory being visited.

    Args:
        path: Directory to start from.
    """
    ## os.walk yields (current directory, sub-directory names, file names)
    for current, subdirs, filenames in os.walk(path):
        ## directories first, then files: same order as two separate loops
        for name in subdirs + filenames:
            print(os.path.join(current, name))


4. 綜合應用

       題目

              音頻文件的根目錄在:/dnn4_added/0_ASR/01_resource/original_speech_data/AISHELL-ASR009-zh-cn-mandarin178/data_aishell/wav

              說話信息在:/dnn4_added/0_ASR/01_resource/original_speech_data/AISHELL-ASR009-zh-cn-mandarin178/data_aishell/transcript/aishell_transcript_v0.8.txt

              請得到一個列表文件,文件裏面列出音頻全路徑和說話信息,並把說話信息裏的空格都去掉,同時把音頻文件的後綴名改爲.mfcc

       代碼

import os
import os.path
import json
import sys


filter = [".wav"]           ## file extensions kept by all_path (NOTE: this name shadows the builtin filter())
postfix = '.mfcc'           ## extension the exercise asks for in place of .wav

## Root directory that is walked for wav files.
dirrname = '/dnn4_added/0_ASR/01_resource/original_speech_data/AISHELL-ASR009-zh-cn-mandarin178/data_aishell/wav'
## Transcript file: each line is read as "<id> <text>".
filename = '/dnn4_added/0_ASR/01_resource/original_speech_data/AISHELL-ASR009-zh-cn-mandarin178/data_aishell/transcript/aishell_transcript_v0.8.txt'

##目錄遍歷,把wav文件都找出來
def all_path(dirname):
    """Collect the full paths of all files under *dirname* with an accepted extension.

    The accepted extensions come from the module-level ``filter`` list.

    Args:
        dirname: Root directory to walk.

    Returns:
        A list of joined paths, in os.walk order.
    """
    matches = []
    for parent, _subdirs, names in os.walk(dirname):
        candidates = (os.path.join(parent, name) for name in names)
        ## keep only the paths whose extension is in the filter list
        matches.extend(
            candidate for candidate in candidates
            if os.path.splitext(candidate)[1] in filter
        )
    return matches

PATHS = all_path(dirrname)

## Read the transcript into a mapping: utterance id -> text with all whitespace removed.
## (Named `transcripts` rather than `dict` so the builtin dict is not shadowed.)
transcripts = {}
with open(filename, 'r') as file_object:
    for line in file_object:
        split_list = line.split(' ', 1)
        if len(split_list) < 2:
            ## Blank line, or an id with no text after it: nothing to record.
            continue
        transcripts[split_list[0]] = ''.join(split_list[1].split())

## Re-key every entry whose id matches a wav file's base name:
## the new key is the full audio path with its extension replaced by `postfix`.
for path in PATHS:
    split_dirname = os.path.splitext(path)[0]
    split_basename = os.path.basename(split_dirname)
    ## `in` replaces dict.has_key(), which no longer exists in Python 3.
    if split_basename in transcripts:
        transcripts[split_dirname + postfix] = transcripts.pop(split_basename)

## Write one "<key> <text>" line per entry. The with-block guarantees the file
## is closed; the previous `outputfile.close` lacked parentheses and never ran.
with open('dic.txt', 'w') as outputfile:
    for key, text in transcripts.items():
        outputfile.write('{} {}\n'.format(key, text))
相關文章
相關標籤/搜索