# Slurm cluster configuration (slurm.conf).
# Format: one Key=Value directive per line; '#' starts a comment.
# Paths reference a Slurm 16.05.3 installation.

#
# CLUSTER / CONTROLLERS
## Cluster name
ClusterName=myslurm
## Hostname of the primary controller
ControlMachine=node11
## IP address of the primary controller
ControlAddr=192.168.80.11
## Hostname of the backup controller
BackupController=node12
## IP address of the backup controller
BackupAddr=192.168.80.12

#
# DAEMONS / AUTHENTICATION
## User the slurmd daemon runs as
SlurmdUser=root
## Port slurmctld listens on
SlurmctldPort=6817
## Port slurmd communicates on
SlurmdPort=6818
## Authentication method between components. Options: none|munge
AuthType=auth/munge
## Authentication settings for communication among the Slurm daemons
## (slurmctld, slurmdbd, slurmd) and the Slurm clients.
## Recognised options: cred_expire|socket|ttl
## NOTE(review): a bare path is the legacy form; the documented form is
## AuthInfo=socket=/var/run/munge/munge.socket.2 -- confirm which one this
## Slurm version expects.
AuthInfo=/var/run/munge/munge.socket.2
#JobCredentialPrivateKey=
#JobCredentialPublicCertificate=
## Location of the cluster state files (must be on the global/shared file
## system so that the backup controller can read them)
StateSaveLocation=/usr/local/globle/softs/slurm/16.05.3/state
## Node-local directory used by slurmd for its own state
SlurmdSpoolDir=/var/spool/slurmd
## Interconnect plugin; switch/none means no special switch handling
SwitchType=switch/none
## Default MPI type used when a job step does not request one
MpiDefault=none

#
# JOB LAUNCH
# BatchStartTimeout: maximum time (in seconds) a batch job may take to start
# before it is considered missing and its allocated resources are released.
# Default is 10 seconds. Running the prolog, loading the user's environment
# variables, or slurmd being paged back in from memory may need a larger value.
#BatchStartTimeout=10
#BurstBufferType=burst_buffer/none
#CheckpointType=checkpoint/blcr|none|ompi

#
# PROCESS TRACKING / JOB IDS
## Where slurmctld stores its PID file
SlurmctldPidFile=/var/run/slurmctld.pid
## Where slurmd stores its PID file
SlurmdPidFile=/var/run/slurmd.pid
## Process tracking plugin (process-group based here; cgroup alternative is
## kept commented out)
#ProctrackType=proctrack/cgroup
ProctrackType=proctrack/pgid
#PluginDir=
## First job ID to be assigned
FirstJobId=1
## Controls when a DOWN node is returned to service.
## NOTE(review): 2 presumably means "as soon as the node registers with a
## valid configuration" -- confirm against the slurm.conf man page.
ReturnToService=2
## Maximum number of jobs slurmctld keeps in its active database at one time
## (this is a job count, not the largest job ID; that would be MaxJobId)
MaxJobCount=10000
#PlugStackConfig=
#PropagatePrioProcess=
#PropagateResourceLimits=
#PropagateResourceLimitsExcept=
#Prolog=
#Epilog=
#SrunProlog=
#SrunEpilog=
#TaskProlog=
#TaskEpilog=
#TaskPlugin=
#TrackWCKey=no
#TreeWidth=50
#TmpFS=
#UsePAM=

#
# TIMERS
## Controller communication timeout (seconds)
SlurmctldTimeout=60
## slurmd communication timeout (seconds)
SlurmdTimeout=60
InactiveLimit=0
MinJobAge=300
KillWait=30
Waittime=0

#
# SCHEDULING
SchedulerType=sched/backfill
#SchedulerAuth=
#SchedulerPort=
#SchedulerRootFilter=
SelectType=select/linear
FastSchedule=1
#PriorityType=priority/multifactor
#PriorityDecayHalfLife=14-0
#PriorityUsageResetPeriod=14-0
#PriorityWeightFairshare=100000
#PriorityWeightAge=1000
#PriorityWeightPartition=10000
#PriorityWeightJobSize=1000
#PriorityMaxAge=1-0

#
# LOGGING
## NOTE(review): numeric level 6 presumably corresponds to "debug2", which is
## very verbose for production -- confirm this is intended.
SlurmctldDebug=6
SlurmctldLogFile=/usr/local/globle/softs/slurm/16.05.3/log/SlurmctldLogFile
SlurmdDebug=6
SlurmdLogFile=/var/log/SlurmdLogFile
JobCompType=jobcomp/none
#JobCompLoc=

#
# ACCOUNTING
#JobAcctGatherType=jobacct_gather/cgroup
JobAcctGatherFrequency=30
## Accounting storage settings. Storage type options: filetxt, none, slurmdbd
AccountingStorageType=accounting_storage/slurmdbd
AccountingStorageHost=192.168.80.13
AccountingStorageBackupHost=192.168.80.14
## NOTE(review): AccountingStorageLoc and AccountingStorageUser are presumably
## ignored when the storage type is slurmdbd (the database itself is configured
## in slurmdbd.conf) -- confirm before relying on them.
AccountingStorageLoc=slurm_acct_db
## With slurmdbd + MUNGE this value is the MUNGE socket path, not a password.
#AccountingStoragePass=liwanliang
AccountingStoragePass=/var/run/munge/munge.socket.2
AccountingStorageUser=slurmadmin
## Enforce association limits and QOS on job submission.
## Fixed: the value previously read "limints", which is not a valid option.
AccountingStorageEnforce=limits,qos
#AccountingStorageTRES=gres/craynetwork,license/iop1,cpu,energy,memroy,nodes
#AccountingStoreJobComment=yes
AcctGatherNodeFreq=180
#AcctGatherEnergyType=acct_gather_energy/none|ipmi|rapl
#AcctGatherInfinibandType=acct_gather_infiniband/none|ofed
#AcctGatherFilesystemType=acct_gather_filesystem/none|lustre
#AcctGatherProfileType=acct_gather_profile/none|hdf5
#AllowSpecResourceUsage=0|1

#
# COMPUTE NODES
## Each NodeName / PartitionName definition must stay on a single line.
## RealMemory is in megabytes.
## NOTE(review): Procs appears to be a legacy alias of CPUs; both are set to
## the same value here, so one of them is redundant -- confirm and drop one.
NodeName=node[11-14] CPUs=4 RealMemory=900 Sockets=4 CoresPerSocket=1 ThreadsPerCore=1 Procs=4 State=UNKNOWN

#
# PARTITIONS
## q_x86_1 is the default partition; neither partition has a time limit.
PartitionName=q_x86_1 Nodes=node[11-12] Default=YES MaxTime=INFINITE State=UP
PartitionName=q_x86_2 Nodes=node[13-14] MaxTime=INFINITE State=UP