WinRT SpeechSynthesizer を使った音声合成のサンプル
実行すると、Textに設定したサンプルの音声が流れます。
・WinRT(System.Runtime.WindowsRuntimeの必要ライブラリ)の読み込みは、classのコンストラクタ内で行うとうまく動かなかったため、GitHubのソースコードを参考に、スクリプトの起動直後(try{~}の先頭)で読み込むようにした。
・WinRTでは必須のAwaitは、Githubのソースをそのまま流用。
・参考にしたGitHubのソースに含まれていたClampは、PowerShellから使えなかった(バグ?)ため、同等の処理を個別に作成して実装した。
・[Windows.Media.SpeechSynthesis.SpeechSynthesizerOptions]を外出しできるように設計している。[Windows.Media.SpeechSynthesis.SpeechSynthesizerOptions]を使えば、パラメータを変更して様々な合成音声を楽しめる。実装例は以下のサンプルコードを参照。
サンプルコード
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add-Type -AssemblyName System.Runtime.WindowsRuntime | |
# Plays in-memory audio data (a byte array) through System.Media.SoundPlayer.
# The player and its backing stream are created per playback and released
# again by Dispose().
class PSSoundPlayer {
    # Player bound to the current audio data; $null while nothing is loaded.
    [System.Media.SoundPlayer]$player
    # Stream wrapping the audio bytes; must stay open while $player uses it.
    [System.IO.MemoryStream]$memstream

    # Plays $bytes and blocks until playback has finished, then releases
    # the player and stream (also when playback throws).
    PlaySync([System.Byte[]]$bytes) {
        $this.Load($bytes)
        try {
            $this.player.PlaySync()
        }
        finally {
            $this.Dispose()
        }
    }

    # Starts playing $bytes and returns without waiting for playback to end.
    # Call Stop() to interrupt playback and release the resources.
    Play([System.Byte[]]$bytes) {
        $this.Load($bytes)
        # Non-blocking counterpart of PlaySync(); the original called
        # PlaySync() here, which made Stop() pointless.
        $this.player.Play()
    }

    # Stops any running playback and releases the player and stream.
    # Safe to call when nothing is playing.
    Stop() {
        if ($null -ne $this.player) {
            $this.player.Stop()
        }
        $this.Dispose()
    }

    # Releases the player and stream. Safe to call repeatedly.
    Dispose() {
        if ($null -ne $this.player) {
            $this.player.Dispose()
            $this.player = $null
        }
        if ($null -ne $this.memstream) {
            $this.memstream.Dispose()
            $this.memstream = $null
        }
    }

    # Releases any previous player/stream and creates new ones for $bytes.
    hidden Load([System.Byte[]]$bytes) {
        $this.Dispose()
        $this.memstream = [System.IO.MemoryStream]::new($bytes)
        $this.player = [System.Media.SoundPlayer]::new($this.memstream)
    }
}
# Wraps the WinRT SpeechSynthesizer: converts text (or SSML) to audio and plays
# it through PSSoundPlayer.
# The WinRT types used below and the Await / AwaitWithProgress / AsBuffer helper
# functions are defined outside this class and must exist in the session before
# PlaySync() is called.
class PSSpeechSynthesizer {
    # Underlying WinRT synthesizer; released by Dispose().
    [Windows.Media.SpeechSynthesis.SpeechSynthesizer]$speech
    # Not used by this class itself; kept as part of the public surface.
    [Windows.Media.SpeechSynthesis.VoiceInformation]$voiceInfo
    # Player used to output the synthesized audio.
    [PSSoundPlayer]$PSSoundPlayer

    # Creates the synthesizer with the default voice and default option values.
    PSSpeechSynthesizer() {
        $this.speech = [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::new()

        # Defaults; callers can change them through GetSpeechSynthesizerOptionsObject().
        $this.speech.Voice = [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::DefaultVoice
        $this.speech.Options.AppendedSilence = [Windows.Media.SpeechSynthesis.SpeechAppendedSilence]::Default
        $this.speech.Options.AudioPitch = $this.Clamp([double]1.0, [double]0.0, [double]2.0)
        $this.speech.Options.PunctuationSilence = [Windows.Media.SpeechSynthesis.SpeechPunctuationSilence]::Default
        $this.speech.Options.IncludeSentenceBoundaryMetadata = $False
        $this.speech.Options.IncludeWordBoundaryMetadata = $False
        $this.speech.Options.SpeakingRate = $this.Clamp([double]1.0, [double]0.5, [double]6.0)
        $this.speech.Options.AudioVolume = $this.Clamp([double]1.0, [double]0.0, [double]1.0)

        $this.PSSoundPlayer = New-Object PSSoundPlayer
    }

    # Returns the live options object of the synthesizer so that callers can
    # adjust pitch, rate, volume, etc. before calling PlaySync().
    [Windows.Media.SpeechSynthesis.SpeechSynthesizerOptions]GetSpeechSynthesizerOptionsObject() {
        return $this.speech.Options
    }

    # Appends the display name of every installed voice to $VoiceDisplayNames.
    [void] GetVoiceDisplayNames([System.Collections.ArrayList]$VoiceDisplayNames) {
        foreach ($voice in [Windows.Media.SpeechSynthesis.SpeechSynthesizer]::AllVoices) {
            # ArrayList.Add returns the new index; discard it explicitly.
            $null = $VoiceDisplayNames.Add($voice.DisplayName)
        }
    }

    # Limits $value to the range [$min, $max] and returns the result.
    [double] Clamp([double]$value, [double]$min, [double]$max) {
        if ($value -le $min) { return $min }
        if ($value -ge $max) { return $max }
        return $value
    }

    # Speaks $text and blocks until playback has finished.
    # Plain text is XML-escaped and wrapped in a <speak> element; input that
    # already starts with "<speak" is treated as SSML. In both cases the
    # version, xml:lang (taken from the current voice) and xmlns attributes are
    # set on the root element before synthesis.
    # The synthesizer stays usable afterwards; call Dispose() when done.
    [void]PlaySync([string]$text) {
        $ssmlNamespace = 'http://www.w3.org/2001/10/synthesis'

        if (-not $text.Trim().StartsWith('<speak', [System.StringComparison]::Ordinal)) {
            $text = [System.Security.SecurityElement]::Escape($text)
            $text = "<speak version=`"1.0`">$text</speak>"
        }

        $dom = [xml]$text
        $dom.speak.SetAttribute('version', '1.0')
        $dom.speak.SetAttribute('xml:lang', $this.speech.Voice.Language)
        $dom.speak.SetAttribute('xmlns', $ssmlNamespace)
        $text = $dom.speak.OuterXml

        $stream = $null
        try {
            $stream = Await ($this.speech.SynthesizeSsmlToStreamAsync($text)) ([Windows.Media.SpeechSynthesis.SpeechSynthesisStream])

            # $buffer is a WinRT view over $bytes, so ReadAsync fills $bytes directly.
            $bytes = [array]::CreateInstance([byte], $stream.Size)
            [Windows.Storage.Streams.IBuffer]$buffer = AsBuffer($bytes)
            AwaitWithProgress ($stream.ReadAsync($buffer, [uint32]$stream.Size, [Windows.Storage.Streams.InputStreamOptions]::None)) ([Windows.Storage.Streams.IBuffer]) ([UInt32])

            $this.PSSoundPlayer.PlaySync($bytes)
        }
        finally {
            # Release the synthesis stream even when synthesis or playback fails.
            if ($null -ne $stream) {
                $stream.Dispose()
            }
        }
    }

    # Releases the underlying WinRT synthesizer. Safe to call repeatedly.
    # After this call the instance can no longer synthesize speech.
    [void] Dispose() {
        if ($null -ne $this.speech) {
            $this.speech.Dispose()
            $this.speech = $null
        }
    }
}
try {
    # Interop assembly that supplies the AsTask() and AsBuffer() extension
    # methods used by the helper functions below.
    Add-Type -AssemblyName System.Runtime.WindowsRuntime

    # Reference each WinRT type once by its assembly-qualified name so that it
    # is loaded into the session before the classes and helpers use it.
    $null = [Windows.Foundation.IAsyncOperation`1, Windows.Foundation, ContentType=WindowsRuntime]
    $null = [Windows.Foundation.IAsyncOperationWithProgress`2, Windows.Foundation, ContentType=WindowsRuntime]
    $null = [Windows.Media.SpeechSynthesis.SpeechSynthesizer, Windows.Media.SpeechSynthesis, ContentType=WindowsRuntime]
    $null = [Windows.Media.SpeechSynthesis.VoiceInformation, Windows.Media.SpeechSynthesis, ContentType=WindowsRuntime]
    $null = [Windows.Media.SpeechSynthesis.SpeechSynthesisStream, Windows.Media.SpeechSynthesis, ContentType=WindowsRuntime]
    $null = [Windows.Media.Core.SpeechCue, Windows.Media.Core, ContentType=WindowsRuntime]
    $null = [Windows.Media.Core.TimedMetadataTrack, Windows.Media.Core, ContentType=WindowsRuntime]

    # All single-parameter AsTask overloads; the two generic ones needed are
    # picked out by the type name of their only parameter.
    $_taskMethods = [System.WindowsRuntimeSystemExtensions].GetMethods() | Where-Object {
        ($_.Name -eq 'AsTask') -and ($_.GetParameters().Count -eq 1)
    }
    $asTaskGeneric = ($_taskMethods | Where-Object { $_.GetParameters()[0].ParameterType.Name -eq 'IAsyncOperation`1' })[0]
    $asTaskGeneric2 = ($_taskMethods | Where-Object { $_.GetParameters()[0].ParameterType.Name -eq 'IAsyncOperationWithProgress`2' })[0]

    # Blocks until the WinRT IAsyncOperation<ResultType> completes and returns its result.
    function Await($WinRtTask, $ResultType) {
        $converter = $asTaskGeneric.MakeGenericMethod($ResultType)
        $task = $converter.Invoke($null, @($WinRtTask))
        # Wait(-1) waits without timeout; its boolean result is discarded.
        $null = $task.Wait(-1)
        return $task.Result
    }

    # Blocks until the WinRT IAsyncOperationWithProgress<ResultType1, ResultType2>
    # completes. The operation's result is not returned.
    function AwaitWithProgress($WinRtTask, $ResultType1, $ResultType2) {
        $converter = $asTaskGeneric2.MakeGenericMethod($ResultType1, $ResultType2)
        $task = $converter.Invoke($null, @($WinRtTask))
        $null = $task.Wait(-1)
    }

    # Wraps a byte array in a WinRT IBuffer via WindowsRuntimeBufferExtensions.AsBuffer.
    function AsBuffer($bytes) {
        return [Windows.Storage.Streams.IBuffer]([System.Runtime.InteropServices.WindowsRuntime.WindowsRuntimeBufferExtensions]::AsBuffer($bytes))
    }

    # --- Sample: list the installed voices, tweak the options, speak a text ---
    $PSSpeechSynthesizer = [PSSpeechSynthesizer]::new()

    [System.Collections.ArrayList]$VoiceDisplayNameList = [System.Collections.ArrayList]::new()
    $PSSpeechSynthesizer.GetVoiceDisplayNames($VoiceDisplayNameList)
    foreach ($VoiceDisplayName in $VoiceDisplayNameList) {
        Write-Host $VoiceDisplayName
    }

    # The options object is the synthesizer's own, so changes apply to the next PlaySync().
    [Windows.Media.SpeechSynthesis.SpeechSynthesizerOptions]$Options = $PSSpeechSynthesizer.GetSpeechSynthesizerOptionsObject()
    $Options.AudioPitch = $PSSpeechSynthesizer.Clamp([double]2.0, [double]0.0, [double]2.0)
    $Options.SpeakingRate = $PSSpeechSynthesizer.Clamp([double]1.0, [double]0.5, [double]6.0)

    [string]$text = "既定では、発話ができるだけ自然に聞こえないように、各句読点の後に約 750 ミリ秒の無音が追加されます。 アプリ コンテンツの高速再生やスクリーン リーダーの説明など、場合によっては、この動作は不要であるか、ユーザーが構成する必要があります。"
    $PSSpeechSynthesizer.PlaySync($text)
}
catch {
    # Report the failure on the output stream instead of terminating the script.
    Write-Output "Something threw an exception or used Write-Error"
    Write-Output $_
}
参考URL
PowerShellでSpeechSynthesizerを使う – Qiita
SpeechSynthesizer クラス (Windows.Media.SpeechSynthesis) – Windows UWP applications | Microsoft Learn
コメント