WPF C#/VB
Text-to-Speech Desktop App
C#
// AZUL CODING ---------------------------------------
// WPF C#/VB - Text-to-Speech Desktop App
// https://youtu.be/vpehE-xYhAM
using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Windows;
using System.Windows.Controls;
using System.Speech.Synthesis;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech;
using Microsoft.Win32;
namespace TTS
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml. Speaks the text typed by the user, or saves it as a
    /// WAV file, using either the built-in System.Speech engine or Azure speech synthesis.
    /// </summary>
    public partial class MainWindow : Window
    {
        // Caption shared by every message box this window shows.
        private const string MessageCaption = "Text-to-speech";

        private readonly System.Speech.Synthesis.SpeechSynthesizer BuiltInSpeech = new();

        // Speaker-output Azure synthesizer. It is created on first use and then reused for every
        // playback, so the Stop button and Window_Closing never operate on a disposed instance.
        // WAV export uses its own short-lived synthesizer instead of this field.
        private Microsoft.CognitiveServices.Speech.SpeechSynthesizer? AzureSpeech;

        private readonly SpeechConfig AzureSpeechConfig;

        private readonly SaveFileDialog WAVSaveDialog = new()
        {
            Title = "Save as WAV file",
            Filter = "WAV Files (*.wav)|*.wav"
        };

        /// <summary>
        /// Builds the window, wires the built-in engine's completion event to the UI reset,
        /// creates the Azure configuration and shows the built-in voices.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();
            BuiltInSpeech.SpeakCompleted += (s, e) => ResetPlayingState();

            // Replace these details with your Azure API key and region:
            // https://azure.microsoft.com/en-gb/free/cognitive-services/
            AzureSpeechConfig = SpeechConfig.FromSubscription("<API_KEY_GOES_HERE>", "<REGION_GOES_HERE>");

            LoadBuiltInVoices();
        }

        /// <summary>
        /// Stops any speech that is still playing when the window closes and releases the
        /// built-in synthesizer.
        /// </summary>
        private void Window_Closing(object sender, System.ComponentModel.CancelEventArgs e)
        {
            BuiltInSpeech.SpeakAsyncCancelAll();
            BuiltInSpeech.Dispose();

            // Fire-and-forget: the window is going away, so the stop request is not awaited.
            AzureSpeech?.StopSpeakingAsync();
        }

        #region UI State

        /// <summary>Shows the Stop button and hides Listen/Save while speech is playing.</summary>
        private void SetPlayingState()
        {
            TitleLbl.Content = "Playing...";
            ListenBtn.Visibility = Visibility.Collapsed;
            SaveBtn.Visibility = Visibility.Collapsed;
            StopBtn.Visibility = Visibility.Visible;
        }

        /// <summary>Restores the idle UI: Listen/Save visible, Stop hidden.</summary>
        private void ResetPlayingState()
        {
            TitleLbl.Content = "Text-to-speech";
            ListenBtn.Visibility = Visibility.Visible;
            SaveBtn.Visibility = Visibility.Visible;
            StopBtn.Visibility = Visibility.Collapsed;
        }

        /// <summary>Shows an error dialog made of a short summary and the exception message.</summary>
        private static void ShowError(string summary, Exception ex)
        {
            MessageBox.Show(
                summary + Environment.NewLine + Environment.NewLine + ex.Message,
                MessageCaption,
                MessageBoxButton.OK,
                MessageBoxImage.Error);
        }

        #endregion

        #region Voices

        /// <summary>
        /// Switches the UI to the built-in engine: pitch is disabled, speed ranges from -10 to 10
        /// and the voice list is filled with the installed voices that are enabled.
        /// </summary>
        private void LoadBuiltInVoices()
        {
            PitchSlider.IsEnabled = false;
            SpeedSlider.Minimum = -10;
            SpeedSlider.Maximum = 10;

            // Disabled voices are skipped because they cannot be selected for speaking.
            // ToList() materializes the items once instead of binding to a deferred query.
            VoiceCombo.ItemsSource = BuiltInSpeech.GetInstalledVoices()
                .Where(installed => installed.Enabled)
                .Select(installed => new ComboBoxItem
                {
                    Content = $"{installed.VoiceInfo.Culture.Name} - {installed.VoiceInfo.Name}",
                    Tag = installed.VoiceInfo.Name
                })
                .ToList();
            VoiceCombo.SelectedIndex = 0;
        }

        private void BuiltInRadio_Click(object sender, RoutedEventArgs e)
        {
            LoadBuiltInVoices();
        }

        /// <summary>
        /// Switches the UI to Azure: pitch is enabled, speed ranges from -50 to 50 and the voice
        /// list is filled with a few example neural voices.
        /// </summary>
        private void LoadAzureVoices()
        {
            PitchSlider.IsEnabled = true;
            SpeedSlider.Minimum = -50;
            SpeedSlider.Maximum = 50;

            // These are some example voices - for the full list of voices, go to:
            // https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts
            // An array (rather than a dictionary) keeps the display order explicit.
            (string Id, string Label)[] voices =
            {
                ("en-GB-SoniaNeural", "en-GB - Sonia"),
                ("en-GB-RyanNeural", "en-GB - Ryan"),
                ("en-US-JennyNeural", "en-US - Jenny"),
                ("fr-FR-YvetteNeural", "fr-FR - Yvette")
            };

            VoiceCombo.ItemsSource = voices
                .Select(entry => new ComboBoxItem { Content = entry.Label, Tag = entry.Id })
                .ToList();
            VoiceCombo.SelectedIndex = 0;
        }

        private void AzureRadio_Click(object sender, RoutedEventArgs e)
        {
            LoadAzureVoices();
        }

        /// <summary>
        /// Reads the voice identifier stored in the selected combo box item's Tag.
        /// </summary>
        /// <param name="voice">The selected voice name, or an empty string when none is selected.</param>
        /// <returns><c>true</c> when a voice is selected; otherwise <c>false</c> (the user is told why).</returns>
        private bool TryGetSelectedVoice(out string voice)
        {
            voice = ((VoiceCombo.SelectedItem as ComboBoxItem)?.Tag as string) ?? string.Empty;
            if (voice.Length > 0)
            {
                return true;
            }

            MessageBox.Show("Please select a voice first.", MessageCaption, MessageBoxButton.OK, MessageBoxImage.Information);
            return false;
        }

        #endregion

        #region Playback

        /// <summary>
        /// Builds the SSML document sent to Azure.
        /// </summary>
        /// <param name="text">Plain text to speak; XML special characters are escaped.</param>
        /// <param name="voice">Azure voice name placed in the <c>voice</c> element.</param>
        /// <param name="speed">Relative speaking rate, in percent.</param>
        /// <param name="pitch">Relative pitch, in percent.</param>
        /// <returns>The SSML markup as a string.</returns>
        private static string GenerateSSML(string text, string voice, int speed, int pitch)
        {
            // Escape user text (and the voice name used as an attribute value) so that characters
            // such as ampersands or angle brackets cannot produce malformed markup.
            string safeText = System.Security.SecurityElement.Escape(text) ?? string.Empty;
            string safeVoice = System.Security.SecurityElement.Escape(voice) ?? string.Empty;

            // Format numbers with the invariant culture: some cultures use a non-ASCII minus sign.
            string rate = speed.ToString(System.Globalization.CultureInfo.InvariantCulture);
            string tone = pitch.ToString(System.Globalization.CultureInfo.InvariantCulture);

            // Continuation lines are flush-left because leading whitespace would become part of the string.
            return @$"<speak xmlns=""http://www.w3.org/2001/10/synthesis"" version=""1.0"" xml:lang=""en-US"">
<voice name=""{safeVoice}"">
<prosody rate=""{rate}%"" pitch=""{tone}%"">
{safeText}
</prosody>
</voice>
</speak>";
        }

        /// <summary>
        /// Tells the user why an Azure synthesis request failed. Does nothing when the request
        /// succeeded or was cancelled for a reason other than an error.
        /// </summary>
        private static void ReportAzureFailure(SpeechSynthesisResult result)
        {
            if (result.Reason != ResultReason.Canceled)
            {
                return;
            }

            SpeechSynthesisCancellationDetails details = SpeechSynthesisCancellationDetails.FromResult(result);
            if (details.Reason == CancellationReason.Error)
            {
                MessageBox.Show(
                    "Azure speech synthesis failed." + Environment.NewLine + Environment.NewLine + details.ErrorDetails,
                    MessageCaption,
                    MessageBoxButton.OK,
                    MessageBoxImage.Error);
            }
        }

        /// <summary>
        /// Speaks the entered text through the default audio device with the selected engine.
        /// </summary>
        private async void ListenBtn_Click(object sender, RoutedEventArgs e)
        {
            if (!TryGetSelectedVoice(out string voice))
            {
                return;
            }

            if (BuiltInRadio.IsChecked == true)
            {
                try
                {
                    BuiltInSpeech.SelectVoice(voice);
                    BuiltInSpeech.Rate = (int)SpeedSlider.Value;
                    BuiltInSpeech.SpeakAsync(SpeechTxt.Text);

                    // The SpeakCompleted handler registered in the constructor resets the UI.
                    SetPlayingState();
                }
                catch (Exception ex)
                {
                    ShowError("Unable to play the text.", ex);
                }

                return;
            }

            SetPlayingState();
            try
            {
                // Reuse one synthesizer for playback instead of leaking a new one per click.
                var synthesizer = AzureSpeech ??= new Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig);
                string ssml = GenerateSSML(SpeechTxt.Text, voice, (int)SpeedSlider.Value, (int)PitchSlider.Value);

                using SpeechSynthesisResult result = await synthesizer.SpeakSsmlAsync(ssml);
                ReportAzureFailure(result);
            }
            catch (Exception ex)
            {
                // This is an async void handler: an unhandled exception here would crash the app.
                ShowError("Unable to play the text.", ex);
            }
            finally
            {
                ResetPlayingState();
            }
        }

        /// <summary>
        /// Asks for a file name and writes the entered text to it as WAV audio with the
        /// selected engine.
        /// </summary>
        private async void SaveBtn_Click(object sender, RoutedEventArgs e)
        {
            if (!TryGetSelectedVoice(out string voice))
            {
                return;
            }

            if (WAVSaveDialog.ShowDialog() != true)
            {
                return;
            }

            // Block further Listen/Save clicks while the file is being written.
            ListenBtn.IsEnabled = false;
            SaveBtn.IsEnabled = false;
            try
            {
                if (BuiltInRadio.IsChecked == true)
                {
                    BuiltInSpeech.SelectVoice(voice);
                    BuiltInSpeech.Rate = (int)SpeedSlider.Value;
                    BuiltInSpeech.SetOutputToWaveFile(WAVSaveDialog.FileName);
                    try
                    {
                        BuiltInSpeech.Speak(SpeechTxt.Text);
                    }
                    finally
                    {
                        // Always switch back to the speakers, even when speaking fails.
                        BuiltInSpeech.SetOutputToDefaultAudioDevice();
                    }
                }
                else
                {
                    string ssml = GenerateSSML(SpeechTxt.Text, voice, (int)SpeedSlider.Value, (int)PitchSlider.Value);

                    // File output gets its own synthesizer so the shared AzureSpeech field never
                    // refers to a disposed object. Using-declarations dispose in reverse order.
                    using AudioConfig fileOutput = AudioConfig.FromWavFileOutput(WAVSaveDialog.FileName);
                    using var fileSynthesizer = new Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig, fileOutput);
                    using SpeechSynthesisResult result = await fileSynthesizer.SpeakSsmlAsync(ssml);
                    ReportAzureFailure(result);
                }
            }
            catch (Exception ex)
            {
                ShowError("Unable to save the WAV file.", ex);
            }
            finally
            {
                ListenBtn.IsEnabled = true;
                SaveBtn.IsEnabled = true;
            }
        }

        /// <summary>Requests both engines to stop speaking.</summary>
        private void StopBtn_Click(object sender, RoutedEventArgs e)
        {
            BuiltInSpeech.SpeakAsyncCancelAll();
            AzureSpeech?.StopSpeakingAsync();
        }

        #endregion
    }
}
VB.NET
' AZUL CODING ---------------------------------------
' WPF C#/VB - Text-to-Speech Desktop App
' https://youtu.be/vpehE-xYhAM
Imports System.Speech.Synthesis
Imports Microsoft.CognitiveServices.Speech.Audio
Imports Microsoft.CognitiveServices.Speech
Imports Microsoft.Win32
''' <summary>
''' Interaction logic for MainWindow.xaml. Speaks the text typed by the user, or saves it as a
''' WAV file, using either the built-in System.Speech engine or Azure speech synthesis.
''' </summary>
Class MainWindow
    ' Caption shared by every message box this window shows.
    Private Const MessageCaption As String = "Text-to-speech"

    Private ReadOnly BuiltInSpeech As New System.Speech.Synthesis.SpeechSynthesizer

    ' Speaker-output Azure synthesizer. It is created on first use and then reused for every
    ' playback, so the Stop button and Window_Closing never operate on a disposed instance.
    ' WAV export uses its own short-lived synthesizer instead of this field.
    Private AzureSpeech As Microsoft.CognitiveServices.Speech.SpeechSynthesizer

    Private ReadOnly AzureSpeechConfig As SpeechConfig

    Private ReadOnly WAVSaveDialog As New SaveFileDialog() With {
        .Title = "Save as WAV file",
        .Filter = "WAV Files (*.wav)|*.wav"
    }

    ''' <summary>
    ''' Builds the window, wires the built-in engine's completion event to the UI reset,
    ''' creates the Azure configuration and shows the built-in voices.
    ''' </summary>
    Public Sub New()
        InitializeComponent()
        AddHandler BuiltInSpeech.SpeakCompleted, Sub(s, e) ResetPlayingState()

        ' Replace these details with your Azure API key and region:
        ' https://azure.microsoft.com/en-gb/free/cognitive-services/
        AzureSpeechConfig = SpeechConfig.FromSubscription("<API_KEY_GOES_HERE>", "<REGION_GOES_HERE>")

        LoadBuiltInVoices()
    End Sub

    ''' <summary>
    ''' Stops any speech that is still playing when the window closes and releases the
    ''' built-in synthesizer.
    ''' </summary>
    Private Sub Window_Closing(sender As Object, e As ComponentModel.CancelEventArgs)
        BuiltInSpeech.SpeakAsyncCancelAll()
        BuiltInSpeech.Dispose()

        ' Fire-and-forget: the window is going away, so the stop request is not awaited.
        AzureSpeech?.StopSpeakingAsync()
    End Sub

#Region "UI State"

    ''' <summary>Shows the Stop button and hides Listen/Save while speech is playing.</summary>
    Private Sub SetPlayingState()
        TitleLbl.Content = "Playing..."
        ListenBtn.Visibility = Visibility.Collapsed
        SaveBtn.Visibility = Visibility.Collapsed
        StopBtn.Visibility = Visibility.Visible
    End Sub

    ''' <summary>Restores the idle UI: Listen/Save visible, Stop hidden.</summary>
    Private Sub ResetPlayingState()
        TitleLbl.Content = "Text-to-speech"
        ListenBtn.Visibility = Visibility.Visible
        SaveBtn.Visibility = Visibility.Visible
        StopBtn.Visibility = Visibility.Collapsed
    End Sub

    ''' <summary>Shows an error dialog made of a short summary and the exception message.</summary>
    Private Shared Sub ShowError(summary As String, ex As Exception)
        MessageBox.Show(
            summary & Environment.NewLine & Environment.NewLine & ex.Message,
            MessageCaption,
            MessageBoxButton.OK,
            MessageBoxImage.Error)
    End Sub

#End Region

#Region "Voices"

    ''' <summary>
    ''' Switches the UI to the built-in engine: pitch is disabled, speed ranges from -10 to 10
    ''' and the voice list is filled with the installed voices that are enabled.
    ''' </summary>
    Private Sub LoadBuiltInVoices()
        PitchSlider.IsEnabled = False
        SpeedSlider.Minimum = -10
        SpeedSlider.Maximum = 10

        ' Disabled voices are skipped because they cannot be selected for speaking.
        ' ToList() materializes the items once instead of binding to a deferred query.
        VoiceCombo.ItemsSource = BuiltInSpeech.GetInstalledVoices().
            Where(Function(installed) installed.Enabled).
            Select(Function(installed) New ComboBoxItem() With {
                .Content = $"{installed.VoiceInfo.Culture.Name} - {installed.VoiceInfo.Name}",
                .Tag = installed.VoiceInfo.Name
            }).
            ToList()
        VoiceCombo.SelectedIndex = 0
    End Sub

    Private Sub BuiltInRadio_Click(sender As Object, e As RoutedEventArgs)
        LoadBuiltInVoices()
    End Sub

    ''' <summary>
    ''' Switches the UI to Azure: pitch is enabled, speed ranges from -50 to 50 and the voice
    ''' list is filled with a few example neural voices.
    ''' </summary>
    Private Sub LoadAzureVoices()
        PitchSlider.IsEnabled = True
        SpeedSlider.Minimum = -50
        SpeedSlider.Maximum = 50

        ' These are some example voices - for the full list of voices, go to:
        ' https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts
        ' An array (rather than a dictionary) keeps the display order explicit.
        ' Key = Azure voice name, Value = label shown in the combo box.
        Dim voices As KeyValuePair(Of String, String)() = {
            New KeyValuePair(Of String, String)("en-GB-SoniaNeural", "en-GB - Sonia"),
            New KeyValuePair(Of String, String)("en-GB-RyanNeural", "en-GB - Ryan"),
            New KeyValuePair(Of String, String)("en-US-JennyNeural", "en-US - Jenny"),
            New KeyValuePair(Of String, String)("fr-FR-YvetteNeural", "fr-FR - Yvette")
        }

        VoiceCombo.ItemsSource = voices.
            Select(Function(entry) New ComboBoxItem() With {
                .Content = entry.Value,
                .Tag = entry.Key
            }).
            ToList()
        VoiceCombo.SelectedIndex = 0
    End Sub

    Private Sub AzureRadio_Click(sender As Object, e As RoutedEventArgs)
        LoadAzureVoices()
    End Sub

    ''' <summary>
    ''' Reads the voice identifier stored in the selected combo box item's Tag.
    ''' </summary>
    ''' <returns>
    ''' The selected voice name, or Nothing when no voice is selected (the user is told why).
    ''' </returns>
    Private Function GetSelectedVoice() As String
        Dim selected As ComboBoxItem = TryCast(VoiceCombo.SelectedItem, ComboBoxItem)
        Dim voice As String = If(selected Is Nothing, Nothing, TryCast(selected.Tag, String))
        If String.IsNullOrEmpty(voice) Then
            MessageBox.Show("Please select a voice first.", MessageCaption, MessageBoxButton.OK, MessageBoxImage.Information)
            Return Nothing
        End If
        Return voice
    End Function

#End Region

#Region "Playback"

    ''' <summary>
    ''' Builds the SSML document sent to Azure.
    ''' </summary>
    ''' <param name="text">Plain text to speak; XML special characters are escaped.</param>
    ''' <param name="voice">Azure voice name placed in the voice element.</param>
    ''' <param name="speed">Relative speaking rate, in percent.</param>
    ''' <param name="pitch">Relative pitch, in percent.</param>
    ''' <returns>The SSML markup as a string.</returns>
    Private Shared Function GenerateSSML(text As String, voice As String, speed As Integer, pitch As Integer) As String
        ' Escape user text (and the voice name used as an attribute value) so that characters
        ' such as ampersands or angle brackets cannot produce malformed markup.
        Dim safeText As String = If(System.Security.SecurityElement.Escape(text), String.Empty)
        Dim safeVoice As String = If(System.Security.SecurityElement.Escape(voice), String.Empty)

        ' Format numbers with the invariant culture: some cultures use a non-ASCII minus sign.
        Dim rate As String = speed.ToString(System.Globalization.CultureInfo.InvariantCulture)
        Dim tone As String = pitch.ToString(System.Globalization.CultureInfo.InvariantCulture)

        ' Continuation lines are flush-left because leading whitespace would become part of the string.
        Return $"<speak xmlns=""http://www.w3.org/2001/10/synthesis"" version=""1.0"" xml:lang=""en-US"">
<voice name=""{safeVoice}"">
<prosody rate=""{rate}%"" pitch=""{tone}%"">
{safeText}
</prosody>
</voice>
</speak>"
    End Function

    ''' <summary>
    ''' Tells the user why an Azure synthesis request failed. Does nothing when the request
    ''' succeeded or was cancelled for a reason other than an error.
    ''' </summary>
    Private Shared Sub ReportAzureFailure(result As SpeechSynthesisResult)
        If result.Reason <> ResultReason.Canceled Then
            Return
        End If

        Dim details As SpeechSynthesisCancellationDetails = SpeechSynthesisCancellationDetails.FromResult(result)
        If details.Reason = CancellationReason.Error Then
            MessageBox.Show(
                "Azure speech synthesis failed." & Environment.NewLine & Environment.NewLine & details.ErrorDetails,
                MessageCaption,
                MessageBoxButton.OK,
                MessageBoxImage.Error)
        End If
    End Sub

    ''' <summary>
    ''' Speaks the entered text through the default audio device with the selected engine.
    ''' </summary>
    Private Async Sub ListenBtn_Click(sender As Object, e As RoutedEventArgs)
        Dim voice As String = GetSelectedVoice()
        If voice Is Nothing Then
            Return
        End If

        If BuiltInRadio.IsChecked.GetValueOrDefault() Then
            Try
                BuiltInSpeech.SelectVoice(voice)
                BuiltInSpeech.Rate = CInt(SpeedSlider.Value)
                BuiltInSpeech.SpeakAsync(SpeechTxt.Text)

                ' The SpeakCompleted handler registered in the constructor resets the UI.
                SetPlayingState()
            Catch ex As Exception
                ShowError("Unable to play the text.", ex)
            End Try
            Return
        End If

        SetPlayingState()
        Try
            ' Reuse one synthesizer for playback instead of leaking a new one per click.
            If AzureSpeech Is Nothing Then
                AzureSpeech = New Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig)
            End If
            Dim ssml As String = GenerateSSML(SpeechTxt.Text, voice, CInt(SpeedSlider.Value), CInt(PitchSlider.Value))

            Using result As SpeechSynthesisResult = Await AzureSpeech.SpeakSsmlAsync(ssml)
                ReportAzureFailure(result)
            End Using
        Catch ex As Exception
            ' This is an Async Sub handler: an unhandled exception here would crash the app.
            ShowError("Unable to play the text.", ex)
        Finally
            ResetPlayingState()
        End Try
    End Sub

    ''' <summary>
    ''' Asks for a file name and writes the entered text to it as WAV audio with the
    ''' selected engine.
    ''' </summary>
    Private Async Sub SaveBtn_Click(sender As Object, e As RoutedEventArgs)
        Dim voice As String = GetSelectedVoice()
        If voice Is Nothing Then
            Return
        End If

        If Not WAVSaveDialog.ShowDialog().GetValueOrDefault() Then
            Return
        End If

        ' Block further Listen/Save clicks while the file is being written.
        ListenBtn.IsEnabled = False
        SaveBtn.IsEnabled = False
        Try
            If BuiltInRadio.IsChecked.GetValueOrDefault() Then
                BuiltInSpeech.SelectVoice(voice)
                BuiltInSpeech.Rate = CInt(SpeedSlider.Value)
                BuiltInSpeech.SetOutputToWaveFile(WAVSaveDialog.FileName)
                Try
                    BuiltInSpeech.Speak(SpeechTxt.Text)
                Finally
                    ' Always switch back to the speakers, even when speaking fails.
                    BuiltInSpeech.SetOutputToDefaultAudioDevice()
                End Try
            Else
                Dim ssml As String = GenerateSSML(SpeechTxt.Text, voice, CInt(SpeedSlider.Value), CInt(PitchSlider.Value))

                ' File output gets its own synthesizer so the shared AzureSpeech field never
                ' refers to a disposed object.
                Using fileOutput As AudioConfig = AudioConfig.FromWavFileOutput(WAVSaveDialog.FileName)
                    Using fileSynthesizer As New Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig, fileOutput)
                        Using result As SpeechSynthesisResult = Await fileSynthesizer.SpeakSsmlAsync(ssml)
                            ReportAzureFailure(result)
                        End Using
                    End Using
                End Using
            End If
        Catch ex As Exception
            ShowError("Unable to save the WAV file.", ex)
        Finally
            ListenBtn.IsEnabled = True
            SaveBtn.IsEnabled = True
        End Try
    End Sub

    ''' <summary>Requests both engines to stop speaking.</summary>
    Private Sub StopBtn_Click(sender As Object, e As RoutedEventArgs)
        BuiltInSpeech.SpeakAsyncCancelAll()
        AzureSpeech?.StopSpeakingAsync()
    End Sub

#End Region
End Class
XAML
<!-- AZUL CODING --------------------------------------- -->
<!-- WPF C#/VB - Text-to-Speech Desktop App -->
<!-- https://youtu.be/vpehE-xYhAM -->
<Window x:Class="TTS.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
        xmlns:local="clr-namespace:TTS"
        mc:Ignorable="d"
        Title="Text-to-speech - Azul Coding"
        Closing="Window_Closing"
        Width="500"
        SizeToContent="Height"
        ResizeMode="CanMinimize">
    <StackPanel Background="White">

        <!-- Heading; the code-behind switches its text while speech is playing -->
        <Label x:Name="TitleLbl"
               Content="Text-to-speech"
               Padding="5,0,5,5" Margin="20"
               FontWeight="SemiBold" FontSize="16"
               BorderBrush="DodgerBlue" BorderThickness="0,0,0,2"/>

        <!-- Text to be spoken -->
        <TextBox x:Name="SpeechTxt"
                 Text="This is text-to-speech."
                 Padding="5" Margin="20,0"
                 FontSize="14"
                 MinLines="5" MaxLines="5"
                 TextWrapping="Wrap" AcceptsReturn="True"/>

        <!-- Voice picker and engine choice (built-in or Azure) -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Voice:" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <ComboBox x:Name="VoiceCombo"
                      Width="225"
                      SelectedIndex="0"
                      VerticalContentAlignment="Center"
                      FontSize="14"/>
            <RadioButton x:Name="BuiltInRadio"
                         Click="BuiltInRadio_Click"
                         Content="Built-in"
                         GroupName="TTSTypeRadios"
                         Margin="20,0,20,0"
                         IsChecked="True"
                         FontSize="14"
                         VerticalContentAlignment="Center"/>
            <RadioButton x:Name="AzureRadio"
                         Click="AzureRadio_Click"
                         Content="Azure"
                         GroupName="TTSTypeRadios"
                         Margin="0"
                         FontSize="14"
                         VerticalContentAlignment="Center"/>
        </StackPanel>

        <!-- Speed slider with a label bound to its current value -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Speed:" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <Slider x:Name="SpeedSlider"
                    Width="225"
                    VerticalAlignment="Center"
                    SmallChange="1"
                    IsSnapToTickEnabled="True"
                    Minimum="-10" Maximum="10"/>
            <Label Content="{Binding Value, ElementName=SpeedSlider}" FontSize="14" Margin="15,0,5,0" VerticalAlignment="Center"/>
        </StackPanel>

        <!-- Pitch slider with a label bound to its current value -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Pitch:" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <Slider x:Name="PitchSlider"
                    Width="225"
                    VerticalAlignment="Center"
                    SmallChange="1"
                    IsSnapToTickEnabled="True"
                    Minimum="-50" Maximum="50"/>
            <Label Content="{Binding Value, ElementName=PitchSlider}" FontSize="14" Margin="15,0,5,0" VerticalAlignment="Center"/>
        </StackPanel>

        <!-- Action buttons; Stop starts out collapsed -->
        <StackPanel Margin="20" Orientation="Horizontal">
            <Button x:Name="ListenBtn"
                    Click="ListenBtn_Click"
                    Padding="10,5" Margin="0,0,10,0"
                    Background="#f0f0f0">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/speaker.png"/>
                    <TextBlock Text="Listen" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>

            <Button x:Name="SaveBtn"
                    Click="SaveBtn_Click"
                    Padding="10,5" Margin="0,0,10,0"
                    Background="#f0f0f0">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/save.png"/>
                    <TextBlock Text="Save as WAV" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>

            <Button x:Name="StopBtn"
                    Click="StopBtn_Click"
                    Padding="10,5" Margin="0,0,10,0"
                    Background="#f0f0f0"
                    Visibility="Collapsed">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/stop.png"/>
                    <TextBlock Text="Stop" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>
        </StackPanel>

    </StackPanel>
</Window>