Skip to main content

WPF C#/VB
Text-to-Speech Desktop App



C#

// AZUL CODING ---------------------------------------
// WPF C#/VB - Text-to-Speech Desktop App
// https://youtu.be/vpehE-xYhAM


using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Windows;
using System.Windows.Controls;

using System.Speech.Synthesis;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.CognitiveServices.Speech;
using Microsoft.Win32;

namespace TTS
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml.
    /// Speaks the entered text aloud, or saves it as a WAV file, using either the
    /// built-in System.Speech voices or Azure Cognitive Services voices.
    /// </summary>
    public partial class MainWindow : Window
    {
        // Built-in synthesizer; created once and disposed when the window closes.
        private readonly System.Speech.Synthesis.SpeechSynthesizer BuiltInSpeech = new();

        // Azure synthesizer for the "Listen" playback currently in progress, or null when idle.
        // StopBtn_Click and Window_Closing use it to interrupt that playback.
        private Microsoft.CognitiveServices.Speech.SpeechSynthesizer? AzureSpeech;
        private readonly SpeechConfig AzureSpeechConfig;

        private readonly SaveFileDialog WAVSaveDialog = new()
        {
            Title = "Save as WAV file",
            Filter = "WAV Files (*.wav)|*.wav"
        };

        /// <summary>
        /// Builds the window, restores the idle UI whenever built-in playback completes,
        /// creates the Azure configuration and fills the voice list with the built-in voices.
        /// </summary>
        public MainWindow()
        {
            InitializeComponent();

            BuiltInSpeech.SpeakCompleted += (s, e) => ResetPlayingState();

            // Replace these details with your Azure API key and region:
            // https://azure.microsoft.com/en-gb/free/cognitive-services/
            AzureSpeechConfig = SpeechConfig.FromSubscription("<API_KEY_GOES_HERE>", "<REGION_GOES_HERE>");

            LoadBuiltInVoices();
        }

        /// <summary>
        /// Cancels any speech still in progress and releases the built-in synthesizer.
        /// </summary>
        private void Window_Closing(object sender, System.ComponentModel.CancelEventArgs e)
        {
            BuiltInSpeech.SpeakAsyncCancelAll();
            BuiltInSpeech.Dispose();

            // The Azure synthesizer (if any) is disposed by the ListenBtn_Click call that created it.
            AzureSpeech?.StopSpeakingAsync();
        }

        #region UI State

        /// <summary>
        /// Switches the UI to its "playing" state: only the Stop button is shown.
        /// </summary>
        private void SetPlayingState()
        {
            TitleLbl.Content = "Playing...";
            ListenBtn.Visibility = Visibility.Collapsed;
            SaveBtn.Visibility = Visibility.Collapsed;
            StopBtn.Visibility = Visibility.Visible;
        }

        /// <summary>
        /// Switches the UI back to its idle state: Listen and Save are shown, Stop is hidden.
        /// </summary>
        private void ResetPlayingState()
        {
            TitleLbl.Content = "Text-to-speech";
            ListenBtn.Visibility = Visibility.Visible;
            SaveBtn.Visibility = Visibility.Visible;
            StopBtn.Visibility = Visibility.Collapsed;
        }

        #endregion
        #region Voices

        /// <summary>
        /// Configures the sliders for the built-in engine (no pitch control, speed from -10 to 10)
        /// and lists the installed voices that are enabled.
        /// </summary>
        private void LoadBuiltInVoices()
        {
            PitchSlider.IsEnabled = false;
            SpeedSlider.Minimum = -10;
            SpeedSlider.Maximum = 10;

            // Disabled voices are skipped because they cannot be selected for speaking.
            // ToList() materialises the query so the ComboBoxItems are created exactly once.
            VoiceCombo.ItemsSource = BuiltInSpeech.GetInstalledVoices()
                .Where(installed => installed.Enabled)
                .Select(installed => new ComboBoxItem()
                {
                    Content = $"{installed.VoiceInfo.Culture.Name} - {installed.VoiceInfo.Name}",
                    Tag = installed.VoiceInfo.Name
                })
                .ToList();
            VoiceCombo.SelectedIndex = 0;
        }

        private void BuiltInRadio_Click(object sender, RoutedEventArgs e)
        {
            LoadBuiltInVoices();
        }

        /// <summary>
        /// Configures the sliders for Azure (pitch enabled, speed from -50 to 50)
        /// and lists a few example Azure voices.
        /// </summary>
        private void LoadAzureVoices()
        {
            PitchSlider.IsEnabled = true;
            SpeedSlider.Minimum = -50;
            SpeedSlider.Maximum = 50;

            // These are some example voices - for the full list of voices, go to:
            // https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts
            Dictionary<string, string> voices = new()
            {
                {"en-GB-SoniaNeural", "en-GB - Sonia"},
                {"en-GB-RyanNeural", "en-GB - Ryan"},
                {"en-US-JennyNeural", "en-US - Jenny"},
                {"fr-FR-YvetteNeural", "fr-FR - Yvette"}
            };

            // Key = voice name sent to Azure (stored in Tag), Value = text shown in the list.
            VoiceCombo.ItemsSource = voices
                .Select(entry => new ComboBoxItem()
                {
                    Content = entry.Value,
                    Tag = entry.Key
                })
                .ToList();
            VoiceCombo.SelectedIndex = 0;
        }

        private void AzureRadio_Click(object sender, RoutedEventArgs e)
        {
            LoadAzureVoices();
        }

        /// <summary>
        /// Reads the voice name stored in the Tag of the selected voice item.
        /// </summary>
        /// <param name="voice">The selected voice name, or an empty string when nothing is selected.</param>
        /// <returns><c>true</c> when a voice is selected; otherwise <c>false</c>.</returns>
        private bool TryGetSelectedVoice(out string voice)
        {
            if (VoiceCombo.SelectedItem is ComboBoxItem { Tag: string tag })
            {
                voice = tag;
                return true;
            }

            // Nothing is selected, e.g. because the voice list is empty.
            voice = string.Empty;
            return false;
        }

        #endregion
        #region Playback

        /// <summary>
        /// Builds the SSML document sent to Azure for the given text and voice settings.
        /// </summary>
        /// <param name="text">Plain text to speak; XML special characters are escaped.</param>
        /// <param name="voice">Azure voice name, e.g. "en-GB-SoniaNeural".</param>
        /// <param name="speed">Relative speaking rate, in percent.</param>
        /// <param name="pitch">Relative pitch, in percent.</param>
        /// <returns>The SSML markup as a string.</returns>
        private static string GenerateSSML(string text, string voice, int speed, int pitch)
        {
            // Format numbers culture-independently: some cultures use a non-ASCII minus sign,
            // which is not valid in an SSML percentage.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;
            string rate = speed.ToString(invariant);
            string pitchValue = pitch.ToString(invariant);

            // Escape &, <, >, " and ' so neither the text nor the voice name can break the markup.
            string safeVoice = System.Security.SecurityElement.Escape(voice) ?? string.Empty;
            string safeText = System.Security.SecurityElement.Escape(text) ?? string.Empty;

            return @$"<speak xmlns=""http://www.w3.org/2001/10/synthesis"" version=""1.0"" xml:lang=""en-US"">
                        <voice name=""{safeVoice}"">
                          <prosody rate=""{rate}%"" pitch=""{pitchValue}%"">
                            {safeText}
                          </prosody>
                        </voice>
                      </speak>";
        }

        /// <summary>
        /// Shows an error message when an Azure synthesis request ended with an error.
        /// Successful requests and non-error cancellations are ignored.
        /// </summary>
        private static void ShowAzureErrorIfAny(SpeechSynthesisResult result)
        {
            if (result.Reason != ResultReason.Canceled)
            {
                return;
            }

            SpeechSynthesisCancellationDetails details = SpeechSynthesisCancellationDetails.FromResult(result);
            if (details.Reason == CancellationReason.Error)
            {
                MessageBox.Show(
                    $"Azure speech synthesis failed ({details.ErrorCode}): {details.ErrorDetails}",
                    "Text-to-speech",
                    MessageBoxButton.OK,
                    MessageBoxImage.Error);
            }
        }

        /// <summary>
        /// Speaks the entered text through the default audio device with the selected engine.
        /// </summary>
        private async void ListenBtn_Click(object sender, RoutedEventArgs e)
        {
            if (!TryGetSelectedVoice(out string voice))
            {
                return;
            }

            if (BuiltInRadio.IsChecked == true)
            {
                BuiltInSpeech.SelectVoice(voice);
                BuiltInSpeech.Rate = (int)SpeedSlider.Value;

                // The SpeakCompleted handler registered in the constructor resets the UI.
                BuiltInSpeech.SpeakAsync(SpeechTxt.Text);
                SetPlayingState();
            }
            else
            {
                var synthesizer = new Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig);
                AzureSpeech = synthesizer;
                SetPlayingState();

                try
                {
                    string ssml = GenerateSSML(SpeechTxt.Text, voice, (int)SpeedSlider.Value, (int)PitchSlider.Value);
                    using SpeechSynthesisResult result = await synthesizer.SpeakSsmlAsync(ssml);
                    ShowAzureErrorIfAny(result);
                }
                finally
                {
                    // Always leave the "playing" state and release the synthesizer, even on failure.
                    // The field is cleared first so Stop/Closing never touch a disposed object.
                    AzureSpeech = null;
                    ResetPlayingState();
                    synthesizer.Dispose();
                }
            }
        }

        /// <summary>
        /// Asks for a file name and writes the spoken text to it as a WAV file with the selected engine.
        /// </summary>
        private async void SaveBtn_Click(object sender, RoutedEventArgs e)
        {
            if (!TryGetSelectedVoice(out string voice) || WAVSaveDialog.ShowDialog() != true)
            {
                return;
            }

            if (BuiltInRadio.IsChecked == true)
            {
                BuiltInSpeech.SelectVoice(voice);
                BuiltInSpeech.Rate = (int)SpeedSlider.Value;
                BuiltInSpeech.SetOutputToWaveFile(WAVSaveDialog.FileName);

                try
                {
                    BuiltInSpeech.Speak(SpeechTxt.Text);
                }
                finally
                {
                    // Switch back to the speakers even if speaking failed, so "Listen" keeps working.
                    BuiltInSpeech.SetOutputToDefaultAudioDevice();
                }
            }
            else
            {
                // Locals (not the AzureSpeech field) are used so that a disposed synthesizer
                // can never be reached later by StopBtn_Click or Window_Closing.
                using AudioConfig output = AudioConfig.FromWavFileOutput(WAVSaveDialog.FileName);
                using var synthesizer = new Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig, output);

                string ssml = GenerateSSML(SpeechTxt.Text, voice, (int)SpeedSlider.Value, (int)PitchSlider.Value);
                using SpeechSynthesisResult result = await synthesizer.SpeakSsmlAsync(ssml);
                ShowAzureErrorIfAny(result);
            }
        }

        /// <summary>
        /// Stops whichever engine is currently speaking.
        /// </summary>
        private void StopBtn_Click(object sender, RoutedEventArgs e)
        {
            BuiltInSpeech.SpeakAsyncCancelAll();
            AzureSpeech?.StopSpeakingAsync();
        }

        #endregion
    }
}

Enjoying this tutorial?


VB.NET

' AZUL CODING ---------------------------------------
' WPF C#/VB - Text-to-Speech Desktop App
' https://youtu.be/vpehE-xYhAM


Imports System.Speech.Synthesis
Imports Microsoft.CognitiveServices.Speech.Audio
Imports Microsoft.CognitiveServices.Speech
Imports Microsoft.Win32

' Interaction logic for MainWindow.xaml.
' Speaks the entered text aloud, or saves it as a WAV file, using either the
' built-in System.Speech voices or Azure Cognitive Services voices.
Class MainWindow

    ' Built-in synthesizer; created once and disposed when the window closes.
    Private ReadOnly BuiltInSpeech As New Speech.Synthesis.SpeechSynthesizer

    ' Azure synthesizer for the "Listen" playback currently in progress, or Nothing when idle.
    ' StopBtn_Click and Window_Closing use it to interrupt that playback.
    Private AzureSpeech As Microsoft.CognitiveServices.Speech.SpeechSynthesizer
    Private ReadOnly AzureSpeechConfig As SpeechConfig

    Private ReadOnly WAVSaveDialog As New SaveFileDialog() With {
        .Title = "Save as WAV file",
        .Filter = "WAV Files (*.wav)|*.wav"
    }

    ' Builds the window, restores the idle UI whenever built-in playback completes,
    ' creates the Azure configuration and fills the voice list with the built-in voices.
    Public Sub New()
        InitializeComponent()

        AddHandler BuiltInSpeech.SpeakCompleted, Sub(s, e) ResetPlayingState()

        ' Replace these details with your Azure API key and region:
        ' https://azure.microsoft.com/en-gb/free/cognitive-services/
        AzureSpeechConfig = SpeechConfig.FromSubscription("<API_KEY_GOES_HERE>", "<REGION_GOES_HERE>")

        LoadBuiltInVoices()
    End Sub

    ' Cancels any speech still in progress and releases the built-in synthesizer.
    Private Sub Window_Closing(sender As Object, e As ComponentModel.CancelEventArgs)
        BuiltInSpeech.SpeakAsyncCancelAll()
        BuiltInSpeech.Dispose()

        ' The Azure synthesizer (if any) is disposed by the ListenBtn_Click call that created it.
        AzureSpeech?.StopSpeakingAsync()
    End Sub

    #Region "UI State"

    ' Switches the UI to its "playing" state: only the Stop button is shown.
    Private Sub SetPlayingState()
        TitleLbl.Content = "Playing..."
        ListenBtn.Visibility = Visibility.Collapsed
        SaveBtn.Visibility = Visibility.Collapsed
        StopBtn.Visibility = Visibility.Visible
    End Sub

    ' Switches the UI back to its idle state: Listen and Save are shown, Stop is hidden.
    Private Sub ResetPlayingState()
        TitleLbl.Content = "Text-to-speech"
        ListenBtn.Visibility = Visibility.Visible
        SaveBtn.Visibility = Visibility.Visible
        StopBtn.Visibility = Visibility.Collapsed
    End Sub

    #End Region
    #Region "Voices"

    ' Configures the sliders for the built-in engine (no pitch control, speed from -10 to 10)
    ' and lists the installed voices that are enabled.
    Private Sub LoadBuiltInVoices()
        PitchSlider.IsEnabled = False
        SpeedSlider.Minimum = -10
        SpeedSlider.Maximum = 10

        ' Disabled voices are skipped because they cannot be selected for speaking.
        ' ToList() materialises the query so the ComboBoxItems are created exactly once.
        VoiceCombo.ItemsSource = BuiltInSpeech.GetInstalledVoices().
            Where(Function(installed) installed.Enabled).
            Select(Function(installed) New ComboBoxItem() With {
                .Content = $"{installed.VoiceInfo.Culture.Name} - {installed.VoiceInfo.Name}",
                .Tag = installed.VoiceInfo.Name
            }).
            ToList()
        VoiceCombo.SelectedIndex = 0
    End Sub

    Private Sub BuiltInRadio_Click(sender As Object, e As RoutedEventArgs)
        LoadBuiltInVoices()
    End Sub

    ' Configures the sliders for Azure (pitch enabled, speed from -50 to 50)
    ' and lists a few example Azure voices.
    Private Sub LoadAzureVoices()
        PitchSlider.IsEnabled = True
        SpeedSlider.Minimum = -50
        SpeedSlider.Maximum = 50

        ' These are some example voices - for the full list of voices, go to:
        ' https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts
        Dim voices As New Dictionary(Of String, String)() From {
            {"en-GB-SoniaNeural", "en-GB - Sonia"},
            {"en-GB-RyanNeural", "en-GB - Ryan"},
            {"en-US-JennyNeural", "en-US - Jenny"},
            {"fr-FR-YvetteNeural", "fr-FR - Yvette"}
        }

        ' Key = voice name sent to Azure (stored in Tag), Value = text shown in the list.
        VoiceCombo.ItemsSource = voices.
            Select(Function(entry) New ComboBoxItem() With {
                .Content = entry.Value,
                .Tag = entry.Key
            }).
            ToList()
        VoiceCombo.SelectedIndex = 0
    End Sub

    Private Sub AzureRadio_Click(sender As Object, e As RoutedEventArgs)
        LoadAzureVoices()
    End Sub

    ' Returns the voice name stored in the Tag of the selected voice item,
    ' or Nothing when no voice is selected (e.g. because the voice list is empty).
    Private Function GetSelectedVoice() As String
        Dim item = TryCast(VoiceCombo.SelectedItem, ComboBoxItem)
        Return TryCast(item?.Tag, String)
    End Function

    #End Region
    #Region "Playback"

    ' Builds the SSML document sent to Azure.
    '   text  - plain text to speak; XML special characters are escaped
    '   voice - Azure voice name, e.g. "en-GB-SoniaNeural"
    '   speed - relative speaking rate, in percent
    '   pitch - relative pitch, in percent
    Private Function GenerateSSML(text As String, voice As String, speed As Integer, pitch As Integer) As String
        ' Format numbers culture-independently: some cultures use a non-ASCII minus sign,
        ' which is not valid in an SSML percentage.
        Dim invariant = System.Globalization.CultureInfo.InvariantCulture
        Dim rate As String = speed.ToString(invariant)
        Dim pitchValue As String = pitch.ToString(invariant)

        ' Escape &, <, >, " and ' so neither the text nor the voice name can break the markup.
        Dim safeVoice As String = System.Security.SecurityElement.Escape(voice)
        Dim safeText As String = System.Security.SecurityElement.Escape(text)

        Return $"<speak xmlns=""http://www.w3.org/2001/10/synthesis"" version=""1.0"" xml:lang=""en-US"">
                   <voice name=""{safeVoice}"">
                     <prosody rate=""{rate}%"" pitch=""{pitchValue}%"">
                       {safeText}
                     </prosody>
                   </voice>
                 </speak>"
    End Function

    ' Shows an error message when an Azure synthesis request ended with an error.
    ' Successful requests and non-error cancellations are ignored.
    Private Sub ShowAzureErrorIfAny(result As SpeechSynthesisResult)
        If result.Reason <> ResultReason.Canceled Then Return

        Dim details = SpeechSynthesisCancellationDetails.FromResult(result)
        If details.Reason = CancellationReason.[Error] Then
            MessageBox.Show(
                $"Azure speech synthesis failed ({details.ErrorCode}): {details.ErrorDetails}",
                "Text-to-speech",
                MessageBoxButton.OK,
                MessageBoxImage.Error)
        End If
    End Sub

    ' Speaks the entered text through the default audio device with the selected engine.
    Private Async Sub ListenBtn_Click(sender As Object, e As RoutedEventArgs)
        Dim voice As String = GetSelectedVoice()
        If voice Is Nothing Then Return

        If BuiltInRadio.IsChecked = True Then
            BuiltInSpeech.SelectVoice(voice)
            BuiltInSpeech.Rate = CInt(SpeedSlider.Value)

            ' The SpeakCompleted handler registered in the constructor resets the UI.
            BuiltInSpeech.SpeakAsync(SpeechTxt.Text)
            SetPlayingState()
        Else
            Dim synthesizer As New Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig)
            AzureSpeech = synthesizer
            SetPlayingState()

            Try
                Dim ssml As String = GenerateSSML(SpeechTxt.Text, voice, CInt(SpeedSlider.Value), CInt(PitchSlider.Value))
                Using result As SpeechSynthesisResult = Await synthesizer.SpeakSsmlAsync(ssml)
                    ShowAzureErrorIfAny(result)
                End Using
            Finally
                ' Always leave the "playing" state and release the synthesizer, even on failure.
                ' The field is cleared first so Stop/Closing never touch a disposed object.
                AzureSpeech = Nothing
                ResetPlayingState()
                synthesizer.Dispose()
            End Try
        End If
    End Sub

    ' Asks for a file name and writes the spoken text to it as a WAV file with the selected engine.
    Private Async Sub SaveBtn_Click(sender As Object, e As RoutedEventArgs)
        Dim voice As String = GetSelectedVoice()
        If voice Is Nothing Then Return
        If WAVSaveDialog.ShowDialog() <> True Then Return

        If BuiltInRadio.IsChecked = True Then
            BuiltInSpeech.SelectVoice(voice)
            BuiltInSpeech.Rate = CInt(SpeedSlider.Value)
            BuiltInSpeech.SetOutputToWaveFile(WAVSaveDialog.FileName)

            Try
                BuiltInSpeech.Speak(SpeechTxt.Text)
            Finally
                ' Switch back to the speakers even if speaking failed, so "Listen" keeps working.
                BuiltInSpeech.SetOutputToDefaultAudioDevice()
            End Try
        Else
            ' Locals (not the AzureSpeech field) are used so that a disposed synthesizer
            ' can never be reached later by StopBtn_Click or Window_Closing.
            Using output As AudioConfig = AudioConfig.FromWavFileOutput(WAVSaveDialog.FileName)
                Using synthesizer As New Microsoft.CognitiveServices.Speech.SpeechSynthesizer(AzureSpeechConfig, output)
                    Dim ssml As String = GenerateSSML(SpeechTxt.Text, voice, CInt(SpeedSlider.Value), CInt(PitchSlider.Value))
                    Using result As SpeechSynthesisResult = Await synthesizer.SpeakSsmlAsync(ssml)
                        ShowAzureErrorIfAny(result)
                    End Using
                End Using
            End Using
        End If
    End Sub

    ' Stops whichever engine is currently speaking.
    Private Sub StopBtn_Click(sender As Object, e As RoutedEventArgs)
        BuiltInSpeech.SpeakAsyncCancelAll()
        AzureSpeech?.StopSpeakingAsync()
    End Sub

    #End Region
End Class

XAML

<!-- AZUL CODING --------------------------------------- -->
<!-- WPF C#/VB - Text-to-Speech Desktop App -->
<!-- https://youtu.be/vpehE-xYhAM -->


<Window x:Class="TTS.MainWindow"
        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
        xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
        xmlns:local="clr-namespace:TTS" mc:Ignorable="d"
        Title="Text-to-speech - Azul Coding" Closing="Window_Closing" Width="500" SizeToContent="Height" ResizeMode="CanMinimize">
    <StackPanel Background="White">
        <!-- Heading; the code-behind changes its text while speech is playing -->
        <Label x:Name="TitleLbl" Content="Text-to-speech" Padding="5,0,5,5" Margin="20" FontWeight="SemiBold" FontSize="16" BorderBrush="DodgerBlue" BorderThickness="0,0,0,2"/>

        <!-- Text to be spoken or saved -->
        <TextBox x:Name="SpeechTxt" Text="This is text-to-speech." Padding="5" Margin="20,0" FontSize="14" MinLines="5" MaxLines="5" TextWrapping="Wrap" AcceptsReturn="True"/>

        <!-- Voice list and engine choice; the code-behind fills the list for the chosen engine -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Voice:" Target="{Binding ElementName=VoiceCombo}" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <ComboBox x:Name="VoiceCombo" Width="225" SelectedIndex="0" VerticalContentAlignment="Center" FontSize="14"/>
            <RadioButton x:Name="BuiltInRadio" Click="BuiltInRadio_Click" Content="Built-in" GroupName="TTSTypeRadios" Margin="20,0,20,0" IsChecked="True" FontSize="14" VerticalContentAlignment="Center"/>
            <RadioButton x:Name="AzureRadio" Click="AzureRadio_Click" Content="Azure" GroupName="TTSTypeRadios" Margin="0" FontSize="14" VerticalContentAlignment="Center"/>
        </StackPanel>

        <!-- Speed; the code-behind changes the range when the engine changes -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Speed:" Target="{Binding ElementName=SpeedSlider}" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <Slider x:Name="SpeedSlider" Width="225" VerticalAlignment="Center" SmallChange="1" IsSnapToTickEnabled="True" Minimum="-10" Maximum="10"/>
            <Label Content="{Binding Value, ElementName=SpeedSlider}" FontSize="14" Margin="15,0,5,0" VerticalAlignment="Center"/>
        </StackPanel>

        <!-- Pitch; the code-behind enables it only for the Azure engine -->
        <StackPanel Orientation="Horizontal" Margin="0,10">
            <Label Content="Pitch:" Target="{Binding ElementName=PitchSlider}" FontSize="14" Margin="20,0,0,0" MinWidth="60" VerticalAlignment="Center"/>
            <Slider x:Name="PitchSlider" Width="225" VerticalAlignment="Center" SmallChange="1" IsSnapToTickEnabled="True" Minimum="-50" Maximum="50"/>
            <Label Content="{Binding Value, ElementName=PitchSlider}" FontSize="14" Margin="15,0,5,0" VerticalAlignment="Center"/>
        </StackPanel>

        <!-- Actions; the code-behind shows Stop in place of Listen and Save while speech is playing -->
        <StackPanel Margin="20" Orientation="Horizontal">
            <Button x:Name="ListenBtn" Click="ListenBtn_Click" Padding="10,5" Margin="0,0,10,0" Background="#f0f0f0">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/speaker.png"/>
                    <TextBlock Text="Listen" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>
            <Button x:Name="SaveBtn" Click="SaveBtn_Click" Padding="10,5" Margin="0,0,10,0" Background="#f0f0f0">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/save.png"/>
                    <TextBlock Text="Save as WAV" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>
            <Button x:Name="StopBtn" Click="StopBtn_Click" Padding="10,5" Margin="0,0,10,0" Background="#f0f0f0" Visibility="Collapsed">
                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">
                    <Image Height="24" Width="24" Source="https://img.icons8.com/fluency/48/stop.png"/>
                    <TextBlock Text="Stop" VerticalAlignment="Center" FontSize="14" Margin="10,0,5,2"/>
                </StackPanel>
            </Button>
        </StackPanel>
    </StackPanel>
</Window>