TIL_220112_IOT

구글 speech로 음성인식

구글 클라우드 플랫폼 접속

라이브러리 설치

사용자 인증 정보 만들기

Cloud Speech-to-Text API 사용 키 만들기

Anaconda3 아나콘다 프롬프트

구글 음성인식 라이브러리 설치(아나콘다 프롬프트에서 설치해야 됨!)

pip install --upgrade google-auth
pip install --upgrade google-api-python-client
pip install google-cloud-speech
pip install gTTS

pip install pipwin
pipwin install pyaudio

(micstream은 파일로 받았음 : micstream.py)

영어 음성 인식

\IOT\speechrecg\gspeech_316.py

import os
# 윈도우즈나 유닉스 등의 운영체제에서 제공하는 기능을 사용하기 위해 필요
# os.environ 변수(환경변수 관리) 사용을 위해 필요
from google.cloud import speech
# google.cloud : 구글 클라우드 접근 제공
# google.cloud.speech : 구글 클라우드 음성 인식 모듈
from micstream import MicrophoneStream

# Point GOOGLE_APPLICATION_CREDENTIALS at the downloaded API key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "speechrecog-338000-39d8902d9c50.json"
)

# Recording parameters.
RATE = 44100        # number of audio samples taken per second
CHUNK = RATE // 10  # samples fetched per read: 44100 / 10 = 4410

# --- 구글 클라우드에서 얻어온 음성 인식 결과를 문자열로 출력 ---
def listen_print_loop(responses):
    """Print each recognized transcript until an exit keyword is heard.

    Args:
        responses: Iterable of streaming-recognition responses. Each one
            is read as ``response.results[0].alternatives[0].transcript``.

    Returns:
        None. The loop ends when a transcript contains 'exit' or 'quit',
        or when ``responses`` is exhausted.
    """
    for response in responses:
        # A response can arrive without any result (or a result without
        # alternatives); indexing [0] on it would raise IndexError.
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript

        print(transcript)

        if 'exit' in transcript or 'quit' in transcript:
            print('Exiting..')
            break

language_code = 'en-US'  # BCP-47 language tag: English

# Instance of the speech module's SpeechClient class.
client = speech.SpeechClient()

# Recognition settings: audio encoding, sampling rate and spoken language.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,  # 16-bit little-endian integer samples
    sample_rate_hertz=RATE,        # samples per second (RATE above)
    language_code=language_code,   # language of the audio (set above)
)

# Streaming settings built from ``config``.
streaming_config = speech.StreamingRecognitionConfig(config=config)

# Create the MicrophoneStream object under the name ``stream``.
with MicrophoneStream(RATE, CHUNK) as stream:
    audio_generator = stream.generator()  # produces the audio data
    # One recognition request message per piece of audio content.
    requests = (
        speech.StreamingRecognizeRequest(audio_content=content)
        for content in audio_generator
    )
    responses = client.streaming_recognize(streaming_config, requests)

    # The objects are chained responses -> requests -> audio_generator, so
    # while listen_print_loop iterates, values flow in the opposite order:
    # audio_generator -> requests -> responses.
    listen_print_loop(responses)

with문이란?

with 문을 쓰면 블록이 끝날 때 close() 같은 정리 작업을 개발자가 직접 호출하지 않아도 자동으로 해 준다.

자세한 설명 링크

한글 음성 인식

\IOT\speechrecg\gspeech_319p.py

import os
from google.cloud import speech
from micstream import MicrophoneStream

# Name of the downloaded private key file (replace with your own).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "speechrecog-338000-39d8902d9c50.json"
)

# Parameters needed for recording.
RATE = 44100
CHUNK = RATE // 10

def listen_print_loop(responses):
    """Print each recognized Korean transcript until a stop word is heard.

    Args:
        responses: Iterable of streaming-recognition responses. Each one
            is read as ``response.results[0].alternatives[0].transcript``.

    Returns:
        None. The loop ends when a transcript contains '종료' or '그만',
        or when ``responses`` is exhausted.
    """
    for response in responses:
        # A response can arrive without any result (or a result without
        # alternatives); indexing [0] on it would raise IndexError.
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        # Unwrap the nested arrays in the response to reach the transcript.
        transcript = result.alternatives[0].transcript
        print(transcript)

        if u'종료' in transcript or u'그만' in transcript:
            print('종료합니다..')
            break

language_code = 'ko-KR'  # recognize Korean

# Speech client created from the speech module imported above.
client = speech.SpeechClient()

# Settings used for speech recognition.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=language_code,
)
streaming_config = speech.StreamingRecognitionConfig(config=config)

# The MicrophoneStream is exposed as ``stream``; its audio data is wrapped
# in requests, passed to the Google client, and the results come back as
# ``responses``.
with MicrophoneStream(RATE, CHUNK) as stream:
    audio_generator = stream.generator()
    requests = (
        speech.StreamingRecognizeRequest(audio_content=content)
        for content in audio_generator
    )
    responses = client.streaming_recognize(streaming_config, requests)

    listen_print_loop(responses)

한글 읽고 말하기

C:\Users\user\Desktop\IOT\speechrecg\gspeech_321p.py

import os
from google.cloud import speech
from gtts import gTTS
from micstream import MicrophoneStream

# Credential key file read through GOOGLE_APPLICATION_CREDENTIALS.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "speechrecog-338000-39d8902d9c50.json"
)

RATE = 44100        # sampling rate passed to the recognizer and microphone
CHUNK = RATE // 10  # one tenth of RATE: 4410

def do_TTS(text):
    """Read ``text`` aloud in Korean.

    Synthesizes ``text`` with gTTS, saves it as ``read.mp3`` (removing an
    existing file of that name first) and plays it with ``mpg321``.

    Args:
        text: The sentence to speak.
    """
    tts = gTTS(text = text, lang = 'ko')
    if os.path.exists('read.mp3'):
        os.remove('read.mp3')

    tts.save('read.mp3')
    # The player name was misspelled 'mqg321', so the clip never played.
    os.system('mpg321 read.mp3')

def listen_print_loop(responses):
    """Print each recognized transcript and speak it back via ``do_TTS``.

    Args:
        responses: Iterable of streaming-recognition responses. Each one
            is read as ``response.results[0].alternatives[0].transcript``.

    Returns:
        None. The loop ends, without speaking that last transcript, when
        it contains '종료' or '그만', or when ``responses`` is exhausted.
    """
    for response in responses:
        # A response can arrive without any result (or a result without
        # alternatives); indexing [0] on it would raise IndexError.
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript

        print(transcript)

        if u'종료' in transcript or u'그만' in transcript:
            print('종료합니다..')
            break

        do_TTS(transcript)

language_code = 'ko-KR'

client = speech.SpeechClient()

# Recognition settings: encoding, sampling rate and language.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=language_code,
)
streaming_config = speech.StreamingRecognitionConfig(config=config)

with MicrophoneStream(RATE, CHUNK) as stream:
    audio_generator = stream.generator()
    # Wrap every piece of microphone audio in a streaming request.
    requests = (
        speech.StreamingRecognizeRequest(audio_content=content)
        for content in audio_generator
    )
    responses = client.streaming_recognize(streaming_config, requests)

    # Print each transcript and read it back aloud.
    listen_print_loop(responses)

영어로 대화하기

영어 대화를 위한 음성 파일 만들기

\IOT\speechrecg\gspeech_329p.py

from gtts import gTTS
import os

# Maps an MP3 file stem (the question, underscore-separated) to the
# English answer text that gets synthesized into that file.
conversations = {
    "hello_hi": "Hi",
    "what_is_your_name": "I am A",
    "it_s_nice_to_meet_you": "It's nice to meet you, too",
    "how_are_you": "I'm Good",
    "how_old_are_you": "I'm 10 years old",
    "when_is_your_birthday": "It's March 18th",
    "where_are_you_from": "I'm from Korea, Seoul",
    "what_do_you_do": "I'm a student",
    "what_time_is_it": "It's 5:30 pm",
    "what_day_is_it": "It's Wednesday",
    "how_is_the_weather_today": "It's sunny",
    "what_is_this": "It's a pencil",
    "who_is_this": "This is a teacher",
    "what_are_you_doing": "I'm studying",
    "where_is_a_pencil": "Here",
    "how_much_is_it": "It's a 1000 won",
}

# Synthesize one MP3 per entry of ``conversations`` and play it back.
for key in conversations:
    respond = gTTS(text = conversations[key], lang = 'en')
    respond.save(key + '.mp3')
    # A space must separate the player name from the file name; without
    # it the shell is asked to run e.g. 'mpg321hello_hi.mp3'.
    os.system('mpg321 ' + key + '.mp3')

# Also generate and play the "pardon" clip.
pardon = gTTS(text = "Pardon? Repeat Please...", lang='en')
pardon.save('pardon.mp3')
os.system('mpg321 ' + 'pardon.mp3')

영어 대화하기

\IOT\speechrecg\gspeech_331p.py

import os
from google.cloud import speech
from gtts import gTTS
from micstream import MicrophoneStream

# Credential key file read through GOOGLE_APPLICATION_CREDENTIALS.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "speechrecog-338000-39d8902d9c50.json"
)

RATE = 44100        # sampling rate passed to the recognizer and microphone
CHUNK = RATE // 10  # one tenth of RATE: 4410

# Maps a spoken phrase (matched as a substring of the transcript) to the
# stem of the MP3 file holding its answer. Each stem is the phrase with
# spaces and apostrophes replaced by underscores.
# Two entries were corrected to follow that rule: the birthday phrase
# ("what ..." -> "when ...") and the price entry, which pointed at a
# 'how_much_is_this' clip instead of 'how_much_is_it'.
conversations = {
  "hello hi" : "hello_hi",
  "what is your name" : "what_is_your_name",
  "it's nice to meet you" : "it_s_nice_to_meet_you",
  "how are you" : "how_are_you",
  "how old are you" : "how_old_are_you",
  "when is your birthday" : "when_is_your_birthday",
  "where are you from" : "where_are_you_from",
  "what do you do" : "what_do_you_do",
  "what time is it" : "what_time_is_it",
  "what day is it" : "what_day_is_it",
  "how is the weather today" : "how_is_the_weather_today",
  "what is this" : "what_is_this",
  "who is this" : "who_is_this",
  "what are you doing" : "what_are_you_doing",
  "where is a pencil" : "where_is_a_pencil",
  "how much is it" : "how_much_is_it"
}

def respond(text):
    """Play the recorded answer for the first known phrase found in ``text``.

    Walks ``conversations`` in order; for the first key that is a substring
    of ``text`` it plays ``<value>.mp3`` with ``mpg321`` and returns. If no
    key matches, ``pardon.mp3`` is played instead.

    Args:
        text: The recognized transcript.
    """
    for key in conversations:
        if key in text:
            # The space after 'mpg321' was missing, which fused the player
            # name and the file name into one unknown command.
            os.system('mpg321 ' + conversations[key] + '.mp3')
            return

    # Was 'mpq321' (misspelled) and also lacked the separating space.
    os.system('mpg321 ' + 'pardon.mp3')

def listen_print_loop(responses):
    """Print each recognized transcript and answer it via ``respond``.

    Args:
        responses: Iterable of streaming-recognition responses. Each one
            is read as ``response.results[0].alternatives[0].transcript``.

    Returns:
        None. The loop ends, without answering that last transcript, when
        it contains 'exit' or 'quit', or when ``responses`` is exhausted.
    """
    for response in responses:
        # A response can arrive without any result (or a result without
        # alternatives); indexing [0] on it would raise IndexError.
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        print(transcript)

        if 'exit' in transcript or 'quit' in transcript:
            print('Exiting...')
            break

        respond(transcript)

language_code = 'en-US'

client = speech.SpeechClient()

# Recognition settings: encoding, sampling rate and language.
config = speech.RecognitionConfig(
  encoding = speech.RecognitionConfig.AudioEncoding.LINEAR16,
  sample_rate_hertz = RATE,
  language_code = language_code)
streaming_config = speech.StreamingRecognitionConfig(config = config)

with MicrophoneStream(RATE, CHUNK) as stream:
  audio_generator = stream.generator()
  requests = (speech.StreamingRecognizeRequest(audio_content = content)
    for content in audio_generator)
  responses = client.streaming_recognize(streaming_config, requests)

  # The responses must be consumed inside the ``with`` block: they are
  # produced from the microphone audio, and the call used to sit after
  # the block, once the ``with`` had already exited.
  listen_print_loop(responses)

MQTT 보충

일종의 통신방법 중 하나

Mosquitto는 broker명

Python 방식

Publish

\IOT\MQTT code\0112_mqttTest\pythonTest\publish.py

import paho.mqtt.client as mqtt
import json


def on_connect(client, userdata, flags, rc):
    """Connect callback: report the outcome of the connection attempt.

    Prints "connected OK" when ``rc`` is 0, otherwise prints the returned
    code.
    """
    if rc != 0:
        print("Bad connection Returned code=", rc)
        return
    print("connected OK")


def on_disconnect(client, userdata, rc, properties=None):
    """Disconnect callback: print the disconnect result code.

    paho-mqtt invokes this callback as ``(client, userdata, rc)`` (plus
    ``properties`` under MQTT v5). The previous signature
    ``(client, userdata, flags, rc=0)`` received the code in ``flags`` and
    printed the default ``rc`` of 0 on a three-argument call.

    Args:
        client: The client instance the callback belongs to.
        userdata: User data passed through by the library.
        rc: Disconnect result code.
        properties: Optional fourth argument; unused.
    """
    print(str(rc))


def on_publish(client, userdata, mid):
    """Publish callback: print the ``mid`` value passed to the callback."""
    label = "In on_pub callback mid= "
    print(label, mid)


# Create a new client.
client = mqtt.Client()
# Register the callbacks: on_connect (connected to the broker),
# on_disconnect (connection to the broker closed), on_publish (message published).
client.on_connect = on_connect
client.on_disconnect = on_disconnect
client.on_publish = on_publish
# Connect to address localhost, port 1883.
client.connect('localhost', 1883)
client.loop_start()
# Publish a message to the 'common' topic with QoS 1, then block until it
# has actually been published; stopping the network loop right after
# publish() could end the script before the message was sent.
info = client.publish('common', "hello_python", 1)
info.wait_for_publish()
# Disconnect while the loop is still running so the disconnect is
# processed and on_disconnect can fire, then stop the loop.
client.disconnect()
client.loop_stop()

\IOT\MQTT code\0112_mqttTest\pythonTest\subscribe.py

import paho.mqtt.client as mqtt


def on_connect(client, userdata, flags, rc):
    """Connect callback: report the outcome of the connection attempt.

    Prints "connected OK" when ``rc`` is 0, otherwise prints the returned
    code.
    """
    if rc != 0:
        print("Bad connection Returned code=", rc)
        return
    print("connected OK")


def on_disconnect(client, userdata, rc, properties=None):
    """Disconnect callback: print the disconnect result code.

    paho-mqtt invokes this callback as ``(client, userdata, rc)`` (plus
    ``properties`` under MQTT v5). The previous signature
    ``(client, userdata, flags, rc=0)`` received the code in ``flags`` and
    printed the default ``rc`` of 0 on a three-argument call.

    Args:
        client: The client instance the callback belongs to.
        userdata: User data passed through by the library.
        rc: Disconnect result code.
        properties: Optional fourth argument; unused.
    """
    print(str(rc))


def on_subscribe(client, userdata, mid, granted_qos):
    """Subscribe callback: print a fixed notice; the arguments are unused."""
    notice = "subscribe 하는중 "
    print(notice)


def on_message(client, userdata, msg):
    """Message callback: print ``msg.payload`` decoded as UTF-8 text."""
    text = msg.payload.decode("utf-8")
    print(text)


# Create a new client.
client = mqtt.Client()

# Register the callbacks:
#   on_connect    - connected to the broker
#   on_disconnect - connection to the broker closed
#   on_subscribe  - topic subscribed
#   on_message    - a published message arrived
client.on_message = on_message
client.on_subscribe = on_subscribe
client.on_disconnect = on_disconnect
client.on_connect = on_connect

# Connect to address localhost, port 1883.
client.connect('localhost', port=1883)
# Subscribe to the 'common' topic with QoS 1.
client.subscribe('common', qos=1)
client.loop_forever()

결과

Nodejs 방식 (훨씬 간단해보임)

Publish

\IOT\MQTT code\0112_mqttTest\nodejsTest\publish.js

const mqtt = require('mqtt')
const client = mqtt.connect('mqtt://localhost:1883')

// Publish one message to the 'common' topic once connected, then close
// the client so the script exits instead of staying connected.
client.on('connect', () => {
  client.publish('common', 'hello_nodejs ', () => {
    client.end()
  })
})

\IOT\MQTT code\0112_mqttTest\nodejsTest\subscribe.js

const mqtt = require('mqtt')
const client = mqtt.connect('mqtt://localhost:1883')

// Subscribe to the 'common' topic and print every received payload as text.
client.subscribe('common')
client.on('message', (topic, payload) => {
  console.log(payload.toString())
})

결과

느낀 점

[슈르연구소] 유사과학 들으면 토하는 로봇

이 영상을 되게 감명깊게 봤는데 오늘 배운 걸 응용하면 만들 수 있을 것 같아 신기했다.

IOT 수업에선 막연히 신기해하던 것들을 많이 알게된 것 같다.

'공부 > Digital Twin Bootcamp' 카테고리의 다른 글

TIL_220117_IOT (0)	2022.01.17
TIL_220114_IOT (0)	2022.01.14
TIL_220111_IOT (0)	2022.01.11
TIL_220110_IOT (0)	2022.01.11
TIL_220107_IOT (0)	2022.01.07

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

TIL_220112_IOT

구글 speech로 음성인식

Anaconda3 아나콘다 프롬포트

영어로 대화하기

MQTT 보충

'공부 > Digital Twin Bootcamp' 카테고리의 다른 글

구글 speech로 음성인식

Anaconda3 아나콘다 프롬포트

영어로 대화하기

MQTT 보충

'공부 > Digital Twin Bootcamp' 카테고리의 다른 글

티스토리툴바

개인정보

단축키

내 블로그

블로그 게시글

모든 영역