#!/usr/bin/praat
#praat script
# NOTE: This file uses UTF-8!

debug = 0
script_name$ = "MaSCoT-esp"
version$ = "2.1.7"
author$ = "Scott Sadowsky"

# MaSCoT.praat
##############################################################################
# SCRIPT:       MAssive Speech COrpora Tool (MaSCoT)
# AUTHOR:       Scott Sadowsky < s s a d o w s k y @ g m a i l . c o m >
# DATE:         07 Sept 2010
# VERSION:      2.1.7
# DESCRIPTION:  This script works with a LongSound object and its corresponding
#               TextGrid. It finds every interval of the user-selected tier whose
#               label matches the search regular expression, and saves the sound
#               of each such interval to a separate WAV file. It can also extract
#               the TextGrids that correspond to these excerpts.
# NOTES:        - The sound file MUST be opened as a LongSound object.
#               - The TextGrid and the LongSound must have identical names, and
#                 both must be selected in Praat's object window before the
#                 script is run.
# ACKNOWLEDGEMENTS: The GetTierNum procedure was adapted from a script by Mietta
#               Lennes. Some other parts of this script are also based on a
#               script by Mietta Lennes.
#
# CHANGELOG
# 2-1-6: Spanish localization.
#        Added option to extract labels from a second tier.
# 2-1-4: Added more info to the log file that MaSCoT produces
# 2-1-3: Changed default field contents for Regex, since the bracketed expression
#        [áéíóú] works irregularly (!!!)
# 2-1-0: Added ability to save TextGrids along with sound files.
##############################################################################

form MAssive Speech COrpus Tool (MaSCoT) v. 2.1.7
	comment ***** Se debe seleccionar un TextGrid y un LongSound *****
	sentence Tier_de_busqueda pal-fonémica
	sentence Expresion_regular (b|d|g|p|t|k)(̪)?ɾ
	comment EJ. REGEX: ^LINEA$ · [a-z] [ieaou] [^0-9] (a|b|c) \d \D \l \L · ? . .* .+ {2,5}
	sentence Restringir_busqueda_a_este_tier_(optativo) instrumento
	sentence Y_a_esta_seccion_del_tier_(optativo) Lectura de oraciones
	sentence Extraer_audio_de_este_tier_(optativo) pal-ortográfica
	sentence Usar_etiquetas_de_este_tier_(optativo) pal-fonémica
	sentence Usar_etiquetas_adicionales_de_este_tier alófonos
	boolean Extraer_archivos_WAV yes
	boolean Tambien_extraer_TextGrids yes
	comment ¿Cuáles son el primer y el último intervalo que quieres extraer?
	integer Primer_intervalo 1
	integer Ultimo_intervalo_(0=hasta_el_final) 0
	comment Margen de tiempo que se agregará al inicio y al final de cada intervalo que se extrae:
	# The margen used to be 0.001... I changed it for vowel analysis.
	positive Margen_(segundos) 0.015
	# Disabled form fields; file numbering always starts at primer_numero (set below).
	#comment ¿Iniciar numeración de archivos a partir de qué número?
	#integer Primer_numero 1
	comment ¿En qué carpeta quieres guardar los archivos WAV? (Coloca ruta completa, con "/" al final)
	text Carpeta c:/praat-output/
	positive Num_caracteres_en_prefijo_del_nombre_de_archivo 15
	positive Largo_max_de_campos_de_nombres_de_archivos 20
endform

# Define certain variables.
# NOTE: earlier code reassigned num_caracteres_en_prefijo_del_nombre_de_archivo
# to 15 here, which silently discarded whatever the user typed in the form.
# The form value is now honoured; its default was raised to 15 so that runs
# which accept the defaults produce the same file prefixes as before.
useRestrict = 0
useSoundExtractionTier = 0
date$ = date$()
primer_numero = 1

# Give form variables shorter, English names.
restrictionTierName$ = restringir_busqueda_a_este_tier$
restrictionText$ = y_a_esta_seccion_del_tier$
searchTierName$ = tier_de_busqueda$
extractionTierName$ = extraer_audio_de_este_tier$
labelTierName$ = usar_etiquetas_de_este_tier$
addLabelTierName$ = usar_etiquetas_adicionales_de_este_tier$
maxLength = largo_max_de_campos_de_nombres_de_archivos

# Perform necessary processing of certain form fields.

# If no search tier is provided, die.
if searchTierName$ = ""
	exit ¡ERROR!'newline$''newline$'Debes ingresar en nombre del tier en el cual quieres buscar. 'newline$''newline$'Haz clic en para cerrar esta ventana, en para cerrar la ventana del script, e intenta de nuevo. 'newline$''newline$'
endif

# The output folder is prepended verbatim to every file name, so make sure it
# ends in a path separator. An empty folder is left untouched.
if carpeta$ <> ""
	lastFolderChar$ = right$ (carpeta$, 1)
	if lastFolderChar$ <> "/" and lastFolderChar$ <> "\"
		carpeta$ = carpeta$ + "/"
	endif
endif

# Remember the extraction tier exactly as the user typed it (possibly empty);
# this is the value used in file names and in the log.
extractionTierLabel$ = extractionTierName$

# If no extraction tier is provided, use the search tier for this.
if extractionTierName$ = ""
	extractionTierName$ = searchTierName$
endif

# If no tier is provided for the source of labels, use the search tier for them.
if labelTierName$ = ""
	labelTierName$ = searchTierName$
endif

# Restriction is active only when BOTH a restriction tier and a text are given.
if restrictionTierName$ <> "" and restrictionText$ <> ""
	useRestrict = 1
else
	useRestrict = 0
endif

if addLabelTierName$ <> ""
	useAddLabel = 1
else
	useAddLabel = 0
endif

# Audio comes from a separate tier only if one was named and it is not the search tier.
if extractionTierName$ <> "" and extractionTierName$ <> searchTierName$
	useSoundExtractionTier = 1
else
	useSoundExtractionTier = 0
endif

soundName$ = selected$ ("TextGrid", 1)
select TextGrid 'soundName$'

# Get the numbers of the tiers whose names were given.
# (GetTierNum exits with an error message if a tier does not exist.)
call GetTierNum 'searchTierName$' searchTierNum
call GetTierNum 'labelTierName$' labelTierNum
if useAddLabel = 1
	call GetTierNum 'addLabelTierName$' addLabelTierNum
endif
call GetTierNum 'extractionTierName$' extractionTierNum
if useRestrict = 1
	call GetTierNum 'restrictionTierName$' restrictionTierNum
endif

# Check the interval values and correct them if necessary.
numberOfIntervals = Get number of intervals... searchTierNum
# Interval numbers start at 1; a smaller value would make the first query fail.
if primer_intervalo < 1
	primer_intervalo = 1
endif
if primer_intervalo > numberOfIntervals
	exit ¡ERROR!'newline$''newline$'No existen 'primer_intervalo' intervalos en el tier llamado ``'searchTierName$'´´.'newline$''newline$'
endif
# 0 means "up to the end"; values past the end are clipped to the last interval.
if ultimo_intervalo = 0 or ultimo_intervalo > numberOfIntervals
	ultimo_intervalo = numberOfIntervals
endif

# Set default values for certain variables.
files = 0
intervalstart = 0
intervalend = 0
searchInterval = 1
intnumber = primer_numero - 1
soundIntervalName$ = ""
labelIntervalName$ = ""
addLabelIntervalName$ = ""
intervalfile$ = ""
endoffile = Get finishing time

# File-name components that do not change from one interval to the next are
# prepared once here, each limited to maxLength characters. The truncation is
# done on COPIES: restrictionText$ itself is still needed, untruncated, for
# the comparison against the restriction tier in both loops below.
prefix$ = left$ (soundName$, num_caracteres_en_prefijo_del_nombre_de_archivo)
if length (prefix$) > maxLength
	prefix$ = left$ (prefix$, maxLength)
endif
fileRestrictionText$ = restrictionText$
if length (fileRestrictionText$) > maxLength
	fileRestrictionText$ = left$ (fileRestrictionText$, maxLength)
endif
fileExtractionLabel$ = extractionTierLabel$
if length (fileExtractionLabel$) > maxLength
	fileExtractionLabel$ = left$ (fileExtractionLabel$, maxLength)
endif

# FIRST PASS: count the matching intervals, so the user can be asked whether
# to go through with saving all the files.
# "check" is 0 when the current interval qualifies for extraction, 1 otherwise.
for searchInterval from primer_intervalo to ultimo_intervalo
	searchIntervalLabel$ = Get label of interval... searchTierNum searchInterval
	check = 1
	if index_regex (searchIntervalLabel$, expresion_regular$) > 0
		if useRestrict = 1
			# Find the restriction-tier interval at the start time of the
			# current search interval and compare its label with the
			# restriction text.
			searchSelectionStart = Get start point... searchTierNum searchInterval
			restrictionInterval = Get interval at time... restrictionTierNum searchSelectionStart
			restrictionIntervalLabel$ = Get label of interval... restrictionTierNum restrictionInterval
			if restrictionIntervalLabel$ = restrictionText$
				check = 0
			endif
		else
			check = 0
		endif
	endif
	if check = 0
		files = files + 1
	endif
endfor
searchInterval = 1

# Check to see if there are 0 matches. If so, die.
if files = 0
	select TextGrid 'soundName$'
	plus LongSound 'soundName$'
	exit ¡Lo siento!'newline$''newline$'Ningún intervalo cumple con los requisitos de búsqueda. 'newline$''newline$'Haz clic en para cerrar esta ventana, haz clic en para cerrar la ventana del script, e intenta de nuevo. 'newline$''newline$'
endif

# NEW IN 217
# Ask user for confirmation to proceed.
pause Vas a extraer 'files' archivos WAV. ¿Estás seguro?

# Define path and name of log file
textfilename$ = carpeta$ + soundName$ + "_'primer_numero'-to-'files'.txt"

# Check if the log file exists. If so, give the user the option to overwrite it.
if fileReadable (textfilename$)
	pause El archivo de registro, 'soundName$'_'primer_numero'-to-'files'.txt, ya existe. ¿Quieres sobreescribirlo?
	filedelete 'textfilename$'
endif

# NEW IN 216: Print metadata to log file
dog$ = "======================================================================'newline$'
...'script_name$' ver. 'version$' de 'author$''newline$''newline$'
...Fecha:'tab$''date$'.'newline$'
...Carpeta:'tab$''carpeta$''newline$''newline$'
...Expresión de búsqueda:'tab$''tab$''tab$''expresion_regular$''newline$'
...Tier de búsqueda:'tab$''tab$''tab$''tab$''tier_de_busqueda$''newline$'
...Tier del cual se extrajo el audio:'tab$''extractionTierName$''newline$'
...Tier de restricción:'tab$''tab$''tab$''restringir_busqueda_a_este_tier$''newline$'
...Sección del tier de restricción:'tab$''y_a_esta_seccion_del_tier$''newline$'
...Tier utilizado para etiquetas:'tab$''usar_etiquetas_de_este_tier$''newline$'
...Tier de etiquetas adicionales:'tab$''addLabelTierName$''newline$'
...======================================================================'newline$''newline$'"
fileappend "'textfilename$'" 'dog$'

# NEW IN 216: Print header row to log file
# For reasons I can't figure out, the normal (and possibly more efficient) way of doing
# this --if X then dog$=Y else dog$=Z-- doesn't work. I have to do dog$=Y, if X then dog$=Z.
dog$ = "INTERVAL_NUM'tab$'FILE_PREFIX'tab$'SEARCH_INT_LABEL'tab$'LABEL_INT_NAME'tab$'RESTRICTION_TXT'tab$'EXTRACTION_TIER_LABEL'newline$'"
if useAddLabel = 1
	dog$ = "INTERVAL_NUM'tab$'FILE_PREFIX'tab$'SEARCH_INT_LABEL'tab$'ADD_INT_LABEL'tab$'LABEL_INT_NAME'tab$'RESTRICTION_TXT'tab$'EXTRACTION_TIER_LABEL'newline$'"
endif
fileappend "'textfilename$'" 'dog$'

# SECOND PASS: loop through all intervals in the selected tier of the TextGrid
# and extract the ones that qualify (same test as in the first pass).
for searchInterval from primer_intervalo to ultimo_intervalo
	select TextGrid 'soundName$'
	searchIntervalLabel$ = Get label of interval... searchTierNum searchInterval

	check = 1
	if index_regex (searchIntervalLabel$, expresion_regular$) > 0
		# Start time of the current search interval; used to find the
		# corresponding intervals on the other tiers.
		searchSelectionStart = Get start point... searchTierNum searchInterval
		if useRestrict = 1
			restrictionInterval = Get interval at time... restrictionTierNum searchSelectionStart
			restrictionIntervalLabel$ = Get label of interval... restrictionTierNum restrictionInterval
			if restrictionIntervalLabel$ = restrictionText$
				check = 0
			endif
		else
			check = 0
		endif
	endif

	# Everything below is needed only for intervals that will be extracted,
	# so the lookups on the other tiers are done only for matches.
	if check = 0
		intnumber = intnumber + 1

		# Interval on the extraction tier that corresponds to the current search interval.
		extractionInterval = Get interval at time... extractionTierNum searchSelectionStart

		# Label, on the label tier, of the interval that corresponds to the current search interval.
		labelInterval = Get interval at time... labelTierNum searchSelectionStart
		labelIntervalName$ = Get label of interval... labelTierNum labelInterval

		# Same for the ADDITIONAL label tier, if the user asked for one.
		if useAddLabel = 1
			addLabelInterval = Get interval at time... addLabelTierNum searchSelectionStart
			addLabelIntervalName$ = Get label of interval... addLabelTierNum addLabelInterval
		endif

		# Add margins to start and end times for extraction, clipped to [0, endoffile].
		intervalstart = Get start point... extractionTierNum extractionInterval
		if intervalstart > margen
			intervalstart = intervalstart - margen
		else
			intervalstart = 0
		endif
		intervalend = Get end point... extractionTierNum extractionInterval
		if intervalend < endoffile - margen
			intervalend = intervalend + margen
		else
			intervalend = endoffile
		endif

		# Truncate (a copy of) the label to be used in the file name if it is excessively long.
		fileLabelName$ = labelIntervalName$
		if length (fileLabelName$) > maxLength
			fileLabelName$ = left$ (fileLabelName$, maxLength)
		endif

		# Append ADDITIONAL label to search interval label, if user specifies an additional label.
		combinedIntervalLabel$ = searchIntervalLabel$
		if useAddLabel = 1
			combinedIntervalLabel$ = searchIntervalLabel$ + "__" + addLabelIntervalName$
		endif

		# Build the base file name (no extension). This is done whether or not
		# WAV files are written, because the TextGrid extraction below uses the
		# same base name. Labels are concatenated as variables rather than
		# substituted into a quoted string, so a double quote inside a label
		# cannot break the statement.
		intervalfile$ = carpeta$ + prefix$ + "-"
		if useRestrict = 1 or useSoundExtractionTier = 1
			intervalfile$ = intervalfile$ + "['intnumber']__{" + combinedIntervalLabel$ + "}__utterance=" + fileLabelName$
			if useRestrict = 1
				intervalfile$ = intervalfile$ + "__restriction=" + fileRestrictionText$
			endif
			if useSoundExtractionTier = 1
				intervalfile$ = intervalfile$ + "__extracted-from=" + fileExtractionLabel$
			endif
		else
			intervalfile$ = intervalfile$ + "{" + combinedIntervalLabel$ + "}___['intnumber']"
		endif

		# NEW IN 217: MAKE EXTRACTING WAVS OPTIONAL
		if extraer_archivos_WAV = 1
			# Extract the sound from the interval. THE KEY VALUES ARE intervalstart AND intervalend
			select LongSound 'soundName$'
			Extract part... intervalstart intervalend no
			intervalfileWithExt$ = intervalfile$ + ".wav"
			Write to WAV file... 'intervalfileWithExt$'
			Remove
		endif

		# !!!!! EXPERIMENTAL !!!!! WARNING !!! Extract TextGrid along with sound file
		if tambien_extraer_TextGrids = 1
			select TextGrid 'soundName$'
			Extract part... intervalstart intervalend no
			intervalfileWithExt$ = intervalfile$ + ".TextGrid"
			Write to text file... 'intervalfileWithExt$'
			Remove
		endif

		# Write information about the extracted interval to the log file.
		# The log always records the full, untruncated labels.
		dog$ = "'intnumber'" + tab$ + prefix$ + tab$ + searchIntervalLabel$ + tab$
		if useAddLabel = 1
			dog$ = dog$ + addLabelIntervalName$ + tab$
		endif
		dog$ = dog$ + labelIntervalName$ + tab$ + restrictionText$ + tab$ + extractionTierLabel$ + newline$
		fileappend "'textfilename$'" 'dog$'
	endif
endfor

########DEBUG
if debug = 1
	printline ========== NEW DEBUG ==========
	printline useRestrict = 'useRestrict'
	printline useSoundExtractionTier = 'useSoundExtractionTier'
	printline
	printline regex = 'expresion_regular$'
	printline restrictionTierName$ = 'restrictionTierName$'
	printline restrictionText$ = 'restrictionText$'
	printline extractionTierName$ = 'extractionTierName$'
	printline labelTierName$ = 'labelTierName$'
	printline labelIntervalName$ = 'labelIntervalName$'
	printline intnumber = 'intnumber'
	printline searchIntervalLabel$ = 'searchIntervalLabel$'
	printline -------------------------------
	printline
	printline
endif

# Leave the original TextGrid and LongSound selected, as they were at the start.
select TextGrid 'soundName$'
plus LongSound 'soundName$'

##############################################################################
# PROCEDURE:	GetTierNum .name$ .variable$
# DESCRIPTION:	Finds the number of a tier that has a given label.
# GLOBAL VARIABLES NEEDED:
#		soundName$ is the name of the sound and TextGrid file being used.
# PARAMETERS:
#		.name$      name of the tier to look for.
#		.variable$  NAME of the global numeric variable that receives the
#		            tier number (it is assigned through string substitution).
# ERRORS:	Exits the script with a message if no tier is called .name$.
# THANKS:	Adapted from a script by Mietta Lennes.
##############################################################################
procedure GetTierNum .name$ .variable$
	select TextGrid 'soundName$'
	.numberOfTiers = Get number of tiers

	# Walk the tiers in order and stop at the first one whose name matches.
	# .found stays 0 when every tier has been tried without success.
	.found = 0
	.itier = 0
	while .found = 0 and .itier < .numberOfTiers
		.itier = .itier + 1
		.currentTier$ = Get tier name... .itier
		if .currentTier$ = .name$
			.found = .itier
		endif
	endwhile

	# Hand the result (tier number, or 0 for "not found") back to the main
	# part of the script through the variable whose name is in .variable$.
	'.variable$' = .found

	# If the tier being searched for was not found, die and throw an error message.
	if .found = 0
		exit No existe ningún tier llamado '.name$' en el archivo 'soundName$'!
	endif
endproc