This interface illustrates how wrapper functions can simplify the user API, and handle C header declarations that are not readily accessible in Fortran.
Below are sources for the main Fortran module, the C support/wrapper code, and a small example program.
This is only a preliminary version, It would be nice to automatically handle the match array, but the maximum number of matches must still be declared by the caller.
PCRE is a much more powerful pattern library, and may be a better choice than POSIX regex. You may find some wrappers in the projects here: fortran_pcre fregex ftlRegex
module regex_module
!-------------------------------------------------------------------
! Fortran interface to POSIX regex, using ISO_C_BINDING.
!
! Regex is defined as an API using C headers. It does not define the
! exact value of flag tokens, just the names. It also uses an opaque
! data structure and a declared numeric type for the match array.
! Therefore, the code must either be generated for each target
! platform, or it use wrapper functions written in C.
!
! Fortran wrapper functions are also required to present a normal
! Fortran API, ant not require C conversions by the caller.
!
! The interface here is not strictly correct, because it does not
! explicitly convert Fortran strings to the C character kind.
! Fortran only supports conversion of string kinds by assignment,
! or by a rather slow internal WRITE. For now, the easiest approach
! is to assume that C and Fortran default character kinds are the
! same. This is generally true, but UTF-8 strings are likely to
! cause problems.
!-------------------------------------------------------------------
! API:
!
! Compile a regex into a regex object
! subroutine regcomp(this,pattern,flags,status)
! type(regex_type), intent(out) :: this ! new regex object
! character(len=*), intent(in) :: pattern ! regex pattern string
! character(len=*), intent(in), &
! optional :: flags ! flag characters:
! ! x = extended regex (REG_EXTENDED)
! ! m = multi-line (REG_NEWLINE)
! ! i = case-insensitive (REG_ICASE)
! ! n = no MATCH required (REG_NOSUB)
! integer, intent(out), optional :: status ! If absent, errors are fatal
! end subroutine regcomp
!
! Execute a compiled regex against a string
! function regexec(this,string,matches,flags,status) result(match)
! logical :: match ! .TRUE. if the pattern matched
! type(regex_type), intent(in) :: this ! regex object
! character(len=*), intent(in) :: string ! target string
! character(len=*), intent(in), &
! optional :: flags ! flag characters (for partial lines):
! ! b = no beginning-of-line (REG_NOTBOL)
! ! e = no end-of-line (REG_NOTEOL)
! integer, intent(out), optional :: matches(:,:) ! match locations,
! ! dimension(2,nmatches)
! integer, intent(out), optional :: status ! If absent, errors are fatal
! end function
!
! Get the string message for a status error value
! subroutine regerror(this,errcode,errmsg,errmsg_len)
! type(regex_type), intent(in) :: this
! integer, intent(in) :: errcode
! character, intent(out) :: errmsg
! integer, intent(out) :: errmsg_len
! errmsg_len = C_regerror(int(errcode,C_int), this%preg, &
! errmsg, int(len(errmsg),C_size_t))
! end subroutine regerror
!
! Release
! subroutine regfree(this)
! type(regex_type), intent(inout) :: this
! end subroutine regfree
!-------------------------------------------------------------------
! TODO:
! * More documentation.
! * Implement allocatable-length strings when commonly available.
! * Maybe store the matches array inside the regex_type structure?
!-------------------------------------------------------------------
use ISO_C_Binding, only: C_ptr, C_int, C_size_t, C_char, &
C_NULL_char, C_NULL_ptr
use ISO_Fortran_Env, only: ERROR_UNIT
! Fortran regex structure holds a pointer to an opaque C structure
type regex_type
type(C_ptr) :: preg
end type regex_type
interface
subroutine C_regalloc(preg_return) &
bind(C,name="C_regalloc")
import
type(C_ptr), intent(out) :: preg_return
end subroutine C_regalloc
subroutine C_regcomp(preg,pattern,flags,status) &
bind(C,name="C_regcomp")
import
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(in) :: pattern(*)
character(len=1,kind=C_char), intent(in) :: flags(*)
integer(C_int), intent(inout) :: status
end subroutine C_regcomp
subroutine C_regexec(preg,string,nmatch,matches,flags,status) &
bind(C,name="C_regexec")
import
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(in) :: string(*)
integer(C_int), intent(in), value :: nmatch
integer(C_int), intent(out) :: matches(2,nmatch)
character(len=1,kind=C_char), intent(in) :: flags(*)
integer(C_int), intent(out) :: status
end subroutine C_regexec
function C_regerror(errcode, preg, errbuf, errbuf_size) &
result(regerror) bind(C,name="regerror")
import
integer(C_size_t) :: regerror
integer(C_int), value :: errcode
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(out) :: errbuf
integer(C_size_t), value :: errbuf_size
end function C_regerror
subroutine C_regfree(preg) bind(C,name="regfree")
import
type(C_ptr), intent(in), value :: preg
end subroutine C_regfree
end interface
contains
subroutine regcomp(this,pattern,flags,status)
type(regex_type), intent(out) :: this
character(len=*), intent(in) :: pattern
character(len=*), intent(in), optional :: flags
integer, intent(out), optional :: status
! local
integer(C_int) :: status_
character(len=10,kind=C_char) :: flags_
! begin
flags_=' '
if (present(flags)) flags_=flags
this%preg = C_NULL_ptr
call C_regalloc(this%preg)
call C_regcomp(this%preg, trim(pattern)//C_NULL_char, &
trim(flags)//C_NULL_char, status_)
if (present(status)) then
status=status_
else if (status_/=0) then
stop 'Regex runtime error: regcomp failed.'
end if
end subroutine regcomp
logical function regexec(this,string,matches,flags,status) &
result(match)
type(regex_type), intent(in) :: this
character(len=*), intent(in) :: string
character(len=*), intent(in), optional :: flags
integer, intent(out), optional :: matches(:,:)
integer, intent(out), optional :: status
! local
integer(C_int) :: status_, matches_(2,1)
character(len=10,kind=C_char) :: flags_
! begin
flags_=' '
if (present(flags)) flags_=flags
write(*,*) 'calling C, nmatches=',nmatches
if (present(matches)) then
call C_regexec(this%preg, trim(string)//C_NULL_char, &
size(matches,2),matches, &
trim(flags_)//C_NULL_char, status_)
else
call C_regexec(this%preg, trim(string)//C_NULL_char, &
int(0,C_int),matches_, &
trim(flags_)//C_NULL_char, status_)
end if
match = status_==0
if (present(status)) then
status=status_
else if (status_/=0.and.status_/=1) then
stop 'Regex runtime error: regexec failed.'
end if
end function regexec
function regmatch(match,string,matches)
integer, intent(in) :: match, matches(2,*)
character(len=*), intent(in) :: string
character(len=matches(2,match)-matches(1,match)) :: regmatch
regmatch = string(matches(1,match)+1:matches(2,match))
end function regmatch
subroutine regerror(this,errcode,errmsg,errmsg_len)
type(regex_type), intent(in) :: this
integer, intent(in) :: errcode
character, intent(out) :: errmsg
integer, intent(out) :: errmsg_len
errmsg_len = C_regerror(int(errcode,C_int), this%preg, &
errmsg, int(len(errmsg),C_size_t))
end subroutine regerror
subroutine regfree(this)
type(regex_type), intent(inout) :: this
call C_regfree(this%preg)
this%preg = C_NULL_ptr
end subroutine regfree
end module regex_module
#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
void C_regalloc(regex_t **preg_return) {
*preg_return = malloc(sizeof(**preg_return));
}
/* pattern must be NUL terminated. */
void C_regcomp(regex_t *preg, const char *pattern,
const char *flags, int *status_return) {
int i, cflags=0;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'i': cflags |= REG_ICASE; break;
case 'm': cflags |= REG_NEWLINE; break;
case 'x': cflags |= REG_EXTENDED; break;
case 'n': cflags |= REG_NOSUB; break;
case ' ': break;
default: *status_return=-2; return;
}
}
*status_return = regcomp(preg,pattern,cflags);
}
void C_regexec(const regex_t *preg, const char *string, int nmatch,
int matches[nmatch][2], const char *flags,
int *status_return) {
int i, eflags=0;
regmatch_t *pmatch;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'b': eflags |= REG_NOTBOL; break;
case 'e': eflags |= REG_NOTEOL; break;
case ' ': break;
default: *status_return=-2; return;
}
}
if (nmatch>0 && sizeof(pmatch->rm_so)!=sizeof(matches[0][0])) {
pmatch = malloc(sizeof(regmatch_t)*nmatch);
*status_return = regexec(preg,string,nmatch,pmatch,eflags);
for (i=0;i<nmatch;i++) {
matches[i][0]=pmatch[i].rm_so;
matches[i][1]=pmatch[i].rm_eo;
}
free(pmatch);
} else {
*status_return = regexec(preg,string,nmatch,(regmatch_t*)&(matches[0][0]),eflags);
}
}
A simple example program
program test_program
use regex_module
logical match
integer status, matches(2,1)
type(regex_type) :: regex
call regcomp(regex,"\(a..\)")
match=regexec(regex,"what the?",matches)
write(*,*) 'match=',match,', status=',status
write(*,*) 'match="',regmatch(1,"what the?",matches),'"'
call regfree(regex)
end program test_program
Hello!, I am new to this Fortran Wiki, I do not know how to post a comment so I am editing the page. My apologies if this messes up your great work. I did small modification which I think it is an improvement. I hope it helps.
#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
void C_regalloc(regex_t **preg_return) {
*preg_return = malloc(sizeof(**preg_return));
}
/* pattern must be NUL terminated. */
void C_regcomp(regex_t *preg, const char *pattern,
const char *flags, int *status_return) {
int i, cflags=0;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'i': cflags |= REG_ICASE; break;
case 'm': cflags |= REG_NEWLINE; break;
case 'x': cflags |= REG_EXTENDED; break;
case 'n': cflags |= REG_NOSUB; break;
case ' ': break;
default: *status_return=-2; return;
}
}
*status_return = regcomp(preg,pattern,cflags);
}
void C_regexec(const regex_t *preg, const char *string, int nmatch,
int matches[nmatch][2], const char *flags,
int *status_return) {
int i, eflags=0;
int j=0;
const char * p = string;
regmatch_t *pmatch;
int start;
int finish;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'b': eflags |= REG_NOTBOL; break;
case 'e': eflags |= REG_NOTEOL; break;
case ' ': break;
default: *status_return=-2; return;
}
}
// Added by Trurl The Constructor
// Elkin Arroyo
pmatch = malloc(sizeof(regmatch_t)*nmatch);
while(1 && j<=nmatch) {
int no_match;
*status_return = regexec(preg,p,nmatch,pmatch,eflags);
if (status_return[0]) {
break;
}
if (pmatch[0].rm_so == -1) {
break;
}
start = pmatch[0].rm_so + (p - string);
finish = pmatch[0].rm_eo + (p - string);
matches[j][0]=start;
matches[j][1]=finish;
p += pmatch[0].rm_eo;
j=j+1;
}
free(pmatch);
}
This is my modified simple test program
program test_program
use regex_module
logical match
integer nmatch
integer status, matches(2,10)
type(regex_type) :: regex
nmatch=10
call regcomp(regex,"([0-9\.\-\*\/]+)+",'xm')
match=regexec(regex,"30*0 250*1 5 6 7",matches)
DO i=1,nmatch
write(*,*) 'match="',regmatch(i,"30*0 250*1 5 6 7",matches),'"'
ENDDO
call regfree(regex)
end program test_program
This is the results of the program once it ran
match="30*0"
match="250*1"
match="5"
match="6"
match="7"
match=""
match=""
match=""
match=""
match=""
M.M.: This is what I was looking for and tried it out and discovered slight problems on some test and tried to fix it. I like to contribute with the revised sources:
regexp_module: init of matches is included
regex.c: iteration to find more than 1 matches is cleaned up
test_program: allows for input of rep and text to make test more flexible…
module regex_module
!-------------------------------------------------------------------
! Fortran interface to POSIX regex, using ISO_C_BINDING.
!
! Regex is defined as an API using C headers. It does not define the
! exact value of flag tokens, just the names. It also uses an opaque
! data structure and a declared numeric type for the match array.
! Therefore, the code must either be generated for each target
! platform, or it use wrapper functions written in C.
!
! Fortran wrapper functions are also required to present a normal
! Fortran API, ant not require C conversions by the caller.
!
! The interface here is not strictly correct, because it does not
! explicitly convert Fortran strings to the C character kind.
! Fortran only supports conversion of string kinds by assignment,
! or by a rather slow internal WRITE. For now, the easiest approach
! is to assume that C and Fortran default character kinds are the
! same. This is generally true, but UTF-8 strings are likely to
! cause problems.
!-------------------------------------------------------------------
! API:
!
! Compile a regex into a regex object
! subroutine regcomp(this,pattern,flags,status)
! type(regex_type), intent(out) :: this ! new regex object
! character(len=*), intent(in) :: pattern ! regex pattern string
! character(len=*), intent(in), &
! optional :: flags ! flag characters:
! ! x = extended regex (REG_EXTENDED)
! ! m = multi-line (REG_NEWLINE)
! ! i = case-insensitive (REG_ICASE)
! ! n = no MATCH required (REG_NOSUB)
! integer, intent(out), optional :: status ! If absent, errors are fatal
! end subroutine regcomp
!
! Execute a compiled regex against a string
! function regexec(this,string,matches,flags,status) result(match)
! logical :: match ! .TRUE. if the pattern matched
! type(regex_type), intent(in) :: this ! regex object
! character(len=*), intent(in) :: string ! target string
! character(len=*), intent(in), &
! optional :: flags ! flag characters (for partial lines):
! ! b = no beginning-of-line (REG_NOTBOL)
! ! e = no end-of-line (REG_NOTEOL)
! integer, intent(out), optional :: matches(:,:) ! match locations,
! ! dimension(2,nmatches)
! integer, intent(out), optional :: status ! If absent, errors are fatal
! end function
!
! Get the string message for a status error value
! subroutine regerror(this,errcode,errmsg,errmsg_len)
! type(regex_type), intent(in) :: this
! integer, intent(in) :: errcode
! character, intent(out) :: errmsg
! integer, intent(out) :: errmsg_len
! errmsg_len = C_regerror(int(errcode,C_int), this%preg, &
! errmsg, int(len(errmsg),C_size_t))
! end subroutine regerror
!
! Release
! subroutine regfree(this)
! type(regex_type), intent(inout) :: this
! end subroutine regfree
!-------------------------------------------------------------------
! TODO:
! * More documentation.
! * Implement allocatable-length strings when commonly available.
! * Maybe store the matches array inside the regex_type structure?
!-------------------------------------------------------------------
use ISO_C_Binding, only: C_ptr, C_int, C_size_t, C_char, &
C_NULL_char, C_NULL_ptr
use ISO_Fortran_Env, only: ERROR_UNIT
integer, parameter :: NO_MATCH = -1
! Fortran regex structure holds a pointer to an opaque C structure
type regex_type
type(C_ptr) :: preg
end type regex_type
interface
subroutine C_regalloc(preg_return) &
bind(C,name="C_regalloc")
import
type(C_ptr), intent(out) :: preg_return
end subroutine C_regalloc
subroutine C_regcomp(preg,pattern,flags,status) &
bind(C,name="C_regcomp")
import
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(in) :: pattern(*)
character(len=1,kind=C_char), intent(in) :: flags(*)
integer(C_int), intent(inout) :: status
end subroutine C_regcomp
subroutine C_regexec(preg,string,nmatch,matches,flags,status) &
bind(C,name="C_regexec")
import
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(in) :: string(*)
integer(C_int), intent(in), value :: nmatch
integer(C_int), intent(out) :: matches(2,nmatch)
character(len=1,kind=C_char), intent(in) :: flags(*)
integer(C_int), intent(out) :: status
end subroutine C_regexec
function C_regerror(errcode, preg, errbuf, errbuf_size) &
result(regerror) bind(C,name="regerror")
import
integer(C_size_t) :: regerror
integer(C_int), value :: errcode
type(C_ptr), intent(in), value :: preg
character(len=1,kind=C_char), intent(out) :: errbuf
integer(C_size_t), value :: errbuf_size
end function C_regerror
subroutine C_regfree(preg) bind(C,name="regfree")
import
type(C_ptr), intent(in), value :: preg
end subroutine C_regfree
end interface
contains
subroutine regcomp(this,pattern,flags,status)
type(regex_type), intent(out) :: this
character(len=*), intent(in) :: pattern
character(len=*), intent(in), optional :: flags
integer, intent(out), optional :: status
! local
integer(C_int) :: status_
character(len=10,kind=C_char) :: flags_
! begin
flags_=' '
if (present(flags)) flags_=flags
this%preg = C_NULL_ptr
call C_regalloc(this%preg)
call C_regcomp(this%preg, trim(pattern)//C_NULL_char, &
trim(flags)//C_NULL_char, status_)
if (present(status)) then
status=status_
else if (status_/=0) then
stop 'Regex runtime error: regcomp failed.'
end if
end subroutine regcomp
logical function regexec(this,string,matches,flags,status) &
result(match)
type(regex_type), intent(in) :: this
character(len=*), intent(in) :: string
character(len=*), intent(in), optional :: flags
integer, intent(out), optional :: matches(:,:)
integer, intent(out), optional :: status
! local
integer(C_int) :: status_, matches_(2,1)
character(len=10,kind=C_char) :: flags_
! begin
matches = NO_MATCH !! m.m. added to allow for the extension nmatch > 1
flags_=' '
if (present(flags)) flags_=flags
! write(*,*) 'calling C, nmatches=',size(matches,2)
if (present(matches)) then
call C_regexec(this%preg, trim(string)//C_NULL_char, &
size(matches,2),matches, &
trim(flags_)//C_NULL_char, status_)
else
call C_regexec(this%preg, trim(string)//C_NULL_char, &
int(0,C_int),matches_, &
trim(flags_)//C_NULL_char, status_)
end if
match = status_==0
if (present(status)) then
status=status_
else if (status_/=0.and.status_/=1) then
stop 'Regex runtime error: regexec failed.'
end if
end function regexec
function regmatch(match,string,matches)
integer, intent(in) :: match, matches(2,*)
character(len=*), intent(in) :: string
character(len=matches(2,match)-matches(1,match)) :: regmatch
regmatch = string(matches(1,match)+1:matches(2,match))
end function regmatch
subroutine regerror(this,errcode,errmsg,errmsg_len)
type(regex_type), intent(in) :: this
integer, intent(in) :: errcode
character, intent(out) :: errmsg
integer, intent(out) :: errmsg_len
errmsg_len = C_regerror(int(errcode,C_int), this%preg, &
errmsg, int(len(errmsg),C_size_t))
end subroutine regerror
subroutine regfree(this)
type(regex_type), intent(inout) :: this
call C_regfree(this%preg)
this%preg = C_NULL_ptr
end subroutine regfree
end module regex_module
#include <sys/types.h>
#include <regex.h>
#include <string.h>
#include <stdlib.h>
void C_regalloc(regex_t **preg_return) {
*preg_return = malloc(sizeof(**preg_return));
}
/* pattern must be NUL terminated. */
void C_regcomp(regex_t *preg, const char *pattern,
const char *flags, int *status_return) {
int i, cflags=0;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'i': cflags |= REG_ICASE; break;
case 'm': cflags |= REG_NEWLINE; break;
case 'x': cflags |= REG_EXTENDED; break;
case 'n': cflags |= REG_NOSUB; break;
case ' ': break;
default: *status_return=-2; return;
}
}
*status_return = regcomp(preg,pattern,cflags);
}
void C_regexec(const regex_t *preg, const char *string, int nmatch,
int matches[nmatch][2], const char *flags,
int *status_return) {
int i, eflags=0;
int j=0;
const char * p = string;
regmatch_t *pmatch;
int start;
int finish;
for (i=0;flags[i];i++) {
switch (flags[i]) {
case 'b': eflags |= REG_NOTBOL; break;
case 'e': eflags |= REG_NOTEOL; break;
case ' ': break;
default: *status_return=-2; return;
}
}
// Added by Trurl The Constructor
// Elkin Arroyo
pmatch = malloc(sizeof(regmatch_t)); // m.m. modified
for(j=0;j<nmatch;j++) { // m.m. modified
*status_return = regexec(preg,p,1,pmatch,eflags); // m.m. modified
if (status_return[0] == REG_NOMATCH) { // m.m. modified
break;
}
// m.m some lines removed
start = pmatch[0].rm_so + (p - string);
finish = pmatch[0].rm_eo + (p - string);
matches[j][0]=start;
matches[j][1]=finish;
p += pmatch[0].rm_eo;
}
free(pmatch);
}
program test_program2 ! m.m. modifed
use regex_module
implicit none
integer, parameter :: nmatch = 10
integer, parameter :: linelength = 200
logical :: match
integer :: i, istatus, matches(2,nmatch)
type(regex_type) :: regex
character(len=linelength) :: inlin, regc, found
write(*,'(a)',advance="no")"regexp: "
read (*,'(a)',end=999 ) regc
DO
write(*,'(a)',advance="no") "text: "
read( *,'(a)',end=999 ) inlin
call regcomp(regex,trim(regc),'xm') ! trim --> blanks at the end must be specified as [ ]
match = regexec(regex,trim(inlin),matches,status=istatus)
FORALL(i=1:len(found))found(i:i)="-"
DO i=1,nmatch
if(matches(1,i) == NO_MATCH) exit
write(*,*) 'match=',matches(:,i),'"',regmatch(i,trim(inlin),matches),'"'
found(matches(1,i)+1:matches(2,i)) = regmatch(i,trim(inlin),matches)
ENDDO
write(*,*)
write(*,'(a)')trim(inlin)
write(*,'(a)')trim(found(1:len_trim(inlin)))
ENDDO
call regfree(regex)
999 stop
end program test_program2