PHP – 正则表达式

贪婪模式
preg_filter ($arr_pat, $arr_rep, $arr_arr)
preg_replace 字符串
preg_replace 数组
分组捕捉的例子
preg_grep
preg_match
preg_match_all
match 和 match_all
preg_split
regex 类
相关阅读

? echo ‘hellllloll‘ | egrep ‘(ll|lo)\1‘
# 捕捉到了 llll

? echo ‘heollllolo‘ | egrep ‘(ll|lo)\1‘
# 只捕捉到了 llll

? echo ‘heolllllolo‘ | egrep ‘(ll|lo)\1‘
# 捕捉到了 llll 和 lolo

在分组里面使用 ‘|‘, 当进行后向引用的时候, 就不会看成是 |, 只认识其中一个
(ll|lo)\1 不会匹配到 lllo, 也不会匹配到 loll, 只会匹配 llll 和 lolo

i (PCRE_CASELESS)
m (PCRE_MULTILINE)
s (PCRE_DOTALL): 设置后, 点号 '.' 可以匹配包括换行符在内的任意字符. 如果没有这个修饰符, 点号不匹配换行符. 一个取反字符类比如 [^a] 总是匹配换行符, 而不依赖于这个修饰符的设置
U (PCRE_UNGREEDY): 这个修饰符逆转了量词的"贪婪"模式, 取消贪婪匹配也可以使用 ‘?‘
x (PCRE_EXTENDED): 模式中的空白字符忽略不计, 除非它已经被转义或位于字符类中; 字符类之外未转义的 # 到行尾之间的内容也会被当作注释忽略
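e (PREG_REPLACE_EVAL): 仅用于 preg_replace(). 先对替换字符串做后向引用替换, 再把得到的字符串当作 PHP 代码执行, 并用执行结果作为最终的替换内容. 该修饰符在 PHP 5.5 中被废弃, 在 PHP 7.0 中被移除, 应改用 preg_replace_callback()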

贪婪模式

$str = ‘abc‘;
$pat = ‘/[a-z]{2,3}?/‘;                  // ? 取消贪婪模式
preg_match($pat, $str, $m, PREG_OFFSET_CAPTURE);
// Array ( [0] => Array ( [0] => ab [1] => 0 ) )

$str = ‘abc‘;
$pat = ‘/[a-z]{2,3}?/U‘;                 // 使用 U 对贪婪模式取反
preg_match($pat, $str, $m, PREG_OFFSET_CAPTURE);
// Array ( [0] => Array ( [0] => abc [1] => 0 ) )

preg_filter ($arr_pat, $arr_rep, $arr_arr)

preg_filter ( mixed $pattern, $replacement, $subject [, int $limit = -1 [, int &$count ]] )

preg_filter() 等价于 preg_replace() 除了它仅仅返回(可能经过转化)与目标匹配的结果

$arr  = array( ‘1A‘ ,  ‘a‘ ,  ‘2‘ ,  ‘b‘ ,  ‘3‘ ,  ‘A‘ ,  ‘B‘ ,  ‘4‘ );
$pattern  = array( ‘/(\d)(A)/‘ ,  ‘/[a-z]/‘ ,  ‘/[\s]/‘ );
$replace  = array( ‘$2‘ ,  ‘$0‘ ,  ‘$0‘ );      // 可以使用 \\2 \\0

$rows = preg_filter ( $pattern ,  $replace ,  $arr );

print_r($rows);
// Array (
//    [0] => A
//    [1] => a
//    [3] => b          // 注意这个
// )

$rows = preg_replace ( $pattern ,  $replace ,  $arr );

print_r($rows);
// Array (
//    [0] => A
//    [1] => a
//    [2] => 2
//    [3] => b
//    [4] => 3
//    [5] => A
//    [6] => B
//    [7] => 4
// )

// $1 表示捕获到的第一个分组, $0 表示整个模式匹配到的文本

// 如果 pattern 和 replacement 都是数组, 会按顺序依次处理: 前一个模式替换后的结果会作为后一个模式的输入 (例如 1A 先被第一个模式替换成 A, 再交给后面的模式继续匹配)

preg_replace 字符串

$str = ‘1233 hello 9000‘;
$pat = ‘/\d\d\d\d/i‘;
$news = preg_replace($pat, ‘hello‘, $str);
echo $news, ‘<br>‘;             // hello hello hello

preg_replace 数组

$arr = array(‘a‘, ‘b‘, ‘c‘, 1, 2, 3);
$pat = array(‘/[a-z]/‘, ‘/[0-9]/‘);
$rep = array(‘123‘, ‘|abc|‘);

$m = preg_replace($pat, $rep, $arr);
print_r($m);

Array(
    [0] => |abc||abc||abc|
    [1] => |abc||abc||abc|
    [2] => |abc||abc||abc|
    [3] => |abc|
    [4] => |abc|
    [5] => |abc|
)

分组捕捉的例子

$text = ‘today is 4/28/2015‘;
$pat = "/(\d{1,2})\/(\d{1,2})\/(\d{4})/";
echo preg_replace($pat, "\\3-\\1-\\2", $text);         // today is 2015-4-28
echo preg_replace($pat, "\${3}-\${1}-\${2}", $text);   // today is 2015-4-28
echo preg_replace($pat, ‘${3}-${1}-${2}‘, $text);      // today is 2015-4-28

preg_grep

对数组中的每个元素做匹配, 返回由匹配成功的元素组成的数组 (保留原数组的键名)

array preg_grep ( string $pattern , array $input [, int $flags = 0 ] )
// 返回所有包含浮点数的元素
$fl_array  =  preg_grep ( "/^(\d+)?\.\d+$/" ,  $array );

$arr = array("linux with space0.2", "apache0.1", "mysqli5.5", "php5.5");
$m = preg_grep(‘/^[a-zA-Z]+(\d|\.)+$/‘, $arr);
foreach ($m as $v) {
    echo $v . "<br>";
}
// apache0.1
// mysqli5.5
// php5.5

preg_match

最多匹配一次, 找到第一个匹配后就停止搜索; 返回匹配到的次数 (0 或 1), 出错时返回 false

int preg_match ( string $pattern , string $subject [, array &$matches [, int $flags = 0 [, int $offset = 0 ]]] )

$subject  =  "abcdef" ;
$pattern  =  ‘/^def/‘ ;
preg_match ( $pattern ,  $subject ,  $matches ,  PREG_OFFSET_CAPTURE ,  3 );
print_r ( $matches );     // Array ( )


$subject  =  "abcdef" ;
$pattern  =  ‘/def/‘ ;
preg_match ( $pattern ,  $subject ,  $matches ,  PREG_OFFSET_CAPTURE ,  3 );
print_r ( $matches );     // Array ( [0] => Array ( [0] => def [1] => 3 ) )

$subject  =  "abcdef" ;
$pattern  =  ‘/^def/‘ ;
preg_match ( $pattern ,  substr($subject, 3) ,  $matches, PREG_OFFSET_CAPTURE);
print_r ( $matches );           // Array ( [0] => Array ( [0] => def [1] => 0 ) )

preg_match ( $pattern ,  substr($subject, 3) ,  $matches);
print_r ( $matches );           // Array ( [0] => def )

preg_match_all

匹配多次, 返回匹配到的次数

int preg_match_all ( string $pattern , string $subject [, array &$matches [, int $flags = PREG_PATTERN_ORDER [, int $offset = 0 ]]] )

$str = ‘abc def def‘;
$pat = ‘/(def)/‘;
preg_match_all($pat, $str, $m);
print_r($m);

// Array (
//     [0] => Array ( [0] => def [1] => def )
//     [1] => Array ( [0] => def [1] => def )
// )

preg_match($pat, $str, $m);
print_r($m);
// Array ( [0] => def [1] => def )


preg_match_all($pat, $str, $m, PREG_OFFSET_CAPTURE);
print_r($m);
// Array (
//     [0] => Array ( [0] => Array ( [0] => def [1] => 4 )
//                    [1] => Array ( [0] => def [1] => 8 )
//     )
//     [1] => Array ( [0] => Array ( [0] => def [1] => 4 )
//                    [1] => Array ( [0] => def [1] => 8 )
//     )
// )

// 有了 PREG_OFFSET_CAPTURE 就会让数组增加一维

// preg_match 的 $m 默认是 1 维, 加上 PREG_OFFSET_CAPTURE 后变为 2 维:      preg_match    ($pat, $str, $m, PREG_OFFSET_CAPTURE)
// preg_match_all 的 $m 默认是 2 维, 加上 PREG_OFFSET_CAPTURE 后变为 3 维:  preg_match_all($pat, $str, $m, PREG_OFFSET_CAPTURE)

match 和 match_all

$subject  =  "abcdef def" ;
$pattern  =  ‘/(d)(e)f/‘ ;

preg_match ( $pattern ,  $subject ,  $matches );
print_r ( $matches );


preg_match ( $pattern ,  $subject ,  $matches , PREG_OFFSET_CAPTURE);
print_r ( $matches );


preg_match_all ( $pattern ,  $subject ,  $matches);
print_r ( $matches );


preg_match_all ( $pattern ,  $subject ,  $matches , PREG_OFFSET_CAPTURE);
print_r ( $matches );

Array(
    [0] => def
    [1] => d
    [2] => e
)

Array(
    [0] => Array(
        [0] => def
        [1] => 3
    )
    [1] => Array(
        [0] => d
        [1] => 3
    )
    [2] => Array(
        [0] => e
        [1] => 4
    ))

Array(
    [0] => Array(
        [0] => def
        [1] => def
    )
    [1] => Array(
        [0] => d
        [1] => d
    )
    [2] => Array(
        [0] => e
        [1] => e
    ))

Array(
    [0] => Array(
        [0] => Array(
            [0] => def
            [1] => 3
        )

        [1] => Array(
            [0] => def
            [1] => 7
        ))

    [1] => Array(
        [0] => Array(
            [0] => d
            [1] => 3
        )
        [1] => Array(
            [0] => d
            [1] => 7
        ))

    [2] => Array(
        [0] => Array(
            [0] => e
            [1] => 4
        )

        [1] => Array (
            [0] => e
            [1] => 8
        )))

preg_split

array preg_split ( string $pattern , string $subject [, int $limit = -1 [, int $flags = 0 ]] )
flag:
PREG_SPLIT_NO_EMPTY
  如果这个标记被设置, preg_split() 将只返回分隔后的非空部分
PREG_SPLIT_DELIM_CAPTURE
  如果这个标记设置了, 用于分隔的模式中的括号表达式将被捕获并返回
PREG_SPLIT_OFFSET_CAPTURE
  如果这个标记被设置, 对于每一个出现的匹配返回时将会附加字符串偏移量.
  注意：这将会改变返回数组中的每一个元素, 使其每个元素成为一个由第 0 个元素为分隔后的子串，第 1 个元素为该子串在 subject 中的偏移量组成的数组

$str  =  ‘str‘ ;
$chars  =  preg_split ( ‘//‘ ,  $str , -1 );
print_r ( $chars );
// Array ( [0] => [1] => s [2] => t [3] => r [4] => )

$str  =  ‘str‘ ;
$chars  =  preg_split ( ‘//‘ ,  $str , - 1 , PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY);
print_r ( $chars );
// Array (
//     [0] => Array ( [0] => s [1] => 0 )
//     [1] => Array ( [0] => t [1] => 1 )
//     [2] => Array ( [0] => r [1] => 2 )
// )

regex 类

来源: 慕课网 http://www.imooc.com/learn/350

/**
 * Small validation helper built on preg_match() / preg_match_all().
 *
 * A rule is either the name of a built-in pattern (see $validate, looked up
 * case-insensitively) or a complete PCRE pattern including delimiters.
 *
 * Depending on the "return match result" flag, every check returns either
 * a boolean (did the subject match?) or the $matches array filled in by
 * preg_match_all().
 */
class regexTool {

    /**
     * Built-in patterns keyed by lower-case rule name.
     */
    private $validate = array(
        'require'   =>  '/.+/',
        'email'     =>  '/^\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$/',
        // Ranges are spelled A-Za-z on purpose: "A-z" would also admit the
        // ASCII characters between 'Z' and 'a' ([ \ ] ^ _ `) in host names.
        'url'       =>  '/^http(s?):\/\/(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,4}(?:[\/\?#][\/=\?%\-&~`@[\]\':+!\.#\w]*)?$/',
        'currency'  =>  '/^\d+(\.\d+)?$/',
        'number'    =>  '/^\d+$/',
        'zip'       =>  '/^\d{6}$/',
        'integer'   =>  '/^[-\+]?\d+$/',
        'double'    =>  '/^[-\+]?\d+(\.\d+)?$/',
        'english'   =>  '/^[A-Za-z]+$/',
        'qq'        =>  '/^\d{5,11}$/',
        'mobile'    =>  '/^1(3|4|5|7|8)\d{9}$/',
    );

    /** When true, checks return the preg_match_all() matches instead of a bool. */
    private $returnMatchResult = false;

    /** Pattern modifiers (e.g. 'i') appended to built-in patterns; null for none. */
    private $fixMode = null;

    /** Matches captured by the most recent preg_match_all() call. */
    private $matches = array();

    /** Outcome of the most recent boolean check. */
    private $isMatch = false;

    /**
     * @param bool        $returnMatchResult true to return match arrays, false to return booleans
     * @param string|null $fixMode           modifiers appended to built-in patterns
     */
    public function __construct($returnMatchResult = false, $fixMode = null) {
        $this->returnMatchResult = $returnMatchResult;
        $this->fixMode = $fixMode;
    }

    /**
     * Runs one check.
     *
     * @param string $pattern built-in rule name (any letter case) or a full PCRE pattern
     * @param string $subject text to test
     * @return bool|array boolean match flag, or the preg_match_all() matches array
     */
    private function regex($pattern, $subject) {
        // Look the rule up with the same lower-cased key that was tested,
        // so that e.g. 'Email' resolves to the 'email' pattern.
        $key = strtolower($pattern);
        if (array_key_exists($key, $this->validate)) {
            $pattern = $this->validate[$key] . $this->fixMode;
        }

        if ($this->returnMatchResult) {
            preg_match_all($pattern, $subject, $this->matches);
        } else {
            // preg_match() returns 1 on match, 0 on no match, false on error.
            $this->isMatch = preg_match($pattern, $subject) === 1;
        }

        return $this->getRegexResult();
    }

    /**
     * @return bool|array result of the last check in the currently selected form
     */
    private function getRegexResult() {
        return $this->returnMatchResult ? $this->matches : $this->isMatch;
    }

    /**
     * Selects what the checks return.
     *
     * @param mixed $bool null (default) flips the current mode; any other value
     *                    is cast to bool and used as the new mode, so an explicit
     *                    false really switches to boolean results.
     */
    public function toggleReturnType($bool = null) {
        $this->returnMatchResult = ($bool === null)
            ? !$this->returnMatchResult
            : (bool)$bool;
    }

    /**
     * @param string|null $fixMode modifiers appended to built-in patterns
     */
    public function setFixMode($fixMode) {
        $this->fixMode = $fixMode;
    }

    /** Checks that $str contains at least one character ('require' rule). */
    public function noEmpty($str) {
        return $this->regex('require', $str);
    }

    /** Checks $email against the 'email' rule. */
    public function isEmail($email) {
        return $this->regex('email', $email);
    }

    /** Checks $mobile against the 'mobile' rule (11 digits starting with 13/14/15/17/18). */
    public function isMobile($mobile) {
        return $this->regex('mobile', $mobile);
    }

    /**
     * Checks $subject against an arbitrary rule name or raw PCRE pattern.
     */
    public function check($pattern, $subject) {
        return $this->regex($pattern, $subject);
    }

    //......
}

PHP -- 正则表达式