PHP – 正则表达式
Table of Contents
? echo 'hellllloll' | egrep '(ll|lo)\1' # 捕捉到了 llll
? echo 'heollllolo' | egrep '(ll|lo)\1' # 只捕捉到了 llll
? echo 'heolllllolo' | egrep '(ll|lo)\1' # 捕捉到了 llll 和 lolo
在分组里面使用 '|' 时, 后向引用 \1 引用的是该分组实际匹配到的文本, 而不是重新套用 (ll|lo) 这个模式, 所以它只认识其中已经匹配到的那一个分支
(ll|lo)\1 不会匹配到 lllo, 也不会匹配到 loll, 只会匹配 llll 和 lolo
i (PCRE_CASELESS)
m (PCRE_MULTILINE)
s (PCRE_DOTALL): 表示的 ‘.‘ 可以匹配包含换行符. 如果没有这个修饰符, 点号不匹配换行符. 一个取反字符类比如 [^a] 总是匹配换行符, 而不依赖于这个修饰符的设置
U (PCRE_UNGREEDY): 这个修饰符逆转了量词的"贪婪"模式, 取消贪婪匹配也可以使用 ‘?‘
x (PCRE_EXTENDED): 模式中的空白字符会被忽略, 除非它已经被转义或者位于字符类中
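e (PREG_REPLACE_EVAL): 仅用于 preg_replace(), 先对替换字符串做后向引用替换, 再把得到的结果当作 PHP 代码执行, 并用执行结果替换搜索到的字符串. 该修饰符在 PHP 5.5 中已被废弃, 在 PHP 7.0 中已被移除, 并且有代码注入风险, 应改用 preg_replace_callback()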
贪婪模式
// Lazy quantifier: the trailing ? makes {2,3} match as few characters as possible.
$str = 'abc';
$pat = '/[a-z]{2,3}?/';
preg_match($pat, $str, $m, PREG_OFFSET_CAPTURE);
// $m: Array ( [0] => Array ( [0] => ab [1] => 0 ) )
// The U modifier inverts greediness, so here the trailing ? makes {2,3} greedy again.
$str = 'abc';
$pat = '/[a-z]{2,3}?/U';
preg_match($pat, $str, $m, PREG_OFFSET_CAPTURE);
// $m: Array ( [0] => Array ( [0] => abc [1] => 0 ) )
preg_filter ($arr_pat, $arr_rep, $arr_arr)
mixed preg_filter ( mixed $pattern , mixed $replacement , mixed $subject [, int $limit = -1 [, int &$count ]] )
preg_filter() 等价于 preg_replace() 除了它仅仅返回(可能经过转化)与目标匹配的结果
$arr = array('1A', 'a', '2', 'b', '3', 'A', 'B', '4');
$pattern = array('/(\d)(A)/', '/[a-z]/', '/[\s]/');
$replace = array('$2', '$0', '$0'); // \\2 and \\0 can be used as well

// preg_filter() returns only the subjects in which a pattern matched; keys are preserved.
$rows = preg_filter($pattern, $replace, $arr);
print_r($rows);
// Array (
//     [0] => A
//     [1] => a
//     [3] => b   // note: key 2 is absent
// )

// preg_replace() returns every subject, whether or not it was changed.
$rows = preg_replace($pattern, $replace, $arr);
print_r($rows);
// Array (
//     [0] => A
//     [1] => a
//     [2] => 2
//     [3] => b
//     [4] => 3
//     [5] => A
//     [6] => B
//     [7] => 4
// )

// $1 is the first captured group; $0 is the whole matched text.
// With pattern arrays the patterns are applied in order, so the result of
// the first replacement is the input the second pattern sees.
preg_replace 字符串
// Replace every run of four digits in a string.
$str = '1233 hello 9000';
$pat = '/\d\d\d\d/i';
$news = preg_replace($pat, 'hello', $str);
echo $news, '<br>'; // hello hello hello
preg_replace 数组
// Patterns are applied in order: each letter first becomes '123',
// then every digit (including those just inserted) becomes '|abc|'.
$arr = array('a', 'b', 'c', 1, 2, 3);
$pat = array('/[a-z]/', '/[0-9]/');
$rep = array('123', '|abc|');
$m = preg_replace($pat, $rep, $arr);
print_r($m);
Array( [0] => |abc||abc||abc| [1] => |abc||abc||abc| [2] => |abc||abc||abc| [3] => |abc| [4] => |abc| [5] => |abc| )
分组捕捉的例子
// Reorder month/day/year into year-month-day using the three captured groups.
$text = 'today is 4/28/2015';
$pat = "/(\d{1,2})\/(\d{1,2})\/(\d{4})/";

// Three equivalent ways to write the references in the replacement string.
echo preg_replace($pat, "\\3-\\1-\\2", $text);       // today is 2015-4-28
echo preg_replace($pat, "\${3}-\${1}-\${2}", $text); // today is 2015-4-28
echo preg_replace($pat, '${3}-${1}-${2}', $text);    // today is 2015-4-28
preg_grep
和 match_all 类似, 返回的是数组; 不同的是它作用于数组, 返回输入数组中与模式匹配的元素 (键名保持不变)
array preg_grep ( string $pattern , array $input [, int $flags = 0 ] )
// 返回所有包含浮点数的元素
$fl_array = preg_grep ( "/^(\d+)?\.\d+$/" , $array );
// Keep only the entries made of letters followed by digits and dots.
$arr = array("linux with space0.2", "apache0.1", "mysqli5.5", "php5.5");
$m = preg_grep('/^[a-zA-Z]+(\d|\.)+$/', $arr);
foreach ($m as $v) {
    echo $v . "<br>";
}
// apache0.1
// mysqli5.5
// php5.5
preg_match
只匹配一次, 找到第一个匹配后即停止搜索 (不做任何替换), 返回匹配到的次数 (0 或 1), 出错时返回 false
int preg_match ( string $pattern , string $subject [, array &$matches [, int $flags = 0 [, int $offset = 0 ]]] )
// With an offset, ^ still anchors to the start of the whole subject, so this does not match.
$subject = "abcdef";
$pattern = '/^def/';
preg_match($pattern, $subject, $matches, PREG_OFFSET_CAPTURE, 3);
print_r($matches);
// Array ( )

// Without the anchor the search starting at offset 3 finds "def".
$subject = "abcdef";
$pattern = '/def/';
preg_match($pattern, $subject, $matches, PREG_OFFSET_CAPTURE, 3);
print_r($matches);
// Array ( [0] => Array ( [0] => def [1] => 3 ) )
// Passing substr() instead of an offset makes ^ match, because "def" is now the start of the subject.
$subject = "abcdef";
$pattern = '/^def/';
preg_match($pattern, substr($subject, 3), $matches, PREG_OFFSET_CAPTURE);
print_r($matches);
// Array ( [0] => Array ( [0] => def [1] => 0 ) )

preg_match($pattern, substr($subject, 3), $matches);
print_r($matches);
// Array ( [0] => def )
preg_match_all
匹配多次, 返回匹配到的次数
int preg_match_all ( string $pattern , string $subject [, array &$matches [, int $flags = PREG_PATTERN_ORDER [, int $offset = 0 ]]] )
$str = 'abc def def';
$pat = '/(def)/';

preg_match_all($pat, $str, $m);
print_r($m);
// Array (
//     [0] => Array ( [0] => def [1] => def )
//     [1] => Array ( [0] => def [1] => def )
// )

preg_match($pat, $str, $m);
print_r($m);
// Array ( [0] => def [1] => def )

preg_match_all($pat, $str, $m, PREG_OFFSET_CAPTURE);
print_r($m);
// Array (
//     [0] => Array ( [0] => Array ( [0] => def [1] => 4 )
//                    [1] => Array ( [0] => def [1] => 8 )
//     )
//     [1] => Array ( [0] => Array ( [0] => def [1] => 4 )
//                    [1] => Array ( [0] => def [1] => 8 )
//     )
// )

// PREG_OFFSET_CAPTURE adds one dimension to the result array:
//   preg_match()     gives 1 dimension,  with PREG_OFFSET_CAPTURE 2
//   preg_match_all() gives 2 dimensions, with PREG_OFFSET_CAPTURE 3
match 和 match_all
// Run the same pattern through preg_match() and preg_match_all(),
// each with and without PREG_OFFSET_CAPTURE, to compare the result shapes.
$subject = "abcdef def";
$pattern = '/(d)(e)f/';

preg_match($pattern, $subject, $matches);
print_r($matches);

preg_match($pattern, $subject, $matches, PREG_OFFSET_CAPTURE);
print_r($matches);

preg_match_all($pattern, $subject, $matches);
print_r($matches);

preg_match_all($pattern, $subject, $matches, PREG_OFFSET_CAPTURE);
print_r($matches);
Array( [0] => def [1] => d [2] => e ) Array( [0] => Array( [0] => def [1] => 3 ) [1] => Array( [0] => d [1] => 3 ) [2] => Array( [0] => e [1] => 4 )) Array( [0] => Array( [0] => def [1] => def ) [1] => Array( [0] => d [1] => d ) [2] => Array( [0] => e [1] => e )) Array( [0] => Array( [0] => Array( [0] => def [1] => 3 ) [1] => Array( [0] => def [1] => 7 )) [1] => Array( [0] => Array( [0] => d [1] => 3 ) [1] => Array( [0] => d [1] => 7 )) [2] => Array( [0] => Array( [0] => e [1] => 4 ) [1] => Array ( [0] => e [1] => 8 )))
preg_split
array preg_split ( string $pattern , string $subject [, int $limit = -1 [, int $flags = 0 ]] )
flag:
PREG_SPLIT_NO_EMPTY 如果这个标记被设置, preg_split() 将仅返回分隔后的非空部分
PREG_SPLIT_DELIM_CAPTURE 如果这个标记被设置, 用于分隔的模式中的括号表达式将被捕获并返回
PREG_SPLIT_OFFSET_CAPTURE 如果这个标记被设置, 对于每一个出现的匹配返回时将会附加字符串偏移量. 注意: 这将会改变返回数组中的每一个元素, 使其每个元素成为一个由第 0 个元素为分隔后的子串, 第 1 个元素为该子串在 subject 中的偏移量组成的数组
// Splitting on the empty pattern yields every character plus an empty string at each end.
$str = 'str';
$chars = preg_split('//', $str, -1);
print_r($chars);
// Array ( [0] => [1] => s [2] => t [3] => r [4] => )
// PREG_SPLIT_NO_EMPTY drops the empty pieces; PREG_SPLIT_OFFSET_CAPTURE pairs each piece with its offset.
$str = 'str';
$chars = preg_split('//', $str, -1, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_NO_EMPTY);
print_r($chars);
// Array (
//     [0] => Array ( [0] => s [1] => 0 )
//     [1] => Array ( [0] => t [1] => 1 )
//     [2] => Array ( [0] => r [1] => 2 )
// )
regex 类
来源: 慕课网 http://www.imooc.com/learn/350
/**
 * Thin wrapper around preg_match()/preg_match_all() with a table of named
 * validation patterns.
 *
 * Depending on the return mode, every check returns either a boolean
 * (did the subject match?) or the match array filled by preg_match_all().
 */
class regexTool
{
    /** Named patterns; keys are lower-case and looked up case-insensitively. */
    private $validate = array(
        'require'  => '/.+/',
        'email'    => '/^\w+([-+.]\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*$/',
        // Character ranges are A-Za-z; the previous A-za-z also admitted [ \ ] ^ _ and `.
        'url'      => '/^http(s?):\/\/(?:[A-Za-z0-9-]+\.)+[A-Za-z]{2,4}(?:[\/\?#][\/=\?%\-&~`@[\]\':+!\.#\w]*)?$/',
        'currency' => '/^\d+(\.\d+)?$/',
        'number'   => '/^\d+$/',
        'zip'      => '/^\d{6}$/',
        'integer'  => '/^[-\+]?\d+$/',
        'double'   => '/^[-\+]?\d+(\.\d+)?$/',
        'english'  => '/^[A-Za-z]+$/',
        'qq'       => '/^\d{5,11}$/',
        'mobile'   => '/^1(3|4|5|7|8)\d{9}$/',
    );

    /** When true, checks return the preg_match_all() matches instead of a boolean. */
    private $returnMatchResult = false;

    /** Pattern modifiers appended to the named patterns (e.g. 'i', 'U'); null for none. */
    private $fixMode = null;

    /** Matches filled by the last preg_match_all() call. */
    private $matches = array();

    /** Result of the last preg_match() call. */
    private $isMatch = false;

    /**
     * @param bool        $returnMatchResult true to return match arrays, false to return booleans
     * @param string|null $fixMode           modifiers appended to the named patterns
     */
    public function __construct($returnMatchResult = false, $fixMode = null)
    {
        $this->returnMatchResult = $returnMatchResult;
        $this->fixMode = $fixMode;
    }

    /**
     * Runs a named or raw pattern against the subject.
     *
     * @param string $pattern a key of $validate (any letter case) or a complete regex
     * @param string $subject text to test
     * @return bool|array boolean in boolean mode, match array otherwise
     */
    private function regex($pattern, $subject)
    {
        // Look the name up with the same lower-cased key that is used for the
        // existence check; indexing with the raw name failed for e.g. 'EMAIL'.
        $key = strtolower($pattern);
        if (array_key_exists($key, $this->validate)) {
            $pattern = $this->validate[$key] . $this->fixMode;
        }

        if ($this->returnMatchResult) {
            preg_match_all($pattern, $subject, $this->matches);
        } else {
            $this->isMatch = preg_match($pattern, $subject) === 1;
        }

        return $this->getRegexResult();
    }

    /**
     * @return bool|array the stored matches or the stored boolean, per the current return mode
     */
    private function getRegexResult()
    {
        return $this->returnMatchResult ? $this->matches : $this->isMatch;
    }

    /**
     * Switches the return mode.
     *
     * @param mixed $bool null (or omitted) flips the current mode; any other
     *                    value is cast to bool and becomes the new mode
     */
    public function toggleReturnType($bool = null)
    {
        // Compare against null explicitly: empty() treated an explicit false
        // as "no argument" and toggled instead of switching to boolean mode.
        if ($bool === null) {
            $this->returnMatchResult = !$this->returnMatchResult;
        } else {
            $this->returnMatchResult = (bool) $bool;
        }
    }

    /**
     * @param string|null $fixMode modifiers appended to the named patterns
     */
    public function setFixMode($fixMode)
    {
        $this->fixMode = $fixMode;
    }

    /** Checks the subject against the 'require' pattern (at least one character). */
    public function noEmpty($str)
    {
        return $this->regex('require', $str);
    }

    /** Checks the subject against the 'email' pattern. */
    public function isEmail($email)
    {
        return $this->regex('email', $email);
    }

    /** Checks the subject against the 'mobile' pattern. */
    public function isMobile($mobile)
    {
        return $this->regex('mobile', $mobile);
    }

    /**
     * Checks the subject against a named pattern or a caller-supplied regex.
     *
     * @param string $pattern a key of $validate or a complete regex
     * @param string $subject text to test
     */
    public function check($pattern, $subject)
    {
        return $this->regex($pattern, $subject);
    }

    //......
}