标签:
https://github.com/xiaojiong/scanfile
演示站点: http://www.weigongkai.com/ (7 GB 数据,2 秒完成扫描)
package scanfile
/*
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// IndexStr returns the offset, measured from the start of haystack, of the
// first occurrence of needle at or after position begin, or -1 when needle
// does not occur there.
int IndexStr(char *haystack, char *needle, unsigned int begin) {
    const char *hit = strstr(haystack + begin, needle);
    return hit ? (int)(hit - haystack) : -1;
}
// IndexChar scans forward from haystack+begin for the first byte equal to c.
// The returned offset is relative to position begin (not to the start of
// haystack); callers add begin back to get an absolute index. Returns -1 when
// the terminating NUL is reached without a match.
int IndexChar(char *haystack, char c, unsigned int begin) {
    char *base = haystack + begin;
    for (char *p = base; *p != '\0'; ++p) {
        if (*p == c) {
            return (int)(p - base);
        }
    }
    return -1;
}
// LastIndexChar scans backwards from position begin for the last byte equal
// to c and returns its absolute offset in haystack, or -1 when none is found.
// A begin of 0 means "start from the last byte of haystack". A begin beyond
// the terminating NUL yields -1.
int LastIndexChar(char *haystack, char c, unsigned int begin) {
    size_t len = strlen(haystack);
    // An empty haystack has nothing to search; without this guard begin
    // would become len - 1 and wrap around as an unsigned value.
    if (len == 0) {
        return -1;
    }
    if (begin == 0) {
        begin = (unsigned int)(len - 1);
    } else if (begin > len) {
        return -1;
    }
    for (;;) {
        if (haystack[begin] == c) {
            return (int)begin;
        }
        if (begin == 0) {
            return -1;
        }
        --begin;
    }
}
*/
import"C"
import"unsafe"
func strScan(str *string, key *string, counter *Counter) []string {
begin := 0
CStr := C.CString(*str)
Ckey := C.CString(*key)
defer func() {
C.free(unsafe.Pointer(CStr))
C.free(unsafe.Pointer(Ckey))
}()
var res []string
for {
var index int = 0
if index = int(C.IndexStr(CStr, Ckey, C.uint(begin))); index == -1 {
break
}
var startIndex int = 0
if index > 0 {
if pos := int(C.LastIndexChar(CStr, ‘n‘, C.uint(index))); pos != -1 {
startIndex = pos + 1
}
}
var endIndex int = len(*str)
if pos := int(C.IndexChar(CStr, ‘n‘, C.uint(index))); pos != -1 {
endIndex = pos + index
}
begin = endIndex
if counter.IsMax() {
break
}
res = append(res, (*str)[startIndex:endIndex])
counter.Add()
if begin == len(*str) {
break
}
}
return res
}
package scanfile
import (
"io"
"os"
"sync"
)
var (
	// LineFeed is the byte that marks the end of a line of text.
	LineFeed = byte('\n')
	// BufSize is the size, in bytes, of the buffer fileRead allocates for
	// reading a file.
	BufSize = 1024 * 1024
)
func Scan(files []string, searchStr *string) string {
var result ScanResult
//计数器
counter := InitCounter(10)
//扫描结果输出通道
out := make(chan *FileRes, 10)
fileCount := len(files)
for i := 0; i < fileCount; i++ {
go ScanFile(files[i], searchStr, counter, out)
}
for i := 0; i < fileCount; i++ {
result.AddFileRes(<-out)
}
result.AddCounter(counter)
return result.ToJson()
}
// ScanFile scans one file for *searchStr and sends its FileRes on out.
//
// The file content arrives in chunks on the channel returned by fileRead.
// Three worker goroutines consume that channel, run strScan on every chunk
// and add each returned line to the file's result. ScanFile blocks until all
// workers have finished and then sends the result on out.
//
// NOTE(review): fileRes.Add is called from several goroutines at once;
// presumably FileRes synchronizes internally — confirm.
func ScanFile(fileName string, searchStr *string, counter *Counter, out chan *FileRes) {
	const workers = 3

	fileContentChan := fileRead(fileName, counter)
	fileRes := InitFileRes(fileName)

	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for text := range fileContentChan {
				if counter.IsMax() {
					// Drain what is left so the reader goroutine is never
					// stuck on a send nobody receives. Every early exit goes
					// through this path.
					clearFileContentChan(fileContentChan)
					return
				}
				for _, line := range strScan(text, searchStr, counter) {
					fileRes.Add(line)
				}
			}
		}()
	}
	wg.Wait()
	out <- fileRes
}
// clearFileContentChan receives and discards values from c until c is closed.
func clearFileContentChan(c chan *string) {
	for range c {
	}
}
// fileRead streams the content of fileName in chunks over the returned
// channel, which is closed once the whole file has been sent or
// counter.IsMax() reports true.
//
// Each chunk is read into a buffer of BufSize bytes and cut after the last
// LineFeed it contains, so lines are not split across chunks; the next read
// starts right after that cut. Two cases send the whole chunk instead: the
// final chunk of the file (so an unterminated last line is still delivered)
// and a chunk that contains no LineFeed at all (a line longer than the
// buffer), which guarantees the read position always advances.
//
// Failure to open or read the file panics inside the reader goroutine.
func fileRead(fileName string, counter *Counter) chan *string {
	fileContentChan := make(chan *string, 5)
	go func() {
		defer close(fileContentChan)

		fh, err := os.Open(fileName)
		if err != nil {
			panic(err)
		}
		defer fh.Close()

		buf := make([]byte, BufSize)
		var start int64
		for !counter.IsMax() {
			// ReadAt reads at an explicit offset, so no separate Seek (and
			// no ignored Seek error) is needed. It returns io.EOF whenever
			// the end of the file was reached before the buffer was filled.
			n, err := fh.ReadAt(buf, start)
			if err != nil && err != io.EOF {
				panic(err)
			}
			if n == 0 {
				break
			}

			// Only the first n bytes are fresh; the rest of buf may still
			// hold data from the previous read.
			end := n
			if err != io.EOF {
				if l := lastByteIndex(buf[:n], LineFeed); l >= 0 {
					end = l + 1
				}
			}

			content := string(buf[:end])
			start += int64(end)
			fileContentChan <- &content

			if err == io.EOF {
				break
			}
		}
	}()
	return fileContentChan
}
// lastByteIndex returns the index of the last occurrence of sep in s, or -1
// when sep does not occur in s (including when s is empty).
func lastByteIndex(s []byte, sep byte) int {
	for i := len(s) - 1; i >= 0; i-- {
		if s[i] == sep {
			return i
		}
	}
	return -1
}
原文地址:http://www.cnblogs.com/milantgh/p/4452931.html