码迷,mamicode.com
首页 > 其他好文 > 详细

深入折腾scrapy之一:环境搭建 beta0.3(2013-2-4更新)

时间:2015-12-21 07:03:42      阅读:235      评论:0      收藏:0      [点我收藏+]

标签:

发表于:http://www.ballooncat.com/scrapy-setup.html

最近在深入折腾scrapy,先放出环境搭建教程一枚,随后放出更多关于scrapy配置,扩展方面的教程.教程为beta版,部分细节在实际环境中可能会有出入.

前言:

1.系统centos5.5,默认安装了python2.4,需要升级到python2.7+

安装教程包括:

sqlite3 + python2.7.3 + mysql5.1.55 + sphinx2.0.6 + easy_install + python_mysql + scrapy + nginx + uwsgi + Redis install guide



安装准备工作


1. 安装开发套件

yum install gcc gcc-c++ zlib zlib-devel make cmake libxml2 libxslt-devel openssl-devel imake automake libtool python-devel ncurses-devel


开始搭建环境:


pre_2.安装sqlite3
必须在安装python之前安装

wget -c http://www.sqlite.org/sqlite-autoconf-3071501.tar.gz
tar zxvf sqlite-autoconf-3071501.tar.gz 
cd sqlite-autoconf-3071501
./configure 
make
make install


cd ~




2. 安装python2.7.3

wget -c http://www.python.org/ftp/python/2.7.3/Python-2.7.3.tgz
tar zxvf Python-2.7.3.tgz 
cd Python-2.7.3
./configure
make && make install


mv /usr/bin/python /usr/bin/python_old_2.4
ln -s /usr/local/bin/python2.7 /usr/bin/python 
python -V    # 显示2.7.3版本则为安装成功
vim /usr/bin/yum
修改#!/usr/bin/python为#!/usr/bin/python2.4


cd ~




3.安装easy_install

wget -q http://peak.telecommunity.com/dist/ez_setup.py 
python ez_setup.py




4.安装mysql5.1.55

wget -c http://downloads.mysql.com/archives/mysql-5.1/mysql-5.1.55.tar.gz
tar zxvf mysql-5.1.55.tar.gz
cd mysql-5.1.55
sh BUILD/autorun.sh
./configure --prefix=/usr/local/mysql --with-charset=utf8 --with-extra-charset=all -enable-thread-safe-client -enable-assembler --with-readline --with-big-tables --with-named-curses-libs=/usr/lib/libncursesw.so.5
make && make install
ln -s /usr/local/mysql/bin/mysql /usr/local/bin/mysql
ln -s /usr/local/mysql/bin/mysqladmin /usr/local/bin/mysqladmin
groupadd mysql
useradd -g mysql mysql
chown -R mysql.mysql /usr/local/mysql/
cd /usr/local/mysql
mkdir var
chown -R root.mysql .
chown -R mysql /usr/local/mysql/var/
cp /root/mysql-5.1.55/support-files/mysql.server /etc/init.d/mysqld
chmod 700 /etc/init.d/mysqld
chkconfig --add mysqld
chkconfig mysqld on
cp /root/mysql-5.1.55/support-files/my-medium.cnf /etc/my.cnf
/usr/local/mysql/bin/mysql_install_db --user=mysql --datadir=/usr/local/mysql/var
/etc/init.d/mysqld start
mysqladmin -u root password 123123
mysql -u root -p -S /tmp/mysql.sock


cd ~




5.安装sphinx

wget -c http://sphinxsearch.com/files/sphinx-2.0.6-release.tar.gz
tar zxvf sphinx-2.0.6-release.tar.gz
cd sphinx-2.0.6-release
./configure --prefix=/usr/local/sphinx
make && make install
cd /usr/local/sphinx/etc/
cp sphinx.conf.dist sphinx.conf


cd ~



6.安装mysql_python 

wget -c -O MySQL-python-1.2.3c1.tar.gz "http://downloads.sourceforge.net/project/mysql-python/mysql-python-test/1.2.3c1/MySQL-python-1.2.3c1.tar.gz?use_mirror=nchc"
tar xzvf MySQL-python-1.2.3c1.tar.gz 
cd MySQL-python-1.2.3c1
ln -s /usr/local/mysql/bin/mysql_config /usr/local/bin/mysql_config
ln -s /usr/local/mysql/lib/mysql/libmysqlclient* /usr/lib
ldconfig
python setup.py install
cd ~




7.安装scrapy

wget -c http://lxml.de/files/lxml-3.0.1.tgz
tar zxvf lxml-3.0.1.tgz 
cd lxml-3.0.1
python setup.py install
cd ~
wget -c http://pypi.python.org/packages/source/T/Twisted/Twisted-12.2.0.tar.bz2#md5=9a321b904d01efd695079f8484b37861
tar jxvf Twisted-12.2.0.tar.bz2 
cd Twisted-12.2.0
python setup.py install
wget http://pypi.python.org/packages/source/p/pyOpenSSL/pyOpenSSL-0.12.tar.gz
tar zxvf pyOpenSSL-0.12.tar.gz
cd pyOpenSSL-0.12
python setup.py install
easy_install scrapy


cd ~




8.安装uwsgi

wget -c http://projects.unbit.it/downloads/uwsgi-1.4.3.tar.gz
tar zxvf uwsgi-1.4.3.tar.gz 
cd uwsgi-1.4.3
python setup.py install


cd ~



9. 安装Redis 

wget https://redis.googlecode.com/files/redis-2.6.7.tar.gz
tar zxvf redis-2.6.7.tar.gz
cd redis-2.6.7
make && make install

easy_install redis




10.安装nginx

cd ~
wget ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-8.30.tar.gz
tar zxvf pcre-8.30.tar.gz 
cd pcre-8.30
./configure 
make && make install
cd ~



wget -O zlib-1.2.6.tar.gz http://sourceforge.net/projects/libpng/files/zlib/1.2.6/zlib-1.2.6.tar.gz/download
tar zxvf zlib-1.2.6.tar.gz 
cd zlib-1.2.6
./configure 
make && make install
cd ~



wget http://nginx.org/download/nginx-1.1.9.tar.gz
tar zxvf nginx-1.1.9.tar.gz 
cd nginx-1.1.9
mkdir -p /var/tmp/nginx
./configure --prefix=/usr/local/nginx --pid-path=/var/run/nginx.pid --lock-path=/var/lock/nginx.lock --with-http_ssl_module --with-http_dav_module --with-http_flv_module --with-http_realip_module --with-http_gzip_static_module --with-http_stub_status_module --with-mail --with-mail_ssl_module --with-pcre=../pcre-8.30 --with-zlib=../zlib-1.2.6 --with-debug --http-client-body-temp-path=/var/tmp/nginx/client --http-proxy-temp-path=/var/tmp/nginx/proxy --http-fastcgi-temp-path=/var/tmp/nginx/fastcgi --http-uwsgi-temp-path=/var/tmp/nginx/uwsgi --http-scgi-temp-path=/var/tmp/nginx/scgi 
make && make install
vim /etc/init.d/nginx


输入:

#!/bin/bash
#
#chkconfig: - 85 15
#description: Nginx is a World Wide Web server.
#processname: nginx

# SysV init script for the nginx build installed under /usr/local/nginx.
# Usage: /etc/init.d/nginx {start|restart|reload|stop|test|show}

nginx=/usr/local/nginx/sbin/nginx
conf=/usr/local/nginx/conf/nginx.conf

case "$1" in
  start)
    echo -n "Starting Nginx"
    "$nginx" -c "$conf"
    echo " done"
    ;;

  stop)
    echo -n "Stopping Nginx"
    # Send the default signal (TERM) first so nginx can shut down on its own;
    # SIGKILL is only used if processes are still alive after ~5 seconds.
    killall nginx
    for _ in 1 2 3 4 5; do
      # Signal 0 delivers nothing; it only reports whether a process exists.
      killall -0 nginx 2>/dev/null || break
      sleep 1
    done
    if killall -0 nginx 2>/dev/null; then
      killall -9 nginx
    fi
    echo " done"
    ;;

  test)
    # Syntax-check the configuration file without starting the server.
    "$nginx" -t -c "$conf"
    ;;

  reload)
    echo -n "Reloading Nginx"
    # Let nginx signal its own master process instead of scraping ps output.
    "$nginx" -c "$conf" -s reload
    echo " done"
    ;;

  restart)
    "$0" stop
    "$0" start
    ;;

  show)
    ps aux | grep nginx
    ;;

  *)
    echo "Usage: $0 {start|restart|reload|stop|test|show}"
    ;;

esac


chmod +x /etc/init.d/nginx
chkconfig --add nginx  
chkconfig nginx on




11. 配置sphinx:
在/usr/local/sphinx/etc下创建scrapy.conf,写入:

source scrapy_source
{
type        = mysql
sql_host    = localhost
sql_user    = root
sql_pass    = 123123
sql_db      = python
sql_port    = 3306
sql_query_pre   = SET NAMES utf8
sql_query       = SELECT id,title,keywords,descrip,body FROM `data`
}
index scrapy_index
{
source      = scrapy_source
path        = /usr/local/sphinx/var/data/scrapy_index
docinfo     = extern
mlock       = 0
morphology  = none
min_word_len    = 1
html_strip  = 0
}


indexer
{
mem_limit = 128M
}


searchd
{
listen           = 9312
read_timeout     = 5
max_children     = 30
max_matches      = 1000
seamless_rotate  = 0
preopen_indexes  = 0
unlink_old       = 1
pid_file         = /usr/local/sphinx/var/log/searchd.pid
log              = /usr/local/sphinx/var/log/searchd.log
query_log        = /usr/local/sphinx/var/log/query.log
}




创建索引:

/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/scrapy.conf --all


重新创建索引:

/usr/local/sphinx/bin/indexer -c /usr/local/sphinx/etc/scrapy.conf --all --rotate


搜索测试:

/usr/local/sphinx/bin/search -c /usr/local/sphinx/etc/scrapy.conf centos


beta0.2:

1. fix几处开发套件遗漏问题

2. fix scrapy安装过程中pyOpenssl问题

3. fix mysql安装步骤先后顺序问题


beta0.3:

1. fix一处python mysql扩展执行时找不到mysql库文件的问题

2. fix python redis扩展安装的遗漏

深入折腾scrapy之一:环境搭建 beta0.3(2013-2-4更新)

标签:

原文地址:http://www.cnblogs.com/qxhy123/p/5062371.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!