-
카테고리
-
세부 분야
프로그래밍 언어
-
해결 여부
미해결
결과물을 보면 행이 바뀔때 \n이 한개가 있는게 아니라 \n\n 적용이 됩니다.
19.03.21 23:02 작성 조회수 80
0
from bs4 import BeautifulSoup
import csv
with open("ipa110106.XML", "r", encoding="utf8") as source_file:
xml = source_file.read()
text = '<?xml version="1.0" encoding="UTF-8"?>'
xml_list = xml.split(text)
line_header = 0
header_list = ["publication_doc_number", "publication_date", "application_doc_number", "application_date", "invention_title"]
with open("data.csv", "w", encoding="utf8") as destination_file:
for temp in xml_list:
if len(temp) == 0:
continue
else:
if line_header == 0:
destination_file.write(",".join(header_list)+"n")
line_header += 1
else:
line_header += 1
soup = BeautifulSoup(temp, "lxml")
publication_reference = soup.find("publication-reference")
publication_doc_number = publication_reference.find("doc-number")
publication_date = publication_reference.find("date")
application_reference = soup.find("application-reference")
application_doc_number = application_reference.find("doc-number")
application_date = application_reference.find("date")
invention_title = soup.find("invention-title")
writer = csv.writer(destination_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
writer.writerow([publication_doc_number.get_text(), publication_date.get_text(), application_doc_number.get_text(), application_date.get_text(), invention_title.get_text()])
output
publication_doc_number publication_date application_doc_number application_date invention_title
20110000003 20110106 12460569 20090721 Wetsuit made with a non-absorbent and quick drying fabric
20110000004 20110106 12459737 20090706 Fingerezz
20110000005 20110106 12498332 20090706 POSTURE IMPROVING GARMENT
20110000006 20110106 12497914 20090706 HEEL PROTECTORS
한줄씩 더 띄어지는데 문제가 뭘까요 교수님..?ㅠ
답변을 작성해보세요.
0
TeamLab
지식공유자2019.03.23
아마 \r\n
이 들어가서 그런듯 합니다. lineterminator를 아래처럼 설정해줄 수 있습니다.
csv.register_dialect('myDialect', delimiter = '|', lineterminator = 'rnrn')
with open('lineterminator.csv', 'w') as f:
writer = csv.writer(f, dialect='myDialect')
writer.writerows(csvData)
답변 1