• 카테고리

    질문 & 답변
  • 세부 분야

    프로그래밍 언어

  • 해결 여부

    미해결

결과물을 보면 행이 바뀔때 \n이 한개가 있는게 아니라 \n\n 적용이 됩니다.

19.03.21 23:02 작성 조회수 80

0

from bs4 import BeautifulSoup

import csv

with open("ipa110106.XML", "r", encoding="utf8") as source_file:

xml = source_file.read()

text = '<?xml version="1.0" encoding="UTF-8"?>'

xml_list = xml.split(text)

line_header = 0

header_list = ["publication_doc_number", "publication_date", "application_doc_number", "application_date", "invention_title"]

with open("data.csv", "w", encoding="utf8") as destination_file:

for temp in xml_list:

if len(temp) == 0:

continue

else:

if line_header == 0:

destination_file.write(",".join(header_list)+"n")

line_header += 1

else:

line_header += 1

soup = BeautifulSoup(temp, "lxml")

publication_reference = soup.find("publication-reference")

publication_doc_number = publication_reference.find("doc-number")

publication_date = publication_reference.find("date")

application_reference = soup.find("application-reference")

application_doc_number = application_reference.find("doc-number")

application_date = application_reference.find("date")

invention_title = soup.find("invention-title")

writer = csv.writer(destination_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)

writer.writerow([publication_doc_number.get_text(), publication_date.get_text(), application_doc_number.get_text(), application_date.get_text(), invention_title.get_text()])

output

publication_doc_number publication_date application_doc_number application_date invention_title

20110000003 20110106 12460569 20090721 Wetsuit made with a non-absorbent and quick drying fabric

20110000004 20110106 12459737 20090706 Fingerezz

20110000005 20110106 12498332 20090706 POSTURE IMPROVING GARMENT

20110000006 20110106 12497914 20090706 HEEL PROTECTORS

한줄씩 더 띄어지는데 문제가 뭘까요 교수님..?ㅠ

답변 1

답변을 작성해보세요.

0

아마 \r\n 이 들어가서 그런듯 합니다. lineterminator를 아래처럼 설정해줄 수 있습니다.

csv.register_dialect('myDialect', delimiter = '|', lineterminator = 'rnrn')

with open('lineterminator.csv', 'w') as f:

writer = csv.writer(f, dialect='myDialect')

writer.writerows(csvData)